535.113.01

This commit is contained in:
Maneet Singh 2023-09-21 10:43:43 -07:00
parent a8e01be6b2
commit f59818b751
No known key found for this signature in database
94 changed files with 2414 additions and 800 deletions

View File

@ -2,6 +2,12 @@
## Release 535 Entries
### [535.113.01] 2023-09-21
#### Fixed
- Fixed building main against current centos stream 8 fails, [#550](https://github.com/NVIDIA/open-gpu-kernel-modules/issues/550) by @airlied
### [535.104.05] 2023-08-22
### [535.98] 2023-08-08

View File

@ -1,7 +1,7 @@
# NVIDIA Linux Open GPU Kernel Module Source
This is the source release of the NVIDIA Linux open GPU kernel modules,
version 535.104.05.
version 535.113.01.
## How to Build
@ -17,7 +17,7 @@ as root:
Note that the kernel modules built here must be used with GSP
firmware and user-space NVIDIA GPU driver components from a corresponding
535.104.05 driver release. This can be achieved by installing
535.113.01 driver release. This can be achieved by installing
the NVIDIA GPU driver from the .run file using the `--no-kernel-modules`
option. E.g.,
@ -180,7 +180,7 @@ software applications.
## Compatible GPUs
The open-gpu-kernel-modules can be used on any Turing or later GPU
(see the table below). However, in the 535.104.05 release,
(see the table below). However, in the 535.113.01 release,
GeForce and Workstation support is still considered alpha-quality.
To enable use of the open kernel modules on GeForce and Workstation GPUs,
@ -188,7 +188,7 @@ set the "NVreg_OpenRmEnableUnsupportedGpus" nvidia.ko kernel module
parameter to 1. For more details, see the NVIDIA GPU driver end user
README here:
https://us.download.nvidia.com/XFree86/Linux-x86_64/535.104.05/README/kernel_open.html
https://us.download.nvidia.com/XFree86/Linux-x86_64/535.113.01/README/kernel_open.html
In the below table, if three IDs are listed, the first is the PCI Device
ID, the second is the PCI Subsystem Vendor ID, and the third is the PCI
@ -856,6 +856,10 @@ Subsystem Device ID.
| NVIDIA RTX 4000 SFF Ada Generation | 27B0 103C 16FA |
| NVIDIA RTX 4000 SFF Ada Generation | 27B0 10DE 16FA |
| NVIDIA RTX 4000 SFF Ada Generation | 27B0 17AA 16FA |
| NVIDIA RTX 4500 Ada Generation | 27B1 1028 180C |
| NVIDIA RTX 4500 Ada Generation | 27B1 103C 180C |
| NVIDIA RTX 4500 Ada Generation | 27B1 10DE 180C |
| NVIDIA RTX 4500 Ada Generation | 27B1 17AA 180C |
| NVIDIA RTX 4000 Ada Generation | 27B2 1028 181B |
| NVIDIA RTX 4000 Ada Generation | 27B2 103C 181B |
| NVIDIA RTX 4000 Ada Generation | 27B2 10DE 181B |

View File

@ -72,7 +72,7 @@ EXTRA_CFLAGS += -I$(src)/common/inc
EXTRA_CFLAGS += -I$(src)
EXTRA_CFLAGS += -Wall $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-error -Wno-format-extra-args
EXTRA_CFLAGS += -D__KERNEL__ -DMODULE -DNVRM
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"535.104.05\"
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"535.113.01\"
ifneq ($(SYSSRCHOST1X),)
EXTRA_CFLAGS += -I$(SYSSRCHOST1X)

View File

@ -207,9 +207,13 @@ enum os_pci_req_atomics_type {
OS_INTF_PCIE_REQ_ATOMICS_128BIT
};
NV_STATUS NV_API_CALL os_enable_pci_req_atomics (void *, enum os_pci_req_atomics_type);
NV_STATUS NV_API_CALL os_get_numa_node_memory_usage (NvS32, NvU64 *, NvU64 *);
NV_STATUS NV_API_CALL os_numa_add_gpu_memory (void *, NvU64, NvU64, NvU32 *);
NV_STATUS NV_API_CALL os_numa_remove_gpu_memory (void *, NvU64, NvU64, NvU32);
NV_STATUS NV_API_CALL os_offline_page_at_address(NvU64 address);
void* NV_API_CALL os_get_pid_info(void);
void NV_API_CALL os_put_pid_info(void *pid_info);
NV_STATUS NV_API_CALL os_find_ns_pid(void *pid_info, NvU32 *ns_pid);
extern NvU32 os_page_size;
extern NvU64 os_page_mask;

View File

@ -5636,23 +5636,6 @@ compile_test() {
compile_check_conftest "$CODE" "NV_GPIO_TO_IRQ_PRESENT" "" "functions"
;;
migrate_vma_setup)
#
# Determine if migrate_vma_setup() function is present
#
# migrate_vma_setup() function was added by commit
# a7d1f22bb74f32cf3cd93f52776007e161f1a738 ("mm: turn migrate_vma
# upside down) in v5.4.
# (2019-08-20).
CODE="
#include <linux/migrate.h>
int conftest_migrate_vma_setup(void) {
migrate_vma_setup();
}"
compile_check_conftest "$CODE" "NV_MIGRATE_VMA_SETUP_PRESENT" "" "functions"
;;
migrate_vma_added_flags)
#
# Determine if migrate_vma structure has flags
@ -5743,23 +5726,25 @@ compile_test() {
compile_check_conftest "$CODE" "NV_IOASID_GET_PRESENT" "" "functions"
;;
mm_pasid_set)
mm_pasid_drop)
#
# Determine if mm_pasid_set() function is present
# Determine if mm_pasid_drop() function is present
#
# Added by commit 701fac40384f ("iommu/sva: Assign a PASID to mm
# on PASID allocation and free it on mm exit") in v5.18.
# Moved to linux/iommu.h in commit cd3891158a77 ("iommu/sva: Move
# PASID helpers to sva code") in v6.4.
#
# mm_pasid_set() function was added by commit
# 701fac40384f07197b106136012804c3cae0b3de (iommu/sva: Assign a
# PASID to mm on PASID allocation and free it on mm exit) in v5.18.
# (2022-02-15).
CODE="
#if defined(NV_LINUX_SCHED_MM_H_PRESENT)
#include <linux/sched/mm.h>
#endif
void conftest_mm_pasid_set(void) {
mm_pasid_set();
#include <linux/iommu.h>
void conftest_mm_pasid_drop(void) {
mm_pasid_drop();
}"
compile_check_conftest "$CODE" "NV_MM_PASID_SET_PRESENT" "" "functions"
compile_check_conftest "$CODE" "NV_MM_PASID_DROP_PRESENT" "" "functions"
;;
drm_crtc_state_has_no_vblank)
@ -6341,6 +6326,22 @@ compile_test() {
compile_check_conftest "$CODE" "NV_MEMPOLICY_HAS_HOME_NODE" "" "types"
;;
mpol_preferred_many_present)
#
# Determine if MPOL_PREFERRED_MANY enum is present or not
#
# Added by commit b27abaccf8e8b ("mm/mempolicy: add
# MPOL_PREFERRED_MANY for multiple preferred nodes") in
# v5.15
#
CODE="
#include <linux/mempolicy.h>
int mpol_preferred_many = MPOL_PREFERRED_MANY;
"
compile_check_conftest "$CODE" "NV_MPOL_PREFERRED_MANY_PRESENT" "" "types"
;;
mmu_interval_notifier)
#
# Determine if mmu_interval_notifier struct is present or not

View File

@ -81,8 +81,7 @@ NV_CONFTEST_FUNCTION_COMPILE_TESTS += set_memory_uc
NV_CONFTEST_FUNCTION_COMPILE_TESTS += set_pages_uc
NV_CONFTEST_FUNCTION_COMPILE_TESTS += ktime_get_raw_ts64
NV_CONFTEST_FUNCTION_COMPILE_TESTS += ioasid_get
NV_CONFTEST_FUNCTION_COMPILE_TESTS += mm_pasid_set
NV_CONFTEST_FUNCTION_COMPILE_TESTS += migrate_vma_setup
NV_CONFTEST_FUNCTION_COMPILE_TESTS += mm_pasid_drop
NV_CONFTEST_FUNCTION_COMPILE_TESTS += mmget_not_zero
NV_CONFTEST_FUNCTION_COMPILE_TESTS += mmgrab
NV_CONFTEST_FUNCTION_COMPILE_TESTS += iommu_sva_bind_device_has_drvdata_arg
@ -110,6 +109,8 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += handle_mm_fault_has_mm_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += handle_mm_fault_has_pt_regs_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += mempolicy_has_unified_nodes
NV_CONFTEST_TYPE_COMPILE_TESTS += mempolicy_has_home_node
NV_CONFTEST_TYPE_COMPILE_TESTS += mpol_preferred_many_present
NV_CONFTEST_TYPE_COMPILE_TESTS += mmu_interval_notifier
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_int_active_memcg
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_migrate_vma_setup

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2018 NVIDIA Corporation
Copyright (c) 2023 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -149,7 +149,11 @@ static void ats_batch_select_residency(uvm_gpu_va_space_t *gpu_va_space,
mode = vma_policy->mode;
if ((mode == MPOL_BIND) || (mode == MPOL_PREFERRED_MANY) || (mode == MPOL_PREFERRED)) {
if ((mode == MPOL_BIND)
#if defined(NV_MPOL_PREFERRED_MANY_PRESENT)
|| (mode == MPOL_PREFERRED_MANY)
#endif
|| (mode == MPOL_PREFERRED)) {
int home_node = NUMA_NO_NODE;
#if defined(NV_MEMPOLICY_HAS_HOME_NODE)
@ -467,6 +471,10 @@ NV_STATUS uvm_ats_service_faults(uvm_gpu_va_space_t *gpu_va_space,
uvm_page_mask_and(write_fault_mask, write_fault_mask, read_fault_mask);
else
uvm_page_mask_zero(write_fault_mask);
// There are no pending faults beyond write faults to RO region.
if (uvm_page_mask_empty(read_fault_mask))
return status;
}
ats_batch_select_residency(gpu_va_space, vma, ats_context);

View File

@ -32,19 +32,23 @@
// For ATS support on aarch64, arm_smmu_sva_bind() is needed for
// iommu_sva_bind_device() calls. Unfortunately, arm_smmu_sva_bind() is not
// conftest-able. We instead look for the presence of ioasid_get() or
// mm_pasid_set(). ioasid_get() was added in the same patch series as
// arm_smmu_sva_bind() and removed in v6.0. mm_pasid_set() was added in the
// mm_pasid_drop(). ioasid_get() was added in the same patch series as
// arm_smmu_sva_bind() and removed in v6.0. mm_pasid_drop() was added in the
// same patch as the removal of ioasid_get(). We assume the presence of
// arm_smmu_sva_bind() if ioasid_get(v5.11 - v5.17) or mm_pasid_set(v5.18+) is
// arm_smmu_sva_bind() if ioasid_get(v5.11 - v5.17) or mm_pasid_drop(v5.18+) is
// present.
//
// arm_smmu_sva_bind() was added with commit
// 32784a9562fb0518b12e9797ee2aec52214adf6f and ioasid_get() was added with
// commit cb4789b0d19ff231ce9f73376a023341300aed96 (11/23/2020). Commit
// 701fac40384f07197b106136012804c3cae0b3de (02/15/2022) removed ioasid_get()
// and added mm_pasid_set().
#if UVM_CAN_USE_MMU_NOTIFIERS() && (defined(NV_IOASID_GET_PRESENT) || defined(NV_MM_PASID_SET_PRESENT))
#define UVM_ATS_SVA_SUPPORTED() 1
// and added mm_pasid_drop().
#if UVM_CAN_USE_MMU_NOTIFIERS() && (defined(NV_IOASID_GET_PRESENT) || defined(NV_MM_PASID_DROP_PRESENT))
#if defined(CONFIG_IOMMU_SVA)
#define UVM_ATS_SVA_SUPPORTED() 1
#else
#define UVM_ATS_SVA_SUPPORTED() 0
#endif
#else
#define UVM_ATS_SVA_SUPPORTED() 0
#endif

View File

@ -191,7 +191,7 @@ static NV_STATUS test_membar(uvm_gpu_t *gpu)
for (i = 0; i < REDUCTIONS; ++i) {
uvm_push_set_flag(&push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
gpu->parent->ce_hal->semaphore_reduction_inc(&push, host_mem_gpu_va, REDUCTIONS + 1);
gpu->parent->ce_hal->semaphore_reduction_inc(&push, host_mem_gpu_va, REDUCTIONS);
}
// Without a sys membar the channel tracking semaphore can and does complete
@ -577,7 +577,7 @@ static NV_STATUS test_semaphore_reduction_inc(uvm_gpu_t *gpu)
for (i = 0; i < REDUCTIONS; i++) {
uvm_push_set_flag(&push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
gpu->parent->ce_hal->semaphore_reduction_inc(&push, gpu_va, i+1);
gpu->parent->ce_hal->semaphore_reduction_inc(&push, gpu_va, REDUCTIONS);
}
status = uvm_push_end_and_wait(&push);

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2022 NVIDIA Corporation
Copyright (c) 2015-2023 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -697,9 +697,6 @@ static inline int cmp_access_type(uvm_fault_access_type_t a, uvm_fault_access_ty
typedef enum
{
// Fetch a batch of faults from the buffer.
FAULT_FETCH_MODE_BATCH_ALL,
// Fetch a batch of faults from the buffer. Stop at the first entry that is
// not ready yet
FAULT_FETCH_MODE_BATCH_READY,
@ -857,9 +854,7 @@ static NV_STATUS fetch_fault_buffer_entries(uvm_gpu_t *gpu,
// written out of order
UVM_SPIN_WHILE(!gpu->parent->fault_buffer_hal->entry_is_valid(gpu->parent, get), &spin) {
// We have some entry to work on. Let's do the rest later.
if (fetch_mode != FAULT_FETCH_MODE_ALL &&
fetch_mode != FAULT_FETCH_MODE_BATCH_ALL &&
fault_index > 0)
if (fetch_mode == FAULT_FETCH_MODE_BATCH_READY && fault_index > 0)
goto done;
}
@ -888,6 +883,7 @@ static NV_STATUS fetch_fault_buffer_entries(uvm_gpu_t *gpu,
current_entry->va_space = NULL;
current_entry->filtered = false;
current_entry->replayable.cancel_va_mode = UVM_FAULT_CANCEL_VA_MODE_ALL;
if (current_entry->fault_source.utlb_id > batch_context->max_utlb_id) {
UVM_ASSERT(current_entry->fault_source.utlb_id < replayable_faults->utlb_count);
@ -1378,7 +1374,10 @@ static NV_STATUS service_fault_batch_block_locked(uvm_gpu_t *gpu,
UVM_ASSERT(current_entry->fault_access_type ==
uvm_fault_access_type_mask_highest(current_entry->access_type_mask));
current_entry->is_fatal = false;
// Unserviceable faults were already skipped by the caller. There are no
// unserviceable fault types that could be in the same VA block as a
// serviceable fault.
UVM_ASSERT(!current_entry->is_fatal);
current_entry->is_throttled = false;
current_entry->is_invalid_prefetch = false;
@ -1735,6 +1734,10 @@ static NV_STATUS service_fault_batch_ats_sub(uvm_gpu_va_space_t *gpu_va_space,
uvm_fault_access_type_t access_type = current_entry->fault_access_type;
bool is_duplicate = check_fault_entry_duplicate(current_entry, previous_entry);
// ATS faults can't be unserviceable, since unserviceable faults require
// GMMU PTEs.
UVM_ASSERT(!current_entry->is_fatal);
i++;
update_batch_and_notify_fault(gpu_va_space->gpu,
@ -2044,8 +2047,10 @@ static NV_STATUS service_fault_batch(uvm_gpu_t *gpu,
uvm_va_space_mm_release_unlock(va_space, mm);
mm = NULL;
va_space = NULL;
status = NV_OK;
continue;
}
if (status != NV_OK)
goto fail;
@ -2273,27 +2278,9 @@ static NvU32 is_fatal_fault_in_buffer(uvm_fault_service_batch_context_t *batch_c
return false;
}
typedef enum
{
// Only cancel faults flagged as fatal
FAULT_CANCEL_MODE_FATAL,
// Cancel all faults in the batch unconditionally
FAULT_CANCEL_MODE_ALL,
} fault_cancel_mode_t;
// Cancel faults in the given fault service batch context. The function provides
// two different modes depending on the value of cancel_mode:
// - If cancel_mode == FAULT_CANCEL_MODE_FATAL, only faults flagged as fatal
// will be cancelled. In this case, the reason reported to tools is the one
// contained in the fault entry itself.
// - If cancel_mode == FAULT_CANCEL_MODE_ALL, all faults will be cancelled
// unconditionally. In this case, the reason reported to tools for non-fatal
// faults is the one passed to this function.
static NV_STATUS cancel_faults_precise_va(uvm_gpu_t *gpu,
uvm_fault_service_batch_context_t *batch_context,
fault_cancel_mode_t cancel_mode,
UvmEventFatalReason reason)
// Cancel just the faults flagged as fatal in the given fault service batch
// context.
static NV_STATUS cancel_faults_precise_va(uvm_gpu_t *gpu, uvm_fault_service_batch_context_t *batch_context)
{
NV_STATUS status = NV_OK;
NV_STATUS fault_status;
@ -2301,8 +2288,6 @@ static NV_STATUS cancel_faults_precise_va(uvm_gpu_t *gpu,
NvU32 i;
UVM_ASSERT(gpu->parent->fault_cancel_va_supported);
if (cancel_mode == FAULT_CANCEL_MODE_ALL)
UVM_ASSERT(reason != UvmEventFatalReasonInvalid);
for (i = 0; i < batch_context->num_coalesced_faults; ++i) {
uvm_fault_buffer_entry_t *current_entry = batch_context->ordered_fault_cache[i];
@ -2320,12 +2305,66 @@ static NV_STATUS cancel_faults_precise_va(uvm_gpu_t *gpu,
uvm_va_space_down_read(va_space);
// We don't need to check whether a buffer flush is required
// (due to VA range destruction).
// - For cancel_mode == FAULT_CANCEL_MODE_FATAL, once a fault is
// flagged as fatal we need to cancel it, even if its VA range no
// longer exists.
// - For cancel_mode == FAULT_CANCEL_MODE_ALL we don't care about
// any of this, we just want to trigger RC in RM.
// (due to VA range destruction). Once a fault is flagged as fatal
// we need to cancel it, even if its VA range no longer exists.
}
// See the comment for the same check in cancel_faults_all
if (!uvm_processor_mask_test(&va_space->registered_gpu_va_spaces, gpu->parent->id))
continue;
if (current_entry->is_fatal) {
status = cancel_fault_precise_va(gpu, current_entry, current_entry->replayable.cancel_va_mode);
if (status != NV_OK)
break;
}
}
if (va_space != NULL)
uvm_va_space_up_read(va_space);
// See the comment on flushing in cancel_faults_all
fault_status = fault_buffer_flush_locked(gpu,
UVM_GPU_BUFFER_FLUSH_MODE_UPDATE_PUT,
UVM_FAULT_REPLAY_TYPE_START,
batch_context);
// We report the first encountered error.
if (status == NV_OK)
status = fault_status;
return status;
}
// Cancel all faults in the given fault service batch context, even those not
// marked as fatal.
static NV_STATUS cancel_faults_all(uvm_gpu_t *gpu,
uvm_fault_service_batch_context_t *batch_context,
UvmEventFatalReason reason)
{
NV_STATUS status = NV_OK;
NV_STATUS fault_status;
uvm_va_space_t *va_space = NULL;
NvU32 i;
UVM_ASSERT(gpu->parent->fault_cancel_va_supported);
UVM_ASSERT(reason != UvmEventFatalReasonInvalid);
for (i = 0; i < batch_context->num_coalesced_faults; ++i) {
uvm_fault_buffer_entry_t *current_entry = batch_context->ordered_fault_cache[i];
uvm_fault_cancel_va_mode_t cancel_va_mode;
UVM_ASSERT(current_entry->va_space);
if (current_entry->va_space != va_space) {
// Fault on a different va_space, drop the lock of the old one...
if (va_space != NULL)
uvm_va_space_up_read(va_space);
va_space = current_entry->va_space;
// ... and take the lock of the new one
uvm_va_space_down_read(va_space);
}
if (!uvm_processor_mask_test(&va_space->registered_gpu_va_spaces, gpu->parent->id)) {
@ -2337,32 +2376,28 @@ static NV_STATUS cancel_faults_precise_va(uvm_gpu_t *gpu,
continue;
}
// Cancel the fault
if (cancel_mode == FAULT_CANCEL_MODE_ALL || current_entry->is_fatal) {
uvm_fault_cancel_va_mode_t cancel_va_mode = current_entry->replayable.cancel_va_mode;
// If cancelling unconditionally and the fault was not fatal,
// set the cancel reason passed to this function
if (!current_entry->is_fatal) {
current_entry->fatal_reason = reason;
cancel_va_mode = UVM_FAULT_CANCEL_VA_MODE_ALL;
}
status = cancel_fault_precise_va(gpu, current_entry, cancel_va_mode);
if (status != NV_OK)
break;
// If the fault was already marked fatal, use its reason and cancel
// mode. Otherwise use the provided reason.
if (current_entry->is_fatal) {
UVM_ASSERT(current_entry->fatal_reason != UvmEventFatalReasonInvalid);
cancel_va_mode = current_entry->replayable.cancel_va_mode;
}
else {
current_entry->fatal_reason = reason;
cancel_va_mode = UVM_FAULT_CANCEL_VA_MODE_ALL;
}
status = cancel_fault_precise_va(gpu, current_entry, cancel_va_mode);
if (status != NV_OK)
break;
}
if (va_space != NULL)
uvm_va_space_up_read(va_space);
// After cancelling the fatal faults, the fault buffer is flushed to remove
// any potential duplicated fault that may have been added while processing
// the faults in this batch. This flush also avoids doing unnecessary
// processing after the fatal faults have been cancelled, so all the rest
// are unlikely to remain after a replay because the context is probably in
// the process of dying.
// Because each cancel itself triggers a replay, there may be a large number
// of new duplicated faults in the buffer after cancelling all the known
// ones. Flushing the buffer discards them to avoid unnecessary processing.
fault_status = fault_buffer_flush_locked(gpu,
UVM_GPU_BUFFER_FLUSH_MODE_UPDATE_PUT,
UVM_FAULT_REPLAY_TYPE_START,
@ -2410,12 +2445,12 @@ static void cancel_fault_batch(uvm_gpu_t *gpu,
uvm_fault_service_batch_context_t *batch_context,
UvmEventFatalReason reason)
{
if (gpu->parent->fault_cancel_va_supported) {
cancel_faults_precise_va(gpu, batch_context, FAULT_CANCEL_MODE_ALL, reason);
return;
}
cancel_fault_batch_tlb(gpu, batch_context, reason);
// Return code is ignored since we're on a global error path and wouldn't be
// able to recover anyway.
if (gpu->parent->fault_cancel_va_supported)
cancel_faults_all(gpu, batch_context, reason);
else
cancel_fault_batch_tlb(gpu, batch_context, reason);
}
@ -2582,12 +2617,8 @@ static NV_STATUS cancel_faults_precise_tlb(uvm_gpu_t *gpu, uvm_fault_service_bat
static NV_STATUS cancel_faults_precise(uvm_gpu_t *gpu, uvm_fault_service_batch_context_t *batch_context)
{
UVM_ASSERT(batch_context->has_fatal_faults);
if (gpu->parent->fault_cancel_va_supported) {
return cancel_faults_precise_va(gpu,
batch_context,
FAULT_CANCEL_MODE_FATAL,
UvmEventFatalReasonInvalid);
}
if (gpu->parent->fault_cancel_va_supported)
return cancel_faults_precise_va(gpu, batch_context);
return cancel_faults_precise_tlb(gpu, batch_context);
}

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2020-2022 NVIDIA Corporation
Copyright (c) 2020-2023 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -368,7 +368,10 @@ static NvU64 small_half_pde_hopper(uvm_mmu_page_table_alloc_t *phys_alloc)
return pde_bits;
}
static void make_pde_hopper(void *entry, uvm_mmu_page_table_alloc_t **phys_allocs, NvU32 depth)
static void make_pde_hopper(void *entry,
uvm_mmu_page_table_alloc_t **phys_allocs,
NvU32 depth,
uvm_page_directory_t *child_dir)
{
NvU32 entry_count = entries_per_index_hopper(depth);
NvU64 *entry_bits = (NvU64 *)entry;

View File

@ -153,10 +153,6 @@ static inline const struct cpumask *uvm_cpumask_of_node(int node)
#define VM_MIXEDMAP 0x00000000
#endif
#if !defined(MPOL_PREFERRED_MANY)
#define MPOL_PREFERRED_MANY 5
#endif
//
// printk.h already defined pr_fmt, so we have to redefine it so the pr_*
// routines pick up our version

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2016-2021 NVIDIA Corporation
Copyright (c) 2016-2023 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -106,7 +106,10 @@ static NvU64 small_half_pde_maxwell(uvm_mmu_page_table_alloc_t *phys_alloc)
return pde_bits;
}
static void make_pde_maxwell(void *entry, uvm_mmu_page_table_alloc_t **phys_allocs, NvU32 depth)
static void make_pde_maxwell(void *entry,
uvm_mmu_page_table_alloc_t **phys_allocs,
NvU32 depth,
uvm_page_directory_t *child_dir)
{
NvU64 pde_bits = 0;
UVM_ASSERT(depth == 0);

View File

@ -51,7 +51,7 @@ typedef struct
#if defined(CONFIG_MIGRATE_VMA_HELPER)
#define UVM_MIGRATE_VMA_SUPPORTED 1
#else
#if defined(CONFIG_DEVICE_PRIVATE) && defined(NV_MIGRATE_VMA_SETUP_PRESENT)
#if NV_IS_EXPORT_SYMBOL_PRESENT_migrate_vma_setup
#define UVM_MIGRATE_VMA_SUPPORTED 1
#endif
#endif

View File

@ -323,37 +323,153 @@ static void uvm_mmu_page_table_cpu_memset_16(uvm_gpu_t *gpu,
uvm_mmu_page_table_cpu_unmap(gpu, phys_alloc);
}
static void pde_fill_cpu(uvm_page_tree_t *tree,
uvm_page_directory_t *directory,
NvU32 start_index,
NvU32 pde_count,
uvm_mmu_page_table_alloc_t **phys_addr)
{
NvU64 pde_data[2], entry_size;
NvU32 i;
UVM_ASSERT(uvm_mmu_use_cpu(tree));
entry_size = tree->hal->entry_size(directory->depth);
UVM_ASSERT(sizeof(pde_data) >= entry_size);
for (i = 0; i < pde_count; i++) {
tree->hal->make_pde(pde_data, phys_addr, directory->depth, directory->entries[start_index + i]);
if (entry_size == sizeof(pde_data[0]))
uvm_mmu_page_table_cpu_memset_8(tree->gpu, &directory->phys_alloc, start_index + i, pde_data[0], 1);
else
uvm_mmu_page_table_cpu_memset_16(tree->gpu, &directory->phys_alloc, start_index + i, pde_data, 1);
}
}
static void pde_fill_gpu(uvm_page_tree_t *tree,
uvm_page_directory_t *directory,
NvU32 start_index,
NvU32 pde_count,
uvm_mmu_page_table_alloc_t **phys_addr,
uvm_push_t *push)
{
NvU64 pde_data[2], entry_size;
uvm_gpu_address_t pde_entry_addr = uvm_mmu_gpu_address(tree->gpu, directory->phys_alloc.addr);
NvU32 max_inline_entries;
uvm_push_flag_t push_membar_flag = UVM_PUSH_FLAG_COUNT;
uvm_gpu_address_t inline_data_addr;
uvm_push_inline_data_t inline_data;
NvU32 entry_count, i, j;
UVM_ASSERT(!uvm_mmu_use_cpu(tree));
entry_size = tree->hal->entry_size(directory->depth);
UVM_ASSERT(sizeof(pde_data) >= entry_size);
max_inline_entries = UVM_PUSH_INLINE_DATA_MAX_SIZE / entry_size;
if (uvm_push_get_and_reset_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE))
push_membar_flag = UVM_PUSH_FLAG_NEXT_MEMBAR_NONE;
else if (uvm_push_get_and_reset_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_GPU))
push_membar_flag = UVM_PUSH_FLAG_NEXT_MEMBAR_GPU;
pde_entry_addr.address += start_index * entry_size;
for (i = 0; i < pde_count;) {
// All but the first memory operation can be pipelined. We respect the
// caller's pipelining settings for the first push.
if (i != 0)
uvm_push_set_flag(push, UVM_PUSH_FLAG_CE_NEXT_PIPELINED);
entry_count = min(pde_count - i, max_inline_entries);
// No membar is needed until the last memory operation. Otherwise,
// use caller's membar flag.
if ((i + entry_count) < pde_count)
uvm_push_set_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
else if (push_membar_flag != UVM_PUSH_FLAG_COUNT)
uvm_push_set_flag(push, push_membar_flag);
uvm_push_inline_data_begin(push, &inline_data);
for (j = 0; j < entry_count; j++) {
tree->hal->make_pde(pde_data, phys_addr, directory->depth, directory->entries[start_index + i + j]);
uvm_push_inline_data_add(&inline_data, pde_data, entry_size);
}
inline_data_addr = uvm_push_inline_data_end(&inline_data);
tree->gpu->parent->ce_hal->memcopy(push, pde_entry_addr, inline_data_addr, entry_count * entry_size);
i += entry_count;
pde_entry_addr.address += entry_size * entry_count;
}
}
// pde_fill() populates pde_count PDE entries (starting at start_index) with
// the same mapping, i.e., with the same physical address (phys_addr).
// pde_fill() is optimized for pde_count == 1, which is the common case. The
// map_remap() function is the only case where pde_count > 1, only used on GA100
// GPUs for 512MB page size mappings.
static void pde_fill(uvm_page_tree_t *tree,
uvm_page_directory_t *directory,
NvU32 start_index,
NvU32 pde_count,
uvm_mmu_page_table_alloc_t **phys_addr,
uvm_push_t *push)
{
UVM_ASSERT(start_index + pde_count <= uvm_mmu_page_tree_entries(tree, directory->depth, UVM_PAGE_SIZE_AGNOSTIC));
if (push)
pde_fill_gpu(tree, directory, start_index, pde_count, phys_addr, push);
else
pde_fill_cpu(tree, directory, start_index, pde_count, phys_addr);
}
static void phys_mem_init(uvm_page_tree_t *tree, NvU32 page_size, uvm_page_directory_t *dir, uvm_push_t *push)
{
NvU64 clear_bits[2];
uvm_mmu_mode_hal_t *hal = tree->hal;
NvU32 entries_count = uvm_mmu_page_tree_entries(tree, dir->depth, page_size);
if (dir->depth == tree->hal->page_table_depth(page_size)) {
*clear_bits = 0; // Invalid PTE
}
else {
// passing in NULL for the phys_allocs will mark the child entries as invalid
uvm_mmu_page_table_alloc_t *phys_allocs[2] = {NULL, NULL};
hal->make_pde(clear_bits, phys_allocs, dir->depth);
// Passing in NULL for the phys_allocs will mark the child entries as
// invalid.
uvm_mmu_page_table_alloc_t *phys_allocs[2] = {NULL, NULL};
// Make sure that using only clear_bits[0] will work
UVM_ASSERT(hal->entry_size(dir->depth) == sizeof(clear_bits[0]) || clear_bits[0] == clear_bits[1]);
}
// Init with an invalid PTE or clean PDE. Only Maxwell PDEs can have more
// than 512 entries. We initialize them all with the same clean PDE.
// Additionally, only ATS systems may require clean PDEs bit settings based
// on the mapping VA.
if (dir->depth == tree->hal->page_table_depth(page_size) || (entries_count > 512 && !g_uvm_global.ats.enabled)) {
NvU64 clear_bits[2];
// initialize the memory to a reasonable value
if (push) {
tree->gpu->parent->ce_hal->memset_8(push,
uvm_mmu_gpu_address(tree->gpu, dir->phys_alloc.addr),
// If it is not a PTE, make a clean PDE.
if (dir->depth != tree->hal->page_table_depth(page_size)) {
tree->hal->make_pde(clear_bits, phys_allocs, dir->depth, dir->entries[0]);
// Make sure that using only clear_bits[0] will work.
UVM_ASSERT(tree->hal->entry_size(dir->depth) == sizeof(clear_bits[0]) || clear_bits[0] == clear_bits[1]);
}
else {
*clear_bits = 0;
}
// Initialize the memory to a reasonable value.
if (push) {
tree->gpu->parent->ce_hal->memset_8(push,
uvm_mmu_gpu_address(tree->gpu, dir->phys_alloc.addr),
*clear_bits,
dir->phys_alloc.size);
}
else {
uvm_mmu_page_table_cpu_memset_8(tree->gpu,
&dir->phys_alloc,
0,
*clear_bits,
dir->phys_alloc.size);
dir->phys_alloc.size / sizeof(*clear_bits));
}
}
else {
uvm_mmu_page_table_cpu_memset_8(tree->gpu,
&dir->phys_alloc,
0,
*clear_bits,
dir->phys_alloc.size / sizeof(*clear_bits));
pde_fill(tree, dir, 0, entries_count, phys_allocs, push);
}
}
static uvm_page_directory_t *allocate_directory(uvm_page_tree_t *tree,
@ -367,8 +483,10 @@ static uvm_page_directory_t *allocate_directory(uvm_page_tree_t *tree,
NvLength phys_alloc_size = hal->allocation_size(depth, page_size);
uvm_page_directory_t *dir;
// The page tree doesn't cache PTEs so space is not allocated for entries that are always PTEs.
// 2M PTEs may later become PDEs so pass UVM_PAGE_SIZE_AGNOSTIC, not page_size.
// The page tree doesn't cache PTEs so space is not allocated for entries
// that are always PTEs.
// 2M PTEs may later become PDEs so pass UVM_PAGE_SIZE_AGNOSTIC, not
// page_size.
if (depth == hal->page_table_depth(UVM_PAGE_SIZE_AGNOSTIC))
entry_count = 0;
else
@ -409,108 +527,6 @@ static inline NvU32 index_to_entry(uvm_mmu_mode_hal_t *hal, NvU32 entry_index, N
return hal->entries_per_index(depth) * entry_index + hal->entry_offset(depth, page_size);
}
static void pde_fill_cpu(uvm_page_tree_t *tree,
NvU32 depth,
uvm_mmu_page_table_alloc_t *directory,
NvU32 start_index,
NvU32 pde_count,
uvm_mmu_page_table_alloc_t **phys_addr)
{
NvU64 pde_data[2], entry_size;
UVM_ASSERT(uvm_mmu_use_cpu(tree));
entry_size = tree->hal->entry_size(depth);
UVM_ASSERT(sizeof(pde_data) >= entry_size);
tree->hal->make_pde(pde_data, phys_addr, depth);
if (entry_size == sizeof(pde_data[0]))
uvm_mmu_page_table_cpu_memset_8(tree->gpu, directory, start_index, pde_data[0], pde_count);
else
uvm_mmu_page_table_cpu_memset_16(tree->gpu, directory, start_index, pde_data, pde_count);
}
static void pde_fill_gpu(uvm_page_tree_t *tree,
NvU32 depth,
uvm_mmu_page_table_alloc_t *directory,
NvU32 start_index,
NvU32 pde_count,
uvm_mmu_page_table_alloc_t **phys_addr,
uvm_push_t *push)
{
NvU64 pde_data[2], entry_size;
uvm_gpu_address_t pde_entry_addr = uvm_mmu_gpu_address(tree->gpu, directory->addr);
UVM_ASSERT(!uvm_mmu_use_cpu(tree));
entry_size = tree->hal->entry_size(depth);
UVM_ASSERT(sizeof(pde_data) >= entry_size);
tree->hal->make_pde(pde_data, phys_addr, depth);
pde_entry_addr.address += start_index * entry_size;
if (entry_size == sizeof(pde_data[0])) {
tree->gpu->parent->ce_hal->memset_8(push, pde_entry_addr, pde_data[0], sizeof(pde_data[0]) * pde_count);
}
else {
NvU32 max_inline_entries = UVM_PUSH_INLINE_DATA_MAX_SIZE / sizeof(pde_data);
uvm_gpu_address_t inline_data_addr;
uvm_push_inline_data_t inline_data;
NvU32 membar_flag = 0;
NvU32 i;
if (uvm_push_get_and_reset_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE))
membar_flag = UVM_PUSH_FLAG_NEXT_MEMBAR_NONE;
else if (uvm_push_get_and_reset_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_GPU))
membar_flag = UVM_PUSH_FLAG_NEXT_MEMBAR_GPU;
for (i = 0; i < pde_count;) {
NvU32 j;
NvU32 entry_count = min(pde_count - i, max_inline_entries);
uvm_push_inline_data_begin(push, &inline_data);
for (j = 0; j < entry_count; j++)
uvm_push_inline_data_add(&inline_data, pde_data, sizeof(pde_data));
inline_data_addr = uvm_push_inline_data_end(&inline_data);
// All but the first memcopy can be pipelined. We respect the
// caller's pipelining settings for the first push.
if (i != 0)
uvm_push_set_flag(push, UVM_PUSH_FLAG_CE_NEXT_PIPELINED);
// No membar is needed until the last copy. Otherwise, use
// caller's membar flag.
if (i + entry_count < pde_count)
uvm_push_set_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
else if (membar_flag)
uvm_push_set_flag(push, membar_flag);
tree->gpu->parent->ce_hal->memcopy(push, pde_entry_addr, inline_data_addr, entry_count * sizeof(pde_data));
i += entry_count;
pde_entry_addr.address += sizeof(pde_data) * entry_count;
}
}
}
// pde_fill() populates pde_count PDE entries (starting at start_index) with
// the same mapping, i.e., with the same physical address (phys_addr).
static void pde_fill(uvm_page_tree_t *tree,
NvU32 depth,
uvm_mmu_page_table_alloc_t *directory,
NvU32 start_index,
NvU32 pde_count,
uvm_mmu_page_table_alloc_t **phys_addr,
uvm_push_t *push)
{
UVM_ASSERT(start_index + pde_count <= uvm_mmu_page_tree_entries(tree, depth, UVM_PAGE_SIZE_AGNOSTIC));
if (push)
pde_fill_gpu(tree, depth, directory, start_index, pde_count, phys_addr, push);
else
pde_fill_cpu(tree, depth, directory, start_index, pde_count, phys_addr);
}
static uvm_page_directory_t *host_pde_write(uvm_page_directory_t *dir,
uvm_page_directory_t *parent,
NvU32 index_in_parent)
@ -540,7 +556,7 @@ static void pde_write(uvm_page_tree_t *tree,
phys_allocs[i] = &entry->phys_alloc;
}
pde_fill(tree, dir->depth, &dir->phys_alloc, entry_index, 1, phys_allocs, push);
pde_fill(tree, dir, entry_index, 1, phys_allocs, push);
}
static void host_pde_clear(uvm_page_tree_t *tree, uvm_page_directory_t *dir, NvU32 entry_index, NvU32 page_size)
@ -813,8 +829,11 @@ static NV_STATUS allocate_page_table(uvm_page_tree_t *tree, NvU32 page_size, uvm
static void map_remap_deinit(uvm_page_tree_t *tree)
{
if (tree->map_remap.pde0.size)
phys_mem_deallocate(tree, &tree->map_remap.pde0);
if (tree->map_remap.pde0) {
phys_mem_deallocate(tree, &tree->map_remap.pde0->phys_alloc);
uvm_kvfree(tree->map_remap.pde0);
tree->map_remap.pde0 = NULL;
}
if (tree->map_remap.ptes_invalid_4k.size)
phys_mem_deallocate(tree, &tree->map_remap.ptes_invalid_4k);
@ -839,10 +858,16 @@ static NV_STATUS map_remap_init(uvm_page_tree_t *tree)
// PDE1-depth(512M) PTE. We first map it to the pde0 directory, then we
// return the PTE for the get_ptes()'s caller.
if (tree->hal->page_sizes() & UVM_PAGE_SIZE_512M) {
status = allocate_page_table(tree, UVM_PAGE_SIZE_2M, &tree->map_remap.pde0);
if (status != NV_OK)
tree->map_remap.pde0 = allocate_directory(tree,
UVM_PAGE_SIZE_2M,
tree->hal->page_table_depth(UVM_PAGE_SIZE_2M),
UVM_PMM_ALLOC_FLAGS_EVICT);
if (tree->map_remap.pde0 == NULL) {
status = NV_ERR_NO_MEMORY;
goto error;
}
}
status = page_tree_begin_acquire(tree, &tree->tracker, &push, "map remap init");
if (status != NV_OK)
goto error;
@ -864,22 +889,23 @@ static NV_STATUS map_remap_init(uvm_page_tree_t *tree)
uvm_mmu_page_table_alloc_t *phys_allocs[2] = {NULL, NULL};
NvU32 depth = tree->hal->page_table_depth(UVM_PAGE_SIZE_4K) - 1;
size_t index_4k = tree->hal->entry_offset(depth, UVM_PAGE_SIZE_4K);
// pde0 depth equals UVM_PAGE_SIZE_2M.
NvU32 pde0_depth = tree->hal->page_table_depth(UVM_PAGE_SIZE_2M);
NvU32 pde0_entries = tree->map_remap.pde0.size / tree->hal->entry_size(pde0_depth);
NvU32 pde0_entries = tree->map_remap.pde0->phys_alloc.size / tree->hal->entry_size(tree->map_remap.pde0->depth);
// The big-page entry is NULL which makes it an invalid entry.
phys_allocs[index_4k] = &tree->map_remap.ptes_invalid_4k;
// By default CE operations include a MEMBAR_SYS. MEMBAR_GPU is
// sufficient when pde0 is allocated in VIDMEM.
if (tree->map_remap.pde0.addr.aperture == UVM_APERTURE_VID)
if (tree->map_remap.pde0->phys_alloc.addr.aperture == UVM_APERTURE_VID)
uvm_push_set_flag(&push, UVM_PUSH_FLAG_NEXT_MEMBAR_GPU);
// This is an orphan directory, make_pde() requires a directory to
// compute the VA. The UVM depth map_remap() operates on is not in the
// range make_pde() must operate. We only need to supply the fields used
// by make_pde() to not access invalid memory addresses.
pde_fill(tree,
pde0_depth,
&tree->map_remap.pde0,
tree->map_remap.pde0,
0,
pde0_entries,
(uvm_mmu_page_table_alloc_t **)&phys_allocs,
@ -1332,10 +1358,9 @@ static NV_STATUS map_remap(uvm_page_tree_t *tree, NvU64 start, NvLength size, uv
if (uvm_page_table_range_aperture(range) == UVM_APERTURE_VID)
uvm_push_set_flag(&push, UVM_PUSH_FLAG_NEXT_MEMBAR_GPU);
phys_alloc[0] = &tree->map_remap.pde0;
phys_alloc[0] = &tree->map_remap.pde0->phys_alloc;
pde_fill(tree,
range->table->depth,
&range->table->phys_alloc,
range->table,
range->start_index,
range->entry_count,
(uvm_mmu_page_table_alloc_t **)&phys_alloc,

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2022 NVIDIA Corporation
Copyright (c) 2015-2023 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -219,7 +219,7 @@ struct uvm_mmu_mode_hal_struct
// point to two items for dual PDEs).
// any of allocs are allowed to be NULL, in which case they are to be
// treated as empty.
void (*make_pde)(void *entry, uvm_mmu_page_table_alloc_t **allocs, NvU32 depth);
void (*make_pde)(void *entry, uvm_mmu_page_table_alloc_t **allocs, NvU32 depth, uvm_page_directory_t *child_dir);
// size of an entry in a directory/table. Generally either 8 or 16 bytes.
// (in the case of Pascal dual PDEs)
@ -229,7 +229,7 @@ struct uvm_mmu_mode_hal_struct
NvU32 (*entries_per_index)(NvU32 depth);
// For dual PDEs, this is ether 1 or 0, depending on the page size.
// This is used to index the host copy only. GPU PDEs are always entirely
// This is used to index the host copy only. GPU PDEs are always entirely
// re-written using make_pde.
NvLength (*entry_offset)(NvU32 depth, NvU32 page_size);
@ -295,9 +295,8 @@ struct uvm_page_tree_struct
// PDE0 where all big-page entries are invalid, and small-page entries
// point to ptes_invalid_4k.
// pde0 is only used on Pascal-Ampere, i.e., they have the same PDE
// format.
uvm_mmu_page_table_alloc_t pde0;
// pde0 is used on Pascal+ GPUs, i.e., they have the same PDE format.
uvm_page_directory_t *pde0;
} map_remap;
// Tracker for all GPU operations on the tree
@ -365,21 +364,32 @@ void uvm_page_tree_deinit(uvm_page_tree_t *tree);
// the same page size without an intervening put_ptes. To duplicate a subset of
// an existing range or change the size of an existing range, use
// uvm_page_table_range_get_upper() and/or uvm_page_table_range_shrink().
NV_STATUS uvm_page_tree_get_ptes(uvm_page_tree_t *tree, NvU32 page_size, NvU64 start, NvLength size,
uvm_pmm_alloc_flags_t pmm_flags, uvm_page_table_range_t *range);
NV_STATUS uvm_page_tree_get_ptes(uvm_page_tree_t *tree,
NvU32 page_size,
NvU64 start,
NvLength size,
uvm_pmm_alloc_flags_t pmm_flags,
uvm_page_table_range_t *range);
// Same as uvm_page_tree_get_ptes(), but doesn't synchronize the GPU work.
//
// All pending operations can be waited on with uvm_page_tree_wait().
NV_STATUS uvm_page_tree_get_ptes_async(uvm_page_tree_t *tree, NvU32 page_size, NvU64 start, NvLength size,
uvm_pmm_alloc_flags_t pmm_flags, uvm_page_table_range_t *range);
NV_STATUS uvm_page_tree_get_ptes_async(uvm_page_tree_t *tree,
NvU32 page_size,
NvU64 start,
NvLength size,
uvm_pmm_alloc_flags_t pmm_flags,
uvm_page_table_range_t *range);
// Returns a single-entry page table range for the addresses passed.
// The size parameter must be a page size supported by this tree.
// This is equivalent to calling uvm_page_tree_get_ptes() with size equal to
// page_size.
NV_STATUS uvm_page_tree_get_entry(uvm_page_tree_t *tree, NvU32 page_size, NvU64 start,
uvm_pmm_alloc_flags_t pmm_flags, uvm_page_table_range_t *single);
NV_STATUS uvm_page_tree_get_entry(uvm_page_tree_t *tree,
NvU32 page_size,
NvU64 start,
uvm_pmm_alloc_flags_t pmm_flags,
uvm_page_table_range_t *single);
// For a single-entry page table range, write the PDE (which could be a dual
// PDE) to the GPU.
@ -478,8 +488,8 @@ NV_STATUS uvm_page_table_range_vec_create(uvm_page_tree_t *tree,
// new_range_vec will contain the upper portion of range_vec, starting at
// new_end + 1.
//
// new_end + 1 is required to be within the address range of range_vec and be aligned to
// range_vec's page_size.
// new_end + 1 is required to be within the address range of range_vec and be
// aligned to range_vec's page_size.
//
// On failure, the original range vector is left unmodified.
NV_STATUS uvm_page_table_range_vec_split_upper(uvm_page_table_range_vec_t *range_vec,
@ -501,18 +511,22 @@ void uvm_page_table_range_vec_destroy(uvm_page_table_range_vec_t *range_vec);
// for each offset.
// The caller_data pointer is what the caller passed in as caller_data to
// uvm_page_table_range_vec_write_ptes().
typedef NvU64 (*uvm_page_table_range_pte_maker_t)(uvm_page_table_range_vec_t *range_vec, NvU64 offset,
void *caller_data);
typedef NvU64 (*uvm_page_table_range_pte_maker_t)(uvm_page_table_range_vec_t *range_vec,
NvU64 offset,
void *caller_data);
// Write all PTEs covered by the range vector using the given PTE making function.
// Write all PTEs covered by the range vector using the given PTE making
// function.
//
// After writing all the PTEs a TLB invalidate operation is performed including
// the passed in tlb_membar.
//
// See comments about uvm_page_table_range_pte_maker_t for details about the
// PTE making callback.
NV_STATUS uvm_page_table_range_vec_write_ptes(uvm_page_table_range_vec_t *range_vec, uvm_membar_t tlb_membar,
uvm_page_table_range_pte_maker_t pte_maker, void *caller_data);
NV_STATUS uvm_page_table_range_vec_write_ptes(uvm_page_table_range_vec_t *range_vec,
uvm_membar_t tlb_membar,
uvm_page_table_range_pte_maker_t pte_maker,
void *caller_data);
// Set all PTEs covered by the range vector to an empty PTE
//
@ -636,8 +650,9 @@ static NvU64 uvm_page_table_range_size(uvm_page_table_range_t *range)
// Get the physical address of the entry at entry_index within the range
// (counted from range->start_index).
static uvm_gpu_phys_address_t uvm_page_table_range_entry_address(uvm_page_tree_t *tree, uvm_page_table_range_t *range,
size_t entry_index)
static uvm_gpu_phys_address_t uvm_page_table_range_entry_address(uvm_page_tree_t *tree,
uvm_page_table_range_t *range,
size_t entry_index)
{
NvU32 entry_size = uvm_mmu_pte_size(tree, range->page_size);
uvm_gpu_phys_address_t entry = range->table->phys_alloc.addr;

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2022 NVIDIA Corporation
Copyright (c) 2015-2023 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -146,9 +146,15 @@ static void fake_tlb_invals_disable(void)
g_fake_tlb_invals_tracking_enabled = false;
}
// Fake TLB invalidate VA that just saves off the parameters so that they can be verified later
static void fake_tlb_invalidate_va(uvm_push_t *push, uvm_gpu_phys_address_t pdb,
NvU32 depth, NvU64 base, NvU64 size, NvU32 page_size, uvm_membar_t membar)
// Fake TLB invalidate VA that just saves off the parameters so that they can be
// verified later.
static void fake_tlb_invalidate_va(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
NvU32 depth,
NvU64 base,
NvU64 size,
NvU32 page_size,
uvm_membar_t membar)
{
if (!g_fake_tlb_invals_tracking_enabled)
return;
@ -210,8 +216,8 @@ static bool assert_and_reset_last_invalidate(NvU32 expected_depth, bool expected
}
if ((g_last_fake_inval->membar == UVM_MEMBAR_NONE) == expected_membar) {
UVM_TEST_PRINT("Expected %s membar, got %s instead\n",
expected_membar ? "a" : "no",
uvm_membar_string(g_last_fake_inval->membar));
expected_membar ? "a" : "no",
uvm_membar_string(g_last_fake_inval->membar));
result = false;
}
@ -230,7 +236,8 @@ static bool assert_last_invalidate_all(NvU32 expected_depth, bool expected_memba
}
if (g_last_fake_inval->base != 0 || g_last_fake_inval->size != -1) {
UVM_TEST_PRINT("Expected invalidate all but got range [0x%llx, 0x%llx) instead\n",
g_last_fake_inval->base, g_last_fake_inval->base + g_last_fake_inval->size);
g_last_fake_inval->base,
g_last_fake_inval->base + g_last_fake_inval->size);
return false;
}
if (g_last_fake_inval->depth != expected_depth) {
@ -247,15 +254,16 @@ static bool assert_invalidate_range_specific(fake_tlb_invalidate_t *inval,
UVM_ASSERT(g_fake_tlb_invals_tracking_enabled);
if (g_fake_invals_count == 0) {
UVM_TEST_PRINT("Expected an invalidate for range [0x%llx, 0x%llx), but got none\n",
base, base + size);
UVM_TEST_PRINT("Expected an invalidate for range [0x%llx, 0x%llx), but got none\n", base, base + size);
return false;
}
if ((inval->base != base || inval->size != size) && inval->base != 0 && inval->size != -1) {
UVM_TEST_PRINT("Expected invalidate range [0x%llx, 0x%llx), but got range [0x%llx, 0x%llx) instead\n",
base, base + size,
inval->base, inval->base + inval->size);
base,
base + size,
inval->base,
inval->base + inval->size);
return false;
}
if (inval->depth != expected_depth) {
@ -270,7 +278,13 @@ static bool assert_invalidate_range_specific(fake_tlb_invalidate_t *inval,
return true;
}
static bool assert_invalidate_range(NvU64 base, NvU64 size, NvU32 page_size, bool allow_inval_all, NvU32 range_depth, NvU32 all_depth, bool expected_membar)
static bool assert_invalidate_range(NvU64 base,
NvU64 size,
NvU32 page_size,
bool allow_inval_all,
NvU32 range_depth,
NvU32 all_depth,
bool expected_membar)
{
NvU32 i;
@ -488,7 +502,6 @@ static NV_STATUS alloc_adjacent_pde_64k_memory(uvm_gpu_t *gpu)
return NV_OK;
}
static NV_STATUS alloc_nearby_pde_64k_memory(uvm_gpu_t *gpu)
{
uvm_page_tree_t tree;
@ -842,6 +855,7 @@ static NV_STATUS get_two_free_apart(uvm_gpu_t *gpu)
TEST_CHECK_RET(range2.entry_count == 256);
TEST_CHECK_RET(range2.table->ref_count == 512);
TEST_CHECK_RET(range1.table == range2.table);
// 4k page is second entry in a dual PDE
TEST_CHECK_RET(range1.table == tree.root->entries[0]->entries[0]->entries[0]->entries[1]);
TEST_CHECK_RET(range1.start_index == 256);
@ -871,6 +885,7 @@ static NV_STATUS get_overlapping_dual_pdes(uvm_gpu_t *gpu)
MEM_NV_CHECK_RET(test_page_tree_get_ptes(&tree, UVM_PAGE_SIZE_64K, size, size, &range64k), NV_OK);
TEST_CHECK_RET(range64k.entry_count == 16);
TEST_CHECK_RET(range64k.table->ref_count == 16);
// 4k page is second entry in a dual PDE
TEST_CHECK_RET(range64k.table == tree.root->entries[0]->entries[0]->entries[0]->entries[0]);
TEST_CHECK_RET(range64k.start_index == 16);
@ -1030,10 +1045,13 @@ static NV_STATUS test_tlb_invalidates(uvm_gpu_t *gpu)
// Depth 4
NvU64 extent_pte = UVM_PAGE_SIZE_2M;
// Depth 3
NvU64 extent_pde0 = extent_pte * (1ull << 8);
// Depth 2
NvU64 extent_pde1 = extent_pde0 * (1ull << 9);
// Depth 1
NvU64 extent_pde2 = extent_pde1 * (1ull << 9);
@ -1081,7 +1099,11 @@ static NV_STATUS test_tlb_invalidates(uvm_gpu_t *gpu)
return status;
}
static NV_STATUS test_tlb_batch_invalidates_case(uvm_page_tree_t *tree, NvU64 base, NvU64 size, NvU32 min_page_size, NvU32 max_page_size)
static NV_STATUS test_tlb_batch_invalidates_case(uvm_page_tree_t *tree,
NvU64 base,
NvU64 size,
NvU32 min_page_size,
NvU32 max_page_size)
{
NV_STATUS status = NV_OK;
uvm_push_t push;
@ -1205,7 +1227,11 @@ static bool assert_range_vec_ptes(uvm_page_table_range_vec_t *range_vec, bool ex
NvU64 expected_pte = expecting_cleared ? 0 : range_vec->size + offset;
if (*pte != expected_pte) {
UVM_TEST_PRINT("PTE is 0x%llx instead of 0x%llx for offset 0x%llx within range [0x%llx, 0x%llx)\n",
*pte, expected_pte, offset, range_vec->start, range_vec->size);
*pte,
expected_pte,
offset,
range_vec->start,
range_vec->size);
return false;
}
offset += range_vec->page_size;
@ -1226,7 +1252,11 @@ static NV_STATUS test_range_vec_write_ptes(uvm_page_table_range_vec_t *range_vec
TEST_CHECK_RET(data.status == NV_OK);
TEST_CHECK_RET(data.count == range_vec->size / range_vec->page_size);
TEST_CHECK_RET(assert_invalidate_range_specific(g_last_fake_inval,
range_vec->start, range_vec->size, range_vec->page_size, page_table_depth, membar != UVM_MEMBAR_NONE));
range_vec->start,
range_vec->size,
range_vec->page_size,
page_table_depth,
membar != UVM_MEMBAR_NONE));
TEST_CHECK_RET(assert_range_vec_ptes(range_vec, false));
fake_tlb_invals_disable();
@ -1249,7 +1279,11 @@ static NV_STATUS test_range_vec_clear_ptes(uvm_page_table_range_vec_t *range_vec
return NV_OK;
}
static NV_STATUS test_range_vec_create(uvm_page_tree_t *tree, NvU64 start, NvU64 size, NvU32 page_size, uvm_page_table_range_vec_t **range_vec_out)
static NV_STATUS test_range_vec_create(uvm_page_tree_t *tree,
NvU64 start,
NvU64 size,
NvU32 page_size,
uvm_page_table_range_vec_t **range_vec_out)
{
uvm_page_table_range_vec_t *range_vec;
uvm_pmm_alloc_flags_t pmm_flags = UVM_PMM_ALLOC_FLAGS_EVICT;
@ -1552,17 +1586,17 @@ static NV_STATUS entry_test_maxwell(uvm_gpu_t *gpu)
memset(phys_allocs, 0, sizeof(phys_allocs));
hal->make_pde(&pde_bits, phys_allocs, 0);
hal->make_pde(&pde_bits, phys_allocs, 0, NULL);
TEST_CHECK_RET(pde_bits == 0x0L);
phys_allocs[0] = &alloc_sys;
phys_allocs[1] = &alloc_vid;
hal->make_pde(&pde_bits, phys_allocs, 0);
hal->make_pde(&pde_bits, phys_allocs, 0, NULL);
TEST_CHECK_RET(pde_bits == 0x1BBBBBBD99999992LL);
phys_allocs[0] = &alloc_vid;
phys_allocs[1] = &alloc_sys;
hal->make_pde(&pde_bits, phys_allocs, 0);
hal->make_pde(&pde_bits, phys_allocs, 0, NULL);
TEST_CHECK_RET(pde_bits == 0x9999999E1BBBBBB1LL);
for (j = 0; j <= 2; j++) {
@ -1632,6 +1666,7 @@ static NV_STATUS entry_test_pascal(uvm_gpu_t *gpu, entry_test_page_size_func ent
uvm_mmu_page_table_alloc_t *phys_allocs[2] = {NULL, NULL};
uvm_mmu_page_table_alloc_t alloc_sys = fake_table_alloc(UVM_APERTURE_SYS, 0x399999999999000LL);
uvm_mmu_page_table_alloc_t alloc_vid = fake_table_alloc(UVM_APERTURE_VID, 0x1BBBBBB000LL);
// big versions have [11:8] set as well to test the page table merging
uvm_mmu_page_table_alloc_t alloc_big_sys = fake_table_alloc(UVM_APERTURE_SYS, 0x399999999999900LL);
uvm_mmu_page_table_alloc_t alloc_big_vid = fake_table_alloc(UVM_APERTURE_VID, 0x1BBBBBBB00LL);
@ -1639,31 +1674,31 @@ static NV_STATUS entry_test_pascal(uvm_gpu_t *gpu, entry_test_page_size_func ent
uvm_mmu_mode_hal_t *hal = gpu->parent->arch_hal->mmu_mode_hal(UVM_PAGE_SIZE_64K);
// Make sure cleared PDEs work as expected
hal->make_pde(pde_bits, phys_allocs, 0);
hal->make_pde(pde_bits, phys_allocs, 0, NULL);
TEST_CHECK_RET(pde_bits[0] == 0);
memset(pde_bits, 0xFF, sizeof(pde_bits));
hal->make_pde(pde_bits, phys_allocs, 3);
hal->make_pde(pde_bits, phys_allocs, 3, NULL);
TEST_CHECK_RET(pde_bits[0] == 0 && pde_bits[1] == 0);
// Sys and vidmem PDEs
phys_allocs[0] = &alloc_sys;
hal->make_pde(pde_bits, phys_allocs, 0);
hal->make_pde(pde_bits, phys_allocs, 0, NULL);
TEST_CHECK_RET(pde_bits[0] == 0x3999999999990C);
phys_allocs[0] = &alloc_vid;
hal->make_pde(pde_bits, phys_allocs, 0);
hal->make_pde(pde_bits, phys_allocs, 0, NULL);
TEST_CHECK_RET(pde_bits[0] == 0x1BBBBBB0A);
// Dual PDEs
phys_allocs[0] = &alloc_big_sys;
phys_allocs[1] = &alloc_vid;
hal->make_pde(pde_bits, phys_allocs, 3);
hal->make_pde(pde_bits, phys_allocs, 3, NULL);
TEST_CHECK_RET(pde_bits[0] == 0x3999999999999C && pde_bits[1] == 0x1BBBBBB0A);
phys_allocs[0] = &alloc_big_vid;
phys_allocs[1] = &alloc_sys;
hal->make_pde(pde_bits, phys_allocs, 3);
hal->make_pde(pde_bits, phys_allocs, 3, NULL);
TEST_CHECK_RET(pde_bits[0] == 0x1BBBBBBBA && pde_bits[1] == 0x3999999999990C);
// uncached, i.e., the sysmem data is not cached in GPU's L2 cache. Clear
@ -1727,36 +1762,36 @@ static NV_STATUS entry_test_volta(uvm_gpu_t *gpu, entry_test_page_size_func entr
uvm_mmu_mode_hal_t *hal = gpu->parent->arch_hal->mmu_mode_hal(UVM_PAGE_SIZE_64K);
// Make sure cleared PDEs work as expected
hal->make_pde(pde_bits, phys_allocs, 0);
hal->make_pde(pde_bits, phys_allocs, 0, NULL);
TEST_CHECK_RET(pde_bits[0] == 0);
memset(pde_bits, 0xFF, sizeof(pde_bits));
hal->make_pde(pde_bits, phys_allocs, 3);
hal->make_pde(pde_bits, phys_allocs, 3, NULL);
TEST_CHECK_RET(pde_bits[0] == 0 && pde_bits[1] == 0);
// Sys and vidmem PDEs
phys_allocs[0] = &alloc_sys;
hal->make_pde(pde_bits, phys_allocs, 0);
hal->make_pde(pde_bits, phys_allocs, 0, NULL);
TEST_CHECK_RET(pde_bits[0] == 0x3999999999990C);
phys_allocs[0] = &alloc_vid;
hal->make_pde(pde_bits, phys_allocs, 0);
hal->make_pde(pde_bits, phys_allocs, 0, NULL);
TEST_CHECK_RET(pde_bits[0] == 0x1BBBBBB0A);
// Dual PDEs
phys_allocs[0] = &alloc_big_sys;
phys_allocs[1] = &alloc_vid;
hal->make_pde(pde_bits, phys_allocs, 3);
hal->make_pde(pde_bits, phys_allocs, 3, NULL);
TEST_CHECK_RET(pde_bits[0] == 0x3999999999999C && pde_bits[1] == 0x1BBBBBB0A);
phys_allocs[0] = &alloc_big_vid;
phys_allocs[1] = &alloc_sys;
hal->make_pde(pde_bits, phys_allocs, 3);
hal->make_pde(pde_bits, phys_allocs, 3, NULL);
TEST_CHECK_RET(pde_bits[0] == 0x1BBBBBBBA && pde_bits[1] == 0x3999999999990C);
// NO_ATS PDE1 (depth 2)
phys_allocs[0] = &alloc_vid;
hal->make_pde(pde_bits, phys_allocs, 2);
hal->make_pde(pde_bits, phys_allocs, 2, NULL);
if (g_uvm_global.ats.enabled)
TEST_CHECK_RET(pde_bits[0] == 0x1BBBBBB2A);
else
@ -1805,32 +1840,32 @@ static NV_STATUS entry_test_hopper(uvm_gpu_t *gpu, entry_test_page_size_func ent
uvm_mmu_mode_hal_t *hal = gpu->parent->arch_hal->mmu_mode_hal(UVM_PAGE_SIZE_64K);
// Make sure cleared PDEs work as expected
hal->make_pde(pde_bits, phys_allocs, 0);
hal->make_pde(pde_bits, phys_allocs, 0, NULL);
TEST_CHECK_RET(pde_bits[0] == 0);
// Cleared PDEs work as expected for big and small PDEs.
memset(pde_bits, 0xFF, sizeof(pde_bits));
hal->make_pde(pde_bits, phys_allocs, 4);
hal->make_pde(pde_bits, phys_allocs, 4, NULL);
TEST_CHECK_RET(pde_bits[0] == 0 && pde_bits[1] == 0);
// Sys and vidmem PDEs, uncached ATS allowed.
phys_allocs[0] = &alloc_sys;
hal->make_pde(pde_bits, phys_allocs, 0);
hal->make_pde(pde_bits, phys_allocs, 0, NULL);
TEST_CHECK_RET(pde_bits[0] == 0x999999999900C);
phys_allocs[0] = &alloc_vid;
hal->make_pde(pde_bits, phys_allocs, 0);
hal->make_pde(pde_bits, phys_allocs, 0, NULL);
TEST_CHECK_RET(pde_bits[0] == 0xBBBBBBB00A);
// Dual PDEs, uncached.
phys_allocs[0] = &alloc_big_sys;
phys_allocs[1] = &alloc_vid;
hal->make_pde(pde_bits, phys_allocs, 4);
hal->make_pde(pde_bits, phys_allocs, 4, NULL);
TEST_CHECK_RET(pde_bits[0] == 0x999999999991C && pde_bits[1] == 0xBBBBBBB01A);
phys_allocs[0] = &alloc_big_vid;
phys_allocs[1] = &alloc_sys;
hal->make_pde(pde_bits, phys_allocs, 4);
hal->make_pde(pde_bits, phys_allocs, 4, NULL);
TEST_CHECK_RET(pde_bits[0] == 0xBBBBBBBB1A && pde_bits[1] == 0x999999999901C);
// uncached, i.e., the sysmem data is not cached in GPU's L2 cache, and
@ -2303,7 +2338,8 @@ NV_STATUS uvm_test_page_tree(UVM_TEST_PAGE_TREE_PARAMS *params, struct file *fil
gpu->parent = parent_gpu;
// At least test_tlb_invalidates() relies on global state
// (g_tlb_invalidate_*) so make sure only one test instance can run at a time.
// (g_tlb_invalidate_*) so make sure only one test instance can run at a
// time.
uvm_mutex_lock(&g_uvm_global.global_lock);
// Allocate the fake TLB tracking state. Notably tests still need to enable

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2020 NVIDIA Corporation
Copyright (c) 2015-2023 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -140,7 +140,10 @@ static NvU64 small_half_pde_pascal(uvm_mmu_page_table_alloc_t *phys_alloc)
return pde_bits;
}
static void make_pde_pascal(void *entry, uvm_mmu_page_table_alloc_t **phys_allocs, NvU32 depth)
static void make_pde_pascal(void *entry,
uvm_mmu_page_table_alloc_t **phys_allocs,
NvU32 depth,
uvm_page_directory_t *child_dir)
{
NvU32 entry_count = entries_per_index_pascal(depth);
NvU64 *entry_bits = (NvU64 *)entry;

View File

@ -10155,6 +10155,30 @@ static uvm_processor_id_t block_select_residency(uvm_va_block_t *va_block,
uvm_processor_mask_test(&va_space->accessible_from[uvm_id_value(preferred_location)], processor_id))
return preferred_location;
// Check if we should map the closest resident processor remotely on remote CPU fault
//
// When faulting on CPU, there's a linux process on behalf of it, which is associated
// with a unique VM pointed by current->mm. A block of memory residing on GPU is also
// associated with VM, pointed by va_block_context->mm. If they match, it's a regular
// (local) fault, and we may want to migrate a page from GPU to CPU.
// If it's a 'remote' fault, i.e. linux process differs from one associated with block
// VM, we might preserve residence.
//
// Establishing a remote fault without access counters means the memory could stay in
// the wrong spot for a long time, which is why we prefer to avoid creating remote
// mappings. However when NIC accesses a memory residing on GPU, it's worth to keep it
// in place for NIC accesses.
//
// The logic that's used to detect remote faulting also keeps memory in place for
// ptrace accesses. We would prefer to control those policies separately, but the
// NIC case takes priority.
if (UVM_ID_IS_CPU(processor_id) &&
uvm_processor_mask_test(&va_space->accessible_from[uvm_id_value(closest_resident_processor)], processor_id) &&
va_block_context->mm != current->mm) {
UVM_ASSERT(va_block_context->mm != NULL);
return closest_resident_processor;
}
// If the page is resident on a processor other than the preferred location,
// or the faulting processor can't access the preferred location, we select
// the faulting processor as the new residency.

View File

@ -193,7 +193,8 @@ uvm_va_policy_node_t *uvm_va_policy_node_iter_next(uvm_va_block_t *va_block, uvm
for ((node) = uvm_va_policy_node_iter_first((va_block), (start), (end)), \
(next) = uvm_va_policy_node_iter_next((va_block), (node), (end)); \
(node); \
(node) = (next))
(node) = (next), \
(next) = uvm_va_policy_node_iter_next((va_block), (node), (end)))
// Returns the first policy in the range [start, end], if any.
// Locking: The va_block lock must be held.

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2017-2021 NVIDIA Corporation
Copyright (c) 2017-2023 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -145,7 +145,10 @@ static NvU64 small_half_pde_volta(uvm_mmu_page_table_alloc_t *phys_alloc)
return pde_bits;
}
static void make_pde_volta(void *entry, uvm_mmu_page_table_alloc_t **phys_allocs, NvU32 depth)
static void make_pde_volta(void *entry,
uvm_mmu_page_table_alloc_t **phys_allocs,
NvU32 depth,
uvm_page_directory_t *child_dir)
{
NvU32 entry_count = entries_per_index_volta(depth);
NvU64 *entry_bits = (NvU64 *)entry;

View File

@ -46,6 +46,11 @@ NvlStatus nvlink_lib_unload(void);
*/
NvlStatus nvlink_lib_ioctl_ctrl(nvlink_ioctrl_params *ctrl_params);
/*
* Gets number of devices with type deviceType
*/
NvlStatus nvlink_lib_return_device_count_by_type(NvU32 deviceType, NvU32 *numDevices);
#ifdef __cplusplus
}
#endif

View File

@ -28,6 +28,11 @@
#include "nv-time.h"
#include <linux/mmzone.h>
#include <linux/numa.h>
#include <linux/pid.h>
extern char *NVreg_TemporaryFilePath;
#define MAX_ERROR_STRING 512
@ -2122,6 +2127,43 @@ void NV_API_CALL os_nv_cap_close_fd
nv_cap_close_fd(fd);
}
/*
* Reads the total memory and free memory of a NUMA node from the kernel.
*/
NV_STATUS NV_API_CALL os_get_numa_node_memory_usage
(
NvS32 node_id,
NvU64 *free_memory_bytes,
NvU64 *total_memory_bytes
)
{
struct pglist_data *pgdat;
struct zone *zone;
NvU32 zone_id;
if (node_id >= MAX_NUMNODES)
{
nv_printf(NV_DBG_ERRORS, "Invalid NUMA node ID\n");
return NV_ERR_INVALID_ARGUMENT;
}
pgdat = NODE_DATA(node_id);
*free_memory_bytes = 0;
*total_memory_bytes = 0;
for (zone_id = 0; zone_id < MAX_NR_ZONES; zone_id++)
{
zone = &(pgdat->node_zones[zone_id]);
if (!populated_zone(zone))
continue;
*free_memory_bytes += (zone_page_state_snapshot(zone, NR_FREE_PAGES) * PAGE_SIZE);
*total_memory_bytes += (zone->present_pages * PAGE_SIZE);
}
return NV_OK;
}
typedef struct os_numa_gpu_mem_hotplug_notifier_s
{
NvU64 start_pa;
@ -2373,3 +2415,28 @@ NV_STATUS NV_API_CALL os_offline_page_at_address
#endif
}
void* NV_API_CALL os_get_pid_info(void)
{
return get_task_pid(current, PIDTYPE_PID);
}
void NV_API_CALL os_put_pid_info(void *pid_info)
{
if (pid_info != NULL)
put_pid(pid_info);
}
NV_STATUS NV_API_CALL os_find_ns_pid(void *pid_info, NvU32 *ns_pid)
{
if ((pid_info == NULL) || (ns_pid == NULL))
return NV_ERR_INVALID_ARGUMENT;
*ns_pid = pid_vnr((struct pid *)pid_info);
// The call returns 0 if the PID is not found in the current ns
if (*ns_pid == 0)
return NV_ERR_OBJECT_NOT_FOUND;
return NV_OK;
}

View File

@ -4740,7 +4740,7 @@ bool ConnectorImpl::train(const LinkConfiguration & lConfig, bool force,
{
LinkTrainingType preferredTrainingType = trainType;
bool result;
bool bEnableFecOnSor;
//
// Validate link config against caps
//
@ -4832,16 +4832,7 @@ bool ConnectorImpl::train(const LinkConfiguration & lConfig, bool force,
result = postLTAdjustment(activeLinkConfig, force);
}
bEnableFecOnSor = lConfig.bEnableFEC;
if (main->isEDP())
{
DeviceImpl * nativeDev = findDeviceInList(Address());
if (nativeDev && nativeDev->bIsPreviouslyFakedMuxDevice)
bEnableFecOnSor = activeLinkConfig.bEnableFEC;
}
if((lConfig.lanes != 0) && result && bEnableFecOnSor)
if((lConfig.lanes != 0) && result && activeLinkConfig.bEnableFEC)
{
//
// Extended latency from link-train end to FEC enable pattern
@ -6057,7 +6048,7 @@ void ConnectorImpl::notifyLongPulseInternal(bool statusConnected)
if (this->bReassessMaxLink)
{
//
// If the highest assessed LC is not equal to
// If the highest assessed LC is not equal to
// max possible link config, re-assess link
//
NvU8 retries = 0U;

View File

@ -36,25 +36,25 @@
// and then checked back in. You cannot make changes to these sections without
// corresponding changes to the buildmeister script
#ifndef NV_BUILD_BRANCH
#define NV_BUILD_BRANCH r537_13
#define NV_BUILD_BRANCH r537_41
#endif
#ifndef NV_PUBLIC_BRANCH
#define NV_PUBLIC_BRANCH r537_13
#define NV_PUBLIC_BRANCH r537_41
#endif
#if defined(NV_LINUX) || defined(NV_BSD) || defined(NV_SUNOS)
#define NV_BUILD_BRANCH_VERSION "rel/gpu_drv/r535/r537_13-260"
#define NV_BUILD_CHANGELIST_NUM (33206197)
#define NV_BUILD_BRANCH_VERSION "rel/gpu_drv/r535/r537_41-286"
#define NV_BUILD_CHANGELIST_NUM (33292694)
#define NV_BUILD_TYPE "Official"
#define NV_BUILD_NAME "rel/gpu_drv/r535/r537_13-260"
#define NV_LAST_OFFICIAL_CHANGELIST_NUM (33206197)
#define NV_BUILD_NAME "rel/gpu_drv/r535/r537_41-286"
#define NV_LAST_OFFICIAL_CHANGELIST_NUM (33292694)
#else /* Windows builds */
#define NV_BUILD_BRANCH_VERSION "r537_13-1"
#define NV_BUILD_CHANGELIST_NUM (33194057)
#define NV_BUILD_BRANCH_VERSION "r537_41-1"
#define NV_BUILD_CHANGELIST_NUM (33292694)
#define NV_BUILD_TYPE "Official"
#define NV_BUILD_NAME "537.17"
#define NV_LAST_OFFICIAL_CHANGELIST_NUM (33194057)
#define NV_BUILD_NAME "537.42"
#define NV_LAST_OFFICIAL_CHANGELIST_NUM (33292694)
#define NV_BUILD_BRANCH_BASE_VERSION R535
#endif
// End buildmeister python edited section

View File

@ -4,7 +4,7 @@
#if defined(NV_LINUX) || defined(NV_BSD) || defined(NV_SUNOS) || defined(NV_VMWARE) || defined(NV_QNX) || defined(NV_INTEGRITY) || \
(defined(RMCFG_FEATURE_PLATFORM_GSP) && RMCFG_FEATURE_PLATFORM_GSP == 1)
#define NV_VERSION_STRING "535.104.05"
#define NV_VERSION_STRING "535.113.01"
#else

View File

@ -20,7 +20,7 @@
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef __gh100_dev_fb_h_
#define __gh100_dev_fb_h_
#define NV_PFB_NISO_FLUSH_SYSMEM_ADDR_SHIFT 8 /* */
@ -29,4 +29,25 @@
#define NV_PFB_FBHUB_PCIE_FLUSH_SYSMEM_ADDR_HI 0x00100A38 /* RW-4R */
#define NV_PFB_FBHUB_PCIE_FLUSH_SYSMEM_ADDR_HI_ADR 31:0 /* RWIVF */
#define NV_PFB_FBHUB_PCIE_FLUSH_SYSMEM_ADDR_HI_ADR_MASK 0x000FFFFF /* ----V */
#define NV_PFB_PRI_MMU_L2TLB_ECC_UNCORRECTED_ERR_COUNT 0x00100E78 /* RW-4R */
#define NV_PFB_PRI_MMU_L2TLB_ECC_UNCORRECTED_ERR_COUNT 0x00100E78 /* RW-4R */
#define NV_PFB_PRI_MMU_L2TLB_ECC_UNCORRECTED_ERR_COUNT_TOTAL 15:0 /* RWEVF */
#define NV_PFB_PRI_MMU_L2TLB_ECC_UNCORRECTED_ERR_COUNT_TOTAL_INIT 0 /* RWE-V */
#define NV_PFB_PRI_MMU_L2TLB_ECC_UNCORRECTED_ERR_COUNT_UNIQUE 31:16 /* RWEVF */
#define NV_PFB_PRI_MMU_L2TLB_ECC_UNCORRECTED_ERR_COUNT_UNIQUE_INIT 0 /* RWE-V */
#define NV_PFB_PRI_MMU_HUBTLB_ECC_UNCORRECTED_ERR_COUNT 0x00100E8C /* RW-4R */
#define NV_PFB_PRI_MMU_HUBTLB_ECC_UNCORRECTED_ERR_COUNT 0x00100E8C /* RW-4R */
#define NV_PFB_PRI_MMU_HUBTLB_ECC_UNCORRECTED_ERR_COUNT_TOTAL 15:0 /* RWEVF */
#define NV_PFB_PRI_MMU_HUBTLB_ECC_UNCORRECTED_ERR_COUNT_TOTAL_INIT 0 /* RWE-V */
#define NV_PFB_PRI_MMU_HUBTLB_ECC_UNCORRECTED_ERR_COUNT_UNIQUE 31:16 /* RWEVF */
#define NV_PFB_PRI_MMU_HUBTLB_ECC_UNCORRECTED_ERR_COUNT_UNIQUE_INIT 0 /* RWE-V */
#define NV_PFB_PRI_MMU_FILLUNIT_ECC_UNCORRECTED_ERR_COUNT 0x00100EA0 /* RW-4R */
#define NV_PFB_PRI_MMU_FILLUNIT_ECC_UNCORRECTED_ERR_COUNT 0x00100EA0 /* RW-4R */
#define NV_PFB_PRI_MMU_FILLUNIT_ECC_UNCORRECTED_ERR_COUNT_TOTAL 15:0 /* RWEVF */
#define NV_PFB_PRI_MMU_FILLUNIT_ECC_UNCORRECTED_ERR_COUNT_TOTAL_INIT 0 /* RWE-V */
#define NV_PFB_PRI_MMU_FILLUNIT_ECC_UNCORRECTED_ERR_COUNT_UNIQUE 31:16 /* RWEVF */
#define NV_PFB_PRI_MMU_FILLUNIT_ECC_UNCORRECTED_ERR_COUNT_UNIQUE_INIT 0 /* RWE-V */
#endif // __gh100_dev_fb_h_

View File

@ -0,0 +1,29 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef __gh100_dev_fbpa_h_
#define __gh100_dev_fbpa_h_
#define NV_PFB_FBPA_0_ECC_DED_COUNT__SIZE_1 4 /* */
#define NV_PFB_FBPA_0_ECC_DED_COUNT(i) (0x009025A0+(i)*4) /* RW-4A */
#endif // __gh100_dev_fbpa_h_

View File

@ -0,0 +1,33 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef __gh100_dev_ltc_h_
#define __gh100_dev_ltc_h_
#define NV_PLTCG_LTC0_LTS0_L2_CACHE_ECC_UNCORRECTED_ERR_COUNT 0x001404f8 /* RW-4R */
#define NV_PLTCG_LTC0_LTS0_L2_CACHE_ECC_UNCORRECTED_ERR_COUNT_TOTAL 15:0 /* RWIVF */
#define NV_PLTCG_LTC0_LTS0_L2_CACHE_ECC_UNCORRECTED_ERR_COUNT_TOTAL_INIT 0x0000 /* RWI-V */
#define NV_PLTCG_LTC0_LTS0_L2_CACHE_ECC_UNCORRECTED_ERR_COUNT_UNIQUE 31:16 /* RWIVF */
#define NV_PLTCG_LTC0_LTS0_L2_CACHE_ECC_UNCORRECTED_ERR_COUNT_UNIQUE_INIT 0x0000 /* RWI-V */
#endif // __gh100_dev_ltc_h_

View File

@ -0,0 +1,52 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef __gh100_dev_nv_xpl_h_
#define __gh100_dev_nv_xpl_h_
#define NV_XPL_DL_ERR_COUNT_RBUF 0x00000a54 /* R--4R */
#define NV_XPL_DL_ERR_COUNT_RBUF__PRIV_LEVEL_MASK 0x00000b08 /* */
#define NV_XPL_DL_ERR_COUNT_RBUF_CORR_ERR 15:0 /* R-EVF */
#define NV_XPL_DL_ERR_COUNT_RBUF_CORR_ERR_INIT 0x0000 /* R-E-V */
#define NV_XPL_DL_ERR_COUNT_RBUF_UNCORR_ERR 31:16 /* R-EVF */
#define NV_XPL_DL_ERR_COUNT_RBUF_UNCORR_ERR_INIT 0x0000 /* R-E-V */
#define NV_XPL_DL_ERR_COUNT_SEQ_LUT 0x00000a58 /* R--4R */
#define NV_XPL_DL_ERR_COUNT_SEQ_LUT__PRIV_LEVEL_MASK 0x00000b08 /* */
#define NV_XPL_DL_ERR_COUNT_SEQ_LUT_CORR_ERR 15:0 /* R-EVF */
#define NV_XPL_DL_ERR_COUNT_SEQ_LUT_CORR_ERR_INIT 0x0000 /* R-E-V */
#define NV_XPL_DL_ERR_COUNT_SEQ_LUT_UNCORR_ERR 31:16 /* R-EVF */
#define NV_XPL_DL_ERR_COUNT_SEQ_LUT_UNCORR_ERR_INIT 0x0000 /* R-E-V */
#define NV_XPL_DL_ERR_RESET 0x00000a5c /* RW-4R */
#define NV_XPL_DL_ERR_RESET_RBUF_CORR_ERR_COUNT 0:0 /* RWCVF */
#define NV_XPL_DL_ERR_RESET_RBUF_CORR_ERR_COUNT_DONE 0x0 /* RWC-V */
#define NV_XPL_DL_ERR_RESET_RBUF_CORR_ERR_COUNT_PENDING 0x1 /* -W--T */
#define NV_XPL_DL_ERR_RESET_SEQ_LUT_CORR_ERR_COUNT 1:1 /* RWCVF */
#define NV_XPL_DL_ERR_RESET_SEQ_LUT_CORR_ERR_COUNT_DONE 0x0 /* RWC-V */
#define NV_XPL_DL_ERR_RESET_SEQ_LUT_CORR_ERR_COUNT_PENDING 0x1 /* -W--T */
#define NV_XPL_DL_ERR_RESET_RBUF_UNCORR_ERR_COUNT 16:16 /* RWCVF */
#define NV_XPL_DL_ERR_RESET_RBUF_UNCORR_ERR_COUNT_DONE 0x0 /* RWC-V */
#define NV_XPL_DL_ERR_RESET_RBUF_UNCORR_ERR_COUNT_PENDING 0x1 /* -W--T */
#define NV_XPL_DL_ERR_RESET_SEQ_LUT_UNCORR_ERR_COUNT 17:17 /* RWCVF */
#define NV_XPL_DL_ERR_RESET_SEQ_LUT_UNCORR_ERR_COUNT_DONE 0x0 /* RWC-V */
#define NV_XPL_DL_ERR_RESET_SEQ_LUT_UNCORR_ERR_COUNT_PENDING 0x1 /* -W--T */
#endif // __gh100_dev_nv_xpl_h__

View File

@ -24,4 +24,7 @@
#ifndef __gh100_dev_xtl_ep_pri_h__
#define __gh100_dev_xtl_ep_pri_h__
#define NV_EP_PCFGM 0x92FFF:0x92000 /* RW--D */
#define NV_XTL_EP_PRI_DED_ERROR_STATUS 0x0000043C /* RW-4R */
#define NV_XTL_EP_PRI_RAM_ERROR_INTR_STATUS 0x000003C8 /* RW-4R */
#endif // __gh100_dev_xtl_ep_pri_h__

View File

@ -21,3 +21,9 @@
* DEALINGS IN THE SOFTWARE.
*/
#define NV_CHIP_EXTENDED_SYSTEM_PHYSICAL_ADDRESS_BITS 52
#define NV_LTC_PRI_STRIDE 8192
#define NV_LTS_PRI_STRIDE 512
#define NV_FBPA_PRI_STRIDE 16384
#define NV_SCAL_LITTER_NUM_FBPAS 24
#define NV_XPL_BASE_ADDRESS 540672
#define NV_XTL_BASE_ADDRESS 593920

View File

@ -47,5 +47,17 @@
#define NV_XAL_EP_INTR_0_PRI_RSP_TIMEOUT 3:3
#define NV_XAL_EP_INTR_0_PRI_RSP_TIMEOUT_PENDING 0x1
#define NV_XAL_EP_SCPM_PRI_DUMMY_DATA_PATTERN_INIT 0xbadf0200
#define NV_XAL_EP_REORDER_ECC_UNCORRECTED_ERR_COUNT 0x0010f364 /* RW-4R */
#define NV_XAL_EP_REORDER_ECC_UNCORRECTED_ERR_COUNT_TOTAL 15:0 /* RWIUF */
#define NV_XAL_EP_REORDER_ECC_UNCORRECTED_ERR_COUNT_TOTAL_INIT 0x0000 /* RWI-V */
#define NV_XAL_EP_REORDER_ECC_UNCORRECTED_ERR_COUNT_UNIQUE 31:16 /* RWIUF */
#define NV_XAL_EP_REORDER_ECC_UNCORRECTED_ERR_COUNT_UNIQUE_INIT 0x0000 /* RWI-V */
#define NV_XAL_EP_P2PREQ_ECC_UNCORRECTED_ERR_COUNT 0x0010f37c /* RW-4R */
#define NV_XAL_EP_P2PREQ_ECC_UNCORRECTED_ERR_COUNT_TOTAL 15:0 /* RWIUF */
#define NV_XAL_EP_P2PREQ_ECC_UNCORRECTED_ERR_COUNT_TOTAL_INIT 0x0000 /* RWI-V */
#define NV_XAL_EP_P2PREQ_ECC_UNCORRECTED_ERR_COUNT_UNIQUE 31:16 /* RWIUF */
#define NV_XAL_EP_P2PREQ_ECC_UNCORRECTED_ERR_COUNT_UNIQUE_INIT 0x0000 /* RWI-V */
#endif // __gh100_pri_nv_xal_ep_h__

View File

@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2003-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2003-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@ -635,4 +635,7 @@
#define NV_NVLIPT_LNK_CTRL_CAP_LOCAL_LINK_CHANNEL_ALI_SUPPORT 28:28 /* RWIVF */
#define NV_NVLIPT_LNK_CTRL_CAP_LOCAL_LINK_CHANNEL_ALI_SUPPORT_SUPPORTED 0x00000001 /* RWI-V */
#define NV_NVLIPT_LNK_CTRL_CAP_LOCAL_LINK_CHANNEL_ALI_SUPPORT_NOT_SUPPORTED 0x00000000 /* RW--V */
#define NV_NVLIPT_LNK_SCRATCH_WARM 0x000007c0 /* RW-4R */
#define NV_NVLIPT_LNK_SCRATCH_WARM_DATA 31:0 /* RWEVF */
#define NV_NVLIPT_LNK_SCRATCH_WARM_DATA_INIT 0xdeadbaad /* RWE-V */
#endif // __ls10_dev_nvlipt_lnk_ip_h__

View File

@ -439,6 +439,11 @@ NvlStatus nvlink_lib_register_link(nvlink_device *dev, nvlink_link *link);
*/
NvlStatus nvlink_lib_unregister_link(nvlink_link *link);
/*
* Gets number of devices with type deviceType
*/
NvlStatus nvlink_lib_return_device_count_by_type(NvU32 deviceType, NvU32 *numDevices);
/************************************************************************************************/
/******************************* NVLink link management functions *******************************/

View File

@ -46,6 +46,11 @@ NvlStatus nvlink_lib_unload(void);
*/
NvlStatus nvlink_lib_ioctl_ctrl(nvlink_ioctrl_params *ctrl_params);
/*
* Gets number of devices with type deviceType
*/
NvlStatus nvlink_lib_return_device_count_by_type(NvU32 deviceType, NvU32 *numDevices);
#ifdef __cplusplus
}
#endif

View File

@ -198,3 +198,48 @@ nvlink_lib_is_registerd_device_with_reduced_config(void)
return bIsReducedConfg;
}
/*
* Get the number of devices that have the device type deviceType
*/
NvlStatus
nvlink_lib_return_device_count_by_type
(
NvU32 deviceType,
NvU32 *numDevices
)
{
NvlStatus lock_status = NVL_SUCCESS;
nvlink_device *dev = NULL;
NvU32 device_count = 0;
if (nvlink_lib_is_initialized())
{
// Acquire top-level lock
lock_status = nvlink_lib_top_lock_acquire();
if (lock_status != NVL_SUCCESS)
{
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
"%s: Failed to acquire top-level lock\n",
__FUNCTION__));
return lock_status;
}
// Top-level lock is now acquired
// Loop through device list
FOR_EACH_DEVICE_REGISTERED(dev, nvlinkLibCtx.nv_devicelist_head, node)
{
if (dev->type == deviceType)
{
device_count++;
}
}
// Release top-level lock
nvlink_lib_top_lock_release();
}
*numDevices = device_count;
return NVL_SUCCESS;
}

View File

@ -213,6 +213,7 @@
_op(void, nvswitch_reset_persistent_link_hw_state, (nvswitch_device *device, NvU32 linkNumber), _arch)\
_op(void, nvswitch_store_topology_information, (nvswitch_device *device, nvlink_link *link), _arch) \
_op(void, nvswitch_init_lpwr_regs, (nvlink_link *link), _arch) \
_op(void, nvswitch_program_l1_scratch_reg, (nvswitch_device *device, NvU32 linkNumber), _arch) \
_op(NvlStatus, nvswitch_set_training_mode, (nvswitch_device *device), _arch) \
_op(NvU32, nvswitch_get_sublink_width, (nvswitch_device *device, NvU32 linkNumber), _arch) \
_op(NvBool, nvswitch_i2c_is_device_access_allowed, (nvswitch_device *device, NvU32 port, NvU8 addr, NvBool bIsRead), _arch) \

View File

@ -583,9 +583,12 @@ typedef struct
NvBool bDisabledRemoteEndLinkMaskCached;
} lr10_device;
#define NVSWITCH_NUM_DEVICES_PER_DELTA_LR10 6
typedef struct {
NvU32 switchPhysicalId;
NvU64 linkMask;
NvU64 accessLinkMask;
NvU64 trunkLinkMask;
} lr10_links_connected_to_disabled_remote_end;
#define NVSWITCH_GET_CHIP_DEVICE_LR10(_device) \
@ -649,6 +652,7 @@ void nvswitch_setup_link_loopback_mode_lr10(nvswitch_device *device, NvU32
void nvswitch_reset_persistent_link_hw_state_lr10(nvswitch_device *device, NvU32 linkNumber);
void nvswitch_store_topology_information_lr10(nvswitch_device *device, nvlink_link *link);
void nvswitch_init_lpwr_regs_lr10(nvlink_link *link);
void nvswitch_program_l1_scratch_reg_lr10(nvswitch_device *device, NvU32 linkNumber);
NvlStatus nvswitch_set_training_mode_lr10(nvswitch_device *device);
NvBool nvswitch_i2c_is_device_access_allowed_lr10(nvswitch_device *device, NvU32 port, NvU8 addr, NvBool bIsRead);
NvU32 nvswitch_get_sublink_width_lr10(nvswitch_device *device,NvU32 linkNumber);

View File

@ -529,10 +529,20 @@ typedef struct
{
NvBool bLinkErrorsCallBackEnabled;
NvBool bLinkStateCallBackEnabled;
NvBool bResetAndDrainRetry;
NvU64 lastRetrainTime;
NvU64 lastLinkUpTime;
} NVLINK_LINK_ERROR_REPORTING_STATE;
typedef struct
{
NVLINK_LINK_ERROR_INFO_ERR_MASKS fatalIntrMask;
NVLINK_LINK_ERROR_INFO_ERR_MASKS nonFatalIntrMask;
} NVLINK_LINK_ERROR_REPORTING_DATA;
typedef struct
{
NVLINK_LINK_ERROR_REPORTING_STATE state;
NVLINK_LINK_ERROR_REPORTING_DATA data;
} NVLINK_LINK_ERROR_REPORTING;
typedef struct
@ -834,7 +844,6 @@ typedef const struct
#define nvswitch_setup_link_loopback_mode_ls10 nvswitch_setup_link_loopback_mode_lr10
#define nvswitch_link_lane_reversed_ls10 nvswitch_link_lane_reversed_lr10
#define nvswitch_request_tl_link_state_ls10 nvswitch_request_tl_link_state_lr10
#define nvswitch_i2c_get_port_info_ls10 nvswitch_i2c_get_port_info_lr10
#define nvswitch_i2c_set_hw_speed_mode_ls10 nvswitch_i2c_set_hw_speed_mode_lr10
@ -929,6 +938,7 @@ void nvswitch_corelib_clear_link_state_lr10(nvlink_link *link);
NvlStatus nvswitch_corelib_set_dl_link_mode_ls10(nvlink_link *link, NvU64 mode, NvU32 flags);
NvlStatus nvswitch_corelib_set_tx_mode_ls10(nvlink_link *link, NvU64 mode, NvU32 flags);
void nvswitch_init_lpwr_regs_ls10(nvlink_link *link);
void nvswitch_program_l1_scratch_reg_ls10(nvswitch_device *device, NvU32 linkNumber);
NvlStatus nvswitch_minion_service_falcon_interrupts_ls10(nvswitch_device *device, NvU32 instance);
@ -986,6 +996,7 @@ NvlStatus nvswitch_reset_and_drain_links_ls10(nvswitch_device *device, NvU64 lin
void nvswitch_service_minion_all_links_ls10(nvswitch_device *device);
NvlStatus nvswitch_ctrl_get_board_part_number_ls10(nvswitch_device *device, NVSWITCH_GET_BOARD_PART_NUMBER_VECTOR *p);
void nvswitch_create_deferred_link_state_check_task_ls10(nvswitch_device *device, NvU32 nvlipt_instance, NvU32 link);
NvlStatus nvswitch_request_tl_link_state_ls10(nvlink_link *link, NvU32 tlLinkState, NvBool bSync);
//
// SU generated functions

View File

@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2021-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@ -46,6 +46,9 @@ typedef enum _MINION_STATUS
MINION_ALARM_BUSY = 80,
} MINION_STATUS;
#define LINKSTATUS_RESET 0x0
#define LINKSTATUS_UNINIT 0x1
#define LINKSTATUS_LANESHUTDOWN 0x13
#define LINKSTATUS_EMERGENCY_SHUTDOWN 0x29
#define LINKSTATUS_INITPHASE1 0x24
#define LINKSTATUS_ACTIVE_PENDING 0x25
#endif // _MINION_NVLINK_DEFINES_PUBLIC_H_

View File

@ -751,7 +751,7 @@ const NvU32 soe_ucode_data_lr10_dbg[] = {
0x00f0b305, 0x0a09584a, 0x90014afe, 0xafb508aa, 0x010f9801, 0xb60093f0, 0xa9b50294, 0x02afb503,
0xb2100918, 0x18a9351b, 0xb5020f98, 0x099804af, 0x05a9b503, 0xa0a000bf, 0x005b0b7e, 0xf001a6b0,
0x9a120b9c, 0x59ab3e01, 0xfb020a00, 0xe27e1c15, 0x943d0059, 0xf001a6b0, 0xa6f00bac, 0xa29a3c01,
0x548900f8, 0x9ebf0005, 0xb5019f98, 0x9ea0019f, 0x005a267e, 0x0801a4b3, 0x00f8a43d, 0xff0a09f8,
0x548900f8, 0x9ebf0005, 0xb5019f98, 0x9ea0019f, 0x005a267e, 0x0801a4b3, 0x00f8a43d, 0xff0a02f8,
0x12f900f8, 0x000f8c89, 0xf20a99bf, 0x380090b3, 0x000fa881, 0xf10a10bf, 0x2c0004b3, 0x000a747e,
0x19a00109, 0x000f9889, 0x948990a0, 0xff0f0010, 0x90899fa0, 0x90a0000f, 0x000f9489, 0x587e9fa0,
0x10a00037, 0x12f911fb, 0x000f8c89, 0xb4bd04bd, 0xb44c90a0, 0x0fac8a00, 0x0b947e00, 0x0cb4bd00,
@ -2269,8 +2269,8 @@ const NvU32 soe_ucode_data_lr10_dbg[] = {
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0xf0cc97fc, 0xc5e27e17, 0x63cc4ffc, 0xc48564fa, 0x979b9cb7, 0x7359186e, 0x8b211603, 0x878da8fe,
0x956b7a40, 0x90bcaaf7, 0xdea25edb, 0x9aaef423, 0x269562e0, 0x626d8a06, 0xc3df044b, 0x11ecee8e,
0xf0cc97fc, 0xc5e27e17, 0x63cc4ffc, 0xc48564fa, 0x176bd707, 0x7693db62, 0xcee1dbf7, 0x0ec5a1fa,
0x956b7a40, 0x90bcaaf7, 0xdea25edb, 0x9aaef423, 0x930f31b1, 0x6ce8df20, 0xa1e5e4d9, 0xc55f48a9,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,

View File

@ -751,7 +751,7 @@ const NvU32 soe_ucode_data_lr10_prd[] = {
0x00f0b305, 0x0a09584a, 0x90014afe, 0xafb508aa, 0x010f9801, 0xb60093f0, 0xa9b50294, 0x02afb503,
0xb2100918, 0x18a9351b, 0xb5020f98, 0x099804af, 0x05a9b503, 0xa0a000bf, 0x005b0b7e, 0xf001a6b0,
0x9a120b9c, 0x59ab3e01, 0xfb020a00, 0xe27e1c15, 0x943d0059, 0xf001a6b0, 0xa6f00bac, 0xa29a3c01,
0x548900f8, 0x9ebf0005, 0xb5019f98, 0x9ea0019f, 0x005a267e, 0x0801a4b3, 0x00f8a43d, 0xff0a09f8,
0x548900f8, 0x9ebf0005, 0xb5019f98, 0x9ea0019f, 0x005a267e, 0x0801a4b3, 0x00f8a43d, 0xff0a02f8,
0x12f900f8, 0x000f8c89, 0xf20a99bf, 0x380090b3, 0x000fa881, 0xf10a10bf, 0x2c0004b3, 0x000a747e,
0x19a00109, 0x000f9889, 0x948990a0, 0xff0f0010, 0x90899fa0, 0x90a0000f, 0x000f9489, 0x587e9fa0,
0x10a00037, 0x12f911fb, 0x000f8c89, 0xb4bd04bd, 0xb44c90a0, 0x0fac8a00, 0x0b947e00, 0x0cb4bd00,
@ -2269,8 +2269,8 @@ const NvU32 soe_ucode_data_lr10_prd[] = {
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0xf0cc97fc, 0xc5e27e17, 0x63cc4ffc, 0xc48564fa, 0x979b9cb7, 0x7359186e, 0x8b211603, 0x878da8fe,
0x956b7a40, 0x90bcaaf7, 0xdea25edb, 0x9aaef423, 0x269562e0, 0x626d8a06, 0xc3df044b, 0x11ecee8e,
0xf0cc97fc, 0xc5e27e17, 0x63cc4ffc, 0xc48564fa, 0x176bd707, 0x7693db62, 0xcee1dbf7, 0x0ec5a1fa,
0x956b7a40, 0x90bcaaf7, 0xdea25edb, 0x9aaef423, 0x930f31b1, 0x6ce8df20, 0xa1e5e4d9, 0xc55f48a9,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,

View File

@ -43,40 +43,68 @@
#include "nvswitch/lr10/dev_nvlipt_ip.h"
#include "nvswitch/lr10/dev_nport_ip.h"
#define NUM_SWITCH_WITH_DISCONNETED_REMOTE_LINK 8 // This must be incremented if any entries are added to the array below
#define NUM_SWITCH_WITH_DISCONNETED_REMOTE_LINK 12 // This must be incremented if any entries are added to the array below
lr10_links_connected_to_disabled_remote_end nvswitchDisconnetedRemoteLinkMasks[] =
{
{
0x8, // switchPhysicalId
0x56A000500 //linkMask
0x8, // switchPhysicalId
0x56A000500, // accessLinkMask
0xFF00FF // trunkLinkMask
},
{
0x9, // switchPhysicalId
0x509009900 //linkMask
0x9, // switchPhysicalId
0x509009900, // accessLinkMask
0xFF00FF // trunkLinkMask
},
{
0xb, // switchPhysicalId
0x56A000600 //linkMask
0xa, // switchPhysicalId
0x0, // accessLinkMask
0xFF00FF // trunkLinkMask
},
{
0xc, // switchPhysicalId
0x4A9009400 //linkMask
0xb, // switchPhysicalId
0x56A000600, // accessLinkMask
0xFF00FF // trunkLinkMask
},
{
0x18, // switchPhysicalId
0x56A000500 //linkMask
0xc, // switchPhysicalId
0x4A9009400, // accessLinkMask
0xFF00FF // trunkLinkMask
},
{
0x19, // switchPhysicalId
0x509009900 //linkMask
0xd, // switchPhysicalId
0x0, // accessLinkMask
0xFF00FF // trunkLinkMask
},
{
0x1b, // switchPhysicalId
0x56A000600 //linkMask
0x18, // switchPhysicalId
0x56A000500, // accessLinkMask
0xFF00FF // trunkLinkMask
},
{
0x1c, // switchPhysicalId
0x4A9009400 //linkMask
0x19, // switchPhysicalId
0x509009900, // accessLinkMask
0xFF00FF // trunkLinkMask
},
{
0x1a, // switchPhysicalId
0x0, // accessLinkMask
0xFF00FF // trunkLinkMask
},
{
0x1b, // switchPhysicalId
0x56A000600, // accessLinkMask
0xFF00FF // trunkLinkMask
},
{
0x1c, // switchPhysicalId
0x4A9009400, // accessLinkMask
0xFF00FF // trunkLinkMask
},
{
0x1d, // switchPhysicalId
0x0, // accessLinkMask
0xFF00FF // trunkLinkMask
},
};
ct_assert(sizeof(nvswitchDisconnetedRemoteLinkMasks)/sizeof(lr10_links_connected_to_disabled_remote_end) == NUM_SWITCH_WITH_DISCONNETED_REMOTE_LINK);
@ -653,6 +681,15 @@ nvswitch_init_lpwr_regs_lr10
tempRegVal);
}
void
nvswitch_program_l1_scratch_reg_lr10
(
nvswitch_device *device,
NvU32 linkNumber
)
{
// Not Implemented for LR10
}
void
nvswitch_init_buffer_ready_lr10
@ -841,7 +878,6 @@ nvswitch_corelib_set_dl_link_mode_lr10
if (nvswitch_does_link_need_termination_enabled(device, link))
{
if (mode == NVLINK_LINKSTATE_INITPHASE1)
{
status = nvswitch_link_termination_setup(device, link);
@ -2372,6 +2408,8 @@ nvswitch_load_link_disable_settings_lr10
NvU32 val;
NVLINK_CONFIG_DATA_LINKENTRY *vbios_link_entry = NULL;
NVSWITCH_BIOS_NVLINK_CONFIG *bios_config;
NvlStatus status;
lr10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LR10(device);
bios_config = nvswitch_get_bios_nvlink_config(device);
if ((bios_config == NULL) || (bios_config->bit_address == 0))
@ -2412,15 +2450,16 @@ nvswitch_load_link_disable_settings_lr10
__FUNCTION__, link->linkNumber);
return;
}
val = FLD_SET_DRF(_NVLIPT_LNK, _CTRL_SYSTEM_LINK_MODE_CTRL, _LINK_DISABLE,
_DISABLED, val);
NVSWITCH_LINK_WR32_LR10(device, link->linkNumber,
NVLIPT_LNK, _NVLIPT_LNK, _CTRL_SYSTEM_LINK_MODE_CTRL, val);
// Set link to invalid and unregister from corelib
device->link[link->linkNumber].valid = NV_FALSE;
nvlink_lib_unregister_link(link);
nvswitch_destroy_link(link);
status = nvswitch_link_termination_setup(device, link);
if (status != NVL_SUCCESS)
{
NVSWITCH_PRINT(device, ERROR,
"%s: Failed to enable termination on link #%d\n", __FUNCTION__, link->linkNumber);
return;
}
// add link to disabledRemoteEndLinkMask
chip_device->disabledRemoteEndLinkMask |= NVBIT64(link->linkNumber);
return;
}
@ -2488,6 +2527,8 @@ nvswitch_does_link_need_termination_enabled_lr10
NvU32 i;
NvU32 physicalId;
lr10_device *chip_device;
NvU32 numNvswitches;
NvlStatus status;
physicalId = nvswitch_read_physical_id(device);
chip_device = NVSWITCH_GET_CHIP_DEVICE_LR10(device);
@ -2510,16 +2551,30 @@ nvswitch_does_link_need_termination_enabled_lr10
chip_device->disabledRemoteEndLinkMask = 0;
if (nvlink_lib_is_registerd_device_with_reduced_config())
{
for (i = 0; i < NUM_SWITCH_WITH_DISCONNETED_REMOTE_LINK; ++i)
{
if (nvswitchDisconnetedRemoteLinkMasks[i].switchPhysicalId == physicalId)
for (i = 0; i < NUM_SWITCH_WITH_DISCONNETED_REMOTE_LINK; ++i)
{
chip_device->disabledRemoteEndLinkMask =
nvswitchDisconnetedRemoteLinkMasks[i].linkMask;
break;
if (nvswitchDisconnetedRemoteLinkMasks[i].switchPhysicalId == physicalId)
{
chip_device->disabledRemoteEndLinkMask |=
nvswitchDisconnetedRemoteLinkMasks[i].accessLinkMask;
status = nvlink_lib_return_device_count_by_type(NVLINK_DEVICE_TYPE_NVSWITCH, &numNvswitches);
if (status != NVL_SUCCESS)
{
NVSWITCH_PRINT(device, ERROR,
"%s: Failed to get nvswitch device count!\n", __FUNCTION__);
break;
}
if (numNvswitches <= NVSWITCH_NUM_DEVICES_PER_DELTA_LR10)
{
chip_device->disabledRemoteEndLinkMask |=
nvswitchDisconnetedRemoteLinkMasks[i].trunkLinkMask;
}
break;
}
}
}
}
chip_device->bDisabledRemoteEndLinkMaskCached = NV_TRUE;
}

View File

@ -5525,7 +5525,7 @@ _nvswitch_emit_link_errors_nvldl_fatal_link_ls10
INFOROM_NVLINK_ERROR_EVENT error_event;
// Only enabled link errors are deffered
pending = chip_device->deferredLinkErrors[link].fatalIntrMask.dl;
pending = chip_device->deferredLinkErrors[link].data.fatalIntrMask.dl;
report.raw_pending = pending;
report.raw_enable = pending;
report.mask = report.raw_enable;
@ -5565,13 +5565,13 @@ _nvswitch_emit_link_errors_minion_fatal_ls10
NvU32 localLinkIdx = NVSWITCH_NVLIPT_GET_LOCAL_LINK_ID_LS10(link);
NvU32 bit = BIT(localLinkIdx);
if (!chip_device->deferredLinkErrors[link].fatalIntrMask.minionLinkIntr.bPending)
if (!chip_device->deferredLinkErrors[link].data.fatalIntrMask.minionLinkIntr.bPending)
{
return;
}
// Grab the cached interrupt data
regData = chip_device->deferredLinkErrors[link].fatalIntrMask.minionLinkIntr.regData;
regData = chip_device->deferredLinkErrors[link].data.fatalIntrMask.minionLinkIntr.regData;
// get all possible interrupting links associated with this minion
report.raw_enable = link;
@ -5628,7 +5628,7 @@ _nvswitch_emit_link_errors_minion_nonfatal_ls10
NvU32 localLinkIdx = NVSWITCH_NVLIPT_GET_LOCAL_LINK_ID_LS10(link);
NvU32 bit = BIT(localLinkIdx);
if (!chip_device->deferredLinkErrors[link].nonFatalIntrMask.minionLinkIntr.bPending)
if (!chip_device->deferredLinkErrors[link].data.nonFatalIntrMask.minionLinkIntr.bPending)
{
return;
}
@ -5637,7 +5637,7 @@ _nvswitch_emit_link_errors_minion_nonfatal_ls10
regData = NVSWITCH_MINION_RD32_LS10(device, nvlipt_instance, _MINION, _MINION_INTR_STALL_EN);
// Grab the cached interrupt data
regData = chip_device->deferredLinkErrors[link].nonFatalIntrMask.minionLinkIntr.regData;
regData = chip_device->deferredLinkErrors[link].data.nonFatalIntrMask.minionLinkIntr.regData;
// get all possible interrupting links associated with this minion
report.raw_enable = link;
@ -5675,7 +5675,7 @@ _nvswitch_emit_link_errors_nvldl_nonfatal_link_ls10
NvU32 pending, bit, reg;
// Only enabled link errors are deffered
pending = chip_device->deferredLinkErrors[link].nonFatalIntrMask.dl;
pending = chip_device->deferredLinkErrors[link].data.nonFatalIntrMask.dl;
report.raw_pending = pending;
report.raw_enable = pending;
report.mask = report.raw_enable;
@ -5723,8 +5723,8 @@ _nvswitch_emit_link_errors_nvltlc_rx_lnk_nonfatal_1_ls10
NvU32 injected;
// Only enabled link errors are deffered
pending = chip_device->deferredLinkErrors[link].nonFatalIntrMask.tlcRx1;
injected = chip_device->deferredLinkErrors[link].nonFatalIntrMask.tlcRx1Injected;
pending = chip_device->deferredLinkErrors[link].data.nonFatalIntrMask.tlcRx1;
injected = chip_device->deferredLinkErrors[link].data.nonFatalIntrMask.tlcRx1Injected;
report.raw_pending = pending;
report.raw_enable = pending;
report.mask = report.raw_enable;
@ -5760,7 +5760,7 @@ _nvswitch_emit_link_errors_nvlipt_lnk_nonfatal_ls10
INFOROM_NVLINK_ERROR_EVENT error_event;
// Only enabled link errors are deffered
pending = chip_device->deferredLinkErrors[link].nonFatalIntrMask.liptLnk;
pending = chip_device->deferredLinkErrors[link].data.nonFatalIntrMask.liptLnk;
report.raw_pending = pending;
report.raw_enable = pending;
report.mask = report.raw_enable;
@ -5805,11 +5805,11 @@ _nvswitch_clear_deferred_link_errors_ls10
)
{
ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device);
NVLINK_LINK_ERROR_REPORTING *pLinkErrors;
NVLINK_LINK_ERROR_REPORTING_DATA *pLinkErrorsData;
pLinkErrors = &chip_device->deferredLinkErrors[link];
pLinkErrorsData = &chip_device->deferredLinkErrors[link].data;
nvswitch_os_memset(pLinkErrors, 0, sizeof(NVLINK_LINK_ERROR_REPORTING));
nvswitch_os_memset(pLinkErrorsData, 0, sizeof(NVLINK_LINK_ERROR_REPORTING_DATA));
}
static void
@ -5824,36 +5824,47 @@ _nvswitch_deferred_link_state_check_ls10
NvU32 nvlipt_instance = pErrorReportParams->nvlipt_instance;
NvU32 link = pErrorReportParams->link;
ls10_device *chip_device;
nvlink_link *pLink;
NvU64 linkState;
NvU64 lastLinkUpTime;
NvU64 lastRetrainTime;
NvU64 current_time = nvswitch_os_get_platform_time();
chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device);
pLink = nvswitch_get_link(device, pErrorReportParams->link);
lastLinkUpTime = chip_device->deferredLinkErrors[link].state.lastLinkUpTime;
lastRetrainTime = chip_device->deferredLinkErrors[link].state.lastRetrainTime;
// If is there a retry for reset_and_drain then re-create the state check for the current link
if (chip_device->deferredLinkErrors[link].bResetAndDrainRetry == NV_TRUE)
// Sanity Check
NVSWITCH_ASSERT(nvswitch_is_link_valid(device, link));
nvswitch_os_free(pErrorReportParams);
pErrorReportParams = NULL;
chip_device->deferredLinkErrors[link].state.bLinkStateCallBackEnabled = NV_FALSE;
// Link came up after last retrain
if (lastLinkUpTime >= lastRetrainTime)
{
if (pErrorReportParams)
{
nvswitch_os_free(pErrorReportParams);
}
chip_device->deferredLinkErrors[link].bLinkErrorsCallBackEnabled = NV_FALSE;
chip_device->deferredLinkErrors[link].bResetAndDrainRetry = NV_FALSE;
nvswitch_create_deferred_link_state_check_task_ls10(device, nvlipt_instance, link);
return;
}
if ((pLink == NULL) ||
(device->hal.nvswitch_corelib_get_dl_link_mode(pLink, &linkState) != NVL_SUCCESS) ||
((linkState != NVLINK_LINKSTATE_HS) && (linkState != NVLINK_LINKSTATE_SLEEP)))
//
// If the last time this link was up was before the last
// reset_and_drain execution and not enough time has past since the last
// retrain then schedule another callback.
//
if (lastLinkUpTime < lastRetrainTime)
{
_nvswitch_emit_deferred_link_errors_ls10(device, nvlipt_instance, link);
if ((current_time - lastRetrainTime) < NVSWITCH_DEFERRED_LINK_STATE_CHECK_INTERVAL_NS)
{
nvswitch_create_deferred_link_state_check_task_ls10(device, nvlipt_instance, link);
return;
}
}
//
// Otherwise, the link hasn't retrained within the timeout so emit the
// deferred errors.
//
_nvswitch_emit_deferred_link_errors_ls10(device, nvlipt_instance, link);
_nvswitch_clear_deferred_link_errors_ls10(device, link);
nvswitch_os_free(pErrorReportParams);
chip_device->deferredLinkErrors[link].bLinkStateCallBackEnabled = NV_FALSE;
}
void
@ -5868,7 +5879,7 @@ nvswitch_create_deferred_link_state_check_task_ls10
NVSWITCH_DEFERRED_ERROR_REPORTING_ARGS *pErrorReportParams;
NvlStatus status;
if (chip_device->deferredLinkErrors[link].bLinkStateCallBackEnabled)
if (chip_device->deferredLinkErrors[link].state.bLinkStateCallBackEnabled)
{
return;
}
@ -5889,7 +5900,7 @@ nvswitch_create_deferred_link_state_check_task_ls10
if (status == NVL_SUCCESS)
{
chip_device->deferredLinkErrors[link].bLinkStateCallBackEnabled = NV_TRUE;
chip_device->deferredLinkErrors[link].state.bLinkStateCallBackEnabled = NV_TRUE;
}
else
{
@ -5916,25 +5927,29 @@ _nvswitch_deferred_link_errors_check_ls10
ls10_device *chip_device;
NvU32 pending;
nvswitch_os_free(pErrorReportParams);
pErrorReportParams = NULL;
chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device);
chip_device->deferredLinkErrors[link].state.bLinkErrorsCallBackEnabled = NV_FALSE;
pending = chip_device->deferredLinkErrors[link].fatalIntrMask.dl;
if (FLD_TEST_DRF_NUM(_NVLDL_TOP, _INTR, _LTSSM_FAULT_UP, 1U, pending) ||
FLD_TEST_DRF_NUM(_NVLDL_TOP, _INTR, _LTSSM_FAULT_DOWN, 1U, pending) )
{
nvswitch_create_deferred_link_state_check_task_ls10(device, nvlipt_instance, link);
}
else
{
_nvswitch_emit_deferred_link_errors_ls10(device, nvlipt_instance, link);
_nvswitch_clear_deferred_link_errors_ls10(device, link);
}
pending = chip_device->deferredLinkErrors[link].data.fatalIntrMask.dl;
if (pErrorReportParams)
{
nvswitch_os_free(pErrorReportParams);
}
chip_device->deferredLinkErrors[link].bLinkErrorsCallBackEnabled = NV_FALSE;
// A link fault was observed which means we also did the retrain and
// scheduled a state check task. We can exit.
if (FLD_TEST_DRF_NUM(_NVLDL_TOP, _INTR, _LTSSM_FAULT_UP, 1U, pending))
return;
if (FLD_TEST_DRF_NUM(_NVLDL_TOP, _INTR, _LTSSM_FAULT_DOWN, 1U, pending))
return;
//
// No link fault, emit the deferred errors.
// It is assumed that this callback runs long before a link could have been
// retrained and hit errors again.
//
_nvswitch_emit_deferred_link_errors_ls10(device, nvlipt_instance, link);
_nvswitch_clear_deferred_link_errors_ls10(device, link);
}
static void
@ -5949,13 +5964,11 @@ _nvswitch_create_deferred_link_errors_task_ls10
NVSWITCH_DEFERRED_ERROR_REPORTING_ARGS *pErrorReportParams;
NvlStatus status;
if (chip_device->deferredLinkErrors[link].bLinkErrorsCallBackEnabled)
if (chip_device->deferredLinkErrors[link].state.bLinkErrorsCallBackEnabled)
{
return;
}
chip_device->deferredLinkErrors[link].bResetAndDrainRetry = NV_FALSE;
status = NVL_ERR_GENERIC;
pErrorReportParams = nvswitch_os_malloc(sizeof(NVSWITCH_DEFERRED_ERROR_REPORTING_ARGS));
if(pErrorReportParams != NULL)
@ -5972,7 +5985,7 @@ _nvswitch_create_deferred_link_errors_task_ls10
if (status == NVL_SUCCESS)
{
chip_device->deferredLinkErrors[link].bLinkErrorsCallBackEnabled = NV_TRUE;
chip_device->deferredLinkErrors[link].state.bLinkErrorsCallBackEnabled = NV_TRUE;
}
else
{
@ -6026,7 +6039,7 @@ _nvswitch_service_nvldl_nonfatal_link_ls10
bit = DRF_NUM(_NVLDL_TOP, _INTR, _RX_SHORT_ERROR_RATE, 1);
if (nvswitch_test_flags(pending, bit))
{
chip_device->deferredLinkErrors[link].nonFatalIntrMask.dl |= bit;
chip_device->deferredLinkErrors[link].data.nonFatalIntrMask.dl |= bit;
_nvswitch_create_deferred_link_errors_task_ls10(device, nvlipt_instance, link);
nvswitch_clear_flags(&unhandled, bit);
}
@ -6049,7 +6062,7 @@ _nvswitch_service_nvldl_nonfatal_link_ls10
if (nvswitch_test_flags(pending, bit))
{
chip_device->deferredLinkErrors[link].nonFatalIntrMask.dl |= bit;
chip_device->deferredLinkErrors[link].data.nonFatalIntrMask.dl |= bit;
_nvswitch_create_deferred_link_errors_task_ls10(device, nvlipt_instance, link);
nvswitch_clear_flags(&unhandled, bit);
@ -6344,8 +6357,8 @@ _nvswitch_service_nvltlc_rx_lnk_nonfatal_1_ls10
bit = DRF_NUM(_NVLTLC_RX_LNK, _ERR_STATUS_1, _HEARTBEAT_TIMEOUT_ERR, 1);
if (nvswitch_test_flags(pending, bit))
{
chip_device->deferredLinkErrors[link].nonFatalIntrMask.tlcRx1 |= bit;
chip_device->deferredLinkErrors[link].nonFatalIntrMask.tlcRx1Injected |= injected;
chip_device->deferredLinkErrors[link].data.nonFatalIntrMask.tlcRx1 |= bit;
chip_device->deferredLinkErrors[link].data.nonFatalIntrMask.tlcRx1Injected |= injected;
_nvswitch_create_deferred_link_errors_task_ls10(device, nvlipt_instance, link);
if (FLD_TEST_DRF_NUM(_NVLTLC_RX_LNK, _ERR_REPORT_INJECT_1, _HEARTBEAT_TIMEOUT_ERR, 0x0, injected))
@ -6628,8 +6641,10 @@ _nvswitch_service_nvlipt_lnk_status_ls10
NvU32 pending, enabled, unhandled, bit;
NvU64 mode;
nvlink_link *link;
link = nvswitch_get_link(device, link_id);
ls10_device *chip_device;
link = nvswitch_get_link(device, link_id);
chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device);
pending = NVSWITCH_LINK_RD32(device, link_id, NVLIPT_LNK, _NVLIPT_LNK, _INTR_STATUS);
enabled = NVSWITCH_LINK_RD32(device, link_id, NVLIPT_LNK, _NVLIPT_LNK, _INTR_INT1_EN);
pending &= enabled;
@ -6669,7 +6684,13 @@ _nvswitch_service_nvlipt_lnk_status_ls10
//
nvswitch_corelib_training_complete_ls10(link);
nvswitch_init_buffer_ready(device, link, NV_TRUE);
link->bRxDetected = NV_TRUE;
link->bRxDetected = NV_TRUE;
//
// Clear out any cached interrupts for the link and update the last link up timestamp
//
_nvswitch_clear_deferred_link_errors_ls10(device, link_id);
chip_device->deferredLinkErrors[link_id].state.lastLinkUpTime = nvswitch_os_get_platform_time();
}
else if (mode == NVLINK_LINKSTATE_FAULT)
{
@ -6706,8 +6727,6 @@ _nvswitch_service_nvlipt_lnk_nonfatal_ls10
)
{
ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device);
nvlink_link *link_info = nvswitch_get_link(device, link);
NvU32 lnkStateRequest, linkState;
NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 };
NvU32 pending, bit, unhandled;
INFOROM_NVLINK_ERROR_EVENT error_event = { 0 };
@ -6743,27 +6762,10 @@ _nvswitch_service_nvlipt_lnk_nonfatal_ls10
if (nvswitch_test_flags(pending, bit))
{
//
// Read back LINK_STATE_REQUESTS and TOP_LINK_STATE registers
// If request == ACTIVE and TOP_LINK_STATE == FAULT there is a pending
// fault on training so re-run reset_and_drain
// Mark that the defered link error mechanism as seeing a reset_and_train re-try so
// the deferred task needs to re-create itself instead of continuing with the linkstate
// checks
// based off of HW's assertion. FAILEDMINIONREQUEST always trails a DL fault. So no need to
// do reset_and_drain here
//
linkState = NVSWITCH_LINK_RD32_LS10(device, link_info->linkNumber, NVLDL,
_NVLDL, _TOP_LINK_STATE);
lnkStateRequest = NVSWITCH_LINK_RD32_LS10(device, link,
NVLIPT_LNK , _NVLIPT_LNK , _CTRL_LINK_STATE_REQUEST);
if(FLD_TEST_DRF(_NVLIPT_LNK, _CTRL_LINK_STATE_REQUEST, _REQUEST, _ACTIVE, lnkStateRequest) &&
linkState == NV_NVLDL_TOP_LINK_STATE_STATE_FAULT)
{
chip_device->deferredLinkErrors[link].bResetAndDrainRetry = NV_TRUE;
device->hal.nvswitch_reset_and_drain_links(device, NVBIT64(link));
}
chip_device->deferredLinkErrors[link].nonFatalIntrMask.liptLnk |= bit;
chip_device->deferredLinkErrors[link].data.nonFatalIntrMask.liptLnk |= bit;
_nvswitch_create_deferred_link_errors_task_ls10(device, nvlipt_instance, link);
nvswitch_clear_flags(&unhandled, bit);
}
@ -7001,9 +7003,9 @@ _nvswitch_service_nvlw_nonfatal_ls10
return NVL_SUCCESS;
}
status[0] = _nvswitch_service_nvldl_nonfatal_ls10(device, instance, intrLinkMask);
status[1] = _nvswitch_service_nvltlc_nonfatal_ls10(device, instance, intrLinkMask);
status[2] = _nvswitch_service_nvlipt_link_nonfatal_ls10(device, instance, intrLinkMask);
status[0] = _nvswitch_service_nvlipt_link_nonfatal_ls10(device, instance, intrLinkMask);
status[1] = _nvswitch_service_nvldl_nonfatal_ls10(device, instance, intrLinkMask);
status[2] = _nvswitch_service_nvltlc_nonfatal_ls10(device, instance, intrLinkMask);
if ((status[0] != NVL_SUCCESS) && (status[0] != -NVL_NOT_FOUND) &&
(status[1] != NVL_SUCCESS) && (status[1] != -NVL_NOT_FOUND) &&
@ -7373,6 +7375,28 @@ nvswitch_lib_service_interrupts_ls10
// 2. Clear leaf interrupt
// 3. Run leaf specific interrupt handler
//
val = NVSWITCH_ENG_RD32(device, GIN, , 0, _CTRL, _CPU_INTR_NVLW_NON_FATAL);
val = DRF_NUM(_CTRL, _CPU_INTR_NVLW_NON_FATAL, _MASK, val);
if (val != 0)
{
NVSWITCH_PRINT(device, INFO, "%s: NVLW NON_FATAL interrupts pending = 0x%x\n",
__FUNCTION__, val);
NVSWITCH_ENG_WR32(device, GIN, , 0, _CTRL, _CPU_INTR_LEAF(NV_CTRL_CPU_INTR_NVLW_NON_FATAL_IDX), val);
for (i = 0; i < DRF_SIZE(NV_CTRL_CPU_INTR_NVLW_NON_FATAL_MASK); i++)
{
if (val & NVBIT(i))
{
status = _nvswitch_service_nvlw_nonfatal_ls10(device, i);
if (status != NVL_SUCCESS)
{
NVSWITCH_PRINT(device, INFO, "%s: NVLW[%d] NON_FATAL interrupt handling status = %d\n",
__FUNCTION__, i, status);
return_status = status;
}
}
}
}
val = NVSWITCH_ENG_RD32(device, GIN, , 0, _CTRL, _CPU_INTR_NVLW_FATAL);
val = DRF_NUM(_CTRL, _CPU_INTR_NVLW_FATAL, _MASK, val);
if (val != 0)
@ -7397,28 +7421,6 @@ nvswitch_lib_service_interrupts_ls10
}
}
val = NVSWITCH_ENG_RD32(device, GIN, , 0, _CTRL, _CPU_INTR_NVLW_NON_FATAL);
val = DRF_NUM(_CTRL, _CPU_INTR_NVLW_NON_FATAL, _MASK, val);
if (val != 0)
{
NVSWITCH_PRINT(device, INFO, "%s: NVLW NON_FATAL interrupts pending = 0x%x\n",
__FUNCTION__, val);
NVSWITCH_ENG_WR32(device, GIN, , 0, _CTRL, _CPU_INTR_LEAF(NV_CTRL_CPU_INTR_NVLW_NON_FATAL_IDX), val);
for (i = 0; i < DRF_SIZE(NV_CTRL_CPU_INTR_NVLW_NON_FATAL_MASK); i++)
{
if (val & NVBIT(i))
{
status = _nvswitch_service_nvlw_nonfatal_ls10(device, i);
if (status != NVL_SUCCESS)
{
NVSWITCH_PRINT(device, INFO, "%s: NVLW[%d] NON_FATAL interrupt handling status = %d\n",
__FUNCTION__, i, status);
return_status = status;
}
}
}
}
val = NVSWITCH_ENG_RD32(device, GIN, , 0, _CTRL, _CPU_INTR_NVLW_CORRECTABLE);
val = DRF_NUM(_CTRL, _CPU_INTR_NVLW_CORRECTABLE, _MASK, val);
if (val != 0)
@ -7757,16 +7759,16 @@ nvswitch_service_nvldl_fatal_link_ls10
if (nvswitch_test_flags(pending, bit))
{
{
dlDeferredIntrLinkMask |= bit;
dlDeferredIntrLinkMask |= bit;
//
// Since reset and drain will reset the link, including clearing
// pending interrupts, skip the clear write below. There are cases
// where link clocks will not be on after reset and drain so there
// maybe PRI errors on writing to the register
//
bRequireResetAndDrain = NV_TRUE;
}
//
// Since reset and drain will reset the link, including clearing
// pending interrupts, skip the clear write below. There are cases
// where link clocks will not be on after reset and drain so there
// maybe PRI errors on writing to the register
//
bRequireResetAndDrain = NV_TRUE;
}
nvswitch_clear_flags(&unhandled, bit);
}
@ -7774,41 +7776,25 @@ nvswitch_service_nvldl_fatal_link_ls10
if (nvswitch_test_flags(pending, bit))
{
{
dlDeferredIntrLinkMask |= bit;
dlDeferredIntrLinkMask |= bit;
//
// Since reset and drain will reset the link, including clearing
// pending interrupts, skip the clear write below. There are cases
// where link clocks will not be on after reset and drain so there
// maybe PRI errors on writing to the register
//
bRequireResetAndDrain = NV_TRUE;
}
//
// Since reset and drain will reset the link, including clearing
// pending interrupts, skip the clear write below. There are cases
// where link clocks will not be on after reset and drain so there
// maybe PRI errors on writing to the register
//
bRequireResetAndDrain = NV_TRUE;
}
nvswitch_clear_flags(&unhandled, bit);
}
if (bRequireResetAndDrain)
{
//
// If there is a link state callback enabled for this link then
// we hit a consecutive FAULT_UP error. set bResetAndDrainRetry
// so the current callback on completion can create a new
// callback to retry the link state check to account for the added
// delay caused by taking a 2nd fault and having to re-train
//
// If there is no callback enabled then set the error mask
// and create the link errors deferred task.
//
if (chip_device->deferredLinkErrors[link].bLinkStateCallBackEnabled)
{
chip_device->deferredLinkErrors[link].bResetAndDrainRetry = NV_TRUE;
}
else
{
chip_device->deferredLinkErrors[link].fatalIntrMask.dl = dlDeferredIntrLinkMask;
_nvswitch_create_deferred_link_errors_task_ls10(device, nvlipt_instance, link);
}
chip_device->deferredLinkErrors[link].data.fatalIntrMask.dl |= dlDeferredIntrLinkMask;
device->hal.nvswitch_reset_and_drain_links(device, NVBIT64(link));
chip_device->deferredLinkErrors[link].state.lastRetrainTime = nvswitch_os_get_platform_time();
nvswitch_create_deferred_link_state_check_task_ls10(device, nvlipt_instance, link);
}
NVSWITCH_UNHANDLED_CHECK(device, unhandled);
@ -7916,7 +7902,7 @@ nvswitch_service_minion_link_ls10
case NV_MINION_NVLINK_LINK_INTR_CODE_BADINIT:
case NV_MINION_NVLINK_LINK_INTR_CODE_PMFAIL:
case NV_MINION_NVLINK_LINK_INTR_CODE_NOINIT:
chip_device->deferredLinkErrors[link].fatalIntrMask.minionLinkIntr =
chip_device->deferredLinkErrors[link].data.fatalIntrMask.minionLinkIntr =
minionLinkIntr;
_nvswitch_create_deferred_link_errors_task_ls10(device, instance, link);
break;
@ -7928,7 +7914,7 @@ nvswitch_service_minion_link_ls10
case NV_MINION_NVLINK_LINK_INTR_CODE_DLREQ:
case NV_MINION_NVLINK_LINK_INTR_CODE_PMDISABLED:
case NV_MINION_NVLINK_LINK_INTR_CODE_TLREQ:
chip_device->deferredLinkErrors[link].nonFatalIntrMask.minionLinkIntr =
chip_device->deferredLinkErrors[link].data.nonFatalIntrMask.minionLinkIntr =
minionLinkIntr;
_nvswitch_create_deferred_link_errors_task_ls10(device, instance, link);
case NV_MINION_NVLINK_LINK_INTR_CODE_NOTIFY:

View File

@ -98,6 +98,30 @@ _nvswitch_configure_reserved_throughput_counters
DRF_DEF(_NVLTLC_TX_LNK, _DEBUG_TP_CNTR_CTRL_0, _ENABLE, _ENABLE));
}
void
nvswitch_program_l1_scratch_reg_ls10
(
nvswitch_device *device,
NvU32 linkNumber
)
{
NvU32 scrRegVal;
NvU32 tempRegVal;
// Read L1 register and store initial/VBIOS L1 Threshold Value in Scratch register
tempRegVal = NVSWITCH_LINK_RD32_LS10(device, linkNumber, NVLIPT_LNK, _NVLIPT_LNK, _PWRM_L1_ENTER_THRESHOLD);
scrRegVal = NVSWITCH_LINK_RD32_LS10(device, linkNumber, NVLIPT_LNK, _NVLIPT_LNK, _SCRATCH_WARM);
// Update the scratch register value only if it has not been written to before
if (scrRegVal == NV_NVLIPT_LNK_SCRATCH_WARM_DATA_INIT)
{
NVSWITCH_LINK_WR32_LS10(device, linkNumber, NVLIPT_LNK, _NVLIPT_LNK, _SCRATCH_WARM, tempRegVal);
}
}
#define BUG_3797211_LS10_VBIOS_VERSION 0x9610410000
void
nvswitch_init_lpwr_regs_ls10
(
@ -110,33 +134,56 @@ nvswitch_init_lpwr_regs_ls10
NvU32 tempRegVal, lpEntryThreshold;
NvU8 softwareDesired;
NvBool bLpEnable;
NvU64 biosVersion;
if (device->regkeys.enable_pm == NV_SWITCH_REGKEY_ENABLE_PM_NO)
{
return;
}
// bios_config = nvswitch_get_bios_nvlink_config(device);
// IC Enter Threshold
if (device->regkeys.lp_threshold == NV_SWITCH_REGKEY_SET_LP_THRESHOLD_DEFAULT)
if (nvswitch_lib_get_bios_version(device, &biosVersion) != NVL_SUCCESS)
{
//
// TODO: get from bios. Refer Bug 3626523 for more info.
//
// The threshold is measured in 100us unit. So lpEntryThreshold = 1
// means the threshold is set to 100us in the register.
//
lpEntryThreshold = 1;
NVSWITCH_PRINT(device, WARN, "%s Get VBIOS version failed.\n",
__FUNCTION__);
biosVersion = 0;
}
// bios_config = nvswitch_get_bios_nvlink_config(device);
if (biosVersion >= BUG_3797211_LS10_VBIOS_VERSION)
{
// IC Enter Threshold
if (device->regkeys.lp_threshold == NV_SWITCH_REGKEY_SET_LP_THRESHOLD_DEFAULT)
{
//
// Do nothing since VBIOS (version 96.10.41.00.00 and above)
// sets the default L1 threshold.
// Refer Bug 3797211 for more info.
//
}
else
{
lpEntryThreshold = device->regkeys.lp_threshold;
tempRegVal = 0;
tempRegVal = FLD_SET_DRF_NUM(_NVLIPT, _LNK_PWRM_L1_ENTER_THRESHOLD, _THRESHOLD, lpEntryThreshold, tempRegVal);
NVSWITCH_LINK_WR32_LS10(device, linkNum, NVLIPT_LNK, _NVLIPT_LNK, _PWRM_L1_ENTER_THRESHOLD, tempRegVal);
}
}
else
{
lpEntryThreshold = device->regkeys.lp_threshold;
}
// IC Enter Threshold
if (device->regkeys.lp_threshold == NV_SWITCH_REGKEY_SET_LP_THRESHOLD_DEFAULT)
{
lpEntryThreshold = 1;
}
else
{
lpEntryThreshold = device->regkeys.lp_threshold;
}
tempRegVal = 0;
tempRegVal = FLD_SET_DRF_NUM(_NVLIPT, _LNK_PWRM_L1_ENTER_THRESHOLD, _THRESHOLD, lpEntryThreshold, tempRegVal);
NVSWITCH_LINK_WR32_LS10(device, linkNum, NVLIPT_LNK, _NVLIPT_LNK, _PWRM_L1_ENTER_THRESHOLD, tempRegVal);
tempRegVal = 0;
tempRegVal = FLD_SET_DRF_NUM(_NVLIPT, _LNK_PWRM_L1_ENTER_THRESHOLD, _THRESHOLD, lpEntryThreshold, tempRegVal);
NVSWITCH_LINK_WR32_LS10(device, linkNum, NVLIPT_LNK, _NVLIPT_LNK, _PWRM_L1_ENTER_THRESHOLD, tempRegVal);
}
//LP Entry Enable
bLpEnable = NV_TRUE;
@ -1423,7 +1470,7 @@ nvswitch_load_link_disable_settings_ls10
nvswitch_device *device,
nvlink_link *link
)
{
{
NvU32 regVal;
// Read state from NVLIPT HW
@ -1432,7 +1479,7 @@ nvswitch_load_link_disable_settings_ls10
if (FLD_TEST_DRF(_NVLIPT_LNK, _CTRL_LINK_STATE_STATUS, _CURRENTLINKSTATE, _DISABLE, regVal))
{
// Set link to invalid and unregister from corelib
device->link[link->linkNumber].valid = NV_FALSE;
nvlink_lib_unregister_link(link);
@ -1473,7 +1520,7 @@ nvswitch_execute_unilateral_link_shutdown_ls10
// Status is explicitly ignored here since we are required to soldier-on
// in this scenario
//
status = nvswitch_request_tl_link_state_lr10(link,
status = nvswitch_request_tl_link_state_ls10(link,
NV_NVLIPT_LNK_CTRL_LINK_STATE_REQUEST_REQUEST_SHUTDOWN, NV_TRUE);
if (status == NVL_SUCCESS)
@ -1492,22 +1539,22 @@ nvswitch_execute_unilateral_link_shutdown_ls10
{
link_intr_subcode = DRF_VAL(_NVLSTAT, _MN00, _LINK_INTR_SUBCODE, stat_data);
if ((link_state == NV_NVLIPT_LNK_CTRL_LINK_STATE_REQUEST_STATUS_MINION_REQUEST_FAIL) &&
(link_intr_subcode == MINION_ALARM_BUSY))
{
NVSWITCH_PRINT(device, INFO,
"%s: Retrying shutdown due to Minion DLCMD Fault subcode = 0x%x\n",
__FUNCTION__, link_intr_subcode);
//
// We retry the shutdown sequence 3 times when we see a MINION_REQUEST_FAIL
// or MINION_ALARM_BUSY
//
retry_count--;
}
else
{
break;
}
if ((link_state == NV_NVLIPT_LNK_CTRL_LINK_STATE_REQUEST_STATUS_MINION_REQUEST_FAIL) &&
(link_intr_subcode == MINION_ALARM_BUSY))
{
NVSWITCH_PRINT(device, INFO,
"%s: Retrying shutdown due to Minion DLCMD Fault subcode = 0x%x\n",
__FUNCTION__, link_intr_subcode);
//
// We retry the shutdown sequence 3 times when we see a MINION_REQUEST_FAIL
// or MINION_ALARM_BUSY
//
retry_count--;
}
else
{
break;
}
}
else
{
@ -1542,6 +1589,12 @@ nvswitch_reset_and_train_link_ls10
nvswitch_execute_unilateral_link_shutdown_ls10(link);
nvswitch_corelib_clear_link_state_ls10(link);
//
// When a link faults there could be a race between the driver requesting
// reset and MINION processing Emergency Shutdown. Minion will notify if
// such a collision happens and will deny the reset request, so try the
// request up to 3 times
//
do
{
status = nvswitch_request_tl_link_state_ls10(link,
@ -1565,24 +1618,24 @@ nvswitch_reset_and_train_link_ls10
{
link_intr_subcode = DRF_VAL(_NVLSTAT, _MN00, _LINK_INTR_SUBCODE, stat_data);
if ((link_state == NV_NVLIPT_LNK_CTRL_LINK_STATE_REQUEST_STATUS_MINION_REQUEST_FAIL) &&
(link_intr_subcode == MINION_ALARM_BUSY))
{
if ((link_state == NV_NVLIPT_LNK_CTRL_LINK_STATE_REQUEST_STATUS_MINION_REQUEST_FAIL) &&
(link_intr_subcode == MINION_ALARM_BUSY))
{
status = nvswitch_request_tl_link_state_ls10(link,
NV_NVLIPT_LNK_CTRL_LINK_STATE_REQUEST_REQUEST_RESET, NV_TRUE);
status = nvswitch_request_tl_link_state_ls10(link,
NV_NVLIPT_LNK_CTRL_LINK_STATE_REQUEST_REQUEST_RESET, NV_TRUE);
//
// We retry the shutdown sequence 3 times when we see a MINION_REQUEST_FAIL
// or MINION_ALARM_BUSY
//
retry_count--;
//
// We retry the shutdown sequence 3 times when we see a MINION_REQUEST_FAIL
// or MINION_ALARM_BUSY
//
retry_count--;
}
else
{
break;
}
}
else
{
break;
}
}
else
{
// failed to query minion for the link_intr_subcode so retry
@ -1597,15 +1650,18 @@ nvswitch_reset_and_train_link_ls10
"%s: NvLink Reset has failed for link %d\n",
__FUNCTION__, link->linkNumber);
// Re-register links.
status = nvlink_lib_register_link(device->nvlink_device, link);
if (status != NVL_SUCCESS)
{
nvswitch_destroy_link(link);
return status;
}
return status;
}
status = nvswitch_launch_ALI_link_training(device, link, NV_FALSE);
if (status != NVL_SUCCESS)
{
NVSWITCH_PRINT(device, ERROR,
"%s: NvLink failed to request ACTIVE for link %d\n",
__FUNCTION__, link->linkNumber);
return status;
}
return NVL_SUCCESS;
}
@ -1657,6 +1713,76 @@ nvswitch_are_link_clocks_on_ls10
return NV_TRUE;
}
NvlStatus
nvswitch_request_tl_link_state_ls10
(
nvlink_link *link,
NvU32 tlLinkState,
NvBool bSync
)
{
nvswitch_device *device = link->dev->pDevInfo;
NvlStatus status = NVL_SUCCESS;
NvU32 linkStatus;
NvU32 lnkErrStatus;
NvU32 bit;
if (!NVSWITCH_IS_LINK_ENG_VALID_LS10(device, NVLIPT_LNK, link->linkNumber))
{
NVSWITCH_PRINT(device, ERROR,
"%s: link #%d invalid\n",
__FUNCTION__, link->linkNumber);
return -NVL_UNBOUND_DEVICE;
}
// Wait for the TL link state register to report ready
status = nvswitch_wait_for_tl_request_ready_lr10(link);
if (status != NVL_SUCCESS)
{
return status;
}
// Clear any pending FAILEDMINIONREQUEST status that maybe populated as it is stale now
bit = DRF_NUM(_NVLIPT_LNK, _ERR_STATUS_0, _FAILEDMINIONREQUEST, 1);
lnkErrStatus = NVSWITCH_LINK_RD32(device, link->linkNumber, NVLIPT_LNK, _NVLIPT_LNK, _ERR_STATUS_0);
if (nvswitch_test_flags(lnkErrStatus, bit))
{
NVSWITCH_LINK_WR32(device, link->linkNumber, NVLIPT_LNK, _NVLIPT_LNK, _ERR_STATUS_0,
bit);
}
// Request state through CTRL_LINK_STATE_REQUEST
NVSWITCH_LINK_WR32_LS10(device, link->linkNumber,
NVLIPT_LNK, _NVLIPT_LNK, _CTRL_LINK_STATE_REQUEST,
DRF_NUM(_NVLIPT_LNK, _CTRL_LINK_STATE_REQUEST, _REQUEST, tlLinkState));
if (bSync)
{
// Wait for the TL link state register to complete
status = nvswitch_wait_for_tl_request_ready_lr10(link);
if (status != NVL_SUCCESS)
{
return status;
}
// Check for state requested
linkStatus = NVSWITCH_LINK_RD32_LS10(device, link->linkNumber,
NVLIPT_LNK , _NVLIPT_LNK , _CTRL_LINK_STATE_STATUS);
if (DRF_VAL(_NVLIPT_LNK, _CTRL_LINK_STATE_STATUS, _CURRENTLINKSTATE, linkStatus) !=
tlLinkState)
{
NVSWITCH_PRINT(device, ERROR,
"%s: TL link state request to state 0x%x for link #%d did not complete!\n",
__FUNCTION__, tlLinkState, link->linkNumber);
return -NVL_ERR_GENERIC;
}
}
return status;
}
NvBool
nvswitch_does_link_need_termination_enabled_ls10
(

View File

@ -1353,7 +1353,53 @@ nvswitch_init_warm_reset_ls10
)
{
NVSWITCH_PRINT(device, WARN, "%s: Function not implemented\n", __FUNCTION__);
}
}
//
// Helper funcction to query MINION to see if DL clocks are on
// return NV_TRUE if the clocks are on
// NV_FALSE if the clocks are off
static
NvBool
_nvswitch_are_dl_clocks_on
(
nvswitch_device *device,
NvU32 linkNumber
)
{
NvU32 link_state;
NvU32 stat_data;
NvlStatus status = NVL_SUCCESS;
nvlink_link * link= nvswitch_get_link(device, linkNumber);
if (link == NULL)
{
NVSWITCH_PRINT(device, ERROR, "%s: invalid link %d\n",
__FUNCTION__, linkNumber);
return NV_FALSE;
}
status = nvswitch_minion_get_dl_status(device, linkNumber,
NV_NVLSTAT_UC01, 0, &stat_data);
if (status != NVL_SUCCESS)
{
return NV_FALSE;
}
link_state = DRF_VAL(_NVLSTAT, _UC01, _LINK_STATE, stat_data);
switch(link_state)
{
case LINKSTATUS_RESET:
case LINKSTATUS_UNINIT:
return NV_FALSE;
case LINKSTATUS_LANESHUTDOWN:
case LINKSTATUS_ACTIVE_PENDING:
return nvswitch_are_link_clocks_on_ls10(device, link,
NVSWITCH_PER_LINK_CLOCK_SET(RXCLK) | NVSWITCH_PER_LINK_CLOCK_SET(TXCLK));
}
return NV_TRUE;
}
//
// Implement reset and drain sequence for ls10
@ -1586,10 +1632,10 @@ nvswitch_reset_and_drain_links_ls10
nvswitch_soe_restore_nport_state_ls10(device, link);
// Step 7.0 : Re-program the routing table for DBEs
// Step 8.0 : Reset NVLW and NPORT interrupt state
_nvswitch_link_reset_interrupts_ls10(device, link);
// Re-register links.
status = nvlink_lib_register_link(device->nvlink_device, link_info);
if (status != NVL_SUCCESS)
@ -1625,21 +1671,9 @@ nvswitch_reset_and_drain_links_ls10
do
{
bKeepPolling = (nvswitch_timeout_check(&timeout)) ? NV_FALSE : NV_TRUE;
bAreDlClocksOn = _nvswitch_are_dl_clocks_on(device, link);
status = nvswitch_minion_get_dl_status(device, link_info->linkNumber,
NV_NVLSTAT_UC01, 0, &stat_data);
if (status != NVL_SUCCESS)
{
continue;
}
link_state = DRF_VAL(_NVLSTAT, _UC01, _LINK_STATE, stat_data);
bAreDlClocksOn = (link_state != LINKSTATUS_INITPHASE1) ?
NV_TRUE:NV_FALSE;
if (bAreDlClocksOn == NV_TRUE)
if (bAreDlClocksOn)
{
break;
}

View File

@ -1345,7 +1345,6 @@ nvswitch_lib_initialize_device
NvU8 link_num;
nvlink_link *link = NULL;
NvBool is_blacklisted_by_os = NV_FALSE;
NvU64 mode;
if (!NVSWITCH_IS_DEVICE_ACCESSIBLE(device))
{
@ -1508,18 +1507,19 @@ nvswitch_lib_initialize_device
nvswitch_reset_persistent_link_hw_state(device, link_num);
if(_nvswitch_corelib_get_dl_link_mode(link, &mode) != NVL_SUCCESS)
{
NVSWITCH_PRINT(device, ERROR, "%s: nvlipt_lnk_status: Failed to check link mode! LinkId %d\n",
__FUNCTION__, link_num);
}
else if(mode == NVLINK_LINKSTATE_FAULT)
{
NVSWITCH_PRINT(device, INFO, "%s: retraining LinkId %d\n",
__FUNCTION__, link_num);
nvswitch_reset_and_train_link(device, link);
}
//
// During Nvswitch initialization, the default L1 thresholds are programmed by the
// BIOS from the BIOS tables. Save these L1 Threshold Values in scratch registers
// for use when resetting the thresholds to default.
//
nvswitch_program_l1_scratch_reg(device, link_num);
//
// WAR : Initializing the L1 threshold registers at this point as a WAR for
// Bug 3963639 where it was discussed that the L1 threshold register should have
// the default value for all available links and not just for active links.
//
nvswitch_init_lpwr_regs(link);
}
retval = nvswitch_set_training_mode(device);
@ -1623,6 +1623,10 @@ nvswitch_lib_post_init_device
)
{
NvlStatus retval;
NvlStatus status;
NvU32 link_num;
NvU64 mode;
nvlink_link *link;
if (!NVSWITCH_IS_DEVICE_INITIALIZED(device))
{
@ -1634,7 +1638,7 @@ nvswitch_lib_post_init_device
{
return retval;
}
if (nvswitch_is_bios_supported(device))
{
retval = nvswitch_bios_get_image(device);
@ -1670,6 +1674,41 @@ nvswitch_lib_post_init_device
(void)nvswitch_launch_ALI(device);
}
//
// There is an edge case where a hypervisor may not send same number
// of reset to switch and GPUs, so try to re-train links in fault
// if possible
//
for (link_num=0; link_num < nvswitch_get_num_links(device); link_num++)
{
// Sanity check
if (!nvswitch_is_link_valid(device, link_num))
{
continue;
}
status = nvlink_lib_get_link(device->nvlink_device, link_num, &link);
if (status != NVL_SUCCESS)
{
NVSWITCH_PRINT(device, ERROR, "%s: Failed to get link for LinkId %d\n",
__FUNCTION__, link_num);
continue;
}
// If the link is in fault then re-train
if(_nvswitch_corelib_get_dl_link_mode(link, &mode) != NVL_SUCCESS)
{
NVSWITCH_PRINT(device, ERROR, "%s: nvlipt_lnk_status: Failed to check link mode! LinkId %d\n",
__FUNCTION__, link_num);
}
else if(mode == NVLINK_LINKSTATE_FAULT)
{
NVSWITCH_PRINT(device, INFO, "%s: retraining LinkId %d\n",
__FUNCTION__, link_num);
nvswitch_reset_and_train_link(device, link);
}
}
return NVL_SUCCESS;
}
@ -4617,6 +4656,16 @@ nvswitch_init_lpwr_regs
device->hal.nvswitch_init_lpwr_regs(link);
}
void
nvswitch_program_l1_scratch_reg
(
nvswitch_device *device,
NvU32 linkNumber
)
{
device->hal.nvswitch_program_l1_scratch_reg(device, linkNumber);
}
NvlStatus
nvswitch_launch_ALI
(

View File

@ -121,7 +121,8 @@
#define NVLINK_FLA_PRIV_ERR (137)
#define ROBUST_CHANNEL_DLA_ERROR (138)
#define ROBUST_CHANNEL_FAST_PATH_ERROR (139)
#define ROBUST_CHANNEL_LAST_ERROR (ROBUST_CHANNEL_FAST_PATH_ERROR)
#define UNRECOVERABLE_ECC_ERROR_ESCAPE (140)
#define ROBUST_CHANNEL_LAST_ERROR (UNRECOVERABLE_ECC_ERROR_ESCAPE)
// Indexed CE reference

View File

@ -95,7 +95,7 @@ NV_CRASHCAT_PACKET_FORMAT_VERSION crashcatPacketHeaderFormatVersion(NvCrashCatPa
static NV_INLINE
NvLength crashcatPacketHeaderPayloadSize(NvCrashCatPacketHeader hdr)
{
NvU8 unitShift;
NvU8 unitShift = 0;
NV_CRASHCAT_MEM_UNIT_SIZE unitSize =
(NV_CRASHCAT_MEM_UNIT_SIZE)DRF_VAL64(_CRASHCAT, _PACKET_HEADER, _PAYLOAD_UNIT_SIZE, hdr);
switch (unitSize)
@ -104,7 +104,6 @@ NvLength crashcatPacketHeaderPayloadSize(NvCrashCatPacketHeader hdr)
case NV_CRASHCAT_MEM_UNIT_SIZE_1KB: unitShift = 10; break;
case NV_CRASHCAT_MEM_UNIT_SIZE_4KB: unitShift = 12; break;
case NV_CRASHCAT_MEM_UNIT_SIZE_64KB: unitShift = 16; break;
default: return 0;
}
// Increment size, since the size in the header is size - 1 (payload of 0 size is not encodable)

View File

@ -1221,6 +1221,29 @@ typedef struct _NVEvoDevRec {
} apiHead[NVKMS_MAX_HEADS_PER_DISP];
} NVDevEvoRec;
static inline NvBool nvEvoIsConsoleActive(const NVDevEvoRec *pDevEvo)
{
/*
* If (pDevEvo->modesetOwner == NULL) that means either the vbios
* console or the NVKMS console might be active.
*/
if (pDevEvo->modesetOwner == NULL) {
return TRUE;
}
/*
* If (pDevEvo->modesetOwner != NULL) but
* pDevEvo->modesetOwnerChanged is TRUE, that means the modeset
* ownership is grabbed by the external client but it hasn't
* performed any modeset and the console is still active.
*/
if ((pDevEvo->modesetOwner != NULL) && pDevEvo->modesetOwnerChanged) {
return TRUE;
}
return FALSE;
}
/*
* The NVHwModeTimingsEvo structure stores all the values necessary to
* perform a modeset with EVO

View File

@ -274,8 +274,12 @@ NvU32 nvDIFRPrefetchSurfaces(NVDIFRStateEvoPtr pDifr, size_t l2CacheSize)
NvU32 i;
NvU32 status;
/* If DIFR is disabled it's because we know we were or will be flipping. */
if (pDifr->hwDisabled) {
/*
* If DIFR is disabled it's because we know we were or will be flipping, or
* if console is active then the scanout surfaces will get updated by the
* OS console driver without any knowledge of NVKMS.
*/
if (pDifr->hwDisabled || nvEvoIsConsoleActive(pDevEvo)) {
return NV2080_CTRL_LPWR_DIFR_PREFETCH_FAIL_OS_FLIPS_ENABLED;
}

View File

@ -1069,23 +1069,11 @@ void nvHsConfigInitSwapGroup(
NVHsChannelConfig *pChannelConfig = &pHsConfigOneHead->channelConfig;
/*
* If (pDevEvo->modesetOwner == NULL) that means either the vbios
* console or the NVKMS console might be active, the console
* surface may not be set up to be the source of headSurface
* operations, and NVKMS may be unloaded, so we can't have the
* display rely on headSurface.
* The console surface may not be set up to be the source of
* headSurface operations, and NVKMS may be unloaded, so we can't
* have the display rely on headSurface.
*/
if (pDevEvo->modesetOwner == NULL) {
continue;
}
/*
* If (pDevEvo->modesetOwner != NULL) but
* pDevEvo->modesetOwnerChanged is TRUE, that means the modeset
* ownership is grabbed by the external client but it hasn't
* performed any modeset and the console is still active.
*/
if ((pDevEvo->modesetOwner != NULL) && pDevEvo->modesetOwnerChanged) {
if (nvEvoIsConsoleActive(pDevEvo)) {
continue;
}

View File

@ -186,6 +186,7 @@ CSINFO chipsetInfo[] =
{PCI_VENDOR_ID_INTEL, 0x7A82, CS_INTEL_7A82, "Intel-AlderLake", Intel_7A82_setupFunc},
{PCI_VENDOR_ID_INTEL, 0x7A84, CS_INTEL_7A82, "Intel-AlderLake", Intel_7A82_setupFunc},
{PCI_VENDOR_ID_INTEL, 0x1B81, CS_INTEL_1B81, "Intel-SapphireRapids", NULL},
{PCI_VENDOR_ID_INTEL, 0x7A8A, CS_INTEL_1B81, "Intel-SapphireRapids", NULL},
{PCI_VENDOR_ID_INTEL, 0x18DC, CS_INTEL_18DC, "Intel-IceLake", NULL},
{PCI_VENDOR_ID_INTEL, 0x7A04, CS_INTEL_7A04, "Intel-RaptorLake", Intel_7A04_setupFunc},

View File

@ -207,9 +207,13 @@ enum os_pci_req_atomics_type {
OS_INTF_PCIE_REQ_ATOMICS_128BIT
};
NV_STATUS NV_API_CALL os_enable_pci_req_atomics (void *, enum os_pci_req_atomics_type);
NV_STATUS NV_API_CALL os_get_numa_node_memory_usage (NvS32, NvU64 *, NvU64 *);
NV_STATUS NV_API_CALL os_numa_add_gpu_memory (void *, NvU64, NvU64, NvU32 *);
NV_STATUS NV_API_CALL os_numa_remove_gpu_memory (void *, NvU64, NvU64, NvU32);
NV_STATUS NV_API_CALL os_offline_page_at_address(NvU64 address);
void* NV_API_CALL os_get_pid_info(void);
void NV_API_CALL os_put_pid_info(void *pid_info);
NV_STATUS NV_API_CALL os_find_ns_pid(void *pid_info, NvU32 *ns_pid);
extern NvU32 os_page_size;
extern NvU64 os_page_mask;

View File

@ -684,6 +684,21 @@ NV_STATUS osGetCurrentThread(OS_THREAD_HANDLE *pThreadId)
return rmStatus;
}
void* osGetPidInfo(void)
{
return os_get_pid_info();
}
void osPutPidInfo(void *pOsPidInfo)
{
os_put_pid_info(pOsPidInfo);
}
NV_STATUS osFindNsPid(void *pOsPidInfo, NvU32 *pNsPid)
{
return os_find_ns_pid(pOsPidInfo, pNsPid);
}
NV_STATUS osAttachToProcess(void** ppProcessInfo, NvU32 ProcessId)
{
//
@ -5371,6 +5386,28 @@ osReleaseGpuOsInfo
nv_put_file_private(pOsInfo);
}
/*!
* @brief Get free, total memory of a NUMA node by NUMA node ID from kernel.
*
* @param[in] numaId NUMA node ID.
* @param[out] free_memory_bytes free memory in bytes.
* @param[out] total_memory_bytes total memory in bytes.
*
*/
void
osGetNumaMemoryUsage
(
NvS32 numaId,
NvU64 *free_memory_bytes,
NvU64 *total_memory_bytes
)
{
NV_STATUS status = os_get_numa_node_memory_usage(numaId,
free_memory_bytes,
total_memory_bytes);
NV_ASSERT(status == NV_OK);
}
/*!
* @brief Add GPU memory as a NUMA node.
*

View File

@ -140,6 +140,7 @@ struct RmClient {
NvU32 Flags;
NvU32 ClientDebuggerState;
void *pOSInfo;
void *pOsPidInfo;
char name[100];
CLI_SYSTEM_EVENT_INFO CliSysEventInfo;
PSECURITY_TOKEN pSecurityToken;

View File

@ -492,6 +492,17 @@ static void __nvoc_init_funcTable_OBJGPU_1(OBJGPU *pThis) {
pThis->__gpuWriteFunctionConfigRegEx__ = &gpuWriteFunctionConfigRegEx_GM107;
}
// Hal function -- gpuReadVgpuConfigReg
if (( ((chipHal_HalVarIdx >> 5) == 1UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0x10000000UL) )) /* ChipHal: GH100 */
{
pThis->__gpuReadVgpuConfigReg__ = &gpuReadVgpuConfigReg_GH100;
}
// default
else
{
pThis->__gpuReadVgpuConfigReg__ = &gpuReadVgpuConfigReg_46f6a7;
}
// Hal function -- gpuGetIdInfo
if (( ((chipHal_HalVarIdx >> 5) == 1UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0x10000000UL) )) /* ChipHal: GH100 */
{

View File

@ -877,6 +877,7 @@ struct OBJGPU {
NV_STATUS (*__gpuReadFunctionConfigReg__)(struct OBJGPU *, NvU32, NvU32, NvU32 *);
NV_STATUS (*__gpuWriteFunctionConfigReg__)(struct OBJGPU *, NvU32, NvU32, NvU32);
NV_STATUS (*__gpuWriteFunctionConfigRegEx__)(struct OBJGPU *, NvU32, NvU32, NvU32, THREAD_STATE_NODE *);
NV_STATUS (*__gpuReadVgpuConfigReg__)(struct OBJGPU *, NvU32, NvU32 *);
void (*__gpuGetIdInfo__)(struct OBJGPU *);
void (*__gpuHandleSanityCheckRegReadError__)(struct OBJGPU *, NvU32, NvU32);
void (*__gpuHandleSecFault__)(struct OBJGPU *);
@ -1007,6 +1008,9 @@ struct OBJGPU {
NvU32 subdeviceInstance;
NvS32 numaNodeId;
_GPU_UUID gpuUuid;
NvU32 gpuPhysicalId;
NvU32 gpuTerminatedLinkMask;
NvBool gpuLinkTerminationEnabled;
NvBool gspRmInitialized;
_GPU_PCIE_PEER_CLIQUE pciePeerClique;
NvU32 i2cPortForExtdev;
@ -1427,6 +1431,8 @@ NV_STATUS __nvoc_objCreate_OBJGPU(OBJGPU**, Dynamic*, NvU32,
#define gpuWriteFunctionConfigReg_HAL(pGpu, function, reg, data) gpuWriteFunctionConfigReg_DISPATCH(pGpu, function, reg, data)
#define gpuWriteFunctionConfigRegEx(pGpu, function, reg, data, pThreadState) gpuWriteFunctionConfigRegEx_DISPATCH(pGpu, function, reg, data, pThreadState)
#define gpuWriteFunctionConfigRegEx_HAL(pGpu, function, reg, data, pThreadState) gpuWriteFunctionConfigRegEx_DISPATCH(pGpu, function, reg, data, pThreadState)
#define gpuReadVgpuConfigReg(pGpu, index, data) gpuReadVgpuConfigReg_DISPATCH(pGpu, index, data)
#define gpuReadVgpuConfigReg_HAL(pGpu, index, data) gpuReadVgpuConfigReg_DISPATCH(pGpu, index, data)
#define gpuGetIdInfo(pGpu) gpuGetIdInfo_DISPATCH(pGpu)
#define gpuGetIdInfo_HAL(pGpu) gpuGetIdInfo_DISPATCH(pGpu)
#define gpuHandleSanityCheckRegReadError(pGpu, addr, value) gpuHandleSanityCheckRegReadError_DISPATCH(pGpu, addr, value)
@ -2422,6 +2428,19 @@ static inline void gpuUpdateUserSharedData(struct OBJGPU *pGpu) {
#define gpuUpdateUserSharedData_HAL(pGpu) gpuUpdateUserSharedData(pGpu)
void gpuGetTerminatedLinkMask_GA100(struct OBJGPU *pGpu, NvU32 arg0);
#ifdef __nvoc_gpu_h_disabled
static inline void gpuGetTerminatedLinkMask(struct OBJGPU *pGpu, NvU32 arg0) {
NV_ASSERT_FAILED_PRECOMP("OBJGPU was disabled!");
}
#else //__nvoc_gpu_h_disabled
#define gpuGetTerminatedLinkMask(pGpu, arg0) gpuGetTerminatedLinkMask_GA100(pGpu, arg0)
#endif //__nvoc_gpu_h_disabled
#define gpuGetTerminatedLinkMask_HAL(pGpu, arg0) gpuGetTerminatedLinkMask(pGpu, arg0)
NV_STATUS gpuJtVersionSanityCheck_TU102(struct OBJGPU *pGpu);
@ -2970,6 +2989,16 @@ static inline NV_STATUS gpuWriteFunctionConfigRegEx_DISPATCH(struct OBJGPU *pGpu
return pGpu->__gpuWriteFunctionConfigRegEx__(pGpu, function, reg, data, pThreadState);
}
NV_STATUS gpuReadVgpuConfigReg_GH100(struct OBJGPU *pGpu, NvU32 index, NvU32 *data);
static inline NV_STATUS gpuReadVgpuConfigReg_46f6a7(struct OBJGPU *pGpu, NvU32 index, NvU32 *data) {
return NV_ERR_NOT_SUPPORTED;
}
static inline NV_STATUS gpuReadVgpuConfigReg_DISPATCH(struct OBJGPU *pGpu, NvU32 index, NvU32 *data) {
return pGpu->__gpuReadVgpuConfigReg__(pGpu, index, data);
}
void gpuGetIdInfo_GM107(struct OBJGPU *pGpu);
void gpuGetIdInfo_GH100(struct OBJGPU *pGpu);

View File

@ -137,10 +137,14 @@ void __nvoc_dtor_KernelFsp(KernelFsp *pThis) {
void __nvoc_init_dataField_KernelFsp(KernelFsp *pThis, RmHalspecOwner *pRmhalspecowner) {
ChipHal *chipHal = &pRmhalspecowner->chipHal;
const unsigned long chipHal_HalVarIdx = (unsigned long)chipHal->__nvoc_HalVarIdx;
RmVariantHal *rmVariantHal = &pRmhalspecowner->rmVariantHal;
const unsigned long rmVariantHal_HalVarIdx = (unsigned long)rmVariantHal->__nvoc_HalVarIdx;
PORT_UNREFERENCED_VARIABLE(pThis);
PORT_UNREFERENCED_VARIABLE(pRmhalspecowner);
PORT_UNREFERENCED_VARIABLE(chipHal);
PORT_UNREFERENCED_VARIABLE(chipHal_HalVarIdx);
PORT_UNREFERENCED_VARIABLE(rmVariantHal);
PORT_UNREFERENCED_VARIABLE(rmVariantHal_HalVarIdx);
// NVOC Property Hal field -- PDB_PROP_KFSP_IS_MISSING
if (( ((chipHal_HalVarIdx >> 5) == 1UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0x10000000UL) )) /* ChipHal: GH100 */
@ -152,6 +156,12 @@ void __nvoc_init_dataField_KernelFsp(KernelFsp *pThis, RmHalspecOwner *pRmhalspe
{
pThis->setProperty(pThis, PDB_PROP_KFSP_IS_MISSING, ((NvBool)(0 == 0)));
}
// NVOC Property Hal field -- PDB_PROP_KFSP_DISABLE_FRTS_SYSMEM
if (( ((rmVariantHal_HalVarIdx >> 5) == 0UL) && ((1UL << (rmVariantHal_HalVarIdx & 0x1f)) & 0x00000002UL) )) /* RmVariantHal: PF_KERNEL_ONLY */
{
pThis->setProperty(pThis, PDB_PROP_KFSP_DISABLE_FRTS_SYSMEM, ((NvBool)(0 == 0)));
}
}
NV_STATUS __nvoc_ctor_OBJENGSTATE(OBJENGSTATE* );
@ -171,10 +181,14 @@ __nvoc_ctor_KernelFsp_exit:
static void __nvoc_init_funcTable_KernelFsp_1(KernelFsp *pThis, RmHalspecOwner *pRmhalspecowner) {
ChipHal *chipHal = &pRmhalspecowner->chipHal;
const unsigned long chipHal_HalVarIdx = (unsigned long)chipHal->__nvoc_HalVarIdx;
RmVariantHal *rmVariantHal = &pRmhalspecowner->rmVariantHal;
const unsigned long rmVariantHal_HalVarIdx = (unsigned long)rmVariantHal->__nvoc_HalVarIdx;
PORT_UNREFERENCED_VARIABLE(pThis);
PORT_UNREFERENCED_VARIABLE(pRmhalspecowner);
PORT_UNREFERENCED_VARIABLE(chipHal);
PORT_UNREFERENCED_VARIABLE(chipHal_HalVarIdx);
PORT_UNREFERENCED_VARIABLE(rmVariantHal);
PORT_UNREFERENCED_VARIABLE(rmVariantHal_HalVarIdx);
pThis->__kfspConstructEngine__ = &kfspConstructEngine_IMPL;

View File

@ -425,6 +425,28 @@ static void __nvoc_init_funcTable_KernelMemorySystem_1(KernelMemorySystem *pThis
pThis->__kmemsysRemoveAllAtsPeers__ = &kmemsysRemoveAllAtsPeers_GV100;
}
// Hal function -- kmemsysCheckEccCounts
if (( ((chipHal_HalVarIdx >> 5) == 1UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0x10000000UL) )) /* ChipHal: GH100 */
{
pThis->__kmemsysCheckEccCounts__ = &kmemsysCheckEccCounts_GH100;
}
// default
else
{
pThis->__kmemsysCheckEccCounts__ = &kmemsysCheckEccCounts_b3696a;
}
// Hal function -- kmemsysClearEccCounts
if (( ((chipHal_HalVarIdx >> 5) == 1UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0x10000000UL) )) /* ChipHal: GH100 */
{
pThis->__kmemsysClearEccCounts__ = &kmemsysClearEccCounts_GH100;
}
// default
else
{
pThis->__kmemsysClearEccCounts__ = &kmemsysClearEccCounts_56cd7a;
}
pThis->__nvoc_base_OBJENGSTATE.__engstateConstructEngine__ = &__nvoc_thunk_KernelMemorySystem_engstateConstructEngine;
pThis->__nvoc_base_OBJENGSTATE.__engstateStateInitLocked__ = &__nvoc_thunk_KernelMemorySystem_engstateStateInitLocked;

View File

@ -222,6 +222,8 @@ struct KernelMemorySystem {
void (*__kmemsysNumaRemoveAllMemory__)(OBJGPU *, struct KernelMemorySystem *);
NV_STATUS (*__kmemsysSetupAllAtsPeers__)(OBJGPU *, struct KernelMemorySystem *);
void (*__kmemsysRemoveAllAtsPeers__)(OBJGPU *, struct KernelMemorySystem *);
void (*__kmemsysCheckEccCounts__)(OBJGPU *, struct KernelMemorySystem *);
NV_STATUS (*__kmemsysClearEccCounts__)(OBJGPU *, struct KernelMemorySystem *);
NV_STATUS (*__kmemsysStateLoad__)(POBJGPU, struct KernelMemorySystem *, NvU32);
NV_STATUS (*__kmemsysStateUnload__)(POBJGPU, struct KernelMemorySystem *, NvU32);
NV_STATUS (*__kmemsysStatePostUnload__)(POBJGPU, struct KernelMemorySystem *, NvU32);
@ -323,6 +325,10 @@ NV_STATUS __nvoc_objCreate_KernelMemorySystem(KernelMemorySystem**, Dynamic*, Nv
#define kmemsysSetupAllAtsPeers_HAL(pGpu, pKernelMemorySystem) kmemsysSetupAllAtsPeers_DISPATCH(pGpu, pKernelMemorySystem)
#define kmemsysRemoveAllAtsPeers(pGpu, pKernelMemorySystem) kmemsysRemoveAllAtsPeers_DISPATCH(pGpu, pKernelMemorySystem)
#define kmemsysRemoveAllAtsPeers_HAL(pGpu, pKernelMemorySystem) kmemsysRemoveAllAtsPeers_DISPATCH(pGpu, pKernelMemorySystem)
#define kmemsysCheckEccCounts(pGpu, pKernelMemorySystem) kmemsysCheckEccCounts_DISPATCH(pGpu, pKernelMemorySystem)
#define kmemsysCheckEccCounts_HAL(pGpu, pKernelMemorySystem) kmemsysCheckEccCounts_DISPATCH(pGpu, pKernelMemorySystem)
#define kmemsysClearEccCounts(pGpu, pKernelMemorySystem) kmemsysClearEccCounts_DISPATCH(pGpu, pKernelMemorySystem)
#define kmemsysClearEccCounts_HAL(pGpu, pKernelMemorySystem) kmemsysClearEccCounts_DISPATCH(pGpu, pKernelMemorySystem)
#define kmemsysStateLoad(pGpu, pEngstate, arg0) kmemsysStateLoad_DISPATCH(pGpu, pEngstate, arg0)
#define kmemsysStateUnload(pGpu, pEngstate, arg0) kmemsysStateUnload_DISPATCH(pGpu, pEngstate, arg0)
#define kmemsysStatePostUnload(pGpu, pEngstate, arg0) kmemsysStatePostUnload_DISPATCH(pGpu, pEngstate, arg0)
@ -733,6 +739,26 @@ static inline void kmemsysRemoveAllAtsPeers_DISPATCH(OBJGPU *pGpu, struct Kernel
pKernelMemorySystem->__kmemsysRemoveAllAtsPeers__(pGpu, pKernelMemorySystem);
}
void kmemsysCheckEccCounts_GH100(OBJGPU *pGpu, struct KernelMemorySystem *pKernelMemorySystem);
static inline void kmemsysCheckEccCounts_b3696a(OBJGPU *pGpu, struct KernelMemorySystem *pKernelMemorySystem) {
return;
}
static inline void kmemsysCheckEccCounts_DISPATCH(OBJGPU *pGpu, struct KernelMemorySystem *pKernelMemorySystem) {
pKernelMemorySystem->__kmemsysCheckEccCounts__(pGpu, pKernelMemorySystem);
}
NV_STATUS kmemsysClearEccCounts_GH100(OBJGPU *pGpu, struct KernelMemorySystem *pKernelMemorySystem);
static inline NV_STATUS kmemsysClearEccCounts_56cd7a(OBJGPU *pGpu, struct KernelMemorySystem *pKernelMemorySystem) {
return NV_OK;
}
static inline NV_STATUS kmemsysClearEccCounts_DISPATCH(OBJGPU *pGpu, struct KernelMemorySystem *pKernelMemorySystem) {
return pKernelMemorySystem->__kmemsysClearEccCounts__(pGpu, pKernelMemorySystem);
}
static inline NV_STATUS kmemsysStateLoad_DISPATCH(POBJGPU pGpu, struct KernelMemorySystem *pEngstate, NvU32 arg0) {
return pEngstate->__kmemsysStateLoad__(pGpu, pEngstate, arg0);
}

View File

@ -221,6 +221,16 @@ static void __nvoc_init_funcTable_KernelCE_1(KernelCE *pThis, RmHalspecOwner *pR
pThis->__kceServiceNotificationInterrupt__ = &kceServiceNotificationInterrupt_IMPL;
// Hal function -- kceGetP2PCes
if (( ((chipHal_HalVarIdx >> 5) == 1UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0x10000000UL) )) /* ChipHal: GH100 */
{
pThis->__kceGetP2PCes__ = &kceGetP2PCes_GH100;
}
else
{
pThis->__kceGetP2PCes__ = &kceGetP2PCes_GV100;
}
// Hal function -- kceGetNvlinkAutoConfigCeValues
if (( ((chipHal_HalVarIdx >> 5) == 1UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0x000003e0UL) )) /* ChipHal: TU102 | TU104 | TU106 | TU116 | TU117 */
{

View File

@ -113,6 +113,7 @@ struct KernelCE {
NV_STATUS (*__kceStateUnload__)(OBJGPU *, struct KernelCE *, NvU32);
void (*__kceRegisterIntrService__)(OBJGPU *, struct KernelCE *, IntrServiceRecord *);
NV_STATUS (*__kceServiceNotificationInterrupt__)(OBJGPU *, struct KernelCE *, IntrServiceServiceNotificationInterruptArguments *);
NV_STATUS (*__kceGetP2PCes__)(struct KernelCE *, OBJGPU *, NvU32, NvU32 *);
NV_STATUS (*__kceGetNvlinkAutoConfigCeValues__)(OBJGPU *, struct KernelCE *, NvU32 *, NvU32 *, NvU32 *);
NvBool (*__kceGetNvlinkMaxTopoForTable__)(OBJGPU *, struct KernelCE *, struct NVLINK_TOPOLOGY_PARAMS *, void *, NvU32, NvU32 *);
NvBool (*__kceIsCurrentMaxTopology__)(OBJGPU *, struct KernelCE *, struct NVLINK_TOPOLOGY_PARAMS *, NvU32 *, NvU32 *);
@ -190,6 +191,8 @@ NV_STATUS __nvoc_objCreate_KernelCE(KernelCE**, Dynamic*, NvU32);
#define kceStateUnload_HAL(pGpu, pKCe, flags) kceStateUnload_DISPATCH(pGpu, pKCe, flags)
#define kceRegisterIntrService(arg0, arg1, arg2) kceRegisterIntrService_DISPATCH(arg0, arg1, arg2)
#define kceServiceNotificationInterrupt(arg0, arg1, arg2) kceServiceNotificationInterrupt_DISPATCH(arg0, arg1, arg2)
#define kceGetP2PCes(arg0, pGpu, gpuMask, nvlinkP2PCeMask) kceGetP2PCes_DISPATCH(arg0, pGpu, gpuMask, nvlinkP2PCeMask)
#define kceGetP2PCes_HAL(arg0, pGpu, gpuMask, nvlinkP2PCeMask) kceGetP2PCes_DISPATCH(arg0, pGpu, gpuMask, nvlinkP2PCeMask)
#define kceGetNvlinkAutoConfigCeValues(pGpu, pKCe, arg0, arg1, arg2) kceGetNvlinkAutoConfigCeValues_DISPATCH(pGpu, pKCe, arg0, arg1, arg2)
#define kceGetNvlinkAutoConfigCeValues_HAL(pGpu, pKCe, arg0, arg1, arg2) kceGetNvlinkAutoConfigCeValues_DISPATCH(pGpu, pKCe, arg0, arg1, arg2)
#define kceGetNvlinkMaxTopoForTable(pGpu, pKCe, arg0, arg1, arg2, arg3) kceGetNvlinkMaxTopoForTable_DISPATCH(pGpu, pKCe, arg0, arg1, arg2, arg3)
@ -305,20 +308,6 @@ static inline NvBool kceIsCeNvlinkP2P(OBJGPU *pGpu, struct KernelCE *pKCe) {
#define kceIsCeNvlinkP2P_HAL(pGpu, pKCe) kceIsCeNvlinkP2P(pGpu, pKCe)
NV_STATUS kceGetP2PCes_GV100(struct KernelCE *arg0, OBJGPU *pGpu, NvU32 gpuMask, NvU32 *nvlinkP2PCeMask);
#ifdef __nvoc_kernel_ce_h_disabled
static inline NV_STATUS kceGetP2PCes(struct KernelCE *arg0, OBJGPU *pGpu, NvU32 gpuMask, NvU32 *nvlinkP2PCeMask) {
NV_ASSERT_FAILED_PRECOMP("KernelCE was disabled!");
return NV_ERR_NOT_SUPPORTED;
}
#else //__nvoc_kernel_ce_h_disabled
#define kceGetP2PCes(arg0, pGpu, gpuMask, nvlinkP2PCeMask) kceGetP2PCes_GV100(arg0, pGpu, gpuMask, nvlinkP2PCeMask)
#endif //__nvoc_kernel_ce_h_disabled
#define kceGetP2PCes_HAL(arg0, pGpu, gpuMask, nvlinkP2PCeMask) kceGetP2PCes(arg0, pGpu, gpuMask, nvlinkP2PCeMask)
void kceGetSysmemRWLCEs_GV100(struct KernelCE *arg0, NvU32 *rd, NvU32 *wr);
@ -397,6 +386,14 @@ static inline NV_STATUS kceServiceNotificationInterrupt_DISPATCH(OBJGPU *arg0, s
return arg1->__kceServiceNotificationInterrupt__(arg0, arg1, arg2);
}
NV_STATUS kceGetP2PCes_GV100(struct KernelCE *arg0, OBJGPU *pGpu, NvU32 gpuMask, NvU32 *nvlinkP2PCeMask);
NV_STATUS kceGetP2PCes_GH100(struct KernelCE *arg0, OBJGPU *pGpu, NvU32 gpuMask, NvU32 *nvlinkP2PCeMask);
static inline NV_STATUS kceGetP2PCes_DISPATCH(struct KernelCE *arg0, OBJGPU *pGpu, NvU32 gpuMask, NvU32 *nvlinkP2PCeMask) {
return arg0->__kceGetP2PCes__(arg0, pGpu, gpuMask, nvlinkP2PCeMask);
}
NV_STATUS kceGetNvlinkAutoConfigCeValues_TU102(OBJGPU *pGpu, struct KernelCE *pKCe, NvU32 *arg0, NvU32 *arg1, NvU32 *arg2);
NV_STATUS kceGetNvlinkAutoConfigCeValues_GA100(OBJGPU *pGpu, struct KernelCE *pKCe, NvU32 *arg0, NvU32 *arg1, NvU32 *arg2);

View File

@ -105,10 +105,6 @@ static NV_STATUS __nvoc_thunk_MemoryMulticastFabric_memControl(struct Memory *pM
return memorymulticastfabricControl((struct MemoryMulticastFabric *)(((unsigned char *)pMemoryMulticastFabric) - __nvoc_rtti_MemoryMulticastFabric_Memory.offset), pCallContext, pParams);
}
static NV_STATUS __nvoc_thunk_MemoryMulticastFabric_rmresControl_Prologue(struct RmResource *pMemoryMulticastFabric, CALL_CONTEXT *pCallContext, struct RS_RES_CONTROL_PARAMS_INTERNAL *pParams) {
return memorymulticastfabricControl_Prologue((struct MemoryMulticastFabric *)(((unsigned char *)pMemoryMulticastFabric) - __nvoc_rtti_MemoryMulticastFabric_RmResource.offset), pCallContext, pParams);
}
static NvBool __nvoc_thunk_MemoryMulticastFabric_memIsGpuMapAllowed(struct Memory *pMemoryMulticastFabric, struct OBJGPU *pGpu) {
return memorymulticastfabricIsGpuMapAllowed((struct MemoryMulticastFabric *)(((unsigned char *)pMemoryMulticastFabric) - __nvoc_rtti_MemoryMulticastFabric_Memory.offset), pGpu);
}
@ -137,6 +133,10 @@ static void __nvoc_thunk_RsResource_memorymulticastfabricAddAdditionalDependants
resAddAdditionalDependants(pClient, (struct RsResource *)(((unsigned char *)pResource) + __nvoc_rtti_MemoryMulticastFabric_RsResource.offset), pReference);
}
static NV_STATUS __nvoc_thunk_RmResource_memorymulticastfabricControl_Prologue(struct MemoryMulticastFabric *pResource, CALL_CONTEXT *pCallContext, struct RS_RES_CONTROL_PARAMS_INTERNAL *pParams) {
return rmresControl_Prologue((struct RmResource *)(((unsigned char *)pResource) + __nvoc_rtti_MemoryMulticastFabric_RmResource.offset), pCallContext, pParams);
}
static NV_STATUS __nvoc_thunk_RsResource_memorymulticastfabricUnmapFrom(struct MemoryMulticastFabric *pResource, RS_RES_UNMAP_FROM_PARAMS *pParams) {
return resUnmapFrom((struct RsResource *)(((unsigned char *)pResource) + __nvoc_rtti_MemoryMulticastFabric_RsResource.offset), pParams);
}
@ -324,8 +324,6 @@ static void __nvoc_init_funcTable_MemoryMulticastFabric_1(MemoryMulticastFabric
pThis->__memorymulticastfabricControl__ = &memorymulticastfabricControl_IMPL;
pThis->__memorymulticastfabricControl_Prologue__ = &memorymulticastfabricControl_Prologue_IMPL;
pThis->__memorymulticastfabricIsGpuMapAllowed__ = &memorymulticastfabricIsGpuMapAllowed_IMPL;
pThis->__memorymulticastfabricGetMapAddrSpace__ = &memorymulticastfabricGetMapAddrSpace_IMPL;
@ -356,8 +354,6 @@ static void __nvoc_init_funcTable_MemoryMulticastFabric_1(MemoryMulticastFabric
pThis->__nvoc_base_Memory.__memControl__ = &__nvoc_thunk_MemoryMulticastFabric_memControl;
pThis->__nvoc_base_Memory.__nvoc_base_RmResource.__rmresControl_Prologue__ = &__nvoc_thunk_MemoryMulticastFabric_rmresControl_Prologue;
pThis->__nvoc_base_Memory.__memIsGpuMapAllowed__ = &__nvoc_thunk_MemoryMulticastFabric_memIsGpuMapAllowed;
pThis->__nvoc_base_Memory.__memGetMapAddrSpace__ = &__nvoc_thunk_MemoryMulticastFabric_memGetMapAddrSpace;
@ -372,6 +368,8 @@ static void __nvoc_init_funcTable_MemoryMulticastFabric_1(MemoryMulticastFabric
pThis->__memorymulticastfabricAddAdditionalDependants__ = &__nvoc_thunk_RsResource_memorymulticastfabricAddAdditionalDependants;
pThis->__memorymulticastfabricControl_Prologue__ = &__nvoc_thunk_RmResource_memorymulticastfabricControl_Prologue;
pThis->__memorymulticastfabricUnmapFrom__ = &__nvoc_thunk_RsResource_memorymulticastfabricUnmapFrom;
pThis->__memorymulticastfabricControl_Epilogue__ = &__nvoc_thunk_RmResource_memorymulticastfabricControl_Epilogue;

View File

@ -158,7 +158,6 @@ struct MemoryMulticastFabric {
NV_STATUS (*__memorymulticastfabricCopyConstruct__)(struct MemoryMulticastFabric *, CALL_CONTEXT *, struct RS_RES_ALLOC_PARAMS_INTERNAL *);
NV_STATUS (*__memorymulticastfabricIsReady__)(struct MemoryMulticastFabric *, NvBool);
NV_STATUS (*__memorymulticastfabricControl__)(struct MemoryMulticastFabric *, CALL_CONTEXT *, struct RS_RES_CONTROL_PARAMS_INTERNAL *);
NV_STATUS (*__memorymulticastfabricControl_Prologue__)(struct MemoryMulticastFabric *, CALL_CONTEXT *, struct RS_RES_CONTROL_PARAMS_INTERNAL *);
NvBool (*__memorymulticastfabricIsGpuMapAllowed__)(struct MemoryMulticastFabric *, struct OBJGPU *);
NV_STATUS (*__memorymulticastfabricGetMapAddrSpace__)(struct MemoryMulticastFabric *, CALL_CONTEXT *, NvU32, NV_ADDRESS_SPACE *);
NV_STATUS (*__memorymulticastfabricCtrlGetInfo__)(struct MemoryMulticastFabric *, NV00FD_CTRL_GET_INFO_PARAMS *);
@ -171,6 +170,7 @@ struct MemoryMulticastFabric {
NV_STATUS (*__memorymulticastfabricMapTo__)(struct MemoryMulticastFabric *, RS_RES_MAP_TO_PARAMS *);
NvU32 (*__memorymulticastfabricGetRefCount__)(struct MemoryMulticastFabric *);
void (*__memorymulticastfabricAddAdditionalDependants__)(struct RsClient *, struct MemoryMulticastFabric *, RsResourceRef *);
NV_STATUS (*__memorymulticastfabricControl_Prologue__)(struct MemoryMulticastFabric *, CALL_CONTEXT *, struct RS_RES_CONTROL_PARAMS_INTERNAL *);
NV_STATUS (*__memorymulticastfabricUnmapFrom__)(struct MemoryMulticastFabric *, RS_RES_UNMAP_FROM_PARAMS *);
void (*__memorymulticastfabricControl_Epilogue__)(struct MemoryMulticastFabric *, CALL_CONTEXT *, struct RS_RES_CONTROL_PARAMS_INTERNAL *);
NV_STATUS (*__memorymulticastfabricControlLookup__)(struct MemoryMulticastFabric *, struct RS_RES_CONTROL_PARAMS_INTERNAL *, const struct NVOC_EXPORTED_METHOD_DEF **);
@ -220,7 +220,6 @@ NV_STATUS __nvoc_objCreate_MemoryMulticastFabric(MemoryMulticastFabric**, Dynami
#define memorymulticastfabricCopyConstruct(pMemoryMulticastFabric, pCallContext, pParams) memorymulticastfabricCopyConstruct_DISPATCH(pMemoryMulticastFabric, pCallContext, pParams)
#define memorymulticastfabricIsReady(pMemoryMulticastFabric, bCopyConstructorContext) memorymulticastfabricIsReady_DISPATCH(pMemoryMulticastFabric, bCopyConstructorContext)
#define memorymulticastfabricControl(pMemoryMulticastFabric, pCallContext, pParams) memorymulticastfabricControl_DISPATCH(pMemoryMulticastFabric, pCallContext, pParams)
#define memorymulticastfabricControl_Prologue(pMemoryMulticastFabric, pCallContext, pParams) memorymulticastfabricControl_Prologue_DISPATCH(pMemoryMulticastFabric, pCallContext, pParams)
#define memorymulticastfabricIsGpuMapAllowed(pMemoryMulticastFabric, pGpu) memorymulticastfabricIsGpuMapAllowed_DISPATCH(pMemoryMulticastFabric, pGpu)
#define memorymulticastfabricGetMapAddrSpace(pMemoryMulticastFabric, pCallContext, mapFlags, pAddrSpace) memorymulticastfabricGetMapAddrSpace_DISPATCH(pMemoryMulticastFabric, pCallContext, mapFlags, pAddrSpace)
#define memorymulticastfabricCtrlGetInfo(pMemoryMulticastFabric, pParams) memorymulticastfabricCtrlGetInfo_DISPATCH(pMemoryMulticastFabric, pParams)
@ -233,6 +232,7 @@ NV_STATUS __nvoc_objCreate_MemoryMulticastFabric(MemoryMulticastFabric**, Dynami
#define memorymulticastfabricMapTo(pResource, pParams) memorymulticastfabricMapTo_DISPATCH(pResource, pParams)
#define memorymulticastfabricGetRefCount(pResource) memorymulticastfabricGetRefCount_DISPATCH(pResource)
#define memorymulticastfabricAddAdditionalDependants(pClient, pResource, pReference) memorymulticastfabricAddAdditionalDependants_DISPATCH(pClient, pResource, pReference)
#define memorymulticastfabricControl_Prologue(pResource, pCallContext, pParams) memorymulticastfabricControl_Prologue_DISPATCH(pResource, pCallContext, pParams)
#define memorymulticastfabricUnmapFrom(pResource, pParams) memorymulticastfabricUnmapFrom_DISPATCH(pResource, pParams)
#define memorymulticastfabricControl_Epilogue(pResource, pCallContext, pParams) memorymulticastfabricControl_Epilogue_DISPATCH(pResource, pCallContext, pParams)
#define memorymulticastfabricControlLookup(pResource, pParams, ppEntry) memorymulticastfabricControlLookup_DISPATCH(pResource, pParams, ppEntry)
@ -271,12 +271,6 @@ static inline NV_STATUS memorymulticastfabricControl_DISPATCH(struct MemoryMulti
return pMemoryMulticastFabric->__memorymulticastfabricControl__(pMemoryMulticastFabric, pCallContext, pParams);
}
NV_STATUS memorymulticastfabricControl_Prologue_IMPL(struct MemoryMulticastFabric *pMemoryMulticastFabric, CALL_CONTEXT *pCallContext, struct RS_RES_CONTROL_PARAMS_INTERNAL *pParams);
static inline NV_STATUS memorymulticastfabricControl_Prologue_DISPATCH(struct MemoryMulticastFabric *pMemoryMulticastFabric, CALL_CONTEXT *pCallContext, struct RS_RES_CONTROL_PARAMS_INTERNAL *pParams) {
return pMemoryMulticastFabric->__memorymulticastfabricControl_Prologue__(pMemoryMulticastFabric, pCallContext, pParams);
}
NvBool memorymulticastfabricIsGpuMapAllowed_IMPL(struct MemoryMulticastFabric *pMemoryMulticastFabric, struct OBJGPU *pGpu);
static inline NvBool memorymulticastfabricIsGpuMapAllowed_DISPATCH(struct MemoryMulticastFabric *pMemoryMulticastFabric, struct OBJGPU *pGpu) {
@ -339,6 +333,10 @@ static inline void memorymulticastfabricAddAdditionalDependants_DISPATCH(struct
pResource->__memorymulticastfabricAddAdditionalDependants__(pClient, pResource, pReference);
}
static inline NV_STATUS memorymulticastfabricControl_Prologue_DISPATCH(struct MemoryMulticastFabric *pResource, CALL_CONTEXT *pCallContext, struct RS_RES_CONTROL_PARAMS_INTERNAL *pParams) {
return pResource->__memorymulticastfabricControl_Prologue__(pResource, pCallContext, pParams);
}
static inline NV_STATUS memorymulticastfabricUnmapFrom_DISPATCH(struct MemoryMulticastFabric *pResource, RS_RES_UNMAP_FROM_PARAMS *pParams) {
return pResource->__memorymulticastfabricUnmapFrom__(pResource, pParams);
}

View File

@ -1007,6 +1007,10 @@ static const CHIPS_RELEASED sChipsReleased[] = {
{ 0x27B0, 0x16fa, 0x103c, "NVIDIA RTX 4000 SFF Ada Generation" },
{ 0x27B0, 0x16fa, 0x10de, "NVIDIA RTX 4000 SFF Ada Generation" },
{ 0x27B0, 0x16fa, 0x17aa, "NVIDIA RTX 4000 SFF Ada Generation" },
{ 0x27B1, 0x180c, 0x1028, "NVIDIA RTX 4500 Ada Generation" },
{ 0x27B1, 0x180c, 0x103c, "NVIDIA RTX 4500 Ada Generation" },
{ 0x27B1, 0x180c, 0x10de, "NVIDIA RTX 4500 Ada Generation" },
{ 0x27B1, 0x180c, 0x17aa, "NVIDIA RTX 4500 Ada Generation" },
{ 0x27B2, 0x181b, 0x1028, "NVIDIA RTX 4000 Ada Generation" },
{ 0x27B2, 0x181b, 0x103c, "NVIDIA RTX 4000 Ada Generation" },
{ 0x27B2, 0x181b, 0x10de, "NVIDIA RTX 4000 Ada Generation" },

View File

@ -880,6 +880,10 @@ NV_STATUS osReserveCpuAddressSpaceUpperBound(void **ppSectionHandle,
NvU64 maxSectionSize);
void osReleaseCpuAddressSpaceUpperBound(void *pSectionHandle);
void* osGetPidInfo(void);
void osPutPidInfo(void *pOsPidInfo);
NV_STATUS osFindNsPid(void *pOsPidInfo, NvU32 *pNsPid);
// OS Tegra IPC functions
NV_STATUS osTegraDceRegisterIpcClient(NvU32 interfaceType, void *usrCtx,
NvU32 *clientId);
@ -1249,6 +1253,8 @@ static NV_INLINE NV_STATUS isrWrapper(NvBool testIntr, OBJGPU *pGpu)
#define OS_PCIE_CAP_MASK_REQ_ATOMICS_64 NVBIT(1)
#define OS_PCIE_CAP_MASK_REQ_ATOMICS_128 NVBIT(2)
void osGetNumaMemoryUsage(NvS32 numaId, NvU64 *free_memory_bytes, NvU64 *total_memory_bytes);
NV_STATUS osNumaAddGpuMemory(OS_GPU_INFO *pOsGpuInfo, NvU64 offset,
NvU64 size, NvU32 *pNumaNodeId);
void osNumaRemoveGpuMemory(OS_GPU_INFO *pOsGpuInfo, NvU64 offset,

View File

@ -32,6 +32,7 @@
#include "published/hopper/gh100/dev_pmc.h"
#include "published/hopper/gh100/dev_xtl_ep_pcfg_gpu.h"
#include "published/hopper/gh100/pri_nv_xal_ep.h"
#include "published/hopper/gh100/dev_xtl_ep_pri.h"
#include "ctrl/ctrl2080/ctrl2080mc.h"
@ -77,6 +78,28 @@ gpuReadBusConfigReg_GH100
return gpuReadBusConfigCycle(pGpu, index, pData);
}
/*!
* @brief Read the non-private registers on vGPU through mirror space
*
* @param[in] pGpu GPU object pointer
* @param[in] index Register offset in PCIe config space
* @param[out] pData Value of the register
*
* @returns NV_OK on success
*/
NV_STATUS
gpuReadVgpuConfigReg_GH100
(
OBJGPU *pGpu,
NvU32 index,
NvU32 *pData
)
{
*pData = GPU_REG_RD32(pGpu, DEVICE_BASE(NV_EP_PCFGM) + index);
return NV_OK;
}
/*!
* @brief Get GPU ID based on PCIE config reads.
* Also determine other properties of the PCIE capabilities.

View File

@ -45,6 +45,7 @@
#define NV_CE_NUM_FBPCE 4
#define NV_CE_NUM_PCES_NO_LINK_CASE 12
#define NV_CE_MAX_PCE_PER_GRCE 2
#define NV_CE_HSHUBNVL_ID_0 2
/*
* Table for setting the PCE2LCE mapping for WAR configs that cannot be implemented
@ -931,3 +932,181 @@ kceGetMappings_GH100
NV_PRINTF(LEVEL_INFO, "status = %d, statusC2C = %d\n", status, statusC2C);
return NV_OK;
}
NV_STATUS kceGetP2PCes_GH100(KernelCE *pKCe, OBJGPU *pGpu, NvU32 gpuMask, NvU32 *nvlinkP2PCeMask)
{
//
// Currently Bug 4103154 requires an updated algorithm described below
// to assign the proper LCE. Cases without MODS enabled can default back
// to the previous version.
//
return kceGetP2PCes_GV100(pKCe, pGpu, gpuMask, nvlinkP2PCeMask);
NvU32 gpuCount = gpumgrGetSubDeviceCount(gpuMask);
NvU32 minP2PLce = (NV_CE_EVEN_ASYNC_LCE_MASK | NV_CE_ODD_ASYNC_LCE_MASK) & NV_CE_MAX_LCE_MASK;
NvU32 i;
KernelNvlink *pKernelNvlink = GPU_GET_KERNEL_NVLINK(pGpu);
if (pKernelNvlink == NULL)
{
return NV_WARN_NOTHING_TO_DO;
}
if (knvlinkIsGpuConnectedToNvswitch(pGpu, pKernelNvlink))
{
return kceGetP2PCes_GV100(pKCe, pGpu, gpuMask, nvlinkP2PCeMask);
}
LOWESTBITIDX_32(minP2PLce);
*nvlinkP2PCeMask = 0;
if (gpuCount == 1)
{
*nvlinkP2PCeMask |= NVBIT(minP2PLce);
for (i = minP2PLce; i < gpuGetNumCEs(pGpu); i++)
{
*nvlinkP2PCeMask |= NVBIT(i);
}
}
else if (gpuCount > 2)
{
// if gpuCount > 2, this is an invalid request. Print warning and return NV_OK
NV_PRINTF(LEVEL_INFO, "GPU %d invalid request for gpuCount %d\n", gpuGetInstance(pGpu), gpuCount);
return NV_ERR_INVALID_STATE;
}
else
{
OBJGPU *pRemoteGpu = NULL;
KernelCE *pKCeLoop = NULL;
NvU32 peerLinkMask = 0;
NvU32 gpuInstance = 0;
NvU32 phyLinkId, status, targetPceMask, numPces;
//
// The LCE returned should be the LCE which has the most PCEs mapped
// on the given HSHUB. This HSHUB should be determined by
// tracking where the majority of links are connected.
//
NvU32 linksPerHshub[NV_CE_MAX_HSHUBS] = {0};
NvU32 maxLinksConnectedHshub = 0;
NvU32 maxConnectedHshubId = NV_CE_MAX_HSHUBS;
NvU32 lceAssignedMask = 0;
KernelCE *maxLcePerHshub[NV_CE_MAX_HSHUBS] = {0};
NV2080_CTRL_INTERNAL_HSHUB_GET_HSHUB_ID_FOR_LINKS_PARAMS params;
if (pKernelNvlink != NULL)
{
// Get the remote GPU
while ((pRemoteGpu = gpumgrGetNextGpu(gpuMask, &gpuInstance)) != NULL)
{
if (pRemoteGpu != pGpu)
break;
}
NV_ASSERT_OR_RETURN(pRemoteGpu != NULL, NV_ERR_INVALID_STATE);
gpuInstance = gpuGetInstance(pRemoteGpu);
peerLinkMask = knvlinkGetLinkMaskToPeer(pGpu, pKernelNvlink, pRemoteGpu);
}
portMemSet(&params, 0, sizeof(params));
params.linkMask = peerLinkMask;
status = knvlinkExecGspRmRpc(pGpu, pKernelNvlink,
NV2080_CTRL_CMD_INTERNAL_HSHUB_GET_HSHUB_ID_FOR_LINKS,
(void *)&params, sizeof(params));
NV_ASSERT_OK_OR_RETURN(status);
FOR_EACH_INDEX_IN_MASK(32, phyLinkId, peerLinkMask)
{
NvU32 hshubId = params.hshubIds[phyLinkId];
linksPerHshub[hshubId]++;
if (linksPerHshub[hshubId] > maxLinksConnectedHshub)
{
maxLinksConnectedHshub = linksPerHshub[hshubId];
maxConnectedHshubId = hshubId;
}
}
FOR_EACH_INDEX_IN_MASK_END;
//
// Iterate through all Async LCEs to track which HSHUB should
// be using which LCE. This is decided based on the majority. If
// there is a tie, then LCE with the lower index is preferred.
//
KCE_ITER_ALL_BEGIN(pGpu, pKCeLoop, minP2PLce)
NvU32 localMaxPcePerHshub = 0;
KernelCE *localMaxLcePerHshub;
NvU32 localMaxHshub = NV_CE_MAX_HSHUBS;
// if LCE is stubbed or LCE is already assigned to another peer
if (pKCeLoop->bStubbed)
{
continue;
}
// LCE is already assigned to this peer
if ((pKCeLoop->nvlinkPeerMask & NVBIT(gpuInstance)) != 0)
{
maxLcePerHshub[maxConnectedHshubId] = pKCeLoop;
break;
}
// LCE is already assigned to another peer
else if (pKCeLoop->nvlinkPeerMask != 0)
{
continue;
}
NV2080_CTRL_CE_GET_CE_PCE_MASK_PARAMS params = {0};
params.ceEngineType = NV2080_ENGINE_TYPE_COPY(pKCeLoop->publicID);
status = knvlinkExecGspRmRpc(pGpu, pKernelNvlink,
NV2080_CTRL_CMD_CE_GET_CE_PCE_MASK,
(void *)&params, sizeof(params));
NV_ASSERT_OK_OR_RETURN(status);
//
// An LCE may be utilized across several HSHUBs. Loop through all HSHUBs
// in order to decide which HSHUB holds the majority of this specific LCE.
// To help with this, create a mask of PCEs only on the HSHUB which the peer
// is most connected to by shifting the HSHUB PCE mask
//
for (i = NV_CE_HSHUBNVL_ID_0; i < NV_CE_MAX_HSHUBS; i++)
{
targetPceMask = params.pceMask & ((NVBIT(NV_CE_PCE_PER_HSHUB) - 1) << ((i - NV_CE_HSHUBNVL_ID_0) * NV_CE_PCE_PER_HSHUB));
numPces = nvPopCount32(targetPceMask);
if (numPces > localMaxPcePerHshub && !(lceAssignedMask & NVBIT(pKCeLoop->publicID)))
{
localMaxPcePerHshub = numPces;
localMaxLcePerHshub = pKCeLoop;
localMaxHshub = i;
}
}
if (localMaxHshub < NV_CE_MAX_HSHUBS)
{
maxLcePerHshub[localMaxHshub] = localMaxLcePerHshub;
lceAssignedMask |= NVBIT(localMaxLcePerHshub->publicID);
}
KCE_ITER_END
if (maxLcePerHshub[maxConnectedHshubId] != NULL)
{
NV_PRINTF(LEVEL_INFO,
"GPU %d Assigning Peer %d to LCE %d\n",
gpuGetInstance(pGpu), gpuInstance,
maxLcePerHshub[maxConnectedHshubId]->publicID);
maxLcePerHshub[maxConnectedHshubId]->nvlinkPeerMask = NVBIT(gpuInstance);
*nvlinkP2PCeMask = NVBIT(maxLcePerHshub[maxConnectedHshubId]->publicID);
}
}
return NV_OK;
}

View File

@ -51,6 +51,9 @@ confComputeConstructEngine_IMPL(OBJGPU *pGpu,
ConfidentialCompute *pConfCompute,
ENGDESCRIPTOR engDesc)
{
OBJSYS *pSys = SYS_GET_INSTANCE();
NvU32 data = 0;
NvBool bForceEnableCC = 0;
pConfCompute->pSpdm = NULL;
portMemSet(&pConfCompute->ccStaticInfo, 0, sizeof(pConfCompute->ccStaticInfo));
pConfCompute->gspProxyRegkeys = 0;
@ -74,6 +77,20 @@ confComputeConstructEngine_IMPL(OBJGPU *pGpu,
if (pConfCompute->getProperty(pConfCompute, PDB_PROP_CONFCOMPUTE_ENABLED))
{
bForceEnableCC = (osReadRegistryDword(pGpu, NV_REG_STR_RM_CONFIDENTIAL_COMPUTE, &data) == NV_OK) &&
FLD_TEST_DRF(_REG_STR, _RM_CONFIDENTIAL_COMPUTE, _ENABLED, _YES, data);
if (!RMCFG_FEATURE_PLATFORM_GSP && !RMCFG_FEATURE_PLATFORM_MODS && !bForceEnableCC)
{
if (!(sysGetStaticConfig(pSys)->bOsCCEnabled))
{
NV_PRINTF(LEVEL_ERROR, "CPU does not support confidential compute.\n");
NV_ASSERT(0);
pConfCompute->setProperty(pConfCompute, PDB_PROP_CONFCOMPUTE_ENABLED, NV_FALSE);
return NV_ERR_INVALID_OPERATION;
}
}
NV_CHECK_OR_RETURN(LEVEL_ERROR, confComputeIsGpuCcCapable_HAL(pGpu, pConfCompute), NV_ERR_INVALID_OPERATION);
if (pGpu->getProperty(pGpu, PDB_PROP_GPU_APM_FEATURE_CAPABLE))
@ -92,7 +109,7 @@ confComputeConstructEngine_IMPL(OBJGPU *pGpu,
}
else
{
NV_PRINTF(LEVEL_ERROR, "GPU does not support confidential compute");
NV_PRINTF(LEVEL_ERROR, "GPU does not support confidential compute.\n");
NV_ASSERT(0);
return NV_ERR_INVALID_OPERATION;
}

View File

@ -50,6 +50,8 @@
#include "kernel/gpu/intr/engine_idx.h"
#include "gpu/external_device/external_device.h"
#include "ctrl/ctrl2080.h"
#include "class/cl5070.h"
@ -490,6 +492,8 @@ void
kdispStateDestroy_IMPL(OBJGPU *pGpu,
KernelDisplay *pKernelDisplay)
{
extdevDestroy(pGpu);
if (pKernelDisplay->pInst != NULL)
{
instmemStateDestroy(pGpu, pKernelDisplay->pInst);

View File

@ -264,7 +264,7 @@ void *kcrashcatEngineMapBufferDescriptor_IMPL
memdescMap(pMemDesc, 0, memdescGetSize(pMemDesc), NV_TRUE,
NV_PROTECT_READABLE, &pBuf, &pPriv),
{
if (pBufDesc->pEngPriv == NULL)
if (!pBufDesc->bRegistered)
memdescDestroy(pMemDesc);
return NULL;
});

View File

@ -4941,12 +4941,19 @@ gpuReadBusConfigCycle_IMPL
NvU8 device = gpuGetDevice(pGpu);
NvU8 function = 0;
if (pGpu->hPci == NULL)
if (IS_PASSTHRU(pGpu))
{
pGpu->hPci = osPciInitHandle(domain, bus, device, function, NULL, NULL);
gpuReadVgpuConfigReg_HAL(pGpu, index, pData);
}
else
{
if (pGpu->hPci == NULL)
{
pGpu->hPci = osPciInitHandle(domain, bus, device, function, NULL, NULL);
}
*pData = osPciReadDword(pGpu->hPci, index);
*pData = osPciReadDword(pGpu->hPci, index);
}
return NV_OK;
}

View File

@ -647,6 +647,20 @@ _gpuiIsPidSavedAlready
return NV_FALSE;
}
static NV_STATUS
_gpuConvertPid
(
RmClient *pClient,
NvU32 *pNsPid
)
{
if (pClient->pOsPidInfo != NULL)
return osFindNsPid(pClient->pOsPidInfo, pNsPid);
*pNsPid = pClient->ProcID;
return NV_OK;
}
//
// Searches through clients to find processes with clients that have
// allocated an ElementType of class, defined by elementID. The return values
@ -673,6 +687,7 @@ gpuGetProcWithObject_IMPL
RmClient *pClient;
RsClient *pRsClient;
RsResourceRef *pResourceRef;
NV_STATUS status;
NV_ASSERT_OR_RETURN((pPidArray != NULL), NV_ERR_INVALID_ARGUMENT);
NV_ASSERT_OR_RETURN((pPidArrayCount != NULL), NV_ERR_INVALID_ARGUMENT);
@ -782,8 +797,15 @@ gpuGetProcWithObject_IMPL
}
if (elementInClient)
{
pPidArray[pidcount] = pClient->ProcID;
pidcount++;
status = _gpuConvertPid(pClient, &pPidArray[pidcount]);
if (status == NV_OK)
{
pidcount++;
}
else if (status != NV_ERR_OBJECT_NOT_FOUND)
{
return status;
}
if (pidcount == NV2080_CTRL_GPU_GET_PIDS_MAX_COUNT)
{

View File

@ -29,6 +29,7 @@
#include "gpu/conf_compute/conf_compute.h"
#include "gpu/fsp/kern_fsp.h"
#include "gpu/gsp/kernel_gsp.h"
#include "gpu/mem_sys/kern_mem_sys.h"
#include "gsp/gspifpub.h"
#include "vgpu/rpc.h"
@ -523,6 +524,7 @@ kgspBootstrapRiscvOSEarly_GH100
{
KernelFalcon *pKernelFalcon = staticCast(pKernelGsp, KernelFalcon);
KernelFsp *pKernelFsp = GPU_GET_KERNEL_FSP(pGpu);
KernelMemorySystem *pKernelMemorySystem = GPU_GET_KERNEL_MEMORY_SYSTEM(pGpu);
NV_STATUS status = NV_OK;
// Only for GSP client builds
@ -532,8 +534,16 @@ kgspBootstrapRiscvOSEarly_GH100
return NV_ERR_NOT_SUPPORTED;
}
// Clear ECC errors before attempting to load GSP
status = kmemsysClearEccCounts_HAL(pGpu, pKernelMemorySystem);
if (status != NV_OK)
{
NV_PRINTF(LEVEL_ERROR, "Issue clearing ECC counts! Status:0x%x\n", status);
}
// Setup the descriptors that GSP-FMC needs to boot GSP-RM
NV_ASSERT_OK_OR_RETURN(kgspSetupGspFmcArgs_HAL(pGpu, pKernelGsp, pGspFw));
NV_CHECK_OK_OR_GOTO(status, LEVEL_ERROR,
kgspSetupGspFmcArgs_HAL(pGpu, pKernelGsp, pGspFw), exit);
kgspSetupLibosInitArgs(pGpu, pKernelGsp);
@ -562,7 +572,8 @@ kgspBootstrapRiscvOSEarly_GH100
{
NV_PRINTF(LEVEL_NOTICE, "Starting to boot GSP via FSP.\n");
pKernelFsp->setProperty(pKernelFsp, PDB_PROP_KFSP_GSP_MODE_GSPRM, NV_TRUE);
NV_ASSERT_OK_OR_RETURN(kfspSendBootCommands_HAL(pGpu, pKernelFsp));
NV_CHECK_OK_OR_GOTO(status, LEVEL_ERROR,
kfspSendBootCommands_HAL(pGpu, pKernelFsp), exit);
}
else
{
@ -585,7 +596,7 @@ kgspBootstrapRiscvOSEarly_GH100
kfspDumpDebugState_HAL(pGpu, pKernelFsp);
}
return status;
goto exit;
}
}
@ -606,7 +617,7 @@ kgspBootstrapRiscvOSEarly_GH100
kflcnRegRead_HAL(pGpu, pKernelFalcon, NV_PFALCON_FALCON_MAILBOX0));
NV_PRINTF(LEVEL_ERROR, "NV_PGSP_FALCON_MAILBOX1 = 0x%x\n",
kflcnRegRead_HAL(pGpu, pKernelFalcon, NV_PFALCON_FALCON_MAILBOX1));
return status;
goto exit;
}
// Start polling for libos logs now that lockdown is released
@ -640,6 +651,11 @@ kgspBootstrapRiscvOSEarly_GH100
NV_PRINTF(LEVEL_INFO, "GSP FW RM ready.\n");
exit:
// If GSP fails to boot, check if there's any DED error.
if (status != NV_OK)
{
kmemsysCheckEccCounts_HAL(pGpu, pKernelMemorySystem);
}
NV_ASSERT(status == NV_OK);
return status;

View File

@ -799,7 +799,7 @@ kgspHealthCheck_TU102
objDelete(pReport);
}
return bHealthy;
goto exit_health_check;
}
NvU32 mb0 = GPU_REG_RD32(pGpu, NV_PGSP_MAILBOX(0));
@ -845,6 +845,12 @@ kgspHealthCheck_TU102
"********************************************************************************\n");
}
exit_health_check:
if (!bHealthy)
{
KernelMemorySystem *pKernelMemorySystem = GPU_GET_KERNEL_MEMORY_SYSTEM(pGpu);
kmemsysCheckEccCounts_HAL(pGpu, pKernelMemorySystem);
}
return bHealthy;
}

View File

@ -2438,7 +2438,8 @@ kgspInitRm_IMPL
if (pKernelGsp->pLogElf == NULL)
NV_CHECK_OK_OR_GOTO(status, LEVEL_ERROR, nvlogRegisterFlushCb(kgspNvlogFlushCb, pKernelGsp), done);
// Wait for GFW_BOOT OK status
// Reset thread state timeout and wait for GFW_BOOT OK status
threadStateResetTimeout(pGpu);
NV_CHECK_OK_OR_GOTO(status, LEVEL_ERROR, kgspWaitForGfwBootOk_HAL(pGpu, pKernelGsp), done);
// Fail early if WPR2 is up

View File

@ -494,19 +494,6 @@ memmgrStateLoad_IMPL
memmgrScrubInit_HAL(pGpu, pMemoryManager);
}
if (osNumaOnliningEnabled(pGpu->pOsGpuInfo))
{
//
// NUMA onlined memory size should not exceed memory size assigned to PMA.
// TODO : Currently in selfhosted and P9+GV100 systems numaOnlined size is less
// than PMA Memory Size. Ideally both of them should be identical. Bug 4051320.
//
NvU64 pmaTotalMemorySize;
NvU64 numaOnlineSize = GPU_GET_KERNEL_MEMORY_SYSTEM(pGpu)->numaOnlineSize;
pmaGetTotalMemory(&GPU_GET_HEAP(pGpu)->pmaObject, &pmaTotalMemorySize);
NV_ASSERT_OR_RETURN(pmaTotalMemorySize >= numaOnlineSize, NV_ERR_INVALID_STATE);
}
// Dump FB regions
memmgrDumpFbRegions(pGpu, pMemoryManager);
@ -1978,6 +1965,7 @@ memmgrSetPartitionableMem_IMPL
{
PMA_REGION_DESCRIPTOR *pFirstPmaRegionDesc = NULL;
NvU32 numPmaRegions;
NvU32 pmaConfig = PMA_QUERY_NUMA_ONLINED;
NV_ASSERT_OK_OR_RETURN(pmaGetRegionInfo(&pHeap->pmaObject,
&numPmaRegions, &pFirstPmaRegionDesc));
@ -1986,6 +1974,8 @@ memmgrSetPartitionableMem_IMPL
pmaGetFreeMemory(&pHeap->pmaObject, &freeMem);
pmaGetTotalMemory(&pHeap->pmaObject, &size);
NV_ASSERT_OK(pmaQueryConfigs(&pHeap->pmaObject, &pmaConfig));
//
// MIG won't be used alongside APM and hence the check below is of no use
// Even if we enable the check for APM the check will fail given that after
@ -1996,8 +1986,11 @@ memmgrSetPartitionableMem_IMPL
// channels are required to be in CPR vidmem. This changes the calculation below
// We can ignore this for the non-MIG case.
//
if (!gpuIsCCorApmFeatureEnabled(pGpu) ||
IS_MIG_ENABLED(pGpu))
// When FB memory is onlined as NUMA node, kernel can directly alloc FB memory
// and hence free memory can not be expected to be same as total memory.
//
if ((!gpuIsCCorApmFeatureEnabled(pGpu) || IS_MIG_ENABLED(pGpu)) &&
!(pmaConfig & PMA_QUERY_NUMA_ONLINED))
{
//
// PMA should be completely free at this point, otherwise we risk
@ -2891,6 +2884,7 @@ memmgrPmaRegisterRegions_IMPL
NvU32 blPageIndex;
NvU32 blackListCount;
NvU64 base, size;
NvU64 pmaTotalMemorySize = 0;
NV_STATUS status = NV_OK;
const MEMORY_SYSTEM_STATIC_CONFIG *pMemsysConfig =
kmemsysGetStaticConfig(pGpu, GPU_GET_KERNEL_MEMORY_SYSTEM(pGpu));
@ -2983,6 +2977,7 @@ memmgrPmaRegisterRegions_IMPL
}
}
pmaTotalMemorySize += (pmaRegion.limit - pmaRegion.base + 1);
NV_PRINTF(LEVEL_INFO,
"Register FB region %llx..%llx of size %llx with PMA\n",
pmaRegion.base, pmaRegion.limit,
@ -3008,6 +3003,18 @@ memmgrPmaRegisterRegions_IMPL
pmaRegionIdx++;
}
if (gpuIsSelfHosted(pGpu) && osNumaOnliningEnabled(pGpu->pOsGpuInfo))
{
//
// NUMA onlined memory size should not exceed memory size assigned to PMA.
// TODO : Currently in selfhosted and P9+GV100 systems numaOnlined size is less
// than PMA Memory Size. Ideally both of them should be identical. Bug 4051320.
//
NvU64 numaTotalSize = 0;
NvU64 numaFreeSize = 0;
osGetNumaMemoryUsage(pPma->numaNodeId, &numaFreeSize, &numaTotalSize);
NV_ASSERT_OR_RETURN(pmaTotalMemorySize >= numaTotalSize, NV_ERR_INVALID_STATE);
}
//
// bug #200354346, make sure the RM reserved region(s) are
// scrubbed during the region creation itself. Top Down scrubber,

View File

@ -69,11 +69,13 @@ static NvU32 _scrubMemory(OBJMEMSCRUB *pScrubber, RmPhysAddr base, NvU64 size,
NvU32 dstCpuCacheAttrib, NvU32 freeToken);
static void _scrubWaitAndSave(OBJMEMSCRUB *pScrubber, PSCRUB_NODE pList, NvLength itemsToSave);
static NvU64 _scrubGetFreeEntries(OBJMEMSCRUB *pScrubber);
static NvU64 _scrubCheckAndSubmit(OBJMEMSCRUB *pScrubber, NvU64 chunkSize, NvU64 *pPages,
NvU64 pageCount, PSCRUB_NODE pList, NvLength pagesToScrubCheck);
static NvU64 _scrubCheckAndSubmit(OBJMEMSCRUB *pScrubber, NvU64 pageCount, PSCRUB_NODE pList,
PSCRUB_NODE pScrubListCopy, NvLength pagesToScrubCheck);
static void _scrubCopyListItems(OBJMEMSCRUB *pScrubber, PSCRUB_NODE pList, NvLength itemsToSave);
static NV_STATUS _scrubCheckLocked(OBJMEMSCRUB *pScrubber, PSCRUB_NODE *ppList, NvU64 *pSize);
static NV_STATUS _scrubCombinePages(NvU64 *pPages, NvU64 pageSize, NvU64 pageCount,
PSCRUB_NODE *ppScrubList, NvU64 *pSize);
/**
* Constructs the memory scrubber object and signals
@ -403,63 +405,78 @@ scrubSubmitPages
{
NvU64 curPagesSaved = 0;
PSCRUB_NODE pScrubList = NULL;
PSCRUB_NODE pScrubListCopy = NULL;
NvU64 scrubListSize = 0;
NvLength pagesToScrubCheck = 0;
NvU64 totalSubmitted = 0;
NvU64 numFinished = 0;
NvU64 freeEntriesInList = 0;
NvU64 scrubCount = 0;
NvU64 numPagesToScrub = pageCount;
NvU64 numPagesToScrub = 0;
NV_STATUS status = NV_OK;
portSyncMutexAcquire(pScrubber->pScrubberMutex);
*pSize = 0;
*ppList = pScrubList;
NV_CHECK_OR_GOTO(LEVEL_INFO, pageCount > 0, cleanup);
NV_PRINTF(LEVEL_INFO, "submitting pages, pageCount = 0x%llx chunkSize = 0x%llx\n", pageCount, chunkSize);
freeEntriesInList = _scrubGetFreeEntries(pScrubber);
if (freeEntriesInList < pageCount)
{
pScrubList = (PSCRUB_NODE)
portMemAllocNonPaged((NvLength)(sizeof(SCRUB_NODE) * (pageCount - freeEntriesInList)));
if (pScrubList == NULL)
NV_ASSERT_OK_OR_GOTO(status,
_scrubCombinePages(pPages,
chunkSize,
pageCount,
&pScrubList,
&scrubListSize),
cleanup);
numPagesToScrub = scrubListSize;
if (freeEntriesInList < scrubListSize)
{
pScrubListCopy = (PSCRUB_NODE)
portMemAllocNonPaged((NvLength)(sizeof(SCRUB_NODE) * (scrubListSize - freeEntriesInList)));
if (pScrubListCopy == NULL)
{
status = NV_ERR_NO_MEMORY;
goto cleanup;
}
while (freeEntriesInList < pageCount)
while (freeEntriesInList < scrubListSize)
{
if (pageCount > MAX_SCRUB_ITEMS)
if (scrubListSize > MAX_SCRUB_ITEMS)
{
pagesToScrubCheck = (NvLength)(MAX_SCRUB_ITEMS - freeEntriesInList);
scrubCount = MAX_SCRUB_ITEMS;
}
else
{
pagesToScrubCheck = (NvLength)(pageCount - freeEntriesInList);
scrubCount = pageCount;
pagesToScrubCheck = (NvLength)(scrubListSize - freeEntriesInList);
scrubCount = scrubListSize;
}
numFinished = _scrubCheckAndSubmit(pScrubber, chunkSize, &pPages[totalSubmitted],
scrubCount, &pScrubList[curPagesSaved],
numFinished = _scrubCheckAndSubmit(pScrubber, scrubCount,
&pScrubList[totalSubmitted],
&pScrubListCopy[curPagesSaved],
pagesToScrubCheck);
pageCount -= numFinished;
scrubListSize -= numFinished;
curPagesSaved += pagesToScrubCheck;
totalSubmitted += numFinished;
freeEntriesInList = _scrubGetFreeEntries(pScrubber);
}
*ppList = pScrubList;
*ppList = pScrubListCopy;
*pSize = curPagesSaved;
}
else
{
totalSubmitted = _scrubCheckAndSubmit(pScrubber, chunkSize, pPages,
pageCount, NULL,
0);
totalSubmitted = _scrubCheckAndSubmit(pScrubber, scrubListSize,
pScrubList, NULL, 0);
*ppList = NULL;
*pSize = 0;
}
@ -467,6 +484,12 @@ scrubSubmitPages
cleanup:
portSyncMutexRelease(pScrubber->pScrubberMutex);
if (pScrubList != NULL)
{
portMemFree(pScrubList);
pScrubList = NULL;
}
NV_CHECK_OK_OR_RETURN(LEVEL_INFO, status);
if (totalSubmitted == numPagesToScrub)
@ -507,15 +530,33 @@ scrubWaitPages
)
{
NvU32 iter = 0;
NV_STATUS status = NV_OK;
NvU32 iter = 0;
NV_STATUS status = NV_OK;
PSCRUB_NODE pScrubList = NULL;
NvU64 scrubListSize = 0;
NV_ASSERT_OK_OR_RETURN(_scrubCombinePages(pPages,
chunkSize,
pageCount,
&pScrubList,
&scrubListSize));
portSyncMutexAcquire(pScrubber->pScrubberMutex);
for (iter = 0; iter < pageCount; iter++)
for (iter = 0; iter < scrubListSize; iter++)
{
_waitForPayload(pScrubber, pPages[iter], (pPages[iter] + chunkSize - 1));
_waitForPayload(pScrubber,
pScrubList[iter].base,
(pScrubList[iter].base + pScrubList[iter].size - 1));
}
portSyncMutexRelease(pScrubber->pScrubberMutex);
if (pScrubList != NULL)
{
portMemFree(pScrubList);
pScrubList = NULL;
}
return status;
}
@ -644,29 +685,28 @@ _scrubCopyListItems
/* This function is used to check and submit work items always within the
* available / maximum scrub list size.
*
* @param[in] pScrubber OBJMEMSCRUB pointer
* @param[in] chunkSize size of each page
* @param[in] pPages Array of base address
* @param[in] pageCount number of pages in the array
* @param[in] pList pointer will store the return check array
* @param[in] pScrubber OBJMEMSCRUB pointer
* @param[in] pageCount number of pages in the array
* @param[in] pList pointer will store the return check array
* @param[in] pScrubListCopy List where pages are saved
* @param[in] pagesToScrubCheck How many pages will need to be saved
* @returns the number of work successfully submitted, else 0
*/
static NvU64
_scrubCheckAndSubmit
(
OBJMEMSCRUB *pScrubber,
NvU64 chunkSize,
NvU64 *pPages,
NvU64 pageCount,
PSCRUB_NODE pList,
PSCRUB_NODE pScrubListCopy,
NvLength pagesToScrubCheck
)
{
NvU64 iter = 0;
NvU64 newId;
NV_STATUS status;
NvU64 iter = 0;
NvU64 newId;
NV_STATUS status;
if (pList == NULL && pagesToScrubCheck != 0)
if (pScrubListCopy == NULL && pagesToScrubCheck != 0)
{
NV_PRINTF(LEVEL_ERROR,
"pages need to be saved off, but stash list is invalid\n");
@ -681,19 +721,19 @@ _scrubCheckAndSubmit
NV_PRINTF(LEVEL_INFO,
"Submitting work, Id: %llx, base: %llx, size: %llx\n",
newId, pPages[iter], chunkSize);
newId, pList[iter].base, pList[iter].size);
{
status =_scrubMemory(pScrubber, pPages[iter], chunkSize, NV_MEMORY_DEFAULT,
status =_scrubMemory(pScrubber, pList[iter].base, pList[iter].size, NV_MEMORY_DEFAULT,
(NvU32)newId);
}
if(status != NV_OK)
{
NV_PRINTF(LEVEL_ERROR, "Failing because the work dint submit.\n");
NV_PRINTF(LEVEL_ERROR, "Failing because the work didn't submit.\n");
goto exit;
}
_scrubAddWorkToList(pScrubber, pPages[iter], chunkSize, newId);
_scrubAddWorkToList(pScrubber, pList[iter].base, pList[iter].size, newId);
_scrubCheckProgress(pScrubber);
}
@ -897,7 +937,7 @@ _scrubCheckProgress
else
lastSWSemaphoreDone = ceutilsUpdateProgress(pScrubber->pCeUtils);
}
pScrubber->lastSWSemaphoreDone = lastSWSemaphoreDone;
return lastSWSemaphoreDone;
@ -949,3 +989,42 @@ cleanup:
memdescDestroy(pMemDesc);
return status;
}
static NV_STATUS
_scrubCombinePages
(
NvU64 *pPages,
NvU64 pageSize,
NvU64 pageCount,
PSCRUB_NODE *ppScrubList,
NvU64 *pSize
)
{
NvU64 i, j;
*ppScrubList = (PSCRUB_NODE)portMemAllocNonPaged(sizeof(SCRUB_NODE) * pageCount);
NV_ASSERT_OR_RETURN(*ppScrubList != NULL, NV_ERR_NO_MEMORY);
// Copy first element from original list to new list
(*ppScrubList)[0].base = pPages[0];
(*ppScrubList)[0].size = pageSize;
for (i = 0, j = 0; i < (pageCount - 1); i++)
{
if ((((*ppScrubList)[j].size + pageSize) > SCRUB_MAX_BYTES_PER_LINE) ||
((pPages[i] + pageSize) != pPages[i+1]))
{
j++;
(*ppScrubList)[j].base = pPages[i+1];
(*ppScrubList)[j].size = pageSize;
}
else
{
(*ppScrubList)[j].size += pageSize;
}
}
*pSize = j + 1;
return NV_OK;
}

View File

@ -363,7 +363,7 @@ static NV_STATUS _pmaNumaAllocatePages
osAllocAcquirePage(sysPhysAddr + (1 << osPageShift), (pageSize >> osPageShift) - 1);
}
if (bScrubOnAlloc)
if (bScrubOnAlloc && (i > 0))
{
PSCRUB_NODE pPmaScrubList = NULL;
NvU64 count;

View File

@ -1618,6 +1618,24 @@ pmaGetFreeMemory
NvU64 *pBytesFree
)
{
#if !defined(SRT_BUILD)
NvU64 val;
portSyncSpinlockAcquire(pPma->pPmaLock);
NvBool nodeOnlined = pPma->nodeOnlined;
portSyncSpinlockRelease(pPma->pPmaLock);
if (nodeOnlined)
{
osGetNumaMemoryUsage(pPma->numaNodeId, pBytesFree, &val);
return;
}
//
// what to return when bNUMA == NV_TRUE and nodeOnlined==NV_FALSE?
// TODO : BUG 4199482.
//
#endif
portSyncSpinlockAcquire(pPma->pPmaLock);
*pBytesFree = pPma->pmaStats.numFreeFrames << PMA_PAGE_SHIFT;
@ -1638,6 +1656,24 @@ pmaGetTotalMemory
*pBytesTotal = 0;
#if !defined(SRT_BUILD)
NvU64 val;
portSyncSpinlockAcquire(pPma->pPmaLock);
NvBool nodeOnlined = pPma->nodeOnlined;
portSyncSpinlockRelease(pPma->pPmaLock);
if (nodeOnlined)
{
osGetNumaMemoryUsage(pPma->numaNodeId, &val, pBytesTotal);
return;
}
//
// what to return when bNUMA == NV_TRUE and nodeOnlined==NV_FALSE?
// TODO : BUG 4199482.
//
#endif
for (i = 0; i < pPma->regSize; i++)
{
pMap = pPma->pRegions[i];

View File

@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2021-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@ -23,15 +23,24 @@
#include "core/core.h"
#include "gpu/gpu.h"
#include "nvtypes.h"
#include "os/os.h"
#include "kernel/gpu/mem_sys/kern_mem_sys.h"
#include "gpu/mem_mgr/mem_desc.h"
#include "gpu/bus/kern_bus.h"
#include "kernel/gpu/intr/intr.h"
#include "nverror.h"
#include "published/hopper/gh100/dev_fb.h"
#include "published/hopper/gh100/dev_ltc.h"
#include "published/hopper/gh100/dev_fbpa.h"
#include "published/hopper/gh100/dev_vm.h"
#include "published/hopper/gh100/pri_nv_xal_ep.h"
#include "published/hopper/gh100/dev_nv_xal_addendum.h"
#include "published/hopper/gh100/dev_nv_xpl.h"
#include "published/hopper/gh100/dev_xtl_ep_pri.h"
#include "published/hopper/gh100/hwproject.h"
#include "published/ampere/ga100/dev_fb.h"
NV_STATUS
kmemsysDoCacheOp_GH100
@ -566,3 +575,168 @@ kmemsysSwizzIdToVmmuSegmentsRange_GH100
return NV_OK;
}
/*!
* Utility function used to read registers and ignore PRI errors
*/
static NvU32
_kmemsysReadRegAndMaskPriError
(
OBJGPU *pGpu,
NvU32 regAddr
)
{
NvU32 regVal;
regVal = osGpuReadReg032(pGpu, regAddr);
if ((regVal & GPU_READ_PRI_ERROR_MASK) == GPU_READ_PRI_ERROR_CODE)
{
return 0;
}
return regVal;
}
/*
* @brief Function that checks if ECC error occurred by reading various count
* registers/interrupt registers. This function is not floorsweeping-aware so
* PRI errors are ignored
*/
void
kmemsysCheckEccCounts_GH100
(
OBJGPU *pGpu,
KernelMemorySystem *pKernelMemorySystem
)
{
NvU32 dramCount = 0;
NvU32 mmuCount = 0;
NvU32 ltcCount = 0;
NvU32 pcieCount = 0;
NvU32 regVal;
for (NvU32 i = 0; i < NV_SCAL_LITTER_NUM_FBPAS; i++)
{
for (NvU32 j = 0; j < NV_PFB_FBPA_0_ECC_DED_COUNT__SIZE_1; j++)
{
// DRAM count read
dramCount += _kmemsysReadRegAndMaskPriError(pGpu, NV_PFB_FBPA_0_ECC_DED_COUNT(j) + (i * NV_FBPA_PRI_STRIDE));
// LTC count read
regVal = _kmemsysReadRegAndMaskPriError(pGpu, NV_PLTCG_LTC0_LTS0_L2_CACHE_ECC_UNCORRECTED_ERR_COUNT +
(i * NV_LTC_PRI_STRIDE) + (j * NV_LTS_PRI_STRIDE));
ltcCount += DRF_VAL(_PLTCG_LTC0_LTS0, _L2_CACHE_ECC, _UNCORRECTED_ERR_COUNT_UNIQUE, regVal);
}
}
// L2TLB
regVal = _kmemsysReadRegAndMaskPriError(pGpu, NV_PFB_PRI_MMU_L2TLB_ECC_UNCORRECTED_ERR_COUNT);
mmuCount += DRF_VAL(_PFB_PRI_MMU, _L2TLB_ECC, _UNCORRECTED_ERR_COUNT_UNIQUE, regVal);
// HUBTLB
regVal = _kmemsysReadRegAndMaskPriError(pGpu, NV_PFB_PRI_MMU_HUBTLB_ECC_UNCORRECTED_ERR_COUNT);
mmuCount += DRF_VAL(_PFB_PRI_MMU, _HUBTLB_ECC, _UNCORRECTED_ERR_COUNT_UNIQUE, regVal);
// FILLUNIT
regVal = _kmemsysReadRegAndMaskPriError(pGpu, NV_PFB_PRI_MMU_FILLUNIT_ECC_UNCORRECTED_ERR_COUNT);
mmuCount += DRF_VAL(_PFB_PRI_MMU, _FILLUNIT_ECC, _UNCORRECTED_ERR_COUNT_UNIQUE, regVal);
// PCIE RBUF
regVal = _kmemsysReadRegAndMaskPriError(pGpu, NV_XPL_BASE_ADDRESS + NV_XPL_DL_ERR_COUNT_RBUF);
pcieCount += DRF_VAL(_XPL_DL, _ERR_COUNT_RBUF, _UNCORR_ERR, regVal);
// PCIE SEQ_LUT
regVal = _kmemsysReadRegAndMaskPriError(pGpu, NV_XPL_BASE_ADDRESS + NV_XPL_DL_ERR_COUNT_SEQ_LUT);
pcieCount += DRF_VAL(_XPL_DL, _ERR_COUNT_SEQ_LUT, _UNCORR_ERR, regVal);
// PCIE RE ORDER
regVal = _kmemsysReadRegAndMaskPriError(pGpu, NV_XAL_EP_REORDER_ECC_UNCORRECTED_ERR_COUNT);
pcieCount += DRF_VAL(_XAL_EP, _REORDER_ECC, _UNCORRECTED_ERR_COUNT_UNIQUE, regVal);
// PCIE P2PREQ
regVal = _kmemsysReadRegAndMaskPriError(pGpu, NV_XAL_EP_P2PREQ_ECC_UNCORRECTED_ERR_COUNT);
pcieCount += DRF_VAL(_XAL_EP, _P2PREQ_ECC, _UNCORRECTED_ERR_COUNT_UNIQUE, regVal);
// PCIE XTL
regVal = _kmemsysReadRegAndMaskPriError(pGpu, NV_XTL_BASE_ADDRESS + NV_XTL_EP_PRI_DED_ERROR_STATUS);
if (regVal != 0)
{
pcieCount += 1;
}
// PCIE XTL
regVal = _kmemsysReadRegAndMaskPriError(pGpu, NV_XTL_BASE_ADDRESS + NV_XTL_EP_PRI_RAM_ERROR_INTR_STATUS);
if (regVal != 0)
{
pcieCount += 1;
}
// If counts > 0 or if poison interrupt pending, ECC error has occurred.
if (((dramCount + ltcCount + mmuCount + pcieCount) != 0) ||
intrIsVectorPending_HAL(pGpu, GPU_GET_INTR(pGpu), NV_PFB_FBHUB_POISON_INTR_VECTOR_HW_INIT, NULL))
{
nvErrorLog_va((void *)pGpu, UNRECOVERABLE_ECC_ERROR_ESCAPE,
"An uncorrectable ECC error detected "
"(possible firmware handling failure) "
"DRAM:%d, LTC:%d, MMU:%d, PCIE:%d", dramCount, ltcCount, mmuCount, pcieCount);
}
}
/*
* @brief Function that clears ECC error count registers.
*/
NV_STATUS
kmemsysClearEccCounts_GH100
(
OBJGPU *pGpu,
KernelMemorySystem *pKernelMemorySystem
)
{
NvU32 regVal = 0;
RMTIMEOUT timeout;
NV_STATUS status = NV_OK;
gpuClearFbhubPoisonIntrForBug2924523_HAL(pGpu);
for (NvU32 i = 0; i < NV_SCAL_LITTER_NUM_FBPAS; i++)
{
for (NvU32 j = 0; j < NV_PFB_FBPA_0_ECC_DED_COUNT__SIZE_1; j++)
{
osGpuWriteReg032(pGpu, NV_PFB_FBPA_0_ECC_DED_COUNT(j) + (i * NV_FBPA_PRI_STRIDE), 0);
osGpuWriteReg032(pGpu, NV_PLTCG_LTC0_LTS0_L2_CACHE_ECC_UNCORRECTED_ERR_COUNT + (i * NV_LTC_PRI_STRIDE) + (j * NV_LTS_PRI_STRIDE), 0);
}
}
// Reset MMU counts
osGpuWriteReg032(pGpu, NV_PFB_PRI_MMU_L2TLB_ECC_UNCORRECTED_ERR_COUNT, 0);
osGpuWriteReg032(pGpu, NV_PFB_PRI_MMU_HUBTLB_ECC_UNCORRECTED_ERR_COUNT, 0);
osGpuWriteReg032(pGpu, NV_PFB_PRI_MMU_FILLUNIT_ECC_UNCORRECTED_ERR_COUNT, 0);
// Reset XAL-EP counts
osGpuWriteReg032(pGpu, NV_XAL_EP_REORDER_ECC_UNCORRECTED_ERR_COUNT, 0);
osGpuWriteReg032(pGpu, NV_XAL_EP_P2PREQ_ECC_UNCORRECTED_ERR_COUNT, 0);
// Reset XTL-EP status registers
osGpuWriteReg032(pGpu, NV_XTL_BASE_ADDRESS + NV_XTL_EP_PRI_DED_ERROR_STATUS, ~0);
osGpuWriteReg032(pGpu, NV_XTL_BASE_ADDRESS + NV_XTL_EP_PRI_RAM_ERROR_INTR_STATUS, ~0);
// Reset XPL-EP error counters
regVal = DRF_DEF(_XPL, _DL_ERR_RESET, _RBUF_UNCORR_ERR_COUNT, _PENDING) |
DRF_DEF(_XPL, _DL_ERR_RESET, _SEQ_LUT_UNCORR_ERR_COUNT, _PENDING);
osGpuWriteReg032(pGpu, NV_XPL_BASE_ADDRESS + NV_XPL_DL_ERR_RESET, regVal);
// Wait for the error counter reset to complete
gpuSetTimeout(pGpu, GPU_TIMEOUT_DEFAULT, &timeout, 0);
for (;;)
{
status = gpuCheckTimeout(pGpu, &timeout);
regVal = osGpuReadReg032(pGpu, NV_XPL_BASE_ADDRESS + NV_XPL_DL_ERR_RESET);
if (FLD_TEST_DRF(_XPL, _DL_ERR_RESET, _RBUF_UNCORR_ERR_COUNT, _DONE, regVal) &&
FLD_TEST_DRF(_XPL, _DL_ERR_RESET, _SEQ_LUT_UNCORR_ERR_COUNT, _DONE, regVal))
break;
if (status != NV_OK)
return status;
}
return NV_OK;
}

View File

@ -5800,6 +5800,7 @@ kmigmgrInitGPUInstanceBufPools_IMPL
{
Heap *pHeap;
MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
NvU32 pmaConfig = PMA_QUERY_NUMA_ONLINED;
NV_ASSERT_OR_RETURN(pKernelMIGGpuInstance != NULL, NV_ERR_INVALID_ARGUMENT);
pHeap = pKernelMIGGpuInstance->pMemoryPartitionHeap;
NV_ASSERT_OR_RETURN(pHeap != NULL, NV_ERR_INVALID_STATE);
@ -5815,7 +5816,12 @@ kmigmgrInitGPUInstanceBufPools_IMPL
// This is just a sanity check to make sure this assumption is correct and
// allocation from PMA cannot trigger UVM evictions.
//
if (memmgrIsPmaInitialized(pMemoryManager))
// When FB memory is onlined as NUMA node, kernel can directly alloc FB memory
// and hence free memory can not be expected to be same as total memory.
//
if (memmgrIsPmaInitialized(pMemoryManager) &&
(pmaQueryConfigs(&pHeap->pmaObject, &pmaConfig) == NV_OK) &&
!(pmaConfig & PMA_QUERY_NUMA_ONLINED))
{
NvU64 freeSpace, totalSpace;
pmaGetFreeMemory(&pHeap->pmaObject, &freeSpace);

View File

@ -93,6 +93,7 @@ _memoryfabricValidatePhysMem
MEMORY_DESCRIPTOR *pPhysMemDesc;
NvU64 physPageSize;
NV_STATUS status;
Memory *pMemory;
if (hPhysMem == 0)
{
@ -110,7 +111,19 @@ _memoryfabricValidatePhysMem
return status;
}
pPhysMemDesc = (dynamicCast(pPhysmemRef->pResource, Memory))->pMemDesc;
pMemory = dynamicCast(pPhysmemRef->pResource, Memory);
if (pMemory == NULL)
{
NV_PRINTF(LEVEL_ERROR, "Invalid memory handle\n");
return NV_ERR_INVALID_OBJECT_HANDLE;
}
pPhysMemDesc = pMemory->pMemDesc;
if (pPhysMemDesc == NULL)
{
NV_PRINTF(LEVEL_ERROR, "Invalid memory handle\n");
return NV_ERR_INVALID_OBJECT_HANDLE;
}
if ((pOwnerGpu != pPhysMemDesc->pGpu) ||
!memmgrIsApertureSupportedByFla_HAL(pOwnerGpu, pMemoryManager,

View File

@ -218,7 +218,7 @@ _memMulticastFabricDescriptorDequeueWaitUnderLock
}
}
NV_STATUS
static NV_STATUS
_memMulticastFabricGpuInfoAddUnderLock
(
MemoryMulticastFabric *pMemoryMulticastFabric,
@ -1027,8 +1027,8 @@ memorymulticastfabricConstruct_IMPL
return status;
}
NV_STATUS
memorymulticastfabricCtrlAttachGpu_IMPL
static NV_STATUS
_memorymulticastfabricCtrlAttachGpu
(
MemoryMulticastFabric *pMemoryMulticastFabric,
NV00FD_CTRL_ATTACH_GPU_PARAMS *pParams
@ -1041,14 +1041,13 @@ memorymulticastfabricCtrlAttachGpu_IMPL
OBJGPU *pGpu;
FABRIC_VASPACE *pFabricVAS;
NvU64 gpuProbeHandle;
MEM_MULTICAST_FABRIC_GPU_INFO *pNode = \
listTail(&pMulticastFabricDesc->gpuInfoList);
MEM_MULTICAST_FABRIC_GPU_INFO *pNode = NULL;
CALL_CONTEXT *pCallContext = resservGetTlsCallContext();
if (pParams->flags != 0)
{
NV_PRINTF(LEVEL_ERROR, "flags passed for attach mem must be zero\n");
status = NV_ERR_INVALID_ARGUMENT;
goto fail;
return NV_ERR_INVALID_ARGUMENT;
}
// Check if the Multicast FLA object has any additional slots for GPUs
@ -1070,10 +1069,19 @@ memorymulticastfabricCtrlAttachGpu_IMPL
{
NV_PRINTF(LEVEL_ERROR,
"Multicast attach not supported on Windows/CC/vGPU modes\n");
status = NV_ERR_NOT_SUPPORTED;
goto fail;
return NV_ERR_NOT_SUPPORTED;
}
status = _memMulticastFabricGpuInfoAddUnderLock(pMemoryMulticastFabric,
pCallContext->pControlParams);
if (status != NV_OK)
{
NV_PRINTF(LEVEL_ERROR, "Failed to populate GPU info\n");
return status;
}
pNode = listTail(&pMulticastFabricDesc->gpuInfoList);
status = gpuFabricProbeGetGpuFabricHandle(pGpu->pGpuFabricProbeInfoKernel,
&gpuProbeHandle);
if (status != NV_OK)
@ -1119,6 +1127,26 @@ fail:
return status;
}
NV_STATUS
memorymulticastfabricCtrlAttachGpu_IMPL
(
MemoryMulticastFabric *pMemoryMulticastFabric,
NV00FD_CTRL_ATTACH_GPU_PARAMS *pParams
)
{
Fabric *pFabric = SYS_GET_FABRIC(SYS_GET_INSTANCE());
NV_STATUS status = NV_OK;
fabricMulticastFabricOpsMutexAcquire(pFabric);
status = _memorymulticastfabricCtrlAttachGpu(pMemoryMulticastFabric,
pParams);
fabricMulticastFabricOpsMutexRelease(pFabric);
return status;
}
static MEM_MULTICAST_FABRIC_GPU_INFO*
_memorymulticastfabricGetAttchedGpuInfo
(
@ -1148,8 +1176,8 @@ _memorymulticastfabricGetAttchedGpuInfo
return NULL;
}
NV_STATUS
memorymulticastfabricCtrlDetachMem_IMPL
static NV_STATUS
_memorymulticastfabricCtrlDetachMem
(
MemoryMulticastFabric *pMemoryMulticastFabric,
NV00FD_CTRL_DETACH_MEM_PARAMS *pParams
@ -1189,6 +1217,26 @@ memorymulticastfabricCtrlDetachMem_IMPL
return NV_OK;
}
NV_STATUS
memorymulticastfabricCtrlDetachMem_IMPL
(
MemoryMulticastFabric *pMemoryMulticastFabric,
NV00FD_CTRL_DETACH_MEM_PARAMS *pParams
)
{
Fabric *pFabric = SYS_GET_FABRIC(SYS_GET_INSTANCE());
NV_STATUS status = NV_OK;
fabricMulticastFabricOpsMutexAcquire(pFabric);
status = _memorymulticastfabricCtrlDetachMem(pMemoryMulticastFabric,
pParams);
fabricMulticastFabricOpsMutexRelease(pFabric);
return status;
}
static NV_STATUS
_memorymulticastfabricValidatePhysMem
(
@ -1202,6 +1250,7 @@ _memorymulticastfabricValidatePhysMem
MEMORY_DESCRIPTOR *pPhysMemDesc;
NvU64 physPageSize;
NV_STATUS status;
Memory *pMemory;
status = serverutilGetResourceRef(RES_GET_CLIENT_HANDLE(pMemoryMulticastFabric),
hPhysMem, &pPhysmemRef);
@ -1213,7 +1262,19 @@ _memorymulticastfabricValidatePhysMem
return status;
}
pPhysMemDesc = (dynamicCast(pPhysmemRef->pResource, Memory))->pMemDesc;
pMemory = dynamicCast(pPhysmemRef->pResource, Memory);
if (pMemory == NULL)
{
NV_PRINTF(LEVEL_ERROR, "Invalid memory handle\n");
return NV_ERR_INVALID_OBJECT_HANDLE;
}
pPhysMemDesc = pMemory->pMemDesc;
if (pPhysMemDesc == NULL)
{
NV_PRINTF(LEVEL_ERROR, "Invalid memory handle\n");
return NV_ERR_INVALID_OBJECT_HANDLE;
}
if (memdescGetAddressSpace(pPhysMemDesc) != ADDR_FBMEM ||
(pAttachedGpu != pPhysMemDesc->pGpu))
@ -1237,8 +1298,8 @@ _memorymulticastfabricValidatePhysMem
return NV_OK;
}
NV_STATUS
memorymulticastfabricCtrlAttachMem_IMPL
static NV_STATUS
_memorymulticastfabricCtrlAttachMem
(
MemoryMulticastFabric *pMemoryMulticastFabric,
NV00FD_CTRL_ATTACH_MEM_PARAMS *pParams
@ -1342,6 +1403,26 @@ freeDupedMem:
return status;
}
NV_STATUS
memorymulticastfabricCtrlAttachMem_IMPL
(
MemoryMulticastFabric *pMemoryMulticastFabric,
NV00FD_CTRL_ATTACH_MEM_PARAMS *pParams
)
{
Fabric *pFabric = SYS_GET_FABRIC(SYS_GET_INSTANCE());
NV_STATUS status = NV_OK;
fabricMulticastFabricOpsMutexAcquire(pFabric);
status = _memorymulticastfabricCtrlAttachMem(pMemoryMulticastFabric,
pParams);
fabricMulticastFabricOpsMutexRelease(pFabric);
return status;
}
void
memorymulticastfabricDestruct_IMPL
(
@ -1393,8 +1474,8 @@ memorymulticastfabricCopyConstruct_IMPL
return NV_OK;
}
NV_STATUS
memorymulticastfabricCtrlGetInfo_IMPL
static NV_STATUS
_memorymulticastfabricCtrlGetInfo
(
MemoryMulticastFabric *pMemoryMulticastFabric,
NV00FD_CTRL_GET_INFO_PARAMS *pParams
@ -1413,6 +1494,26 @@ memorymulticastfabricCtrlGetInfo_IMPL
return NV_OK;
}
NV_STATUS
memorymulticastfabricCtrlGetInfo_IMPL
(
MemoryMulticastFabric *pMemoryMulticastFabric,
NV00FD_CTRL_GET_INFO_PARAMS *pParams
)
{
Fabric *pFabric = SYS_GET_FABRIC(SYS_GET_INSTANCE());
NV_STATUS status = NV_OK;
fabricMulticastFabricOpsMutexAcquire(pFabric);
status = _memorymulticastfabricCtrlGetInfo(pMemoryMulticastFabric,
pParams);
fabricMulticastFabricOpsMutexRelease(pFabric);
return status;
}
NV_STATUS
memorymulticastfabricIsReady_IMPL
(
@ -1451,8 +1552,8 @@ memorymulticastfabricIsReady_IMPL
return mcTeamStatus;
}
NV_STATUS
memorymulticastfabricCtrlRegisterEvent_IMPL
static NV_STATUS
_memorymulticastfabricCtrlRegisterEvent
(
MemoryMulticastFabric *pMemoryMulticastFabric,
NV00FD_CTRL_REGISTER_EVENT_PARAMS *pParams
@ -1467,20 +1568,23 @@ memorymulticastfabricCtrlRegisterEvent_IMPL
}
NV_STATUS
memorymulticastfabricControl_Prologue_IMPL
memorymulticastfabricCtrlRegisterEvent_IMPL
(
MemoryMulticastFabric *pMemoryMulticastFabric,
CALL_CONTEXT *pCallContext,
RS_RES_CONTROL_PARAMS_INTERNAL *pParams
MemoryMulticastFabric *pMemoryMulticastFabric,
NV00FD_CTRL_REGISTER_EVENT_PARAMS *pParams
)
{
RmResource *pResource = staticCast(pMemoryMulticastFabric, RmResource);
Fabric *pFabric = SYS_GET_FABRIC(SYS_GET_INSTANCE());
NV_STATUS status = NV_OK;
// Other control calls, nothing to be validated.
if (pParams->cmd != NV00FD_CTRL_CMD_ATTACH_GPU)
return rmresControl_Prologue_IMPL(pResource, pCallContext, pParams);
fabricMulticastFabricOpsMutexAcquire(pFabric);
return _memMulticastFabricGpuInfoAddUnderLock(pMemoryMulticastFabric, pParams);
status = _memorymulticastfabricCtrlRegisterEvent(pMemoryMulticastFabric,
pParams);
fabricMulticastFabricOpsMutexRelease(pFabric);
return status;
}
NV_STATUS
@ -1491,7 +1595,6 @@ memorymulticastfabricControl_IMPL
RS_RES_CONTROL_PARAMS_INTERNAL *pParams
)
{
Fabric *pFabric = SYS_GET_FABRIC(SYS_GET_INSTANCE());
NV_STATUS status = NV_OK;
if (pParams->cmd != NV00FD_CTRL_CMD_ATTACH_GPU)
@ -1522,14 +1625,13 @@ memorymulticastfabricControl_IMPL
NV_CHECK_OK_OR_RETURN(LEVEL_ERROR, status);
}
fabricMulticastFabricOpsMutexAcquire(pFabric);
status = resControl_IMPL(staticCast(pMemoryMulticastFabric, RsResource),
pCallContext, pParams);
fabricMulticastFabricOpsMutexRelease(pFabric);
return status;
//
// Note: GPU lock(s) is required for some control calls. Thus, it is
// incorrect to take the leaf lock here. resControl_IMPL() attempts to
// acquire the GPU locks before it calls the control call body.
//
return resControl_IMPL(staticCast(pMemoryMulticastFabric, RsResource),
pCallContext, pParams);
}
NvBool

View File

@ -82,6 +82,8 @@ rmclientConstruct_IMPL
pClient->pSecurityToken = NULL;
pClient->pOSInfo = pSecInfo->clientOSInfo;
pClient->cachedPrivilege = pSecInfo->privLevel;
// TODO: Revisit in M2, see GPUSWSEC-1176
if (RMCFG_FEATURE_PLATFORM_GSP && IS_VGPU_GSP_PLUGIN_OFFLOAD_ENABLED(pGpu))
{
@ -96,10 +98,10 @@ rmclientConstruct_IMPL
else
{
pClient->ProcID = osGetCurrentProcess();
if (pClient->cachedPrivilege <= RS_PRIV_LEVEL_USER_ROOT)
pClient->pOsPidInfo = osGetPidInfo();
}
pClient->cachedPrivilege = pSecInfo->privLevel;
// Set user-friendly client name from current process
osGetCurrentProcessName(pClient->name, NV_PROC_NAME_MAX_LENGTH);
@ -128,7 +130,7 @@ rmclientConstruct_IMPL
{
NV_PRINTF(LEVEL_WARNING,
"NVRM_RPC: Failed to set host client resource handle range %x\n", status);
return status;
goto out;
}
}
@ -139,7 +141,7 @@ rmclientConstruct_IMPL
{
NV_PRINTF(LEVEL_WARNING,
"Failed to set host client restricted resource handle range. Status=%x\n", status);
return status;
goto out;
}
if (!rmGpuLockIsOwner())
@ -148,7 +150,7 @@ rmclientConstruct_IMPL
if ((status = rmGpuLocksAcquire(GPUS_LOCK_FLAGS_NONE, RM_LOCK_MODULES_CLIENT)) != NV_OK)
{
NV_ASSERT(0);
return status;
goto out;
}
bReleaseLock = NV_TRUE;
}
@ -206,6 +208,13 @@ rmclientConstruct_IMPL
if (status == NV_OK && pParams->pAllocParams != NULL)
*(NvHandle*)(pParams->pAllocParams) = pParams->hClient;
out:
if (status != NV_OK)
{
osPutPidInfo(pClient->pOsPidInfo);
pClient->pOsPidInfo = NULL;
}
return status;
}
@ -230,6 +239,8 @@ rmclientDestruct_IMPL
// Free any association of the client with existing third-party p2p object
CliUnregisterFromThirdPartyP2P(pClient);
osPutPidInfo(pClient->pOsPidInfo);
//
// Free all of the devices of the client (do it in reverse order to
// facilitate tear down of things like ctxdmas, etc)

View File

@ -1013,7 +1013,7 @@ _rmapiControlWithSecInfoTlsIRQL
NV_STATUS status;
THREAD_STATE_NODE threadState;
NvU8 stackAllocator[TLS_ISR_ALLOCATOR_SIZE];
NvU8 stackAllocator[2*TLS_ISR_ALLOCATOR_SIZE];
PORT_MEM_ALLOCATOR* pIsrAllocator = portMemAllocatorCreateOnExistingBlock(stackAllocator, sizeof(stackAllocator));
tlsIsrInit(pIsrAllocator);

View File

@ -142,6 +142,7 @@ static CrashCatBufferDescriptor *_crashcatEngineCreateBufferDescriptor
portMemSet(pBufDesc, 0, sizeof(*pBufDesc));
pBufDesc->bRegistered = NV_FALSE;
pBufDesc->aperture = aperture;
pBufDesc->physOffset = offset;
pBufDesc->size = size;
@ -315,6 +316,8 @@ void *crashcatEngineMapCrashBuffer_IMPL
//
if (!pBufDesc->bRegistered)
_crashcatEngineDestroyBufferDescriptor(pCrashCatEng, pBufDesc);
return NULL;
}
return pBufDesc->pMapping;

View File

@ -69,7 +69,11 @@ void crashcatReportLogReporter_V1_GENERIC(CrashCatReport *pReport)
NvCrashCatNvriscvUcodeId ucodeId = crashcatReportV1ReporterUcodeId(pReportV1);
NV_CRASHCAT_RISCV_MODE riscvMode = crashcatReportV1ReporterMode(pReportV1);
crashcatEnginePrintf(pReport->pEngine, NV_FALSE,
//
// Though this is technically not a separate packet, we use the CRASHCAT_REPORT_LOG_PACKET_TYPE
// macro to get the correct prefix/indentation for the reporter information.
//
CRASHCAT_REPORT_LOG_PACKET_TYPE(pReport,
"Reported by partition:%u ucode:%u [%c-mode] version:%u @ %u",
partition, ucodeId, crashcatReportModeToChar_GENERIC(riscvMode),
crashcatReportV1ReporterVersion(pReportV1),

View File

@ -114,10 +114,13 @@ void crashcatReportLogReporter_V1_LIBOS2(CrashCatReport *pReport)
NvCrashCatReport_V1 *pReportV1 = &pReport->v1.report;
NvU8 taskId = crashcatReportV1ReporterLibos2TaskId(pReportV1);
//
// Though this is technically not a separate packet, we use the CRASHCAT_REPORT_LOG_PACKET_TYPE
// macro to get the correct prefix/indentation for the reporter information.
//
if (taskId == NV_CRASHCAT_REPORT_V1_REPORTER_ID_LIBOS2_TASK_ID_UNSPECIFIED)
{
crashcatEnginePrintf(pReport->pEngine, NV_FALSE,
"Reported by libos kernel v%u.%u [%u] @ %u",
CRASHCAT_REPORT_LOG_PACKET_TYPE(pReport, "Reported by libos kernel v%u.%u [%u] @ %u",
crashcatReportV1ReporterVersionLibos2Major(pReportV1),
crashcatReportV1ReporterVersionLibos2Minor(pReportV1),
crashcatReportV1ReporterVersionLibos2Cl(pReportV1),
@ -125,8 +128,7 @@ void crashcatReportLogReporter_V1_LIBOS2(CrashCatReport *pReport)
}
else
{
crashcatEnginePrintf(pReport->pEngine, NV_FALSE,
"Reported by libos task:%u v%u.%u [%u] @ ts:%u",
CRASHCAT_REPORT_LOG_PACKET_TYPE(pReport, "Reported by libos task:%u v%u.%u [%u] @ ts:%u",
taskId, crashcatReportV1ReporterVersionLibos2Major(pReportV1),
crashcatReportV1ReporterVersionLibos2Minor(pReportV1),
crashcatReportV1ReporterVersionLibos2Cl(pReportV1),

View File

@ -223,9 +223,12 @@ mmuWalkFindLevel
)
{
const MMU_WALK_LEVEL *pLevel = &pWalk->root;
while (pLevel->pFmt != pLevelFmt)
while (pLevel != NULL && pLevel->pFmt != pLevelFmt)
{
NvU32 subLevel;
NV_ASSERT_OR_RETURN(pLevel->pFmt != NULL, NULL);
// Single sub-level always continues.
if (1 == pLevel->pFmt->numSubLevels)
{

View File

@ -1444,6 +1444,14 @@ _portMemAllocatorCreateOnExistingBlock
pAllocator->pTracking = NULL; // No tracking for this allocator
pAllocator->pImpl = (PORT_MEM_ALLOCATOR_IMPL*)(pAllocator + 1);
//
// PORT_MEM_BITVECTOR (pAllocator->pImpl) and PORT_MEM_ALLOCATOR_TRACKING (pAllocator->pImpl->tracking)
// are mutually exclusively used.
// When pAllocator->pTracking = NULL the data in pAllocator->pImpl->tracking is not used and instead
// pBitVector uses the same meory location.
// When pAllocator->pImpl->tracking there is no usage of PORT_MEM_BITVECTOR
//
pBitVector = (PORT_MEM_BITVECTOR*)(pAllocator->pImpl);
pBitVector->pSpinlock = pSpinlock;
@ -1544,6 +1552,10 @@ _portMemAllocatorAllocExistingWrapper
{
portSyncSpinlockRelease(pSpinlock);
}
if (pMem == NULL)
{
PORT_MEM_PRINT_ERROR("Memory allocation failed.\n");
}
return pMem;
}

View File

@ -1,4 +1,4 @@
NVIDIA_VERSION = 535.104.05
NVIDIA_VERSION = 535.113.01
# This file.
VERSION_MK_FILE := $(lastword $(MAKEFILE_LIST))