/*******************************************************************************

    Copyright (c) 2015-2024 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
    deal in the Software without restriction, including without limitation the
    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
    sell copies of the Software, and to permit persons to whom the Software is
    furnished to do so, subject to the following conditions:

        The above copyright notice and this permission notice shall be
        included in all copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
    DEALINGS IN THE SOFTWARE.

*******************************************************************************/

#ifndef __UVM_GLOBAL_H__
#define __UVM_GLOBAL_H__

#include "nv_uvm_types.h"
#include "uvm_extern_decl.h"
#include "uvm_linux.h"
#include "uvm_common.h"
#include "uvm_processors.h"
#include "uvm_gpu.h"
#include "uvm_lock.h"
#include "uvm_ats_ibm.h"
// Global state of the uvm driver
struct uvm_global_struct
{
    // Mask of retained GPUs.
    // Note that GPUs are added to this mask as the last step of add_gpu() and
    // removed from it as the first step of remove_gpu(), implying that a GPU
    // that's being initialized or deinitialized will not be in it.
    uvm_processor_mask_t retained_gpus;

    // Array of the parent GPUs registered with UVM. Note that GPU ids are
    // offset by 1 to accommodate UVM_ID_CPU, so e.g. parent_gpus[0] has
    // GPU id 1. A GPU entry is unused iff it does not exist (is a NULL
    // pointer) in this table.
    uvm_parent_gpu_t *parent_gpus[UVM_PARENT_ID_MAX_GPUS];

    // A global RM session (RM client)
    // Created on module load and destroyed on module unload
    uvmGpuSessionHandle rm_session_handle;
    // Peer-to-peer table storing parent GPU and MIG instance peer info.
    // Note that MIG instances can be peers within a single parent GPU, or
    // peers in different parent GPUs if NVLink or PCIe peer access is
    // enabled. PCIe and MIG peer info is added to and removed from this
    // table when the usermode driver calls UvmEnablePeerAccess() and
    // UvmDisablePeerAccess(), respectively. NVLink and MIG peers are updated
    // when UvmRegisterGpu() and UvmUnregisterGpu() are called. Peer-to-peer
    // state for MIG instances within the same parent GPU is not stored here.
    uvm_parent_gpu_peer_t parent_gpu_peers[UVM_MAX_UNIQUE_PARENT_GPU_PAIRS];

    // Peer-to-peer copy mode
    // Pascal+ GPUs support virtual addresses in p2p copies.
    // Ampere+ GPUs add support for physical addresses in p2p copies.
    uvm_gpu_peer_copy_mode_t peer_copy_mode;

    // Stores an NV_STATUS. Once it becomes != NV_OK, the driver should
    // refuse to do anything other than try to clean up as much as possible.
    // An example of a fatal error is an unrecoverable ECC error on one of
    // the GPUs.
    atomic_t fatal_error;

    // A flag to disable the assert on fatal error.
    // To be used by tests and only consulted if tests are enabled.
    bool disable_fatal_error_assert;

    // Lock protecting the global state
    uvm_mutex_t global_lock;
    struct
    {
        // Lock synchronizing user threads with power management activity
        uvm_rw_semaphore_t lock;

        // Power management state flag; tested by UVM_GPU_WRITE_ONCE()
        // and UVM_GPU_READ_ONCE() to detect accesses to GPUs when
        // UVM is suspended.
        bool is_suspended;
    } pm;

    // This lock synchronizes addition and removal of GPUs from UVM's global
    // table. It must be held whenever g_uvm_global.parent_gpus[] is written.
    // In order to read from this table, you must hold either the
    // gpu_table_lock or the global_lock.
    //
    // This is a leaf lock.
    uvm_spinlock_irqsave_t gpu_table_lock;

    // Number of simulated/emulated devices that have registered with UVM
    unsigned num_simulated_devices;

    // A single queue for deferred work that is non-GPU-specific.
    nv_kthread_q_t global_q;

    // A single queue for deferred f_ops->release() handling. Items scheduled
    // to run on it may block for the duration of system sleep cycles,
    // stalling the queue and preventing any other items from running.
    nv_kthread_q_t deferred_release_q;

    struct
    {
        // Indicates whether the system HW supports ATS. This field is set
        // once during global initialization (uvm_global_init), and can be
        // read afterwards without acquiring any locks.
        bool supported;

        // On top of HW platform support, ATS support can be overridden using
        // the module parameter uvm_ats_mode. This field is set once during
        // global initialization (uvm_global_init), and can be read
        // afterwards without acquiring any locks.
        bool enabled;
    } ats;

#if UVM_IBM_NPU_SUPPORTED()
    // On IBM systems this array tracks the active NPUs (the NPUs which are
    // attached to retained GPUs).
    uvm_ibm_npu_t npus[NV_MAX_NPUS];
#endif

    // List of all active VA spaces
    struct
    {
        uvm_mutex_t lock;
        struct list_head list;
    } va_spaces;

    // Notify a registered process about the driver state after it's
    // unloaded. The intent is to systematically report any error during the
    // driver teardown. unload_state is used for testing only.
    struct
    {
        // ptr points to an 8-byte buffer within page.
        NvU64 *ptr;
        struct page *page;
    } unload_state;

    // True if the VM has AMD's SEV, or equivalent HW security extensions
    // such as Intel's TDX, enabled. The flag is always false on the host.
    //
    // This value moves in tandem with that of Confidential Computing in the
    // GPU(s) in all supported configurations, so it is used as a proxy for
    // the Confidential Computing state.
    //
    // This field is set once during global initialization (uvm_global_init),
    // and can be read afterwards without acquiring any locks.
    bool conf_computing_enabled;
};
// Initialize global uvm state
NV_STATUS uvm_global_init(void);

// Deinitialize global state (called from module exit)
void uvm_global_exit(void);

// Prepare for entry into a system sleep state
NV_STATUS uvm_suspend_entry(void);

// Recover after exit from a system sleep state
NV_STATUS uvm_resume_entry(void);

// Add parent GPU to the global table.
//
// LOCKING: requires that you hold the global lock and gpu_table_lock
static void uvm_global_add_parent_gpu(uvm_parent_gpu_t *parent_gpu)
{
    NvU32 gpu_index = uvm_parent_id_gpu_index(parent_gpu->id);

    uvm_assert_mutex_locked(&g_uvm_global.global_lock);
    uvm_assert_spinlock_locked(&g_uvm_global.gpu_table_lock);

    UVM_ASSERT(!g_uvm_global.parent_gpus[gpu_index]);
    g_uvm_global.parent_gpus[gpu_index] = parent_gpu;
}

// Remove parent GPU from the global table.
//
// LOCKING: requires that you hold the global lock and gpu_table_lock
static void uvm_global_remove_parent_gpu(uvm_parent_gpu_t *parent_gpu)
{
    NvU32 gpu_index = uvm_parent_id_gpu_index(parent_gpu->id);

    uvm_assert_mutex_locked(&g_uvm_global.global_lock);
    uvm_assert_spinlock_locked(&g_uvm_global.gpu_table_lock);

    UVM_ASSERT(g_uvm_global.parent_gpus[gpu_index] == NULL ||
               g_uvm_global.parent_gpus[gpu_index] == parent_gpu);

    g_uvm_global.parent_gpus[gpu_index] = NULL;
}

// Get a parent GPU by its id.
// Returns a pointer to the parent GPU object, or NULL if not found.
//
// LOCKING: requires that you hold the gpu_table_lock, the global lock, or
// have retained at least one of the child GPUs.
static uvm_parent_gpu_t *uvm_parent_gpu_get(uvm_parent_gpu_id_t id)
{
    return g_uvm_global.parent_gpus[uvm_parent_id_gpu_index(id)];
}

// Get a GPU by its GPU id.
// Returns a pointer to the GPU object, or NULL if not found.
//
// LOCKING: requires that you hold the gpu_table_lock, the global_lock, or
// have retained the gpu.
static uvm_gpu_t *uvm_gpu_get(uvm_gpu_id_t gpu_id)
{
    uvm_parent_gpu_t *parent_gpu;

    parent_gpu = g_uvm_global.parent_gpus[uvm_parent_id_gpu_index_from_gpu_id(gpu_id)];
    if (!parent_gpu)
        return NULL;

    return parent_gpu->gpus[uvm_id_sub_processor_index(gpu_id)];
}
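
// Example (illustrative sketch, not part of the driver): looking up a GPU by
// id under the global lock. The lock (or a prior retain of the GPU) is what
// keeps the returned pointer valid; the id value here is hypothetical.
//
//     uvm_gpu_t *gpu;
//
//     uvm_mutex_lock(&g_uvm_global.global_lock);
//     gpu = uvm_gpu_get(gpu_id);
//     if (gpu != NULL) {
//         // Safe to use gpu while the lock is held
//     }
//     uvm_mutex_unlock(&g_uvm_global.global_lock);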

static uvmGpuSessionHandle uvm_global_session_handle(void)
{
    return g_uvm_global.rm_session_handle;
}

// Use these READ_ONCE()/WRITE_ONCE() wrappers when accessing GPU resources
// in BAR0/BAR1 to detect cases in which GPUs are accessed when UVM is
// suspended.
#define UVM_GPU_WRITE_ONCE(x, val) do {         \
        UVM_ASSERT(!uvm_global_is_suspended()); \
        WRITE_ONCE(x, val);                     \
    } while (0)

#define UVM_GPU_READ_ONCE(x) ({                 \
        UVM_ASSERT(!uvm_global_is_suspended()); \
        READ_ONCE(x);                           \
    })
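
// Example (illustrative sketch): accessing a hypothetical GPU register
// mapping through the wrappers, so that a stray access while UVM is
// suspended trips the assert. channel, doorbell, and gpu_get are
// illustrative names, not part of this header.
//
//     UVM_GPU_WRITE_ONCE(*channel->doorbell, new_put_value);
//     NvU32 get = UVM_GPU_READ_ONCE(*channel->gpu_get);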

static bool global_is_fatal_error_assert_disabled(void)
{
    // Only allow the assert to be disabled if tests are enabled
    if (!uvm_enable_builtin_tests)
        return false;

    return g_uvm_global.disable_fatal_error_assert;
}

// Set a global fatal error
// Once that happens, the driver should refuse to do anything other than try
// to clean up as much as possible.
// An example of a fatal error is an unrecoverable ECC error on one of the
// GPUs.
// Use a macro so that the assert below provides precise file and line info
// and a backtrace.
#define uvm_global_set_fatal_error(error)                                    \
    do {                                                                     \
        if (!global_is_fatal_error_assert_disabled())                        \
            UVM_ASSERT_MSG(0, "Fatal error: %s\n", nvstatusToString(error)); \
        uvm_global_set_fatal_error_impl(error);                              \
    } while (0)
void uvm_global_set_fatal_error_impl(NV_STATUS error);
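
// Example (illustrative sketch): a typical call site records the first fatal
// condition it sees and stops further work; the error-detection condition
// here is hypothetical.
//
//     if (unrecoverable_ecc_error_detected) {
//         uvm_global_set_fatal_error(NV_ERR_ECC_ERROR);
//         return NV_ERR_ECC_ERROR;
//     }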

// Get the global status
static NV_STATUS uvm_global_get_status(void)
{
    return atomic_read(&g_uvm_global.fatal_error);
}

// Reset global fatal error
// This is only to be used by tests that trigger the global error on purpose.
// Returns the value of the global error field that existed just before this
// reset call was made.
NV_STATUS uvm_global_reset_fatal_error(void);
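
// Example (illustrative sketch): entry points can check the global status up
// front and refuse to proceed once a fatal error has been recorded.
//
//     NV_STATUS status = uvm_global_get_status();
//     if (status != NV_OK)
//         return status;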

static uvm_gpu_t *uvm_processor_mask_find_first_gpu(const uvm_processor_mask_t *gpus)
{
    uvm_gpu_t *gpu;
    uvm_gpu_id_t gpu_id = uvm_processor_mask_find_first_gpu_id(gpus);

    if (UVM_ID_IS_INVALID(gpu_id))
        return NULL;

    gpu = uvm_gpu_get(gpu_id);

    // If there is a valid GPU id in the mask, assert that the corresponding
    // uvm_gpu_t is present. Otherwise it would stop a for_each_gpu_in_mask()
    // loop prematurely. Today, this could only happen in remove_gpu()
    // because the GPU being removed is deleted from the global table very
    // early.
    UVM_ASSERT_MSG(gpu, "gpu_id %u\n", uvm_id_value(gpu_id));

    return gpu;
}

static uvm_gpu_t *__uvm_processor_mask_find_next_gpu(const uvm_processor_mask_t *gpus, uvm_gpu_t *gpu)
{
    uvm_gpu_id_t gpu_id;

    UVM_ASSERT(gpu);

    gpu_id = uvm_processor_mask_find_next_id(gpus, uvm_gpu_id_next(gpu->id));
    if (UVM_ID_IS_INVALID(gpu_id))
        return NULL;

    gpu = uvm_gpu_get(gpu_id);

    // See comment in uvm_processor_mask_find_first_gpu().
    UVM_ASSERT_MSG(gpu, "gpu_id %u\n", uvm_id_value(gpu_id));

    return gpu;
}

// Helper to iterate over all GPUs in the input mask
#define for_each_gpu_in_mask(gpu, mask)                       \
    for (gpu = uvm_processor_mask_find_first_gpu(mask);       \
         gpu != NULL;                                         \
         gpu = __uvm_processor_mask_find_next_gpu(mask, gpu))

// Helper to iterate over all GPUs retained by the UVM driver
// (across all va spaces).
#define for_each_gpu(gpu)                                                           \
    for (({uvm_assert_mutex_locked(&g_uvm_global.global_lock);                      \
           gpu = uvm_processor_mask_find_first_gpu(&g_uvm_global.retained_gpus);}); \
         gpu != NULL;                                                               \
         gpu = __uvm_processor_mask_find_next_gpu(&g_uvm_global.retained_gpus, gpu))
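
// Example (illustrative sketch): iterating over all retained GPUs.
// for_each_gpu() asserts that the global lock is held, so the caller takes
// it first; check_gpu() is a hypothetical loop body.
//
//     uvm_gpu_t *gpu;
//
//     uvm_mutex_lock(&g_uvm_global.global_lock);
//     for_each_gpu(gpu)
//         check_gpu(gpu);
//     uvm_mutex_unlock(&g_uvm_global.global_lock);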

// LOCKING: Must hold either the global_lock or the gpu_table_lock
static uvm_parent_gpu_t *uvm_global_find_next_parent_gpu(uvm_parent_gpu_t *parent_gpu)
{
    NvU32 i;

    if (parent_gpu) {
        NvU32 gpu_index = uvm_parent_id_gpu_index(parent_gpu->id);

        i = gpu_index + 1;
    }
    else {
        i = 0;
    }

    parent_gpu = NULL;

    while (i < UVM_PARENT_ID_MAX_GPUS) {
        if (g_uvm_global.parent_gpus[i]) {
            parent_gpu = g_uvm_global.parent_gpus[i];
            break;
        }

        i++;
    }

    return parent_gpu;
}

// LOCKING: Must hold the global_lock
static uvm_gpu_t *uvm_gpu_find_next_valid_gpu_in_parent(uvm_parent_gpu_t *parent_gpu, uvm_gpu_t *cur_gpu)
{
    uvm_gpu_t *gpu = NULL;
    uvm_gpu_id_t gpu_id;
    NvU32 sub_processor_index;
    NvU32 cur_sub_processor_index;

    UVM_ASSERT(parent_gpu);

    gpu_id = uvm_gpu_id_from_parent_gpu_id(parent_gpu->id);
    cur_sub_processor_index = cur_gpu ? uvm_id_sub_processor_index(cur_gpu->id) : -1;

    sub_processor_index = find_next_bit(parent_gpu->valid_gpus,
                                        UVM_PARENT_ID_MAX_SUB_PROCESSORS,
                                        cur_sub_processor_index + 1);
    if (sub_processor_index < UVM_PARENT_ID_MAX_SUB_PROCESSORS) {
        gpu = uvm_gpu_get(uvm_id_from_value(uvm_id_value(gpu_id) + sub_processor_index));
        UVM_ASSERT(gpu != NULL);
    }

    return gpu;
}

// LOCKING: Must hold either the global_lock or the gpu_table_lock
#define for_each_parent_gpu(parent_gpu)                               \
    for ((parent_gpu) = uvm_global_find_next_parent_gpu(NULL);        \
         (parent_gpu) != NULL;                                        \
         (parent_gpu) = uvm_global_find_next_parent_gpu((parent_gpu)))

// LOCKING: Must hold the global_lock
#define for_each_gpu_in_parent(gpu, parent_gpu)                                  \
    for (({uvm_assert_mutex_locked(&g_uvm_global.global_lock);                   \
           (gpu) = uvm_gpu_find_next_valid_gpu_in_parent((parent_gpu), NULL);}); \
         (gpu) != NULL;                                                          \
         (gpu) = uvm_gpu_find_next_valid_gpu_in_parent((parent_gpu), (gpu)))
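
// Example (illustrative sketch): walking every parent GPU and each valid
// sub-GPU (e.g., MIG instance) within it, under the global lock; check_gpu()
// is a hypothetical loop body.
//
//     uvm_parent_gpu_t *parent_gpu;
//     uvm_gpu_t *gpu;
//
//     uvm_mutex_lock(&g_uvm_global.global_lock);
//     for_each_parent_gpu(parent_gpu) {
//         for_each_gpu_in_parent(gpu, parent_gpu)
//             check_gpu(gpu);
//     }
//     uvm_mutex_unlock(&g_uvm_global.global_lock);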

// Helper which calls uvm_gpu_retain() on each GPU in mask.
void uvm_global_gpu_retain(const uvm_processor_mask_t *mask);

// Helper which calls uvm_gpu_release_locked() on each GPU in mask.
//
// LOCKING: this function takes and releases the global lock if the input
// mask is not empty
void uvm_global_gpu_release(const uvm_processor_mask_t *mask);

// Check for ECC errors for all GPUs in a mask.
// Notably this check cannot be performed where it's not safe to call into RM.
NV_STATUS uvm_global_gpu_check_ecc_error(uvm_processor_mask_t *gpus);

// Pre-allocate fault service contexts.
NV_STATUS uvm_service_block_context_init(void);

// Release fault service contexts if any exist.
void uvm_service_block_context_exit(void);

// Allocate a service block context
uvm_service_block_context_t *uvm_service_block_context_alloc(struct mm_struct *mm);

// Free a service block context
void uvm_service_block_context_free(uvm_service_block_context_t *service_context);

#endif // __UVM_GLOBAL_H__