nvidia-open-gpu-kernel-modules/kernel-open/nvidia-uvm/uvm_common.c

339 lines
9.9 KiB
C

/*******************************************************************************
Copyright (c) 2013-2023 NVIDIA Corporation
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.
*******************************************************************************/
#include "uvm_common.h"
#include "uvm_linux.h"
#include "uvm_forward_decl.h"
// Elapsed time, in nanoseconds, since a spin loop's start_time_ns after which
// uvm_spin_loop() begins calling schedule() (10*1000 ns, i.e. 10 microseconds).
// TODO: Bug 1710855: Tweak this number through benchmarks
#define UVM_SPIN_LOOP_SCHEDULE_TIMEOUT_NS (10*1000ULL)
// Interval, in seconds, after which uvm_spin_loop() returns
// NV_ERR_TIMEOUT_RETRY and restarts the interval.
#define UVM_SPIN_LOOP_PRINT_TIMEOUT_SEC 30ULL
// Switch for UVM debug prints; uvm_debug_prints_enabled() treats any non-zero
// value as "enabled". Defaults to enabled for debug and develop builds and
// disabled for release builds.
static int uvm_debug_prints = UVM_IS_DEBUG() || UVM_IS_DEVELOP();
// Make the module param writable so that prints can be enabled or disabled at
// any time by modifying the module parameter.
module_param(uvm_debug_prints, int, S_IRUGO|S_IWUSR);
MODULE_PARM_DESC(uvm_debug_prints, "Enable uvm debug prints.");
// Reports whether UVM debug prints are currently enabled, i.e. whether the
// uvm_debug_prints module parameter holds a non-zero value.
bool uvm_debug_prints_enabled(void)
{
    return !!uvm_debug_prints;
}
// This parameter allows a program in user mode to call the kernel tests
// defined in this module. It should only be used for testing and must not be
// enabled otherwise, since it breaks security when it is enabled. By default
// and for safety reasons this parameter is disabled (0).
//
// Unlike the other module parameters in this file, it is registered with
// S_IRUGO only (no S_IWUSR).
int uvm_enable_builtin_tests __read_mostly = 0;
module_param(uvm_enable_builtin_tests, int, S_IRUGO);
MODULE_PARM_DESC(uvm_enable_builtin_tests,
"Enable the UVM built-in tests. (This is a security risk)");
// Controls the UVM asserts that are included in release builds. Defaults to
// enabled (1).
int uvm_release_asserts __read_mostly = 1;
// Make the module param writable so that release asserts can be enabled or
// disabled at any time by modifying the module parameter.
module_param(uvm_release_asserts, int, S_IRUGO|S_IWUSR);
MODULE_PARM_DESC(uvm_release_asserts, "Enable uvm asserts included in release builds.");
// Controls whether a failed release assert also calls dump_stack(). Defaults
// to disabled (0).
int uvm_release_asserts_dump_stack __read_mostly = 0;
// Make the module param writable so that dumping the stack can be enabled and
// disabled at any time by modifying the module parameter.
module_param(uvm_release_asserts_dump_stack, int, S_IRUGO|S_IWUSR);
MODULE_PARM_DESC(uvm_release_asserts_dump_stack, "dump_stack() on failed UVM release asserts.");
// Controls whether a failed release assert sets the UVM global fatal error.
// Defaults to disabled (0).
int uvm_release_asserts_set_global_error __read_mostly = 0;
// Make the module param writable so that setting the global fatal error can be
// enabled and disabled at any time by modifying the module parameter.
module_param(uvm_release_asserts_set_global_error, int, S_IRUGO|S_IWUSR);
MODULE_PARM_DESC(uvm_release_asserts_set_global_error, "Set UVM global fatal error on failed release asserts.");
// A separate flag to enable setting the global error, to be used by tests
// only. Defaults to false and is not registered as a module parameter in this
// file.
bool uvm_release_asserts_set_global_error_for_tests __read_mostly = false;
//
// Translate a kernel errno value into the closest matching NV_STATUS.
//
// The sign of errnoCode is ignored, so both the positive form and the negated
// form of an errno are accepted. 0 maps to NV_OK, and any errno without an
// explicit mapping below yields NV_ERR_GENERIC.
//
NV_STATUS errno_to_nv_status(int errnoCode)
{
    // Work on the magnitude so that -EINVAL and EINVAL are treated alike.
    const int code = (errnoCode < 0) ? -errnoCode : errnoCode;

    switch (code) {
        case 0:
            return NV_OK;

        case E2BIG:
        case EINVAL:
            return NV_ERR_INVALID_ARGUMENT;

        case EACCES:
            return NV_ERR_INVALID_ACCESS_TYPE;

        case EADDRINUSE:
        case EADDRNOTAVAIL:
            return NV_ERR_UVM_ADDRESS_IN_USE;

        case EFAULT:
            return NV_ERR_INVALID_ADDRESS;

        case EOVERFLOW:
            return NV_ERR_OUT_OF_RANGE;

        case EINTR:
        case EBUSY:
        case EAGAIN:
            return NV_ERR_BUSY_RETRY;

        case ENXIO:
        case ENODEV:
            return NV_ERR_MODULE_LOAD_FAILED;

        case ENOMEM:
            return NV_ERR_NO_MEMORY;

        case EPERM:
            return NV_ERR_INSUFFICIENT_PERMISSIONS;

        case ESRCH:
            return NV_ERR_PID_NOT_FOUND;

        case ETIMEDOUT:
            return NV_ERR_TIMEOUT;

        case EEXIST:
            return NV_ERR_IN_USE;

        case ENOSYS:
        case EOPNOTSUPP:
            return NV_ERR_NOT_SUPPORTED;

        case ENOENT:
            return NV_ERR_NO_VALID_PATH;

        case EIO:
            return NV_ERR_RC_ERROR;

        case ENODATA:
            return NV_ERR_OBJECT_NOT_FOUND;

        default:
            return NV_ERR_GENERIC;
    }
}
//
// Translate an NV_STATUS into an errno value.
//
// The result is always a POSITIVE errno (0 for NV_OK); callers that need the
// negated form must negate it themselves. A status with no mapping below
// triggers UVM_ASSERT_MSG and is reported as EINVAL.
//
int nv_status_to_errno(NV_STATUS status)
{
    switch (status)
    {
        case NV_OK:
            return 0;

        case NV_ERR_BUSY_RETRY:
            return EAGAIN;

        case NV_ERR_INSUFFICIENT_PERMISSIONS:
            return EPERM;

        case NV_ERR_GPU_UUID_NOT_FOUND:
            return ENODEV;

        case NV_ERR_INSUFFICIENT_RESOURCES:
        case NV_ERR_NO_MEMORY:
            return ENOMEM;

        case NV_ERR_INVALID_ACCESS_TYPE:
            return EACCES;

        case NV_ERR_INVALID_ADDRESS:
            return EFAULT;

        case NV_ERR_INVALID_ARGUMENT:
        case NV_ERR_INVALID_DEVICE:
        case NV_ERR_INVALID_PARAMETER:
        case NV_ERR_INVALID_REQUEST:
        case NV_ERR_INVALID_STATE:
            return EINVAL;

        case NV_ERR_NOT_SUPPORTED:
            return ENOSYS;

        case NV_ERR_OBJECT_NOT_FOUND:
            return ENODATA;

        case NV_ERR_MODULE_LOAD_FAILED:
            return ENXIO;

        case NV_ERR_OVERLAPPING_UVM_COMMIT:
        case NV_ERR_UVM_ADDRESS_IN_USE:
            return EADDRINUSE;

        case NV_ERR_PID_NOT_FOUND:
            return ESRCH;

        case NV_ERR_TIMEOUT:
        case NV_ERR_TIMEOUT_RETRY:
            return ETIMEDOUT;

        case NV_ERR_IN_USE:
            return EEXIST;

        case NV_ERR_NO_VALID_PATH:
            return ENOENT;

        case NV_ERR_RC_ERROR:
        case NV_ERR_ECC_ERROR:
            return EIO;

        case NV_ERR_OUT_OF_RANGE:
            return EOVERFLOW;

        default:
            // Unmapped status: flag it, then fall back to a generic errno.
            UVM_ASSERT_MSG(0, "No errno conversion set up for NV_STATUS %s\n", nvstatusToString(status));
            return EINVAL;
    }
}
//
// Returns the process ID of current, obtained through task_tgid_vnr(). No
// reference is taken and nothing is locked, so the value may already be stale
// by the time the caller looks at it.
//
unsigned uvm_get_stale_process_id(void)
{
    unsigned tgid = (unsigned)task_tgid_vnr(current);

    return tgid;
}
//
// Returns the ID of the current thread, obtained through task_pid_vnr(). No
// reference is taken on the pid.
//
unsigned uvm_get_stale_thread_id(void)
{
    unsigned tid = (unsigned)task_pid_vnr(current);

    return tid;
}
//
// Intentionally does nothing.
// NOTE(review): presumably exists as a stable symbol to break on or trace
// when a UVM test fails - confirm against the callers.
//
void on_uvm_test_fail(void)
{
    (void)NULL;
}
//
// Does nothing in a regular build. When built with __COVERITY__ defined, it
// calls __coverity_panic__().
// NOTE(review): presumably this tells the analyzer that execution does not
// continue past a failed assert, and gives debuggers a symbol to break on -
// confirm.
//
void on_uvm_assert(void)
{
    (void)NULL;
#ifdef __COVERITY__
    // The call must be a complete statement; without the terminating semicolon
    // this function fails to parse whenever __COVERITY__ is defined.
    __coverity_panic__();
#endif
}
// Performs one iteration of a spin-wait on behalf of the caller.
//
// spin: spin loop state. start_time_ns and print_time_ns are read here; only
//       print_time_ns is written.
//
// Returns NV_ERR_TIMEOUT_RETRY once at least UVM_SPIN_LOOP_PRINT_TIMEOUT_SEC
// seconds have elapsed since spin->print_time_ns (which is then reset to the
// current time), and NV_OK otherwise.
//
// NOTE(review): NV_GETTIME() is assumed to return nanoseconds, matching the
// *_ns fields it is compared against - confirm.
NV_STATUS uvm_spin_loop(uvm_spin_loop_t *spin)
{
NvU64 curr = NV_GETTIME();
// This schedule() is required for functionality, not just system
// performance. It allows RM to run and unblock the UVM driver:
//
// - UVM must service faults in order for RM to idle/preempt a context
// - RM must service interrupts which stall UVM (SW methods, stalling CE
// interrupts, etc) in order for UVM to service faults
//
// Even though UVM's bottom half is preemptable, we have encountered cases
// in which a user thread running in RM won't preempt the UVM driver's
// thread unless the UVM driver thread gives up its timeslice. This is also
// theoretically possible if the RM thread has a low nice priority.
//
// TODO: Bug 1710855: Look into proper prioritization of these threads as a longer-term
// solution.
//
// start_time_ns is not modified by this function, so once the threshold has
// been crossed every later call yields the CPU whenever NV_MAY_SLEEP() holds.
if (curr - spin->start_time_ns >= UVM_SPIN_LOOP_SCHEDULE_TIMEOUT_NS && NV_MAY_SLEEP()) {
schedule();
// Refresh the timestamp after yielding; the print-timeout check below uses it.
curr = NV_GETTIME();
}
cpu_relax();
// TODO: Bug 1710855: Also check fatal_signal_pending() here if the caller can handle it.
//
// 1000*1000*1000 converts the timeout from seconds to nanoseconds.
if (curr - spin->print_time_ns >= 1000*1000*1000*UVM_SPIN_LOOP_PRINT_TIMEOUT_SEC) {
// Restart the interval so the next timeout is reported a full period later.
spin->print_time_ns = curr;
return NV_ERR_TIMEOUT_RETRY;
}
return NV_OK;
}
// This formats a GPU UUID, in a UVM-friendly way. That is, nearly the same as
// what nvidia-smi reports. It will always prefix the UUID with UVM-GPU so
// that we know that we have a real, binary formatted UUID that will work in
// the UVM APIs.
//
// It comes out like this:
//
// UVM-GPU-d802726c-df8d-a3c3-ec53-48bdec201c27
//
// This routine will always null-terminate the string for you. This is true
// even if the buffer was too small!
//
// Return value is the number of non-null characters written.
//
// Note that if you were to let the NV2080_CTRL_CMD_GPU_GET_GID_INFO command
// return its default format, which is ASCII, not binary, then you would get
// this back:
//
// GPU-d802726c-df8d-a3c3-ec53-48bdec201c27
//
// ...which is actually a character string, and won't work for UVM API calls.
// So it's very important to be able to see the difference.
//
// Maps a 4-bit value (0..15) to its lowercase hexadecimal digit character.
// The argument is not range-checked; callers pass a single nibble.
static char uvm_digit_to_hex(unsigned value)
{
    return (value < 10) ? value + '0' : value - 10 + 'a';
}
// Formats the 16-byte binary UUID in pUuidStruct into buffer as
//
//     UVM-GPU-xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
//
// using lowercase hex digits.
//
// buffer:       destination for the null-terminated string.
// bufferLength: size of buffer in bytes. At least 45 bytes are required
//               (8 prefix + 32 digits + 4 dashes + 1 terminator).
// pUuidStruct:  UUID to format.
//
// Returns the number of non-null characters written. If the buffer is too
// small, an empty string is stored and 0 is returned. A zero-length buffer is
// left untouched, since it has no room even for the terminator.
int format_uuid_to_buffer(char *buffer, unsigned bufferLength, const NvProcessorUuid *pUuidStruct)
{
    // Bit (i + 1) set means "emit a dash after UUID byte i": bytes 3, 5, 7, 9.
    const unsigned dashMask = 1 << 4 | 1 << 6 | 1 << 8 | 1 << 10;
    char *str;
    unsigned i;

    if (bufferLength < (8 /*prefix*/+ 16 * 2 /*digits*/ + 4 * 1 /*dashes*/ + 1 /*null*/)) {
        // Only write the terminator when there is at least one byte to hold it.
        if (bufferLength > 0)
            *buffer = 0;
        return 0;
    }

    memcpy(buffer, "UVM-GPU-", 8);

    // Form the output cursor only after the size check, so that no pointer
    // past the end of a short buffer is ever computed.
    str = buffer + 8;

    for (i = 0; i < 16; i++) {
        *str++ = uvm_digit_to_hex(pUuidStruct->uuid[i] >> 4);
        *str++ = uvm_digit_to_hex(pUuidStruct->uuid[i] & 0xF);

        if (dashMask & (1 << (i + 1)))
            *str++ = '-';
    }

    *str = 0;

    return (int)(str - buffer);
}