nvidia-open-gpu-kernel-modules/kernel-open/nvidia/nv-vm.c

/*
 * SPDX-FileCopyrightText: Copyright (c) 1999-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include "os-interface.h"
#include "nv.h"
#include "nv-linux.h"
static inline void nv_set_contig_memory_uc(nvidia_pte_t *page_ptr, NvU32 num_pages)
{
#if defined(NV_SET_MEMORY_UC_PRESENT)
    struct page *page = NV_GET_PAGE_STRUCT(page_ptr->phys_addr);
    unsigned long addr = (unsigned long)page_address(page);
    set_memory_uc(addr, num_pages);
#elif defined(NV_SET_PAGES_UC_PRESENT)
    struct page *page = NV_GET_PAGE_STRUCT(page_ptr->phys_addr);
    set_pages_uc(page, num_pages);
#endif
}

static inline void nv_set_contig_memory_wb(nvidia_pte_t *page_ptr, NvU32 num_pages)
{
#if defined(NV_SET_MEMORY_UC_PRESENT)
    struct page *page = NV_GET_PAGE_STRUCT(page_ptr->phys_addr);
    unsigned long addr = (unsigned long)page_address(page);
    set_memory_wb(addr, num_pages);
#elif defined(NV_SET_PAGES_UC_PRESENT)
    struct page *page = NV_GET_PAGE_STRUCT(page_ptr->phys_addr);
    set_pages_wb(page, num_pages);
#endif
}

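/*
 * Report whether the running kernel provides an array-based page attribute
 * interface (set_memory_array_* or set_pages_array_*) for the requested
 * memory type; both helpers return 0 when only the per-range interfaces
 * are available.
 */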
static inline int nv_set_memory_array_type_present(NvU32 type)
{
    switch (type)
    {
#if defined(NV_SET_MEMORY_ARRAY_UC_PRESENT)
        case NV_MEMORY_UNCACHED:
            return 1;
        case NV_MEMORY_WRITEBACK:
            return 1;
#endif
        default:
            return 0;
    }
}

static inline int nv_set_pages_array_type_present(NvU32 type)
{
    switch (type)
    {
#if defined(NV_SET_PAGES_ARRAY_UC_PRESENT)
        case NV_MEMORY_UNCACHED:
            return 1;
        case NV_MEMORY_WRITEBACK:
            return 1;
#endif
        default:
            return 0;
    }
}

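/*
 * Apply a cache attribute to a batch of pages in a single call, using the
 * address-based (set_memory_array_*) or struct page based (set_pages_array_*)
 * kernel interface; unsupported types are reported as errors.
 */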
static inline void nv_set_memory_array_type(
    unsigned long *pages,
    NvU32 num_pages,
    NvU32 type
)
{
    switch (type)
    {
#if defined(NV_SET_MEMORY_ARRAY_UC_PRESENT)
        case NV_MEMORY_UNCACHED:
            set_memory_array_uc(pages, num_pages);
            break;
        case NV_MEMORY_WRITEBACK:
            set_memory_array_wb(pages, num_pages);
            break;
#endif
        default:
            nv_printf(NV_DBG_ERRORS,
                "NVRM: %s(): type %d unimplemented\n",
                __FUNCTION__, type);
            break;
    }
}

static inline void nv_set_pages_array_type(
    struct page **pages,
    NvU32 num_pages,
    NvU32 type
)
{
    switch (type)
    {
#if defined(NV_SET_PAGES_ARRAY_UC_PRESENT)
        case NV_MEMORY_UNCACHED:
            set_pages_array_uc(pages, num_pages);
            break;
        case NV_MEMORY_WRITEBACK:
            set_pages_array_wb(pages, num_pages);
            break;
#endif
        default:
            nv_printf(NV_DBG_ERRORS,
                "NVRM: %s(): type %d unimplemented\n",
                __FUNCTION__, type);
            break;
    }
}

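/*
 * Per-range dispatcher: translate the NV_MEMORY_* type of a contiguous run
 * of pages into the matching UC/WB helper above, logging an error for any
 * type that has no implementation here.
 */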
static inline void nv_set_contig_memory_type(
    nvidia_pte_t *page_ptr,
    NvU32 num_pages,
    NvU32 type
)
{
    switch (type)
    {
        case NV_MEMORY_UNCACHED:
            nv_set_contig_memory_uc(page_ptr, num_pages);
            break;
        case NV_MEMORY_WRITEBACK:
            nv_set_contig_memory_wb(page_ptr, num_pages);
            break;
        default:
            nv_printf(NV_DBG_ERRORS,
                "NVRM: %s(): type %d unimplemented\n",
                __FUNCTION__, type);
    }
}

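/*
 * Set the cache attribute for every page of an allocation. When the kernel
 * offers the array-based interfaces, the pages are gathered into a single
 * batch call (see the comments below on why that is preferred); otherwise
 * each page is converted individually.
 */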
static inline void nv_set_memory_type(nv_alloc_t *at, NvU32 type)
{
    NvU32 i;
    NV_STATUS status = NV_OK;
#if defined(NV_SET_MEMORY_ARRAY_UC_PRESENT)
    unsigned long *pages = NULL;
#elif defined(NV_SET_PAGES_ARRAY_UC_PRESENT)
    struct page **pages = NULL;
#else
    unsigned long *pages = NULL;
#endif
    nvidia_pte_t *page_ptr;
    struct page *page;

    if (nv_set_memory_array_type_present(type))
    {
        status = os_alloc_mem((void **)&pages,
                    at->num_pages * sizeof(unsigned long));
    }
    else if (nv_set_pages_array_type_present(type))
    {
        status = os_alloc_mem((void **)&pages,
                    at->num_pages * sizeof(struct page*));
    }

    if (status != NV_OK)
        pages = NULL;

    //
    // If the set_{memory,page}_array_* functions are in the kernel interface,
    // it's faster to use them since they work on non-contiguous memory,
    // whereas the set_{memory,page}_* functions do not.
    //
    if (pages)
    {
        for (i = 0; i < at->num_pages; i++)
        {
            page_ptr = at->page_table[i];
            page = NV_GET_PAGE_STRUCT(page_ptr->phys_addr);
#if defined(NV_SET_MEMORY_ARRAY_UC_PRESENT)
            pages[i] = (unsigned long)page_address(page);
#elif defined(NV_SET_PAGES_ARRAY_UC_PRESENT)
            pages[i] = page;
#endif
        }
#if defined(NV_SET_MEMORY_ARRAY_UC_PRESENT)
        nv_set_memory_array_type(pages, at->num_pages, type);
#elif defined(NV_SET_PAGES_ARRAY_UC_PRESENT)
        nv_set_pages_array_type(pages, at->num_pages, type);
#endif
        os_free_mem(pages);
    }
    //
    // If the set_{memory,page}_array_* functions aren't present in the kernel
    // interface, each page has to be set individually, which has been measured
    // to be ~10x slower than using the set_{memory,page}_array_* functions.
    //
    else
    {
        for (i = 0; i < at->num_pages; i++)
            nv_set_contig_memory_type(at->page_table[i], 1, type);
    }
}

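/*
 * Highest byte address of system memory across all online NUMA nodes,
 * derived from the largest end PFN; used below to decide whether a device's
 * DMA mask can cover all of system memory.
 */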
static NvU64 nv_get_max_sysmem_address(void)
{
    NvU64 global_max_pfn = 0ULL;
    int node_id;

    for_each_online_node(node_id)
    {
        global_max_pfn = max(global_max_pfn, (NvU64)node_end_pfn(node_id));
    }

    return ((global_max_pfn + 1) << PAGE_SHIFT) - 1;
}

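/*
 * Build the GFP flags for a system memory allocation: optionally restrict it
 * to the 32-bit DMA zone, prefer failing the allocation over aggressive
 * retries (__GFP_RETRY_MAYFAIL / __GFP_NORETRY), and request zeroed,
 * node-local, or compound pages as the allocation flags demand.
 */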
static unsigned int nv_compute_gfp_mask(
    nv_state_t *nv,
    nv_alloc_t *at
)
{
    unsigned int gfp_mask = NV_GFP_KERNEL;
    struct device *dev = at->dev;

    /*
     * If we know that SWIOTLB is enabled (and therefore we avoid calling the
     * kernel to DMA-remap the pages), or if we are using dma_direct (which may
     * transparently use the SWIOTLB for pages that are unaddressable by the
     * device, in kernel versions 5.0 and later), limit our allocation pool
     * to the first 4GB to avoid allocating pages outside of our device's
     * addressable limit.
     * Also, limit the allocation to the first 4GB if explicitly requested by
     * setting the "nv->force_dma32_alloc" variable.
     */
    if (!nv || !nv_requires_dma_remap(nv) || nv_is_dma_direct(dev) ||
        nv->force_dma32_alloc)
    {
        NvU64 max_sysmem_address = nv_get_max_sysmem_address();
        if ((dev && dev->dma_mask && (*(dev->dma_mask) < max_sysmem_address)) ||
            (nv && nv->force_dma32_alloc))
        {
            gfp_mask = NV_GFP_DMA32;
        }
    }

#if defined(__GFP_RETRY_MAYFAIL)
    gfp_mask |= __GFP_RETRY_MAYFAIL;
#elif defined(__GFP_NORETRY)
    gfp_mask |= __GFP_NORETRY;
#endif
#if defined(__GFP_ZERO)
    if (at->flags.zeroed)
        gfp_mask |= __GFP_ZERO;
#endif
#if defined(__GFP_THISNODE)
    if (at->flags.node)
        gfp_mask |= __GFP_THISNODE;
#endif

    // Compound pages are required by vm_insert_page for high-order page
    // allocations.
    if (at->order > 0)
        gfp_mask |= __GFP_COMP;

    return gfp_mask;
}

/*
 * This function is needed for allocating contiguous physical memory in xen
 * dom0. Because of the use of xen sw iotlb in xen dom0, memory allocated by
 * NV_GET_FREE_PAGES may not be machine contiguous when size is more than
 * 1 page. nv_alloc_coherent_pages() will give us machine contiguous memory.
 * Even though we get dma_address directly in this function, we will
 * still call pci_map_page() later to get dma address. This is fine as it
 * will return the same machine address.
 */
static NV_STATUS nv_alloc_coherent_pages(
    nv_state_t *nv,
    nv_alloc_t *at
)
{
    nvidia_pte_t *page_ptr;
    NvU32 i;
    unsigned int gfp_mask;
    unsigned long virt_addr = 0;
    dma_addr_t bus_addr;
    nv_linux_state_t *nvl;
    struct device *dev;

    if (!nv)
    {
        nv_printf(NV_DBG_MEMINFO,
            "NVRM: VM: %s: coherent page alloc on nvidiactl not supported\n",
            __FUNCTION__);
        return NV_ERR_NOT_SUPPORTED;
    }

    nvl = NV_GET_NVL_FROM_NV_STATE(nv);
    dev = nvl->dev;

    gfp_mask = nv_compute_gfp_mask(nv, at);

    virt_addr = (unsigned long)dma_alloc_coherent(dev,
                                                  at->num_pages * PAGE_SIZE,
                                                  &bus_addr,
                                                  gfp_mask);
    if (!virt_addr)
    {
        nv_printf(NV_DBG_MEMINFO,
            "NVRM: VM: %s: failed to allocate memory\n", __FUNCTION__);
        return NV_ERR_NO_MEMORY;
    }

    for (i = 0; i < at->num_pages; i++)
    {
        page_ptr = at->page_table[i];
        page_ptr->virt_addr = virt_addr + i * PAGE_SIZE;
        page_ptr->phys_addr = virt_to_phys((void *)page_ptr->virt_addr);
        page_ptr->dma_addr = bus_addr + i * PAGE_SIZE;
    }

    if (at->cache_type != NV_MEMORY_CACHED)
    {
        nv_set_contig_memory_type(at->page_table[0],
                                  at->num_pages,
                                  NV_MEMORY_UNCACHED);
    }

    at->flags.coherent = NV_TRUE;
    return NV_OK;
}

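/*
 * Counterpart to nv_alloc_coherent_pages(): restore the write-back cache
 * attribute if the allocation was mapped uncached, then return the whole
 * contiguous region with dma_free_coherent().
 */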
static void nv_free_coherent_pages(
    nv_alloc_t *at
)
{
    nvidia_pte_t *page_ptr;
    struct device *dev = at->dev;

    page_ptr = at->page_table[0];

    if (at->cache_type != NV_MEMORY_CACHED)
    {
        nv_set_contig_memory_type(at->page_table[0],
                                  at->num_pages,
                                  NV_MEMORY_WRITEBACK);
    }

    dma_free_coherent(dev, at->num_pages * PAGE_SIZE,
                      (void *)page_ptr->virt_addr, page_ptr->dma_addr);
}

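/*
 * Allocate a single physically contiguous block backing at->num_pages OS
 * pages, record the physical/DMA address of each page in at->page_table,
 * and apply the requested cache attribute. Falls back to
 * nv_alloc_coherent_pages() on Xen dom0, for unencrypted allocations, and
 * (on vGPU hypervisors) when the normal allocation fails.
 */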
NV_STATUS nv_alloc_contig_pages(
    nv_state_t *nv,
    nv_alloc_t *at
)
{
    NV_STATUS status;
    nvidia_pte_t *page_ptr;
    NvU32 i, j;
    unsigned int gfp_mask;
    unsigned long virt_addr = 0;
    NvU64 phys_addr;
    struct device *dev = at->dev;

    nv_printf(NV_DBG_MEMINFO,
        "NVRM: VM: %s: %u pages\n", __FUNCTION__, at->num_pages);

    // TODO: This is a temporary WAR, and will be removed after fixing bug 200732409.
    if (os_is_xen_dom0() || at->flags.unencrypted)
        return nv_alloc_coherent_pages(nv, at);

    at->order = get_order(at->num_pages * PAGE_SIZE);
    gfp_mask = nv_compute_gfp_mask(nv, at);

    if (at->flags.node)
    {
        NV_ALLOC_PAGES_NODE(virt_addr, at->node_id, at->order, gfp_mask);
    }
    else
    {
        NV_GET_FREE_PAGES(virt_addr, at->order, gfp_mask);
    }

    if (virt_addr == 0)
    {
        if (os_is_vgx_hyper())
        {
            nv_printf(NV_DBG_MEMINFO,
                "NVRM: VM: %s: failed to allocate memory, trying coherent memory\n",
                __FUNCTION__);

            status = nv_alloc_coherent_pages(nv, at);
            return status;
        }

        nv_printf(NV_DBG_MEMINFO,
            "NVRM: VM: %s: failed to allocate memory\n", __FUNCTION__);
        return NV_ERR_NO_MEMORY;
    }

#if !defined(__GFP_ZERO)
    if (at->flags.zeroed)
        memset((void *)virt_addr, 0, (at->num_pages * PAGE_SIZE));
#endif

    for (i = 0; i < at->num_pages; i++, virt_addr += PAGE_SIZE)
    {
        phys_addr = nv_get_kern_phys_address(virt_addr);
        if (phys_addr == 0)
        {
            nv_printf(NV_DBG_ERRORS,
                "NVRM: VM: %s: failed to look up physical address\n",
                __FUNCTION__);
            status = NV_ERR_OPERATING_SYSTEM;
            goto failed;
        }

        page_ptr = at->page_table[i];
        page_ptr->phys_addr = phys_addr;
        page_ptr->page_count = NV_GET_PAGE_COUNT(page_ptr);
        page_ptr->virt_addr = virt_addr;
        page_ptr->dma_addr = nv_phys_to_dma(dev, page_ptr->phys_addr);

        NV_MAYBE_RESERVE_PAGE(page_ptr);
    }

    if (at->cache_type != NV_MEMORY_CACHED)
    {
        nv_set_contig_memory_type(at->page_table[0],
                                  at->num_pages,
                                  NV_MEMORY_UNCACHED);
    }

    at->flags.coherent = NV_FALSE;
    return NV_OK;

failed:
    if (i > 0)
    {
        for (j = 0; j < i; j++)
            NV_MAYBE_UNRESERVE_PAGE(at->page_table[j]);
    }

    page_ptr = at->page_table[0];
    NV_FREE_PAGES(page_ptr->virt_addr, at->order);

    return status;
}

void nv_free_contig_pages(
    nv_alloc_t *at
)
{
    nvidia_pte_t *page_ptr;
    unsigned int i;

    nv_printf(NV_DBG_MEMINFO,
        "NVRM: VM: %s: %u pages\n", __FUNCTION__, at->num_pages);

    if (at->flags.coherent)
        return nv_free_coherent_pages(at);

    if (at->cache_type != NV_MEMORY_CACHED)
    {
        nv_set_contig_memory_type(at->page_table[0],
                                  at->num_pages,
                                  NV_MEMORY_WRITEBACK);
    }

    for (i = 0; i < at->num_pages; i++)
    {
        page_ptr = at->page_table[i];

        if (NV_GET_PAGE_COUNT(page_ptr) != page_ptr->page_count)
        {
            static int count = 0;
            if (count++ < NV_MAX_RECURRING_WARNING_MESSAGES)
            {
                nv_printf(NV_DBG_ERRORS,
                    "NVRM: VM: %s: page count != initial page count (%u,%u)\n",
                    __FUNCTION__, NV_GET_PAGE_COUNT(page_ptr),
                    page_ptr->page_count);
            }
        }
        NV_MAYBE_UNRESERVE_PAGE(page_ptr);
    }

    page_ptr = at->page_table[0];
    NV_FREE_PAGES(page_ptr->virt_addr, at->order);
}

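/*
 * Allocate at->num_pages OS pages of (potentially discontiguous) system
 * memory. Each underlying allocation is of size PAGE_SIZE << at->order and
 * is then broken down into order-0 entries in at->page_table; for example,
 * with a 4 KiB PAGE_SIZE and at->order == 9 (an EGM-style allocation), every
 * 2 MiB allocation fills 512 consecutive page table entries.
 */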
NV_STATUS nv_alloc_system_pages(
    nv_state_t *nv,
    nv_alloc_t *at
)
{
    NV_STATUS status;
    nvidia_pte_t *page_ptr;
    NvU32 i, j;
    unsigned int gfp_mask;
    unsigned long virt_addr = 0;
    NvU64 phys_addr;
    struct device *dev = at->dev;
    dma_addr_t bus_addr;
    // Order should be zero except for EGM allocations.
    unsigned int alloc_page_size = PAGE_SIZE << at->order;
    unsigned int alloc_page_shift = BIT_IDX_32(alloc_page_size);
    unsigned int alloc_num_pages = NV_CEIL(at->num_pages * PAGE_SIZE, alloc_page_size);
    unsigned int sub_page_idx;
    unsigned int sub_page_offset;
    unsigned int os_pages_in_page = alloc_page_size / PAGE_SIZE;

    nv_printf(NV_DBG_MEMINFO,
        "NVRM: VM: %s: %u order0 pages, %u order\n",
        __FUNCTION__, at->num_pages, at->order);

    gfp_mask = nv_compute_gfp_mask(nv, at);

    for (i = 0; i < alloc_num_pages; i++)
    {
        if (at->flags.unencrypted && (dev != NULL))
        {
            virt_addr = (unsigned long)dma_alloc_coherent(dev,
                                                          alloc_page_size,
                                                          &bus_addr,
                                                          gfp_mask);
            at->flags.coherent = NV_TRUE;
        }
        else if (at->flags.node)
        {
            NV_ALLOC_PAGES_NODE(virt_addr, at->node_id, at->order, gfp_mask);
        }
        else
        {
            NV_GET_FREE_PAGES(virt_addr, at->order, gfp_mask);
        }

        if (virt_addr == 0)
        {
            nv_printf(NV_DBG_MEMINFO,
                "NVRM: VM: %s: failed to allocate memory\n", __FUNCTION__);
            status = NV_ERR_NO_MEMORY;
            goto failed;
        }
#if !defined(__GFP_ZERO)
        if (at->flags.zeroed)
            memset((void *)virt_addr, 0, alloc_page_size);
#endif

        sub_page_offset = 0;
        for (sub_page_idx = 0; sub_page_idx < os_pages_in_page; sub_page_idx++)
        {
            unsigned long sub_page_virt_addr = virt_addr + sub_page_offset;

            phys_addr = nv_get_kern_phys_address(sub_page_virt_addr);
            if (phys_addr == 0)
            {
                nv_printf(NV_DBG_ERRORS,
                    "NVRM: VM: %s: failed to look up physical address\n",
                    __FUNCTION__);
                NV_FREE_PAGES(sub_page_virt_addr, at->order);
                status = NV_ERR_OPERATING_SYSTEM;
                goto failed;
            }
#if defined(_PAGE_NX)
            if (((_PAGE_NX & pgprot_val(PAGE_KERNEL)) != 0) &&
                (phys_addr < 0x400000))
            {
                nv_printf(NV_DBG_SETUP,
                    "NVRM: VM: %s: discarding page @ 0x%llx\n",
                    __FUNCTION__, phys_addr);
                --i;
                continue;
            }
#endif

            page_ptr = at->page_table[(i * os_pages_in_page) + sub_page_idx];
            page_ptr->phys_addr = phys_addr;
            page_ptr->page_count = NV_GET_PAGE_COUNT(page_ptr);
            page_ptr->virt_addr = sub_page_virt_addr;

            //
            // Use unencrypted dma_addr returned by dma_alloc_coherent() as
            // nv_phys_to_dma() returns encrypted dma_addr when AMD SEV is enabled.
            //
            if (at->flags.coherent)
                page_ptr->dma_addr = bus_addr;
            else if (dev != NULL)
                page_ptr->dma_addr = nv_phys_to_dma(dev, page_ptr->phys_addr);
            else
                page_ptr->dma_addr = page_ptr->phys_addr;

            NV_MAYBE_RESERVE_PAGE(page_ptr);

            sub_page_offset += PAGE_SIZE;
        }
    }

    if (at->cache_type != NV_MEMORY_CACHED)
        nv_set_memory_type(at, NV_MEMORY_UNCACHED);

    return NV_OK;

failed:
    if (i > 0)
    {
        for (j = 0; j < i; j++)
        {
            page_ptr = at->page_table[j * os_pages_in_page];
            NV_MAYBE_UNRESERVE_PAGE(page_ptr);

            if (at->flags.coherent)
            {
                dma_free_coherent(dev, alloc_page_size,
                                  (void *)page_ptr->virt_addr,
                                  page_ptr->dma_addr);
            }
            else
            {
                NV_FREE_PAGES(page_ptr->virt_addr, at->order);
            }
        }
    }

    return status;
}

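/*
 * Release pages obtained from nv_alloc_system_pages(): restore write-back
 * caching if needed, warn about any page whose reference count changed while
 * it was owned by the driver, and then free one underlying allocation per
 * os_pages_in_page page table entries.
 */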
void nv_free_system_pages(
    nv_alloc_t *at
)
{
    nvidia_pte_t *page_ptr;
    unsigned int i;
    struct device *dev = at->dev;
    // Order should be zero except for EGM allocations.
    unsigned int alloc_page_size = PAGE_SIZE << at->order;
    unsigned int alloc_page_shift = BIT_IDX_32(alloc_page_size);
    unsigned int alloc_num_pages = NV_CEIL(at->num_pages * PAGE_SIZE, alloc_page_size);
    unsigned int os_pages_in_page = alloc_page_size / PAGE_SIZE;

    nv_printf(NV_DBG_MEMINFO,
        "NVRM: VM: %s: %u pages\n", __FUNCTION__, at->num_pages);

    if (at->cache_type != NV_MEMORY_CACHED)
        nv_set_memory_type(at, NV_MEMORY_WRITEBACK);

    for (i = 0; i < at->num_pages; i++)
    {
        page_ptr = at->page_table[i];

        if (NV_GET_PAGE_COUNT(page_ptr) != page_ptr->page_count)
        {
            static int count = 0;
            if (count++ < NV_MAX_RECURRING_WARNING_MESSAGES)
            {
                nv_printf(NV_DBG_ERRORS,
                    "NVRM: VM: %s: page count != initial page count (%u,%u)\n",
                    __FUNCTION__, NV_GET_PAGE_COUNT(page_ptr),
                    page_ptr->page_count);
            }
        }
        NV_MAYBE_UNRESERVE_PAGE(page_ptr);
    }

    for (i = 0; i < at->num_pages; i += os_pages_in_page)
    {
        page_ptr = at->page_table[i];

        if (at->flags.coherent)
        {
            dma_free_coherent(dev, alloc_page_size,
                              (void *)page_ptr->virt_addr, page_ptr->dma_addr);
        }
        else
        {
            NV_FREE_PAGES(page_ptr->virt_addr, at->order);
        }
    }
}

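/*
 * Map an array of struct page pointers into a virtually contiguous kernel
 * mapping via nv_vmap(). This may sleep, so it refuses to run (and logs an
 * error) in atomic context.
 */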
NvUPtr nv_vm_map_pages(
    struct page **pages,
    NvU32 count,
    NvBool cached,
    NvBool unencrypted
)
{
    NvUPtr virt_addr = 0;

    if (!NV_MAY_SLEEP())
    {
        nv_printf(NV_DBG_ERRORS,
            "NVRM: %s: can't map %d pages, invalid context!\n",
            __FUNCTION__, count);
        os_dbg_breakpoint();
        return virt_addr;
    }

    virt_addr = nv_vmap(pages, count, cached, unencrypted);
    return virt_addr;
}

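/*
 * Tear down a mapping created by nv_vm_map_pages(). Like the map path, this
 * must be called from a context that is allowed to sleep.
 */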
void nv_vm_unmap_pages(
    NvUPtr virt_addr,
    NvU32 count
)
{
    if (!NV_MAY_SLEEP())
    {
        nv_printf(NV_DBG_ERRORS,
            "NVRM: %s: can't unmap %d pages at 0x%0llx, "
            "invalid context!\n", __FUNCTION__, count, virt_addr);
        os_dbg_breakpoint();
        return;
    }

    nv_vunmap(virt_addr, count);
}