nvidia-open-gpu-kernel-modules/kernel-open/nvidia-drm/nvidia-drm-fence.c

/*
* Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include "nvidia-drm-conftest.h"
#if defined(NV_DRM_AVAILABLE)
#if defined(NV_DRM_DRMP_H_PRESENT)
#include <drm/drmP.h>
#endif
#include "nvidia-drm-priv.h"
#include "nvidia-drm-ioctl.h"
#include "nvidia-drm-gem.h"
#include "nvidia-drm-fence.h"
#include "nvidia-dma-resv-helper.h"
#if defined(NV_DRM_FENCE_AVAILABLE)
#include "nvidia-dma-fence-helper.h"
#define NV_DRM_SEMAPHORE_SURFACE_FENCE_MAX_TIMEOUT_MS 5000
struct nv_drm_fence_context;
struct nv_drm_fence_context_ops {
void (*destroy)(struct nv_drm_fence_context *nv_fence_context);
};
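/*
 * Base class shared by the fence context implementations below (prime and
 * semaphore surface). It embeds a GEM object so userspace can reference the
 * context through a GEM handle, and an ops table so generic code can tear
 * down either variant.
 */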
struct nv_drm_fence_context {
struct nv_drm_gem_object base;
const struct nv_drm_fence_context_ops *ops;
struct nv_drm_device *nv_dev;
uint64_t context;
NvU64 fenceSemIndex; /* Index into semaphore surface */
};
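/*
 * Fence context backed by a semaphore surface imported and mapped through
 * NVKMS. A channel event callback (nv_drm_gem_prime_fence_event) signals
 * pending fences in order as the semaphore value advances.
 */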
struct nv_drm_prime_fence_context {
struct nv_drm_fence_context base;
/* Mapped semaphore surface */
struct NvKmsKapiMemory *pSemSurface;
NvU32 *pLinearAddress;
/* Protects nv_drm_fence_context::{pending, last_seqno} */
spinlock_t lock;
/*
* Software signaling structures. __nv_drm_prime_fence_context_new()
* allocates the channel event and __nv_drm_prime_fence_context_destroy()
* frees it. There is no simultaneous read/write access to 'cb', so it
* does not require spin-lock protection.
*/
struct NvKmsKapiChannelEvent *cb;
/* List of pending fences which are not yet signaled */
struct list_head pending;
unsigned last_seqno;
};
struct nv_drm_prime_fence {
struct list_head list_entry;
nv_dma_fence_t base;
spinlock_t lock;
};
static inline
struct nv_drm_prime_fence *to_nv_drm_prime_fence(nv_dma_fence_t *fence)
{
return container_of(fence, struct nv_drm_prime_fence, base);
}
static const char*
nv_drm_gem_fence_op_get_driver_name(nv_dma_fence_t *fence)
{
return "NVIDIA";
}
static const char*
nv_drm_gem_prime_fence_op_get_timeline_name(nv_dma_fence_t *fence)
{
return "nvidia.prime";
}
static bool nv_drm_gem_prime_fence_op_enable_signaling(nv_dma_fence_t *fence)
{
// DO NOTHING
return true;
}
static void nv_drm_gem_prime_fence_op_release(nv_dma_fence_t *fence)
{
struct nv_drm_prime_fence *nv_fence = to_nv_drm_prime_fence(fence);
nv_drm_free(nv_fence);
}
static signed long
nv_drm_gem_prime_fence_op_wait(nv_dma_fence_t *fence,
bool intr, signed long timeout)
{
/*
* If the waiter requests to wait with no timeout, force a timeout to ensure
* that it won't get stuck forever in the kernel if something were to go
* wrong with signaling, such as a malicious userspace not releasing the
* semaphore.
*
* 96 ms (roughly 6 frames @ 60 Hz) is arbitrarily chosen to be long enough
* that it should never get hit during normal operation, but not so long
* that the system becomes unresponsive.
*/
return nv_dma_fence_default_wait(fence, intr,
(timeout == MAX_SCHEDULE_TIMEOUT) ?
msecs_to_jiffies(96) : timeout);
}
static const nv_dma_fence_ops_t nv_drm_gem_prime_fence_ops = {
.get_driver_name = nv_drm_gem_fence_op_get_driver_name,
.get_timeline_name = nv_drm_gem_prime_fence_op_get_timeline_name,
.enable_signaling = nv_drm_gem_prime_fence_op_enable_signaling,
.release = nv_drm_gem_prime_fence_op_release,
.wait = nv_drm_gem_prime_fence_op_wait,
};
static inline void
__nv_drm_prime_fence_signal(struct nv_drm_prime_fence *nv_fence)
{
list_del(&nv_fence->list_entry);
nv_dma_fence_signal(&nv_fence->base);
nv_dma_fence_put(&nv_fence->base);
}
static void nv_drm_gem_prime_force_fence_signal(
struct nv_drm_prime_fence_context *nv_fence_context)
{
WARN_ON(!spin_is_locked(&nv_fence_context->lock));
while (!list_empty(&nv_fence_context->pending)) {
struct nv_drm_prime_fence *nv_fence = list_first_entry(
&nv_fence_context->pending,
typeof(*nv_fence),
list_entry);
__nv_drm_prime_fence_signal(nv_fence);
}
}
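/*
 * Channel event callback registered with NVKMS. Reads the current semaphore
 * payload for this context's index and signals, in submission order, every
 * pending fence whose sequence number has been reached.
 */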
static void nv_drm_gem_prime_fence_event
(
void *dataPtr,
NvU32 dataU32
)
{
struct nv_drm_prime_fence_context *nv_fence_context = dataPtr;
spin_lock(&nv_fence_context->lock);
while (!list_empty(&nv_fence_context->pending)) {
struct nv_drm_prime_fence *nv_fence = list_first_entry(
&nv_fence_context->pending,
typeof(*nv_fence),
list_entry);
/* Index into surface with 16 byte stride */
unsigned int seqno = *((nv_fence_context->pLinearAddress) +
(nv_fence_context->base.fenceSemIndex * 4));
if (nv_fence->base.seqno > seqno) {
/*
* Fences in the list are kept in increasing order of sequence
* number; break out of the loop at the first fence that is not
* yet ready to signal.
*/
break;
}
__nv_drm_prime_fence_signal(nv_fence);
}
spin_unlock(&nv_fence_context->lock);
}
static inline struct nv_drm_prime_fence_context*
to_nv_prime_fence_context(struct nv_drm_fence_context *nv_fence_context) {
return container_of(nv_fence_context, struct nv_drm_prime_fence_context, base);
}
static void __nv_drm_prime_fence_context_destroy(
struct nv_drm_fence_context *nv_fence_context)
{
struct nv_drm_device *nv_dev = nv_fence_context->nv_dev;
struct nv_drm_prime_fence_context *nv_prime_fence_context =
to_nv_prime_fence_context(nv_fence_context);
/*
* Free the channel event before destroying the fence context; otherwise
* the event callback continues to get called.
*/
nvKms->freeChannelEvent(nv_dev->pDevice, nv_prime_fence_context->cb);
/* Force signal all pending fences and empty pending list */
spin_lock(&nv_prime_fence_context->lock);
nv_drm_gem_prime_force_fence_signal(nv_prime_fence_context);
spin_unlock(&nv_prime_fence_context->lock);
/* Free nvkms resources */
nvKms->unmapMemory(nv_dev->pDevice,
nv_prime_fence_context->pSemSurface,
NVKMS_KAPI_MAPPING_TYPE_KERNEL,
(void *) nv_prime_fence_context->pLinearAddress);
nvKms->freeMemory(nv_dev->pDevice, nv_prime_fence_context->pSemSurface);
nv_drm_free(nv_fence_context);
}
static struct nv_drm_fence_context_ops nv_drm_prime_fence_context_ops = {
.destroy = __nv_drm_prime_fence_context_destroy,
};
static inline struct nv_drm_prime_fence_context *
__nv_drm_prime_fence_context_new(
struct nv_drm_device *nv_dev,
struct drm_nvidia_prime_fence_context_create_params *p)
{
struct nv_drm_prime_fence_context *nv_prime_fence_context;
struct NvKmsKapiMemory *pSemSurface;
NvU32 *pLinearAddress;
/* Allocate backing nvkms resources */
pSemSurface = nvKms->importMemory(nv_dev->pDevice,
p->size,
p->import_mem_nvkms_params_ptr,
p->import_mem_nvkms_params_size);
if (!pSemSurface) {
NV_DRM_DEV_LOG_ERR(
nv_dev,
"Failed to import fence semaphore surface");
goto failed;
}
if (!nvKms->mapMemory(nv_dev->pDevice,
pSemSurface,
NVKMS_KAPI_MAPPING_TYPE_KERNEL,
(void **) &pLinearAddress)) {
NV_DRM_DEV_LOG_ERR(
nv_dev,
"Failed to map fence semaphore surface");
goto failed_to_map_memory;
}
/*
* Allocate a fence context object, initialize it and allocate channel
* event for it.
*/
if ((nv_prime_fence_context = nv_drm_calloc(
1,
sizeof(*nv_prime_fence_context))) == NULL) {
goto failed_alloc_fence_context;
}
/*
* nv_dma_fence_context_alloc() cannot fail, so we do not need
* to check a return value.
*/
nv_prime_fence_context->base.ops = &nv_drm_prime_fence_context_ops;
nv_prime_fence_context->base.nv_dev = nv_dev;
nv_prime_fence_context->base.context = nv_dma_fence_context_alloc(1);
nv_prime_fence_context->base.fenceSemIndex = p->index;
nv_prime_fence_context->pSemSurface = pSemSurface;
nv_prime_fence_context->pLinearAddress = pLinearAddress;
INIT_LIST_HEAD(&nv_prime_fence_context->pending);
spin_lock_init(&nv_prime_fence_context->lock);
/*
* Except for 'cb', the fence context must be completely initialized
* before the channel event is allocated, because the fence context may
* start receiving events immediately after allocation.
*
* There is no simultaneous read/write access to 'cb', so it does not
* require spin-lock protection.
*/
nv_prime_fence_context->cb =
nvKms->allocateChannelEvent(nv_dev->pDevice,
nv_drm_gem_prime_fence_event,
nv_prime_fence_context,
p->event_nvkms_params_ptr,
p->event_nvkms_params_size);
if (!nv_prime_fence_context->cb) {
NV_DRM_DEV_LOG_ERR(nv_dev,
"Failed to allocate fence signaling event");
goto failed_to_allocate_channel_event;
}
return nv_prime_fence_context;
failed_to_allocate_channel_event:
nv_drm_free(nv_prime_fence_context);
failed_alloc_fence_context:
nvKms->unmapMemory(nv_dev->pDevice,
pSemSurface,
NVKMS_KAPI_MAPPING_TYPE_KERNEL,
(void *) pLinearAddress);
failed_to_map_memory:
nvKms->freeMemory(nv_dev->pDevice, pSemSurface);
failed:
return NULL;
}
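/*
 * Allocates a fence for 'seqno' on the prime fence context and appends it to
 * the pending list. If the sequence number appears to have wrapped (i.e. it
 * is lower than the last one issued), all currently pending fences are
 * force-signaled first so none of them get stranded.
 */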
static nv_dma_fence_t *__nv_drm_prime_fence_context_create_fence(
struct nv_drm_prime_fence_context *nv_prime_fence_context,
unsigned int seqno)
{
struct nv_drm_prime_fence *nv_fence;
int ret = 0;
if ((nv_fence = nv_drm_calloc(1, sizeof(*nv_fence))) == NULL) {
ret = -ENOMEM;
goto out;
}
spin_lock(&nv_prime_fence_context->lock);
/*
* If the seqno wrapped, force-signal the pending fences to make sure
* none of them get stuck.
*/
if (seqno < nv_prime_fence_context->last_seqno) {
nv_drm_gem_prime_force_fence_signal(nv_prime_fence_context);
}
INIT_LIST_HEAD(&nv_fence->list_entry);
spin_lock_init(&nv_fence->lock);
nv_dma_fence_init(&nv_fence->base, &nv_drm_gem_prime_fence_ops,
&nv_fence->lock, nv_prime_fence_context->base.context,
seqno);
/* The context maintains a reference to any pending fences. */
nv_dma_fence_get(&nv_fence->base);
list_add_tail(&nv_fence->list_entry, &nv_prime_fence_context->pending);
nv_prime_fence_context->last_seqno = seqno;
spin_unlock(&nv_prime_fence_context->lock);
out:
return ret != 0 ? ERR_PTR(ret) : &nv_fence->base;
}
int nv_drm_fence_supported_ioctl(struct drm_device *dev,
void *data, struct drm_file *filep)
{
struct nv_drm_device *nv_dev = to_nv_device(dev);
return nv_dev->pDevice ? 0 : -EINVAL;
}
static inline struct nv_drm_fence_context *to_nv_fence_context(
struct nv_drm_gem_object *nv_gem)
{
if (nv_gem != NULL) {
return container_of(nv_gem, struct nv_drm_fence_context, base);
}
return NULL;
}
/*
* Teardown of the 'struct nv_drm_fence_context' object is not expected
* to happen from any worker thread; if it does, it causes a deadlock,
* because the teardown sequence flushes all existing worker threads.
*/
static void
__nv_drm_fence_context_gem_free(struct nv_drm_gem_object *nv_gem)
{
struct nv_drm_fence_context *nv_fence_context = to_nv_fence_context(nv_gem);
nv_fence_context->ops->destroy(nv_fence_context);
}
const struct nv_drm_gem_object_funcs nv_fence_context_gem_ops = {
.free = __nv_drm_fence_context_gem_free,
};
static inline
struct nv_drm_fence_context *
__nv_drm_fence_context_lookup(
struct drm_device *dev,
struct drm_file *filp,
u32 handle)
{
struct nv_drm_gem_object *nv_gem =
nv_drm_gem_object_lookup(dev, filp, handle);
if (nv_gem != NULL && nv_gem->ops != &nv_fence_context_gem_ops) {
nv_drm_gem_object_unreference_unlocked(nv_gem);
return NULL;
}
return to_nv_fence_context(nv_gem);
}
static int
__nv_drm_fence_context_gem_init(struct drm_device *dev,
struct nv_drm_fence_context *nv_fence_context,
u32 *handle,
struct drm_file *filep)
{
struct nv_drm_device *nv_dev = to_nv_device(dev);
nv_drm_gem_object_init(nv_dev,
&nv_fence_context->base,
&nv_fence_context_gem_ops,
0 /* size */,
NULL /* pMemory */);
return nv_drm_gem_handle_create_drop_reference(filep,
&nv_fence_context->base,
handle);
}
int nv_drm_prime_fence_context_create_ioctl(struct drm_device *dev,
void *data, struct drm_file *filep)
{
struct nv_drm_device *nv_dev = to_nv_device(dev);
struct drm_nvidia_prime_fence_context_create_params *p = data;
struct nv_drm_prime_fence_context *nv_prime_fence_context;
int err;
if (nv_dev->pDevice == NULL) {
return -EOPNOTSUPP;
}
nv_prime_fence_context = __nv_drm_prime_fence_context_new(nv_dev, p);
if (!nv_prime_fence_context) {
goto done;
}
err = __nv_drm_fence_context_gem_init(dev,
&nv_prime_fence_context->base,
&p->handle,
filep);
if (err) {
__nv_drm_prime_fence_context_destroy(&nv_prime_fence_context->base);
}
return err;
done:
return -ENOMEM;
}
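/*
 * Attaches 'fence' to the reservation object of the given GEM object, as
 * either a shared or an exclusive fence. A fence slot is reserved first;
 * a reservation failure is logged and returned to the caller.
 */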
static int __nv_drm_gem_attach_fence(struct nv_drm_gem_object *nv_gem,
nv_dma_fence_t *fence,
bool shared)
{
nv_dma_resv_t *resv = nv_drm_gem_res_obj(nv_gem);
int ret;
nv_dma_resv_lock(resv, NULL);
ret = nv_dma_resv_reserve_fences(resv, 1, shared);
if (ret == 0) {
if (shared) {
nv_dma_resv_add_shared_fence(resv, fence);
} else {
nv_dma_resv_add_excl_fence(resv, fence);
}
} else {
NV_DRM_LOG_ERR("Failed to reserve fence. Error code: %d", ret);
}
nv_dma_resv_unlock(resv);
return ret;
}
int nv_drm_gem_prime_fence_attach_ioctl(struct drm_device *dev,
void *data, struct drm_file *filep)
{
int ret = -EINVAL;
struct nv_drm_device *nv_dev = to_nv_device(dev);
struct drm_nvidia_gem_prime_fence_attach_params *p = data;
struct nv_drm_gem_object *nv_gem;
struct nv_drm_fence_context *nv_fence_context;
nv_dma_fence_t *fence;
if (nv_dev->pDevice == NULL) {
ret = -EOPNOTSUPP;
goto done;
}
if (p->__pad != 0) {
NV_DRM_DEV_LOG_ERR(nv_dev, "Padding fields must be zeroed");
goto done;
}
nv_gem = nv_drm_gem_object_lookup(nv_dev->dev, filep, p->handle);
if (!nv_gem) {
NV_DRM_DEV_LOG_ERR(
nv_dev,
"Failed to lookup gem object for fence attach: 0x%08x",
p->handle);
goto done;
}
if ((nv_fence_context = __nv_drm_fence_context_lookup(
nv_dev->dev,
filep,
p->fence_context_handle)) == NULL) {
NV_DRM_DEV_LOG_ERR(
nv_dev,
"Failed to lookup gem object for fence context: 0x%08x",
p->fence_context_handle);
goto fence_context_lookup_failed;
}
if (nv_fence_context->ops !=
&nv_drm_prime_fence_context_ops) {
NV_DRM_DEV_LOG_ERR(
nv_dev,
"Wrong fence context type: 0x%08x",
p->fence_context_handle);
goto fence_context_create_fence_failed;
}
fence = __nv_drm_prime_fence_context_create_fence(
to_nv_prime_fence_context(nv_fence_context),
p->sem_thresh);
if (IS_ERR(fence)) {
ret = PTR_ERR(fence);
NV_DRM_DEV_LOG_ERR(
nv_dev,
"Failed to allocate fence: 0x%08x", p->handle);
goto fence_context_create_fence_failed;
}
ret = __nv_drm_gem_attach_fence(nv_gem, fence, true /* exclusive */);
nv_dma_fence_put(fence);
fence_context_create_fence_failed:
nv_drm_gem_object_unreference_unlocked(&nv_fence_context->base);
fence_context_lookup_failed:
nv_drm_gem_object_unreference_unlocked(nv_gem);
done:
return ret;
}
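/*
 * Fence representing the condition "semaphore value >= wait value" on a
 * semaphore surface fence context. The fence stays on the context's pending
 * list until it is signaled, or until its timeout forces completion with
 * -ETIMEDOUT.
 */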
struct nv_drm_semsurf_fence {
nv_dma_fence_t base;
spinlock_t lock;
/*
* While unsignaled, this is the node in the associated fence context's
* pending fence list. The list holds a reference to the fence.
*/
struct list_head pending_node;
#if !defined(NV_DMA_FENCE_OPS_HAS_USE_64BIT_SEQNO)
/* 64-bit version of base.seqno on kernels with 32-bit fence seqno */
NvU64 wait_value;
#endif
/*
* Raw absolute kernel time (time domain and scale are treated as opaque)
* when this fence times out.
*/
unsigned long timeout;
};
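/*
 * Tracking data for one NVKMS semaphore surface callback registration. The
 * deferred work re-runs callback registration on the context's worker thread
 * once the callback fires; the structure is freed once its work completes or
 * its registration is canceled.
 */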
struct nv_drm_semsurf_fence_callback {
struct nv_drm_semsurf_fence_ctx *ctx;
nv_drm_work work;
NvU64 wait_value;
};
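/*
 * Tracks one outstanding sync-FD wait requested via the semsurf fence wait
 * ioctl. The dma_fence callback runs when the waited-on fence signals and
 * defers the actual NVKMS registration of the pre/post wait values to the
 * context's worker thread.
 */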
struct nv_drm_sync_fd_wait_data {
nv_dma_fence_cb_t dma_fence_cb;
struct nv_drm_semsurf_fence_ctx *ctx;
nv_drm_work work; /* Deferred second half of fence wait callback */
/* Could use a lockless list data structure here instead */
struct list_head pending_node;
NvU64 pre_wait_value;
NvU64 post_wait_value;
};
struct nv_drm_semsurf_fence_ctx {
struct nv_drm_fence_context base;
/* The NVKMS KAPI reference to the context's semaphore surface */
struct NvKmsKapiSemaphoreSurface *pSemSurface;
/* CPU mapping of the semaphore slot values */
union {
volatile void *pVoid;
volatile NvU32 *p32;
volatile NvU64 *p64;
} pSemMapping;
volatile NvU64 *pMaxSubmittedMapping;
/* work thread for fence timeouts and waits */
nv_drm_workthread worker;
/* Timeout timer and associated workthread work */
nv_drm_timer timer;
nv_drm_work timeout_work;
/* Protects access to everything below */
spinlock_t lock;
/* List of pending fences which are not yet signaled */
struct list_head pending_fences;
/* List of pending fence wait operations */
struct list_head pending_waits;
/*
* Tracking data for the single in-flight callback associated with this
* context. Either both pointers will be valid, or both will be NULL.
*
* Note it is not safe to dereference these values outside of the context
* lock unless it is certain the associated callback is not yet active,
* or has been canceled. Their memory is owned by the callback itself as
* soon as it is registered. Subtly, this means these variables can not
* be used as output parameters to the function that registers the callback.
*/
struct {
struct nv_drm_semsurf_fence_callback *local;
struct NvKmsKapiSemaphoreSurfaceCallback *nvKms;
} callback;
/*
* Wait value associated with either the above or a being-registered
* callback. May differ from callback->local->wait_value if it is the
* latter. Zero if no callback is currently needed.
*/
NvU64 current_wait_value;
};
static inline struct nv_drm_semsurf_fence_ctx*
to_semsurf_fence_ctx(
struct nv_drm_fence_context *nv_fence_context
)
{
return container_of(nv_fence_context,
struct nv_drm_semsurf_fence_ctx,
base);
}
static inline NvU64
__nv_drm_get_semsurf_fence_seqno(const struct nv_drm_semsurf_fence *nv_fence)
{
#if defined(NV_DMA_FENCE_OPS_HAS_USE_64BIT_SEQNO)
return nv_fence->base.seqno;
#else
return nv_fence->wait_value;
#endif
}
#ifndef READ_ONCE
#define READ_ONCE(x) ACCESS_ONCE(x)
#endif
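/*
 * Returns the current 64-bit payload of this context's semaphore slot. For
 * 32-bit GPU semaphores the value is reconstructed by combining the 32-bit
 * payload with the 64-bit max-submitted value, adjusting for wrap-around.
 */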
static inline NvU64
__nv_drm_get_semsurf_ctx_seqno(struct nv_drm_semsurf_fence_ctx *ctx)
{
NvU64 semVal;
if (ctx->pMaxSubmittedMapping) {
/* 32-bit GPU semaphores */
NvU64 maxSubmitted = READ_ONCE(*ctx->pMaxSubmittedMapping);
/*
* Must happen after the max submitted read! See
* NvTimeSemFermiGetPayload() for full details.
*/
semVal = READ_ONCE(*ctx->pSemMapping.p32);
if ((maxSubmitted & 0xFFFFFFFFull) < semVal) {
maxSubmitted -= 0x100000000ull;
}
semVal |= (maxSubmitted & 0xffffffff00000000ull);
} else {
/* 64-bit GPU semaphores */
semVal = READ_ONCE(*ctx->pSemMapping.p64);
}
return semVal;
}
static void
__nv_drm_semsurf_force_complete_pending(struct nv_drm_semsurf_fence_ctx *ctx)
{
unsigned long flags;
/*
* No locks are needed for the pending_fences list. This code runs after all
* other possible references to the fence context have been removed. The
* fences have their own individual locks to protect themselves.
*/
while (!list_empty(&ctx->pending_fences)) {
struct nv_drm_semsurf_fence *nv_fence = list_first_entry(
&ctx->pending_fences,
typeof(*nv_fence),
pending_node);
nv_dma_fence_t *fence = &nv_fence->base;
list_del(&nv_fence->pending_node);
nv_dma_fence_set_error(fence, -ETIMEDOUT);
nv_dma_fence_signal(fence);
/* Remove the pending list's reference */
nv_dma_fence_put(fence);
}
/*
* The pending waits are also referenced by the fences they are waiting on,
* but those fences are guaranteed to complete in finite time. Just keep
* the context alive until they do so.
*/
spin_lock_irqsave(&ctx->lock, flags);
while (!list_empty(&ctx->pending_waits)) {
spin_unlock_irqrestore(&ctx->lock, flags);
nv_drm_yield();
spin_lock_irqsave(&ctx->lock, flags);
}
spin_unlock_irqrestore(&ctx->lock, flags);
}
/* Forward declaration */
static void
__nv_drm_semsurf_ctx_reg_callbacks(struct nv_drm_semsurf_fence_ctx *ctx);
static void
__nv_drm_semsurf_ctx_fence_callback_work(void *data)
{
struct nv_drm_semsurf_fence_callback *callback = data;
__nv_drm_semsurf_ctx_reg_callbacks(callback->ctx);
nv_drm_free(callback);
}
static struct nv_drm_semsurf_fence_callback*
__nv_drm_semsurf_new_callback(struct nv_drm_semsurf_fence_ctx *ctx)
{
struct nv_drm_semsurf_fence_callback *newCallback =
nv_drm_calloc(1, sizeof(*newCallback));
if (!newCallback) {
return NULL;
}
newCallback->ctx = ctx;
nv_drm_workthread_work_init(&newCallback->work,
__nv_drm_semsurf_ctx_fence_callback_work,
newCallback);
return newCallback;
}
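/*
 * Walks the ordered pending-fence list: fences whose wait value has been
 * reached are signaled, and fences whose deadline has passed are completed
 * with -ETIMEDOUT. Optionally reports the wait value and timeout of the next
 * still-pending fence so the caller can register a new callback; see the
 * contract described in the body below.
 */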
static void
__nv_drm_semsurf_ctx_process_completed(struct nv_drm_semsurf_fence_ctx *ctx,
NvU64 *newWaitValueOut,
unsigned long *newTimeoutOut)
{
struct list_head finished;
struct list_head timed_out;
struct nv_drm_semsurf_fence *nv_fence;
nv_dma_fence_t *fence;
NvU64 currentSeqno = __nv_drm_get_semsurf_ctx_seqno(ctx);
NvU64 fenceSeqno = 0;
unsigned long flags;
unsigned long fenceTimeout = 0;
unsigned long now = nv_drm_timer_now();
INIT_LIST_HEAD(&finished);
INIT_LIST_HEAD(&timed_out);
spin_lock_irqsave(&ctx->lock, flags);
while (!list_empty(&ctx->pending_fences)) {
nv_fence = list_first_entry(&ctx->pending_fences,
typeof(*nv_fence),
pending_node);
fenceSeqno = __nv_drm_get_semsurf_fence_seqno(nv_fence);
fenceTimeout = nv_fence->timeout;
if (fenceSeqno <= currentSeqno) {
list_move_tail(&nv_fence->pending_node, &finished);
} else if (fenceTimeout <= now) {
list_move_tail(&nv_fence->pending_node, &timed_out);
} else {
break;
}
}
/*
* If the caller passes non-NULL newWaitValueOut and newTimeoutOut
* parameters, it establishes a contract. If the returned values are
* non-zero, the caller must attempt to register a callback associated with
* the new wait value and reset the context's timer to the specified
* timeout.
*/
if (newWaitValueOut && newTimeoutOut) {
if (list_empty(&ctx->pending_fences)) {
/* No pending fences, so no waiter is needed. */
ctx->current_wait_value = fenceSeqno = 0;
fenceTimeout = 0;
} else if (fenceSeqno == ctx->current_wait_value) {
/*
* The context already has a waiter registered, or one in the process of
* being registered, for this fence. Indicate to the caller that no new
* waiter registration is needed, and leave the ctx state alone.
*/
fenceSeqno = 0;
fenceTimeout = 0;
} else {
/* A new waiter must be registered. Prep the context */
ctx->current_wait_value = fenceSeqno;
}
*newWaitValueOut = fenceSeqno;
*newTimeoutOut = fenceTimeout;
}
spin_unlock_irqrestore(&ctx->lock, flags);
while (!list_empty(&finished)) {
nv_fence = list_first_entry(&finished, typeof(*nv_fence), pending_node);
list_del_init(&nv_fence->pending_node);
fence = &nv_fence->base;
nv_dma_fence_signal(fence);
nv_dma_fence_put(fence); /* Drops the pending list's reference */
}
while (!list_empty(&timed_out)) {
nv_fence = list_first_entry(&timed_out, typeof(*nv_fence),
pending_node);
list_del_init(&nv_fence->pending_node);
fence = &nv_fence->base;
nv_dma_fence_set_error(fence, -ETIMEDOUT);
nv_dma_fence_signal(fence);
nv_dma_fence_put(fence); /* Drops the pending list's reference */
}
}
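/*
 * NVKMS callback invoked when the semaphore reaches the registered wait
 * value. Completed fences are processed immediately to minimize latency,
 * and callback re-registration is pushed to the worker thread since it may
 * need to call back into RM.
 */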
static void
__nv_drm_semsurf_ctx_callback(void *data)
{
struct nv_drm_semsurf_fence_callback *callback = data;
struct nv_drm_semsurf_fence_ctx *ctx = callback->ctx;
unsigned long flags;
spin_lock_irqsave(&ctx->lock, flags);
/* If this was the context's currently registered callback, clear it. */
if (ctx->callback.local == callback) {
ctx->callback.local = NULL;
ctx->callback.nvKms = NULL;
}
/* If a store of this callback could still be pending, prevent it. */
if (ctx->current_wait_value == callback->wait_value) {
ctx->current_wait_value = 0;
}
spin_unlock_irqrestore(&ctx->lock, flags);
/*
* This is redundant with the __nv_drm_semsurf_ctx_reg_callbacks() call from
* __nv_drm_semsurf_ctx_fence_callback_work(), which will be called by the
* work enqueued below, but calling it here as well allows unblocking
* waiters with less latency.
*/
__nv_drm_semsurf_ctx_process_completed(ctx, NULL, NULL);
if (!nv_drm_workthread_add_work(&ctx->worker, &callback->work)) {
/*
* The context is shutting down. It will force-signal all fences when
* doing so, so there's no need for any more callback handling.
*/
nv_drm_free(callback);
}
}
/*
* Take spin lock, attempt to stash newNvKmsCallback/newCallback in ctx.
* If current_wait_value in fence context != new_wait_value, we raced with
* someone registering a newer waiter. Release spin lock, and unregister our
* waiter. It isn't needed anymore.
*/
static bool
__nv_drm_semsurf_ctx_store_callback(
struct nv_drm_semsurf_fence_ctx *ctx,
NvU64 new_wait_value,
struct NvKmsKapiSemaphoreSurfaceCallback *newNvKmsCallback,
struct nv_drm_semsurf_fence_callback *newCallback)
{
struct nv_drm_device *nv_dev = ctx->base.nv_dev;
struct NvKmsKapiSemaphoreSurfaceCallback *oldNvKmsCallback;
struct nv_drm_semsurf_fence_callback *oldCallback = NULL;
NvU64 oldWaitValue;
unsigned long flags;
bool installed = false;
spin_lock_irqsave(&ctx->lock, flags);
if (ctx->current_wait_value == new_wait_value) {
oldCallback = ctx->callback.local;
oldNvKmsCallback = ctx->callback.nvKms;
oldWaitValue = oldCallback ? oldCallback->wait_value : 0;
ctx->callback.local = newCallback;
ctx->callback.nvKms = newNvKmsCallback;
installed = true;
}
spin_unlock_irqrestore(&ctx->lock, flags);
if (oldCallback) {
if (nvKms->unregisterSemaphoreSurfaceCallback(nv_dev->pDevice,
ctx->pSemSurface,
ctx->base.fenceSemIndex,
oldWaitValue,
oldNvKmsCallback)) {
/*
* The old callback was successfully canceled, and its NVKMS and RM
* resources have been freed. Free its local tracking data.
*/
nv_drm_free(oldCallback);
} else {
/*
* The new callback is already running. It will do no harm, and free
* itself.
*/
}
}
return installed;
}
/*
* Processes completed fences and registers an RM callback and a timeout timer
* for the next incomplete fence, if any. To avoid calling in to RM while
* holding a spinlock, this is done in a loop until the state settles.
*
* Can NOT be called from in an atomic context/interrupt handler.
*/
static void
__nv_drm_semsurf_ctx_reg_callbacks(struct nv_drm_semsurf_fence_ctx *ctx)
{
struct nv_drm_device *nv_dev = ctx->base.nv_dev;
struct nv_drm_semsurf_fence_callback *newCallback =
__nv_drm_semsurf_new_callback(ctx);
struct NvKmsKapiSemaphoreSurfaceCallback *newNvKmsCallback;
NvU64 newWaitValue;
unsigned long newTimeout;
NvKmsKapiRegisterWaiterResult kapiRet;
if (!newCallback) {
NV_DRM_DEV_LOG_ERR(
nv_dev,
"Failed to allocate new fence signal callback data");
return;
}
do {
/*
* Process any completed or timed out fences. This returns the wait
* value and timeout of the first remaining pending fence, or 0/0
* if no pending fences remain. It will also tag the context as
* waiting for the value returned.
*/
__nv_drm_semsurf_ctx_process_completed(ctx,
&newWaitValue,
&newTimeout);
if (newWaitValue == 0) {
/* No fences remain, so no callback is needed. */
nv_drm_free(newCallback);
newCallback = NULL;
return;
}
newCallback->wait_value = newWaitValue;
/*
* Attempt to register a callback for the remaining fences. Note this
* code may be running concurrently in multiple places, attempting to
* register a callback for the same value, a value greater than
* newWaitValue if more fences have since completed, or a value less
* than newWaitValue if new fences have been created tracking lower
* values than the previously lowest pending one. Hence, even if this
* registration succeeds, the callback may be discarded.
*/
kapiRet =
nvKms->registerSemaphoreSurfaceCallback(nv_dev->pDevice,
ctx->pSemSurface,
__nv_drm_semsurf_ctx_callback,
newCallback,
ctx->base.fenceSemIndex,
newWaitValue,
0,
&newNvKmsCallback);
} while (kapiRet == NVKMS_KAPI_REG_WAITER_ALREADY_SIGNALLED);
/* Can't deref newCallback at this point unless kapiRet indicates failure */
if (kapiRet != NVKMS_KAPI_REG_WAITER_SUCCESS) {
/*
* This is expected if another thread concurrently registered a callback
* for the same value, which is fine. That thread's callback will do the
* same work this thread's would have. Clean this one up and return.
*
* Another possibility is that an allocation or some other low-level
* operation that can spuriously fail has caused this failure, or of
* course a bug resulting in invalid usage of the
* registerSemaphoreSurfaceCallback() API. There is no good way to
* handle such failures, so the fence timeout will be relied upon to
* guarantee forward progress in those cases.
*/
nv_drm_free(newCallback);
return;
}
nv_drm_mod_timer(&ctx->timer, newTimeout);
if (!__nv_drm_semsurf_ctx_store_callback(ctx,
newWaitValue,
newNvKmsCallback,
newCallback)) {
/*
* Another thread registered a callback for a different value before
* this thread's callback could be stored in the context, or the
* callback is already running. That's OK. One of the following is true:
*
* -A new fence with a lower value has been registered, and the callback
* associated with that fence is now active and associated with the
* context.
*
* -This fence has already completed, and a new callback associated with
* a higher value has been registered and associated with the context.
* This lower-value callback is no longer needed, as any fences
* associated with it must have been marked completed before
* registering the higher-value callback.
*
* -The callback started running and cleared ctx->current_wait_value
* before the callback could be stored in the context. Work to signal
* the fence is now pending.
*
* Hence, it is safe to request cancellation of the callback and free
* the associated data if cancellation succeeds.
*/
if (nvKms->unregisterSemaphoreSurfaceCallback(nv_dev->pDevice,
ctx->pSemSurface,
ctx->base.fenceSemIndex,
newWaitValue,
newNvKmsCallback)) {
/* RM callback successfully canceled. Free local tracking data */
nv_drm_free(newCallback);
}
}
}
static void __nv_drm_semsurf_fence_ctx_destroy(
struct nv_drm_fence_context *nv_fence_context)
{
struct nv_drm_device *nv_dev = nv_fence_context->nv_dev;
struct nv_drm_semsurf_fence_ctx *ctx =
to_semsurf_fence_ctx(nv_fence_context);
struct NvKmsKapiSemaphoreSurfaceCallback *pendingNvKmsCallback;
NvU64 pendingWaitValue;
unsigned long flags;
/*
* The workthread must be shut down before the timer is stopped to ensure
* no queued work re-arms the timer after it has been stopped.
*/
nv_drm_workthread_shutdown(&ctx->worker);
nv_drm_del_timer_sync(&ctx->timer);
/*
* The semaphore surface could still be sending callbacks, so it is still
* not safe to dereference the ctx->callback pointers. However,
* unregistering a callback via its handle is safe, as that code in NVKMS
* takes care to avoid dereferencing the handle until it knows the callback
* has been canceled in RM. This unregistration must be done to ensure the
* callback data is not leaked in NVKMS if it is still pending, as freeing
* the semaphore surface only cleans up RM's callback data.
*/
spin_lock_irqsave(&ctx->lock, flags);
pendingNvKmsCallback = ctx->callback.nvKms;
pendingWaitValue = ctx->callback.local ?
ctx->callback.local->wait_value : 0;
spin_unlock_irqrestore(&ctx->lock, flags);
if (pendingNvKmsCallback) {
WARN_ON(pendingWaitValue == 0);
nvKms->unregisterSemaphoreSurfaceCallback(nv_dev->pDevice,
ctx->pSemSurface,
ctx->base.fenceSemIndex,
pendingWaitValue,
pendingNvKmsCallback);
}
nvKms->freeSemaphoreSurface(nv_dev->pDevice, ctx->pSemSurface);
/*
* Now that the semaphore surface, the timer, and the workthread are gone:
*
* -No more RM/NVKMS callbacks will arrive, nor are any in progress. Freeing
* the semaphore surface cancels all its callbacks associated with this
* instance of it, and idles any pending callbacks.
*
* -No more timer callbacks will arrive, nor are any in flight.
*
* -The workthread has been idled and is no longer running.
*
* Further, given the destructor is running, no other references to the
* fence context exist, so this code can assume no concurrent access to the
* fence context's data will happen from here on out.
*/
if (ctx->callback.local) {
nv_drm_free(ctx->callback.local);
ctx->callback.local = NULL;
ctx->callback.nvKms = NULL;
}
__nv_drm_semsurf_force_complete_pending(ctx);
nv_drm_free(nv_fence_context);
}
static void
__nv_drm_semsurf_ctx_timeout_work(void *data)
{
struct nv_drm_semsurf_fence_ctx *ctx = data;
__nv_drm_semsurf_ctx_reg_callbacks(ctx);
}
static void
__nv_drm_semsurf_ctx_timeout_callback(nv_drm_timer *timer)
{
struct nv_drm_semsurf_fence_ctx *ctx =
container_of(timer, typeof(*ctx), timer);
/*
* Schedule work to register new waiter & timer on a worker thread.
*
* It does not matter if this fails. There are two possible failure cases:
*
* - ctx->timeout_work is already scheduled. That existing scheduled work
* will do at least as much as work scheduled right now and executed
* immediately, which is sufficient.
*
* - The context is shutting down. In this case, all fences will be force-
* signalled, so no further callbacks or timeouts are needed.
*
* Note this work may re-arm the timeout timer. To ensure that does not
* happen while context teardown is stopping and idling the timer, the
* worker thread must be shut down before the timer is stopped.
*/
nv_drm_workthread_add_work(&ctx->worker, &ctx->timeout_work);
}
static struct nv_drm_fence_context_ops
nv_drm_semsurf_fence_ctx_ops = {
.destroy = __nv_drm_semsurf_fence_ctx_destroy,
};
static struct nv_drm_semsurf_fence_ctx*
__nv_drm_semsurf_fence_ctx_new(
struct nv_drm_device *nv_dev,
struct drm_nvidia_semsurf_fence_ctx_create_params *p
)
{
struct nv_drm_semsurf_fence_ctx *ctx;
struct NvKmsKapiSemaphoreSurface *pSemSurface;
uint8_t *semMapping;
uint8_t *maxSubmittedMapping;
char worker_name[20+16+1]; /* strlen(nvidia-drm/timeline-) + 16 for %llx + NUL */
pSemSurface = nvKms->importSemaphoreSurface(nv_dev->pDevice,
p->nvkms_params_ptr,
p->nvkms_params_size,
(void **)&semMapping,
(void **)&maxSubmittedMapping);
if (!pSemSurface) {
NV_DRM_DEV_LOG_ERR(
nv_dev,
"Failed to import semaphore surface");
goto failed;
}
/*
* Allocate a fence context object and initialize it.
*/
if ((ctx = nv_drm_calloc(1, sizeof(*ctx))) == NULL) {
goto failed_alloc_fence_context;
}
semMapping += (p->index * nv_dev->semsurf_stride);
if (maxSubmittedMapping) {
maxSubmittedMapping += (p->index * nv_dev->semsurf_stride) +
nv_dev->semsurf_max_submitted_offset;
}
/*
* nv_dma_fence_context_alloc() cannot fail, so we do not need
* to check a return value.
*/
ctx->base.ops = &nv_drm_semsurf_fence_ctx_ops;
ctx->base.nv_dev = nv_dev;
ctx->base.context = nv_dma_fence_context_alloc(1);
ctx->base.fenceSemIndex = p->index;
ctx->pSemSurface = pSemSurface;
ctx->pSemMapping.pVoid = semMapping;
ctx->pMaxSubmittedMapping = (volatile NvU64 *)maxSubmittedMapping;
ctx->callback.local = NULL;
ctx->callback.nvKms = NULL;
ctx->current_wait_value = 0;
spin_lock_init(&ctx->lock);
INIT_LIST_HEAD(&ctx->pending_fences);
INIT_LIST_HEAD(&ctx->pending_waits);
sprintf(worker_name, "nvidia-drm/timeline-%llx",
(long long unsigned)ctx->base.context);
if (!nv_drm_workthread_init(&ctx->worker, worker_name)) {
goto failed_alloc_worker;
}
nv_drm_workthread_work_init(&ctx->timeout_work,
__nv_drm_semsurf_ctx_timeout_work,
ctx);
nv_drm_timer_setup(&ctx->timer, __nv_drm_semsurf_ctx_timeout_callback);
return ctx;
failed_alloc_worker:
nv_drm_free(ctx);
failed_alloc_fence_context:
nvKms->freeSemaphoreSurface(nv_dev->pDevice, pSemSurface);
failed:
return NULL;
}
int nv_drm_semsurf_fence_ctx_create_ioctl(struct drm_device *dev,
void *data,
struct drm_file *filep)
{
struct nv_drm_device *nv_dev = to_nv_device(dev);
struct drm_nvidia_semsurf_fence_ctx_create_params *p = data;
struct nv_drm_semsurf_fence_ctx *ctx;
int err;
if (nv_dev->pDevice == NULL) {
return -EOPNOTSUPP;
}
if (p->__pad != 0) {
NV_DRM_DEV_LOG_ERR(nv_dev, "Padding fields must be zeroed");
return -EINVAL;
}
ctx = __nv_drm_semsurf_fence_ctx_new(nv_dev, p);
if (!ctx) {
return -ENOMEM;
}
err = __nv_drm_fence_context_gem_init(dev, &ctx->base, &p->handle, filep);
if (err) {
__nv_drm_semsurf_fence_ctx_destroy(&ctx->base);
}
return err;
}
static inline struct nv_drm_semsurf_fence*
to_nv_drm_semsurf_fence(nv_dma_fence_t *fence)
{
return container_of(fence, struct nv_drm_semsurf_fence, base);
}
static const char*
__nv_drm_semsurf_fence_op_get_timeline_name(nv_dma_fence_t *fence)
{
return "nvidia.semaphore_surface";
}
static bool
__nv_drm_semsurf_fence_op_enable_signaling(nv_dma_fence_t *fence)
{
// DO NOTHING - Could defer RM callback registration until this point
return true;
}
static void
__nv_drm_semsurf_fence_op_release(nv_dma_fence_t *fence)
{
struct nv_drm_semsurf_fence *nv_fence =
to_nv_drm_semsurf_fence(fence);
nv_drm_free(nv_fence);
}
static const nv_dma_fence_ops_t nv_drm_semsurf_fence_ops = {
.get_driver_name = nv_drm_gem_fence_op_get_driver_name,
.get_timeline_name = __nv_drm_semsurf_fence_op_get_timeline_name,
.enable_signaling = __nv_drm_semsurf_fence_op_enable_signaling,
.release = __nv_drm_semsurf_fence_op_release,
.wait = nv_dma_fence_default_wait,
#if defined(NV_DMA_FENCE_OPS_HAS_USE_64BIT_SEQNO)
.use_64bit_seqno = true,
#endif
};
/*
* Completes fence initialization, places a new reference to the fence in the
* context's pending fence list, and updates/registers any RM callbacks and
* timeout timers if necessary.
*
* Can NOT be called from in an atomic context/interrupt handler.
*/
static void
__nv_drm_semsurf_ctx_add_pending(struct nv_drm_semsurf_fence_ctx *ctx,
struct nv_drm_semsurf_fence *nv_fence,
NvU64 timeoutMS)
{
struct list_head *pending;
unsigned long flags;
if (timeoutMS > NV_DRM_SEMAPHORE_SURFACE_FENCE_MAX_TIMEOUT_MS) {
timeoutMS = NV_DRM_SEMAPHORE_SURFACE_FENCE_MAX_TIMEOUT_MS;
}
/* Add a reference to the fence for the list */
nv_dma_fence_get(&nv_fence->base);
INIT_LIST_HEAD(&nv_fence->pending_node);
nv_fence->timeout = nv_drm_timeout_from_ms(timeoutMS);
spin_lock_irqsave(&ctx->lock, flags);
list_for_each(pending, &ctx->pending_fences) {
struct nv_drm_semsurf_fence *pending_fence =
list_entry(pending, typeof(*pending_fence), pending_node);
if (__nv_drm_get_semsurf_fence_seqno(pending_fence) >
__nv_drm_get_semsurf_fence_seqno(nv_fence)) {
/* Inserts 'nv_fence->pending_node' before 'pending' */
list_add_tail(&nv_fence->pending_node, pending);
break;
}
}
if (list_empty(&nv_fence->pending_node)) {
/*
* Insert 'nv_fence->pending_node' at the end of 'ctx->pending_fences',
* or as the head if the list is empty.
*/
list_add_tail(&nv_fence->pending_node, &ctx->pending_fences);
}
/* Fence is live starting... now! */
spin_unlock_irqrestore(&ctx->lock, flags);
/* Register new wait and timeout callbacks, if necessary */
__nv_drm_semsurf_ctx_reg_callbacks(ctx);
}
static nv_dma_fence_t *__nv_drm_semsurf_fence_ctx_create_fence(
struct nv_drm_device *nv_dev,
struct nv_drm_semsurf_fence_ctx *ctx,
NvU64 wait_value,
NvU64 timeout_value_ms)
{
struct nv_drm_semsurf_fence *nv_fence;
nv_dma_fence_t *fence;
int ret = 0;
if (timeout_value_ms == 0 ||
timeout_value_ms > NV_DRM_SEMAPHORE_SURFACE_FENCE_MAX_TIMEOUT_MS) {
timeout_value_ms = NV_DRM_SEMAPHORE_SURFACE_FENCE_MAX_TIMEOUT_MS;
}
if ((nv_fence = nv_drm_calloc(1, sizeof(*nv_fence))) == NULL) {
ret = -ENOMEM;
goto out;
}
fence = &nv_fence->base;
spin_lock_init(&nv_fence->lock);
#if !defined(NV_DMA_FENCE_OPS_HAS_USE_64BIT_SEQNO)
nv_fence->wait_value = wait_value;
#endif
/* Initializes the fence with one reference (for the caller) */
nv_dma_fence_init(fence, &nv_drm_semsurf_fence_ops,
&nv_fence->lock,
ctx->base.context, wait_value);
__nv_drm_semsurf_ctx_add_pending(ctx, nv_fence, timeout_value_ms);
out:
/* Returned fence has one reference reserved for the caller. */
return ret != 0 ? ERR_PTR(ret) : &nv_fence->base;
}
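/*
 * SEMSURF_FENCE_CREATE ioctl: allocates a fence for 'wait_value' on the given
 * semaphore surface fence context and returns it to userspace as a sync FD.
 */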
int nv_drm_semsurf_fence_create_ioctl(struct drm_device *dev,
void *data,
struct drm_file *filep)
{
struct nv_drm_device *nv_dev = to_nv_device(dev);
struct drm_nvidia_semsurf_fence_create_params *p = data;
struct nv_drm_fence_context *nv_fence_context;
nv_dma_fence_t *fence;
int ret = -EINVAL;
int fd;
if (nv_dev->pDevice == NULL) {
ret = -EOPNOTSUPP;
goto done;
}
if (p->__pad != 0) {
NV_DRM_DEV_LOG_ERR(nv_dev, "Padding fields must be zeroed");
goto done;
}
if ((nv_fence_context = __nv_drm_fence_context_lookup(
nv_dev->dev,
filep,
p->fence_context_handle)) == NULL) {
NV_DRM_DEV_LOG_ERR(
nv_dev,
"Failed to lookup gem object for fence context: 0x%08x",
p->fence_context_handle);
goto done;
}
if (nv_fence_context->ops != &nv_drm_semsurf_fence_ctx_ops) {
NV_DRM_DEV_LOG_ERR(
nv_dev,
"Wrong fence context type: 0x%08x",
p->fence_context_handle);
goto fence_context_create_fence_failed;
}
fence = __nv_drm_semsurf_fence_ctx_create_fence(
nv_dev,
to_semsurf_fence_ctx(nv_fence_context),
p->wait_value,
p->timeout_value_ms);
if (IS_ERR(fence)) {
ret = PTR_ERR(fence);
NV_DRM_DEV_LOG_ERR(
nv_dev,
"Failed to allocate fence: 0x%08x", p->fence_context_handle);
goto fence_context_create_fence_failed;
}
if ((fd = nv_drm_create_sync_file(fence)) < 0) {
ret = fd;
NV_DRM_DEV_LOG_ERR(
nv_dev,
"Failed to create sync file from fence on ctx 0x%08x",
p->fence_context_handle);
goto fence_context_create_sync_failed;
}
p->fd = fd;
ret = 0;
fence_context_create_sync_failed:
/*
* Release this function's reference to the fence. If successful, the sync
* FD will still hold a reference, and the pending list (if the fence hasn't
* already been signaled) will also retain a reference.
*/
nv_dma_fence_put(fence);
fence_context_create_fence_failed:
nv_drm_gem_object_unreference_unlocked(&nv_fence_context->base);
done:
return ret;
}
static void
__nv_drm_semsurf_free_wait_data(struct nv_drm_sync_fd_wait_data *wait_data)
{
struct nv_drm_semsurf_fence_ctx *ctx = wait_data->ctx;
unsigned long flags;
spin_lock_irqsave(&ctx->lock, flags);
list_del(&wait_data->pending_node);
spin_unlock_irqrestore(&ctx->lock, flags);
nv_drm_free(wait_data);
}
static void
__nv_drm_semsurf_wait_fence_work_cb
(
void *arg
)
{
struct nv_drm_sync_fd_wait_data *wait_data = arg;
struct nv_drm_semsurf_fence_ctx *ctx = wait_data->ctx;
struct nv_drm_device *nv_dev = ctx->base.nv_dev;
NvKmsKapiRegisterWaiterResult ret;
/*
* Note this command applies "newValue" immediately if the semaphore has
* already reached "waitValue." It only returns NVKMS_KAPI_ALREADY_SIGNALLED
* if a separate notification was requested as well.
*/
ret = nvKms->registerSemaphoreSurfaceCallback(nv_dev->pDevice,
ctx->pSemSurface,
NULL,
NULL,
ctx->base.fenceSemIndex,
wait_data->pre_wait_value,
wait_data->post_wait_value,
NULL);
if (ret != NVKMS_KAPI_REG_WAITER_SUCCESS) {
NV_DRM_DEV_LOG_ERR(nv_dev,
"Failed to register auto-value-update on pre-wait value for sync FD semaphore surface");
}
__nv_drm_semsurf_free_wait_data(wait_data);
}
static void
__nv_drm_semsurf_wait_fence_cb
(
nv_dma_fence_t *fence,
nv_dma_fence_cb_t *cb
)
{
struct nv_drm_sync_fd_wait_data *wait_data =
container_of(cb, typeof(*wait_data), dma_fence_cb);
struct nv_drm_semsurf_fence_ctx *ctx = wait_data->ctx;
/*
* Defer registering the wait with RM to a worker thread, since
* this function may be called in interrupt context, which
* could mean arriving here directly from RM's top/bottom half
* handler when the fence being waited on came from an RM-managed GPU.
*/
if (!nv_drm_workthread_add_work(&ctx->worker, &wait_data->work)) {
/*
* The context is shutting down. RM would likely just drop
* the wait anyway as part of that, so do nothing. Either the
* client is exiting uncleanly, or it is a bug in the client
* in that it didn't consume its wait before destroying the
* fence context used to instantiate it.
*/
__nv_drm_semsurf_free_wait_data(wait_data);
}
/* Don't need to reference the fence anymore, just the fence context. */
nv_dma_fence_put(fence);
}
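/*
 * SEMSURF_FENCE_WAIT ioctl: once the fence backing the supplied sync FD
 * signals, ask NVKMS to write 'post_wait_value' to this context's semaphore
 * slot when it reaches 'pre_wait_value'. The wait is tracked on the context's
 * pending_waits list until the deferred work completes.
 */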
int nv_drm_semsurf_fence_wait_ioctl(struct drm_device *dev,
void *data,
struct drm_file *filep)
{
struct nv_drm_device *nv_dev = to_nv_device(dev);
struct drm_nvidia_semsurf_fence_wait_params *p = data;
struct nv_drm_fence_context *nv_fence_context;
struct nv_drm_semsurf_fence_ctx *ctx;
struct nv_drm_sync_fd_wait_data *wait_data = NULL;
nv_dma_fence_t *fence;
unsigned long flags;
int ret = -EINVAL;
if (nv_dev->pDevice == NULL) {
return -EOPNOTSUPP;
}
if (p->pre_wait_value >= p->post_wait_value) {
NV_DRM_DEV_LOG_ERR(
nv_dev,
"Non-monotonic wait values specified to fence wait: 0x%" NvU64_fmtu ", 0x%" NvU64_fmtu,
p->pre_wait_value, p->post_wait_value);
goto done;
}
if ((nv_fence_context = __nv_drm_fence_context_lookup(
nv_dev->dev,
filep,
p->fence_context_handle)) == NULL) {
NV_DRM_DEV_LOG_ERR(
nv_dev,
"Failed to lookup gem object for fence context: 0x%08x",
p->fence_context_handle);
goto done;
}
if (nv_fence_context->ops != &nv_drm_semsurf_fence_ctx_ops) {
NV_DRM_DEV_LOG_ERR(
nv_dev,
"Wrong fence context type: 0x%08x",
p->fence_context_handle);
goto fence_context_sync_lookup_failed;
}
ctx = to_semsurf_fence_ctx(nv_fence_context);
wait_data = nv_drm_calloc(1, sizeof(*wait_data));
if (!wait_data) {
NV_DRM_DEV_LOG_ERR(
nv_dev,
"Failed to allocate callback data for sync FD wait: %d", p->fd);
goto fence_context_sync_lookup_failed;
}
fence = nv_drm_sync_file_get_fence(p->fd);
if (!fence) {
NV_DRM_DEV_LOG_ERR(
nv_dev,
"Attempt to wait on invalid sync FD: %d", p->fd);
goto fence_context_sync_lookup_failed;
}
wait_data->ctx = ctx;
wait_data->pre_wait_value = p->pre_wait_value;
wait_data->post_wait_value = p->post_wait_value;
nv_drm_workthread_work_init(&wait_data->work,
__nv_drm_semsurf_wait_fence_work_cb,
wait_data);
spin_lock_irqsave(&ctx->lock, flags);
list_add(&wait_data->pending_node, &ctx->pending_waits);
spin_unlock_irqrestore(&ctx->lock, flags);
ret = nv_dma_fence_add_callback(fence,
&wait_data->dma_fence_cb,
__nv_drm_semsurf_wait_fence_cb);
if (ret) {
if (ret == -ENOENT) {
/* The fence is already signaled */
} else {
NV_DRM_LOG_ERR(
"Failed to add dma_fence callback. Signaling early!");
/* Proceed as if the fence wait succeeded */
}
/* Execute second half of wait immediately, avoiding the worker thread */
nv_dma_fence_put(fence);
__nv_drm_semsurf_wait_fence_work_cb(wait_data);
}
ret = 0;
fence_context_sync_lookup_failed:
if (ret && wait_data) {
/*
* Do not use __nv_drm_semsurf_free_wait_data() here, as the wait_data
* has not been added to the pending list yet.
*/
nv_drm_free(wait_data);
}
nv_drm_gem_object_unreference_unlocked(&nv_fence_context->base);
done:
return ret;
}
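/*
 * SEMSURF_FENCE_ATTACH ioctl: allocates a fence for 'wait_value' on the given
 * semaphore surface fence context and attaches it to the reservation object
 * of the target GEM object, as either a shared or an exclusive fence.
 */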
int nv_drm_semsurf_fence_attach_ioctl(struct drm_device *dev,
void *data,
struct drm_file *filep)
{
struct nv_drm_device *nv_dev = to_nv_device(dev);
struct drm_nvidia_semsurf_fence_attach_params *p = data;
struct nv_drm_gem_object *nv_gem = NULL;
struct nv_drm_fence_context *nv_fence_context = NULL;
nv_dma_fence_t *fence;
int ret = -EINVAL;
if (nv_dev->pDevice == NULL) {
ret = -EOPNOTSUPP;
goto done;
}
nv_gem = nv_drm_gem_object_lookup(nv_dev->dev, filep, p->handle);
if (!nv_gem) {
NV_DRM_DEV_LOG_ERR(
nv_dev,
"Failed to lookup gem object for fence attach: 0x%08x",
p->handle);
goto done;
}
nv_fence_context = __nv_drm_fence_context_lookup(
nv_dev->dev,
filep,
p->fence_context_handle);
if (!nv_fence_context) {
NV_DRM_DEV_LOG_ERR(
nv_dev,
"Failed to lookup gem object for fence context: 0x%08x",
p->fence_context_handle);
goto done;
}
if (nv_fence_context->ops != &nv_drm_semsurf_fence_ctx_ops) {
NV_DRM_DEV_LOG_ERR(
nv_dev,
"Wrong fence context type: 0x%08x",
p->fence_context_handle);
goto done;
}
fence = __nv_drm_semsurf_fence_ctx_create_fence(
nv_dev,
to_semsurf_fence_ctx(nv_fence_context),
p->wait_value,
p->timeout_value_ms);
if (IS_ERR(fence)) {
ret = PTR_ERR(fence);
NV_DRM_DEV_LOG_ERR(
nv_dev,
"Failed to allocate fence: 0x%08x", p->handle);
goto done;
}
ret = __nv_drm_gem_attach_fence(nv_gem, fence, p->shared);
nv_dma_fence_put(fence);
done:
if (nv_fence_context) {
nv_drm_gem_object_unreference_unlocked(&nv_fence_context->base);
}
if (nv_gem) {
nv_drm_gem_object_unreference_unlocked(nv_gem);
}
return ret;
}
#endif /* NV_DRM_FENCE_AVAILABLE */
#endif /* NV_DRM_AVAILABLE */