// SPDX-License-Identifier: GPL-2.0-only
/*
 * VMware VMCI Driver
 *
 * Copyright (C) 2012 VMware, Inc. All rights reserved.
 */

#include <linux/vmw_vmci_defs.h>
#include <linux/vmw_vmci_api.h>
#include <linux/highmem.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/cred.h>
#include <linux/slab.h>

#include "vmci_queue_pair.h"
#include "vmci_datagram.h"
#include "vmci_doorbell.h"
#include "vmci_context.h"
#include "vmci_driver.h"
#include "vmci_event.h"

/* Use a wide upper bound for the maximum contexts. */
#define VMCI_MAX_CONTEXTS 2000

/*
 * List of current VMCI contexts.  Contexts can be added by
 * vmci_ctx_create() and removed via vmci_ctx_destroy().
 * These, along with context lookup, are protected by the
 * list structure's lock.
 */
static struct {
	struct list_head head;
	spinlock_t lock; /* Spinlock for context list operations */
} ctx_list = {
	.head = LIST_HEAD_INIT(ctx_list.head),
	.lock = __SPIN_LOCK_UNLOCKED(ctx_list.lock),
};

/* Used by contexts that did not set up notify flag pointers */
static bool ctx_dummy_notify;

static void ctx_signal_notify(struct vmci_ctx *context)
{
	*context->notify = true;
}

static void ctx_clear_notify(struct vmci_ctx *context)
{
	*context->notify = false;
}

/*
 * If nothing requires the attention of the guest, clears both
 * notify flag and call.
 */
static void ctx_clear_notify_call(struct vmci_ctx *context)
{
	if (context->pending_datagrams == 0 &&
	    vmci_handle_arr_get_size(context->pending_doorbell_array) == 0)
		ctx_clear_notify(context);
}

/*
 * Sets the context's notify flag iff datagrams are pending for this
 * context.  Called from vmci_setup_notify().
 */
void vmci_ctx_check_signal_notify(struct vmci_ctx *context)
{
	spin_lock(&context->lock);
	if (context->pending_datagrams)
		ctx_signal_notify(context);
	spin_unlock(&context->lock);
}

/*
 * Allocates and initializes a VMCI context.
 */
struct vmci_ctx *vmci_ctx_create(u32 cid, u32 priv_flags,
				 uintptr_t event_hnd,
				 int user_version,
				 const struct cred *cred)
{
	struct vmci_ctx *context;
	int error;

	if (cid == VMCI_INVALID_ID) {
		pr_devel("Invalid context ID for VMCI context\n");
		error = -EINVAL;
		goto err_out;
	}

	if (priv_flags & ~VMCI_PRIVILEGE_ALL_FLAGS) {
		pr_devel("Invalid flag (flags=0x%x) for VMCI context\n",
			 priv_flags);
		error = -EINVAL;
		goto err_out;
	}

	if (user_version == 0) {
		pr_devel("Invalid suer_version %d\n", user_version);
		error = -EINVAL;
		goto err_out;
	}

	context = kzalloc(sizeof(*context), GFP_KERNEL);
	if (!context) {
		pr_warn("Failed to allocate memory for VMCI context\n");
		error = -ENOMEM;
		goto err_out;
	}

	kref_init(&context->kref);
	spin_lock_init(&context->lock);
	INIT_LIST_HEAD(&context->list_item);
	INIT_LIST_HEAD(&context->datagram_queue);
	INIT_LIST_HEAD(&context->notifier_list);

	/* Initialize host-specific VMCI context. */
	init_waitqueue_head(&context->host_context.wait_queue);

	context->queue_pair_array =
		vmci_handle_arr_create(0, VMCI_MAX_GUEST_QP_COUNT);
	if (!context->queue_pair_array) {
		error = -ENOMEM;
		goto err_free_ctx;
	}

	context->doorbell_array =
		vmci_handle_arr_create(0, VMCI_MAX_GUEST_DOORBELL_COUNT);
	if (!context->doorbell_array) {
		error = -ENOMEM;
		goto err_free_qp_array;
	}

	context->pending_doorbell_array =
		vmci_handle_arr_create(0, VMCI_MAX_GUEST_DOORBELL_COUNT);
	if (!context->pending_doorbell_array) {
		error = -ENOMEM;
		goto err_free_db_array;
	}

	context->user_version = user_version;

	context->priv_flags = priv_flags;

	if (cred)
		context->cred = get_cred(cred);

	context->notify = &ctx_dummy_notify;
	context->notify_page = NULL;

	/*
	 * If we collide with an existing context we generate a new
	 * and use it instead. The VMX will determine if regeneration
	 * is okay. Since there isn't 4B - 16 VMs running on a given
	 * host, the below loop will terminate.
	 */
	spin_lock(&ctx_list.lock);

	while (vmci_ctx_exists(cid)) {
		/* We reserve the lowest 16 ids for fixed contexts. */
		cid = max(cid, VMCI_RESERVED_CID_LIMIT - 1) + 1;
		if (cid == VMCI_INVALID_ID)
			cid = VMCI_RESERVED_CID_LIMIT;
	}
	context->cid = cid;

	list_add_tail_rcu(&context->list_item, &ctx_list.head);
	spin_unlock(&ctx_list.lock);

	return context;

 err_free_db_array:
	vmci_handle_arr_destroy(context->doorbell_array);
 err_free_qp_array:
	vmci_handle_arr_destroy(context->queue_pair_array);
 err_free_ctx:
	kfree(context);
 err_out:
	return ERR_PTR(error);
}

/*
 * Destroy VMCI context.
 */
void vmci_ctx_destroy(struct vmci_ctx *context)
{
	spin_lock(&ctx_list.lock);
	list_del_rcu(&context->list_item);
	spin_unlock(&ctx_list.lock);
	synchronize_rcu();

	vmci_ctx_put(context);
}

/*
 * Fire notification for all contexts interested in given cid.
 */
static int ctx_fire_notification(u32 context_id, u32 priv_flags)
{
	u32 i, array_size;
	struct vmci_ctx *sub_ctx;
	struct vmci_handle_arr *subscriber_array;
	struct vmci_handle context_handle =
		vmci_make_handle(context_id, VMCI_EVENT_HANDLER);

	/*
	 * We create an array to hold the subscribers we find when
	 * scanning through all contexts.
	 */
	subscriber_array = vmci_handle_arr_create(0, VMCI_MAX_CONTEXTS);
	if (subscriber_array == NULL)
		return VMCI_ERROR_NO_MEM;

	/*
	 * Scan all contexts to find who is interested in being
	 * notified about given contextID.
	 */
	rcu_read_lock();
	list_for_each_entry_rcu(sub_ctx, &ctx_list.head, list_item) {
		struct vmci_handle_list *node;

		/*
		 * We only deliver notifications of the removal of
		 * contexts, if the two contexts are allowed to
		 * interact.
		 */
		if (vmci_deny_interaction(priv_flags, sub_ctx->priv_flags))
			continue;

		list_for_each_entry_rcu(node, &sub_ctx->notifier_list, node) {
			if (!vmci_handle_is_equal(node->handle, context_handle))
				continue;

			vmci_handle_arr_append_entry(&subscriber_array,
					vmci_make_handle(sub_ctx->cid,
							 VMCI_EVENT_HANDLER));
		}
	}
	rcu_read_unlock();

	/* Fire event to all subscribers. */
	array_size = vmci_handle_arr_get_size(subscriber_array);
	for (i = 0; i < array_size; i++) {
		int result;
		struct vmci_event_ctx ev;

		ev.msg.hdr.dst = vmci_handle_arr_get_entry(subscriber_array, i);
		ev.msg.hdr.src = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
						  VMCI_CONTEXT_RESOURCE_ID);
		ev.msg.hdr.payload_size = sizeof(ev) - sizeof(ev.msg.hdr);
		ev.msg.event_data.event = VMCI_EVENT_CTX_REMOVED;
		ev.payload.context_id = context_id;

		result = vmci_datagram_dispatch(VMCI_HYPERVISOR_CONTEXT_ID,
						&ev.msg.hdr, false);
		if (result < VMCI_SUCCESS) {
			pr_devel("Failed to enqueue event datagram (type=%d) for context (ID=0x%x)\n",
				 ev.msg.event_data.event,
				 ev.msg.hdr.dst.context);
			/* We continue to enqueue on next subscriber. */
		}
	}
	vmci_handle_arr_destroy(subscriber_array);

	return VMCI_SUCCESS;
}

/*
 * Returns the current number of pending datagrams. The call may
 * also serve as a synchronization point for the datagram queue,
 * as no enqueue operations can occur concurrently.
 */
int vmci_ctx_pending_datagrams(u32 cid, u32 *pending)
{
	struct vmci_ctx *context;

	context = vmci_ctx_get(cid);
	if (context == NULL)
		return VMCI_ERROR_INVALID_ARGS;

	spin_lock(&context->lock);
	if (pending)
		*pending = context->pending_datagrams;
	spin_unlock(&context->lock);
	vmci_ctx_put(context);

	return VMCI_SUCCESS;
}

/*
 * Queues a VMCI datagram for the appropriate target VM context.
 */
int vmci_ctx_enqueue_datagram(u32 cid, struct vmci_datagram *dg)
{
	struct vmci_datagram_queue_entry *dq_entry;
	struct vmci_ctx *context;
	struct vmci_handle dg_src;
	size_t vmci_dg_size;

	vmci_dg_size = VMCI_DG_SIZE(dg);
	if (vmci_dg_size > VMCI_MAX_DG_SIZE) {
		pr_devel("Datagram too large (bytes=%zu)\n", vmci_dg_size);
		return VMCI_ERROR_INVALID_ARGS;
	}

	/* Get the target VM's VMCI context. */
	context = vmci_ctx_get(cid);
	if (!context) {
		pr_devel("Invalid context (ID=0x%x)\n", cid);
		return VMCI_ERROR_INVALID_ARGS;
	}

	/* Allocate guest call entry and add it to the target VM's queue. */
	dq_entry = kmalloc(sizeof(*dq_entry), GFP_KERNEL);
	if (dq_entry == NULL) {
		pr_warn("Failed to allocate memory for datagram\n");
		vmci_ctx_put(context);
		return VMCI_ERROR_NO_MEM;
	}
	dq_entry->dg = dg;
	dq_entry->dg_size = vmci_dg_size;
	dg_src = dg->src;
	INIT_LIST_HEAD(&dq_entry->list_item);

	spin_lock(&context->lock);

	/*
	 * We put a higher limit on datagrams from the hypervisor.  If
	 * the pending datagram is not from hypervisor, then we check
	 * if enqueueing it would exceed the
	 * VMCI_MAX_DATAGRAM_QUEUE_SIZE limit on the destination.  If
	 * the pending datagram is from hypervisor, we allow it to be
	 * queued at the destination side provided we don't reach the
	 * VMCI_MAX_DATAGRAM_AND_EVENT_QUEUE_SIZE limit.
	 */
	if (context->datagram_queue_size + vmci_dg_size >=
	    VMCI_MAX_DATAGRAM_QUEUE_SIZE &&
	    (!vmci_handle_is_equal(dg_src,
				vmci_make_handle
				(VMCI_HYPERVISOR_CONTEXT_ID,
				 VMCI_CONTEXT_RESOURCE_ID)) ||
	     context->datagram_queue_size + vmci_dg_size >=
	     VMCI_MAX_DATAGRAM_AND_EVENT_QUEUE_SIZE)) {
		spin_unlock(&context->lock);
		vmci_ctx_put(context);
		kfree(dq_entry);
		pr_devel("Context (ID=0x%x) receive queue is full\n", cid);
		return VMCI_ERROR_NO_RESOURCES;
	}

	list_add(&dq_entry->list_item, &context->datagram_queue);
	context->pending_datagrams++;
	context->datagram_queue_size += vmci_dg_size;
	ctx_signal_notify(context);
	wake_up(&context->host_context.wait_queue);
	spin_unlock(&context->lock);
	vmci_ctx_put(context);

	return vmci_dg_size;
}

/*
 * Verifies whether a context with the specified context ID exists.
 * FIXME: utility is dubious as no decisions can be reliably made
 * using this data as context can appear and disappear at any time.
 */
bool vmci_ctx_exists(u32 cid)
{
	struct vmci_ctx *context;
	bool exists = false;

	rcu_read_lock();

	list_for_each_entry_rcu(context, &ctx_list.head, list_item) {
		if (context->cid == cid) {
			exists = true;
			break;
		}
	}

	rcu_read_unlock();
	return exists;
}

/*
 * Retrieves VMCI context corresponding to the given cid.
 */
struct vmci_ctx *vmci_ctx_get(u32 cid)
{
	struct vmci_ctx *c, *context = NULL;

	if (cid == VMCI_INVALID_ID)
		return NULL;

	rcu_read_lock();
	list_for_each_entry_rcu(c, &ctx_list.head, list_item) {
		if (c->cid == cid) {
			/*
			 * The context owner drops its own reference to the
			 * context only after removing it from the list and
			 * waiting for RCU grace period to expire. This
			 * means that we are not about to increase the
			 * reference count of something that is in the
			 * process of being destroyed.
			 */
			context = c;
			kref_get(&context->kref);
			break;
		}
	}
	rcu_read_unlock();

	return context;
}

/*
 * Deallocates all parts of a context data structure. This
 * function doesn't lock the context, because it assumes that
 * the caller was holding the last reference to context.
 */
static void ctx_free_ctx(struct kref *kref)
{
	struct vmci_ctx *context = container_of(kref, struct vmci_ctx, kref);
	struct vmci_datagram_queue_entry *dq_entry, *dq_entry_tmp;
	struct vmci_handle temp_handle;
	struct vmci_handle_list *notifier, *tmp;

	/*
	 * Fire event to all contexts interested in knowing this
	 * context is dying.
	 */
	ctx_fire_notification(context->cid, context->priv_flags);

	/*
	 * Cleanup all queue pair resources attached to context.  If
	 * the VM dies without cleaning up, this code will make sure
	 * that no resources are leaked.
	 */
	temp_handle = vmci_handle_arr_get_entry(context->queue_pair_array, 0);
	while (!vmci_handle_is_equal(temp_handle, VMCI_INVALID_HANDLE)) {
		if (vmci_qp_broker_detach(temp_handle,
					  context) < VMCI_SUCCESS) {
			/*
			 * When vmci_qp_broker_detach() succeeds it
			 * removes the handle from the array.  If
			 * detach fails, we must remove the handle
			 * ourselves.
			 */
			vmci_handle_arr_remove_entry(context->queue_pair_array,
						     temp_handle);
		}
		temp_handle =
		    vmci_handle_arr_get_entry(context->queue_pair_array, 0);
	}

	/*
	 * It is fine to destroy this without locking the callQueue, as
	 * this is the only thread having a reference to the context.
	 */
	list_for_each_entry_safe(dq_entry, dq_entry_tmp,
				 &context->datagram_queue, list_item) {
		WARN_ON(dq_entry->dg_size != VMCI_DG_SIZE(dq_entry->dg));
		list_del(&dq_entry->list_item);
		kfree(dq_entry->dg);
		kfree(dq_entry);
	}

	list_for_each_entry_safe(notifier, tmp,
				 &context->notifier_list, node) {
		list_del(&notifier->node);
		kfree(notifier);
	}

	vmci_handle_arr_destroy(context->queue_pair_array);
	vmci_handle_arr_destroy(context->doorbell_array);
	vmci_handle_arr_destroy(context->pending_doorbell_array);
	vmci_ctx_unset_notify(context);
	if (context->cred)
		put_cred(context->cred);
	kfree(context);
}

/*
 * Drops reference to VMCI context. If this is the last reference to
 * the context it will be deallocated. A context is created with
 * a reference count of one, and on destroy, it is removed from
 * the context list before its reference count is decremented. Thus,
 * if we reach zero, we are sure that nobody else are about to increment
 * it (they need the entry in the context list for that), and so there
 * is no need for locking.
 */
void vmci_ctx_put(struct vmci_ctx *context)
{
	kref_put(&context->kref, ctx_free_ctx);
}

/*
 * Dequeues the next datagram and returns it to caller.
 * The caller passes in a pointer to the max size datagram
 * it can handle and the datagram is only unqueued if the
 * size is less than max_size. If larger max_size is set to
 * the size of the datagram to give the caller a chance to
 * set up a larger buffer for the guestcall.
 */
int vmci_ctx_dequeue_datagram(struct vmci_ctx *context,
			      size_t *max_size,
			      struct vmci_datagram **dg)
{
	struct vmci_datagram_queue_entry *dq_entry;
	struct list_head *list_item;
	int rv;

	/* Dequeue the next datagram entry. */
	spin_lock(&context->lock);
	if (context->pending_datagrams == 0) {
		ctx_clear_notify_call(context);
		spin_unlock(&context->lock);
		pr_devel("No datagrams pending\n");
		return VMCI_ERROR_NO_MORE_DATAGRAMS;
	}

	list_item = context->datagram_queue.next;

	dq_entry =
	    list_entry(list_item, struct vmci_datagram_queue_entry, list_item);

	/* Check size of caller's buffer. */
	if (*max_size < dq_entry->dg_size) {
		*max_size = dq_entry->dg_size;
		spin_unlock(&context->lock);
		pr_devel("Caller's buffer should be at least (size=%u bytes)\n",
			 (u32) *max_size);
		return VMCI_ERROR_NO_MEM;
	}

	list_del(list_item);
	context->pending_datagrams--;
	context->datagram_queue_size -= dq_entry->dg_size;
	if (context->pending_datagrams == 0) {
		ctx_clear_notify_call(context);
		rv = VMCI_SUCCESS;
	} else {
		/*
		 * Return the size of the next datagram.
		 */
		struct vmci_datagram_queue_entry *next_entry;

		list_item = context->datagram_queue.next;
		next_entry =
		    list_entry(list_item, struct vmci_datagram_queue_entry,
			       list_item);

		/*
		 * The following size_t -> int truncation is fine as
		 * the maximum size of a (routable) datagram is 68KB.
		 */
		rv = (int)next_entry->dg_size;
	}
	spin_unlock(&context->lock);

	/* Caller must free datagram. */
	*dg = dq_entry->dg;
	dq_entry->dg = NULL;
	kfree(dq_entry);

	return rv;
}

/*
 * Reverts actions set up by vmci_setup_notify().  Unmaps and unlocks the
 * page mapped/locked by vmci_setup_notify().
 */
void vmci_ctx_unset_notify(struct vmci_ctx *context)
{
	struct page *notify_page;

	spin_lock(&context->lock);

	notify_page = context->notify_page;
	context->notify = &ctx_dummy_notify;
	context->notify_page = NULL;

	spin_unlock(&context->lock);

	if (notify_page) {
		kunmap(notify_page);
		put_page(notify_page);
	}
}

/*
 * Add remote_cid to list of contexts current contexts wants
 * notifications from/about.
 */
int vm