diff options
Diffstat (limited to 'drivers/dibs')
-rw-r--r-- | drivers/dibs/Kconfig | 23 | ||||
-rw-r--r-- | drivers/dibs/Makefile | 8 | ||||
-rw-r--r-- | drivers/dibs/dibs_loopback.c | 361 | ||||
-rw-r--r-- | drivers/dibs/dibs_loopback.h | 57 | ||||
-rw-r--r-- | drivers/dibs/dibs_main.c | 278 |
5 files changed, 727 insertions, 0 deletions
diff --git a/drivers/dibs/Kconfig b/drivers/dibs/Kconfig new file mode 100644 index 000000000000..5dc347b9b235 --- /dev/null +++ b/drivers/dibs/Kconfig @@ -0,0 +1,23 @@ +# SPDX-License-Identifier: GPL-2.0 +config DIBS + tristate "DIBS support" + default n + help + Direct Internal Buffer Sharing (DIBS) + A communication method that uses common physical (internal) memory + for synchronous direct access into a remote buffer. + + Select this option to provide the abstraction layer between + dibs devices and dibs clients like the SMC protocol. + The module name is dibs. + +config DIBS_LO + bool "intra-OS shortcut with dibs loopback" + depends on DIBS + default n + help + DIBS_LO enables the creation of an software-emulated dibs device + named lo which can be used for transferring data when communication + occurs within the same OS. This helps in convenient testing of + dibs clients, since dibs loopback is independent of architecture or + hardware. diff --git a/drivers/dibs/Makefile b/drivers/dibs/Makefile new file mode 100644 index 000000000000..85805490c77f --- /dev/null +++ b/drivers/dibs/Makefile @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# DIBS class module +# + +dibs-y += dibs_main.o +obj-$(CONFIG_DIBS) += dibs.o +dibs-$(CONFIG_DIBS_LO) += dibs_loopback.o
\ No newline at end of file diff --git a/drivers/dibs/dibs_loopback.c b/drivers/dibs/dibs_loopback.c new file mode 100644 index 000000000000..aa029e29c6b2 --- /dev/null +++ b/drivers/dibs/dibs_loopback.c @@ -0,0 +1,361 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Functions for dibs loopback/loopback-ism device. + * + * Copyright (c) 2024, Alibaba Inc. + * + * Author: Wen Gu <guwen@linux.alibaba.com> + * Tony Lu <tonylu@linux.alibaba.com> + * + */ + +#include <linux/bitops.h> +#include <linux/device.h> +#include <linux/dibs.h> +#include <linux/mm.h> +#include <linux/slab.h> +#include <linux/spinlock.h> +#include <linux/types.h> + +#include "dibs_loopback.h" + +#define DIBS_LO_SUPPORT_NOCOPY 0x1 +#define DIBS_DMA_ADDR_INVALID (~(dma_addr_t)0) + +static const char dibs_lo_dev_name[] = "lo"; +/* global loopback device */ +static struct dibs_lo_dev *lo_dev; + +static u16 dibs_lo_get_fabric_id(struct dibs_dev *dibs) +{ + return DIBS_LOOPBACK_FABRIC; +} + +static int dibs_lo_query_rgid(struct dibs_dev *dibs, const uuid_t *rgid, + u32 vid_valid, u32 vid) +{ + /* rgid should be the same as lgid */ + if (!uuid_equal(rgid, &dibs->gid)) + return -ENETUNREACH; + return 0; +} + +static int dibs_lo_max_dmbs(void) +{ + return DIBS_LO_MAX_DMBS; +} + +static int dibs_lo_register_dmb(struct dibs_dev *dibs, struct dibs_dmb *dmb, + struct dibs_client *client) +{ + struct dibs_lo_dmb_node *dmb_node, *tmp_node; + struct dibs_lo_dev *ldev; + struct folio *folio; + unsigned long flags; + int sba_idx, rc; + + ldev = dibs->drv_priv; + sba_idx = dmb->idx; + /* check space for new dmb */ + for_each_clear_bit(sba_idx, ldev->sba_idx_mask, DIBS_LO_MAX_DMBS) { + if (!test_and_set_bit(sba_idx, ldev->sba_idx_mask)) + break; + } + if (sba_idx == DIBS_LO_MAX_DMBS) + return -ENOSPC; + + dmb_node = kzalloc(sizeof(*dmb_node), GFP_KERNEL); + if (!dmb_node) { + rc = -ENOMEM; + goto err_bit; + } + + dmb_node->sba_idx = sba_idx; + dmb_node->len = dmb->dmb_len; + + /* not critical; fail under memory pressure and fallback to TCP */ + folio = folio_alloc(GFP_KERNEL | __GFP_NOWARN | __GFP_NOMEMALLOC | + __GFP_NORETRY | __GFP_ZERO, + get_order(dmb_node->len)); + if (!folio) { + rc = -ENOMEM; + goto err_node; + } + dmb_node->cpu_addr = folio_address(folio); + dmb_node->dma_addr = DIBS_DMA_ADDR_INVALID; + refcount_set(&dmb_node->refcnt, 1); + +again: + /* add new dmb into hash table */ + get_random_bytes(&dmb_node->token, sizeof(dmb_node->token)); + write_lock_bh(&ldev->dmb_ht_lock); + hash_for_each_possible(ldev->dmb_ht, tmp_node, list, dmb_node->token) { + if (tmp_node->token == dmb_node->token) { + write_unlock_bh(&ldev->dmb_ht_lock); + goto again; + } + } + hash_add(ldev->dmb_ht, &dmb_node->list, dmb_node->token); + write_unlock_bh(&ldev->dmb_ht_lock); + atomic_inc(&ldev->dmb_cnt); + + dmb->idx = dmb_node->sba_idx; + dmb->dmb_tok = dmb_node->token; + dmb->cpu_addr = dmb_node->cpu_addr; + dmb->dma_addr = dmb_node->dma_addr; + dmb->dmb_len = dmb_node->len; + + spin_lock_irqsave(&dibs->lock, flags); + dibs->dmb_clientid_arr[sba_idx] = client->id; + spin_unlock_irqrestore(&dibs->lock, flags); + + return 0; + +err_node: + kfree(dmb_node); +err_bit: + clear_bit(sba_idx, ldev->sba_idx_mask); + return rc; +} + +static void __dibs_lo_unregister_dmb(struct dibs_lo_dev *ldev, + struct dibs_lo_dmb_node *dmb_node) +{ + /* remove dmb from hash table */ + write_lock_bh(&ldev->dmb_ht_lock); + hash_del(&dmb_node->list); + write_unlock_bh(&ldev->dmb_ht_lock); + + clear_bit(dmb_node->sba_idx, ldev->sba_idx_mask); + folio_put(virt_to_folio(dmb_node->cpu_addr)); + kfree(dmb_node); + + if (atomic_dec_and_test(&ldev->dmb_cnt)) + wake_up(&ldev->ldev_release); +} + +static int dibs_lo_unregister_dmb(struct dibs_dev *dibs, struct dibs_dmb *dmb) +{ + struct dibs_lo_dmb_node *dmb_node = NULL, *tmp_node; + struct dibs_lo_dev *ldev; + unsigned long flags; + + ldev = dibs->drv_priv; + + /* find dmb from hash table */ + read_lock_bh(&ldev->dmb_ht_lock); + hash_for_each_possible(ldev->dmb_ht, tmp_node, list, dmb->dmb_tok) { + if (tmp_node->token == dmb->dmb_tok) { + dmb_node = tmp_node; + break; + } + } + read_unlock_bh(&ldev->dmb_ht_lock); + if (!dmb_node) + return -EINVAL; + + if (refcount_dec_and_test(&dmb_node->refcnt)) { + spin_lock_irqsave(&dibs->lock, flags); + dibs->dmb_clientid_arr[dmb_node->sba_idx] = NO_DIBS_CLIENT; + spin_unlock_irqrestore(&dibs->lock, flags); + + __dibs_lo_unregister_dmb(ldev, dmb_node); + } + return 0; +} + +static int dibs_lo_support_dmb_nocopy(struct dibs_dev *dibs) +{ + return DIBS_LO_SUPPORT_NOCOPY; +} + +static int dibs_lo_attach_dmb(struct dibs_dev *dibs, struct dibs_dmb *dmb) +{ + struct dibs_lo_dmb_node *dmb_node = NULL, *tmp_node; + struct dibs_lo_dev *ldev; + + ldev = dibs->drv_priv; + + /* find dmb_node according to dmb->dmb_tok */ + read_lock_bh(&ldev->dmb_ht_lock); + hash_for_each_possible(ldev->dmb_ht, tmp_node, list, dmb->dmb_tok) { + if (tmp_node->token == dmb->dmb_tok) { + dmb_node = tmp_node; + break; + } + } + if (!dmb_node) { + read_unlock_bh(&ldev->dmb_ht_lock); + return -EINVAL; + } + read_unlock_bh(&ldev->dmb_ht_lock); + + if (!refcount_inc_not_zero(&dmb_node->refcnt)) + /* the dmb is being unregistered, but has + * not been removed from the hash table. + */ + return -EINVAL; + + /* provide dmb information */ + dmb->idx = dmb_node->sba_idx; + dmb->dmb_tok = dmb_node->token; + dmb->cpu_addr = dmb_node->cpu_addr; + dmb->dma_addr = dmb_node->dma_addr; + dmb->dmb_len = dmb_node->len; + return 0; +} + +static int dibs_lo_detach_dmb(struct dibs_dev *dibs, u64 token) +{ + struct dibs_lo_dmb_node *dmb_node = NULL, *tmp_node; + struct dibs_lo_dev *ldev; + + ldev = dibs->drv_priv; + + /* find dmb_node according to dmb->dmb_tok */ + read_lock_bh(&ldev->dmb_ht_lock); + hash_for_each_possible(ldev->dmb_ht, tmp_node, list, token) { + if (tmp_node->token == token) { + dmb_node = tmp_node; + break; + } + } + if (!dmb_node) { + read_unlock_bh(&ldev->dmb_ht_lock); + return -EINVAL; + } + read_unlock_bh(&ldev->dmb_ht_lock); + + if (refcount_dec_and_test(&dmb_node->refcnt)) + __dibs_lo_unregister_dmb(ldev, dmb_node); + return 0; +} + +static int dibs_lo_move_data(struct dibs_dev *dibs, u64 dmb_tok, + unsigned int idx, bool sf, unsigned int offset, + void *data, unsigned int size) +{ + struct dibs_lo_dmb_node *rmb_node = NULL, *tmp_node; + struct dibs_lo_dev *ldev; + u16 s_mask; + u8 client_id; + u32 sba_idx; + + ldev = dibs->drv_priv; + + read_lock_bh(&ldev->dmb_ht_lock); + hash_for_each_possible(ldev->dmb_ht, tmp_node, list, dmb_tok) { + if (tmp_node->token == dmb_tok) { + rmb_node = tmp_node; + break; + } + } + if (!rmb_node) { + read_unlock_bh(&ldev->dmb_ht_lock); + return -EINVAL; + } + memcpy((char *)rmb_node->cpu_addr + offset, data, size); + sba_idx = rmb_node->sba_idx; + read_unlock_bh(&ldev->dmb_ht_lock); + + if (!sf) + return 0; + + spin_lock(&dibs->lock); + client_id = dibs->dmb_clientid_arr[sba_idx]; + s_mask = ror16(0x1000, idx); + if (likely(client_id != NO_DIBS_CLIENT && dibs->subs[client_id])) + dibs->subs[client_id]->ops->handle_irq(dibs, sba_idx, s_mask); + spin_unlock(&dibs->lock); + + return 0; +} + +static const struct dibs_dev_ops dibs_lo_ops = { + .get_fabric_id = dibs_lo_get_fabric_id, + .query_remote_gid = dibs_lo_query_rgid, + .max_dmbs = dibs_lo_max_dmbs, + .register_dmb = dibs_lo_register_dmb, + .unregister_dmb = dibs_lo_unregister_dmb, + .move_data = dibs_lo_move_data, + .support_mmapped_rdmb = dibs_lo_support_dmb_nocopy, + .attach_dmb = dibs_lo_attach_dmb, + .detach_dmb = dibs_lo_detach_dmb, +}; + +static void dibs_lo_dev_init(struct dibs_lo_dev *ldev) +{ + rwlock_init(&ldev->dmb_ht_lock); + hash_init(ldev->dmb_ht); + atomic_set(&ldev->dmb_cnt, 0); + init_waitqueue_head(&ldev->ldev_release); +} + +static void dibs_lo_dev_exit(struct dibs_lo_dev *ldev) +{ + if (atomic_read(&ldev->dmb_cnt)) + wait_event(ldev->ldev_release, !atomic_read(&ldev->dmb_cnt)); +} + +static int dibs_lo_dev_probe(void) +{ + struct dibs_lo_dev *ldev; + struct dibs_dev *dibs; + int ret; + + ldev = kzalloc(sizeof(*ldev), GFP_KERNEL); + if (!ldev) + return -ENOMEM; + + dibs = dibs_dev_alloc(); + if (!dibs) { + kfree(ldev); + return -ENOMEM; + } + + ldev->dibs = dibs; + dibs->drv_priv = ldev; + dibs_lo_dev_init(ldev); + uuid_gen(&dibs->gid); + dibs->ops = &dibs_lo_ops; + + dibs->dev.parent = NULL; + dev_set_name(&dibs->dev, "%s", dibs_lo_dev_name); + + ret = dibs_dev_add(dibs); + if (ret) + goto err_reg; + lo_dev = ldev; + return 0; + +err_reg: + kfree(dibs->dmb_clientid_arr); + /* pairs with dibs_dev_alloc() */ + put_device(&dibs->dev); + kfree(ldev); + + return ret; +} + +static void dibs_lo_dev_remove(void) +{ + if (!lo_dev) + return; + + dibs_dev_del(lo_dev->dibs); + dibs_lo_dev_exit(lo_dev); + /* pairs with dibs_dev_alloc() */ + put_device(&lo_dev->dibs->dev); + kfree(lo_dev); + lo_dev = NULL; +} + +int dibs_loopback_init(void) +{ + return dibs_lo_dev_probe(); +} + +void dibs_loopback_exit(void) +{ + dibs_lo_dev_remove(); +} diff --git a/drivers/dibs/dibs_loopback.h b/drivers/dibs/dibs_loopback.h new file mode 100644 index 000000000000..0664f6a8e662 --- /dev/null +++ b/drivers/dibs/dibs_loopback.h @@ -0,0 +1,57 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * dibs loopback (aka loopback-ism) device structure definitions. + * + * Copyright (c) 2024, Alibaba Inc. + * + * Author: Wen Gu <guwen@linux.alibaba.com> + * Tony Lu <tonylu@linux.alibaba.com> + * + */ + +#ifndef _DIBS_LOOPBACK_H +#define _DIBS_LOOPBACK_H + +#include <linux/dibs.h> +#include <linux/hashtable.h> +#include <linux/spinlock.h> +#include <linux/types.h> +#include <linux/wait.h> + +#if IS_ENABLED(CONFIG_DIBS_LO) +#define DIBS_LO_DMBS_HASH_BITS 12 +#define DIBS_LO_MAX_DMBS 5000 + +struct dibs_lo_dmb_node { + struct hlist_node list; + u64 token; + u32 len; + u32 sba_idx; + void *cpu_addr; + dma_addr_t dma_addr; + refcount_t refcnt; +}; + +struct dibs_lo_dev { + struct dibs_dev *dibs; + atomic_t dmb_cnt; + rwlock_t dmb_ht_lock; + DECLARE_BITMAP(sba_idx_mask, DIBS_LO_MAX_DMBS); + DECLARE_HASHTABLE(dmb_ht, DIBS_LO_DMBS_HASH_BITS); + wait_queue_head_t ldev_release; +}; + +int dibs_loopback_init(void); +void dibs_loopback_exit(void); +#else +static inline int dibs_loopback_init(void) +{ + return 0; +} + +static inline void dibs_loopback_exit(void) +{ +} +#endif + +#endif /* _DIBS_LOOPBACK_H */ diff --git a/drivers/dibs/dibs_main.c b/drivers/dibs/dibs_main.c new file mode 100644 index 000000000000..0374f8350ff7 --- /dev/null +++ b/drivers/dibs/dibs_main.c @@ -0,0 +1,278 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * DIBS - Direct Internal Buffer Sharing + * + * Implementation of the DIBS class module + * + * Copyright IBM Corp. 2025 + */ +#define KMSG_COMPONENT "dibs" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include <linux/module.h> +#include <linux/types.h> +#include <linux/slab.h> +#include <linux/err.h> +#include <linux/dibs.h> + +#include "dibs_loopback.h" + +MODULE_DESCRIPTION("Direct Internal Buffer Sharing class"); +MODULE_LICENSE("GPL"); + +static struct class *dibs_class; + +/* use an array rather a list for fast mapping: */ +static struct dibs_client *clients[MAX_DIBS_CLIENTS]; +static u8 max_client; +static DEFINE_MUTEX(clients_lock); +struct dibs_dev_list { + struct list_head list; + struct mutex mutex; /* protects dibs device list */ +}; + +static struct dibs_dev_list dibs_dev_list = { + .list = LIST_HEAD_INIT(dibs_dev_list.list), + .mutex = __MUTEX_INITIALIZER(dibs_dev_list.mutex), +}; + +static void dibs_setup_forwarding(struct dibs_client *client, + struct dibs_dev *dibs) +{ + unsigned long flags; + + spin_lock_irqsave(&dibs->lock, flags); + dibs->subs[client->id] = client; + spin_unlock_irqrestore(&dibs->lock, flags); +} + +int dibs_register_client(struct dibs_client *client) +{ + struct dibs_dev *dibs; + int i, rc = -ENOSPC; + + mutex_lock(&dibs_dev_list.mutex); + mutex_lock(&clients_lock); + for (i = 0; i < MAX_DIBS_CLIENTS; ++i) { + if (!clients[i]) { + clients[i] = client; + client->id = i; + if (i == max_client) + max_client++; + rc = 0; + break; + } + } + mutex_unlock(&clients_lock); + + if (i < MAX_DIBS_CLIENTS) { + /* initialize with all devices that we got so far */ + list_for_each_entry(dibs, &dibs_dev_list.list, list) { + dibs->priv[i] = NULL; + client->ops->add_dev(dibs); + dibs_setup_forwarding(client, dibs); + } + } + mutex_unlock(&dibs_dev_list.mutex); + + return rc; +} +EXPORT_SYMBOL_GPL(dibs_register_client); + +int dibs_unregister_client(struct dibs_client *client) +{ + struct dibs_dev *dibs; + unsigned long flags; + int max_dmbs; + int rc = 0; + + mutex_lock(&dibs_dev_list.mutex); + list_for_each_entry(dibs, &dibs_dev_list.list, list) { + spin_lock_irqsave(&dibs->lock, flags); + max_dmbs = dibs->ops->max_dmbs(); + for (int i = 0; i < max_dmbs; ++i) { + if (dibs->dmb_clientid_arr[i] == client->id) { + WARN(1, "%s: attempt to unregister '%s' with registered dmb(s)\n", + __func__, client->name); + rc = -EBUSY; + goto err_reg_dmb; + } + } + /* Stop forwarding IRQs and events */ + dibs->subs[client->id] = NULL; + spin_unlock_irqrestore(&dibs->lock, flags); + clients[client->id]->ops->del_dev(dibs); + dibs->priv[client->id] = NULL; + } + + mutex_lock(&clients_lock); + clients[client->id] = NULL; + if (client->id + 1 == max_client) + max_client--; + mutex_unlock(&clients_lock); + + mutex_unlock(&dibs_dev_list.mutex); + return rc; + +err_reg_dmb: + spin_unlock_irqrestore(&dibs->lock, flags); + mutex_unlock(&dibs_dev_list.mutex); + return rc; +} +EXPORT_SYMBOL_GPL(dibs_unregister_client); + +static void dibs_dev_release(struct device *dev) +{ + struct dibs_dev *dibs; + + dibs = container_of(dev, struct dibs_dev, dev); + + kfree(dibs); +} + +struct dibs_dev *dibs_dev_alloc(void) +{ + struct dibs_dev *dibs; + + dibs = kzalloc(sizeof(*dibs), GFP_KERNEL); + if (!dibs) + return dibs; + dibs->dev.release = dibs_dev_release; + dibs->dev.class = dibs_class; + device_initialize(&dibs->dev); + + return dibs; +} +EXPORT_SYMBOL_GPL(dibs_dev_alloc); + +static ssize_t gid_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct dibs_dev *dibs; + + dibs = container_of(dev, struct dibs_dev, dev); + + return sysfs_emit(buf, "%pUb\n", &dibs->gid); +} +static DEVICE_ATTR_RO(gid); + +static ssize_t fabric_id_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct dibs_dev *dibs; + u16 fabric_id; + + dibs = container_of(dev, struct dibs_dev, dev); + fabric_id = dibs->ops->get_fabric_id(dibs); + + return sysfs_emit(buf, "0x%04x\n", fabric_id); +} +static DEVICE_ATTR_RO(fabric_id); + +static struct attribute *dibs_dev_attrs[] = { + &dev_attr_gid.attr, + &dev_attr_fabric_id.attr, + NULL, +}; + +static const struct attribute_group dibs_dev_attr_group = { + .attrs = dibs_dev_attrs, +}; + +int dibs_dev_add(struct dibs_dev *dibs) +{ + int max_dmbs; + int i, ret; + + max_dmbs = dibs->ops->max_dmbs(); + spin_lock_init(&dibs->lock); + dibs->dmb_clientid_arr = kzalloc(max_dmbs, GFP_KERNEL); + if (!dibs->dmb_clientid_arr) + return -ENOMEM; + memset(dibs->dmb_clientid_arr, NO_DIBS_CLIENT, max_dmbs); + + ret = device_add(&dibs->dev); + if (ret) + goto free_client_arr; + + ret = sysfs_create_group(&dibs->dev.kobj, &dibs_dev_attr_group); + if (ret) { + dev_err(&dibs->dev, "sysfs_create_group failed for dibs_dev\n"); + goto err_device_del; + } + mutex_lock(&dibs_dev_list.mutex); + mutex_lock(&clients_lock); + for (i = 0; i < max_client; ++i) { + if (clients[i]) { + clients[i]->ops->add_dev(dibs); + dibs_setup_forwarding(clients[i], dibs); + } + } + mutex_unlock(&clients_lock); + list_add(&dibs->list, &dibs_dev_list.list); + mutex_unlock(&dibs_dev_list.mutex); + + return 0; + +err_device_del: + device_del(&dibs->dev); +free_client_arr: + kfree(dibs->dmb_clientid_arr); + return ret; + +} +EXPORT_SYMBOL_GPL(dibs_dev_add); + +void dibs_dev_del(struct dibs_dev *dibs) +{ + unsigned long flags; + int i; + + sysfs_remove_group(&dibs->dev.kobj, &dibs_dev_attr_group); + + spin_lock_irqsave(&dibs->lock, flags); + for (i = 0; i < MAX_DIBS_CLIENTS; ++i) + dibs->subs[i] = NULL; + spin_unlock_irqrestore(&dibs->lock, flags); + + mutex_lock(&dibs_dev_list.mutex); + mutex_lock(&clients_lock); + for (i = 0; i < max_client; ++i) { + if (clients[i]) + clients[i]->ops->del_dev(dibs); + } + mutex_unlock(&clients_lock); + list_del_init(&dibs->list); + mutex_unlock(&dibs_dev_list.mutex); + + device_del(&dibs->dev); + kfree(dibs->dmb_clientid_arr); +} +EXPORT_SYMBOL_GPL(dibs_dev_del); + +static int __init dibs_init(void) +{ + int rc; + + memset(clients, 0, sizeof(clients)); + max_client = 0; + + dibs_class = class_create("dibs"); + if (IS_ERR(dibs_class)) + return PTR_ERR(dibs_class); + + rc = dibs_loopback_init(); + if (rc) + pr_err("%s fails with %d\n", __func__, rc); + + return rc; +} + +static void __exit dibs_exit(void) +{ + dibs_loopback_exit(); + class_destroy(dibs_class); +} + +module_init(dibs_init); +module_exit(dibs_exit); |