From 5a1c18b761ddb299a06746948b9ec2814b04fa92 Mon Sep 17 00:00:00 2001 From: Rafał Miłecki Date: Wed, 2 Jan 2019 00:00:01 +0100 Subject: bcma: keep a direct pointer to the struct device MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Accessing struct device is pretty useful/common so having a direct pointer: 1) Simplifies some code 2) Makes bcma_bus_get_host_dev() unneeded 3) Allows further improvements like using dev_* printing helpers Signed-off-by: Rafał Miłecki Signed-off-by: Kalle Valo --- include/linux/bcma/bcma.h | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bcma/bcma.h b/include/linux/bcma/bcma.h index ef61f3607e99..60b94b944e9f 100644 --- a/include/linux/bcma/bcma.h +++ b/include/linux/bcma/bcma.h @@ -332,6 +332,8 @@ extern int bcma_arch_register_fallback_sprom( struct ssb_sprom *out)); struct bcma_bus { + struct device *dev; + /* The MMIO area. */ void __iomem *mmio; @@ -339,14 +341,7 @@ struct bcma_bus { enum bcma_hosttype hosttype; bool host_is_pcie2; /* Used for BCMA_HOSTTYPE_PCI only */ - union { - /* Pointer to the PCI bus (only for BCMA_HOSTTYPE_PCI) */ - struct pci_dev *host_pci; - /* Pointer to the SDIO device (only for BCMA_HOSTTYPE_SDIO) */ - struct sdio_func *host_sdio; - /* Pointer to platform device (only for BCMA_HOSTTYPE_SOC) */ - struct platform_device *host_pdev; - }; + struct pci_dev *host_pci; /* PCI bus pointer (BCMA_HOSTTYPE_PCI only) */ struct bcma_chipinfo chipinfo; -- cgit v1.2.3 From ee17e5d6201c66492a0e8053190fca2ed2b8457d Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sat, 12 Jan 2019 11:48:20 -0600 Subject: signal: Make siginmask safe when passed a signal of 0 Eric Biggers reported: > The following commit, which went into v4.20, introduced undefined behavior when > sys_rt_sigqueueinfo() is called with sig=0: > > commit 4ce5f9c9e7546915c559ffae594e6d73f918db00 > Author: Eric W. Biederman > Date: Tue Sep 25 12:59:31 2018 +0200 > > signal: Use a smaller struct siginfo in the kernel > > In sig_specific_sicodes(), used from known_siginfo_layout(), the expression > '1ULL << ((sig)-1)' is undefined as it evaluates to 1ULL << 4294967295. > > Reproducer: > > #include > #include > #include > > int main(void) > { > siginfo_t si = { .si_code = 1 }; > syscall(__NR_rt_sigqueueinfo, 0, 0, &si); > } > > UBSAN report for v5.0-rc1: > > UBSAN: Undefined behaviour in kernel/signal.c:2946:7 > shift exponent 4294967295 is too large for 64-bit type 'long unsigned int' > CPU: 2 PID: 346 Comm: syz_signal Not tainted 5.0.0-rc1 #25 > Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1 04/01/2014 > Call Trace: > __dump_stack lib/dump_stack.c:77 [inline] > dump_stack+0x70/0xa5 lib/dump_stack.c:113 > ubsan_epilogue+0xd/0x40 lib/ubsan.c:159 > __ubsan_handle_shift_out_of_bounds+0x12c/0x170 lib/ubsan.c:425 > known_siginfo_layout+0xae/0xe0 kernel/signal.c:2946 > post_copy_siginfo_from_user kernel/signal.c:3009 [inline] > __copy_siginfo_from_user+0x35/0x60 kernel/signal.c:3035 > __do_sys_rt_sigqueueinfo kernel/signal.c:3553 [inline] > __se_sys_rt_sigqueueinfo kernel/signal.c:3549 [inline] > __x64_sys_rt_sigqueueinfo+0x31/0x70 kernel/signal.c:3549 > do_syscall_64+0x4c/0x1b0 arch/x86/entry/common.c:290 > entry_SYSCALL_64_after_hwframe+0x49/0xbe > RIP: 0033:0x433639 > Code: c4 18 c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 7b 27 00 00 c3 66 2e 0f 1f 84 00 00 00 00 > RSP: 002b:00007fffcb289fc8 EFLAGS: 00000246 ORIG_RAX: 0000000000000081 > RAX: ffffffffffffffda RBX: 00000000004002e0 RCX: 0000000000433639 > RDX: 00007fffcb289fd0 RSI: 0000000000000000 RDI: 0000000000000000 > RBP: 00000000006b2018 R08: 000000000000004d R09: 0000000000000000 > R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000401560 > R13: 00000000004015f0 R14: 0000000000000000 R15: 0000000000000000 I have looked at the other callers of siginmask and they all appear to in locations where sig can not be zero. I have looked at the code generation of adding an extra test against zero and gcc was able with a simple decrement instruction to combine the two tests together. So the at most adding this test cost a single cpu cycle. In practice that decrement instruction was already present as part of the mask comparison, so the only change was when the instruction was executed. So given that it is cheap, and obviously correct to update siginmask to verify the signal is not zero. Fix this issue there to avoid any future problems. Reported-by: Eric Biggers Fixes: 4ce5f9c9e754 ("signal: Use a smaller struct siginfo in the kernel") Signed-off-by: "Eric W. Biederman" --- include/linux/signal.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/signal.h b/include/linux/signal.h index cc7e2c1cd444..9702016734b1 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -392,7 +392,7 @@ extern bool unhandled_signal(struct task_struct *tsk, int sig); #endif #define siginmask(sig, mask) \ - ((sig) < SIGRTMIN && (rt_sigmask(sig) & (mask))) + ((sig) > 0 && (sig) < SIGRTMIN && (rt_sigmask(sig) & (mask))) #define SIG_KERNEL_ONLY_MASK (\ rt_sigmask(SIGKILL) | rt_sigmask(SIGSTOP)) -- cgit v1.2.3 From 73f5a82bb3c9fce550da4a74a32b8cb064b50663 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 13 Jan 2019 15:57:04 +0200 Subject: RDMA/mad: Reduce MAD scope to mlx5_ib only Management Datagram Interface (MAD) is applicable only when physical port is Infiniband. It makes MAD command logic to be completely unrelated to eth/core parts of mlx5. Signed-off-by: Leon Romanovsky Acked-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/cmd.c | 37 ++++++++++++ drivers/infiniband/hw/mlx5/cmd.h | 2 + drivers/infiniband/hw/mlx5/mad.c | 11 ++-- drivers/infiniband/hw/mlx5/mlx5_ib.h | 3 - drivers/net/ethernet/mellanox/mlx5/core/Makefile | 2 +- drivers/net/ethernet/mellanox/mlx5/core/mad.c | 75 ------------------------ include/linux/mlx5/driver.h | 2 - 7 files changed, 47 insertions(+), 85 deletions(-) delete mode 100644 drivers/net/ethernet/mellanox/mlx5/core/mad.c (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx5/cmd.c b/drivers/infiniband/hw/mlx5/cmd.c index 356bccc715ee..6bcc63aaa50b 100644 --- a/drivers/infiniband/hw/mlx5/cmd.c +++ b/drivers/infiniband/hw/mlx5/cmd.c @@ -345,3 +345,40 @@ int mlx5_cmd_alloc_q_counter(struct mlx5_core_dev *dev, u16 *counter_id, counter_set_id); return err; } + +int mlx5_cmd_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb, + u16 opmod, u8 port) +{ + int outlen = MLX5_ST_SZ_BYTES(mad_ifc_out); + int inlen = MLX5_ST_SZ_BYTES(mad_ifc_in); + int err = -ENOMEM; + void *data; + void *resp; + u32 *out; + u32 *in; + + in = kzalloc(inlen, GFP_KERNEL); + out = kzalloc(outlen, GFP_KERNEL); + if (!in || !out) + goto out; + + MLX5_SET(mad_ifc_in, in, opcode, MLX5_CMD_OP_MAD_IFC); + MLX5_SET(mad_ifc_in, in, op_mod, opmod); + MLX5_SET(mad_ifc_in, in, port, port); + + data = MLX5_ADDR_OF(mad_ifc_in, in, mad); + memcpy(data, inb, MLX5_FLD_SZ_BYTES(mad_ifc_in, mad)); + + err = mlx5_cmd_exec(dev, in, inlen, out, outlen); + if (err) + goto out; + + resp = MLX5_ADDR_OF(mad_ifc_out, out, response_mad_packet); + memcpy(outb, resp, + MLX5_FLD_SZ_BYTES(mad_ifc_out, response_mad_packet)); + +out: + kfree(out); + kfree(in); + return err; +} diff --git a/drivers/infiniband/hw/mlx5/cmd.h b/drivers/infiniband/hw/mlx5/cmd.h index 1e76dc67a369..923a7b93f507 100644 --- a/drivers/infiniband/hw/mlx5/cmd.h +++ b/drivers/infiniband/hw/mlx5/cmd.h @@ -63,4 +63,6 @@ int mlx5_cmd_xrcd_alloc(struct mlx5_core_dev *dev, u32 *xrcdn, u16 uid); int mlx5_cmd_xrcd_dealloc(struct mlx5_core_dev *dev, u32 xrcdn, u16 uid); int mlx5_cmd_alloc_q_counter(struct mlx5_core_dev *dev, u16 *counter_id, u16 uid); +int mlx5_cmd_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb, + u16 opmod, u8 port); #endif /* MLX5_IB_CMD_H */ diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c index 558638468edb..6c529e6f3a01 100644 --- a/drivers/infiniband/hw/mlx5/mad.c +++ b/drivers/infiniband/hw/mlx5/mad.c @@ -36,6 +36,7 @@ #include #include #include "mlx5_ib.h" +#include "cmd.h" enum { MLX5_IB_VENDOR_CLASS1 = 0x9, @@ -51,9 +52,10 @@ static bool can_do_mad_ifc(struct mlx5_ib_dev *dev, u8 port_num, return dev->mdev->port_caps[port_num - 1].has_smi; } -int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey, - u8 port, const struct ib_wc *in_wc, const struct ib_grh *in_grh, - const void *in_mad, void *response_mad) +static int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, + int ignore_bkey, u8 port, const struct ib_wc *in_wc, + const struct ib_grh *in_grh, const void *in_mad, + void *response_mad) { u8 op_modifier = 0; @@ -68,7 +70,8 @@ int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey, if (ignore_bkey || !in_wc) op_modifier |= 0x2; - return mlx5_core_mad_ifc(dev->mdev, in_mad, response_mad, op_modifier, port); + return mlx5_cmd_mad_ifc(dev->mdev, in_mad, response_mad, op_modifier, + port); } static int process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index b06d3b1efea8..efe383c0ac86 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -1038,9 +1038,6 @@ void mlx5_ib_db_unmap_user(struct mlx5_ib_ucontext *context, struct mlx5_db *db) void __mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq); void mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq); void mlx5_ib_free_srq_wqe(struct mlx5_ib_srq *srq, int wqe_index); -int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey, - u8 port, const struct ib_wc *in_wc, const struct ib_grh *in_grh, - const void *in_mad, void *response_mad); struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, u32 flags, struct ib_udata *udata); int mlx5_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 9de9abacf7f6..0257731e6d42 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -13,7 +13,7 @@ obj-$(CONFIG_MLX5_CORE) += mlx5_core.o # mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ health.o mcg.o cq.o alloc.o qp.o port.o mr.o pd.o \ - mad.o transobj.o vport.o sriov.o fs_cmd.o fs_core.o \ + transobj.o vport.o sriov.o fs_cmd.o fs_core.o \ fs_counters.o rl.o lag.o dev.o events.o wq.o lib/gid.o \ lib/devcom.o diag/fs_tracepoint.o diag/fw_tracer.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mad.c b/drivers/net/ethernet/mellanox/mlx5/core/mad.c deleted file mode 100644 index 3a3b0005fd2b..000000000000 --- a/drivers/net/ethernet/mellanox/mlx5/core/mad.c +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include -#include -#include -#include -#include "mlx5_core.h" - -int mlx5_core_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb, - u16 opmod, u8 port) -{ - int outlen = MLX5_ST_SZ_BYTES(mad_ifc_out); - int inlen = MLX5_ST_SZ_BYTES(mad_ifc_in); - int err = -ENOMEM; - void *data; - void *resp; - u32 *out; - u32 *in; - - in = kzalloc(inlen, GFP_KERNEL); - out = kzalloc(outlen, GFP_KERNEL); - if (!in || !out) - goto out; - - MLX5_SET(mad_ifc_in, in, opcode, MLX5_CMD_OP_MAD_IFC); - MLX5_SET(mad_ifc_in, in, op_mod, opmod); - MLX5_SET(mad_ifc_in, in, port, port); - - data = MLX5_ADDR_OF(mad_ifc_in, in, mad); - memcpy(data, inb, MLX5_FLD_SZ_BYTES(mad_ifc_in, mad)); - - err = mlx5_cmd_exec(dev, in, inlen, out, outlen); - if (err) - goto out; - - resp = MLX5_ADDR_OF(mad_ifc_out, out, response_mad_packet); - memcpy(outb, resp, - MLX5_FLD_SZ_BYTES(mad_ifc_out, response_mad_packet)); - -out: - kfree(out); - kfree(in); - return err; -} -EXPORT_SYMBOL_GPL(mlx5_core_mad_ifc); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 54299251d40d..4e444863054a 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -897,8 +897,6 @@ int mlx5_core_query_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey, u32 *out, int outlen); int mlx5_core_alloc_pd(struct mlx5_core_dev *dev, u32 *pdn); int mlx5_core_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn); -int mlx5_core_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb, - u16 opmod, u8 port); int mlx5_pagealloc_init(struct mlx5_core_dev *dev); void mlx5_pagealloc_cleanup(struct mlx5_core_dev *dev); void mlx5_pagealloc_start(struct mlx5_core_dev *dev); -- cgit v1.2.3 From e355477ed9e4f401e3931043df97325d38552d54 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Fri, 18 Jan 2019 16:33:10 -0800 Subject: net/mlx5: Make mlx5_cmd_exec_cb() a safe API APIs that have deferred callbacks should have some kind of cleanup function that callers can use to fence the callbacks. Otherwise things like module unloading can lead to dangling function pointers, or worse. The IB MR code is the only place that calls this function and had a really poor attempt at creating this fence. Provide a good version in the core code as future patches will add more places that need this fence. Signed-off-by: Jason Gunthorpe Signed-off-by: Yishai Hadas Signed-off-by: Saeed Mahameed Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/mlx5_ib.h | 2 + drivers/infiniband/hw/mlx5/mr.c | 39 ++++--------------- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 55 ++++++++++++++++++++++++--- drivers/net/ethernet/mellanox/mlx5/core/mr.c | 11 +++--- include/linux/mlx5/driver.h | 32 +++++++++++++--- 5 files changed, 91 insertions(+), 48 deletions(-) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index efe383c0ac86..eedba0d2ec4b 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -587,6 +587,7 @@ struct mlx5_ib_mr { struct mlx5_ib_mr *parent; atomic_t num_leaf_free; wait_queue_head_t q_leaf_free; + struct mlx5_async_work cb_work; }; struct mlx5_ib_mw { @@ -944,6 +945,7 @@ struct mlx5_ib_dev { struct mlx5_memic memic; u16 devx_whitelist_uid; struct mlx5_srq_table srq_table; + struct mlx5_async_ctx async_ctx; }; static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq) diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index fd6ea1f75085..bf2b6ea23851 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -123,9 +123,10 @@ static void update_odp_mr(struct mlx5_ib_mr *mr) } #endif -static void reg_mr_callback(int status, void *context) +static void reg_mr_callback(int status, struct mlx5_async_work *context) { - struct mlx5_ib_mr *mr = context; + struct mlx5_ib_mr *mr = + container_of(context, struct mlx5_ib_mr, cb_work); struct mlx5_ib_dev *dev = mr->dev; struct mlx5_mr_cache *cache = &dev->cache; int c = order2idx(dev, mr->order); @@ -216,9 +217,9 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num) ent->pending++; spin_unlock_irq(&ent->lock); err = mlx5_core_create_mkey_cb(dev->mdev, &mr->mmkey, - in, inlen, + &dev->async_ctx, in, inlen, mr->out, sizeof(mr->out), - reg_mr_callback, mr); + reg_mr_callback, &mr->cb_work); if (err) { spin_lock_irq(&ent->lock); ent->pending--; @@ -679,6 +680,7 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev) return -ENOMEM; } + mlx5_cmd_init_async_ctx(dev->mdev, &dev->async_ctx); timer_setup(&dev->delay_timer, delay_time_func, 0); for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) { ent = &cache->ent[i]; @@ -725,33 +727,6 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev) return 0; } -static void wait_for_async_commands(struct mlx5_ib_dev *dev) -{ - struct mlx5_mr_cache *cache = &dev->cache; - struct mlx5_cache_ent *ent; - int total = 0; - int i; - int j; - - for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) { - ent = &cache->ent[i]; - for (j = 0 ; j < 1000; j++) { - if (!ent->pending) - break; - msleep(50); - } - } - for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) { - ent = &cache->ent[i]; - total += ent->pending; - } - - if (total) - mlx5_ib_warn(dev, "aborted while there are %d pending mr requests\n", total); - else - mlx5_ib_warn(dev, "done with all pending requests\n"); -} - int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev) { int i; @@ -763,12 +738,12 @@ int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev) flush_workqueue(dev->cache.wq); mlx5_mr_cache_debugfs_cleanup(dev); + mlx5_cmd_cleanup_async_ctx(&dev->async_ctx); for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) clean_keys(dev, i); destroy_workqueue(dev->cache.wq); - wait_for_async_commands(dev); del_timer_sync(&dev->delay_timer); return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 3e0fa8a8077b..a25a8c6f938e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -1711,12 +1711,57 @@ int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, } EXPORT_SYMBOL(mlx5_cmd_exec); -int mlx5_cmd_exec_cb(struct mlx5_core_dev *dev, void *in, int in_size, - void *out, int out_size, mlx5_cmd_cbk_t callback, - void *context) +void mlx5_cmd_init_async_ctx(struct mlx5_core_dev *dev, + struct mlx5_async_ctx *ctx) { - return cmd_exec(dev, in, in_size, out, out_size, callback, context, - false); + ctx->dev = dev; + /* Starts at 1 to avoid doing wake_up if we are not cleaning up */ + atomic_set(&ctx->num_inflight, 1); + init_waitqueue_head(&ctx->wait); +} +EXPORT_SYMBOL(mlx5_cmd_init_async_ctx); + +/** + * mlx5_cmd_cleanup_async_ctx - Clean up an async_ctx + * @ctx: The ctx to clean + * + * Upon return all callbacks given to mlx5_cmd_exec_cb() have been called. The + * caller must ensure that mlx5_cmd_exec_cb() is not called during or after + * the call mlx5_cleanup_async_ctx(). + */ +void mlx5_cmd_cleanup_async_ctx(struct mlx5_async_ctx *ctx) +{ + atomic_dec(&ctx->num_inflight); + wait_event(ctx->wait, atomic_read(&ctx->num_inflight) == 0); +} +EXPORT_SYMBOL(mlx5_cmd_cleanup_async_ctx); + +static void mlx5_cmd_exec_cb_handler(int status, void *_work) +{ + struct mlx5_async_work *work = _work; + struct mlx5_async_ctx *ctx = work->ctx; + + work->user_callback(status, work); + if (atomic_dec_and_test(&ctx->num_inflight)) + wake_up(&ctx->wait); +} + +int mlx5_cmd_exec_cb(struct mlx5_async_ctx *ctx, void *in, int in_size, + void *out, int out_size, mlx5_async_cbk_t callback, + struct mlx5_async_work *work) +{ + int ret; + + work->ctx = ctx; + work->user_callback = callback; + if (WARN_ON(!atomic_inc_not_zero(&ctx->num_inflight))) + return -EIO; + ret = cmd_exec(ctx->dev, in, in_size, out, out_size, + mlx5_cmd_exec_cb_handler, work, false); + if (ret && atomic_dec_and_test(&ctx->num_inflight)) + wake_up(&ctx->wait); + + return ret; } EXPORT_SYMBOL(mlx5_cmd_exec_cb); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/drivers/net/ethernet/mellanox/mlx5/core/mr.c index 0670165afd5f..ea744d8466ea 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mr.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c @@ -51,9 +51,10 @@ void mlx5_cleanup_mkey_table(struct mlx5_core_dev *dev) int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey, - u32 *in, int inlen, - u32 *out, int outlen, - mlx5_cmd_cbk_t callback, void *context) + struct mlx5_async_ctx *async_ctx, u32 *in, + int inlen, u32 *out, int outlen, + mlx5_async_cbk_t callback, + struct mlx5_async_work *context) { struct mlx5_mkey_table *table = &dev->priv.mkey_table; u32 lout[MLX5_ST_SZ_DW(create_mkey_out)] = {0}; @@ -71,7 +72,7 @@ int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev, MLX5_SET(mkc, mkc, mkey_7_0, key); if (callback) - return mlx5_cmd_exec_cb(dev, in, inlen, out, outlen, + return mlx5_cmd_exec_cb(async_ctx, in, inlen, out, outlen, callback, context); err = mlx5_cmd_exec(dev, in, inlen, lout, sizeof(lout)); @@ -105,7 +106,7 @@ int mlx5_core_create_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey, u32 *in, int inlen) { - return mlx5_core_create_mkey_cb(dev, mkey, in, inlen, + return mlx5_core_create_mkey_cb(dev, mkey, NULL, in, inlen, NULL, 0, NULL, NULL); } EXPORT_SYMBOL(mlx5_core_create_mkey); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 4e444863054a..039c9398614c 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -850,11 +850,30 @@ void mlx5_cmd_cleanup(struct mlx5_core_dev *dev); void mlx5_cmd_use_events(struct mlx5_core_dev *dev); void mlx5_cmd_use_polling(struct mlx5_core_dev *dev); +struct mlx5_async_ctx { + struct mlx5_core_dev *dev; + atomic_t num_inflight; + struct wait_queue_head wait; +}; + +struct mlx5_async_work; + +typedef void (*mlx5_async_cbk_t)(int status, struct mlx5_async_work *context); + +struct mlx5_async_work { + struct mlx5_async_ctx *ctx; + mlx5_async_cbk_t user_callback; +}; + +void mlx5_cmd_init_async_ctx(struct mlx5_core_dev *dev, + struct mlx5_async_ctx *ctx); +void mlx5_cmd_cleanup_async_ctx(struct mlx5_async_ctx *ctx); +int mlx5_cmd_exec_cb(struct mlx5_async_ctx *ctx, void *in, int in_size, + void *out, int out_size, mlx5_async_cbk_t callback, + struct mlx5_async_work *work); + int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, int out_size); -int mlx5_cmd_exec_cb(struct mlx5_core_dev *dev, void *in, int in_size, - void *out, int out_size, mlx5_cmd_cbk_t callback, - void *context); int mlx5_cmd_exec_polling(struct mlx5_core_dev *dev, void *in, int in_size, void *out, int out_size); void mlx5_cmd_mbox_status(void *out, u8 *status, u32 *syndrome); @@ -885,9 +904,10 @@ void mlx5_init_mkey_table(struct mlx5_core_dev *dev); void mlx5_cleanup_mkey_table(struct mlx5_core_dev *dev); int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey, - u32 *in, int inlen, - u32 *out, int outlen, - mlx5_cmd_cbk_t callback, void *context); + struct mlx5_async_ctx *async_ctx, u32 *in, + int inlen, u32 *out, int outlen, + mlx5_async_cbk_t callback, + struct mlx5_async_work *context); int mlx5_core_create_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey, u32 *in, int inlen); -- cgit v1.2.3 From 13054abbaa4f1fd4e6f3b4b63439ec033b4c8035 Mon Sep 17 00:00:00 2001 From: Vladis Dronov Date: Tue, 29 Jan 2019 11:58:35 +0100 Subject: HID: debug: fix the ring buffer implementation Ring buffer implementation in hid_debug_event() and hid_debug_events_read() is strange allowing lost or corrupted data. After commit 717adfdaf147 ("HID: debug: check length before copy_to_user()") it is possible to enter an infinite loop in hid_debug_events_read() by providing 0 as count, this locks up a system. Fix this by rewriting the ring buffer implementation with kfifo and simplify the code. This fixes CVE-2019-3819. v2: fix an execution logic and add a comment v3: use __set_current_state() instead of set_current_state() Link: https://bugzilla.redhat.com/show_bug.cgi?id=1669187 Cc: stable@vger.kernel.org # v4.18+ Fixes: cd667ce24796 ("HID: use debugfs for events/reports dumping") Fixes: 717adfdaf147 ("HID: debug: check length before copy_to_user()") Signed-off-by: Vladis Dronov Reviewed-by: Oleg Nesterov Signed-off-by: Benjamin Tissoires --- drivers/hid/hid-debug.c | 120 ++++++++++++++++++---------------------------- include/linux/hid-debug.h | 9 ++-- 2 files changed, 51 insertions(+), 78 deletions(-) (limited to 'include/linux') diff --git a/drivers/hid/hid-debug.c b/drivers/hid/hid-debug.c index c530476edba6..ac9fda1b5a72 100644 --- a/drivers/hid/hid-debug.c +++ b/drivers/hid/hid-debug.c @@ -30,6 +30,7 @@ #include #include +#include #include #include #include @@ -661,17 +662,12 @@ EXPORT_SYMBOL_GPL(hid_dump_device); /* enqueue string to 'events' ring buffer */ void hid_debug_event(struct hid_device *hdev, char *buf) { - unsigned i; struct hid_debug_list *list; unsigned long flags; spin_lock_irqsave(&hdev->debug_list_lock, flags); - list_for_each_entry(list, &hdev->debug_list, node) { - for (i = 0; buf[i]; i++) - list->hid_debug_buf[(list->tail + i) % HID_DEBUG_BUFSIZE] = - buf[i]; - list->tail = (list->tail + i) % HID_DEBUG_BUFSIZE; - } + list_for_each_entry(list, &hdev->debug_list, node) + kfifo_in(&list->hid_debug_fifo, buf, strlen(buf)); spin_unlock_irqrestore(&hdev->debug_list_lock, flags); wake_up_interruptible(&hdev->debug_wait); @@ -722,8 +718,7 @@ void hid_dump_input(struct hid_device *hdev, struct hid_usage *usage, __s32 valu hid_debug_event(hdev, buf); kfree(buf); - wake_up_interruptible(&hdev->debug_wait); - + wake_up_interruptible(&hdev->debug_wait); } EXPORT_SYMBOL_GPL(hid_dump_input); @@ -1083,8 +1078,8 @@ static int hid_debug_events_open(struct inode *inode, struct file *file) goto out; } - if (!(list->hid_debug_buf = kzalloc(HID_DEBUG_BUFSIZE, GFP_KERNEL))) { - err = -ENOMEM; + err = kfifo_alloc(&list->hid_debug_fifo, HID_DEBUG_FIFOSIZE, GFP_KERNEL); + if (err) { kfree(list); goto out; } @@ -1104,77 +1099,57 @@ static ssize_t hid_debug_events_read(struct file *file, char __user *buffer, size_t count, loff_t *ppos) { struct hid_debug_list *list = file->private_data; - int ret = 0, len; + int ret = 0, copied; DECLARE_WAITQUEUE(wait, current); mutex_lock(&list->read_mutex); - while (ret == 0) { - if (list->head == list->tail) { - add_wait_queue(&list->hdev->debug_wait, &wait); - set_current_state(TASK_INTERRUPTIBLE); - - while (list->head == list->tail) { - if (file->f_flags & O_NONBLOCK) { - ret = -EAGAIN; - break; - } - if (signal_pending(current)) { - ret = -ERESTARTSYS; - break; - } + if (kfifo_is_empty(&list->hid_debug_fifo)) { + add_wait_queue(&list->hdev->debug_wait, &wait); + set_current_state(TASK_INTERRUPTIBLE); + + while (kfifo_is_empty(&list->hid_debug_fifo)) { + if (file->f_flags & O_NONBLOCK) { + ret = -EAGAIN; + break; + } - if (!list->hdev || !list->hdev->debug) { - ret = -EIO; - set_current_state(TASK_RUNNING); - goto out; - } + if (signal_pending(current)) { + ret = -ERESTARTSYS; + break; + } - /* allow O_NONBLOCK from other threads */ - mutex_unlock(&list->read_mutex); - schedule(); - mutex_lock(&list->read_mutex); - set_current_state(TASK_INTERRUPTIBLE); + /* if list->hdev is NULL we cannot remove_wait_queue(). + * if list->hdev->debug is 0 then hid_debug_unregister() + * was already called and list->hdev is being destroyed. + * if we add remove_wait_queue() here we can hit a race. + */ + if (!list->hdev || !list->hdev->debug) { + ret = -EIO; + set_current_state(TASK_RUNNING); + goto out; } - set_current_state(TASK_RUNNING); - remove_wait_queue(&list->hdev->debug_wait, &wait); + /* allow O_NONBLOCK from other threads */ + mutex_unlock(&list->read_mutex); + schedule(); + mutex_lock(&list->read_mutex); + set_current_state(TASK_INTERRUPTIBLE); } - if (ret) - goto out; + __set_current_state(TASK_RUNNING); + remove_wait_queue(&list->hdev->debug_wait, &wait); - /* pass the ringbuffer contents to userspace */ -copy_rest: - if (list->tail == list->head) + if (ret) goto out; - if (list->tail > list->head) { - len = list->tail - list->head; - if (len > count) - len = count; - - if (copy_to_user(buffer + ret, &list->hid_debug_buf[list->head], len)) { - ret = -EFAULT; - goto out; - } - ret += len; - list->head += len; - } else { - len = HID_DEBUG_BUFSIZE - list->head; - if (len > count) - len = count; - - if (copy_to_user(buffer, &list->hid_debug_buf[list->head], len)) { - ret = -EFAULT; - goto out; - } - list->head = 0; - ret += len; - count -= len; - if (count > 0) - goto copy_rest; - } - } + + /* pass the fifo content to userspace, locking is not needed with only + * one concurrent reader and one concurrent writer + */ + ret = kfifo_to_user(&list->hid_debug_fifo, buffer, count, &copied); + if (ret) + goto out; + ret = copied; out: mutex_unlock(&list->read_mutex); return ret; @@ -1185,7 +1160,7 @@ static __poll_t hid_debug_events_poll(struct file *file, poll_table *wait) struct hid_debug_list *list = file->private_data; poll_wait(file, &list->hdev->debug_wait, wait); - if (list->head != list->tail) + if (!kfifo_is_empty(&list->hid_debug_fifo)) return EPOLLIN | EPOLLRDNORM; if (!list->hdev->debug) return EPOLLERR | EPOLLHUP; @@ -1200,7 +1175,7 @@ static int hid_debug_events_release(struct inode *inode, struct file *file) spin_lock_irqsave(&list->hdev->debug_list_lock, flags); list_del(&list->node); spin_unlock_irqrestore(&list->hdev->debug_list_lock, flags); - kfree(list->hid_debug_buf); + kfifo_free(&list->hid_debug_fifo); kfree(list); return 0; @@ -1246,4 +1221,3 @@ void hid_debug_exit(void) { debugfs_remove_recursive(hid_debug_root); } - diff --git a/include/linux/hid-debug.h b/include/linux/hid-debug.h index 8663f216c563..2d6100edf204 100644 --- a/include/linux/hid-debug.h +++ b/include/linux/hid-debug.h @@ -24,7 +24,10 @@ #ifdef CONFIG_DEBUG_FS +#include + #define HID_DEBUG_BUFSIZE 512 +#define HID_DEBUG_FIFOSIZE 512 void hid_dump_input(struct hid_device *, struct hid_usage *, __s32); void hid_dump_report(struct hid_device *, int , u8 *, int); @@ -37,11 +40,8 @@ void hid_debug_init(void); void hid_debug_exit(void); void hid_debug_event(struct hid_device *, char *); - struct hid_debug_list { - char *hid_debug_buf; - int head; - int tail; + DECLARE_KFIFO_PTR(hid_debug_fifo, char); struct fasync_struct *fasync; struct hid_device *hdev; struct list_head node; @@ -64,4 +64,3 @@ struct hid_debug_list { #endif #endif - -- cgit v1.2.3 From b284909abad48b07d3071a9fc9b5692b3e64914b Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 30 Jan 2019 07:13:58 -0600 Subject: cpu/hotplug: Fix "SMT disabled by BIOS" detection for KVM With the following commit: 73d5e2b47264 ("cpu/hotplug: detect SMT disabled by BIOS") ... the hotplug code attempted to detect when SMT was disabled by BIOS, in which case it reported SMT as permanently disabled. However, that code broke a virt hotplug scenario, where the guest is booted with only primary CPU threads, and a sibling is brought online later. The problem is that there doesn't seem to be a way to reliably distinguish between the HW "SMT disabled by BIOS" case and the virt "sibling not yet brought online" case. So the above-mentioned commit was a bit misguided, as it permanently disabled SMT for both cases, preventing future virt sibling hotplugs. Going back and reviewing the original problems which were attempted to be solved by that commit, when SMT was disabled in BIOS: 1) /sys/devices/system/cpu/smt/control showed "on" instead of "notsupported"; and 2) vmx_vm_init() was incorrectly showing the L1TF_MSG_SMT warning. I'd propose that we instead consider #1 above to not actually be a problem. Because, at least in the virt case, it's possible that SMT wasn't disabled by BIOS and a sibling thread could be brought online later. So it makes sense to just always default the smt control to "on" to allow for that possibility (assuming cpuid indicates that the CPU supports SMT). The real problem is #2, which has a simple fix: change vmx_vm_init() to query the actual current SMT state -- i.e., whether any siblings are currently online -- instead of looking at the SMT "control" sysfs value. So fix it by: a) reverting the original "fix" and its followup fix: 73d5e2b47264 ("cpu/hotplug: detect SMT disabled by BIOS") bc2d8d262cba ("cpu/hotplug: Fix SMT supported evaluation") and b) changing vmx_vm_init() to query the actual current SMT state -- instead of the sysfs control value -- to determine whether the L1TF warning is needed. This also requires the 'sched_smt_present' variable to exported, instead of 'cpu_smt_control'. Fixes: 73d5e2b47264 ("cpu/hotplug: detect SMT disabled by BIOS") Reported-by: Igor Mammedov Signed-off-by: Josh Poimboeuf Signed-off-by: Thomas Gleixner Cc: Joe Mario Cc: Jiri Kosina Cc: Peter Zijlstra Cc: kvm@vger.kernel.org Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/e3a85d585da28cc333ecbc1e78ee9216e6da9396.1548794349.git.jpoimboe@redhat.com --- arch/x86/kernel/cpu/bugs.c | 2 +- arch/x86/kvm/vmx/vmx.c | 3 ++- include/linux/cpu.h | 2 -- kernel/cpu.c | 33 ++++----------------------------- kernel/sched/fair.c | 1 + kernel/smp.c | 2 -- 6 files changed, 8 insertions(+), 35 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 1de0f4170178..01874d54f4fd 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -71,7 +71,7 @@ void __init check_bugs(void) * identify_boot_cpu() initialized SMT support information, let the * core code know. */ - cpu_smt_check_topology_early(); + cpu_smt_check_topology(); if (!IS_ENABLED(CONFIG_SMP)) { pr_info("CPU: "); diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 4341175339f3..95d618045001 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -6823,7 +6824,7 @@ static int vmx_vm_init(struct kvm *kvm) * Warn upon starting the first VM in a potentially * insecure environment. */ - if (cpu_smt_control == CPU_SMT_ENABLED) + if (sched_smt_active()) pr_warn_once(L1TF_MSG_SMT); if (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_NEVER) pr_warn_once(L1TF_MSG_L1D); diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 218df7f4d3e1..5041357d0297 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -180,12 +180,10 @@ enum cpuhp_smt_control { #if defined(CONFIG_SMP) && defined(CONFIG_HOTPLUG_SMT) extern enum cpuhp_smt_control cpu_smt_control; extern void cpu_smt_disable(bool force); -extern void cpu_smt_check_topology_early(void); extern void cpu_smt_check_topology(void); #else # define cpu_smt_control (CPU_SMT_ENABLED) static inline void cpu_smt_disable(bool force) { } -static inline void cpu_smt_check_topology_early(void) { } static inline void cpu_smt_check_topology(void) { } #endif diff --git a/kernel/cpu.c b/kernel/cpu.c index c0c7f64573ed..d1c6d152da89 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -376,9 +376,6 @@ void __weak arch_smt_update(void) { } #ifdef CONFIG_HOTPLUG_SMT enum cpuhp_smt_control cpu_smt_control __read_mostly = CPU_SMT_ENABLED; -EXPORT_SYMBOL_GPL(cpu_smt_control); - -static bool cpu_smt_available __read_mostly; void __init cpu_smt_disable(bool force) { @@ -397,25 +394,11 @@ void __init cpu_smt_disable(bool force) /* * The decision whether SMT is supported can only be done after the full - * CPU identification. Called from architecture code before non boot CPUs - * are brought up. - */ -void __init cpu_smt_check_topology_early(void) -{ - if (!topology_smt_supported()) - cpu_smt_control = CPU_SMT_NOT_SUPPORTED; -} - -/* - * If SMT was disabled by BIOS, detect it here, after the CPUs have been - * brought online. This ensures the smt/l1tf sysfs entries are consistent - * with reality. cpu_smt_available is set to true during the bringup of non - * boot CPUs when a SMT sibling is detected. Note, this may overwrite - * cpu_smt_control's previous setting. + * CPU identification. Called from architecture code. */ void __init cpu_smt_check_topology(void) { - if (!cpu_smt_available) + if (!topology_smt_supported()) cpu_smt_control = CPU_SMT_NOT_SUPPORTED; } @@ -428,18 +411,10 @@ early_param("nosmt", smt_cmdline_disable); static inline bool cpu_smt_allowed(unsigned int cpu) { - if (topology_is_primary_thread(cpu)) + if (cpu_smt_control == CPU_SMT_ENABLED) return true; - /* - * If the CPU is not a 'primary' thread and the booted_once bit is - * set then the processor has SMT support. Store this information - * for the late check of SMT support in cpu_smt_check_topology(). - */ - if (per_cpu(cpuhp_state, cpu).booted_once) - cpu_smt_available = true; - - if (cpu_smt_control == CPU_SMT_ENABLED) + if (topology_is_primary_thread(cpu)) return true; /* diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 50aa2aba69bd..310d0637fe4b 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5980,6 +5980,7 @@ static inline int find_idlest_cpu(struct sched_domain *sd, struct task_struct *p #ifdef CONFIG_SCHED_SMT DEFINE_STATIC_KEY_FALSE(sched_smt_present); +EXPORT_SYMBOL_GPL(sched_smt_present); static inline void set_idle_cores(int cpu, int val) { diff --git a/kernel/smp.c b/kernel/smp.c index 163c451af42e..f4cf1b0bb3b8 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -584,8 +584,6 @@ void __init smp_init(void) num_nodes, (num_nodes > 1 ? "s" : ""), num_cpus, (num_cpus > 1 ? "s" : "")); - /* Final decision about SMT support */ - cpu_smt_check_topology(); /* Any cleanup work */ smp_cpus_done(setup_max_cpus); } -- cgit v1.2.3 From 7d10f70fc198877b43d92bdcd7604279788b9568 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Wed, 30 Jan 2019 13:52:37 -0500 Subject: fs: Don't need to put list_lru into its own cacheline The list_lru structure is essentially just a pointer to a table of per-node LRU lists. Even if CONFIG_MEMCG_KMEM is defined, the list field is just used for LRU list registration and shrinker_id is set at initialization. Those fields won't need to be touched that often. So there is no point to make the list_lru structures to sit in their own cachelines. Signed-off-by: Waiman Long Reviewed-by: Dave Chinner Signed-off-by: Linus Torvalds --- include/linux/fs.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 811c77743dad..29d8e2cfed0e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1479,11 +1479,12 @@ struct super_block { struct user_namespace *s_user_ns; /* - * Keep the lru lists last in the structure so they always sit on their - * own individual cachelines. + * The list_lru structure is essentially just a pointer to a table + * of per-node lru lists, each of which has its own spinlock. + * There is no need to put them into separate cachelines. */ - struct list_lru s_dentry_lru ____cacheline_aligned_in_smp; - struct list_lru s_inode_lru ____cacheline_aligned_in_smp; + struct list_lru s_dentry_lru; + struct list_lru s_inode_lru; struct rcu_head rcu; struct work_struct destroy_work; -- cgit v1.2.3 From af0c9af1b3f66052c369d08be3f60fa9a9559e48 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Wed, 30 Jan 2019 13:52:38 -0500 Subject: fs/dcache: Track & report number of negative dentries The current dentry number tracking code doesn't distinguish between positive & negative dentries. It just reports the total number of dentries in the LRU lists. As excessive number of negative dentries can have an impact on system performance, it will be wise to track the number of positive and negative dentries separately. This patch adds tracking for the total number of negative dentries in the system LRU lists and reports it in the 5th field in the /proc/sys/fs/dentry-state file. The number, however, does not include negative dentries that are in flight but not in the LRU yet as well as those in the shrinker lists which are on the way out anyway. The number of positive dentries in the LRU lists can be roughly found by subtracting the number of negative dentries from the unused count. Matthew Wilcox had confirmed that since the introduction of the dentry_stat structure in 2.1.60, the dummy array was there, probably for future extension. They were not replacements of pre-existing fields. So no sane applications that read the value of /proc/sys/fs/dentry-state will do dummy thing if the last 2 fields of the sysctl parameter are not zero. IOW, it will be safe to use one of the dummy array entry for negative dentry count. Signed-off-by: Waiman Long Signed-off-by: Linus Torvalds --- Documentation/sysctl/fs.txt | 26 ++++++++++++++++---------- fs/dcache.c | 32 ++++++++++++++++++++++++++++++++ include/linux/dcache.h | 7 ++++--- 3 files changed, 52 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/Documentation/sysctl/fs.txt b/Documentation/sysctl/fs.txt index 819caf8ca05f..58649bd4fcfc 100644 --- a/Documentation/sysctl/fs.txt +++ b/Documentation/sysctl/fs.txt @@ -56,26 +56,32 @@ of any kernel data structures. dentry-state: -From linux/fs/dentry.c: +From linux/include/linux/dcache.h: -------------------------------------------------------------- -struct { +struct dentry_stat_t dentry_stat { int nr_dentry; int nr_unused; int age_limit; /* age in seconds */ int want_pages; /* pages requested by system */ - int dummy[2]; -} dentry_stat = {0, 0, 45, 0,}; --------------------------------------------------------------- - -Dentries are dynamically allocated and deallocated, and -nr_dentry seems to be 0 all the time. Hence it's safe to -assume that only nr_unused, age_limit and want_pages are -used. Nr_unused seems to be exactly what its name says. + int nr_negative; /* # of unused negative dentries */ + int dummy; /* Reserved for future use */ +}; +-------------------------------------------------------------- + +Dentries are dynamically allocated and deallocated. + +nr_dentry shows the total number of dentries allocated (active ++ unused). nr_unused shows the number of dentries that are not +actively used, but are saved in the LRU list for future reuse. + Age_limit is the age in seconds after which dcache entries can be reclaimed when memory is short and want_pages is nonzero when shrink_dcache_pages() has been called and the dcache isn't pruned yet. +nr_negative shows the number of unused dentries that are also +negative dentries which do not mapped to actual files. + ============================================================== dquot-max & dquot-nr: diff --git a/fs/dcache.c b/fs/dcache.c index 44e5652b2664..aac41adf4743 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -119,6 +119,7 @@ struct dentry_stat_t dentry_stat = { static DEFINE_PER_CPU(long, nr_dentry); static DEFINE_PER_CPU(long, nr_dentry_unused); +static DEFINE_PER_CPU(long, nr_dentry_negative); #if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS) @@ -152,11 +153,22 @@ static long get_nr_dentry_unused(void) return sum < 0 ? 0 : sum; } +static long get_nr_dentry_negative(void) +{ + int i; + long sum = 0; + + for_each_possible_cpu(i) + sum += per_cpu(nr_dentry_negative, i); + return sum < 0 ? 0 : sum; +} + int proc_nr_dentry(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { dentry_stat.nr_dentry = get_nr_dentry(); dentry_stat.nr_unused = get_nr_dentry_unused(); + dentry_stat.nr_negative = get_nr_dentry_negative(); return proc_doulongvec_minmax(table, write, buffer, lenp, ppos); } #endif @@ -317,6 +329,8 @@ static inline void __d_clear_type_and_inode(struct dentry *dentry) flags &= ~(DCACHE_ENTRY_TYPE | DCACHE_FALLTHRU); WRITE_ONCE(dentry->d_flags, flags); dentry->d_inode = NULL; + if (dentry->d_flags & DCACHE_LRU_LIST) + this_cpu_inc(nr_dentry_negative); } static void dentry_free(struct dentry *dentry) @@ -371,6 +385,11 @@ static void dentry_unlink_inode(struct dentry * dentry) * The per-cpu "nr_dentry_unused" counters are updated with * the DCACHE_LRU_LIST bit. * + * The per-cpu "nr_dentry_negative" counters are only updated + * when deleted from or added to the per-superblock LRU list, not + * from/to the shrink list. That is to avoid an unneeded dec/inc + * pair when moving from LRU to shrink list in select_collect(). + * * These helper functions make sure we always follow the * rules. d_lock must be held by the caller. */ @@ -380,6 +399,8 @@ static void d_lru_add(struct dentry *dentry) D_FLAG_VERIFY(dentry, 0); dentry->d_flags |= DCACHE_LRU_LIST; this_cpu_inc(nr_dentry_unused); + if (d_is_negative(dentry)) + this_cpu_inc(nr_dentry_negative); WARN_ON_ONCE(!list_lru_add(&dentry->d_sb->s_dentry_lru, &dentry->d_lru)); } @@ -388,6 +409,8 @@ static void d_lru_del(struct dentry *dentry) D_FLAG_VERIFY(dentry, DCACHE_LRU_LIST); dentry->d_flags &= ~DCACHE_LRU_LIST; this_cpu_dec(nr_dentry_unused); + if (d_is_negative(dentry)) + this_cpu_dec(nr_dentry_negative); WARN_ON_ONCE(!list_lru_del(&dentry->d_sb->s_dentry_lru, &dentry->d_lru)); } @@ -418,6 +441,8 @@ static void d_lru_isolate(struct list_lru_one *lru, struct dentry *dentry) D_FLAG_VERIFY(dentry, DCACHE_LRU_LIST); dentry->d_flags &= ~DCACHE_LRU_LIST; this_cpu_dec(nr_dentry_unused); + if (d_is_negative(dentry)) + this_cpu_dec(nr_dentry_negative); list_lru_isolate(lru, &dentry->d_lru); } @@ -426,6 +451,8 @@ static void d_lru_shrink_move(struct list_lru_one *lru, struct dentry *dentry, { D_FLAG_VERIFY(dentry, DCACHE_LRU_LIST); dentry->d_flags |= DCACHE_SHRINK_LIST; + if (d_is_negative(dentry)) + this_cpu_dec(nr_dentry_negative); list_lru_isolate_move(lru, &dentry->d_lru, list); } @@ -1816,6 +1843,11 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode) WARN_ON(d_in_lookup(dentry)); spin_lock(&dentry->d_lock); + /* + * Decrement negative dentry count if it was in the LRU list. + */ + if (dentry->d_flags & DCACHE_LRU_LIST) + this_cpu_dec(nr_dentry_negative); hlist_add_head(&dentry->d_u.d_alias, &inode->i_dentry); raw_write_seqcount_begin(&dentry->d_seq); __d_set_inode_and_type(dentry, inode, add_flags); diff --git a/include/linux/dcache.h b/include/linux/dcache.h index ef4b70f64f33..60996e64c579 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -62,9 +62,10 @@ extern const struct qstr slash_name; struct dentry_stat_t { long nr_dentry; long nr_unused; - long age_limit; /* age in seconds */ - long want_pages; /* pages requested by system */ - long dummy[2]; + long age_limit; /* age in seconds */ + long want_pages; /* pages requested by system */ + long nr_negative; /* # of unused negative dentries */ + long dummy; /* Reserved for future use */ }; extern struct dentry_stat_t dentry_stat; -- cgit v1.2.3 From 15efb47dc560849d0c07db96fdad5121f2cf736e Mon Sep 17 00:00:00 2001 From: Vincent Guittot Date: Wed, 30 Jan 2019 18:26:02 +0100 Subject: PM-runtime: Fix deadlock with ktime_get() A deadlock has been seen when swicthing clocksources which use PM-runtime. The call path is: change_clocksource ... write_seqcount_begin ... timekeeping_update ... sh_cmt_clocksource_enable ... rpm_resume pm_runtime_mark_last_busy ktime_get do read_seqcount_begin while read_seqcount_retry .... write_seqcount_end Although we should be safe because we haven't yet changed the clocksource at that time, we can't do that because of seqcount protection. Use ktime_get_mono_fast_ns() instead which is lock safe for such cases. With ktime_get_mono_fast_ns, the timestamp is not guaranteed to be monotonic across an update and as a result can goes backward. According to update_fast_timekeeper() description: "In the worst case, this can result is a slightly wrong timestamp (a few nanoseconds)". For PM-runtime autosuspend, this means only that the suspend decision may be slightly suboptimal. Fixes: 8234f6734c5d ("PM-runtime: Switch autosuspend over to using hrtimers") Reported-by: Biju Das Signed-off-by: Vincent Guittot Reviewed-by: Ulf Hansson Signed-off-by: Rafael J. Wysocki --- drivers/base/power/runtime.c | 10 +++++----- include/linux/pm_runtime.h | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index 457be03b744d..0ea2139c50d8 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -130,7 +130,7 @@ u64 pm_runtime_autosuspend_expiration(struct device *dev) { int autosuspend_delay; u64 last_busy, expires = 0; - u64 now = ktime_to_ns(ktime_get()); + u64 now = ktime_get_mono_fast_ns(); if (!dev->power.use_autosuspend) goto out; @@ -909,7 +909,7 @@ static enum hrtimer_restart pm_suspend_timer_fn(struct hrtimer *timer) * If 'expires' is after the current time, we've been called * too early. */ - if (expires > 0 && expires < ktime_to_ns(ktime_get())) { + if (expires > 0 && expires < ktime_get_mono_fast_ns()) { dev->power.timer_expires = 0; rpm_suspend(dev, dev->power.timer_autosuspends ? (RPM_ASYNC | RPM_AUTO) : RPM_ASYNC); @@ -928,7 +928,7 @@ static enum hrtimer_restart pm_suspend_timer_fn(struct hrtimer *timer) int pm_schedule_suspend(struct device *dev, unsigned int delay) { unsigned long flags; - ktime_t expires; + u64 expires; int retval; spin_lock_irqsave(&dev->power.lock, flags); @@ -945,8 +945,8 @@ int pm_schedule_suspend(struct device *dev, unsigned int delay) /* Other scheduled or pending requests need to be canceled. */ pm_runtime_cancel_pending(dev); - expires = ktime_add(ktime_get(), ms_to_ktime(delay)); - dev->power.timer_expires = ktime_to_ns(expires); + expires = ktime_get_mono_fast_ns() + (u64)delay * NSEC_PER_MSEC; + dev->power.timer_expires = expires; dev->power.timer_autosuspends = 0; hrtimer_start(&dev->power.suspend_timer, expires, HRTIMER_MODE_ABS); diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index 54af4eef169f..fed5be706bc9 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -105,7 +105,7 @@ static inline bool pm_runtime_callbacks_present(struct device *dev) static inline void pm_runtime_mark_last_busy(struct device *dev) { - WRITE_ONCE(dev->power.last_busy, ktime_to_ns(ktime_get())); + WRITE_ONCE(dev->power.last_busy, ktime_get_mono_fast_ns()); } static inline bool pm_runtime_is_irq_safe(struct device *dev) -- cgit v1.2.3 From d5256083f62e2720f75bb3c5a928a0afe47d6bc3 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 30 Jan 2019 12:49:48 +0100 Subject: ipvlan, l3mdev: fix broken l3s mode wrt local routes While implementing ipvlan l3 and l3s mode for kubernetes CNI plugin, I ran into the issue that while l3 mode is working fine, l3s mode does not have any connectivity to kube-apiserver and hence all pods end up in Error state as well. The ipvlan master device sits on top of a bond device and hostns traffic to kube-apiserver (also running in hostns) is DNATed from 10.152.183.1:443 to 139.178.29.207:37573 where the latter is the address of the bond0. While in l3 mode, a curl to https://10.152.183.1:443 or to https://139.178.29.207:37573 works fine from hostns, neither of them do in case of l3s. In the latter only a curl to https://127.0.0.1:37573 appeared to work where for local addresses of bond0 I saw kernel suddenly starting to emit ARP requests to query HW address of bond0 which remained unanswered and neighbor entries in INCOMPLETE state. These ARP requests only happen while in l3s. Debugging this further, I found the issue is that l3s mode is piggy- backing on l3 master device, and in this case local routes are using l3mdev_master_dev_rcu(dev) instead of net->loopback_dev as per commit f5a0aab84b74 ("net: ipv4: dst for local input routes should use l3mdev if relevant") and 5f02ce24c269 ("net: l3mdev: Allow the l3mdev to be a loopback"). I found that reverting them back into using the net->loopback_dev fixed ipvlan l3s connectivity and got everything working for the CNI. Now judging from 4fbae7d83c98 ("ipvlan: Introduce l3s mode") and the l3mdev paper in [0] the only sole reason why ipvlan l3s is relying on l3 master device is to get the l3mdev_ip_rcv() receive hook for setting the dst entry of the input route without adding its own ipvlan specific hacks into the receive path, however, any l3 domain semantics beyond just that are breaking l3s operation. Note that ipvlan also has the ability to dynamically switch its internal operation from l3 to l3s for all ports via ipvlan_set_port_mode() at runtime. In any case, l3 vs l3s soley distinguishes itself by 'de-confusing' netfilter through switching skb->dev to ipvlan slave device late in NF_INET_LOCAL_IN before handing the skb to L4. Minimal fix taken here is to add a IFF_L3MDEV_RX_HANDLER flag which, if set from ipvlan setup, gets us only the wanted l3mdev_l3_rcv() hook without any additional l3mdev semantics on top. This should also have minimal impact since dev->priv_flags is already hot in cache. With this set, l3s mode is working fine and I also get things like masquerading pod traffic on the ipvlan master properly working. [0] https://netdevconf.org/1.2/papers/ahern-what-is-l3mdev-paper.pdf Fixes: f5a0aab84b74 ("net: ipv4: dst for local input routes should use l3mdev if relevant") Fixes: 5f02ce24c269 ("net: l3mdev: Allow the l3mdev to be a loopback") Fixes: 4fbae7d83c98 ("ipvlan: Introduce l3s mode") Signed-off-by: Daniel Borkmann Cc: Mahesh Bandewar Cc: David Ahern Cc: Florian Westphal Cc: Martynas Pumputis Acked-by: David Ahern Signed-off-by: David S. Miller --- drivers/net/ipvlan/ipvlan_main.c | 6 +++--- include/linux/netdevice.h | 8 ++++++++ include/net/l3mdev.h | 3 ++- 3 files changed, 13 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c index 19bdde60680c..7cdac77d0c68 100644 --- a/drivers/net/ipvlan/ipvlan_main.c +++ b/drivers/net/ipvlan/ipvlan_main.c @@ -100,12 +100,12 @@ static int ipvlan_set_port_mode(struct ipvl_port *port, u16 nval, err = ipvlan_register_nf_hook(read_pnet(&port->pnet)); if (!err) { mdev->l3mdev_ops = &ipvl_l3mdev_ops; - mdev->priv_flags |= IFF_L3MDEV_MASTER; + mdev->priv_flags |= IFF_L3MDEV_RX_HANDLER; } else goto fail; } else if (port->mode == IPVLAN_MODE_L3S) { /* Old mode was L3S */ - mdev->priv_flags &= ~IFF_L3MDEV_MASTER; + mdev->priv_flags &= ~IFF_L3MDEV_RX_HANDLER; ipvlan_unregister_nf_hook(read_pnet(&port->pnet)); mdev->l3mdev_ops = NULL; } @@ -167,7 +167,7 @@ static void ipvlan_port_destroy(struct net_device *dev) struct sk_buff *skb; if (port->mode == IPVLAN_MODE_L3S) { - dev->priv_flags &= ~IFF_L3MDEV_MASTER; + dev->priv_flags &= ~IFF_L3MDEV_RX_HANDLER; ipvlan_unregister_nf_hook(dev_net(dev)); dev->l3mdev_ops = NULL; } diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 1377d085ef99..86dbb3e29139 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1483,6 +1483,7 @@ struct net_device_ops { * @IFF_NO_RX_HANDLER: device doesn't support the rx_handler hook * @IFF_FAILOVER: device is a failover master device * @IFF_FAILOVER_SLAVE: device is lower dev of a failover master device + * @IFF_L3MDEV_RX_HANDLER: only invoke the rx handler of L3 master device */ enum netdev_priv_flags { IFF_802_1Q_VLAN = 1<<0, @@ -1514,6 +1515,7 @@ enum netdev_priv_flags { IFF_NO_RX_HANDLER = 1<<26, IFF_FAILOVER = 1<<27, IFF_FAILOVER_SLAVE = 1<<28, + IFF_L3MDEV_RX_HANDLER = 1<<29, }; #define IFF_802_1Q_VLAN IFF_802_1Q_VLAN @@ -1544,6 +1546,7 @@ enum netdev_priv_flags { #define IFF_NO_RX_HANDLER IFF_NO_RX_HANDLER #define IFF_FAILOVER IFF_FAILOVER #define IFF_FAILOVER_SLAVE IFF_FAILOVER_SLAVE +#define IFF_L3MDEV_RX_HANDLER IFF_L3MDEV_RX_HANDLER /** * struct net_device - The DEVICE structure. @@ -4549,6 +4552,11 @@ static inline bool netif_supports_nofcs(struct net_device *dev) return dev->priv_flags & IFF_SUPP_NOFCS; } +static inline bool netif_has_l3_rx_handler(const struct net_device *dev) +{ + return dev->priv_flags & IFF_L3MDEV_RX_HANDLER; +} + static inline bool netif_is_l3_master(const struct net_device *dev) { return dev->priv_flags & IFF_L3MDEV_MASTER; diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h index 78fa0ac4613c..5175fd63cd82 100644 --- a/include/net/l3mdev.h +++ b/include/net/l3mdev.h @@ -153,7 +153,8 @@ struct sk_buff *l3mdev_l3_rcv(struct sk_buff *skb, u16 proto) if (netif_is_l3_slave(skb->dev)) master = netdev_master_upper_dev_get_rcu(skb->dev); - else if (netif_is_l3_master(skb->dev)) + else if (netif_is_l3_master(skb->dev) || + netif_has_l3_rx_handler(skb->dev)) master = skb->dev; if (master && master->l3mdev_ops->l3mdev_l3_rcv) -- cgit v1.2.3 From 4ec5302fa906ec9d86597b236f62315bacdb9622 Mon Sep 17 00:00:00 2001 From: Jose Abreu Date: Wed, 30 Jan 2019 15:54:19 +0100 Subject: net: stmmac: Fallback to Platform Data clock in Watchdog conversion If we don't have DT then stmmac_clk will not be available. Let's add a new Platform Data field so that we can specify the refclk by this mean. This way we can still use the coalesce command in PCI based setups. Signed-off-by: Jose Abreu Cc: Joao Pinto Cc: David S. Miller Cc: Giuseppe Cavallaro Cc: Alexandre Torgue Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c | 14 ++++++++++---- include/linux/stmmac.h | 1 + 2 files changed, 11 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c index d1f61c25d82b..5d85742a2be0 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c @@ -721,8 +721,11 @@ static u32 stmmac_usec2riwt(u32 usec, struct stmmac_priv *priv) { unsigned long clk = clk_get_rate(priv->plat->stmmac_clk); - if (!clk) - return 0; + if (!clk) { + clk = priv->plat->clk_ref_rate; + if (!clk) + return 0; + } return (usec * (clk / 1000000)) / 256; } @@ -731,8 +734,11 @@ static u32 stmmac_riwt2usec(u32 riwt, struct stmmac_priv *priv) { unsigned long clk = clk_get_rate(priv->plat->stmmac_clk); - if (!clk) - return 0; + if (!clk) { + clk = priv->plat->clk_ref_rate; + if (!clk) + return 0; + } return (riwt * 256) / (clk / 1000000); } diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 7ddfc65586b0..4335bd771ce5 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -184,6 +184,7 @@ struct plat_stmmacenet_data { struct clk *pclk; struct clk *clk_ptp_ref; unsigned int clk_ptp_rate; + unsigned int clk_ref_rate; struct reset_control *stmmac_rst; struct stmmac_axi *axi; int has_gmac4; -- cgit v1.2.3 From 56841070ccc87b463ac037d2d1f2beb8e5e35f0c Mon Sep 17 00:00:00 2001 From: Zenghui Yu Date: Thu, 31 Jan 2019 11:19:43 +0000 Subject: irqchip/gic-v3-its: Fix ITT_entry_size accessor According to ARM IHI 0069C (ID070116), we should use GITS_TYPER's bits [7:4] as ITT_entry_size instead of [8:4]. Although this is pretty annoying, it only results in a potential over-allocation of memory, and nothing bad happens. Fixes: 3dfa576bfb45 ("irqchip/gic-v3-its: Add probing for VLPI properties") Signed-off-by: Zenghui Yu [maz: massaged subject and commit message] Signed-off-by: Marc Zyngier --- include/linux/irqchip/arm-gic-v3.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index 071b4cbdf010..c848a7cc502e 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -319,7 +319,7 @@ #define GITS_TYPER_PLPIS (1UL << 0) #define GITS_TYPER_VLPIS (1UL << 1) #define GITS_TYPER_ITT_ENTRY_SIZE_SHIFT 4 -#define GITS_TYPER_ITT_ENTRY_SIZE(r) ((((r) >> GITS_TYPER_ITT_ENTRY_SIZE_SHIFT) & 0x1f) + 1) +#define GITS_TYPER_ITT_ENTRY_SIZE(r) ((((r) >> GITS_TYPER_ITT_ENTRY_SIZE_SHIFT) & 0xf) + 1) #define GITS_TYPER_IDBITS_SHIFT 8 #define GITS_TYPER_DEVBITS_SHIFT 13 #define GITS_TYPER_DEVBITS(r) ((((r) >> GITS_TYPER_DEVBITS_SHIFT) & 0x1f) + 1) -- cgit v1.2.3 From 9a6d5488002fdca7134a0e59b0ae252f61042810 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 30 Jan 2019 08:41:40 -0700 Subject: ide: ensure atapi sense request aren't preempted There's an issue with how sense requests are handled in IDE. If ide-cd encounters an error, it queues a sense request. With how IDE request handling is done, this is the next request we need to handle. But it's impossible to guarantee this, as another request could come in between the sense being queued, and ->queue_rq() being run and handling it. If that request ALSO fails, then we attempt to doubly queue the single sense request we have. Since we only support one active request at the time, defer request processing when a sense request is queued. Fixes: 600335205b8d "ide: convert to blk-mq" Reported-by: He Zhe Tested-by: He Zhe Signed-off-by: Jens Axboe --- drivers/ide/ide-atapi.c | 9 +++++++- drivers/ide/ide-io.c | 61 +++++++++++++++++++++++++------------------------ drivers/ide/ide-park.c | 2 ++ drivers/ide/ide-probe.c | 23 +++++++++++++------ include/linux/ide.h | 2 ++ 5 files changed, 59 insertions(+), 38 deletions(-) (limited to 'include/linux') diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c index da58020a144e..33a28cde126c 100644 --- a/drivers/ide/ide-atapi.c +++ b/drivers/ide/ide-atapi.c @@ -235,21 +235,28 @@ EXPORT_SYMBOL_GPL(ide_prep_sense); int ide_queue_sense_rq(ide_drive_t *drive, void *special) { - struct request *sense_rq = drive->sense_rq; + ide_hwif_t *hwif = drive->hwif; + struct request *sense_rq; + unsigned long flags; + + spin_lock_irqsave(&hwif->lock, flags); /* deferred failure from ide_prep_sense() */ if (!drive->sense_rq_armed) { printk(KERN_WARNING PFX "%s: error queuing a sense request\n", drive->name); + spin_unlock_irqrestore(&hwif->lock, flags); return -ENOMEM; } + sense_rq = drive->sense_rq; ide_req(sense_rq)->special = special; drive->sense_rq_armed = false; drive->hwif->rq = NULL; ide_insert_request_head(drive, sense_rq); + spin_unlock_irqrestore(&hwif->lock, flags); return 0; } EXPORT_SYMBOL_GPL(ide_queue_sense_rq); diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c index 8445b484ae69..b137f27a34d5 100644 --- a/drivers/ide/ide-io.c +++ b/drivers/ide/ide-io.c @@ -68,8 +68,10 @@ int ide_end_rq(ide_drive_t *drive, struct request *rq, blk_status_t error, } if (!blk_update_request(rq, error, nr_bytes)) { - if (rq == drive->sense_rq) + if (rq == drive->sense_rq) { drive->sense_rq = NULL; + drive->sense_rq_active = false; + } __blk_mq_end_request(rq, error); return 0; @@ -451,16 +453,11 @@ void ide_requeue_and_plug(ide_drive_t *drive, struct request *rq) blk_mq_delay_run_hw_queue(q->queue_hw_ctx[0], 3); } -/* - * Issue a new request to a device. - */ -blk_status_t ide_queue_rq(struct blk_mq_hw_ctx *hctx, - const struct blk_mq_queue_data *bd) +blk_status_t ide_issue_rq(ide_drive_t *drive, struct request *rq, + bool local_requeue) { - ide_drive_t *drive = hctx->queue->queuedata; - ide_hwif_t *hwif = drive->hwif; + ide_hwif_t *hwif = drive->hwif; struct ide_host *host = hwif->host; - struct request *rq = bd->rq; ide_startstop_t startstop; if (!blk_rq_is_passthrough(rq) && !(rq->rq_flags & RQF_DONTPREP)) { @@ -474,8 +471,6 @@ blk_status_t ide_queue_rq(struct blk_mq_hw_ctx *hctx, if (ide_lock_host(host, hwif)) return BLK_STS_DEV_RESOURCE; - blk_mq_start_request(rq); - spin_lock_irq(&hwif->lock); if (!ide_lock_port(hwif)) { @@ -510,18 +505,6 @@ repeat: hwif->cur_dev = drive; drive->dev_flags &= ~(IDE_DFLAG_SLEEPING | IDE_DFLAG_PARKED); - /* - * we know that the queue isn't empty, but this can happen - * if ->prep_rq() decides to kill a request - */ - if (!rq) { - rq = bd->rq; - if (!rq) { - ide_unlock_port(hwif); - goto out; - } - } - /* * Sanity: don't accept a request that isn't a PM request * if we are currently power managed. This is very important as @@ -560,9 +543,12 @@ repeat: } } else { plug_device: + if (local_requeue) + list_add(&rq->queuelist, &drive->rq_list); spin_unlock_irq(&hwif->lock); ide_unlock_host(host); - ide_requeue_and_plug(drive, rq); + if (!local_requeue) + ide_requeue_and_plug(drive, rq); return BLK_STS_OK; } @@ -573,6 +559,26 @@ out: return BLK_STS_OK; } +/* + * Issue a new request to a device. + */ +blk_status_t ide_queue_rq(struct blk_mq_hw_ctx *hctx, + const struct blk_mq_queue_data *bd) +{ + ide_drive_t *drive = hctx->queue->queuedata; + ide_hwif_t *hwif = drive->hwif; + + spin_lock_irq(&hwif->lock); + if (drive->sense_rq_active) { + spin_unlock_irq(&hwif->lock); + return BLK_STS_DEV_RESOURCE; + } + spin_unlock_irq(&hwif->lock); + + blk_mq_start_request(bd->rq); + return ide_issue_rq(drive, bd->rq, false); +} + static int drive_is_ready(ide_drive_t *drive) { ide_hwif_t *hwif = drive->hwif; @@ -893,13 +899,8 @@ EXPORT_SYMBOL_GPL(ide_pad_transfer); void ide_insert_request_head(ide_drive_t *drive, struct request *rq) { - ide_hwif_t *hwif = drive->hwif; - unsigned long flags; - - spin_lock_irqsave(&hwif->lock, flags); + drive->sense_rq_active = true; list_add_tail(&rq->queuelist, &drive->rq_list); - spin_unlock_irqrestore(&hwif->lock, flags); - kblockd_schedule_work(&drive->rq_work); } EXPORT_SYMBOL_GPL(ide_insert_request_head); diff --git a/drivers/ide/ide-park.c b/drivers/ide/ide-park.c index 102aa3bc3e7f..8af7af6001eb 100644 --- a/drivers/ide/ide-park.c +++ b/drivers/ide/ide-park.c @@ -54,7 +54,9 @@ static void issue_park_cmd(ide_drive_t *drive, unsigned long timeout) scsi_req(rq)->cmd[0] = REQ_UNPARK_HEADS; scsi_req(rq)->cmd_len = 1; ide_req(rq)->type = ATA_PRIV_MISC; + spin_lock_irq(&hwif->lock); ide_insert_request_head(drive, rq); + spin_unlock_irq(&hwif->lock); out: return; diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c index 63627be0811a..5aeaca24a28f 100644 --- a/drivers/ide/ide-probe.c +++ b/drivers/ide/ide-probe.c @@ -1159,18 +1159,27 @@ static void drive_rq_insert_work(struct work_struct *work) ide_drive_t *drive = container_of(work, ide_drive_t, rq_work); ide_hwif_t *hwif = drive->hwif; struct request *rq; + blk_status_t ret; LIST_HEAD(list); - spin_lock_irq(&hwif->lock); - if (!list_empty(&drive->rq_list)) - list_splice_init(&drive->rq_list, &list); - spin_unlock_irq(&hwif->lock); + blk_mq_quiesce_queue(drive->queue); - while (!list_empty(&list)) { - rq = list_first_entry(&list, struct request, queuelist); + ret = BLK_STS_OK; + spin_lock_irq(&hwif->lock); + while (!list_empty(&drive->rq_list)) { + rq = list_first_entry(&drive->rq_list, struct request, queuelist); list_del_init(&rq->queuelist); - blk_execute_rq_nowait(drive->queue, rq->rq_disk, rq, true, NULL); + + spin_unlock_irq(&hwif->lock); + ret = ide_issue_rq(drive, rq, true); + spin_lock_irq(&hwif->lock); } + spin_unlock_irq(&hwif->lock); + + blk_mq_unquiesce_queue(drive->queue); + + if (ret != BLK_STS_OK) + kblockd_schedule_work(&drive->rq_work); } static const u8 ide_hwif_to_major[] = diff --git a/include/linux/ide.h b/include/linux/ide.h index e7d29ae633cd..971cf76a78a0 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -615,6 +615,7 @@ struct ide_drive_s { /* current sense rq and buffer */ bool sense_rq_armed; + bool sense_rq_active; struct request *sense_rq; struct request_sense sense_data; @@ -1219,6 +1220,7 @@ extern void ide_stall_queue(ide_drive_t *drive, unsigned long timeout); extern void ide_timer_expiry(struct timer_list *t); extern irqreturn_t ide_intr(int irq, void *dev_id); extern blk_status_t ide_queue_rq(struct blk_mq_hw_ctx *, const struct blk_mq_queue_data *); +extern blk_status_t ide_issue_rq(ide_drive_t *, struct request *, bool); extern void ide_requeue_and_plug(ide_drive_t *drive, struct request *rq); void ide_init_disk(struct gendisk *, ide_drive_t *); -- cgit v1.2.3 From 6cab5e90ab2bd323c9f3811b6c70a4687df51e27 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 28 Jan 2019 18:43:34 -0800 Subject: bpf: run bpf programs with preemption disabled Disabled preemption is necessary for proper access to per-cpu maps from BPF programs. But the sender side of socket filters didn't have preemption disabled: unix_dgram_sendmsg->sk_filter->sk_filter_trim_cap->bpf_prog_run_save_cb->BPF_PROG_RUN and a combination of af_packet with tun device didn't disable either: tpacket_snd->packet_direct_xmit->packet_pick_tx_queue->ndo_select_queue-> tun_select_queue->tun_ebpf_select_queue->bpf_prog_run_clear_cb->BPF_PROG_RUN Disable preemption before executing BPF programs (both classic and extended). Reported-by: Jann Horn Signed-off-by: Alexei Starovoitov Acked-by: Song Liu Signed-off-by: Daniel Borkmann --- include/linux/filter.h | 21 ++++++++++++++++++--- kernel/bpf/cgroup.c | 2 +- 2 files changed, 19 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index ad106d845b22..e532fcc6e4b5 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -591,8 +591,8 @@ static inline u8 *bpf_skb_cb(struct sk_buff *skb) return qdisc_skb_cb(skb)->data; } -static inline u32 bpf_prog_run_save_cb(const struct bpf_prog *prog, - struct sk_buff *skb) +static inline u32 __bpf_prog_run_save_cb(const struct bpf_prog *prog, + struct sk_buff *skb) { u8 *cb_data = bpf_skb_cb(skb); u8 cb_saved[BPF_SKB_CB_LEN]; @@ -611,15 +611,30 @@ static inline u32 bpf_prog_run_save_cb(const struct bpf_prog *prog, return res; } +static inline u32 bpf_prog_run_save_cb(const struct bpf_prog *prog, + struct sk_buff *skb) +{ + u32 res; + + preempt_disable(); + res = __bpf_prog_run_save_cb(prog, skb); + preempt_enable(); + return res; +} + static inline u32 bpf_prog_run_clear_cb(const struct bpf_prog *prog, struct sk_buff *skb) { u8 *cb_data = bpf_skb_cb(skb); + u32 res; if (unlikely(prog->cb_access)) memset(cb_data, 0, BPF_SKB_CB_LEN); - return BPF_PROG_RUN(prog, skb); + preempt_disable(); + res = BPF_PROG_RUN(prog, skb); + preempt_enable(); + return res; } static __always_inline u32 bpf_prog_run_xdp(const struct bpf_prog *prog, diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index ab612fe9862f..d17d05570a3f 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -572,7 +572,7 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk, bpf_compute_and_save_data_end(skb, &saved_data_end); ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], skb, - bpf_prog_run_save_cb); + __bpf_prog_run_save_cb); bpf_restore_data_end(skb, saved_data_end); __skb_pull(skb, offset); skb->sk = save_sk; -- cgit v1.2.3 From 9bcdeb51bd7d2ae9fe65ea4d60643d2aeef5bfe3 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Fri, 1 Feb 2019 14:20:31 -0800 Subject: oom, oom_reaper: do not enqueue same task twice Arkadiusz reported that enabling memcg's group oom killing causes strange memcg statistics where there is no task in a memcg despite the number of tasks in that memcg is not 0. It turned out that there is a bug in wake_oom_reaper() which allows enqueuing same task twice which makes impossible to decrease the number of tasks in that memcg due to a refcount leak. This bug existed since the OOM reaper became invokable from task_will_free_mem(current) path in out_of_memory() in Linux 4.7, T1@P1 |T2@P1 |T3@P1 |OOM reaper ----------+----------+----------+------------ # Processing an OOM victim in a different memcg domain. try_charge() mem_cgroup_out_of_memory() mutex_lock(&oom_lock) try_charge() mem_cgroup_out_of_memory() mutex_lock(&oom_lock) try_charge() mem_cgroup_out_of_memory() mutex_lock(&oom_lock) out_of_memory() oom_kill_process(P1) do_send_sig_info(SIGKILL, @P1) mark_oom_victim(T1@P1) wake_oom_reaper(T1@P1) # T1@P1 is enqueued. mutex_unlock(&oom_lock) out_of_memory() mark_oom_victim(T2@P1) wake_oom_reaper(T2@P1) # T2@P1 is enqueued. mutex_unlock(&oom_lock) out_of_memory() mark_oom_victim(T1@P1) wake_oom_reaper(T1@P1) # T1@P1 is enqueued again due to oom_reaper_list == T2@P1 && T1@P1->oom_reaper_list == NULL. mutex_unlock(&oom_lock) # Completed processing an OOM victim in a different memcg domain. spin_lock(&oom_reaper_lock) # T1P1 is dequeued. spin_unlock(&oom_reaper_lock) but memcg's group oom killing made it easier to trigger this bug by calling wake_oom_reaper() on the same task from one out_of_memory() request. Fix this bug using an approach used by commit 855b018325737f76 ("oom, oom_reaper: disable oom_reaper for oom_kill_allocating_task"). As a side effect of this patch, this patch also avoids enqueuing multiple threads sharing memory via task_will_free_mem(current) path. Link: http://lkml.kernel.org/r/e865a044-2c10-9858-f4ef-254bc71d6cc2@i-love.sakura.ne.jp Link: http://lkml.kernel.org/r/5ee34fc6-1485-34f8-8790-903ddabaa809@i-love.sakura.ne.jp Fixes: af8e15cc85a25315 ("oom, oom_reaper: do not enqueue task if it is on the oom_reaper_list head") Signed-off-by: Tetsuo Handa Reported-by: Arkadiusz Miskiewicz Tested-by: Arkadiusz Miskiewicz Acked-by: Michal Hocko Acked-by: Roman Gushchin Cc: Tejun Heo Cc: Aleksa Sarai Cc: Jay Kamat Cc: Johannes Weiner Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched/coredump.h | 1 + mm/oom_kill.c | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched/coredump.h b/include/linux/sched/coredump.h index ec912d01126f..ecdc6542070f 100644 --- a/include/linux/sched/coredump.h +++ b/include/linux/sched/coredump.h @@ -71,6 +71,7 @@ static inline int get_dumpable(struct mm_struct *mm) #define MMF_HUGE_ZERO_PAGE 23 /* mm has ever used the global huge zero page */ #define MMF_DISABLE_THP 24 /* disable THP for all VMAs */ #define MMF_OOM_VICTIM 25 /* mm is the oom victim */ +#define MMF_OOM_REAP_QUEUED 26 /* mm was queued for oom_reaper */ #define MMF_DISABLE_THP_MASK (1 << MMF_DISABLE_THP) #define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK |\ diff --git a/mm/oom_kill.c b/mm/oom_kill.c index f0e8cd9edb1a..059e617a1847 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -647,8 +647,8 @@ static int oom_reaper(void *unused) static void wake_oom_reaper(struct task_struct *tsk) { - /* tsk is already queued? */ - if (tsk == oom_reaper_list || tsk->oom_reaper_list) + /* mm is already queued? */ + if (test_and_set_bit(MMF_OOM_REAP_QUEUED, &tsk->signal->oom_mm->flags)) return; get_task_struct(tsk); -- cgit v1.2.3 From b13bc35193d9e7a8c050a24928ca5c9e7c9a009b Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Fri, 1 Feb 2019 14:20:51 -0800 Subject: mm/hotplug: invalid PFNs from pfn_to_online_page() On an arm64 ThunderX2 server, the first kmemleak scan would crash [1] with CONFIG_DEBUG_VM_PGFLAGS=y due to page_to_nid() found a pfn that is not directly mapped (MEMBLOCK_NOMAP). Hence, the page->flags is uninitialized. This is due to the commit 9f1eb38e0e11 ("mm, kmemleak: little optimization while scanning") starts to use pfn_to_online_page() instead of pfn_valid(). However, in the CONFIG_MEMORY_HOTPLUG=y case, pfn_to_online_page() does not call memblock_is_map_memory() while pfn_valid() does. Historically, the commit 68709f45385a ("arm64: only consider memblocks with NOMAP cleared for linear mapping") causes pages marked as nomap being no long reassigned to the new zone in memmap_init_zone() by calling __init_single_page(). Since the commit 2d070eab2e82 ("mm: consider zone which is not fully populated to have holes") introduced pfn_to_online_page() and was designed to return a valid pfn only, but it is clearly broken on arm64. Therefore, let pfn_to_online_page() call pfn_valid_within(), so it can handle nomap thanks to the commit f52bb98f5ade ("arm64: mm: always enable CONFIG_HOLES_IN_ZONE"), while it will be optimized away on architectures where have no HOLES_IN_ZONE. [1] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000006 Mem abort info: ESR = 0x96000005 Exception class = DABT (current EL), IL = 32 bits SET = 0, FnV = 0 EA = 0, S1PTW = 0 Data abort info: ISV = 0, ISS = 0x00000005 CM = 0, WnR = 0 Internal error: Oops: 96000005 [#1] SMP CPU: 60 PID: 1408 Comm: kmemleak Not tainted 5.0.0-rc2+ #8 pstate: 60400009 (nZCv daif +PAN -UAO) pc : page_mapping+0x24/0x144 lr : __dump_page+0x34/0x3dc sp : ffff00003a5cfd10 x29: ffff00003a5cfd10 x28: 000000000000802f x27: 0000000000000000 x26: 0000000000277d00 x25: ffff000010791f56 x24: ffff7fe000000000 x23: ffff000010772f8b x22: ffff00001125f670 x21: ffff000011311000 x20: ffff000010772f8b x19: fffffffffffffffe x18: 0000000000000000 x17: 0000000000000000 x16: 0000000000000000 x15: 0000000000000000 x14: ffff802698b19600 x13: ffff802698b1a200 x12: ffff802698b16f00 x11: ffff802698b1a400 x10: 0000000000001400 x9 : 0000000000000001 x8 : ffff00001121a000 x7 : 0000000000000000 x6 : ffff0000102c53b8 x5 : 0000000000000000 x4 : 0000000000000003 x3 : 0000000000000100 x2 : 0000000000000000 x1 : ffff000010772f8b x0 : ffffffffffffffff Process kmemleak (pid: 1408, stack limit = 0x(____ptrval____)) Call trace: page_mapping+0x24/0x144 __dump_page+0x34/0x3dc dump_page+0x28/0x4c kmemleak_scan+0x4ac/0x680 kmemleak_scan_thread+0xb4/0xdc kthread+0x12c/0x13c ret_from_fork+0x10/0x18 Code: d503201f f9400660 36000040 d1000413 (f9400661) ---[ end trace 4d4bd7f573490c8e ]--- Kernel panic - not syncing: Fatal exception SMP: stopping secondary CPUs Kernel Offset: disabled CPU features: 0x002,20000c38 Memory Limit: none ---[ end Kernel panic - not syncing: Fatal exception ]--- Link: http://lkml.kernel.org/r/20190122132916.28360-1-cai@lca.pw Fixes: 9f1eb38e0e11 ("mm, kmemleak: little optimization while scanning") Signed-off-by: Qian Cai Acked-by: Michal Hocko Cc: Oscar Salvador Cc: Catalin Marinas Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memory_hotplug.h | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 07da5c6c5ba0..368267c1b71b 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -21,14 +21,16 @@ struct vmem_altmap; * walkers which rely on the fully initialized page->flags and others * should use this rather than pfn_valid && pfn_to_page */ -#define pfn_to_online_page(pfn) \ -({ \ - struct page *___page = NULL; \ - unsigned long ___nr = pfn_to_section_nr(pfn); \ - \ - if (___nr < NR_MEM_SECTIONS && online_section_nr(___nr))\ - ___page = pfn_to_page(pfn); \ - ___page; \ +#define pfn_to_online_page(pfn) \ +({ \ + struct page *___page = NULL; \ + unsigned long ___pfn = pfn; \ + unsigned long ___nr = pfn_to_section_nr(___pfn); \ + \ + if (___nr < NR_MEM_SECTIONS && online_section_nr(___nr) && \ + pfn_valid_within(___pfn)) \ + ___page = pfn_to_page(___pfn); \ + ___page; \ }) /* -- cgit v1.2.3 From e6d429313ea5c776d2e76b4494df69102e6b7115 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Tue, 29 Jan 2019 17:44:36 -0500 Subject: x86/resctrl: Avoid confusion over the new X86_RESCTRL config "Resource Control" is a very broad term for this CPU feature, and a term that is also associated with containers, cgroups etc. This can easily cause confusion. Make the user prompt more specific. Match the config symbol name. [ bp: In the future, the corresponding ARM arch-specific code will be under ARM_CPU_RESCTRL and the arch-agnostic bits will be carved out under the CPU_RESCTRL umbrella symbol. ] Signed-off-by: Johannes Weiner Signed-off-by: Borislav Petkov Cc: Babu Moger Cc: Fenghua Yu Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: James Morse Cc: Jonathan Corbet Cc: "Kirill A. Shutemov" Cc: linux-doc@vger.kernel.org Cc: Peter Zijlstra Cc: Pu Wen Cc: Reinette Chatre Cc: Thomas Gleixner Cc: Tony Luck Cc: x86-ml Link: https://lkml.kernel.org/r/20190130195621.GA30653@cmpxchg.org --- Documentation/x86/resctrl_ui.txt | 2 +- arch/x86/Kconfig | 6 +++--- arch/x86/include/asm/resctrl_sched.h | 4 ++-- arch/x86/kernel/cpu/Makefile | 2 +- arch/x86/kernel/cpu/resctrl/Makefile | 4 ++-- include/linux/sched.h | 2 +- 6 files changed, 10 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/Documentation/x86/resctrl_ui.txt b/Documentation/x86/resctrl_ui.txt index e8e8d14d3c4e..c1f95b59e14d 100644 --- a/Documentation/x86/resctrl_ui.txt +++ b/Documentation/x86/resctrl_ui.txt @@ -9,7 +9,7 @@ Fenghua Yu Tony Luck Vikas Shivappa -This feature is enabled by the CONFIG_X86_RESCTRL and the x86 /proc/cpuinfo +This feature is enabled by the CONFIG_X86_CPU_RESCTRL and the x86 /proc/cpuinfo flag bits: RDT (Resource Director Technology) Allocation - "rdt_a" CAT (Cache Allocation Technology) - "cat_l3", "cat_l2" diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 26387c7bf305..68261430fe6e 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -446,12 +446,12 @@ config RETPOLINE branches. Requires a compiler with -mindirect-branch=thunk-extern support for full protection. The kernel may run slower. -config X86_RESCTRL - bool "Resource Control support" +config X86_CPU_RESCTRL + bool "x86 CPU resource control support" depends on X86 && (CPU_SUP_INTEL || CPU_SUP_AMD) select KERNFS help - Enable Resource Control support. + Enable x86 CPU resource control support. Provide support for the allocation and monitoring of system resources usage by the CPU. diff --git a/arch/x86/include/asm/resctrl_sched.h b/arch/x86/include/asm/resctrl_sched.h index 40ebddde6ac2..f6b7fe2833cc 100644 --- a/arch/x86/include/asm/resctrl_sched.h +++ b/arch/x86/include/asm/resctrl_sched.h @@ -2,7 +2,7 @@ #ifndef _ASM_X86_RESCTRL_SCHED_H #define _ASM_X86_RESCTRL_SCHED_H -#ifdef CONFIG_X86_RESCTRL +#ifdef CONFIG_X86_CPU_RESCTRL #include #include @@ -88,6 +88,6 @@ static inline void resctrl_sched_in(void) static inline void resctrl_sched_in(void) {} -#endif /* CONFIG_X86_RESCTRL */ +#endif /* CONFIG_X86_CPU_RESCTRL */ #endif /* _ASM_X86_RESCTRL_SCHED_H */ diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index b6fa0869f7aa..cfd24f9f7614 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -39,7 +39,7 @@ obj-$(CONFIG_CPU_SUP_UMC_32) += umc.o obj-$(CONFIG_X86_MCE) += mce/ obj-$(CONFIG_MTRR) += mtrr/ obj-$(CONFIG_MICROCODE) += microcode/ -obj-$(CONFIG_X86_RESCTRL) += resctrl/ +obj-$(CONFIG_X86_CPU_RESCTRL) += resctrl/ obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o diff --git a/arch/x86/kernel/cpu/resctrl/Makefile b/arch/x86/kernel/cpu/resctrl/Makefile index 1cabe6fd8e11..4a06c37b9cf1 100644 --- a/arch/x86/kernel/cpu/resctrl/Makefile +++ b/arch/x86/kernel/cpu/resctrl/Makefile @@ -1,4 +1,4 @@ # SPDX-License-Identifier: GPL-2.0 -obj-$(CONFIG_X86_RESCTRL) += core.o rdtgroup.o monitor.o -obj-$(CONFIG_X86_RESCTRL) += ctrlmondata.o pseudo_lock.o +obj-$(CONFIG_X86_CPU_RESCTRL) += core.o rdtgroup.o monitor.o +obj-$(CONFIG_X86_CPU_RESCTRL) += ctrlmondata.o pseudo_lock.o CFLAGS_pseudo_lock.o = -I$(src) diff --git a/include/linux/sched.h b/include/linux/sched.h index 224666226e87..8c328b14c424 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -995,7 +995,7 @@ struct task_struct { /* cg_list protected by css_set_lock and tsk->alloc_lock: */ struct list_head cg_list; #endif -#ifdef CONFIG_X86_RESCTRL +#ifdef CONFIG_X86_CPU_RESCTRL u32 closid; u32 rmid; #endif -- cgit v1.2.3 From dda7a817f2873a0e0b1c7fde1265758f3623daa4 Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Tue, 22 Jan 2019 08:48:49 +0200 Subject: net/mlx5: Add XRC transport to ODP device capabilities layout The device capabilities for ODP structure was missing the field for XRC transport so add it here. Signed-off-by: Moni Shoua Reviewed-by: Majd Dibbiny Signed-off-by: Leon Romanovsky --- include/linux/mlx5/mlx5_ifc.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 35fe5217b244..5407db8ba8e1 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -831,7 +831,9 @@ struct mlx5_ifc_odp_cap_bits { struct mlx5_ifc_odp_per_transport_service_cap_bits ud_odp_caps; - u8 reserved_at_e0[0x720]; + struct mlx5_ifc_odp_per_transport_service_cap_bits xrc_odp_caps; + + u8 reserved_at_100[0x700]; }; struct mlx5_ifc_calc_op { -- cgit v1.2.3 From 46861e3e88be18846971792b763eaf520a91a802 Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Tue, 22 Jan 2019 08:48:51 +0200 Subject: net/mlx5: Set ODP SRQ support in firmware To avoid compatibility issue with older kernels the firmware doesn't allow SRQ to work with ODP unless kernel asks for it. Signed-off-by: Moni Shoua Reviewed-by: Majd Dibbiny Signed-off-by: Leon Romanovsky --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 53 ++++++++++++++++++++++++++ include/linux/mlx5/device.h | 3 ++ include/linux/mlx5/mlx5_ifc.h | 1 + 3 files changed, 57 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 085e1133b8d5..e38aa206ab6d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -459,6 +459,53 @@ static int handle_hca_cap_atomic(struct mlx5_core_dev *dev) return err; } +static int handle_hca_cap_odp(struct mlx5_core_dev *dev) +{ + void *set_ctx; + void *set_hca_cap; + int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in); + int err; + + if (!MLX5_CAP_GEN(dev, pg)) + return 0; + + err = mlx5_core_get_caps(dev, MLX5_CAP_ODP); + if (err) + return err; + + /** + * If all bits are cleared we shouldn't try to set it + * or we might fail while trying to access a reserved bit. + */ + if (!(MLX5_CAP_ODP_MAX(dev, ud_odp_caps.srq_receive) || + MLX5_CAP_ODP_MAX(dev, rc_odp_caps.srq_receive) || + MLX5_CAP_ODP_MAX(dev, xrc_odp_caps.srq_receive))) + return 0; + + set_ctx = kzalloc(set_sz, GFP_KERNEL); + if (!set_ctx) + return -ENOMEM; + + set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, capability); + memcpy(set_hca_cap, dev->caps.hca_cur[MLX5_CAP_ODP], + MLX5_ST_SZ_BYTES(odp_cap)); + + /* set ODP SRQ support for RC/UD and XRC transports */ + MLX5_SET(odp_cap, set_hca_cap, ud_odp_caps.srq_receive, + (MLX5_CAP_ODP_MAX(dev, ud_odp_caps.srq_receive))); + + MLX5_SET(odp_cap, set_hca_cap, rc_odp_caps.srq_receive, + (MLX5_CAP_ODP_MAX(dev, rc_odp_caps.srq_receive))); + + MLX5_SET(odp_cap, set_hca_cap, xrc_odp_caps.srq_receive, + (MLX5_CAP_ODP_MAX(dev, xrc_odp_caps.srq_receive))); + + err = set_caps(dev, set_ctx, set_sz, MLX5_SET_HCA_CAP_OP_MOD_ODP); + + kfree(set_ctx); + return err; +} + static int handle_hca_cap(struct mlx5_core_dev *dev) { void *set_ctx = NULL; @@ -931,6 +978,12 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, goto reclaim_boot_pages; } + err = handle_hca_cap_odp(dev); + if (err) { + dev_err(&pdev->dev, "handle_hca_cap_odp failed\n"); + goto reclaim_boot_pages; + } + err = mlx5_satisfy_startup_pages(dev, 0); if (err) { dev_err(&pdev->dev, "failed to allocate init pages\n"); diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 8c4a820bd4c1..0845a227a7b2 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -1201,6 +1201,9 @@ enum mlx5_qcam_feature_groups { #define MLX5_CAP_ODP(mdev, cap)\ MLX5_GET(odp_cap, mdev->caps.hca_cur[MLX5_CAP_ODP], cap) +#define MLX5_CAP_ODP_MAX(mdev, cap)\ + MLX5_GET(odp_cap, mdev->caps.hca_max[MLX5_CAP_ODP], cap) + #define MLX5_CAP_VECTOR_CALC(mdev, cap) \ MLX5_GET(vector_calc_cap, \ mdev->caps.hca_cur[MLX5_CAP_VECTOR_CALC], cap) diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 5407db8ba8e1..c5c679390fbd 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -72,6 +72,7 @@ enum { enum { MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE = 0x0, + MLX5_SET_HCA_CAP_OP_MOD_ODP = 0x2, MLX5_SET_HCA_CAP_OP_MOD_ATOMIC = 0x3, }; -- cgit v1.2.3 From 960587285a56ec3cafb4d1e6b25c19eced4d0bce Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 2 Feb 2019 10:16:59 +0100 Subject: netfilter: nat: remove module dependency on ipv6 core nf_nat_ipv6 calls two ipv6 core functions, so add those to v6ops to avoid the module dependency. This is a prerequisite for merging ipv4 and ipv6 nat implementations. Add wrappers to avoid the indirection if ipv6 is builtin. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_ipv6.h | 6 ++++++ net/ipv6/netfilter.c | 4 ++++ net/ipv6/netfilter/nf_nat_l3proto_ipv6.c | 17 ++++++++++++++++- net/ipv6/netfilter/nf_nat_masquerade_ipv6.c | 21 +++++++++++++++++++-- 4 files changed, 45 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h index c0dc4dd78887..ad4223c10488 100644 --- a/include/linux/netfilter_ipv6.h +++ b/include/linux/netfilter_ipv6.h @@ -33,6 +33,12 @@ struct nf_ipv6_ops { int (*route)(struct net *net, struct dst_entry **dst, struct flowi *fl, bool strict); int (*reroute)(struct sk_buff *skb, const struct nf_queue_entry *entry); +#if IS_MODULE(CONFIG_IPV6) + int (*route_me_harder)(struct net *net, struct sk_buff *skb); + int (*dev_get_saddr)(struct net *net, const struct net_device *dev, + const struct in6_addr *daddr, unsigned int srcprefs, + struct in6_addr *saddr); +#endif }; #ifdef CONFIG_NETFILTER diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index 8b075f0bc351..0a5caf263889 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -112,6 +112,10 @@ static const struct nf_ipv6_ops ipv6ops = { .fragment = ip6_fragment, .route = nf_ip6_route, .reroute = nf_ip6_reroute, +#if IS_MODULE(CONFIG_IPV6) + .route_me_harder = ip6_route_me_harder, + .dev_get_saddr = ipv6_dev_get_saddr, +#endif }; int __init ipv6_netfilter_init(void) diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c index 9c914db44bec..b52026adb3e7 100644 --- a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -317,6 +318,20 @@ nf_nat_ipv6_out(void *priv, struct sk_buff *skb, return ret; } +static int nat_route_me_harder(struct net *net, struct sk_buff *skb) +{ +#ifdef CONFIG_IPV6_MODULE + const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops(); + + if (!v6_ops) + return -EHOSTUNREACH; + + return v6_ops->route_me_harder(net, skb); +#else + return ip6_route_me_harder(net, skb); +#endif +} + static unsigned int nf_nat_ipv6_local_fn(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) @@ -333,7 +348,7 @@ nf_nat_ipv6_local_fn(void *priv, struct sk_buff *skb, if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3, &ct->tuplehash[!dir].tuple.src.u3)) { - err = ip6_route_me_harder(state->net, skb); + err = nat_route_me_harder(state->net, skb); if (err < 0) ret = NF_DROP_ERR(err); } diff --git a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c b/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c index 0ad0da5a2600..fd313b726263 100644 --- a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c +++ b/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c @@ -24,6 +24,23 @@ static atomic_t v6_worker_count; +static int +nat_ipv6_dev_get_saddr(struct net *net, const struct net_device *dev, + const struct in6_addr *daddr, unsigned int srcprefs, + struct in6_addr *saddr) +{ +#ifdef CONFIG_IPV6_MODULE + const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops(); + + if (!v6_ops) + return -EHOSTUNREACH; + + return v6_ops->dev_get_saddr(net, dev, daddr, srcprefs, saddr); +#else + return ipv6_dev_get_saddr(net, dev, daddr, srcprefs, saddr); +#endif +} + unsigned int nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range, const struct net_device *out) @@ -38,8 +55,8 @@ nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range, WARN_ON(!(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED_REPLY))); - if (ipv6_dev_get_saddr(nf_ct_net(ct), out, - &ipv6_hdr(skb)->daddr, 0, &src) < 0) + if (nat_ipv6_dev_get_saddr(nf_ct_net(ct), out, + &ipv6_hdr(skb)->daddr, 0, &src) < 0) return NF_DROP; nat = nf_ct_nat_ext_add(ct); -- cgit v1.2.3 From ac02bcf9cc1e4aefb0a7156a2ae26e8396b15f24 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 2 Feb 2019 10:17:00 +0100 Subject: netfilter: ipv6: avoid indirect calls for IPV6=y case indirect calls are only needed if ipv6 is a module. Add helpers to abstract the v6ops indirections and use them instead. fragment, reroute and route_input are kept as indirect calls. The first two are not not used in hot path and route_input is only used by bridge netfilter. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_ipv6.h | 64 +++++++++++++++++++++++++++++++-------- net/ipv6/netfilter.c | 15 ++++----- net/ipv6/netfilter/nft_fib_ipv6.c | 9 ++---- net/netfilter/utils.c | 6 ++-- net/netfilter/xt_addrtype.c | 16 +++------- 5 files changed, 68 insertions(+), 42 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h index ad4223c10488..471e9467105b 100644 --- a/include/linux/netfilter_ipv6.h +++ b/include/linux/netfilter_ipv6.h @@ -25,29 +25,24 @@ struct nf_queue_entry; * if IPv6 is a module. */ struct nf_ipv6_ops { +#if IS_MODULE(CONFIG_IPV6) int (*chk_addr)(struct net *net, const struct in6_addr *addr, const struct net_device *dev, int strict); - void (*route_input)(struct sk_buff *skb); - int (*fragment)(struct net *net, struct sock *sk, struct sk_buff *skb, - int (*output)(struct net *, struct sock *, struct sk_buff *)); - int (*route)(struct net *net, struct dst_entry **dst, struct flowi *fl, - bool strict); - int (*reroute)(struct sk_buff *skb, const struct nf_queue_entry *entry); -#if IS_MODULE(CONFIG_IPV6) int (*route_me_harder)(struct net *net, struct sk_buff *skb); int (*dev_get_saddr)(struct net *net, const struct net_device *dev, const struct in6_addr *daddr, unsigned int srcprefs, struct in6_addr *saddr); + int (*route)(struct net *net, struct dst_entry **dst, struct flowi *fl, + bool strict); #endif + void (*route_input)(struct sk_buff *skb); + int (*fragment)(struct net *net, struct sock *sk, struct sk_buff *skb, + int (*output)(struct net *, struct sock *, struct sk_buff *)); + int (*reroute)(struct sk_buff *skb, const struct nf_queue_entry *entry); }; #ifdef CONFIG_NETFILTER -int ip6_route_me_harder(struct net *net, struct sk_buff *skb); -__sum16 nf_ip6_checksum(struct sk_buff *skb, unsigned int hook, - unsigned int dataoff, u_int8_t protocol); - -int ipv6_netfilter_init(void); -void ipv6_netfilter_fini(void); +#include extern const struct nf_ipv6_ops __rcu *nf_ipv6_ops; static inline const struct nf_ipv6_ops *nf_get_ipv6_ops(void) @@ -55,6 +50,49 @@ static inline const struct nf_ipv6_ops *nf_get_ipv6_ops(void) return rcu_dereference(nf_ipv6_ops); } +static inline int nf_ipv6_chk_addr(struct net *net, const struct in6_addr *addr, + const struct net_device *dev, int strict) +{ +#if IS_MODULE(CONFIG_IPV6) + const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops(); + + if (!v6_ops) + return 1; + + return v6_ops->chk_addr(net, addr, dev, strict); +#else + return ipv6_chk_addr(net, addr, dev, strict); +#endif +} + +int __nf_ip6_route(struct net *net, struct dst_entry **dst, + struct flowi *fl, bool strict); + +static inline int nf_ip6_route(struct net *net, struct dst_entry **dst, + struct flowi *fl, bool strict) +{ +#if IS_MODULE(CONFIG_IPV6) + const struct nf_ipv6_ops *v6ops = nf_get_ipv6_ops(); + + if (v6ops) + return v6ops->route(net, dst, fl, strict); + + return -EHOSTUNREACH; +#endif +#if IS_BUILTIN(CONFIG_IPV6) + return __nf_ip6_route(net, dst, fl, strict); +#else + return -EHOSTUNREACH; +#endif +} + +int ip6_route_me_harder(struct net *net, struct sk_buff *skb); +__sum16 nf_ip6_checksum(struct sk_buff *skb, unsigned int hook, + unsigned int dataoff, u_int8_t protocol); + +int ipv6_netfilter_init(void); +void ipv6_netfilter_fini(void); + #else /* CONFIG_NETFILTER */ static inline int ipv6_netfilter_init(void) { return 0; } static inline void ipv6_netfilter_fini(void) { return; } diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index 0a5caf263889..a8263031f3a6 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -84,8 +84,8 @@ static int nf_ip6_reroute(struct sk_buff *skb, return 0; } -static int nf_ip6_route(struct net *net, struct dst_entry **dst, - struct flowi *fl, bool strict) +int __nf_ip6_route(struct net *net, struct dst_entry **dst, + struct flowi *fl, bool strict) { static const struct ipv6_pinfo fake_pinfo; static const struct inet_sock fake_sk = { @@ -105,17 +105,18 @@ static int nf_ip6_route(struct net *net, struct dst_entry **dst, *dst = result; return err; } +EXPORT_SYMBOL_GPL(__nf_ip6_route); static const struct nf_ipv6_ops ipv6ops = { - .chk_addr = ipv6_chk_addr, - .route_input = ip6_route_input, - .fragment = ip6_fragment, - .route = nf_ip6_route, - .reroute = nf_ip6_reroute, #if IS_MODULE(CONFIG_IPV6) + .chk_addr = ipv6_chk_addr, .route_me_harder = ip6_route_me_harder, .dev_get_saddr = ipv6_dev_get_saddr, + .route = __nf_ip6_route, #endif + .route_input = ip6_route_input, + .fragment = ip6_fragment, + .reroute = nf_ip6_reroute, }; int __init ipv6_netfilter_init(void) diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c index 36be3cf0adef..73cdc0bc63f7 100644 --- a/net/ipv6/netfilter/nft_fib_ipv6.c +++ b/net/ipv6/netfilter/nft_fib_ipv6.c @@ -59,7 +59,6 @@ static u32 __nft_fib6_eval_type(const struct nft_fib *priv, struct ipv6hdr *iph) { const struct net_device *dev = NULL; - const struct nf_ipv6_ops *v6ops; int route_err, addrtype; struct rt6_info *rt; struct flowi6 fl6 = { @@ -68,10 +67,6 @@ static u32 __nft_fib6_eval_type(const struct nft_fib *priv, }; u32 ret = 0; - v6ops = nf_get_ipv6_ops(); - if (!v6ops) - return RTN_UNREACHABLE; - if (priv->flags & NFTA_FIB_F_IIF) dev = nft_in(pkt); else if (priv->flags & NFTA_FIB_F_OIF) @@ -79,10 +74,10 @@ static u32 __nft_fib6_eval_type(const struct nft_fib *priv, nft_fib6_flowi_init(&fl6, priv, pkt, dev, iph); - if (dev && v6ops->chk_addr(nft_net(pkt), &fl6.daddr, dev, true)) + if (dev && nf_ipv6_chk_addr(nft_net(pkt), &fl6.daddr, dev, true)) ret = RTN_LOCAL; - route_err = v6ops->route(nft_net(pkt), (struct dst_entry **)&rt, + route_err = nf_ip6_route(nft_net(pkt), (struct dst_entry **)&rt, flowi6_to_flowi(&fl6), false); if (route_err) goto err; diff --git a/net/netfilter/utils.c b/net/netfilter/utils.c index 55af9f247993..06dc55590441 100644 --- a/net/netfilter/utils.c +++ b/net/netfilter/utils.c @@ -162,7 +162,7 @@ EXPORT_SYMBOL_GPL(nf_checksum_partial); int nf_route(struct net *net, struct dst_entry **dst, struct flowi *fl, bool strict, unsigned short family) { - const struct nf_ipv6_ops *v6ops; + const struct nf_ipv6_ops *v6ops __maybe_unused; int ret = 0; switch (family) { @@ -170,9 +170,7 @@ int nf_route(struct net *net, struct dst_entry **dst, struct flowi *fl, ret = nf_ip_route(net, dst, fl, strict); break; case AF_INET6: - v6ops = rcu_dereference(nf_ipv6_ops); - if (v6ops) - ret = v6ops->route(net, dst, fl, strict); + ret = nf_ip6_route(net, dst, fl, strict); break; } diff --git a/net/netfilter/xt_addrtype.c b/net/netfilter/xt_addrtype.c index 89e281b3bfc2..29987ff03621 100644 --- a/net/netfilter/xt_addrtype.c +++ b/net/netfilter/xt_addrtype.c @@ -36,7 +36,6 @@ MODULE_ALIAS("ip6t_addrtype"); static u32 match_lookup_rt6(struct net *net, const struct net_device *dev, const struct in6_addr *addr, u16 mask) { - const struct nf_ipv6_ops *v6ops; struct flowi6 flow; struct rt6_info *rt; u32 ret = 0; @@ -47,18 +46,13 @@ static u32 match_lookup_rt6(struct net *net, const struct net_device *dev, if (dev) flow.flowi6_oif = dev->ifindex; - v6ops = nf_get_ipv6_ops(); - if (v6ops) { - if (dev && (mask & XT_ADDRTYPE_LOCAL)) { - if (v6ops->chk_addr(net, addr, dev, true)) - ret = XT_ADDRTYPE_LOCAL; - } - route_err = v6ops->route(net, (struct dst_entry **)&rt, - flowi6_to_flowi(&flow), false); - } else { - route_err = 1; + if (dev && (mask & XT_ADDRTYPE_LOCAL)) { + if (nf_ipv6_chk_addr(net, addr, dev, true)) + ret = XT_ADDRTYPE_LOCAL; } + route_err = nf_ip6_route(net, (struct dst_entry **)&rt, + flowi6_to_flowi(&flow), false); if (route_err) return XT_ADDRTYPE_UNREACHABLE; -- cgit v1.2.3 From 1878f0dcbff0cd07f62602deb160a44d69a8f146 Mon Sep 17 00:00:00 2001 From: Nikita Yushchenko Date: Wed, 6 Feb 2019 07:36:40 +0100 Subject: net: phy: provide full set of accessor functions to MMD registers This adds full set of locked and unlocked accessor functions to read and write PHY MMD registers and/or bitfields. Set of functions exactly matches what is already available for PHY legacy registers. Signed-off-by: Nikita Yushchenko Signed-off-by: Andrew Lunn Signed-off-by: Heiner Kallweit Signed-off-by: David S. Miller --- drivers/net/phy/phy-core.c | 116 ++++++++++++++++++++++++++++++++++----- include/linux/phy.h | 134 +++++++++++++++++++++++++++++++++++++-------- 2 files changed, 214 insertions(+), 36 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/phy/phy-core.c b/drivers/net/phy/phy-core.c index 909b3344babf..7d6aad287f84 100644 --- a/drivers/net/phy/phy-core.c +++ b/drivers/net/phy/phy-core.c @@ -414,15 +414,15 @@ static void mmd_phy_indirect(struct mii_bus *bus, int phy_addr, int devad, } /** - * phy_read_mmd - Convenience function for reading a register + * __phy_read_mmd - Convenience function for reading a register * from an MMD on a given PHY. * @phydev: The phy_device struct * @devad: The MMD to read from (0..31) * @regnum: The register on the MMD to read (0..65535) * - * Same rules as for phy_read(); + * Same rules as for __phy_read(); */ -int phy_read_mmd(struct phy_device *phydev, int devad, u32 regnum) +int __phy_read_mmd(struct phy_device *phydev, int devad, u32 regnum) { int val; @@ -434,33 +434,52 @@ int phy_read_mmd(struct phy_device *phydev, int devad, u32 regnum) } else if (phydev->is_c45) { u32 addr = MII_ADDR_C45 | (devad << 16) | (regnum & 0xffff); - val = mdiobus_read(phydev->mdio.bus, phydev->mdio.addr, addr); + val = __mdiobus_read(phydev->mdio.bus, phydev->mdio.addr, addr); } else { struct mii_bus *bus = phydev->mdio.bus; int phy_addr = phydev->mdio.addr; - mutex_lock(&bus->mdio_lock); mmd_phy_indirect(bus, phy_addr, devad, regnum); /* Read the content of the MMD's selected register */ val = __mdiobus_read(bus, phy_addr, MII_MMD_DATA); - mutex_unlock(&bus->mdio_lock); } return val; } +EXPORT_SYMBOL(__phy_read_mmd); + +/** + * phy_read_mmd - Convenience function for reading a register + * from an MMD on a given PHY. + * @phydev: The phy_device struct + * @devad: The MMD to read from + * @regnum: The register on the MMD to read + * + * Same rules as for phy_read(); + */ +int phy_read_mmd(struct phy_device *phydev, int devad, u32 regnum) +{ + int ret; + + mutex_lock(&phydev->mdio.bus->mdio_lock); + ret = __phy_read_mmd(phydev, devad, regnum); + mutex_unlock(&phydev->mdio.bus->mdio_lock); + + return ret; +} EXPORT_SYMBOL(phy_read_mmd); /** - * phy_write_mmd - Convenience function for writing a register + * __phy_write_mmd - Convenience function for writing a register * on an MMD on a given PHY. * @phydev: The phy_device struct * @devad: The MMD to read from * @regnum: The register on the MMD to read * @val: value to write to @regnum * - * Same rules as for phy_write(); + * Same rules as for __phy_write(); */ -int phy_write_mmd(struct phy_device *phydev, int devad, u32 regnum, u16 val) +int __phy_write_mmd(struct phy_device *phydev, int devad, u32 regnum, u16 val) { int ret; @@ -472,23 +491,43 @@ int phy_write_mmd(struct phy_device *phydev, int devad, u32 regnum, u16 val) } else if (phydev->is_c45) { u32 addr = MII_ADDR_C45 | (devad << 16) | (regnum & 0xffff); - ret = mdiobus_write(phydev->mdio.bus, phydev->mdio.addr, - addr, val); + ret = __mdiobus_write(phydev->mdio.bus, phydev->mdio.addr, + addr, val); } else { struct mii_bus *bus = phydev->mdio.bus; int phy_addr = phydev->mdio.addr; - mutex_lock(&bus->mdio_lock); mmd_phy_indirect(bus, phy_addr, devad, regnum); /* Write the data into MMD's selected register */ __mdiobus_write(bus, phy_addr, MII_MMD_DATA, val); - mutex_unlock(&bus->mdio_lock); ret = 0; } return ret; } +EXPORT_SYMBOL(__phy_write_mmd); + +/** + * phy_write_mmd - Convenience function for writing a register + * on an MMD on a given PHY. + * @phydev: The phy_device struct + * @devad: The MMD to read from + * @regnum: The register on the MMD to read + * @val: value to write to @regnum + * + * Same rules as for phy_write(); + */ +int phy_write_mmd(struct phy_device *phydev, int devad, u32 regnum, u16 val) +{ + int ret; + + mutex_lock(&phydev->mdio.bus->mdio_lock); + ret = __phy_write_mmd(phydev, devad, regnum, val); + mutex_unlock(&phydev->mdio.bus->mdio_lock); + + return ret; +} EXPORT_SYMBOL(phy_write_mmd); /** @@ -538,6 +577,57 @@ int phy_modify(struct phy_device *phydev, u32 regnum, u16 mask, u16 set) } EXPORT_SYMBOL_GPL(phy_modify); +/** + * __phy_modify_mmd - Convenience function for modifying a register on MMD + * @phydev: the phy_device struct + * @devad: the MMD containing register to modify + * @regnum: register number to modify + * @mask: bit mask of bits to clear + * @set: new value of bits set in mask to write to @regnum + * + * Unlocked helper function which allows a MMD register to be modified as + * new register value = (old register value & ~mask) | set + */ +int __phy_modify_mmd(struct phy_device *phydev, int devad, u32 regnum, + u16 mask, u16 set) +{ + int ret; + + ret = __phy_read_mmd(phydev, devad, regnum); + if (ret < 0) + return ret; + + ret = __phy_write_mmd(phydev, devad, regnum, (ret & ~mask) | set); + + return ret < 0 ? ret : 0; +} +EXPORT_SYMBOL_GPL(__phy_modify_mmd); + +/** + * phy_modify_mmd - Convenience function for modifying a register on MMD + * @phydev: the phy_device struct + * @devad: the MMD containing register to modify + * @regnum: register number to modify + * @mask: bit mask of bits to clear + * @set: new value of bits set in mask to write to @regnum + * + * NOTE: MUST NOT be called from interrupt context, + * because the bus read/write functions may wait for an interrupt + * to conclude the operation. + */ +int phy_modify_mmd(struct phy_device *phydev, int devad, u32 regnum, + u16 mask, u16 set) +{ + int ret; + + mutex_lock(&phydev->mdio.bus->mdio_lock); + ret = __phy_modify_mmd(phydev, devad, regnum, mask, set); + mutex_unlock(&phydev->mdio.bus->mdio_lock); + + return ret; +} +EXPORT_SYMBOL_GPL(phy_modify_mmd); + static int __phy_read_page(struct phy_device *phydev) { return phydev->drv->read_page(phydev); diff --git a/include/linux/phy.h b/include/linux/phy.h index 70f83d0d7469..237dd035858a 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -692,17 +692,6 @@ static inline bool phy_is_started(struct phy_device *phydev) void phy_resolve_aneg_linkmode(struct phy_device *phydev); -/** - * phy_read_mmd - Convenience function for reading a register - * from an MMD on a given PHY. - * @phydev: The phy_device struct - * @devad: The MMD to read from - * @regnum: The register on the MMD to read - * - * Same rules as for phy_read(); - */ -int phy_read_mmd(struct phy_device *phydev, int devad, u32 regnum); - /** * phy_read - Convenience function for reading a given PHY register * @phydev: the phy_device struct @@ -758,9 +747,60 @@ static inline int __phy_write(struct phy_device *phydev, u32 regnum, u16 val) val); } +/** + * phy_read_mmd - Convenience function for reading a register + * from an MMD on a given PHY. + * @phydev: The phy_device struct + * @devad: The MMD to read from + * @regnum: The register on the MMD to read + * + * Same rules as for phy_read(); + */ +int phy_read_mmd(struct phy_device *phydev, int devad, u32 regnum); + +/** + * __phy_read_mmd - Convenience function for reading a register + * from an MMD on a given PHY. + * @phydev: The phy_device struct + * @devad: The MMD to read from + * @regnum: The register on the MMD to read + * + * Same rules as for __phy_read(); + */ +int __phy_read_mmd(struct phy_device *phydev, int devad, u32 regnum); + +/** + * phy_write_mmd - Convenience function for writing a register + * on an MMD on a given PHY. + * @phydev: The phy_device struct + * @devad: The MMD to write to + * @regnum: The register on the MMD to read + * @val: value to write to @regnum + * + * Same rules as for phy_write(); + */ +int phy_write_mmd(struct phy_device *phydev, int devad, u32 regnum, u16 val); + +/** + * __phy_write_mmd - Convenience function for writing a register + * on an MMD on a given PHY. + * @phydev: The phy_device struct + * @devad: The MMD to write to + * @regnum: The register on the MMD to read + * @val: value to write to @regnum + * + * Same rules as for __phy_write(); + */ +int __phy_write_mmd(struct phy_device *phydev, int devad, u32 regnum, u16 val); + int __phy_modify(struct phy_device *phydev, u32 regnum, u16 mask, u16 set); int phy_modify(struct phy_device *phydev, u32 regnum, u16 mask, u16 set); +int __phy_modify_mmd(struct phy_device *phydev, int devad, u32 regnum, + u16 mask, u16 set); +int phy_modify_mmd(struct phy_device *phydev, int devad, u32 regnum, + u16 mask, u16 set); + /** * __phy_set_bits - Convenience function for setting bits in a PHY register * @phydev: the phy_device struct @@ -810,6 +850,66 @@ static inline int phy_clear_bits(struct phy_device *phydev, u32 regnum, u16 val) return phy_modify(phydev, regnum, val, 0); } +/** + * __phy_set_bits_mmd - Convenience function for setting bits in a register + * on MMD + * @phydev: the phy_device struct + * @devad: the MMD containing register to modify + * @regnum: register number to modify + * @val: bits to set + * + * The caller must have taken the MDIO bus lock. + */ +static inline int __phy_set_bits_mmd(struct phy_device *phydev, int devad, + u32 regnum, u16 val) +{ + return __phy_modify_mmd(phydev, devad, regnum, 0, val); +} + +/** + * __phy_clear_bits_mmd - Convenience function for clearing bits in a register + * on MMD + * @phydev: the phy_device struct + * @devad: the MMD containing register to modify + * @regnum: register number to modify + * @val: bits to clear + * + * The caller must have taken the MDIO bus lock. + */ +static inline int __phy_clear_bits_mmd(struct phy_device *phydev, int devad, + u32 regnum, u16 val) +{ + return __phy_modify_mmd(phydev, devad, regnum, val, 0); +} + +/** + * phy_set_bits_mmd - Convenience function for setting bits in a register + * on MMD + * @phydev: the phy_device struct + * @devad: the MMD containing register to modify + * @regnum: register number to modify + * @val: bits to set + */ +static inline int phy_set_bits_mmd(struct phy_device *phydev, int devad, + u32 regnum, u16 val) +{ + return phy_modify_mmd(phydev, devad, regnum, 0, val); +} + +/** + * phy_clear_bits_mmd - Convenience function for clearing bits in a register + * on MMD + * @phydev: the phy_device struct + * @devad: the MMD containing register to modify + * @regnum: register number to modify + * @val: bits to clear + */ +static inline int phy_clear_bits_mmd(struct phy_device *phydev, int devad, + u32 regnum, u16 val) +{ + return phy_modify_mmd(phydev, devad, regnum, val, 0); +} + /** * phy_interrupt_is_valid - Convenience function for testing a given PHY irq * @phydev: the phy_device struct @@ -886,18 +986,6 @@ static inline bool phy_is_pseudo_fixed_link(struct phy_device *phydev) return phydev->is_pseudo_fixed_link; } -/** - * phy_write_mmd - Convenience function for writing a register - * on an MMD on a given PHY. - * @phydev: The phy_device struct - * @devad: The MMD to read from - * @regnum: The register on the MMD to read - * @val: value to write to @regnum - * - * Same rules as for phy_write(); - */ -int phy_write_mmd(struct phy_device *phydev, int devad, u32 regnum, u16 val); - int phy_save_page(struct phy_device *phydev); int phy_select_page(struct phy_device *phydev, int page); int phy_restore_page(struct phy_device *phydev, int oldpage, int ret); -- cgit v1.2.3 From eca4205f9ec3bea2d5aad0493c19f5d2675a20fc Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sat, 2 Feb 2019 12:50:51 +0100 Subject: ethtool: add ethtool_rx_flow_spec to flow_rule structure translator This patch adds a function to translate the ethtool_rx_flow_spec structure to the flow_rule representation. This allows us to reuse code from the driver side given that both flower and ethtool_rx_flow interfaces use the same representation. This patch also includes support for the flow type flags FLOW_EXT, FLOW_MAC_EXT and FLOW_RSS. The ethtool_rx_flow_spec_input wrapper structure is used to convey the rss_context field, that is away from the ethtool_rx_flow_spec structure, and the ethtool_rx_flow_spec structure. Signed-off-by: Pablo Neira Ayuso Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/ethtool.h | 15 +++ net/core/ethtool.c | 241 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 256 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index afd9596ce636..19a8de5326fb 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -400,4 +400,19 @@ struct ethtool_ops { void (*get_ethtool_phy_stats)(struct net_device *, struct ethtool_stats *, u64 *); }; + +struct ethtool_rx_flow_rule { + struct flow_rule *rule; + unsigned long priv[0]; +}; + +struct ethtool_rx_flow_spec_input { + const struct ethtool_rx_flow_spec *fs; + u32 rss_ctx; +}; + +struct ethtool_rx_flow_rule * +ethtool_rx_flow_rule_create(const struct ethtool_rx_flow_spec_input *input); +void ethtool_rx_flow_rule_destroy(struct ethtool_rx_flow_rule *rule); + #endif /* _LINUX_ETHTOOL_H */ diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 45c0a6e3d6ad..0fbf39239b29 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -29,6 +29,7 @@ #include #include #include +#include /* * Some useful ethtool_ops methods that're device independent. @@ -2820,3 +2821,243 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) return rc; } + +struct ethtool_rx_flow_key { + struct flow_dissector_key_basic basic; + union { + struct flow_dissector_key_ipv4_addrs ipv4; + struct flow_dissector_key_ipv6_addrs ipv6; + }; + struct flow_dissector_key_ports tp; + struct flow_dissector_key_ip ip; + struct flow_dissector_key_vlan vlan; + struct flow_dissector_key_eth_addrs eth_addrs; +} __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */ + +struct ethtool_rx_flow_match { + struct flow_dissector dissector; + struct ethtool_rx_flow_key key; + struct ethtool_rx_flow_key mask; +}; + +struct ethtool_rx_flow_rule * +ethtool_rx_flow_rule_create(const struct ethtool_rx_flow_spec_input *input) +{ + const struct ethtool_rx_flow_spec *fs = input->fs; + static struct in6_addr zero_addr = {}; + struct ethtool_rx_flow_match *match; + struct ethtool_rx_flow_rule *flow; + struct flow_action_entry *act; + + flow = kzalloc(sizeof(struct ethtool_rx_flow_rule) + + sizeof(struct ethtool_rx_flow_match), GFP_KERNEL); + if (!flow) + return ERR_PTR(-ENOMEM); + + /* ethtool_rx supports only one single action per rule. */ + flow->rule = flow_rule_alloc(1); + if (!flow->rule) { + kfree(flow); + return ERR_PTR(-ENOMEM); + } + + match = (struct ethtool_rx_flow_match *)flow->priv; + flow->rule->match.dissector = &match->dissector; + flow->rule->match.mask = &match->mask; + flow->rule->match.key = &match->key; + + match->mask.basic.n_proto = htons(0xffff); + + switch (fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT | FLOW_RSS)) { + case TCP_V4_FLOW: + case UDP_V4_FLOW: { + const struct ethtool_tcpip4_spec *v4_spec, *v4_m_spec; + + match->key.basic.n_proto = htons(ETH_P_IP); + + v4_spec = &fs->h_u.tcp_ip4_spec; + v4_m_spec = &fs->m_u.tcp_ip4_spec; + + if (v4_m_spec->ip4src) { + match->key.ipv4.src = v4_spec->ip4src; + match->mask.ipv4.src = v4_m_spec->ip4src; + } + if (v4_m_spec->ip4dst) { + match->key.ipv4.dst = v4_spec->ip4dst; + match->mask.ipv4.dst = v4_m_spec->ip4dst; + } + if (v4_m_spec->ip4src || + v4_m_spec->ip4dst) { + match->dissector.used_keys |= + BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS); + match->dissector.offset[FLOW_DISSECTOR_KEY_IPV4_ADDRS] = + offsetof(struct ethtool_rx_flow_key, ipv4); + } + if (v4_m_spec->psrc) { + match->key.tp.src = v4_spec->psrc; + match->mask.tp.src = v4_m_spec->psrc; + } + if (v4_m_spec->pdst) { + match->key.tp.dst = v4_spec->pdst; + match->mask.tp.dst = v4_m_spec->pdst; + } + if (v4_m_spec->psrc || + v4_m_spec->pdst) { + match->dissector.used_keys |= + BIT(FLOW_DISSECTOR_KEY_PORTS); + match->dissector.offset[FLOW_DISSECTOR_KEY_PORTS] = + offsetof(struct ethtool_rx_flow_key, tp); + } + if (v4_m_spec->tos) { + match->key.ip.tos = v4_spec->tos; + match->mask.ip.tos = v4_m_spec->tos; + match->dissector.used_keys |= + BIT(FLOW_DISSECTOR_KEY_IP); + match->dissector.offset[FLOW_DISSECTOR_KEY_IP] = + offsetof(struct ethtool_rx_flow_key, ip); + } + } + break; + case TCP_V6_FLOW: + case UDP_V6_FLOW: { + const struct ethtool_tcpip6_spec *v6_spec, *v6_m_spec; + + match->key.basic.n_proto = htons(ETH_P_IPV6); + + v6_spec = &fs->h_u.tcp_ip6_spec; + v6_m_spec = &fs->m_u.tcp_ip6_spec; + if (memcmp(v6_m_spec->ip6src, &zero_addr, sizeof(zero_addr))) { + memcpy(&match->key.ipv6.src, v6_spec->ip6src, + sizeof(match->key.ipv6.src)); + memcpy(&match->mask.ipv6.src, v6_m_spec->ip6src, + sizeof(match->mask.ipv6.src)); + } + if (memcmp(v6_m_spec->ip6dst, &zero_addr, sizeof(zero_addr))) { + memcpy(&match->key.ipv6.dst, v6_spec->ip6dst, + sizeof(match->key.ipv6.dst)); + memcpy(&match->mask.ipv6.dst, v6_m_spec->ip6dst, + sizeof(match->mask.ipv6.dst)); + } + if (memcmp(v6_m_spec->ip6src, &zero_addr, sizeof(zero_addr)) || + memcmp(v6_m_spec->ip6src, &zero_addr, sizeof(zero_addr))) { + match->dissector.used_keys |= + BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS); + match->dissector.offset[FLOW_DISSECTOR_KEY_IPV6_ADDRS] = + offsetof(struct ethtool_rx_flow_key, ipv6); + } + if (v6_m_spec->psrc) { + match->key.tp.src = v6_spec->psrc; + match->mask.tp.src = v6_m_spec->psrc; + } + if (v6_m_spec->pdst) { + match->key.tp.dst = v6_spec->pdst; + match->mask.tp.dst = v6_m_spec->pdst; + } + if (v6_m_spec->psrc || + v6_m_spec->pdst) { + match->dissector.used_keys |= + BIT(FLOW_DISSECTOR_KEY_PORTS); + match->dissector.offset[FLOW_DISSECTOR_KEY_PORTS] = + offsetof(struct ethtool_rx_flow_key, tp); + } + if (v6_m_spec->tclass) { + match->key.ip.tos = v6_spec->tclass; + match->mask.ip.tos = v6_m_spec->tclass; + match->dissector.used_keys |= + BIT(FLOW_DISSECTOR_KEY_IP); + match->dissector.offset[FLOW_DISSECTOR_KEY_IP] = + offsetof(struct ethtool_rx_flow_key, ip); + } + } + break; + default: + ethtool_rx_flow_rule_destroy(flow); + return ERR_PTR(-EINVAL); + } + + switch (fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT | FLOW_RSS)) { + case TCP_V4_FLOW: + case TCP_V6_FLOW: + match->key.basic.ip_proto = IPPROTO_TCP; + break; + case UDP_V4_FLOW: + case UDP_V6_FLOW: + match->key.basic.ip_proto = IPPROTO_UDP; + break; + } + match->mask.basic.ip_proto = 0xff; + + match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_BASIC); + match->dissector.offset[FLOW_DISSECTOR_KEY_BASIC] = + offsetof(struct ethtool_rx_flow_key, basic); + + if (fs->flow_type & FLOW_EXT) { + const struct ethtool_flow_ext *ext_h_spec = &fs->h_ext; + const struct ethtool_flow_ext *ext_m_spec = &fs->m_ext; + + if (ext_m_spec->vlan_etype && + ext_m_spec->vlan_tci) { + match->key.vlan.vlan_tpid = ext_h_spec->vlan_etype; + match->mask.vlan.vlan_tpid = ext_m_spec->vlan_etype; + + match->key.vlan.vlan_id = + ntohs(ext_h_spec->vlan_tci) & 0x0fff; + match->mask.vlan.vlan_id = + ntohs(ext_m_spec->vlan_tci) & 0x0fff; + + match->key.vlan.vlan_priority = + (ntohs(ext_h_spec->vlan_tci) & 0xe000) >> 13; + match->mask.vlan.vlan_priority = + (ntohs(ext_m_spec->vlan_tci) & 0xe000) >> 13; + + match->dissector.used_keys |= + BIT(FLOW_DISSECTOR_KEY_VLAN); + match->dissector.offset[FLOW_DISSECTOR_KEY_VLAN] = + offsetof(struct ethtool_rx_flow_key, vlan); + } + } + if (fs->flow_type & FLOW_MAC_EXT) { + const struct ethtool_flow_ext *ext_h_spec = &fs->h_ext; + const struct ethtool_flow_ext *ext_m_spec = &fs->m_ext; + + if (ext_m_spec->h_dest) { + memcpy(match->key.eth_addrs.dst, ext_h_spec->h_dest, + ETH_ALEN); + memcpy(match->mask.eth_addrs.dst, ext_m_spec->h_dest, + ETH_ALEN); + + match->dissector.used_keys |= + BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS); + match->dissector.offset[FLOW_DISSECTOR_KEY_ETH_ADDRS] = + offsetof(struct ethtool_rx_flow_key, eth_addrs); + } + } + + act = &flow->rule->action.entries[0]; + switch (fs->ring_cookie) { + case RX_CLS_FLOW_DISC: + act->id = FLOW_ACTION_DROP; + break; + case RX_CLS_FLOW_WAKE: + act->id = FLOW_ACTION_WAKE; + break; + default: + act->id = FLOW_ACTION_QUEUE; + if (fs->flow_type & FLOW_RSS) + act->queue.ctx = input->rss_ctx; + + act->queue.vf = ethtool_get_flow_spec_ring_vf(fs->ring_cookie); + act->queue.index = ethtool_get_flow_spec_ring(fs->ring_cookie); + break; + } + + return flow; +} +EXPORT_SYMBOL(ethtool_rx_flow_rule_create); + +void ethtool_rx_flow_rule_destroy(struct ethtool_rx_flow_rule *flow) +{ + kfree(flow->rule); + kfree(flow); +} +EXPORT_SYMBOL(ethtool_rx_flow_rule_destroy); -- cgit v1.2.3 From d6abc5969463359c366d459247b90366fcd6f5c5 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 6 Feb 2019 09:45:35 -0800 Subject: net: Introduce ndo_get_port_parent_id() In preparation for getting rid of switchdev_ops, create a dedicated NDO operation for getting the port's parent identifier. There are essentially two classes of drivers that need to implement getting the port's parent ID which are VF/PF drivers with a built-in switch, and pure switchdev drivers such as mlxsw, ocelot, dsa etc. We introduce a helper function: dev_get_port_parent_id() which supports recursion into the lower devices to obtain the first port's parent ID. Convert the bridge, core and ipv4 multicast routing code to check for such ndo_get_port_parent_id() and call the helper function when valid before falling back to switchdev_port_attr_get(). This will allow us to convert all relevant drivers in one go instead of having to implement both switchdev_port_attr_get() and ndo_get_port_parent_id() operations, then get rid of switchdev_port_attr_get(). Acked-by: Jiri Pirko Signed-off-by: Florian Fainelli Reviewed-by: Ido Schimmel Signed-off-by: David S. Miller --- include/linux/netdevice.h | 9 ++++++++ net/bridge/br_switchdev.c | 9 ++++++-- net/core/dev.c | 57 +++++++++++++++++++++++++++++++++++++++++++++++ net/core/net-sysfs.c | 7 +++++- net/core/rtnetlink.c | 6 ++++- net/ipv4/ipmr.c | 8 ++++++- 6 files changed, 91 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ba57d0ba425e..1d95e634f3fe 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1188,6 +1188,10 @@ struct dev_ifalias { * not implement this, it is assumed that the hw is not able to have * multiple net devices on single physical port. * + * int (*ndo_get_port_parent_id)(struct net_device *dev, + * struct netdev_phys_item_id *ppid) + * Called to get the parent ID of the physical port of this device. + * * void (*ndo_udp_tunnel_add)(struct net_device *dev, * struct udp_tunnel_info *ti); * Called by UDP tunnel to notify a driver about the UDP port and socket @@ -1412,6 +1416,8 @@ struct net_device_ops { bool new_carrier); int (*ndo_get_phys_port_id)(struct net_device *dev, struct netdev_phys_item_id *ppid); + int (*ndo_get_port_parent_id)(struct net_device *dev, + struct netdev_phys_item_id *ppid); int (*ndo_get_phys_port_name)(struct net_device *dev, char *name, size_t len); void (*ndo_udp_tunnel_add)(struct net_device *dev, @@ -3651,6 +3657,9 @@ int dev_get_phys_port_id(struct net_device *dev, struct netdev_phys_item_id *ppid); int dev_get_phys_port_name(struct net_device *dev, char *name, size_t len); +int dev_get_port_parent_id(struct net_device *dev, + struct netdev_phys_item_id *ppid, bool recurse); +bool netdev_port_same_parent_id(struct net_device *a, struct net_device *b); int dev_change_proto_down(struct net_device *dev, bool proto_down); struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev, bool *again); struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c index 4d2b9eb7604a..06b0ae44585f 100644 --- a/net/bridge/br_switchdev.c +++ b/net/bridge/br_switchdev.c @@ -14,7 +14,8 @@ static int br_switchdev_mark_get(struct net_bridge *br, struct net_device *dev) /* dev is yet to be added to the port list. */ list_for_each_entry(p, &br->port_list, list) { - if (switchdev_port_same_parent_id(dev, p->dev)) + if (netdev_port_same_parent_id(dev, p->dev) || + switchdev_port_same_parent_id(dev, p->dev)) return p->offload_fwd_mark; } @@ -23,6 +24,7 @@ static int br_switchdev_mark_get(struct net_bridge *br, struct net_device *dev) int nbp_switchdev_mark_set(struct net_bridge_port *p) { + const struct net_device_ops *ops = p->dev->netdev_ops; struct switchdev_attr attr = { .orig_dev = p->dev, .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID, @@ -31,7 +33,10 @@ int nbp_switchdev_mark_set(struct net_bridge_port *p) ASSERT_RTNL(); - err = switchdev_port_attr_get(p->dev, &attr); + if (ops->ndo_get_port_parent_id) + err = dev_get_port_parent_id(p->dev, &attr.u.ppid, true); + else + err = switchdev_port_attr_get(p->dev, &attr); if (err) { if (err == -EOPNOTSUPP) return 0; diff --git a/net/core/dev.c b/net/core/dev.c index bfa4be42afff..8c6d5cf8a308 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -7877,6 +7877,63 @@ int dev_get_phys_port_name(struct net_device *dev, } EXPORT_SYMBOL(dev_get_phys_port_name); +/** + * dev_get_port_parent_id - Get the device's port parent identifier + * @dev: network device + * @ppid: pointer to a storage for the port's parent identifier + * @recurse: allow/disallow recursion to lower devices + * + * Get the devices's port parent identifier + */ +int dev_get_port_parent_id(struct net_device *dev, + struct netdev_phys_item_id *ppid, + bool recurse) +{ + const struct net_device_ops *ops = dev->netdev_ops; + struct netdev_phys_item_id first = { }; + struct net_device *lower_dev; + struct list_head *iter; + int err = -EOPNOTSUPP; + + if (ops->ndo_get_port_parent_id) + return ops->ndo_get_port_parent_id(dev, ppid); + + if (!recurse) + return err; + + netdev_for_each_lower_dev(dev, lower_dev, iter) { + err = dev_get_port_parent_id(lower_dev, ppid, recurse); + if (err) + break; + if (!first.id_len) + first = *ppid; + else if (memcmp(&first, ppid, sizeof(*ppid))) + return -ENODATA; + } + + return err; +} +EXPORT_SYMBOL(dev_get_port_parent_id); + +/** + * netdev_port_same_parent_id - Indicate if two network devices have + * the same port parent identifier + * @a: first network device + * @b: second network device + */ +bool netdev_port_same_parent_id(struct net_device *a, struct net_device *b) +{ + struct netdev_phys_item_id a_id = { }; + struct netdev_phys_item_id b_id = { }; + + if (dev_get_port_parent_id(a, &a_id, true) || + dev_get_port_parent_id(b, &b_id, true)) + return false; + + return netdev_phys_item_id_same(&a_id, &b_id); +} +EXPORT_SYMBOL(netdev_port_same_parent_id); + /** * dev_change_proto_down - update protocol port state information * @dev: device diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index ff9fd2bb4ce4..4eace9f1dcf9 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -495,6 +495,7 @@ static ssize_t phys_switch_id_show(struct device *dev, struct device_attribute *attr, char *buf) { struct net_device *netdev = to_net_dev(dev); + const struct net_device_ops *ops = netdev->netdev_ops; ssize_t ret = -EINVAL; if (!rtnl_trylock()) @@ -507,7 +508,11 @@ static ssize_t phys_switch_id_show(struct device *dev, .flags = SWITCHDEV_F_NO_RECURSE, }; - ret = switchdev_port_attr_get(netdev, &attr); + if (ops->ndo_get_port_parent_id) + ret = dev_get_port_parent_id(netdev, &attr.u.ppid, + false); + else + ret = switchdev_port_attr_get(netdev, &attr); if (!ret) ret = sprintf(buf, "%*phN\n", attr.u.ppid.id_len, attr.u.ppid.id); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index f5a98082ac7a..90dd02c1f561 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1146,6 +1146,7 @@ static int rtnl_phys_port_name_fill(struct sk_buff *skb, struct net_device *dev) static int rtnl_phys_switch_id_fill(struct sk_buff *skb, struct net_device *dev) { + const struct net_device_ops *ops = dev->netdev_ops; int err; struct switchdev_attr attr = { .orig_dev = dev, @@ -1153,7 +1154,10 @@ static int rtnl_phys_switch_id_fill(struct sk_buff *skb, struct net_device *dev) .flags = SWITCHDEV_F_NO_RECURSE, }; - err = switchdev_port_attr_get(dev, &attr); + if (ops->ndo_get_port_parent_id) + err = dev_get_port_parent_id(dev, &attr.u.ppid, false); + else + err = switchdev_port_attr_get(dev, &attr); if (err) { if (err == -EOPNOTSUPP) return 0; diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index fb99002c3d4e..c71bcc42d66d 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -837,6 +837,7 @@ static void ipmr_update_thresholds(struct mr_table *mrt, struct mr_mfc *cache, static int vif_add(struct net *net, struct mr_table *mrt, struct vifctl *vifc, int mrtsock) { + const struct net_device_ops *ops; int vifi = vifc->vifc_vifi; struct switchdev_attr attr = { .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID, @@ -920,7 +921,12 @@ static int vif_add(struct net *net, struct mr_table *mrt, (VIFF_TUNNEL | VIFF_REGISTER)); attr.orig_dev = dev; - if (!switchdev_port_attr_get(dev, &attr)) { + ops = dev->netdev_ops; + if (ops->ndo_get_port_parent_id && + !dev_get_port_parent_id(dev, &attr.u.ppid, true)) { + memcpy(v->dev_parent_id.id, attr.u.ppid.id, attr.u.ppid.id_len); + v->dev_parent_id.id_len = attr.u.ppid.id_len; + } else if (!switchdev_port_attr_get(dev, &attr)) { memcpy(v->dev_parent_id.id, attr.u.ppid.id, attr.u.ppid.id_len); v->dev_parent_id.id_len = attr.u.ppid.id_len; } else { -- cgit v1.2.3 From 0803de78049fe1b0baf44bcddc727b036fb9139b Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 7 Feb 2019 11:55:39 +0100 Subject: blktrace: Show requests without sector Currently, blktrace will not show requests that don't have any data as rq->__sector is initialized to -1 which is out of device range and thus discarded by act_log_check(). This is most notably the case for cache flush requests sent to the device. Fix the problem by making blk_rq_trace_sector() return 0 for requests without initialized sector. Reviewed-by: Johannes Thumshirn Signed-off-by: Jan Kara Signed-off-by: Jens Axboe --- include/linux/blktrace_api.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index 8804753805ac..7bb2d8de9f30 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -116,7 +116,13 @@ extern void blk_fill_rwbs(char *rwbs, unsigned int op, int bytes); static inline sector_t blk_rq_trace_sector(struct request *rq) { - return blk_rq_is_passthrough(rq) ? 0 : blk_rq_pos(rq); + /* + * Tracing should ignore starting sector for passthrough requests and + * requests where starting sector didn't get set. + */ + if (blk_rq_is_passthrough(rq) || blk_rq_pos(rq) == (sector_t)-1) + return 0; + return blk_rq_pos(rq); } static inline unsigned int blk_rq_trace_nr_sectors(struct request *rq) -- cgit v1.2.3 From 71bd106d2567675668e253cba3960e3c4bf2e80e Mon Sep 17 00:00:00 2001 From: Moritz Fischer Date: Thu, 7 Feb 2019 09:52:10 -0800 Subject: net: fixed-phy: Add fixed_phy_register_with_gpiod() API Add fixed_phy_register_with_gpiod() API. It lets users create a fixed_phy instance that uses a GPIO descriptor which was obtained externally e.g. through platform data. This enables platform devices (non-DT based) to use GPIOs for link status. Signed-off-by: Moritz Fischer Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/fixed_phy.c | 32 +++++++++++++++++++++++++------- include/linux/phy_fixed.h | 15 +++++++++++++++ 2 files changed, 40 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/phy/fixed_phy.c b/drivers/net/phy/fixed_phy.c index d810f914aaa4..b0d1368c3400 100644 --- a/drivers/net/phy/fixed_phy.c +++ b/drivers/net/phy/fixed_phy.c @@ -229,12 +229,12 @@ static struct gpio_desc *fixed_phy_get_gpiod(struct device_node *np) } #endif -struct phy_device *fixed_phy_register(unsigned int irq, - struct fixed_phy_status *status, - struct device_node *np) +static struct phy_device *__fixed_phy_register(unsigned int irq, + struct fixed_phy_status *status, + struct device_node *np, + struct gpio_desc *gpiod) { struct fixed_mdio_bus *fmb = &platform_fmb; - struct gpio_desc *gpiod = NULL; struct phy_device *phy; int phy_addr; int ret; @@ -243,9 +243,11 @@ struct phy_device *fixed_phy_register(unsigned int irq, return ERR_PTR(-EPROBE_DEFER); /* Check if we have a GPIO associated with this fixed phy */ - gpiod = fixed_phy_get_gpiod(np); - if (IS_ERR(gpiod)) - return ERR_CAST(gpiod); + if (!gpiod) { + gpiod = fixed_phy_get_gpiod(np); + if (IS_ERR(gpiod)) + return ERR_CAST(gpiod); + } /* Get the next available PHY address, up to PHY_MAX_ADDR */ phy_addr = ida_simple_get(&phy_fixed_ida, 0, PHY_MAX_ADDR, GFP_KERNEL); @@ -308,8 +310,24 @@ struct phy_device *fixed_phy_register(unsigned int irq, return phy; } + +struct phy_device *fixed_phy_register(unsigned int irq, + struct fixed_phy_status *status, + struct device_node *np) +{ + return __fixed_phy_register(irq, status, np, NULL); +} EXPORT_SYMBOL_GPL(fixed_phy_register); +struct phy_device * +fixed_phy_register_with_gpiod(unsigned int irq, + struct fixed_phy_status *status, + struct gpio_desc *gpiod) +{ + return __fixed_phy_register(irq, status, NULL, gpiod); +} +EXPORT_SYMBOL_GPL(fixed_phy_register_with_gpiod); + void fixed_phy_unregister(struct phy_device *phy) { phy_device_remove(phy); diff --git a/include/linux/phy_fixed.h b/include/linux/phy_fixed.h index c78fc203db43..1e5d86ebdaeb 100644 --- a/include/linux/phy_fixed.h +++ b/include/linux/phy_fixed.h @@ -19,6 +19,12 @@ extern int fixed_phy_add(unsigned int irq, int phy_id, extern struct phy_device *fixed_phy_register(unsigned int irq, struct fixed_phy_status *status, struct device_node *np); + +extern struct phy_device * +fixed_phy_register_with_gpiod(unsigned int irq, + struct fixed_phy_status *status, + struct gpio_desc *gpiod); + extern void fixed_phy_unregister(struct phy_device *phydev); extern int fixed_phy_set_link_update(struct phy_device *phydev, int (*link_update)(struct net_device *, @@ -35,6 +41,15 @@ static inline struct phy_device *fixed_phy_register(unsigned int irq, { return ERR_PTR(-ENODEV); } + +static inline struct phy_device * +fixed_phy_register_with_gpiod(unsigned int irq, + struct fixed_phy_status *status, + struct gpio_desc *gpiod) +{ + return ERR_PTR(-ENODEV); +} + static inline void fixed_phy_unregister(struct phy_device *phydev) { } -- cgit v1.2.3 From 998a8a8387ff5f65da456d1fc448dbb926fb5d78 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Thu, 7 Feb 2019 21:41:46 +0100 Subject: net: phy: let genphy_c45_read_link manage the devices to check Let genphy_c45_read_link manage the devices to check, this removes overhead from callers. Add C22EXT to the list of excluded devices because it doesn't implement the status register. According to the 802.3 clause 45 spec registers 29.0 - 29.4 are reserved. At the moment we have very few clause 45 PHY drivers, so we are lacking experience whether other drivers will have to exclude further devices, or may need to check PHY XS. If we should figure out that list of devices to check needs to be configurable, I think best will be to add a device list member to struct phy_driver. v2: - adjusted commit message - exclude also device C22EXT from link checking Signed-off-by: Heiner Kallweit Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/phy/marvell10g.c | 10 +--------- drivers/net/phy/phy-c45.c | 18 ++++++++++-------- include/linux/phy.h | 2 +- include/uapi/linux/mdio.h | 2 ++ 4 files changed, 14 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/phy/marvell10g.c b/drivers/net/phy/marvell10g.c index 296a537cdfcb..96a79c6c7810 100644 --- a/drivers/net/phy/marvell10g.c +++ b/drivers/net/phy/marvell10g.c @@ -428,16 +428,8 @@ static int mv3310_read_10gbr_status(struct phy_device *phydev) static int mv3310_read_status(struct phy_device *phydev) { - u32 mmd_mask = phydev->c45_ids.devices_in_package; int val; - /* The vendor devads do not report link status. Avoid the PHYXS - * instance as there are three, and its status depends on the MAC - * being appropriately configured for the negotiated speed. - */ - mmd_mask &= ~(BIT(MDIO_MMD_VEND1) | BIT(MDIO_MMD_VEND2) | - BIT(MDIO_MMD_PHYXS)); - phydev->speed = SPEED_UNKNOWN; phydev->duplex = DUPLEX_UNKNOWN; linkmode_zero(phydev->lp_advertising); @@ -453,7 +445,7 @@ static int mv3310_read_status(struct phy_device *phydev) if (val & MDIO_STAT1_LSTATUS) return mv3310_read_10gbr_status(phydev); - val = genphy_c45_read_link(phydev, mmd_mask); + val = genphy_c45_read_link(phydev); if (val < 0) return val; diff --git a/drivers/net/phy/phy-c45.c b/drivers/net/phy/phy-c45.c index c92d0fb7ec4f..6adfe1f6319e 100644 --- a/drivers/net/phy/phy-c45.c +++ b/drivers/net/phy/phy-c45.c @@ -118,17 +118,24 @@ EXPORT_SYMBOL_GPL(genphy_c45_aneg_done); /** * genphy_c45_read_link - read the overall link status from the MMDs * @phydev: target phy_device struct - * @mmd_mask: MMDs to read status from * * Read the link status from the specified MMDs, and if they all indicate * that the link is up, set phydev->link to 1. If an error is encountered, * a negative errno will be returned, otherwise zero. */ -int genphy_c45_read_link(struct phy_device *phydev, u32 mmd_mask) +int genphy_c45_read_link(struct phy_device *phydev) { + u32 mmd_mask = phydev->c45_ids.devices_in_package; int val, devad; bool link = true; + /* The vendor devads and C22EXT do not report link status. Avoid the + * PHYXS instance as its status may depend on the MAC being + * appropriately configured for the negotiated speed. + */ + mmd_mask &= ~(MDIO_DEVS_VEND1 | MDIO_DEVS_VEND2 | MDIO_DEVS_C22EXT | + MDIO_DEVS_PHYXS); + while (mmd_mask && link) { devad = __ffs(mmd_mask); mmd_mask &= ~BIT(devad); @@ -266,16 +273,11 @@ EXPORT_SYMBOL_GPL(gen10g_config_aneg); int gen10g_read_status(struct phy_device *phydev) { - u32 mmd_mask = phydev->c45_ids.devices_in_package; - /* For now just lie and say it's 10G all the time */ phydev->speed = SPEED_10000; phydev->duplex = DUPLEX_FULL; - /* Avoid reading the vendor MMDs */ - mmd_mask &= ~(BIT(MDIO_MMD_VEND1) | BIT(MDIO_MMD_VEND2)); - - return genphy_c45_read_link(phydev, mmd_mask); + return genphy_c45_read_link(phydev); } EXPORT_SYMBOL_GPL(gen10g_read_status); diff --git a/include/linux/phy.h b/include/linux/phy.h index 237dd035858a..f41bf651f6a0 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1094,7 +1094,7 @@ int genphy_write_mmd_unsupported(struct phy_device *phdev, int devnum, /* Clause 45 PHY */ int genphy_c45_restart_aneg(struct phy_device *phydev); int genphy_c45_aneg_done(struct phy_device *phydev); -int genphy_c45_read_link(struct phy_device *phydev, u32 mmd_mask); +int genphy_c45_read_link(struct phy_device *phydev); int genphy_c45_read_lpa(struct phy_device *phydev); int genphy_c45_read_pma(struct phy_device *phydev); int genphy_c45_pma_setup_forced(struct phy_device *phydev); diff --git a/include/uapi/linux/mdio.h b/include/uapi/linux/mdio.h index d435b00d64ad..2e6e309f0847 100644 --- a/include/uapi/linux/mdio.h +++ b/include/uapi/linux/mdio.h @@ -123,6 +123,8 @@ #define MDIO_DEVS_TC MDIO_DEVS_PRESENT(MDIO_MMD_TC) #define MDIO_DEVS_AN MDIO_DEVS_PRESENT(MDIO_MMD_AN) #define MDIO_DEVS_C22EXT MDIO_DEVS_PRESENT(MDIO_MMD_C22EXT) +#define MDIO_DEVS_VEND1 MDIO_DEVS_PRESENT(MDIO_MMD_VEND1) +#define MDIO_DEVS_VEND2 MDIO_DEVS_PRESENT(MDIO_MMD_VEND2) /* Control register 2. */ #define MDIO_PMA_CTRL2_TYPE 0x000f /* PMA/PMD type selection */ -- cgit v1.2.3 From dcf6e2e38a1c7ccbc535de5e1d9b14998847499d Mon Sep 17 00:00:00 2001 From: Zachary Hays Date: Thu, 7 Feb 2019 10:03:08 -0500 Subject: mmc: block: handle complete_work on separate workqueue The kblockd workqueue is created with the WQ_MEM_RECLAIM flag set. This generates a rescuer thread for that queue that will trigger when the CPU is under heavy load and collect the uncompleted work. In the case of mmc, this creates the possibility of a deadlock when there are multiple partitions on the device as other blk-mq work is also run on the same queue. For example: - worker 0 claims the mmc host to work on partition 1 - worker 1 attempts to claim the host for partition 2 but has to wait for worker 0 to finish - worker 0 schedules complete_work to release the host - rescuer thread is triggered after time-out and collects the dangling work - rescuer thread attempts to complete the work in order starting with claim host - the task to release host is now blocked by a task to claim it and will never be called The above results in multiple hung tasks that lead to failures to mount partitions. Handling complete_work on a separate workqueue avoids this by keeping the work completion tasks separate from the other blk-mq work. This allows the host to be released without getting blocked by other tasks attempting to claim the host. Signed-off-by: Zachary Hays Fixes: 81196976ed94 ("mmc: block: Add blk-mq support") Cc: Signed-off-by: Ulf Hansson --- drivers/mmc/core/block.c | 10 +++++++++- include/linux/mmc/card.h | 1 + 2 files changed, 10 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index aef1185f383d..14f3fdb8c6bb 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -2112,7 +2112,7 @@ static void mmc_blk_mq_req_done(struct mmc_request *mrq) if (waiting) wake_up(&mq->wait); else - kblockd_schedule_work(&mq->complete_work); + queue_work(mq->card->complete_wq, &mq->complete_work); return; } @@ -2924,6 +2924,13 @@ static int mmc_blk_probe(struct mmc_card *card) mmc_fixup_device(card, mmc_blk_fixups); + card->complete_wq = alloc_workqueue("mmc_complete", + WQ_MEM_RECLAIM | WQ_HIGHPRI, 0); + if (unlikely(!card->complete_wq)) { + pr_err("Failed to create mmc completion workqueue"); + return -ENOMEM; + } + md = mmc_blk_alloc(card); if (IS_ERR(md)) return PTR_ERR(md); @@ -2987,6 +2994,7 @@ static void mmc_blk_remove(struct mmc_card *card) pm_runtime_put_noidle(&card->dev); mmc_blk_remove_req(md); dev_set_drvdata(&card->dev, NULL); + destroy_workqueue(card->complete_wq); } static int _mmc_blk_suspend(struct mmc_card *card) diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index de7377815b6b..8ef330027b13 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -308,6 +308,7 @@ struct mmc_card { unsigned int nr_parts; unsigned int bouncesz; /* Bounce buffer size */ + struct workqueue_struct *complete_wq; /* Private workqueue */ }; static inline bool mmc_large_sector(struct mmc_card *card) -- cgit v1.2.3 From 9069a3817d82b01b3a55da382c774e3575946130 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 7 Feb 2019 11:22:46 +0000 Subject: lib: objagg: implement optimization hints assembly and use hints for object creation Implement simple greedy algo to find more optimized root-delta tree for a given objagg instance. This "hints" can be used by a driver to: 1) check if the hints are better (driver's choice) than the original objagg tree. Driver does comparison of objagg stats and hints stats. 2) use the hints to create a new objagg instance which will construct the root-delta tree according to the passed hints. Currently, only a simple greedy algorithm is implemented. Basically it finds the roots according to the maximal possible user count including deltas. Signed-off-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlxsw/spectrum_acl_erp.c | 37 +- include/linux/objagg.h | 20 +- lib/objagg.c | 573 ++++++++++++++++++++- lib/test_objagg.c | 194 ++++++- 4 files changed, 802 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_erp.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_erp.c index 2941967e1cc5..302070a74f2e 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_erp.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_erp.c @@ -1200,6 +1200,32 @@ mlxsw_sp_acl_erp_delta_fill(const struct mlxsw_sp_acl_erp_key *parent_key, return 0; } +static bool mlxsw_sp_acl_erp_delta_check(void *priv, const void *parent_obj, + const void *obj) +{ + const struct mlxsw_sp_acl_erp_key *parent_key = parent_obj; + const struct mlxsw_sp_acl_erp_key *key = obj; + u16 delta_start; + u8 delta_mask; + int err; + + err = mlxsw_sp_acl_erp_delta_fill(parent_key, key, + &delta_start, &delta_mask); + return err ? false : true; +} + +static int mlxsw_sp_acl_erp_hints_obj_cmp(const void *obj1, const void *obj2) +{ + const struct mlxsw_sp_acl_erp_key *key1 = obj1; + const struct mlxsw_sp_acl_erp_key *key2 = obj2; + + /* For hints purposes, two objects are considered equal + * in case the masks are the same. Does not matter what + * the "ctcam" value is. + */ + return memcmp(key1->mask, key2->mask, sizeof(key1->mask)); +} + static void *mlxsw_sp_acl_erp_delta_create(void *priv, void *parent_obj, void *obj) { @@ -1254,12 +1280,17 @@ static void mlxsw_sp_acl_erp_delta_destroy(void *priv, void *delta_priv) kfree(delta); } -static void *mlxsw_sp_acl_erp_root_create(void *priv, void *obj) +static void *mlxsw_sp_acl_erp_root_create(void *priv, void *obj, + unsigned int root_id) { struct mlxsw_sp_acl_atcam_region *aregion = priv; struct mlxsw_sp_acl_erp_table *erp_table = aregion->erp_table; struct mlxsw_sp_acl_erp_key *key = obj; + if (!key->ctcam && + root_id != OBJAGG_OBJ_ROOT_ID_INVALID && + root_id >= MLXSW_SP_ACL_ERP_MAX_PER_REGION) + return ERR_PTR(-ENOBUFS); return erp_table->ops->erp_create(erp_table, key); } @@ -1273,6 +1304,8 @@ static void mlxsw_sp_acl_erp_root_destroy(void *priv, void *root_priv) static const struct objagg_ops mlxsw_sp_acl_erp_objagg_ops = { .obj_size = sizeof(struct mlxsw_sp_acl_erp_key), + .delta_check = mlxsw_sp_acl_erp_delta_check, + .hints_obj_cmp = mlxsw_sp_acl_erp_hints_obj_cmp, .delta_create = mlxsw_sp_acl_erp_delta_create, .delta_destroy = mlxsw_sp_acl_erp_delta_destroy, .root_create = mlxsw_sp_acl_erp_root_create, @@ -1290,7 +1323,7 @@ mlxsw_sp_acl_erp_table_create(struct mlxsw_sp_acl_atcam_region *aregion) return ERR_PTR(-ENOMEM); erp_table->objagg = objagg_create(&mlxsw_sp_acl_erp_objagg_ops, - aregion); + NULL, aregion); if (IS_ERR(erp_table->objagg)) { err = PTR_ERR(erp_table->objagg); goto err_objagg_create; diff --git a/include/linux/objagg.h b/include/linux/objagg.h index 34f38c186ea0..a675286df1af 100644 --- a/include/linux/objagg.h +++ b/include/linux/objagg.h @@ -6,14 +6,19 @@ struct objagg_ops { size_t obj_size; + bool (*delta_check)(void *priv, const void *parent_obj, + const void *obj); + int (*hints_obj_cmp)(const void *obj1, const void *obj2); void * (*delta_create)(void *priv, void *parent_obj, void *obj); void (*delta_destroy)(void *priv, void *delta_priv); - void * (*root_create)(void *priv, void *obj); + void * (*root_create)(void *priv, void *obj, unsigned int root_id); +#define OBJAGG_OBJ_ROOT_ID_INVALID UINT_MAX void (*root_destroy)(void *priv, void *root_priv); }; struct objagg; struct objagg_obj; +struct objagg_hints; const void *objagg_obj_root_priv(const struct objagg_obj *objagg_obj); const void *objagg_obj_delta_priv(const struct objagg_obj *objagg_obj); @@ -21,7 +26,8 @@ const void *objagg_obj_raw(const struct objagg_obj *objagg_obj); struct objagg_obj *objagg_obj_get(struct objagg *objagg, void *obj); void objagg_obj_put(struct objagg *objagg, struct objagg_obj *objagg_obj); -struct objagg *objagg_create(const struct objagg_ops *ops, void *priv); +struct objagg *objagg_create(const struct objagg_ops *ops, + struct objagg_hints *hints, void *priv); void objagg_destroy(struct objagg *objagg); struct objagg_obj_stats { @@ -43,4 +49,14 @@ struct objagg_stats { const struct objagg_stats *objagg_stats_get(struct objagg *objagg); void objagg_stats_put(const struct objagg_stats *objagg_stats); +enum objagg_opt_algo_type { + OBJAGG_OPT_ALGO_SIMPLE_GREEDY, +}; + +struct objagg_hints *objagg_hints_get(struct objagg *objagg, + enum objagg_opt_algo_type opt_algo_type); +void objagg_hints_put(struct objagg_hints *objagg_hints); +const struct objagg_stats * +objagg_hints_stats_get(struct objagg_hints *objagg_hints); + #endif diff --git a/lib/objagg.c b/lib/objagg.c index dae390bcef1a..befe8a47d080 100644 --- a/lib/objagg.c +++ b/lib/objagg.c @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -11,6 +12,34 @@ #define CREATE_TRACE_POINTS #include +struct objagg_hints { + struct rhashtable node_ht; + struct rhashtable_params ht_params; + struct list_head node_list; + unsigned int node_count; + unsigned int root_count; + unsigned int refcount; + const struct objagg_ops *ops; +}; + +struct objagg_hints_node { + struct rhash_head ht_node; /* member of objagg_hints->node_ht */ + struct list_head list; /* member of objagg_hints->node_list */ + struct objagg_hints_node *parent; + unsigned int root_id; + struct objagg_obj_stats_info stats_info; + unsigned long obj[0]; +}; + +static struct objagg_hints_node * +objagg_hints_lookup(struct objagg_hints *objagg_hints, void *obj) +{ + if (!objagg_hints) + return NULL; + return rhashtable_lookup_fast(&objagg_hints->node_ht, obj, + objagg_hints->ht_params); +} + struct objagg { const struct objagg_ops *ops; void *priv; @@ -18,6 +47,8 @@ struct objagg { struct rhashtable_params ht_params; struct list_head obj_list; unsigned int obj_count; + struct ida root_ida; + struct objagg_hints *hints; }; struct objagg_obj { @@ -30,6 +61,7 @@ struct objagg_obj { void *delta_priv; /* user delta private */ void *root_priv; /* user root private */ }; + unsigned int root_id; unsigned int refcount; /* counts number of users of this object * including nested objects */ @@ -130,7 +162,8 @@ static struct objagg_obj *objagg_obj_lookup(struct objagg *objagg, void *obj) static int objagg_obj_parent_assign(struct objagg *objagg, struct objagg_obj *objagg_obj, - struct objagg_obj *parent) + struct objagg_obj *parent, + bool take_parent_ref) { void *delta_priv; @@ -144,7 +177,8 @@ static int objagg_obj_parent_assign(struct objagg *objagg, */ objagg_obj->parent = parent; objagg_obj->delta_priv = delta_priv; - objagg_obj_ref_inc(objagg_obj->parent); + if (take_parent_ref) + objagg_obj_ref_inc(objagg_obj->parent); trace_objagg_obj_parent_assign(objagg, objagg_obj, parent, parent->refcount); @@ -164,7 +198,7 @@ static int objagg_obj_parent_lookup_assign(struct objagg *objagg, if (!objagg_obj_is_root(objagg_obj_cur)) continue; err = objagg_obj_parent_assign(objagg, objagg_obj, - objagg_obj_cur); + objagg_obj_cur, true); if (!err) return 0; } @@ -184,16 +218,68 @@ static void objagg_obj_parent_unassign(struct objagg *objagg, __objagg_obj_put(objagg, objagg_obj->parent); } +static int objagg_obj_root_id_alloc(struct objagg *objagg, + struct objagg_obj *objagg_obj, + struct objagg_hints_node *hnode) +{ + unsigned int min, max; + int root_id; + + /* In case there are no hints available, the root id is invalid. */ + if (!objagg->hints) { + objagg_obj->root_id = OBJAGG_OBJ_ROOT_ID_INVALID; + return 0; + } + + if (hnode) { + min = hnode->root_id; + max = hnode->root_id; + } else { + /* For objects with no hint, start after the last + * hinted root_id. + */ + min = objagg->hints->root_count; + max = ~0; + } + + root_id = ida_alloc_range(&objagg->root_ida, min, max, GFP_KERNEL); + + if (root_id < 0) + return root_id; + objagg_obj->root_id = root_id; + return 0; +} + +static void objagg_obj_root_id_free(struct objagg *objagg, + struct objagg_obj *objagg_obj) +{ + if (!objagg->hints) + return; + ida_free(&objagg->root_ida, objagg_obj->root_id); +} + static int objagg_obj_root_create(struct objagg *objagg, - struct objagg_obj *objagg_obj) + struct objagg_obj *objagg_obj, + struct objagg_hints_node *hnode) { - objagg_obj->root_priv = objagg->ops->root_create(objagg->priv, - objagg_obj->obj); - if (IS_ERR(objagg_obj->root_priv)) - return PTR_ERR(objagg_obj->root_priv); + int err; + err = objagg_obj_root_id_alloc(objagg, objagg_obj, hnode); + if (err) + return err; + objagg_obj->root_priv = objagg->ops->root_create(objagg->priv, + objagg_obj->obj, + objagg_obj->root_id); + if (IS_ERR(objagg_obj->root_priv)) { + err = PTR_ERR(objagg_obj->root_priv); + goto err_root_create; + } trace_objagg_obj_root_create(objagg, objagg_obj); return 0; + +err_root_create: + objagg_obj_root_id_free(objagg, objagg_obj); + return err; } static void objagg_obj_root_destroy(struct objagg *objagg, @@ -201,19 +287,69 @@ static void objagg_obj_root_destroy(struct objagg *objagg, { trace_objagg_obj_root_destroy(objagg, objagg_obj); objagg->ops->root_destroy(objagg->priv, objagg_obj->root_priv); + objagg_obj_root_id_free(objagg, objagg_obj); +} + +static struct objagg_obj *__objagg_obj_get(struct objagg *objagg, void *obj); + +static int objagg_obj_init_with_hints(struct objagg *objagg, + struct objagg_obj *objagg_obj, + bool *hint_found) +{ + struct objagg_hints_node *hnode; + struct objagg_obj *parent; + int err; + + hnode = objagg_hints_lookup(objagg->hints, objagg_obj->obj); + if (!hnode) { + *hint_found = false; + return 0; + } + *hint_found = true; + + if (!hnode->parent) + return objagg_obj_root_create(objagg, objagg_obj, hnode); + + parent = __objagg_obj_get(objagg, hnode->parent->obj); + if (IS_ERR(parent)) + return PTR_ERR(parent); + + err = objagg_obj_parent_assign(objagg, objagg_obj, parent, false); + if (err) { + *hint_found = false; + err = 0; + goto err_parent_assign; + } + + return 0; + +err_parent_assign: + objagg_obj_put(objagg, parent); + return err; } static int objagg_obj_init(struct objagg *objagg, struct objagg_obj *objagg_obj) { + bool hint_found; int err; + /* First, try to use hints if they are available and + * if they provide result. + */ + err = objagg_obj_init_with_hints(objagg, objagg_obj, &hint_found); + if (err) + return err; + + if (hint_found) + return 0; + /* Try to find if the object can be aggregated under an existing one. */ err = objagg_obj_parent_lookup_assign(objagg, objagg_obj); if (!err) return 0; /* If aggregation is not possible, make the object a root. */ - return objagg_obj_root_create(objagg, objagg_obj); + return objagg_obj_root_create(objagg, objagg_obj, NULL); } static void objagg_obj_fini(struct objagg *objagg, @@ -349,8 +485,9 @@ EXPORT_SYMBOL(objagg_obj_put); /** * objagg_create - creates a new objagg instance - * @ops: user-specific callbacks - * @priv: pointer to a private data passed to the ops + * @ops: user-specific callbacks + * @objagg_hints: hints, can be NULL + * @priv: pointer to a private data passed to the ops * * Note: all locking must be provided by the caller. * @@ -374,18 +511,25 @@ EXPORT_SYMBOL(objagg_obj_put); * Returns a pointer to newly created objagg instance in case of success, * otherwise it returns pointer error using ERR_PTR macro. */ -struct objagg *objagg_create(const struct objagg_ops *ops, void *priv) +struct objagg *objagg_create(const struct objagg_ops *ops, + struct objagg_hints *objagg_hints, void *priv) { struct objagg *objagg; int err; if (WARN_ON(!ops || !ops->root_create || !ops->root_destroy || - !ops->delta_create || !ops->delta_destroy)) + !ops->delta_check || !ops->delta_create || + !ops->delta_destroy)) return ERR_PTR(-EINVAL); + objagg = kzalloc(sizeof(*objagg), GFP_KERNEL); if (!objagg) return ERR_PTR(-ENOMEM); objagg->ops = ops; + if (objagg_hints) { + objagg->hints = objagg_hints; + objagg_hints->refcount++; + } objagg->priv = priv; INIT_LIST_HEAD(&objagg->obj_list); @@ -397,6 +541,8 @@ struct objagg *objagg_create(const struct objagg_ops *ops, void *priv) if (err) goto err_rhashtable_init; + ida_init(&objagg->root_ida); + trace_objagg_create(objagg); return objagg; @@ -415,8 +561,11 @@ EXPORT_SYMBOL(objagg_create); void objagg_destroy(struct objagg *objagg) { trace_objagg_destroy(objagg); + ida_destroy(&objagg->root_ida); WARN_ON(!list_empty(&objagg->obj_list)); rhashtable_destroy(&objagg->obj_ht); + if (objagg->hints) + objagg_hints_put(objagg->hints); kfree(objagg); } EXPORT_SYMBOL(objagg_destroy); @@ -496,6 +645,404 @@ void objagg_stats_put(const struct objagg_stats *objagg_stats) } EXPORT_SYMBOL(objagg_stats_put); +static struct objagg_hints_node * +objagg_hints_node_create(struct objagg_hints *objagg_hints, + struct objagg_obj *objagg_obj, size_t obj_size, + struct objagg_hints_node *parent_hnode) +{ + unsigned int user_count = objagg_obj->stats.user_count; + struct objagg_hints_node *hnode; + int err; + + hnode = kzalloc(sizeof(*hnode) + obj_size, GFP_KERNEL); + if (!hnode) + return ERR_PTR(-ENOMEM); + memcpy(hnode->obj, &objagg_obj->obj, obj_size); + hnode->stats_info.stats.user_count = user_count; + hnode->stats_info.stats.delta_user_count = user_count; + if (parent_hnode) { + parent_hnode->stats_info.stats.delta_user_count += user_count; + } else { + hnode->root_id = objagg_hints->root_count++; + hnode->stats_info.is_root = true; + } + hnode->stats_info.objagg_obj = objagg_obj; + + err = rhashtable_insert_fast(&objagg_hints->node_ht, &hnode->ht_node, + objagg_hints->ht_params); + if (err) + goto err_ht_insert; + + list_add(&hnode->list, &objagg_hints->node_list); + hnode->parent = parent_hnode; + objagg_hints->node_count++; + + return hnode; + +err_ht_insert: + kfree(hnode); + return ERR_PTR(err); +} + +static void objagg_hints_flush(struct objagg_hints *objagg_hints) +{ + struct objagg_hints_node *hnode, *tmp; + + list_for_each_entry_safe(hnode, tmp, &objagg_hints->node_list, list) { + list_del(&hnode->list); + rhashtable_remove_fast(&objagg_hints->node_ht, &hnode->ht_node, + objagg_hints->ht_params); + kfree(hnode); + } +} + +struct objagg_tmp_node { + struct objagg_obj *objagg_obj; + bool crossed_out; +}; + +struct objagg_tmp_graph { + struct objagg_tmp_node *nodes; + unsigned long nodes_count; + unsigned long *edges; +}; + +static int objagg_tmp_graph_edge_index(struct objagg_tmp_graph *graph, + int parent_index, int index) +{ + return index * graph->nodes_count + parent_index; +} + +static void objagg_tmp_graph_edge_set(struct objagg_tmp_graph *graph, + int parent_index, int index) +{ + int edge_index = objagg_tmp_graph_edge_index(graph, index, + parent_index); + + __set_bit(edge_index, graph->edges); +} + +static bool objagg_tmp_graph_is_edge(struct objagg_tmp_graph *graph, + int parent_index, int index) +{ + int edge_index = objagg_tmp_graph_edge_index(graph, index, + parent_index); + + return test_bit(edge_index, graph->edges); +} + +static unsigned int objagg_tmp_graph_node_weight(struct objagg_tmp_graph *graph, + unsigned int index) +{ + struct objagg_tmp_node *node = &graph->nodes[index]; + unsigned int weight = node->objagg_obj->stats.user_count; + int j; + + /* Node weight is sum of node users and all other nodes users + * that this node can represent with delta. + */ + + if (node->crossed_out) + return 0; + for (j = 0; j < graph->nodes_count; j++) { + if (!objagg_tmp_graph_is_edge(graph, index, j)) + continue; + node = &graph->nodes[j]; + if (node->crossed_out) + continue; + weight += node->objagg_obj->stats.user_count; + } + return weight; +} + +static int objagg_tmp_graph_node_max_weight(struct objagg_tmp_graph *graph) +{ + unsigned int max_weight = 0; + unsigned int weight; + int max_index = -1; + int i; + + for (i = 0; i < graph->nodes_count; i++) { + weight = objagg_tmp_graph_node_weight(graph, i); + if (weight > max_weight) { + max_weight = weight; + max_index = i; + } + } + return max_index; +} + +static struct objagg_tmp_graph *objagg_tmp_graph_create(struct objagg *objagg) +{ + unsigned int nodes_count = objagg->obj_count; + struct objagg_tmp_graph *graph; + struct objagg_tmp_node *node; + struct objagg_tmp_node *pnode; + struct objagg_obj *objagg_obj; + size_t alloc_size; + int i, j; + + graph = kzalloc(sizeof(*graph), GFP_KERNEL); + if (!graph) + return NULL; + + graph->nodes = kcalloc(nodes_count, sizeof(*graph->nodes), GFP_KERNEL); + if (!graph->nodes) + goto err_nodes_alloc; + graph->nodes_count = nodes_count; + + alloc_size = BITS_TO_LONGS(nodes_count * nodes_count) * + sizeof(unsigned long); + graph->edges = kzalloc(alloc_size, GFP_KERNEL); + if (!graph->edges) + goto err_edges_alloc; + + i = 0; + list_for_each_entry(objagg_obj, &objagg->obj_list, list) { + node = &graph->nodes[i++]; + node->objagg_obj = objagg_obj; + } + + /* Assemble a temporary graph. Insert edge X->Y in case Y can be + * in delta of X. + */ + for (i = 0; i < nodes_count; i++) { + for (j = 0; j < nodes_count; j++) { + if (i == j) + continue; + pnode = &graph->nodes[i]; + node = &graph->nodes[j]; + if (objagg->ops->delta_check(objagg->priv, + pnode->objagg_obj->obj, + node->objagg_obj->obj)) { + objagg_tmp_graph_edge_set(graph, i, j); + + } + } + } + return graph; + +err_edges_alloc: + kfree(graph->nodes); +err_nodes_alloc: + kfree(graph); + return NULL; +} + +static void objagg_tmp_graph_destroy(struct objagg_tmp_graph *graph) +{ + kfree(graph->edges); + kfree(graph->nodes); + kfree(graph); +} + +static int +objagg_opt_simple_greedy_fillup_hints(struct objagg_hints *objagg_hints, + struct objagg *objagg) +{ + struct objagg_hints_node *hnode, *parent_hnode; + struct objagg_tmp_graph *graph; + struct objagg_tmp_node *node; + int index; + int j; + int err; + + graph = objagg_tmp_graph_create(objagg); + if (!graph) + return -ENOMEM; + + /* Find the nodes from the ones that can accommodate most users + * and cross them out of the graph. Save them to the hint list. + */ + while ((index = objagg_tmp_graph_node_max_weight(graph)) != -1) { + node = &graph->nodes[index]; + node->crossed_out = true; + hnode = objagg_hints_node_create(objagg_hints, + node->objagg_obj, + objagg->ops->obj_size, + NULL); + if (IS_ERR(hnode)) { + err = PTR_ERR(hnode); + goto out; + } + parent_hnode = hnode; + for (j = 0; j < graph->nodes_count; j++) { + if (!objagg_tmp_graph_is_edge(graph, index, j)) + continue; + node = &graph->nodes[j]; + if (node->crossed_out) + continue; + node->crossed_out = true; + hnode = objagg_hints_node_create(objagg_hints, + node->objagg_obj, + objagg->ops->obj_size, + parent_hnode); + if (IS_ERR(hnode)) { + err = PTR_ERR(hnode); + goto out; + } + } + } + + err = 0; +out: + objagg_tmp_graph_destroy(graph); + return err; +} + +struct objagg_opt_algo { + int (*fillup_hints)(struct objagg_hints *objagg_hints, + struct objagg *objagg); +}; + +static const struct objagg_opt_algo objagg_opt_simple_greedy = { + .fillup_hints = objagg_opt_simple_greedy_fillup_hints, +}; + + +static const struct objagg_opt_algo *objagg_opt_algos[] = { + [OBJAGG_OPT_ALGO_SIMPLE_GREEDY] = &objagg_opt_simple_greedy, +}; + +static int objagg_hints_obj_cmp(struct rhashtable_compare_arg *arg, + const void *obj) +{ + struct rhashtable *ht = arg->ht; + struct objagg_hints *objagg_hints = + container_of(ht, struct objagg_hints, node_ht); + const struct objagg_ops *ops = objagg_hints->ops; + const char *ptr = obj; + + ptr += ht->p.key_offset; + return ops->hints_obj_cmp ? ops->hints_obj_cmp(ptr, arg->key) : + memcmp(ptr, arg->key, ht->p.key_len); +} + +/** + * objagg_hints_get - obtains hints instance + * @objagg: objagg instance + * @opt_algo_type: type of hints finding algorithm + * + * Note: all locking must be provided by the caller. + * + * According to the algo type, the existing objects of objagg instance + * are going to be went-through to assemble an optimal tree. We call this + * tree hints. These hints can be later on used for creation of + * a new objagg instance. There, the future object creations are going + * to be consulted with these hints in order to find out, where exactly + * the new object should be put as a root or delta. + * + * Returns a pointer to hints instance in case of success, + * otherwise it returns pointer error using ERR_PTR macro. + */ +struct objagg_hints *objagg_hints_get(struct objagg *objagg, + enum objagg_opt_algo_type opt_algo_type) +{ + const struct objagg_opt_algo *algo = objagg_opt_algos[opt_algo_type]; + struct objagg_hints *objagg_hints; + int err; + + objagg_hints = kzalloc(sizeof(*objagg_hints), GFP_KERNEL); + if (!objagg_hints) + return ERR_PTR(-ENOMEM); + + objagg_hints->ops = objagg->ops; + objagg_hints->refcount = 1; + + INIT_LIST_HEAD(&objagg_hints->node_list); + + objagg_hints->ht_params.key_len = objagg->ops->obj_size; + objagg_hints->ht_params.key_offset = + offsetof(struct objagg_hints_node, obj); + objagg_hints->ht_params.head_offset = + offsetof(struct objagg_hints_node, ht_node); + objagg_hints->ht_params.obj_cmpfn = objagg_hints_obj_cmp; + + err = rhashtable_init(&objagg_hints->node_ht, &objagg_hints->ht_params); + if (err) + goto err_rhashtable_init; + + err = algo->fillup_hints(objagg_hints, objagg); + if (err) + goto err_fillup_hints; + + if (WARN_ON(objagg_hints->node_count != objagg->obj_count)) + goto err_node_count_check; + + return objagg_hints; + +err_node_count_check: +err_fillup_hints: + objagg_hints_flush(objagg_hints); + rhashtable_destroy(&objagg_hints->node_ht); +err_rhashtable_init: + kfree(objagg_hints); + return ERR_PTR(err); +} +EXPORT_SYMBOL(objagg_hints_get); + +/** + * objagg_hints_put - puts hints instance + * @objagg_hints: objagg hints instance + * + * Note: all locking must be provided by the caller. + */ +void objagg_hints_put(struct objagg_hints *objagg_hints) +{ + if (--objagg_hints->refcount) + return; + objagg_hints_flush(objagg_hints); + rhashtable_destroy(&objagg_hints->node_ht); + kfree(objagg_hints); +} +EXPORT_SYMBOL(objagg_hints_put); + +/** + * objagg_hints_stats_get - obtains stats of the hints instance + * @objagg_hints: hints instance + * + * Note: all locking must be provided by the caller. + * + * The returned structure contains statistics of all objects + * currently in use, ordered by following rules: + * 1) Root objects are always on lower indexes than the rest. + * 2) Objects with higher delta user count are always on lower + * indexes. + * 3) In case multiple objects have the same delta user count, + * the objects are ordered by user count. + * + * Returns a pointer to stats instance in case of success, + * otherwise it returns pointer error using ERR_PTR macro. + */ +const struct objagg_stats * +objagg_hints_stats_get(struct objagg_hints *objagg_hints) +{ + struct objagg_stats *objagg_stats; + struct objagg_hints_node *hnode; + int i; + + objagg_stats = kzalloc(struct_size(objagg_stats, stats_info, + objagg_hints->node_count), + GFP_KERNEL); + if (!objagg_stats) + return ERR_PTR(-ENOMEM); + + i = 0; + list_for_each_entry(hnode, &objagg_hints->node_list, list) { + memcpy(&objagg_stats->stats_info[i], &hnode->stats_info, + sizeof(objagg_stats->stats_info[0])); + i++; + } + objagg_stats->stats_info_count = i; + + sort(objagg_stats->stats_info, objagg_stats->stats_info_count, + sizeof(struct objagg_obj_stats_info), + objagg_stats_info_sort_cmp_func, NULL); + + return objagg_stats; +} +EXPORT_SYMBOL(objagg_hints_stats_get); + MODULE_LICENSE("Dual BSD/GPL"); MODULE_AUTHOR("Jiri Pirko "); MODULE_DESCRIPTION("Object aggregation manager"); diff --git a/lib/test_objagg.c b/lib/test_objagg.c index ab57144bb0cd..3744573b6365 100644 --- a/lib/test_objagg.c +++ b/lib/test_objagg.c @@ -87,6 +87,15 @@ static void world_obj_put(struct world *world, struct objagg *objagg, #define MAX_KEY_ID_DIFF 5 +static bool delta_check(void *priv, const void *parent_obj, const void *obj) +{ + const struct tokey *parent_key = parent_obj; + const struct tokey *key = obj; + int diff = key->id - parent_key->id; + + return diff >= 0 && diff <= MAX_KEY_ID_DIFF; +} + static void *delta_create(void *priv, void *parent_obj, void *obj) { struct tokey *parent_key = parent_obj; @@ -95,7 +104,7 @@ static void *delta_create(void *priv, void *parent_obj, void *obj) int diff = key->id - parent_key->id; struct delta *delta; - if (diff < 0 || diff > MAX_KEY_ID_DIFF) + if (!delta_check(priv, parent_obj, obj)) return ERR_PTR(-EINVAL); delta = kzalloc(sizeof(*delta), GFP_KERNEL); @@ -115,7 +124,7 @@ static void delta_destroy(void *priv, void *delta_priv) kfree(delta); } -static void *root_create(void *priv, void *obj) +static void *root_create(void *priv, void *obj, unsigned int id) { struct world *world = priv; struct tokey *key = obj; @@ -268,6 +277,12 @@ stats_put: return err; } +static bool delta_check_dummy(void *priv, const void *parent_obj, + const void *obj) +{ + return false; +} + static void *delta_create_dummy(void *priv, void *parent_obj, void *obj) { return ERR_PTR(-EOPNOTSUPP); @@ -279,6 +294,7 @@ static void delta_destroy_dummy(void *priv, void *delta_priv) static const struct objagg_ops nodelta_ops = { .obj_size = sizeof(struct tokey), + .delta_check = delta_check_dummy, .delta_create = delta_create_dummy, .delta_destroy = delta_destroy_dummy, .root_create = root_create, @@ -292,7 +308,7 @@ static int test_nodelta(void) int i; int err; - objagg = objagg_create(&nodelta_ops, &world); + objagg = objagg_create(&nodelta_ops, NULL, &world); if (IS_ERR(objagg)) return PTR_ERR(objagg); @@ -357,6 +373,7 @@ err_stats_second_zero: static const struct objagg_ops delta_ops = { .obj_size = sizeof(struct tokey), + .delta_check = delta_check, .delta_create = delta_create, .delta_destroy = delta_destroy, .root_create = root_create, @@ -793,7 +810,7 @@ static int test_delta(void) int i; int err; - objagg = objagg_create(&delta_ops, &world); + objagg = objagg_create(&delta_ops, NULL, &world); if (IS_ERR(objagg)) return PTR_ERR(objagg); @@ -815,6 +832,170 @@ err_do_action_item: return err; } +struct hints_case { + const unsigned int *key_ids; + size_t key_ids_count; + struct expect_stats expect_stats; + struct expect_stats expect_stats_hints; +}; + +static const unsigned int hints_case_key_ids[] = { + 1, 7, 3, 5, 3, 1, 30, 8, 8, 5, 6, 8, +}; + +static const struct hints_case hints_case = { + .key_ids = hints_case_key_ids, + .key_ids_count = ARRAY_SIZE(hints_case_key_ids), + .expect_stats = + EXPECT_STATS(7, ROOT(1, 2, 7), ROOT(7, 1, 4), ROOT(30, 1, 1), + DELTA(8, 3), DELTA(3, 2), + DELTA(5, 2), DELTA(6, 1)), + .expect_stats_hints = + EXPECT_STATS(7, ROOT(3, 2, 9), ROOT(1, 2, 2), ROOT(30, 1, 1), + DELTA(8, 3), DELTA(5, 2), + DELTA(6, 1), DELTA(7, 1)), +}; + +static void __pr_debug_stats(const struct objagg_stats *stats) +{ + int i; + + for (i = 0; i < stats->stats_info_count; i++) + pr_debug("Stat index %d key %u: u %d, d %d, %s\n", i, + obj_to_key_id(stats->stats_info[i].objagg_obj), + stats->stats_info[i].stats.user_count, + stats->stats_info[i].stats.delta_user_count, + stats->stats_info[i].is_root ? "root" : "noroot"); +} + +static void pr_debug_stats(struct objagg *objagg) +{ + const struct objagg_stats *stats; + + stats = objagg_stats_get(objagg); + if (IS_ERR(stats)) + return; + __pr_debug_stats(stats); + objagg_stats_put(stats); +} + +static void pr_debug_hints_stats(struct objagg_hints *objagg_hints) +{ + const struct objagg_stats *stats; + + stats = objagg_hints_stats_get(objagg_hints); + if (IS_ERR(stats)) + return; + __pr_debug_stats(stats); + objagg_stats_put(stats); +} + +static int check_expect_hints_stats(struct objagg_hints *objagg_hints, + const struct expect_stats *expect_stats, + const char **errmsg) +{ + const struct objagg_stats *stats; + int err; + + stats = objagg_hints_stats_get(objagg_hints); + if (IS_ERR(stats)) + return PTR_ERR(stats); + err = __check_expect_stats(stats, expect_stats, errmsg); + objagg_stats_put(stats); + return err; +} + +static int test_hints_case(const struct hints_case *hints_case) +{ + struct objagg_obj *objagg_obj; + struct objagg_hints *hints; + struct world world2 = {}; + struct world world = {}; + struct objagg *objagg2; + struct objagg *objagg; + const char *errmsg; + int i; + int err; + + objagg = objagg_create(&delta_ops, NULL, &world); + if (IS_ERR(objagg)) + return PTR_ERR(objagg); + + for (i = 0; i < hints_case->key_ids_count; i++) { + objagg_obj = world_obj_get(&world, objagg, + hints_case->key_ids[i]); + if (IS_ERR(objagg_obj)) { + err = PTR_ERR(objagg_obj); + goto err_world_obj_get; + } + } + + pr_debug_stats(objagg); + err = check_expect_stats(objagg, &hints_case->expect_stats, &errmsg); + if (err) { + pr_err("Stats: %s\n", errmsg); + goto err_check_expect_stats; + } + + hints = objagg_hints_get(objagg, OBJAGG_OPT_ALGO_SIMPLE_GREEDY); + if (IS_ERR(hints)) { + err = PTR_ERR(hints); + goto err_hints_get; + } + + pr_debug_hints_stats(hints); + err = check_expect_hints_stats(hints, &hints_case->expect_stats_hints, + &errmsg); + if (err) { + pr_err("Hints stats: %s\n", errmsg); + goto err_check_expect_hints_stats; + } + + objagg2 = objagg_create(&delta_ops, hints, &world2); + if (IS_ERR(objagg)) + return PTR_ERR(objagg); + + for (i = 0; i < hints_case->key_ids_count; i++) { + objagg_obj = world_obj_get(&world2, objagg2, + hints_case->key_ids[i]); + if (IS_ERR(objagg_obj)) { + err = PTR_ERR(objagg_obj); + goto err_world2_obj_get; + } + } + + pr_debug_stats(objagg2); + err = check_expect_stats(objagg2, &hints_case->expect_stats_hints, + &errmsg); + if (err) { + pr_err("Stats2: %s\n", errmsg); + goto err_check_expect_stats2; + } + + err = 0; + +err_check_expect_stats2: +err_world2_obj_get: + for (i--; i >= 0; i--) + world_obj_put(&world2, objagg, hints_case->key_ids[i]); + objagg_hints_put(hints); + objagg_destroy(objagg2); + i = hints_case->key_ids_count; +err_check_expect_hints_stats: +err_hints_get: +err_check_expect_stats: +err_world_obj_get: + for (i--; i >= 0; i--) + world_obj_put(&world, objagg, hints_case->key_ids[i]); + + objagg_destroy(objagg); + return err; +} +static int test_hints(void) +{ + return test_hints_case(&hints_case); +} + static int __init test_objagg_init(void) { int err; @@ -822,7 +1003,10 @@ static int __init test_objagg_init(void) err = test_nodelta(); if (err) return err; - return test_delta(); + err = test_delta(); + if (err) + return err; + return test_hints(); } static void __exit test_objagg_exit(void) -- cgit v1.2.3 From 204f6a8c413ec41a7ec5e3f0f0590d4318f7a1f2 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 7 Feb 2019 11:22:47 +0000 Subject: lib: objagg: add root count to stats Count number of roots and add it to stats. It is handy for the library user to have this stats available as it can act upon it without counting roots itself. Signed-off-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- include/linux/objagg.h | 1 + lib/objagg.c | 4 ++++ 2 files changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/objagg.h b/include/linux/objagg.h index a675286df1af..78021777df46 100644 --- a/include/linux/objagg.h +++ b/include/linux/objagg.h @@ -42,6 +42,7 @@ struct objagg_obj_stats_info { }; struct objagg_stats { + unsigned int root_count; unsigned int stats_info_count; struct objagg_obj_stats_info stats_info[]; }; diff --git a/lib/objagg.c b/lib/objagg.c index befe8a47d080..781f41c3c47d 100644 --- a/lib/objagg.c +++ b/lib/objagg.c @@ -621,6 +621,8 @@ const struct objagg_stats *objagg_stats_get(struct objagg *objagg) objagg_stats->stats_info[i].objagg_obj = objagg_obj; objagg_stats->stats_info[i].is_root = objagg_obj_is_root(objagg_obj); + if (objagg_stats->stats_info[i].is_root) + objagg_stats->root_count++; i++; } objagg_stats->stats_info_count = i; @@ -1031,6 +1033,8 @@ objagg_hints_stats_get(struct objagg_hints *objagg_hints) list_for_each_entry(hnode, &objagg_hints->node_list, list) { memcpy(&objagg_stats->stats_info[i], &hnode->stats_info, sizeof(objagg_stats->stats_info[0])); + if (objagg_stats->stats_info[i].is_root) + objagg_stats->root_count++; i++; } objagg_stats->stats_info_count = i; -- cgit v1.2.3 From df9c716deb76642d0077770bca7107a31568c113 Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru Date: Thu, 7 Feb 2019 06:20:11 -0800 Subject: qed: Add API for SmartAN query. The patch adds driver interface to read the SmartAN capability from management firmware. Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Ariel Elior Signed-off-by: Michal Kalderon Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_hsi.h | 1 + drivers/net/ethernet/qlogic/qed/qed_main.c | 2 ++ drivers/net/ethernet/qlogic/qed/qed_mcp.c | 6 ++++++ drivers/net/ethernet/qlogic/qed/qed_mcp.h | 10 ++++++++++ include/linux/qed/qed_if.h | 1 + 5 files changed, 20 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/ethernet/qlogic/qed/qed_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_hsi.h index 417121e74ee9..37edaa847512 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h +++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h @@ -12796,6 +12796,7 @@ struct public_drv_mb { #define FW_MB_PARAM_GET_PF_RDMA_BOTH 0x3 /* get MFW feature support response */ +#define FW_MB_PARAM_FEATURE_SUPPORT_SMARTLINQ 0x00000001 #define FW_MB_PARAM_FEATURE_SUPPORT_EEE 0x00000002 #define FW_MB_PARAM_FEATURE_SUPPORT_VLINK 0x00010000 diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index b47352643fb5..f164d4acebcb 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -281,6 +281,8 @@ int qed_fill_dev_info(struct qed_dev *cdev, if (hw_info->b_wol_support == QED_WOL_SUPPORT_PME) dev_info->wol_support = true; + dev_info->smart_an = qed_mcp_is_smart_an_supported(p_hwfn); + dev_info->abs_pf_id = QED_LEADING_HWFN(cdev)->abs_pf_id; } else { qed_vf_get_fw_version(&cdev->hwfns[0], &dev_info->fw_major, diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c index bb8541847aa5..cc27fd60d689 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c @@ -3654,6 +3654,12 @@ void qed_mcp_resc_lock_default_init(struct qed_resc_lock_params *p_lock, } } +bool qed_mcp_is_smart_an_supported(struct qed_hwfn *p_hwfn) +{ + return !!(p_hwfn->mcp_info->capabilities & + FW_MB_PARAM_FEATURE_SUPPORT_SMARTLINQ); +} + int qed_mcp_get_capabilities(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) { u32 mcp_resp; diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.h b/drivers/net/ethernet/qlogic/qed/qed_mcp.h index 6e1d72a669ae..2799e6741765 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.h +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.h @@ -1148,6 +1148,16 @@ void qed_mcp_resc_lock_default_init(struct qed_resc_lock_params *p_lock, struct qed_resc_unlock_params *p_unlock, enum qed_resc_lock resource, bool b_is_permanent); + +/** + * @brief - Return whether management firmware support smart AN + * + * @param p_hwfn + * + * @return bool - true if feature is supported. + */ +bool qed_mcp_is_smart_an_supported(struct qed_hwfn *p_hwfn); + /** * @brief Learn of supported MFW features; To be done during early init * diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index 35170f74ed80..f6165d304b4d 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -643,6 +643,7 @@ struct qed_dev_info { u16 mtu; bool wol_support; + bool smart_an; /* MBI version */ u32 mbi_version; -- cgit v1.2.3 From efbdfdc29bdd4dbf79ad4bddc8f7a5ac62c66bfe Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sat, 9 Feb 2019 15:24:47 +0100 Subject: net: phy: Add support for asking the PHY its abilities Add support for runtime determination of what the PHY supports, by adding a new function to the phy driver. The get_features call should set the phydev->supported member with the features the PHY supports. It is only called if phydrv->features is NULL. This requires minor changes to pause. The PHY driver should not set pause abilities, except for when it has odd cause capabilities, e.g. pause cannot be disabled. With this change, phydev->supported already contains the drivers abilities, including pause. So rather than considering phydrv->features, look at the phydev->supported, and enable pause if neither of the pause bits are already set. Signed-off-by: Andrew Lunn [hkallweit1@gmail.com: fixed small checkpatch complaint in one comment] Signed-off-by: Heiner Kallweit Signed-off-by: David S. Miller --- drivers/net/phy/phy_device.c | 31 +++++++++++++++---------------- include/linux/phy.h | 6 ++++++ 2 files changed, 21 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 92b7a71df0ac..8573d17ece0f 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -2236,7 +2236,14 @@ static int phy_probe(struct device *dev) * a controller will attach, and may modify one * or both of these values */ - linkmode_copy(phydev->supported, phydrv->features); + if (phydrv->features) { + linkmode_copy(phydev->supported, phydrv->features); + } else { + err = phydrv->get_features(phydev); + if (err) + goto out; + } + of_set_phy_supported(phydev); linkmode_copy(phydev->advertising, phydev->supported); @@ -2256,20 +2263,8 @@ static int phy_probe(struct device *dev) * (e.g. hardware erratum) where the driver wants to set only one * of these bits. */ - if (test_bit(ETHTOOL_LINK_MODE_Pause_BIT, phydrv->features) || - test_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, phydrv->features)) { - linkmode_clear_bit(ETHTOOL_LINK_MODE_Pause_BIT, - phydev->supported); - linkmode_clear_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, - phydev->supported); - if (test_bit(ETHTOOL_LINK_MODE_Pause_BIT, phydrv->features)) - linkmode_set_bit(ETHTOOL_LINK_MODE_Pause_BIT, - phydev->supported); - if (test_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, - phydrv->features)) - linkmode_set_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, - phydev->supported); - } else { + if (!test_bit(ETHTOOL_LINK_MODE_Pause_BIT, phydev->supported) && + !test_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, phydev->supported)) { linkmode_set_bit(ETHTOOL_LINK_MODE_Pause_BIT, phydev->supported); linkmode_set_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, @@ -2315,7 +2310,11 @@ int phy_driver_register(struct phy_driver *new_driver, struct module *owner) { int retval; - if (WARN_ON(!new_driver->features)) { + /* Either the features are hard coded, or dynamically + * determine. It cannot be both or neither + */ + if (WARN_ON((!new_driver->features && !new_driver->get_features) || + (new_driver->features && new_driver->get_features))) { pr_err("%s: Driver features are missing\n", new_driver->name); return -EINVAL; } diff --git a/include/linux/phy.h b/include/linux/phy.h index f41bf651f6a0..d2ffae992e4a 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -502,6 +502,12 @@ struct phy_driver { */ int (*probe)(struct phy_device *phydev); + /* + * Probe the hardware to determine what abilities it has. + * Should only set phydev->supported. + */ + int (*get_features)(struct phy_device *phydev); + /* PHY Power Management */ int (*suspend)(struct phy_device *phydev); int (*resume)(struct phy_device *phydev); -- cgit v1.2.3 From b8554d4f7288f86fb278e0bc7b5b19579bf16b69 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sun, 10 Feb 2019 19:57:56 +0100 Subject: net: phy: add register modifying helpers returning 1 on change When modifying registers there are scenarios where we need to know whether the register content actually changed. This patch adds new helpers to not break users of the current ones, phy_modify() etc. Signed-off-by: Heiner Kallweit Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/phy-core.c | 127 +++++++++++++++++++++++++++++++++++++++++---- include/linux/phy.h | 12 ++++- 2 files changed, 128 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/phy/phy-core.c b/drivers/net/phy/phy-core.c index 7d6aad287f84..cdea028d1328 100644 --- a/drivers/net/phy/phy-core.c +++ b/drivers/net/phy/phy-core.c @@ -531,7 +531,7 @@ int phy_write_mmd(struct phy_device *phydev, int devad, u32 regnum, u16 val) EXPORT_SYMBOL(phy_write_mmd); /** - * __phy_modify() - Convenience function for modifying a PHY register + * __phy_modify_changed() - Convenience function for modifying a PHY register * @phydev: a pointer to a &struct phy_device * @regnum: register number * @mask: bit mask of bits to clear @@ -539,16 +539,69 @@ EXPORT_SYMBOL(phy_write_mmd); * * Unlocked helper function which allows a PHY register to be modified as * new register value = (old register value & ~mask) | set + * + * Returns negative errno, 0 if there was no change, and 1 in case of change */ -int __phy_modify(struct phy_device *phydev, u32 regnum, u16 mask, u16 set) +int __phy_modify_changed(struct phy_device *phydev, u32 regnum, u16 mask, + u16 set) { - int ret; + int new, ret; ret = __phy_read(phydev, regnum); if (ret < 0) return ret; - ret = __phy_write(phydev, regnum, (ret & ~mask) | set); + new = (ret & ~mask) | set; + if (new == ret) + return 0; + + ret = __phy_write(phydev, regnum, new); + + return ret < 0 ? ret : 1; +} +EXPORT_SYMBOL_GPL(__phy_modify_changed); + +/** + * phy_modify_changed - Function for modifying a PHY register + * @phydev: the phy_device struct + * @regnum: register number to modify + * @mask: bit mask of bits to clear + * @set: new value of bits set in mask to write to @regnum + * + * NOTE: MUST NOT be called from interrupt context, + * because the bus read/write functions may wait for an interrupt + * to conclude the operation. + * + * Returns negative errno, 0 if there was no change, and 1 in case of change + */ +int phy_modify_changed(struct phy_device *phydev, u32 regnum, u16 mask, u16 set) +{ + int ret; + + mutex_lock(&phydev->mdio.bus->mdio_lock); + ret = __phy_modify_changed(phydev, regnum, mask, set); + mutex_unlock(&phydev->mdio.bus->mdio_lock); + + return ret; +} +EXPORT_SYMBOL_GPL(phy_modify_changed); + +/** + * __phy_modify - Convenience function for modifying a PHY register + * @phydev: the phy_device struct + * @regnum: register number to modify + * @mask: bit mask of bits to clear + * @set: new value of bits set in mask to write to @regnum + * + * NOTE: MUST NOT be called from interrupt context, + * because the bus read/write functions may wait for an interrupt + * to conclude the operation. + */ +int __phy_modify(struct phy_device *phydev, u32 regnum, u16 mask, u16 set) +{ + int ret; + + ret = __phy_modify_changed(phydev, regnum, mask, set); return ret < 0 ? ret : 0; } @@ -578,7 +631,7 @@ int phy_modify(struct phy_device *phydev, u32 regnum, u16 mask, u16 set) EXPORT_SYMBOL_GPL(phy_modify); /** - * __phy_modify_mmd - Convenience function for modifying a register on MMD + * __phy_modify_mmd_changed - Function for modifying a register on MMD * @phydev: the phy_device struct * @devad: the MMD containing register to modify * @regnum: register number to modify @@ -587,17 +640,73 @@ EXPORT_SYMBOL_GPL(phy_modify); * * Unlocked helper function which allows a MMD register to be modified as * new register value = (old register value & ~mask) | set + * + * Returns negative errno, 0 if there was no change, and 1 in case of change */ -int __phy_modify_mmd(struct phy_device *phydev, int devad, u32 regnum, - u16 mask, u16 set) +int __phy_modify_mmd_changed(struct phy_device *phydev, int devad, u32 regnum, + u16 mask, u16 set) { - int ret; + int new, ret; ret = __phy_read_mmd(phydev, devad, regnum); if (ret < 0) return ret; - ret = __phy_write_mmd(phydev, devad, regnum, (ret & ~mask) | set); + new = (ret & ~mask) | set; + if (new == ret) + return 0; + + ret = __phy_write_mmd(phydev, devad, regnum, new); + + return ret < 0 ? ret : 1; +} +EXPORT_SYMBOL_GPL(__phy_modify_mmd_changed); + +/** + * phy_modify_mmd_changed - Function for modifying a register on MMD + * @phydev: the phy_device struct + * @devad: the MMD containing register to modify + * @regnum: register number to modify + * @mask: bit mask of bits to clear + * @set: new value of bits set in mask to write to @regnum + * + * NOTE: MUST NOT be called from interrupt context, + * because the bus read/write functions may wait for an interrupt + * to conclude the operation. + * + * Returns negative errno, 0 if there was no change, and 1 in case of change + */ +int phy_modify_mmd_changed(struct phy_device *phydev, int devad, u32 regnum, + u16 mask, u16 set) +{ + int ret; + + mutex_lock(&phydev->mdio.bus->mdio_lock); + ret = __phy_modify_mmd_changed(phydev, devad, regnum, mask, set); + mutex_unlock(&phydev->mdio.bus->mdio_lock); + + return ret; +} +EXPORT_SYMBOL_GPL(phy_modify_mmd_changed); + +/** + * __phy_modify_mmd - Convenience function for modifying a register on MMD + * @phydev: the phy_device struct + * @devad: the MMD containing register to modify + * @regnum: register number to modify + * @mask: bit mask of bits to clear + * @set: new value of bits set in mask to write to @regnum + * + * NOTE: MUST NOT be called from interrupt context, + * because the bus read/write functions may wait for an interrupt + * to conclude the operation. + */ +int __phy_modify_mmd(struct phy_device *phydev, int devad, u32 regnum, + u16 mask, u16 set) +{ + int ret; + + ret = __phy_modify_mmd_changed(phydev, devad, regnum, mask, set); return ret < 0 ? ret : 0; } diff --git a/include/linux/phy.h b/include/linux/phy.h index d2ffae992e4a..378da9a6165e 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -799,13 +799,21 @@ int phy_write_mmd(struct phy_device *phydev, int devad, u32 regnum, u16 val); */ int __phy_write_mmd(struct phy_device *phydev, int devad, u32 regnum, u16 val); +int __phy_modify_changed(struct phy_device *phydev, u32 regnum, u16 mask, + u16 set); +int phy_modify_changed(struct phy_device *phydev, u32 regnum, u16 mask, + u16 set); int __phy_modify(struct phy_device *phydev, u32 regnum, u16 mask, u16 set); int phy_modify(struct phy_device *phydev, u32 regnum, u16 mask, u16 set); +int __phy_modify_mmd_changed(struct phy_device *phydev, int devad, u32 regnum, + u16 mask, u16 set); +int phy_modify_mmd_changed(struct phy_device *phydev, int devad, u32 regnum, + u16 mask, u16 set); int __phy_modify_mmd(struct phy_device *phydev, int devad, u32 regnum, - u16 mask, u16 set); + u16 mask, u16 set); int phy_modify_mmd(struct phy_device *phydev, int devad, u32 regnum, - u16 mask, u16 set); + u16 mask, u16 set); /** * __phy_set_bits - Convenience function for setting bits in a PHY register -- cgit v1.2.3 From 46f8bc92758c6259bcf945e9216098661c1587cd Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Sat, 9 Feb 2019 23:22:20 -0800 Subject: bpf: Add a bpf_sock pointer to __sk_buff and a bpf_sk_fullsock helper In kernel, it is common to check "skb->sk && sk_fullsock(skb->sk)" before accessing the fields in sock. For example, in __netdev_pick_tx: static u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb, struct net_device *sb_dev) { /* ... */ struct sock *sk = skb->sk; if (queue_index != new_index && sk && sk_fullsock(sk) && rcu_access_pointer(sk->sk_dst_cache)) sk_tx_queue_set(sk, new_index); /* ... */ return queue_index; } This patch adds a "struct bpf_sock *sk" pointer to the "struct __sk_buff" where a few of the convert_ctx_access() in filter.c has already been accessing the skb->sk sock_common's fields, e.g. sock_ops_convert_ctx_access(). "__sk_buff->sk" is a PTR_TO_SOCK_COMMON_OR_NULL in the verifier. Some of the fileds in "bpf_sock" will not be directly accessible through the "__sk_buff->sk" pointer. It is limited by the new "bpf_sock_common_is_valid_access()". e.g. The existing "type", "protocol", "mark" and "priority" in bpf_sock are not allowed. The newly added "struct bpf_sock *bpf_sk_fullsock(struct bpf_sock *sk)" can be used to get a sk with all accessible fields in "bpf_sock". This helper is added to both cg_skb and sched_(cls|act). int cg_skb_foo(struct __sk_buff *skb) { struct bpf_sock *sk; sk = skb->sk; if (!sk) return 1; sk = bpf_sk_fullsock(sk); if (!sk) return 1; if (sk->family != AF_INET6 || sk->protocol != IPPROTO_TCP) return 1; /* some_traffic_shaping(); */ return 1; } (1) The sk is read only (2) There is no new "struct bpf_sock_common" introduced. (3) Future kernel sock's members could be added to bpf_sock only instead of repeatedly adding at multiple places like currently in bpf_sock_ops_md, bpf_sock_addr_md, sk_reuseport_md...etc. (4) After "sk = skb->sk", the reg holding sk is in type PTR_TO_SOCK_COMMON_OR_NULL. (5) After bpf_sk_fullsock(), the return type will be in type PTR_TO_SOCKET_OR_NULL which is the same as the return type of bpf_sk_lookup_xxx(). However, bpf_sk_fullsock() does not take refcnt. The acquire_reference_state() is only depending on the return type now. To avoid it, a new is_acquire_function() is checked before calling acquire_reference_state(). (6) The WARN_ON in "release_reference_state()" is no longer an internal verifier bug. When reg->id is not found in state->refs[], it means the bpf_prog does something wrong like "bpf_sk_release(bpf_sk_fullsock(skb->sk))" where reference has never been acquired by calling "bpf_sk_fullsock(skb->sk)". A -EINVAL and a verbose are done instead of WARN_ON. A test is added to the test_verifier in a later patch. Since the WARN_ON in "release_reference_state()" is no longer needed, "__release_reference_state()" is folded into "release_reference_state()" also. Acked-by: Alexei Starovoitov Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 12 +++++ include/uapi/linux/bpf.h | 12 ++++- kernel/bpf/verifier.c | 132 +++++++++++++++++++++++++++++++++-------------- net/core/filter.c | 42 +++++++++++++++ 4 files changed, 157 insertions(+), 41 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index bd169a7bcc93..a60463b45b54 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -194,6 +194,7 @@ enum bpf_arg_type { ARG_ANYTHING, /* any (initialized) argument is ok */ ARG_PTR_TO_SOCKET, /* pointer to bpf_sock */ ARG_PTR_TO_SPIN_LOCK, /* pointer to bpf_spin_lock */ + ARG_PTR_TO_SOCK_COMMON, /* pointer to sock_common */ }; /* type of values returned from helper functions */ @@ -256,6 +257,8 @@ enum bpf_reg_type { PTR_TO_FLOW_KEYS, /* reg points to bpf_flow_keys */ PTR_TO_SOCKET, /* reg points to struct bpf_sock */ PTR_TO_SOCKET_OR_NULL, /* reg points to struct bpf_sock or NULL */ + PTR_TO_SOCK_COMMON, /* reg points to sock_common */ + PTR_TO_SOCK_COMMON_OR_NULL, /* reg points to sock_common or NULL */ }; /* The information passed from prog-specific *_is_valid_access @@ -920,6 +923,9 @@ void bpf_user_rnd_init_once(void); u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); #if defined(CONFIG_NET) +bool bpf_sock_common_is_valid_access(int off, int size, + enum bpf_access_type type, + struct bpf_insn_access_aux *info); bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type, struct bpf_insn_access_aux *info); u32 bpf_sock_convert_ctx_access(enum bpf_access_type type, @@ -928,6 +934,12 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type, struct bpf_prog *prog, u32 *target_size); #else +static inline bool bpf_sock_common_is_valid_access(int off, int size, + enum bpf_access_type type, + struct bpf_insn_access_aux *info) +{ + return false; +} static inline bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type, struct bpf_insn_access_aux *info) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 1777fa0c61e4..5d79cba74ddc 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2329,6 +2329,14 @@ union bpf_attr { * "**y**". * Return * 0 + * + * struct bpf_sock *bpf_sk_fullsock(struct bpf_sock *sk) + * Description + * This helper gets a **struct bpf_sock** pointer such + * that all the fields in bpf_sock can be accessed. + * Return + * A **struct bpf_sock** pointer on success, or NULL in + * case of failure. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -2425,7 +2433,8 @@ union bpf_attr { FN(msg_pop_data), \ FN(rc_pointer_rel), \ FN(spin_lock), \ - FN(spin_unlock), + FN(spin_unlock), \ + FN(sk_fullsock), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call @@ -2545,6 +2554,7 @@ struct __sk_buff { __u64 tstamp; __u32 wire_len; __u32 gso_segs; + __bpf_md_ptr(struct bpf_sock *, sk); }; struct bpf_tunnel_key { diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 516dfc6d78de..b755d55a3791 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -331,10 +331,17 @@ static bool type_is_pkt_pointer(enum bpf_reg_type type) type == PTR_TO_PACKET_META; } +static bool type_is_sk_pointer(enum bpf_reg_type type) +{ + return type == PTR_TO_SOCKET || + type == PTR_TO_SOCK_COMMON; +} + static bool reg_type_may_be_null(enum bpf_reg_type type) { return type == PTR_TO_MAP_VALUE_OR_NULL || - type == PTR_TO_SOCKET_OR_NULL; + type == PTR_TO_SOCKET_OR_NULL || + type == PTR_TO_SOCK_COMMON_OR_NULL; } static bool type_is_refcounted(enum bpf_reg_type type) @@ -377,6 +384,12 @@ static bool is_release_function(enum bpf_func_id func_id) return func_id == BPF_FUNC_sk_release; } +static bool is_acquire_function(enum bpf_func_id func_id) +{ + return func_id == BPF_FUNC_sk_lookup_tcp || + func_id == BPF_FUNC_sk_lookup_udp; +} + /* string representation of 'enum bpf_reg_type' */ static const char * const reg_type_str[] = { [NOT_INIT] = "?", @@ -392,6 +405,8 @@ static const char * const reg_type_str[] = { [PTR_TO_FLOW_KEYS] = "flow_keys", [PTR_TO_SOCKET] = "sock", [PTR_TO_SOCKET_OR_NULL] = "sock_or_null", + [PTR_TO_SOCK_COMMON] = "sock_common", + [PTR_TO_SOCK_COMMON_OR_NULL] = "sock_common_or_null", }; static char slot_type_char[] = { @@ -618,13 +633,10 @@ static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx) } /* release function corresponding to acquire_reference_state(). Idempotent. */ -static int __release_reference_state(struct bpf_func_state *state, int ptr_id) +static int release_reference_state(struct bpf_func_state *state, int ptr_id) { int i, last_idx; - if (!ptr_id) - return -EFAULT; - last_idx = state->acquired_refs - 1; for (i = 0; i < state->acquired_refs; i++) { if (state->refs[i].id == ptr_id) { @@ -636,21 +648,7 @@ static int __release_reference_state(struct bpf_func_state *state, int ptr_id) return 0; } } - return -EFAULT; -} - -/* variation on the above for cases where we expect that there must be an - * outstanding reference for the specified ptr_id. - */ -static int release_reference_state(struct bpf_verifier_env *env, int ptr_id) -{ - struct bpf_func_state *state = cur_func(env); - int err; - - err = __release_reference_state(state, ptr_id); - if (WARN_ON_ONCE(err != 0)) - verbose(env, "verifier internal error: can't release reference\n"); - return err; + return -EINVAL; } static int transfer_reference_state(struct bpf_func_state *dst, @@ -1209,6 +1207,8 @@ static bool is_spillable_regtype(enum bpf_reg_type type) case CONST_PTR_TO_MAP: case PTR_TO_SOCKET: case PTR_TO_SOCKET_OR_NULL: + case PTR_TO_SOCK_COMMON: + case PTR_TO_SOCK_COMMON_OR_NULL: return true; default: return false; @@ -1647,6 +1647,7 @@ static int check_sock_access(struct bpf_verifier_env *env, int insn_idx, struct bpf_reg_state *regs = cur_regs(env); struct bpf_reg_state *reg = ®s[regno]; struct bpf_insn_access_aux info = {}; + bool valid; if (reg->smin_value < 0) { verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n", @@ -1654,15 +1655,28 @@ static int check_sock_access(struct bpf_verifier_env *env, int insn_idx, return -EACCES; } - if (!bpf_sock_is_valid_access(off, size, t, &info)) { - verbose(env, "invalid bpf_sock access off=%d size=%d\n", - off, size); - return -EACCES; + switch (reg->type) { + case PTR_TO_SOCK_COMMON: + valid = bpf_sock_common_is_valid_access(off, size, t, &info); + break; + case PTR_TO_SOCKET: + valid = bpf_sock_is_valid_access(off, size, t, &info); + break; + default: + valid = false; } - env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size; - return 0; + if (valid) { + env->insn_aux_data[insn_idx].ctx_field_size = + info.ctx_field_size; + return 0; + } + + verbose(env, "R%d invalid %s access off=%d size=%d\n", + regno, reg_type_str[reg->type], off, size); + + return -EACCES; } static bool __is_pointer_value(bool allow_ptr_leaks, @@ -1688,8 +1702,14 @@ static bool is_ctx_reg(struct bpf_verifier_env *env, int regno) { const struct bpf_reg_state *reg = reg_state(env, regno); - return reg->type == PTR_TO_CTX || - reg->type == PTR_TO_SOCKET; + return reg->type == PTR_TO_CTX; +} + +static bool is_sk_reg(struct bpf_verifier_env *env, int regno) +{ + const struct bpf_reg_state *reg = reg_state(env, regno); + + return type_is_sk_pointer(reg->type); } static bool is_pkt_reg(struct bpf_verifier_env *env, int regno) @@ -1800,6 +1820,9 @@ static int check_ptr_alignment(struct bpf_verifier_env *env, case PTR_TO_SOCKET: pointer_desc = "sock "; break; + case PTR_TO_SOCK_COMMON: + pointer_desc = "sock_common "; + break; default: break; } @@ -2003,11 +2026,14 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn * PTR_TO_PACKET[_META,_END]. In the latter * case, we know the offset is zero. */ - if (reg_type == SCALAR_VALUE) + if (reg_type == SCALAR_VALUE) { mark_reg_unknown(env, regs, value_regno); - else + } else { mark_reg_known_zero(env, regs, value_regno); + if (reg_type_may_be_null(reg_type)) + regs[value_regno].id = ++env->id_gen; + } regs[value_regno].type = reg_type; } @@ -2053,9 +2079,10 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn err = check_flow_keys_access(env, off, size); if (!err && t == BPF_READ && value_regno >= 0) mark_reg_unknown(env, regs, value_regno); - } else if (reg->type == PTR_TO_SOCKET) { + } else if (type_is_sk_pointer(reg->type)) { if (t == BPF_WRITE) { - verbose(env, "cannot write into socket\n"); + verbose(env, "R%d cannot write into %s\n", + regno, reg_type_str[reg->type]); return -EACCES; } err = check_sock_access(env, insn_idx, regno, off, size, t); @@ -2102,7 +2129,8 @@ static int check_xadd(struct bpf_verifier_env *env, int insn_idx, struct bpf_ins if (is_ctx_reg(env, insn->dst_reg) || is_pkt_reg(env, insn->dst_reg) || - is_flow_key_reg(env, insn->dst_reg)) { + is_flow_key_reg(env, insn->dst_reg) || + is_sk_reg(env, insn->dst_reg)) { verbose(env, "BPF_XADD stores into R%d %s is not allowed\n", insn->dst_reg, reg_type_str[reg_state(env, insn->dst_reg)->type]); @@ -2369,6 +2397,11 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno, err = check_ctx_reg(env, reg, regno); if (err < 0) return err; + } else if (arg_type == ARG_PTR_TO_SOCK_COMMON) { + expected_type = PTR_TO_SOCK_COMMON; + /* Any sk pointer can be ARG_PTR_TO_SOCK_COMMON */ + if (!type_is_sk_pointer(type)) + goto err_type; } else if (arg_type == ARG_PTR_TO_SOCKET) { expected_type = PTR_TO_SOCKET; if (type != expected_type) @@ -2783,7 +2816,7 @@ static int release_reference(struct bpf_verifier_env *env, for (i = 0; i <= vstate->curframe; i++) release_reg_references(env, vstate->frame[i], meta->ptr_id); - return release_reference_state(env, meta->ptr_id); + return release_reference_state(cur_func(env), meta->ptr_id); } static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn, @@ -3049,8 +3082,11 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn } } else if (is_release_function(func_id)) { err = release_reference(env, &meta); - if (err) + if (err) { + verbose(env, "func %s#%d reference has not been acquired before\n", + func_id_name(func_id), func_id); return err; + } } regs = cur_regs(env); @@ -3099,12 +3135,19 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn regs[BPF_REG_0].id = ++env->id_gen; } } else if (fn->ret_type == RET_PTR_TO_SOCKET_OR_NULL) { - int id = acquire_reference_state(env, insn_idx); - if (id < 0) - return id; mark_reg_known_zero(env, regs, BPF_REG_0); regs[BPF_REG_0].type = PTR_TO_SOCKET_OR_NULL; - regs[BPF_REG_0].id = id; + if (is_acquire_function(func_id)) { + int id = acquire_reference_state(env, insn_idx); + + if (id < 0) + return id; + /* For release_reference() */ + regs[BPF_REG_0].id = id; + } else { + /* For mark_ptr_or_null_reg() */ + regs[BPF_REG_0].id = ++env->id_gen; + } } else { verbose(env, "unknown return type %d of func %s#%d\n", fn->ret_type, func_id_name(func_id), func_id); @@ -3364,6 +3407,8 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, case PTR_TO_PACKET_END: case PTR_TO_SOCKET: case PTR_TO_SOCKET_OR_NULL: + case PTR_TO_SOCK_COMMON: + case PTR_TO_SOCK_COMMON_OR_NULL: verbose(env, "R%d pointer arithmetic on %s prohibited\n", dst, reg_type_str[ptr_reg->type]); return -EACCES; @@ -4597,6 +4642,8 @@ static void mark_ptr_or_null_reg(struct bpf_func_state *state, } } else if (reg->type == PTR_TO_SOCKET_OR_NULL) { reg->type = PTR_TO_SOCKET; + } else if (reg->type == PTR_TO_SOCK_COMMON_OR_NULL) { + reg->type = PTR_TO_SOCK_COMMON; } if (is_null || !(reg_is_refcounted(reg) || reg_may_point_to_spin_lock(reg))) { @@ -4621,7 +4668,7 @@ static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno, int i, j; if (reg_is_refcounted_or_null(®s[regno]) && is_null) - __release_reference_state(state, id); + release_reference_state(state, id); for (i = 0; i < MAX_BPF_REG; i++) mark_ptr_or_null_reg(state, ®s[i], id, is_null); @@ -5790,6 +5837,8 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur, case PTR_TO_FLOW_KEYS: case PTR_TO_SOCKET: case PTR_TO_SOCKET_OR_NULL: + case PTR_TO_SOCK_COMMON: + case PTR_TO_SOCK_COMMON_OR_NULL: /* Only valid matches are exact, which memcmp() above * would have accepted */ @@ -6110,6 +6159,8 @@ static bool reg_type_mismatch_ok(enum bpf_reg_type type) case PTR_TO_CTX: case PTR_TO_SOCKET: case PTR_TO_SOCKET_OR_NULL: + case PTR_TO_SOCK_COMMON: + case PTR_TO_SOCK_COMMON_OR_NULL: return false; default: return true; @@ -7112,6 +7163,7 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) convert_ctx_access = ops->convert_ctx_access; break; case PTR_TO_SOCKET: + case PTR_TO_SOCK_COMMON: convert_ctx_access = bpf_sock_convert_ctx_access; break; default: diff --git a/net/core/filter.c b/net/core/filter.c index 3a49f68eda10..401d2e0aebf8 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1793,6 +1793,20 @@ static const struct bpf_func_proto bpf_skb_pull_data_proto = { .arg2_type = ARG_ANYTHING, }; +BPF_CALL_1(bpf_sk_fullsock, struct sock *, sk) +{ + sk = sk_to_full_sk(sk); + + return sk_fullsock(sk) ? (unsigned long)sk : (unsigned long)NULL; +} + +static const struct bpf_func_proto bpf_sk_fullsock_proto = { + .func = bpf_sk_fullsock, + .gpl_only = false, + .ret_type = RET_PTR_TO_SOCKET_OR_NULL, + .arg1_type = ARG_PTR_TO_SOCK_COMMON, +}; + static inline int sk_skb_try_make_writable(struct sk_buff *skb, unsigned int write_len) { @@ -5406,6 +5420,8 @@ cg_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) switch (func_id) { case BPF_FUNC_get_local_storage: return &bpf_get_local_storage_proto; + case BPF_FUNC_sk_fullsock: + return &bpf_sk_fullsock_proto; default: return sk_filter_func_proto(func_id, prog); } @@ -5477,6 +5493,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_get_socket_uid_proto; case BPF_FUNC_fib_lookup: return &bpf_skb_fib_lookup_proto; + case BPF_FUNC_sk_fullsock: + return &bpf_sk_fullsock_proto; #ifdef CONFIG_XFRM case BPF_FUNC_skb_get_xfrm_state: return &bpf_skb_get_xfrm_state_proto; @@ -5764,6 +5782,11 @@ static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type if (size != sizeof(__u64)) return false; break; + case offsetof(struct __sk_buff, sk): + if (type == BPF_WRITE || size != sizeof(__u64)) + return false; + info->reg_type = PTR_TO_SOCK_COMMON_OR_NULL; + break; default: /* Only narrow read access allowed for now. */ if (type == BPF_WRITE) { @@ -5950,6 +5973,18 @@ static bool __sock_filter_check_size(int off, int size, return size == size_default; } +bool bpf_sock_common_is_valid_access(int off, int size, + enum bpf_access_type type, + struct bpf_insn_access_aux *info) +{ + switch (off) { + case bpf_ctx_range_till(struct bpf_sock, type, priority): + return false; + default: + return bpf_sock_is_valid_access(off, size, type, info); + } +} + bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type, struct bpf_insn_access_aux *info) { @@ -6748,6 +6783,13 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type, off += offsetof(struct qdisc_skb_cb, pkt_len); *target_size = 4; *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, off); + break; + + case offsetof(struct __sk_buff, sk): + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk), + si->dst_reg, si->src_reg, + offsetof(struct sk_buff, sk)); + break; } return insn - insn_buf; -- cgit v1.2.3 From 655a51e536c09d15ffa3603b1b6fce2b45b85a1f Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Sat, 9 Feb 2019 23:22:24 -0800 Subject: bpf: Add struct bpf_tcp_sock and BPF_FUNC_tcp_sock This patch adds a helper function BPF_FUNC_tcp_sock and it is currently available for cg_skb and sched_(cls|act): struct bpf_tcp_sock *bpf_tcp_sock(struct bpf_sock *sk); int cg_skb_foo(struct __sk_buff *skb) { struct bpf_tcp_sock *tp; struct bpf_sock *sk; __u32 snd_cwnd; sk = skb->sk; if (!sk) return 1; tp = bpf_tcp_sock(sk); if (!tp) return 1; snd_cwnd = tp->snd_cwnd; /* ... */ return 1; } A 'struct bpf_tcp_sock' is also added to the uapi bpf.h to provide read-only access. bpf_tcp_sock has all the existing tcp_sock's fields that has already been exposed by the bpf_sock_ops. i.e. no new tcp_sock's fields are exposed in bpf.h. This helper returns a pointer to the tcp_sock. If it is not a tcp_sock or it cannot be traced back to a tcp_sock by sk_to_full_sk(), it returns NULL. Hence, the caller needs to check for NULL before accessing it. The current use case is to expose members from tcp_sock to allow a cg_skb_bpf_prog to provide per cgroup traffic policing/shaping. Acked-by: Alexei Starovoitov Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 30 ++++++++++++++++++ include/uapi/linux/bpf.h | 51 ++++++++++++++++++++++++++++++- kernel/bpf/verifier.c | 31 +++++++++++++++++-- net/core/filter.c | 79 ++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 188 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index a60463b45b54..7f58828755fd 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -204,6 +204,7 @@ enum bpf_return_type { RET_PTR_TO_MAP_VALUE, /* returns a pointer to map elem value */ RET_PTR_TO_MAP_VALUE_OR_NULL, /* returns a pointer to map elem value or NULL */ RET_PTR_TO_SOCKET_OR_NULL, /* returns a pointer to a socket or NULL */ + RET_PTR_TO_TCP_SOCK_OR_NULL, /* returns a pointer to a tcp_sock or NULL */ }; /* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF programs @@ -259,6 +260,8 @@ enum bpf_reg_type { PTR_TO_SOCKET_OR_NULL, /* reg points to struct bpf_sock or NULL */ PTR_TO_SOCK_COMMON, /* reg points to sock_common */ PTR_TO_SOCK_COMMON_OR_NULL, /* reg points to sock_common or NULL */ + PTR_TO_TCP_SOCK, /* reg points to struct tcp_sock */ + PTR_TO_TCP_SOCK_OR_NULL, /* reg points to struct tcp_sock or NULL */ }; /* The information passed from prog-specific *_is_valid_access @@ -956,4 +959,31 @@ static inline u32 bpf_sock_convert_ctx_access(enum bpf_access_type type, } #endif +#ifdef CONFIG_INET +bool bpf_tcp_sock_is_valid_access(int off, int size, enum bpf_access_type type, + struct bpf_insn_access_aux *info); + +u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type, + const struct bpf_insn *si, + struct bpf_insn *insn_buf, + struct bpf_prog *prog, + u32 *target_size); +#else +static inline bool bpf_tcp_sock_is_valid_access(int off, int size, + enum bpf_access_type type, + struct bpf_insn_access_aux *info) +{ + return false; +} + +static inline u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type, + const struct bpf_insn *si, + struct bpf_insn *insn_buf, + struct bpf_prog *prog, + u32 *target_size) +{ + return 0; +} +#endif /* CONFIG_INET */ + #endif /* _LINUX_BPF_H */ diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index d8f91777c5b6..25c8c0e62ecf 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2337,6 +2337,15 @@ union bpf_attr { * Return * A **struct bpf_sock** pointer on success, or NULL in * case of failure. + * + * struct bpf_tcp_sock *bpf_tcp_sock(struct bpf_sock *sk) + * Description + * This helper gets a **struct bpf_tcp_sock** pointer from a + * **struct bpf_sock** pointer. + * + * Return + * A **struct bpf_tcp_sock** pointer on success, or NULL in + * case of failure. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -2434,7 +2443,8 @@ union bpf_attr { FN(rc_pointer_rel), \ FN(spin_lock), \ FN(spin_unlock), \ - FN(sk_fullsock), + FN(sk_fullsock), \ + FN(tcp_sock), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call @@ -2616,6 +2626,45 @@ struct bpf_sock { __u32 state; }; +struct bpf_tcp_sock { + __u32 snd_cwnd; /* Sending congestion window */ + __u32 srtt_us; /* smoothed round trip time << 3 in usecs */ + __u32 rtt_min; + __u32 snd_ssthresh; /* Slow start size threshold */ + __u32 rcv_nxt; /* What we want to receive next */ + __u32 snd_nxt; /* Next sequence we send */ + __u32 snd_una; /* First byte we want an ack for */ + __u32 mss_cache; /* Cached effective mss, not including SACKS */ + __u32 ecn_flags; /* ECN status bits. */ + __u32 rate_delivered; /* saved rate sample: packets delivered */ + __u32 rate_interval_us; /* saved rate sample: time elapsed */ + __u32 packets_out; /* Packets which are "in flight" */ + __u32 retrans_out; /* Retransmitted packets out */ + __u32 total_retrans; /* Total retransmits for entire connection */ + __u32 segs_in; /* RFC4898 tcpEStatsPerfSegsIn + * total number of segments in. + */ + __u32 data_segs_in; /* RFC4898 tcpEStatsPerfDataSegsIn + * total number of data segments in. + */ + __u32 segs_out; /* RFC4898 tcpEStatsPerfSegsOut + * The total number of segments sent. + */ + __u32 data_segs_out; /* RFC4898 tcpEStatsPerfDataSegsOut + * total number of data segments sent. + */ + __u32 lost_out; /* Lost packets */ + __u32 sacked_out; /* SACK'd packets */ + __u64 bytes_received; /* RFC4898 tcpEStatsAppHCThruOctetsReceived + * sum(delta(rcv_nxt)), or how many bytes + * were acked. + */ + __u64 bytes_acked; /* RFC4898 tcpEStatsAppHCThruOctetsAcked + * sum(delta(snd_una)), or how many bytes + * were acked. + */ +}; + struct bpf_sock_tuple { union { struct { diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index b755d55a3791..1b9496c41383 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -334,14 +334,16 @@ static bool type_is_pkt_pointer(enum bpf_reg_type type) static bool type_is_sk_pointer(enum bpf_reg_type type) { return type == PTR_TO_SOCKET || - type == PTR_TO_SOCK_COMMON; + type == PTR_TO_SOCK_COMMON || + type == PTR_TO_TCP_SOCK; } static bool reg_type_may_be_null(enum bpf_reg_type type) { return type == PTR_TO_MAP_VALUE_OR_NULL || type == PTR_TO_SOCKET_OR_NULL || - type == PTR_TO_SOCK_COMMON_OR_NULL; + type == PTR_TO_SOCK_COMMON_OR_NULL || + type == PTR_TO_TCP_SOCK_OR_NULL; } static bool type_is_refcounted(enum bpf_reg_type type) @@ -407,6 +409,8 @@ static const char * const reg_type_str[] = { [PTR_TO_SOCKET_OR_NULL] = "sock_or_null", [PTR_TO_SOCK_COMMON] = "sock_common", [PTR_TO_SOCK_COMMON_OR_NULL] = "sock_common_or_null", + [PTR_TO_TCP_SOCK] = "tcp_sock", + [PTR_TO_TCP_SOCK_OR_NULL] = "tcp_sock_or_null", }; static char slot_type_char[] = { @@ -1209,6 +1213,8 @@ static bool is_spillable_regtype(enum bpf_reg_type type) case PTR_TO_SOCKET_OR_NULL: case PTR_TO_SOCK_COMMON: case PTR_TO_SOCK_COMMON_OR_NULL: + case PTR_TO_TCP_SOCK: + case PTR_TO_TCP_SOCK_OR_NULL: return true; default: return false; @@ -1662,6 +1668,9 @@ static int check_sock_access(struct bpf_verifier_env *env, int insn_idx, case PTR_TO_SOCKET: valid = bpf_sock_is_valid_access(off, size, t, &info); break; + case PTR_TO_TCP_SOCK: + valid = bpf_tcp_sock_is_valid_access(off, size, t, &info); + break; default: valid = false; } @@ -1823,6 +1832,9 @@ static int check_ptr_alignment(struct bpf_verifier_env *env, case PTR_TO_SOCK_COMMON: pointer_desc = "sock_common "; break; + case PTR_TO_TCP_SOCK: + pointer_desc = "tcp_sock "; + break; default: break; } @@ -3148,6 +3160,10 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn /* For mark_ptr_or_null_reg() */ regs[BPF_REG_0].id = ++env->id_gen; } + } else if (fn->ret_type == RET_PTR_TO_TCP_SOCK_OR_NULL) { + mark_reg_known_zero(env, regs, BPF_REG_0); + regs[BPF_REG_0].type = PTR_TO_TCP_SOCK_OR_NULL; + regs[BPF_REG_0].id = ++env->id_gen; } else { verbose(env, "unknown return type %d of func %s#%d\n", fn->ret_type, func_id_name(func_id), func_id); @@ -3409,6 +3425,8 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, case PTR_TO_SOCKET_OR_NULL: case PTR_TO_SOCK_COMMON: case PTR_TO_SOCK_COMMON_OR_NULL: + case PTR_TO_TCP_SOCK: + case PTR_TO_TCP_SOCK_OR_NULL: verbose(env, "R%d pointer arithmetic on %s prohibited\n", dst, reg_type_str[ptr_reg->type]); return -EACCES; @@ -4644,6 +4662,8 @@ static void mark_ptr_or_null_reg(struct bpf_func_state *state, reg->type = PTR_TO_SOCKET; } else if (reg->type == PTR_TO_SOCK_COMMON_OR_NULL) { reg->type = PTR_TO_SOCK_COMMON; + } else if (reg->type == PTR_TO_TCP_SOCK_OR_NULL) { + reg->type = PTR_TO_TCP_SOCK; } if (is_null || !(reg_is_refcounted(reg) || reg_may_point_to_spin_lock(reg))) { @@ -5839,6 +5859,8 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur, case PTR_TO_SOCKET_OR_NULL: case PTR_TO_SOCK_COMMON: case PTR_TO_SOCK_COMMON_OR_NULL: + case PTR_TO_TCP_SOCK: + case PTR_TO_TCP_SOCK_OR_NULL: /* Only valid matches are exact, which memcmp() above * would have accepted */ @@ -6161,6 +6183,8 @@ static bool reg_type_mismatch_ok(enum bpf_reg_type type) case PTR_TO_SOCKET_OR_NULL: case PTR_TO_SOCK_COMMON: case PTR_TO_SOCK_COMMON_OR_NULL: + case PTR_TO_TCP_SOCK: + case PTR_TO_TCP_SOCK_OR_NULL: return false; default: return true; @@ -7166,6 +7190,9 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) case PTR_TO_SOCK_COMMON: convert_ctx_access = bpf_sock_convert_ctx_access; break; + case PTR_TO_TCP_SOCK: + convert_ctx_access = bpf_tcp_sock_convert_ctx_access; + break; default: continue; } diff --git a/net/core/filter.c b/net/core/filter.c index c0d7b9ef279f..353735575204 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -5315,6 +5315,79 @@ static const struct bpf_func_proto bpf_sock_addr_sk_lookup_udp_proto = { .arg5_type = ARG_ANYTHING, }; +bool bpf_tcp_sock_is_valid_access(int off, int size, enum bpf_access_type type, + struct bpf_insn_access_aux *info) +{ + if (off < 0 || off >= offsetofend(struct bpf_tcp_sock, bytes_acked)) + return false; + + if (off % size != 0) + return false; + + switch (off) { + case offsetof(struct bpf_tcp_sock, bytes_received): + case offsetof(struct bpf_tcp_sock, bytes_acked): + return size == sizeof(__u64); + default: + return size == sizeof(__u32); + } +} + +u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type, + const struct bpf_insn *si, + struct bpf_insn *insn_buf, + struct bpf_prog *prog, u32 *target_size) +{ + struct bpf_insn *insn = insn_buf; + +#define BPF_TCP_SOCK_GET_COMMON(FIELD) \ + do { \ + BUILD_BUG_ON(FIELD_SIZEOF(struct tcp_sock, FIELD) > \ + FIELD_SIZEOF(struct bpf_tcp_sock, FIELD)); \ + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct tcp_sock, FIELD),\ + si->dst_reg, si->src_reg, \ + offsetof(struct tcp_sock, FIELD)); \ + } while (0) + + CONVERT_COMMON_TCP_SOCK_FIELDS(struct bpf_tcp_sock, + BPF_TCP_SOCK_GET_COMMON); + + if (insn > insn_buf) + return insn - insn_buf; + + switch (si->off) { + case offsetof(struct bpf_tcp_sock, rtt_min): + BUILD_BUG_ON(FIELD_SIZEOF(struct tcp_sock, rtt_min) != + sizeof(struct minmax)); + BUILD_BUG_ON(sizeof(struct minmax) < + sizeof(struct minmax_sample)); + + *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, + offsetof(struct tcp_sock, rtt_min) + + offsetof(struct minmax_sample, v)); + break; + } + + return insn - insn_buf; +} + +BPF_CALL_1(bpf_tcp_sock, struct sock *, sk) +{ + sk = sk_to_full_sk(sk); + + if (sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP) + return (unsigned long)sk; + + return (unsigned long)NULL; +} + +static const struct bpf_func_proto bpf_tcp_sock_proto = { + .func = bpf_tcp_sock, + .gpl_only = false, + .ret_type = RET_PTR_TO_TCP_SOCK_OR_NULL, + .arg1_type = ARG_PTR_TO_SOCK_COMMON, +}; + #endif /* CONFIG_INET */ bool bpf_helper_changes_pkt_data(void *func) @@ -5470,6 +5543,10 @@ cg_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_get_local_storage_proto; case BPF_FUNC_sk_fullsock: return &bpf_sk_fullsock_proto; +#ifdef CONFIG_INET + case BPF_FUNC_tcp_sock: + return &bpf_tcp_sock_proto; +#endif default: return sk_filter_func_proto(func_id, prog); } @@ -5560,6 +5637,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_sk_lookup_udp_proto; case BPF_FUNC_sk_release: return &bpf_sk_release_proto; + case BPF_FUNC_tcp_sock: + return &bpf_tcp_sock_proto; #endif default: return bpf_base_func_proto(func_id); -- cgit v1.2.3 From b90efd2258749e04e1b3f71ef0d716f2ac2337e0 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Thu, 7 Feb 2019 14:54:16 -0500 Subject: bpf: only adjust gso_size on bytestream protocols bpf_skb_change_proto and bpf_skb_adjust_room change skb header length. For GSO packets they adjust gso_size to maintain the same MTU. The gso size can only be safely adjusted on bytestream protocols. Commit d02f51cbcf12 ("bpf: fix bpf_skb_adjust_net/bpf_skb_proto_xlat to deal with gso sctp skbs") excluded SKB_GSO_SCTP. Since then type SKB_GSO_UDP_L4 has been added, whose contents are one gso_size unit per datagram. Also exclude these. Move from a blacklist to a whitelist check to future proof against additional such new GSO types, e.g., for fraglist based GRO. Fixes: bec1f6f69736 ("udp: generate gso with UDP_SEGMENT") Signed-off-by: Willem de Bruijn Signed-off-by: Alexei Starovoitov --- include/linux/skbuff.h | 6 ++++++ net/core/filter.c | 12 ++++-------- 2 files changed, 10 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 95d25b010a25..5a7a8b93a5ab 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -4212,6 +4212,12 @@ static inline bool skb_is_gso_sctp(const struct sk_buff *skb) return skb_shinfo(skb)->gso_type & SKB_GSO_SCTP; } +static inline bool skb_is_gso_tcp(const struct sk_buff *skb) +{ + return skb_is_gso(skb) && + skb_shinfo(skb)->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6); +} + static inline void skb_gso_reset(struct sk_buff *skb) { skb_shinfo(skb)->gso_size = 0; diff --git a/net/core/filter.c b/net/core/filter.c index 7a54dc11ac2d..f7d0004fc160 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2789,8 +2789,7 @@ static int bpf_skb_proto_4_to_6(struct sk_buff *skb) u32 off = skb_mac_header_len(skb); int ret; - /* SCTP uses GSO_BY_FRAGS, thus cannot adjust it. */ - if (skb_is_gso(skb) && unlikely(skb_is_gso_sctp(skb))) + if (!skb_is_gso_tcp(skb)) return -ENOTSUPP; ret = skb_cow(skb, len_diff); @@ -2831,8 +2830,7 @@ static int bpf_skb_proto_6_to_4(struct sk_buff *skb) u32 off = skb_mac_header_len(skb); int ret; - /* SCTP uses GSO_BY_FRAGS, thus cannot adjust it. */ - if (skb_is_gso(skb) && unlikely(skb_is_gso_sctp(skb))) + if (!skb_is_gso_tcp(skb)) return -ENOTSUPP; ret = skb_unclone(skb, GFP_ATOMIC); @@ -2957,8 +2955,7 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 len_diff) u32 off = skb_mac_header_len(skb) + bpf_skb_net_base_len(skb); int ret; - /* SCTP uses GSO_BY_FRAGS, thus cannot adjust it. */ - if (skb_is_gso(skb) && unlikely(skb_is_gso_sctp(skb))) + if (!skb_is_gso_tcp(skb)) return -ENOTSUPP; ret = skb_cow(skb, len_diff); @@ -2987,8 +2984,7 @@ static int bpf_skb_net_shrink(struct sk_buff *skb, u32 len_diff) u32 off = skb_mac_header_len(skb) + bpf_skb_net_base_len(skb); int ret; - /* SCTP uses GSO_BY_FRAGS, thus cannot adjust it. */ - if (skb_is_gso(skb) && unlikely(skb_is_gso_sctp(skb))) + if (!skb_is_gso_tcp(skb)) return -ENOTSUPP; ret = skb_unclone(skb, GFP_ATOMIC); -- cgit v1.2.3 From 81ec3f3c4c4d78f2d3b6689c9816bfbdf7417dbb Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 4 Feb 2019 13:35:32 +0100 Subject: perf/x86: Add check_period PMU callback Vince (and later on Ravi) reported crashes in the BTS code during fuzzing with the following backtrace: general protection fault: 0000 [#1] SMP PTI ... RIP: 0010:perf_prepare_sample+0x8f/0x510 ... Call Trace: ? intel_pmu_drain_bts_buffer+0x194/0x230 intel_pmu_drain_bts_buffer+0x160/0x230 ? tick_nohz_irq_exit+0x31/0x40 ? smp_call_function_single_interrupt+0x48/0xe0 ? call_function_single_interrupt+0xf/0x20 ? call_function_single_interrupt+0xa/0x20 ? x86_schedule_events+0x1a0/0x2f0 ? x86_pmu_commit_txn+0xb4/0x100 ? find_busiest_group+0x47/0x5d0 ? perf_event_set_state.part.42+0x12/0x50 ? perf_mux_hrtimer_restart+0x40/0xb0 intel_pmu_disable_event+0xae/0x100 ? intel_pmu_disable_event+0xae/0x100 x86_pmu_stop+0x7a/0xb0 x86_pmu_del+0x57/0x120 event_sched_out.isra.101+0x83/0x180 group_sched_out.part.103+0x57/0xe0 ctx_sched_out+0x188/0x240 ctx_resched+0xa8/0xd0 __perf_event_enable+0x193/0x1e0 event_function+0x8e/0xc0 remote_function+0x41/0x50 flush_smp_call_function_queue+0x68/0x100 generic_smp_call_function_single_interrupt+0x13/0x30 smp_call_function_single_interrupt+0x3e/0xe0 call_function_single_interrupt+0xf/0x20 The reason is that while event init code does several checks for BTS events and prevents several unwanted config bits for BTS event (like precise_ip), the PERF_EVENT_IOC_PERIOD allows to create BTS event without those checks being done. Following sequence will cause the crash: If we create an 'almost' BTS event with precise_ip and callchains, and it into a BTS event it will crash the perf_prepare_sample() function because precise_ip events are expected to come in with callchain data initialized, but that's not the case for intel_pmu_drain_bts_buffer() caller. Adding a check_period callback to be called before the period is changed via PERF_EVENT_IOC_PERIOD. It will deny the change if the event would become BTS. Plus adding also the limit_period check as well. Reported-by: Vince Weaver Signed-off-by: Jiri Olsa Acked-by: Peter Zijlstra Cc: Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Naveen N. Rao Cc: Ravi Bangoria Cc: Stephane Eranian Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20190204123532.GA4794@krava Signed-off-by: Ingo Molnar --- arch/x86/events/core.c | 14 ++++++++++++++ arch/x86/events/intel/core.c | 9 +++++++++ arch/x86/events/perf_event.h | 16 ++++++++++++++-- include/linux/perf_event.h | 5 +++++ kernel/events/core.c | 16 ++++++++++++++++ 5 files changed, 58 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 374a19712e20..b684f0294f35 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -2278,6 +2278,19 @@ void perf_check_microcode(void) x86_pmu.check_microcode(); } +static int x86_pmu_check_period(struct perf_event *event, u64 value) +{ + if (x86_pmu.check_period && x86_pmu.check_period(event, value)) + return -EINVAL; + + if (value && x86_pmu.limit_period) { + if (x86_pmu.limit_period(event, value) > value) + return -EINVAL; + } + + return 0; +} + static struct pmu pmu = { .pmu_enable = x86_pmu_enable, .pmu_disable = x86_pmu_disable, @@ -2302,6 +2315,7 @@ static struct pmu pmu = { .event_idx = x86_pmu_event_idx, .sched_task = x86_pmu_sched_task, .task_ctx_size = sizeof(struct x86_perf_task_context), + .check_period = x86_pmu_check_period, }; void arch_perf_update_userpage(struct perf_event *event, diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index daafb893449b..730978dff63f 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -3587,6 +3587,11 @@ static void intel_pmu_sched_task(struct perf_event_context *ctx, intel_pmu_lbr_sched_task(ctx, sched_in); } +static int intel_pmu_check_period(struct perf_event *event, u64 value) +{ + return intel_pmu_has_bts_period(event, value) ? -EINVAL : 0; +} + PMU_FORMAT_ATTR(offcore_rsp, "config1:0-63"); PMU_FORMAT_ATTR(ldlat, "config1:0-15"); @@ -3667,6 +3672,8 @@ static __initconst const struct x86_pmu core_pmu = { .cpu_starting = intel_pmu_cpu_starting, .cpu_dying = intel_pmu_cpu_dying, .cpu_dead = intel_pmu_cpu_dead, + + .check_period = intel_pmu_check_period, }; static struct attribute *intel_pmu_attrs[]; @@ -3711,6 +3718,8 @@ static __initconst const struct x86_pmu intel_pmu = { .guest_get_msrs = intel_guest_get_msrs, .sched_task = intel_pmu_sched_task, + + .check_period = intel_pmu_check_period, }; static __init void intel_clovertown_quirk(void) diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 78d7b7031bfc..d46fd6754d92 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -646,6 +646,11 @@ struct x86_pmu { * Intel host/guest support (KVM) */ struct perf_guest_switch_msr *(*guest_get_msrs)(int *nr); + + /* + * Check period value for PERF_EVENT_IOC_PERIOD ioctl. + */ + int (*check_period) (struct perf_event *event, u64 period); }; struct x86_perf_task_context { @@ -857,7 +862,7 @@ static inline int amd_pmu_init(void) #ifdef CONFIG_CPU_SUP_INTEL -static inline bool intel_pmu_has_bts(struct perf_event *event) +static inline bool intel_pmu_has_bts_period(struct perf_event *event, u64 period) { struct hw_perf_event *hwc = &event->hw; unsigned int hw_event, bts_event; @@ -868,7 +873,14 @@ static inline bool intel_pmu_has_bts(struct perf_event *event) hw_event = hwc->config & INTEL_ARCH_EVENT_MASK; bts_event = x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS); - return hw_event == bts_event && hwc->sample_period == 1; + return hw_event == bts_event && period == 1; +} + +static inline bool intel_pmu_has_bts(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + return intel_pmu_has_bts_period(event, hwc->sample_period); } int intel_pmu_save_and_restart(struct perf_event *event); diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 1d5c551a5add..e1a051724f7e 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -447,6 +447,11 @@ struct pmu { * Filter events for PMU-specific reasons. */ int (*filter_match) (struct perf_event *event); /* optional */ + + /* + * Check period value for PERF_EVENT_IOC_PERIOD ioctl. + */ + int (*check_period) (struct perf_event *event, u64 value); /* optional */ }; enum perf_addr_filter_action_t { diff --git a/kernel/events/core.c b/kernel/events/core.c index e5ede6918050..26d6edab051a 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -4963,6 +4963,11 @@ static void __perf_event_period(struct perf_event *event, } } +static int perf_event_check_period(struct perf_event *event, u64 value) +{ + return event->pmu->check_period(event, value); +} + static int perf_event_period(struct perf_event *event, u64 __user *arg) { u64 value; @@ -4979,6 +4984,9 @@ static int perf_event_period(struct perf_event *event, u64 __user *arg) if (event->attr.freq && value > sysctl_perf_event_sample_rate) return -EINVAL; + if (perf_event_check_period(event, value)) + return -EINVAL; + event_function_call(event, __perf_event_period, &value); return 0; @@ -9391,6 +9399,11 @@ static int perf_pmu_nop_int(struct pmu *pmu) return 0; } +static int perf_event_nop_int(struct perf_event *event, u64 value) +{ + return 0; +} + static DEFINE_PER_CPU(unsigned int, nop_txn_flags); static void perf_pmu_start_txn(struct pmu *pmu, unsigned int flags) @@ -9691,6 +9704,9 @@ got_cpu_context: pmu->pmu_disable = perf_pmu_nop_void; } + if (!pmu->check_period) + pmu->check_period = perf_event_nop_int; + if (!pmu->event_idx) pmu->event_idx = perf_event_idx_default; -- cgit v1.2.3 From dd27c2e3d0a05c01ff14bb672d1a3f0fdd8f98fc Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 12 Feb 2019 00:20:39 -0800 Subject: bpf: offload: add priv field for drivers Currently bpf_offload_dev does not have any priv pointer, forcing the drivers to work backwards from the netdev in program metadata. This is not great given programs are conceptually associated with the offload device, and it means one or two unnecessary deferences. Add a priv pointer to bpf_offload_dev. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Signed-off-by: Daniel Borkmann --- drivers/net/ethernet/netronome/nfp/bpf/main.c | 2 +- drivers/net/ethernet/netronome/nfp/bpf/offload.c | 4 +--- drivers/net/netdevsim/bpf.c | 5 +++-- include/linux/bpf.h | 3 ++- kernel/bpf/offload.c | 10 +++++++++- 5 files changed, 16 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.c b/drivers/net/ethernet/netronome/nfp/bpf/main.c index dccae0319204..275de9f4c61c 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.c @@ -465,7 +465,7 @@ static int nfp_bpf_init(struct nfp_app *app) app->ctrl_mtu = nfp_bpf_ctrl_cmsg_mtu(bpf); } - bpf->bpf_dev = bpf_offload_dev_create(&nfp_bpf_dev_ops); + bpf->bpf_dev = bpf_offload_dev_create(&nfp_bpf_dev_ops, bpf); err = PTR_ERR_OR_ZERO(bpf->bpf_dev); if (err) goto err_free_neutral_maps; diff --git a/drivers/net/ethernet/netronome/nfp/bpf/offload.c b/drivers/net/ethernet/netronome/nfp/bpf/offload.c index 55c7dbf8b421..15dce97650a5 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/offload.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/offload.c @@ -185,8 +185,6 @@ static void nfp_prog_free(struct nfp_prog *nfp_prog) static int nfp_bpf_verifier_prep(struct bpf_prog *prog) { - struct nfp_net *nn = netdev_priv(prog->aux->offload->netdev); - struct nfp_app *app = nn->app; struct nfp_prog *nfp_prog; int ret; @@ -197,7 +195,7 @@ static int nfp_bpf_verifier_prep(struct bpf_prog *prog) INIT_LIST_HEAD(&nfp_prog->insns); nfp_prog->type = prog->type; - nfp_prog->bpf = app->priv; + nfp_prog->bpf = bpf_offload_dev_priv(prog->aux->offload->offdev); ret = nfp_prog_prepare(nfp_prog, prog->insnsi, prog->len); if (ret) diff --git a/drivers/net/netdevsim/bpf.c b/drivers/net/netdevsim/bpf.c index 172b271c8bd2..f92c43453ec6 100644 --- a/drivers/net/netdevsim/bpf.c +++ b/drivers/net/netdevsim/bpf.c @@ -248,7 +248,7 @@ static int nsim_bpf_create_prog(struct netdevsim *ns, struct bpf_prog *prog) static int nsim_bpf_verifier_prep(struct bpf_prog *prog) { - struct netdevsim *ns = netdev_priv(prog->aux->offload->netdev); + struct netdevsim *ns = bpf_offload_dev_priv(prog->aux->offload->offdev); if (!ns->bpf_bind_accept) return -EOPNOTSUPP; @@ -589,7 +589,8 @@ int nsim_bpf_init(struct netdevsim *ns) if (IS_ERR_OR_NULL(ns->sdev->ddir_bpf_bound_progs)) return -ENOMEM; - ns->sdev->bpf_dev = bpf_offload_dev_create(&nsim_bpf_dev_ops); + ns->sdev->bpf_dev = bpf_offload_dev_create(&nsim_bpf_dev_ops, + ns); err = PTR_ERR_OR_ZERO(ns->sdev->bpf_dev); if (err) return err; diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 7f58828755fd..de18227b3d95 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -773,8 +773,9 @@ int bpf_map_offload_get_next_key(struct bpf_map *map, bool bpf_offload_prog_map_match(struct bpf_prog *prog, struct bpf_map *map); struct bpf_offload_dev * -bpf_offload_dev_create(const struct bpf_prog_offload_ops *ops); +bpf_offload_dev_create(const struct bpf_prog_offload_ops *ops, void *priv); void bpf_offload_dev_destroy(struct bpf_offload_dev *offdev); +void *bpf_offload_dev_priv(struct bpf_offload_dev *offdev); int bpf_offload_dev_netdev_register(struct bpf_offload_dev *offdev, struct net_device *netdev); void bpf_offload_dev_netdev_unregister(struct bpf_offload_dev *offdev, diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c index 39dba8c90331..ba635209ae9a 100644 --- a/kernel/bpf/offload.c +++ b/kernel/bpf/offload.c @@ -35,6 +35,7 @@ static DECLARE_RWSEM(bpf_devs_lock); struct bpf_offload_dev { const struct bpf_prog_offload_ops *ops; struct list_head netdevs; + void *priv; }; struct bpf_offload_netdev { @@ -669,7 +670,7 @@ unlock: EXPORT_SYMBOL_GPL(bpf_offload_dev_netdev_unregister); struct bpf_offload_dev * -bpf_offload_dev_create(const struct bpf_prog_offload_ops *ops) +bpf_offload_dev_create(const struct bpf_prog_offload_ops *ops, void *priv) { struct bpf_offload_dev *offdev; int err; @@ -688,6 +689,7 @@ bpf_offload_dev_create(const struct bpf_prog_offload_ops *ops) return ERR_PTR(-ENOMEM); offdev->ops = ops; + offdev->priv = priv; INIT_LIST_HEAD(&offdev->netdevs); return offdev; @@ -700,3 +702,9 @@ void bpf_offload_dev_destroy(struct bpf_offload_dev *offdev) kfree(offdev); } EXPORT_SYMBOL_GPL(bpf_offload_dev_destroy); + +void *bpf_offload_dev_priv(struct bpf_offload_dev *offdev) +{ + return offdev->priv; +} +EXPORT_SYMBOL_GPL(bpf_offload_dev_priv); -- cgit v1.2.3 From 86e58135bc4ac68b41be11d82f1b1f87cb6119ba Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 11 Feb 2019 11:46:06 +0000 Subject: net: phylink: add phylink_init_eee() helper Provide phylink_init_eee() to allow MAC drivers to initialise PHY EEE from within the ethtool set_eee() method. Signed-off-by: Russell King Signed-off-by: David S. Miller --- drivers/net/phy/phylink.c | 18 ++++++++++++++++++ include/linux/phylink.h | 1 + 2 files changed, 19 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index a148866cbb14..33f66dcd369a 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -1271,6 +1271,24 @@ int phylink_get_eee_err(struct phylink *pl) } EXPORT_SYMBOL_GPL(phylink_get_eee_err); +/** + * phylink_init_eee() - init and check the EEE features + * @pl: a pointer to a &struct phylink returned from phylink_create() + * @clk_stop_enable: allow PHY to stop receive clock + * + * Must be called either with RTNL held or within mac_link_up() + */ +int phylink_init_eee(struct phylink *pl, bool clk_stop_enable) +{ + int ret = -EOPNOTSUPP; + + if (pl->phydev) + ret = phy_init_eee(pl->phydev, clk_stop_enable); + + return ret; +} +EXPORT_SYMBOL_GPL(phylink_init_eee); + /** * phylink_ethtool_get_eee() - read the energy efficient ethernet parameters * @pl: a pointer to a &struct phylink returned from phylink_create() diff --git a/include/linux/phylink.h b/include/linux/phylink.h index 021fc6595856..f57059e4353f 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -220,6 +220,7 @@ void phylink_ethtool_get_pauseparam(struct phylink *, int phylink_ethtool_set_pauseparam(struct phylink *, struct ethtool_pauseparam *); int phylink_get_eee_err(struct phylink *); +int phylink_init_eee(struct phylink *, bool); int phylink_ethtool_get_eee(struct phylink *, struct ethtool_eee *); int phylink_ethtool_set_eee(struct phylink *, struct ethtool_eee *); int phylink_mii_ioctl(struct phylink *, struct ifreq *, int); -- cgit v1.2.3 From 4ea7b0cf0da7494d5c7b8dc328493c50640d0cbc Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Mon, 11 Feb 2019 13:02:25 -0800 Subject: net/skbuff: fix up kernel-doc placement There are several skb_* functions where the locked and unlocked functions are confusingly documented. For several of them, the kernel-doc for the unlocked version is placed above the locked version, which to the casual reader makes it seems like the locked version "takes no locks and you must therefore hold required locks before calling it." One can see, for example, that this link claims to document skb_queue_head(), while instead describing __skb_queue_head(). https://www.kernel.org/doc/html/latest/networking/kapi.html#c.skb_queue_head The correct documentation for skb_queue_head() is also included further down the page. This diff tested via: $ scripts/kernel-doc -rst include/linux/skbuff.h net/core/skbuff.c No new warnings were seen, and the output makes a little more sense. Signed-off-by: Brian Norris Signed-off-by: David S. Miller --- include/linux/skbuff.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 831846617d07..a41e84f7730c 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1889,12 +1889,12 @@ static inline void __skb_queue_before(struct sk_buff_head *list, * * A buffer cannot be placed on two lists at the same time. */ -void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk); static inline void __skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk) { __skb_queue_after(list, (struct sk_buff *)list, newsk); } +void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk); /** * __skb_queue_tail - queue a buffer at the list tail @@ -1906,12 +1906,12 @@ static inline void __skb_queue_head(struct sk_buff_head *list, * * A buffer cannot be placed on two lists at the same time. */ -void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk); static inline void __skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk) { __skb_queue_before(list, (struct sk_buff *)list, newsk); } +void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk); /* * remove sk_buff from list. _Must_ be called atomically, and with @@ -1938,7 +1938,6 @@ static inline void __skb_unlink(struct sk_buff *skb, struct sk_buff_head *list) * so must be used with appropriate locks held only. The head item is * returned or %NULL if the list is empty. */ -struct sk_buff *skb_dequeue(struct sk_buff_head *list); static inline struct sk_buff *__skb_dequeue(struct sk_buff_head *list) { struct sk_buff *skb = skb_peek(list); @@ -1946,6 +1945,7 @@ static inline struct sk_buff *__skb_dequeue(struct sk_buff_head *list) __skb_unlink(skb, list); return skb; } +struct sk_buff *skb_dequeue(struct sk_buff_head *list); /** * __skb_dequeue_tail - remove from the tail of the queue @@ -1955,7 +1955,6 @@ static inline struct sk_buff *__skb_dequeue(struct sk_buff_head *list) * so must be used with appropriate locks held only. The tail item is * returned or %NULL if the list is empty. */ -struct sk_buff *skb_dequeue_tail(struct sk_buff_head *list); static inline struct sk_buff *__skb_dequeue_tail(struct sk_buff_head *list) { struct sk_buff *skb = skb_peek_tail(list); @@ -1963,6 +1962,7 @@ static inline struct sk_buff *__skb_dequeue_tail(struct sk_buff_head *list) __skb_unlink(skb, list); return skb; } +struct sk_buff *skb_dequeue_tail(struct sk_buff_head *list); static inline bool skb_is_nonlinear(const struct sk_buff *skb) @@ -2653,13 +2653,13 @@ static inline int skb_orphan_frags_rx(struct sk_buff *skb, gfp_t gfp_mask) * the list and one reference dropped. This function does not take the * list lock and the caller must hold the relevant locks to use it. */ -void skb_queue_purge(struct sk_buff_head *list); static inline void __skb_queue_purge(struct sk_buff_head *list) { struct sk_buff *skb; while ((skb = __skb_dequeue(list)) != NULL) kfree_skb(skb); } +void skb_queue_purge(struct sk_buff_head *list); unsigned int skb_rbtree_purge(struct rb_root *root); @@ -3028,7 +3028,7 @@ static inline int skb_padto(struct sk_buff *skb, unsigned int len) } /** - * skb_put_padto - increase size and pad an skbuff up to a minimal size + * __skb_put_padto - increase size and pad an skbuff up to a minimal size * @skb: buffer to pad * @len: minimal length * @free_on_error: free buffer on error -- cgit v1.2.3 From 1e562c815e67185e030bcaa06323e95d85d80987 Mon Sep 17 00:00:00 2001 From: Yangbo Lu Date: Tue, 12 Feb 2019 12:23:56 +0800 Subject: ptp_qoriq: make structure/function names more consistent Strings containing "ptp_qoriq" or "qoriq_ptp" which were used for structure/function names were complained by users. Let's just use the unique "ptp_qoriq" to make these names more consistent. This patch is just to unify the names using "ptp_qoriq". It hasn't changed any functions. Signed-off-by: Yangbo Lu Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c | 2 +- drivers/net/ethernet/freescale/gianfar_ethtool.c | 2 +- drivers/ptp/ptp_qoriq.c | 288 ++++++++++----------- drivers/ptp/ptp_qoriq_debugfs.c | 36 +-- include/linux/fsl/ptp_qoriq.h | 14 +- 5 files changed, 171 insertions(+), 171 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c b/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c index 62497119c85f..bdee441bc3b7 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c @@ -501,7 +501,7 @@ static int dpaa_get_ts_info(struct net_device *net_dev, struct device_node *mac_node = dev->of_node; struct device_node *fman_node = NULL, *ptp_node = NULL; struct platform_device *ptp_dev = NULL; - struct qoriq_ptp *ptp = NULL; + struct ptp_qoriq *ptp = NULL; info->phc_index = -1; diff --git a/drivers/net/ethernet/freescale/gianfar_ethtool.c b/drivers/net/ethernet/freescale/gianfar_ethtool.c index 241325c35cb4..27ed995f439a 100644 --- a/drivers/net/ethernet/freescale/gianfar_ethtool.c +++ b/drivers/net/ethernet/freescale/gianfar_ethtool.c @@ -1492,7 +1492,7 @@ static int gfar_get_ts_info(struct net_device *dev, struct gfar_private *priv = netdev_priv(dev); struct platform_device *ptp_dev; struct device_node *ptp_node; - struct qoriq_ptp *ptp = NULL; + struct ptp_qoriq *ptp = NULL; info->phc_index = -1; diff --git a/drivers/ptp/ptp_qoriq.c b/drivers/ptp/ptp_qoriq.c index 43416b2e8a13..8c10d0f8864f 100644 --- a/drivers/ptp/ptp_qoriq.c +++ b/drivers/ptp/ptp_qoriq.c @@ -37,10 +37,10 @@ * Register access functions */ -/* Caller must hold qoriq_ptp->lock. */ -static u64 tmr_cnt_read(struct qoriq_ptp *qoriq_ptp) +/* Caller must hold ptp_qoriq->lock. */ +static u64 tmr_cnt_read(struct ptp_qoriq *ptp_qoriq) { - struct qoriq_ptp_registers *regs = &qoriq_ptp->regs; + struct ptp_qoriq_registers *regs = &ptp_qoriq->regs; u64 ns; u32 lo, hi; @@ -51,10 +51,10 @@ static u64 tmr_cnt_read(struct qoriq_ptp *qoriq_ptp) return ns; } -/* Caller must hold qoriq_ptp->lock. */ -static void tmr_cnt_write(struct qoriq_ptp *qoriq_ptp, u64 ns) +/* Caller must hold ptp_qoriq->lock. */ +static void tmr_cnt_write(struct ptp_qoriq *ptp_qoriq, u64 ns) { - struct qoriq_ptp_registers *regs = &qoriq_ptp->regs; + struct ptp_qoriq_registers *regs = &ptp_qoriq->regs; u32 hi = ns >> 32; u32 lo = ns & 0xffffffff; @@ -62,36 +62,36 @@ static void tmr_cnt_write(struct qoriq_ptp *qoriq_ptp, u64 ns) qoriq_write(®s->ctrl_regs->tmr_cnt_h, hi); } -/* Caller must hold qoriq_ptp->lock. */ -static void set_alarm(struct qoriq_ptp *qoriq_ptp) +/* Caller must hold ptp_qoriq->lock. */ +static void set_alarm(struct ptp_qoriq *ptp_qoriq) { - struct qoriq_ptp_registers *regs = &qoriq_ptp->regs; + struct ptp_qoriq_registers *regs = &ptp_qoriq->regs; u64 ns; u32 lo, hi; - ns = tmr_cnt_read(qoriq_ptp) + 1500000000ULL; + ns = tmr_cnt_read(ptp_qoriq) + 1500000000ULL; ns = div_u64(ns, 1000000000UL) * 1000000000ULL; - ns -= qoriq_ptp->tclk_period; + ns -= ptp_qoriq->tclk_period; hi = ns >> 32; lo = ns & 0xffffffff; qoriq_write(®s->alarm_regs->tmr_alarm1_l, lo); qoriq_write(®s->alarm_regs->tmr_alarm1_h, hi); } -/* Caller must hold qoriq_ptp->lock. */ -static void set_fipers(struct qoriq_ptp *qoriq_ptp) +/* Caller must hold ptp_qoriq->lock. */ +static void set_fipers(struct ptp_qoriq *ptp_qoriq) { - struct qoriq_ptp_registers *regs = &qoriq_ptp->regs; + struct ptp_qoriq_registers *regs = &ptp_qoriq->regs; - set_alarm(qoriq_ptp); - qoriq_write(®s->fiper_regs->tmr_fiper1, qoriq_ptp->tmr_fiper1); - qoriq_write(®s->fiper_regs->tmr_fiper2, qoriq_ptp->tmr_fiper2); + set_alarm(ptp_qoriq); + qoriq_write(®s->fiper_regs->tmr_fiper1, ptp_qoriq->tmr_fiper1); + qoriq_write(®s->fiper_regs->tmr_fiper2, ptp_qoriq->tmr_fiper2); } -static int extts_clean_up(struct qoriq_ptp *qoriq_ptp, int index, +static int extts_clean_up(struct ptp_qoriq *ptp_qoriq, int index, bool update_event) { - struct qoriq_ptp_registers *regs = &qoriq_ptp->regs; + struct ptp_qoriq_registers *regs = &ptp_qoriq->regs; struct ptp_clock_event event; void __iomem *reg_etts_l; void __iomem *reg_etts_h; @@ -122,11 +122,11 @@ static int extts_clean_up(struct qoriq_ptp *qoriq_ptp, int index, if (update_event) { event.timestamp = ((u64) hi) << 32; event.timestamp |= lo; - ptp_clock_event(qoriq_ptp->clock, &event); + ptp_clock_event(ptp_qoriq->clock, &event); } stat = qoriq_read(®s->ctrl_regs->tmr_stat); - } while (qoriq_ptp->extts_fifo_support && (stat & valid)); + } while (ptp_qoriq->extts_fifo_support && (stat & valid)); return 0; } @@ -137,61 +137,61 @@ static int extts_clean_up(struct qoriq_ptp *qoriq_ptp, int index, static irqreturn_t isr(int irq, void *priv) { - struct qoriq_ptp *qoriq_ptp = priv; - struct qoriq_ptp_registers *regs = &qoriq_ptp->regs; + struct ptp_qoriq *ptp_qoriq = priv; + struct ptp_qoriq_registers *regs = &ptp_qoriq->regs; struct ptp_clock_event event; u64 ns; u32 ack = 0, lo, hi, mask, val, irqs; - spin_lock(&qoriq_ptp->lock); + spin_lock(&ptp_qoriq->lock); val = qoriq_read(®s->ctrl_regs->tmr_tevent); mask = qoriq_read(®s->ctrl_regs->tmr_temask); - spin_unlock(&qoriq_ptp->lock); + spin_unlock(&ptp_qoriq->lock); irqs = val & mask; if (irqs & ETS1) { ack |= ETS1; - extts_clean_up(qoriq_ptp, 0, true); + extts_clean_up(ptp_qoriq, 0, true); } if (irqs & ETS2) { ack |= ETS2; - extts_clean_up(qoriq_ptp, 1, true); + extts_clean_up(ptp_qoriq, 1, true); } if (irqs & ALM2) { ack |= ALM2; - if (qoriq_ptp->alarm_value) { + if (ptp_qoriq->alarm_value) { event.type = PTP_CLOCK_ALARM; event.index = 0; - event.timestamp = qoriq_ptp->alarm_value; - ptp_clock_event(qoriq_ptp->clock, &event); + event.timestamp = ptp_qoriq->alarm_value; + ptp_clock_event(ptp_qoriq->clock, &event); } - if (qoriq_ptp->alarm_interval) { - ns = qoriq_ptp->alarm_value + qoriq_ptp->alarm_interval; + if (ptp_qoriq->alarm_interval) { + ns = ptp_qoriq->alarm_value + ptp_qoriq->alarm_interval; hi = ns >> 32; lo = ns & 0xffffffff; qoriq_write(®s->alarm_regs->tmr_alarm2_l, lo); qoriq_write(®s->alarm_regs->tmr_alarm2_h, hi); - qoriq_ptp->alarm_value = ns; + ptp_qoriq->alarm_value = ns; } else { - spin_lock(&qoriq_ptp->lock); + spin_lock(&ptp_qoriq->lock); mask = qoriq_read(®s->ctrl_regs->tmr_temask); mask &= ~ALM2EN; qoriq_write(®s->ctrl_regs->tmr_temask, mask); - spin_unlock(&qoriq_ptp->lock); - qoriq_ptp->alarm_value = 0; - qoriq_ptp->alarm_interval = 0; + spin_unlock(&ptp_qoriq->lock); + ptp_qoriq->alarm_value = 0; + ptp_qoriq->alarm_interval = 0; } } if (irqs & PP1) { ack |= PP1; event.type = PTP_CLOCK_PPS; - ptp_clock_event(qoriq_ptp->clock, &event); + ptp_clock_event(ptp_qoriq->clock, &event); } if (ack) { @@ -210,14 +210,14 @@ static int ptp_qoriq_adjfine(struct ptp_clock_info *ptp, long scaled_ppm) u64 adj, diff; u32 tmr_add; int neg_adj = 0; - struct qoriq_ptp *qoriq_ptp = container_of(ptp, struct qoriq_ptp, caps); - struct qoriq_ptp_registers *regs = &qoriq_ptp->regs; + struct ptp_qoriq *ptp_qoriq = container_of(ptp, struct ptp_qoriq, caps); + struct ptp_qoriq_registers *regs = &ptp_qoriq->regs; if (scaled_ppm < 0) { neg_adj = 1; scaled_ppm = -scaled_ppm; } - tmr_add = qoriq_ptp->tmr_add; + tmr_add = ptp_qoriq->tmr_add; adj = tmr_add; /* calculate diff as adj*(scaled_ppm/65536)/1000000 @@ -238,16 +238,16 @@ static int ptp_qoriq_adjtime(struct ptp_clock_info *ptp, s64 delta) { s64 now; unsigned long flags; - struct qoriq_ptp *qoriq_ptp = container_of(ptp, struct qoriq_ptp, caps); + struct ptp_qoriq *ptp_qoriq = container_of(ptp, struct ptp_qoriq, caps); - spin_lock_irqsave(&qoriq_ptp->lock, flags); + spin_lock_irqsave(&ptp_qoriq->lock, flags); - now = tmr_cnt_read(qoriq_ptp); + now = tmr_cnt_read(ptp_qoriq); now += delta; - tmr_cnt_write(qoriq_ptp, now); - set_fipers(qoriq_ptp); + tmr_cnt_write(ptp_qoriq, now); + set_fipers(ptp_qoriq); - spin_unlock_irqrestore(&qoriq_ptp->lock, flags); + spin_unlock_irqrestore(&ptp_qoriq->lock, flags); return 0; } @@ -257,13 +257,13 @@ static int ptp_qoriq_gettime(struct ptp_clock_info *ptp, { u64 ns; unsigned long flags; - struct qoriq_ptp *qoriq_ptp = container_of(ptp, struct qoriq_ptp, caps); + struct ptp_qoriq *ptp_qoriq = container_of(ptp, struct ptp_qoriq, caps); - spin_lock_irqsave(&qoriq_ptp->lock, flags); + spin_lock_irqsave(&ptp_qoriq->lock, flags); - ns = tmr_cnt_read(qoriq_ptp); + ns = tmr_cnt_read(ptp_qoriq); - spin_unlock_irqrestore(&qoriq_ptp->lock, flags); + spin_unlock_irqrestore(&ptp_qoriq->lock, flags); *ts = ns_to_timespec64(ns); @@ -275,16 +275,16 @@ static int ptp_qoriq_settime(struct ptp_clock_info *ptp, { u64 ns; unsigned long flags; - struct qoriq_ptp *qoriq_ptp = container_of(ptp, struct qoriq_ptp, caps); + struct ptp_qoriq *ptp_qoriq = container_of(ptp, struct ptp_qoriq, caps); ns = timespec64_to_ns(ts); - spin_lock_irqsave(&qoriq_ptp->lock, flags); + spin_lock_irqsave(&ptp_qoriq->lock, flags); - tmr_cnt_write(qoriq_ptp, ns); - set_fipers(qoriq_ptp); + tmr_cnt_write(ptp_qoriq, ns); + set_fipers(ptp_qoriq); - spin_unlock_irqrestore(&qoriq_ptp->lock, flags); + spin_unlock_irqrestore(&ptp_qoriq->lock, flags); return 0; } @@ -292,8 +292,8 @@ static int ptp_qoriq_settime(struct ptp_clock_info *ptp, static int ptp_qoriq_enable(struct ptp_clock_info *ptp, struct ptp_clock_request *rq, int on) { - struct qoriq_ptp *qoriq_ptp = container_of(ptp, struct qoriq_ptp, caps); - struct qoriq_ptp_registers *regs = &qoriq_ptp->regs; + struct ptp_qoriq *ptp_qoriq = container_of(ptp, struct ptp_qoriq, caps); + struct ptp_qoriq_registers *regs = &ptp_qoriq->regs; unsigned long flags; u32 bit, mask = 0; @@ -311,7 +311,7 @@ static int ptp_qoriq_enable(struct ptp_clock_info *ptp, } if (on) - extts_clean_up(qoriq_ptp, rq->extts.index, false); + extts_clean_up(ptp_qoriq, rq->extts.index, false); break; case PTP_CLK_REQ_PPS: @@ -321,7 +321,7 @@ static int ptp_qoriq_enable(struct ptp_clock_info *ptp, return -EOPNOTSUPP; } - spin_lock_irqsave(&qoriq_ptp->lock, flags); + spin_lock_irqsave(&ptp_qoriq->lock, flags); mask = qoriq_read(®s->ctrl_regs->tmr_temask); if (on) { @@ -333,7 +333,7 @@ static int ptp_qoriq_enable(struct ptp_clock_info *ptp, qoriq_write(®s->ctrl_regs->tmr_temask, mask); - spin_unlock_irqrestore(&qoriq_ptp->lock, flags); + spin_unlock_irqrestore(&ptp_qoriq->lock, flags); return 0; } @@ -354,7 +354,7 @@ static const struct ptp_clock_info ptp_qoriq_caps = { }; /** - * qoriq_ptp_nominal_freq - calculate nominal frequency according to + * ptp_qoriq_nominal_freq - calculate nominal frequency according to * reference clock frequency * * @clk_src: reference clock frequency @@ -365,7 +365,7 @@ static const struct ptp_clock_info ptp_qoriq_caps = { * * Return the nominal frequency */ -static u32 qoriq_ptp_nominal_freq(u32 clk_src) +static u32 ptp_qoriq_nominal_freq(u32 clk_src) { u32 remainder = 0; @@ -385,9 +385,9 @@ static u32 qoriq_ptp_nominal_freq(u32 clk_src) } /** - * qoriq_ptp_auto_config - calculate a set of default configurations + * ptp_qoriq_auto_config - calculate a set of default configurations * - * @qoriq_ptp: pointer to qoriq_ptp + * @ptp_qoriq: pointer to ptp_qoriq * @node: pointer to device_node * * If below dts properties are not provided, this function will be @@ -401,7 +401,7 @@ static u32 qoriq_ptp_nominal_freq(u32 clk_src) * * Return 0 if success */ -static int qoriq_ptp_auto_config(struct qoriq_ptp *qoriq_ptp, +static int ptp_qoriq_auto_config(struct ptp_qoriq *ptp_qoriq, struct device_node *node) { struct clk *clk; @@ -411,7 +411,7 @@ static int qoriq_ptp_auto_config(struct qoriq_ptp *qoriq_ptp, u32 remainder = 0; u32 clk_src = 0; - qoriq_ptp->cksel = DEFAULT_CKSEL; + ptp_qoriq->cksel = DEFAULT_CKSEL; clk = of_clk_get(node, 0); if (!IS_ERR(clk)) { @@ -424,12 +424,12 @@ static int qoriq_ptp_auto_config(struct qoriq_ptp *qoriq_ptp, return -EINVAL; } - nominal_freq = qoriq_ptp_nominal_freq(clk_src); + nominal_freq = ptp_qoriq_nominal_freq(clk_src); if (!nominal_freq) return -EINVAL; - qoriq_ptp->tclk_period = 1000000000UL / nominal_freq; - qoriq_ptp->tmr_prsc = DEFAULT_TMR_PRSC; + ptp_qoriq->tclk_period = 1000000000UL / nominal_freq; + ptp_qoriq->tmr_prsc = DEFAULT_TMR_PRSC; /* Calculate initial frequency compensation value for TMR_ADD register. * freq_comp = ceil(2^32 / freq_ratio) @@ -440,171 +440,171 @@ static int qoriq_ptp_auto_config(struct qoriq_ptp *qoriq_ptp, if (remainder) freq_comp++; - qoriq_ptp->tmr_add = freq_comp; - qoriq_ptp->tmr_fiper1 = DEFAULT_FIPER1_PERIOD - qoriq_ptp->tclk_period; - qoriq_ptp->tmr_fiper2 = DEFAULT_FIPER2_PERIOD - qoriq_ptp->tclk_period; + ptp_qoriq->tmr_add = freq_comp; + ptp_qoriq->tmr_fiper1 = DEFAULT_FIPER1_PERIOD - ptp_qoriq->tclk_period; + ptp_qoriq->tmr_fiper2 = DEFAULT_FIPER2_PERIOD - ptp_qoriq->tclk_period; /* max_adj = 1000000000 * (freq_ratio - 1.0) - 1 * freq_ratio = reference_clock_freq / nominal_freq */ max_adj = 1000000000ULL * (clk_src - nominal_freq); max_adj = div_u64(max_adj, nominal_freq) - 1; - qoriq_ptp->caps.max_adj = max_adj; + ptp_qoriq->caps.max_adj = max_adj; return 0; } -static int qoriq_ptp_probe(struct platform_device *dev) +static int ptp_qoriq_probe(struct platform_device *dev) { struct device_node *node = dev->dev.of_node; - struct qoriq_ptp *qoriq_ptp; - struct qoriq_ptp_registers *regs; + struct ptp_qoriq *ptp_qoriq; + struct ptp_qoriq_registers *regs; struct timespec64 now; int err = -ENOMEM; u32 tmr_ctrl; unsigned long flags; void __iomem *base; - qoriq_ptp = kzalloc(sizeof(*qoriq_ptp), GFP_KERNEL); - if (!qoriq_ptp) + ptp_qoriq = kzalloc(sizeof(*ptp_qoriq), GFP_KERNEL); + if (!ptp_qoriq) goto no_memory; err = -EINVAL; - qoriq_ptp->dev = &dev->dev; - qoriq_ptp->caps = ptp_qoriq_caps; + ptp_qoriq->dev = &dev->dev; + ptp_qoriq->caps = ptp_qoriq_caps; - if (of_property_read_u32(node, "fsl,cksel", &qoriq_ptp->cksel)) - qoriq_ptp->cksel = DEFAULT_CKSEL; + if (of_property_read_u32(node, "fsl,cksel", &ptp_qoriq->cksel)) + ptp_qoriq->cksel = DEFAULT_CKSEL; if (of_property_read_bool(node, "fsl,extts-fifo")) - qoriq_ptp->extts_fifo_support = true; + ptp_qoriq->extts_fifo_support = true; else - qoriq_ptp->extts_fifo_support = false; + ptp_qoriq->extts_fifo_support = false; if (of_property_read_u32(node, - "fsl,tclk-period", &qoriq_ptp->tclk_period) || + "fsl,tclk-period", &ptp_qoriq->tclk_period) || of_property_read_u32(node, - "fsl,tmr-prsc", &qoriq_ptp->tmr_prsc) || + "fsl,tmr-prsc", &ptp_qoriq->tmr_prsc) || of_property_read_u32(node, - "fsl,tmr-add", &qoriq_ptp->tmr_add) || + "fsl,tmr-add", &ptp_qoriq->tmr_add) || of_property_read_u32(node, - "fsl,tmr-fiper1", &qoriq_ptp->tmr_fiper1) || + "fsl,tmr-fiper1", &ptp_qoriq->tmr_fiper1) || of_property_read_u32(node, - "fsl,tmr-fiper2", &qoriq_ptp->tmr_fiper2) || + "fsl,tmr-fiper2", &ptp_qoriq->tmr_fiper2) || of_property_read_u32(node, - "fsl,max-adj", &qoriq_ptp->caps.max_adj)) { + "fsl,max-adj", &ptp_qoriq->caps.max_adj)) { pr_warn("device tree node missing required elements, try automatic configuration\n"); - if (qoriq_ptp_auto_config(qoriq_ptp, node)) + if (ptp_qoriq_auto_config(ptp_qoriq, node)) goto no_config; } err = -ENODEV; - qoriq_ptp->irq = platform_get_irq(dev, 0); + ptp_qoriq->irq = platform_get_irq(dev, 0); - if (qoriq_ptp->irq < 0) { + if (ptp_qoriq->irq < 0) { pr_err("irq not in device tree\n"); goto no_node; } - if (request_irq(qoriq_ptp->irq, isr, IRQF_SHARED, DRIVER, qoriq_ptp)) { + if (request_irq(ptp_qoriq->irq, isr, IRQF_SHARED, DRIVER, ptp_qoriq)) { pr_err("request_irq failed\n"); goto no_node; } - qoriq_ptp->rsrc = platform_get_resource(dev, IORESOURCE_MEM, 0); - if (!qoriq_ptp->rsrc) { + ptp_qoriq->rsrc = platform_get_resource(dev, IORESOURCE_MEM, 0); + if (!ptp_qoriq->rsrc) { pr_err("no resource\n"); goto no_resource; } - if (request_resource(&iomem_resource, qoriq_ptp->rsrc)) { + if (request_resource(&iomem_resource, ptp_qoriq->rsrc)) { pr_err("resource busy\n"); goto no_resource; } - spin_lock_init(&qoriq_ptp->lock); + spin_lock_init(&ptp_qoriq->lock); - base = ioremap(qoriq_ptp->rsrc->start, - resource_size(qoriq_ptp->rsrc)); + base = ioremap(ptp_qoriq->rsrc->start, + resource_size(ptp_qoriq->rsrc)); if (!base) { pr_err("ioremap ptp registers failed\n"); goto no_ioremap; } - qoriq_ptp->base = base; + ptp_qoriq->base = base; if (of_device_is_compatible(node, "fsl,fman-ptp-timer")) { - qoriq_ptp->regs.ctrl_regs = base + FMAN_CTRL_REGS_OFFSET; - qoriq_ptp->regs.alarm_regs = base + FMAN_ALARM_REGS_OFFSET; - qoriq_ptp->regs.fiper_regs = base + FMAN_FIPER_REGS_OFFSET; - qoriq_ptp->regs.etts_regs = base + FMAN_ETTS_REGS_OFFSET; + ptp_qoriq->regs.ctrl_regs = base + FMAN_CTRL_REGS_OFFSET; + ptp_qoriq->regs.alarm_regs = base + FMAN_ALARM_REGS_OFFSET; + ptp_qoriq->regs.fiper_regs = base + FMAN_FIPER_REGS_OFFSET; + ptp_qoriq->regs.etts_regs = base + FMAN_ETTS_REGS_OFFSET; } else { - qoriq_ptp->regs.ctrl_regs = base + CTRL_REGS_OFFSET; - qoriq_ptp->regs.alarm_regs = base + ALARM_REGS_OFFSET; - qoriq_ptp->regs.fiper_regs = base + FIPER_REGS_OFFSET; - qoriq_ptp->regs.etts_regs = base + ETTS_REGS_OFFSET; + ptp_qoriq->regs.ctrl_regs = base + CTRL_REGS_OFFSET; + ptp_qoriq->regs.alarm_regs = base + ALARM_REGS_OFFSET; + ptp_qoriq->regs.fiper_regs = base + FIPER_REGS_OFFSET; + ptp_qoriq->regs.etts_regs = base + ETTS_REGS_OFFSET; } ktime_get_real_ts64(&now); - ptp_qoriq_settime(&qoriq_ptp->caps, &now); + ptp_qoriq_settime(&ptp_qoriq->caps, &now); tmr_ctrl = - (qoriq_ptp->tclk_period & TCLK_PERIOD_MASK) << TCLK_PERIOD_SHIFT | - (qoriq_ptp->cksel & CKSEL_MASK) << CKSEL_SHIFT; + (ptp_qoriq->tclk_period & TCLK_PERIOD_MASK) << TCLK_PERIOD_SHIFT | + (ptp_qoriq->cksel & CKSEL_MASK) << CKSEL_SHIFT; - spin_lock_irqsave(&qoriq_ptp->lock, flags); + spin_lock_irqsave(&ptp_qoriq->lock, flags); - regs = &qoriq_ptp->regs; + regs = &ptp_qoriq->regs; qoriq_write(®s->ctrl_regs->tmr_ctrl, tmr_ctrl); - qoriq_write(®s->ctrl_regs->tmr_add, qoriq_ptp->tmr_add); - qoriq_write(®s->ctrl_regs->tmr_prsc, qoriq_ptp->tmr_prsc); - qoriq_write(®s->fiper_regs->tmr_fiper1, qoriq_ptp->tmr_fiper1); - qoriq_write(®s->fiper_regs->tmr_fiper2, qoriq_ptp->tmr_fiper2); - set_alarm(qoriq_ptp); + qoriq_write(®s->ctrl_regs->tmr_add, ptp_qoriq->tmr_add); + qoriq_write(®s->ctrl_regs->tmr_prsc, ptp_qoriq->tmr_prsc); + qoriq_write(®s->fiper_regs->tmr_fiper1, ptp_qoriq->tmr_fiper1); + qoriq_write(®s->fiper_regs->tmr_fiper2, ptp_qoriq->tmr_fiper2); + set_alarm(ptp_qoriq); qoriq_write(®s->ctrl_regs->tmr_ctrl, tmr_ctrl|FIPERST|RTPE|TE|FRD); - spin_unlock_irqrestore(&qoriq_ptp->lock, flags); + spin_unlock_irqrestore(&ptp_qoriq->lock, flags); - qoriq_ptp->clock = ptp_clock_register(&qoriq_ptp->caps, &dev->dev); - if (IS_ERR(qoriq_ptp->clock)) { - err = PTR_ERR(qoriq_ptp->clock); + ptp_qoriq->clock = ptp_clock_register(&ptp_qoriq->caps, &dev->dev); + if (IS_ERR(ptp_qoriq->clock)) { + err = PTR_ERR(ptp_qoriq->clock); goto no_clock; } - qoriq_ptp->phc_index = ptp_clock_index(qoriq_ptp->clock); + ptp_qoriq->phc_index = ptp_clock_index(ptp_qoriq->clock); - ptp_qoriq_create_debugfs(qoriq_ptp); - platform_set_drvdata(dev, qoriq_ptp); + ptp_qoriq_create_debugfs(ptp_qoriq); + platform_set_drvdata(dev, ptp_qoriq); return 0; no_clock: - iounmap(qoriq_ptp->base); + iounmap(ptp_qoriq->base); no_ioremap: - release_resource(qoriq_ptp->rsrc); + release_resource(ptp_qoriq->rsrc); no_resource: - free_irq(qoriq_ptp->irq, qoriq_ptp); + free_irq(ptp_qoriq->irq, ptp_qoriq); no_config: no_node: - kfree(qoriq_ptp); + kfree(ptp_qoriq); no_memory: return err; } -static int qoriq_ptp_remove(struct platform_device *dev) +static int ptp_qoriq_remove(struct platform_device *dev) { - struct qoriq_ptp *qoriq_ptp = platform_get_drvdata(dev); - struct qoriq_ptp_registers *regs = &qoriq_ptp->regs; + struct ptp_qoriq *ptp_qoriq = platform_get_drvdata(dev); + struct ptp_qoriq_registers *regs = &ptp_qoriq->regs; qoriq_write(®s->ctrl_regs->tmr_temask, 0); qoriq_write(®s->ctrl_regs->tmr_ctrl, 0); - ptp_qoriq_remove_debugfs(qoriq_ptp); - ptp_clock_unregister(qoriq_ptp->clock); - iounmap(qoriq_ptp->base); - release_resource(qoriq_ptp->rsrc); - free_irq(qoriq_ptp->irq, qoriq_ptp); - kfree(qoriq_ptp); + ptp_qoriq_remove_debugfs(ptp_qoriq); + ptp_clock_unregister(ptp_qoriq->clock); + iounmap(ptp_qoriq->base); + release_resource(ptp_qoriq->rsrc); + free_irq(ptp_qoriq->irq, ptp_qoriq); + kfree(ptp_qoriq); return 0; } @@ -616,16 +616,16 @@ static const struct of_device_id match_table[] = { }; MODULE_DEVICE_TABLE(of, match_table); -static struct platform_driver qoriq_ptp_driver = { +static struct platform_driver ptp_qoriq_driver = { .driver = { .name = "ptp_qoriq", .of_match_table = match_table, }, - .probe = qoriq_ptp_probe, - .remove = qoriq_ptp_remove, + .probe = ptp_qoriq_probe, + .remove = ptp_qoriq_remove, }; -module_platform_driver(qoriq_ptp_driver); +module_platform_driver(ptp_qoriq_driver); MODULE_AUTHOR("Richard Cochran "); MODULE_DESCRIPTION("PTP clock for Freescale QorIQ 1588 timer"); diff --git a/drivers/ptp/ptp_qoriq_debugfs.c b/drivers/ptp/ptp_qoriq_debugfs.c index 970595021088..3a70daf03727 100644 --- a/drivers/ptp/ptp_qoriq_debugfs.c +++ b/drivers/ptp/ptp_qoriq_debugfs.c @@ -7,8 +7,8 @@ static int ptp_qoriq_fiper1_lpbk_get(void *data, u64 *val) { - struct qoriq_ptp *qoriq_ptp = data; - struct qoriq_ptp_registers *regs = &qoriq_ptp->regs; + struct ptp_qoriq *ptp_qoriq = data; + struct ptp_qoriq_registers *regs = &ptp_qoriq->regs; u32 ctrl; ctrl = qoriq_read(®s->ctrl_regs->tmr_ctrl); @@ -19,8 +19,8 @@ static int ptp_qoriq_fiper1_lpbk_get(void *data, u64 *val) static int ptp_qoriq_fiper1_lpbk_set(void *data, u64 val) { - struct qoriq_ptp *qoriq_ptp = data; - struct qoriq_ptp_registers *regs = &qoriq_ptp->regs; + struct ptp_qoriq *ptp_qoriq = data; + struct ptp_qoriq_registers *regs = &ptp_qoriq->regs; u32 ctrl; ctrl = qoriq_read(®s->ctrl_regs->tmr_ctrl); @@ -38,8 +38,8 @@ DEFINE_DEBUGFS_ATTRIBUTE(ptp_qoriq_fiper1_fops, ptp_qoriq_fiper1_lpbk_get, static int ptp_qoriq_fiper2_lpbk_get(void *data, u64 *val) { - struct qoriq_ptp *qoriq_ptp = data; - struct qoriq_ptp_registers *regs = &qoriq_ptp->regs; + struct ptp_qoriq *ptp_qoriq = data; + struct ptp_qoriq_registers *regs = &ptp_qoriq->regs; u32 ctrl; ctrl = qoriq_read(®s->ctrl_regs->tmr_ctrl); @@ -50,8 +50,8 @@ static int ptp_qoriq_fiper2_lpbk_get(void *data, u64 *val) static int ptp_qoriq_fiper2_lpbk_set(void *data, u64 val) { - struct qoriq_ptp *qoriq_ptp = data; - struct qoriq_ptp_registers *regs = &qoriq_ptp->regs; + struct ptp_qoriq *ptp_qoriq = data; + struct ptp_qoriq_registers *regs = &ptp_qoriq->regs; u32 ctrl; ctrl = qoriq_read(®s->ctrl_regs->tmr_ctrl); @@ -67,35 +67,35 @@ static int ptp_qoriq_fiper2_lpbk_set(void *data, u64 val) DEFINE_DEBUGFS_ATTRIBUTE(ptp_qoriq_fiper2_fops, ptp_qoriq_fiper2_lpbk_get, ptp_qoriq_fiper2_lpbk_set, "%llu\n"); -void ptp_qoriq_create_debugfs(struct qoriq_ptp *qoriq_ptp) +void ptp_qoriq_create_debugfs(struct ptp_qoriq *ptp_qoriq) { struct dentry *root; - root = debugfs_create_dir(dev_name(qoriq_ptp->dev), NULL); + root = debugfs_create_dir(dev_name(ptp_qoriq->dev), NULL); if (IS_ERR(root)) return; if (!root) goto err_root; - qoriq_ptp->debugfs_root = root; + ptp_qoriq->debugfs_root = root; if (!debugfs_create_file_unsafe("fiper1-loopback", 0600, root, - qoriq_ptp, &ptp_qoriq_fiper1_fops)) + ptp_qoriq, &ptp_qoriq_fiper1_fops)) goto err_node; if (!debugfs_create_file_unsafe("fiper2-loopback", 0600, root, - qoriq_ptp, &ptp_qoriq_fiper2_fops)) + ptp_qoriq, &ptp_qoriq_fiper2_fops)) goto err_node; return; err_node: debugfs_remove_recursive(root); - qoriq_ptp->debugfs_root = NULL; + ptp_qoriq->debugfs_root = NULL; err_root: - dev_err(qoriq_ptp->dev, "failed to initialize debugfs\n"); + dev_err(ptp_qoriq->dev, "failed to initialize debugfs\n"); } -void ptp_qoriq_remove_debugfs(struct qoriq_ptp *qoriq_ptp) +void ptp_qoriq_remove_debugfs(struct ptp_qoriq *ptp_qoriq) { - debugfs_remove_recursive(qoriq_ptp->debugfs_root); - qoriq_ptp->debugfs_root = NULL; + debugfs_remove_recursive(ptp_qoriq->debugfs_root); + ptp_qoriq->debugfs_root = NULL; } diff --git a/include/linux/fsl/ptp_qoriq.h b/include/linux/fsl/ptp_qoriq.h index 94e9797e434c..c2a32d9ec6ba 100644 --- a/include/linux/fsl/ptp_qoriq.h +++ b/include/linux/fsl/ptp_qoriq.h @@ -49,7 +49,7 @@ struct etts_regs { u32 tmr_etts2_l; /* Timestamp of general purpose external trigger */ }; -struct qoriq_ptp_registers { +struct ptp_qoriq_registers { struct ctrl_regs __iomem *ctrl_regs; struct alarm_regs __iomem *alarm_regs; struct fiper_regs __iomem *fiper_regs; @@ -136,9 +136,9 @@ struct qoriq_ptp_registers { #define DEFAULT_FIPER1_PERIOD 1000000000 #define DEFAULT_FIPER2_PERIOD 100000 -struct qoriq_ptp { +struct ptp_qoriq { void __iomem *base; - struct qoriq_ptp_registers regs; + struct ptp_qoriq_registers regs; spinlock_t lock; /* protects regs */ struct ptp_clock *clock; struct ptp_clock_info caps; @@ -172,12 +172,12 @@ static inline void qoriq_write(unsigned __iomem *addr, u32 val) } #ifdef CONFIG_DEBUG_FS -void ptp_qoriq_create_debugfs(struct qoriq_ptp *qoriq_ptp); -void ptp_qoriq_remove_debugfs(struct qoriq_ptp *qoriq_ptp); +void ptp_qoriq_create_debugfs(struct ptp_qoriq *ptp_qoriq); +void ptp_qoriq_remove_debugfs(struct ptp_qoriq *ptp_qoriq); #else -static inline void ptp_qoriq_create_debugfs(struct qoriq_ptp *qoriq_ptp) +static inline void ptp_qoriq_create_debugfs(struct ptp_qoriq *ptp_qoriq) { } -static inline void ptp_qoriq_remove_debugfs(struct qoriq_ptp *qoriq_ptp) +static inline void ptp_qoriq_remove_debugfs(struct ptp_qoriq *ptp_qoriq) { } #endif -- cgit v1.2.3 From 73356e4ea895d5d4fb2bed30c32d3293b090f3ce Mon Sep 17 00:00:00 2001 From: Yangbo Lu Date: Tue, 12 Feb 2019 12:23:57 +0800 Subject: ptp_qoriq: make ptp operations global This patch is to make functions of ptp operations global, so that ENETC PTP driver which is a PCI driver for same 1588 timer IP block could reuse them. Signed-off-by: Yangbo Lu Signed-off-by: David S. Miller --- drivers/ptp/ptp_qoriq.c | 27 ++++++++++++++++----------- include/linux/fsl/ptp_qoriq.h | 9 +++++++++ 2 files changed, 25 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/drivers/ptp/ptp_qoriq.c b/drivers/ptp/ptp_qoriq.c index 8c10d0f8864f..1f3e73e62de9 100644 --- a/drivers/ptp/ptp_qoriq.c +++ b/drivers/ptp/ptp_qoriq.c @@ -22,7 +22,6 @@ #include #include -#include #include #include #include @@ -135,7 +134,7 @@ static int extts_clean_up(struct ptp_qoriq *ptp_qoriq, int index, * Interrupt service routine */ -static irqreturn_t isr(int irq, void *priv) +irqreturn_t ptp_qoriq_isr(int irq, void *priv) { struct ptp_qoriq *ptp_qoriq = priv; struct ptp_qoriq_registers *regs = &ptp_qoriq->regs; @@ -200,12 +199,13 @@ static irqreturn_t isr(int irq, void *priv) } else return IRQ_NONE; } +EXPORT_SYMBOL_GPL(ptp_qoriq_isr); /* * PTP clock operations */ -static int ptp_qoriq_adjfine(struct ptp_clock_info *ptp, long scaled_ppm) +int ptp_qoriq_adjfine(struct ptp_clock_info *ptp, long scaled_ppm) { u64 adj, diff; u32 tmr_add; @@ -233,8 +233,9 @@ static int ptp_qoriq_adjfine(struct ptp_clock_info *ptp, long scaled_ppm) return 0; } +EXPORT_SYMBOL_GPL(ptp_qoriq_adjfine); -static int ptp_qoriq_adjtime(struct ptp_clock_info *ptp, s64 delta) +int ptp_qoriq_adjtime(struct ptp_clock_info *ptp, s64 delta) { s64 now; unsigned long flags; @@ -251,9 +252,9 @@ static int ptp_qoriq_adjtime(struct ptp_clock_info *ptp, s64 delta) return 0; } +EXPORT_SYMBOL_GPL(ptp_qoriq_adjtime); -static int ptp_qoriq_gettime(struct ptp_clock_info *ptp, - struct timespec64 *ts) +int ptp_qoriq_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts) { u64 ns; unsigned long flags; @@ -269,9 +270,10 @@ static int ptp_qoriq_gettime(struct ptp_clock_info *ptp, return 0; } +EXPORT_SYMBOL_GPL(ptp_qoriq_gettime); -static int ptp_qoriq_settime(struct ptp_clock_info *ptp, - const struct timespec64 *ts) +int ptp_qoriq_settime(struct ptp_clock_info *ptp, + const struct timespec64 *ts) { u64 ns; unsigned long flags; @@ -288,9 +290,10 @@ static int ptp_qoriq_settime(struct ptp_clock_info *ptp, return 0; } +EXPORT_SYMBOL_GPL(ptp_qoriq_settime); -static int ptp_qoriq_enable(struct ptp_clock_info *ptp, - struct ptp_clock_request *rq, int on) +int ptp_qoriq_enable(struct ptp_clock_info *ptp, + struct ptp_clock_request *rq, int on) { struct ptp_qoriq *ptp_qoriq = container_of(ptp, struct ptp_qoriq, caps); struct ptp_qoriq_registers *regs = &ptp_qoriq->regs; @@ -336,6 +339,7 @@ static int ptp_qoriq_enable(struct ptp_clock_info *ptp, spin_unlock_irqrestore(&ptp_qoriq->lock, flags); return 0; } +EXPORT_SYMBOL_GPL(ptp_qoriq_enable); static const struct ptp_clock_info ptp_qoriq_caps = { .owner = THIS_MODULE, @@ -508,7 +512,8 @@ static int ptp_qoriq_probe(struct platform_device *dev) pr_err("irq not in device tree\n"); goto no_node; } - if (request_irq(ptp_qoriq->irq, isr, IRQF_SHARED, DRIVER, ptp_qoriq)) { + if (request_irq(ptp_qoriq->irq, ptp_qoriq_isr, IRQF_SHARED, + DRIVER, ptp_qoriq)) { pr_err("request_irq failed\n"); goto no_node; } diff --git a/include/linux/fsl/ptp_qoriq.h b/include/linux/fsl/ptp_qoriq.h index c2a32d9ec6ba..75e6f0523cb1 100644 --- a/include/linux/fsl/ptp_qoriq.h +++ b/include/linux/fsl/ptp_qoriq.h @@ -7,6 +7,7 @@ #define __PTP_QORIQ_H__ #include +#include #include /* @@ -171,6 +172,14 @@ static inline void qoriq_write(unsigned __iomem *addr, u32 val) iowrite32be(val, addr); } +irqreturn_t ptp_qoriq_isr(int irq, void *priv); +int ptp_qoriq_adjfine(struct ptp_clock_info *ptp, long scaled_ppm); +int ptp_qoriq_adjtime(struct ptp_clock_info *ptp, s64 delta); +int ptp_qoriq_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts); +int ptp_qoriq_settime(struct ptp_clock_info *ptp, + const struct timespec64 *ts); +int ptp_qoriq_enable(struct ptp_clock_info *ptp, + struct ptp_clock_request *rq, int on); #ifdef CONFIG_DEBUG_FS void ptp_qoriq_create_debugfs(struct ptp_qoriq *ptp_qoriq); void ptp_qoriq_remove_debugfs(struct ptp_qoriq *ptp_qoriq); -- cgit v1.2.3 From ff54571a747bc1fc8e132ef9c512451ec6de3336 Mon Sep 17 00:00:00 2001 From: Yangbo Lu Date: Tue, 12 Feb 2019 12:23:58 +0800 Subject: ptp_qoriq: convert to use ptp_qoriq_init/free Moved QorIQ PTP clock initialization/free into new functions ptp_qoriq_init()/ptp_qoriq_free(). These functions could also be reused by ENETC PTP drvier which is a PCI driver for same 1588 timer IP block. Signed-off-by: Yangbo Lu Signed-off-by: David S. Miller --- drivers/ptp/ptp_qoriq.c | 144 ++++++++++++++++++++++-------------------- include/linux/fsl/ptp_qoriq.h | 3 + 2 files changed, 80 insertions(+), 67 deletions(-) (limited to 'include/linux') diff --git a/drivers/ptp/ptp_qoriq.c b/drivers/ptp/ptp_qoriq.c index 1f3e73e62de9..db4f929ea4e9 100644 --- a/drivers/ptp/ptp_qoriq.c +++ b/drivers/ptp/ptp_qoriq.c @@ -458,25 +458,17 @@ static int ptp_qoriq_auto_config(struct ptp_qoriq *ptp_qoriq, return 0; } -static int ptp_qoriq_probe(struct platform_device *dev) +int ptp_qoriq_init(struct ptp_qoriq *ptp_qoriq, void __iomem *base, + const struct ptp_clock_info caps) { - struct device_node *node = dev->dev.of_node; - struct ptp_qoriq *ptp_qoriq; + struct device_node *node = ptp_qoriq->dev->of_node; struct ptp_qoriq_registers *regs; struct timespec64 now; - int err = -ENOMEM; - u32 tmr_ctrl; unsigned long flags; - void __iomem *base; - - ptp_qoriq = kzalloc(sizeof(*ptp_qoriq), GFP_KERNEL); - if (!ptp_qoriq) - goto no_memory; - - err = -EINVAL; + u32 tmr_ctrl; - ptp_qoriq->dev = &dev->dev; - ptp_qoriq->caps = ptp_qoriq_caps; + ptp_qoriq->base = base; + ptp_qoriq->caps = caps; if (of_property_read_u32(node, "fsl,cksel", &ptp_qoriq->cksel)) ptp_qoriq->cksel = DEFAULT_CKSEL; @@ -501,44 +493,9 @@ static int ptp_qoriq_probe(struct platform_device *dev) pr_warn("device tree node missing required elements, try automatic configuration\n"); if (ptp_qoriq_auto_config(ptp_qoriq, node)) - goto no_config; + return -ENODEV; } - err = -ENODEV; - - ptp_qoriq->irq = platform_get_irq(dev, 0); - - if (ptp_qoriq->irq < 0) { - pr_err("irq not in device tree\n"); - goto no_node; - } - if (request_irq(ptp_qoriq->irq, ptp_qoriq_isr, IRQF_SHARED, - DRIVER, ptp_qoriq)) { - pr_err("request_irq failed\n"); - goto no_node; - } - - ptp_qoriq->rsrc = platform_get_resource(dev, IORESOURCE_MEM, 0); - if (!ptp_qoriq->rsrc) { - pr_err("no resource\n"); - goto no_resource; - } - if (request_resource(&iomem_resource, ptp_qoriq->rsrc)) { - pr_err("resource busy\n"); - goto no_resource; - } - - spin_lock_init(&ptp_qoriq->lock); - - base = ioremap(ptp_qoriq->rsrc->start, - resource_size(ptp_qoriq->rsrc)); - if (!base) { - pr_err("ioremap ptp registers failed\n"); - goto no_ioremap; - } - - ptp_qoriq->base = base; - if (of_device_is_compatible(node, "fsl,fman-ptp-timer")) { ptp_qoriq->regs.ctrl_regs = base + FMAN_CTRL_REGS_OFFSET; ptp_qoriq->regs.alarm_regs = base + FMAN_ALARM_REGS_OFFSET; @@ -558,6 +515,7 @@ static int ptp_qoriq_probe(struct platform_device *dev) (ptp_qoriq->tclk_period & TCLK_PERIOD_MASK) << TCLK_PERIOD_SHIFT | (ptp_qoriq->cksel & CKSEL_MASK) << CKSEL_SHIFT; + spin_lock_init(&ptp_qoriq->lock); spin_lock_irqsave(&ptp_qoriq->lock, flags); regs = &ptp_qoriq->regs; @@ -571,16 +529,77 @@ static int ptp_qoriq_probe(struct platform_device *dev) spin_unlock_irqrestore(&ptp_qoriq->lock, flags); - ptp_qoriq->clock = ptp_clock_register(&ptp_qoriq->caps, &dev->dev); - if (IS_ERR(ptp_qoriq->clock)) { - err = PTR_ERR(ptp_qoriq->clock); - goto no_clock; - } - ptp_qoriq->phc_index = ptp_clock_index(ptp_qoriq->clock); + ptp_qoriq->clock = ptp_clock_register(&ptp_qoriq->caps, ptp_qoriq->dev); + if (IS_ERR(ptp_qoriq->clock)) + return PTR_ERR(ptp_qoriq->clock); + ptp_qoriq->phc_index = ptp_clock_index(ptp_qoriq->clock); ptp_qoriq_create_debugfs(ptp_qoriq); - platform_set_drvdata(dev, ptp_qoriq); + return 0; +} +EXPORT_SYMBOL_GPL(ptp_qoriq_init); + +void ptp_qoriq_free(struct ptp_qoriq *ptp_qoriq) +{ + struct ptp_qoriq_registers *regs = &ptp_qoriq->regs; + + qoriq_write(®s->ctrl_regs->tmr_temask, 0); + qoriq_write(®s->ctrl_regs->tmr_ctrl, 0); + + ptp_qoriq_remove_debugfs(ptp_qoriq); + ptp_clock_unregister(ptp_qoriq->clock); + iounmap(ptp_qoriq->base); + free_irq(ptp_qoriq->irq, ptp_qoriq); +} +EXPORT_SYMBOL_GPL(ptp_qoriq_free); + +static int ptp_qoriq_probe(struct platform_device *dev) +{ + struct ptp_qoriq *ptp_qoriq; + int err = -ENOMEM; + void __iomem *base; + ptp_qoriq = kzalloc(sizeof(*ptp_qoriq), GFP_KERNEL); + if (!ptp_qoriq) + goto no_memory; + + ptp_qoriq->dev = &dev->dev; + + err = -ENODEV; + + ptp_qoriq->irq = platform_get_irq(dev, 0); + if (ptp_qoriq->irq < 0) { + pr_err("irq not in device tree\n"); + goto no_node; + } + if (request_irq(ptp_qoriq->irq, ptp_qoriq_isr, IRQF_SHARED, + DRIVER, ptp_qoriq)) { + pr_err("request_irq failed\n"); + goto no_node; + } + + ptp_qoriq->rsrc = platform_get_resource(dev, IORESOURCE_MEM, 0); + if (!ptp_qoriq->rsrc) { + pr_err("no resource\n"); + goto no_resource; + } + if (request_resource(&iomem_resource, ptp_qoriq->rsrc)) { + pr_err("resource busy\n"); + goto no_resource; + } + + base = ioremap(ptp_qoriq->rsrc->start, + resource_size(ptp_qoriq->rsrc)); + if (!base) { + pr_err("ioremap ptp registers failed\n"); + goto no_ioremap; + } + + err = ptp_qoriq_init(ptp_qoriq, base, ptp_qoriq_caps); + if (err) + goto no_clock; + + platform_set_drvdata(dev, ptp_qoriq); return 0; no_clock: @@ -589,7 +608,6 @@ no_ioremap: release_resource(ptp_qoriq->rsrc); no_resource: free_irq(ptp_qoriq->irq, ptp_qoriq); -no_config: no_node: kfree(ptp_qoriq); no_memory: @@ -599,18 +617,10 @@ no_memory: static int ptp_qoriq_remove(struct platform_device *dev) { struct ptp_qoriq *ptp_qoriq = platform_get_drvdata(dev); - struct ptp_qoriq_registers *regs = &ptp_qoriq->regs; - qoriq_write(®s->ctrl_regs->tmr_temask, 0); - qoriq_write(®s->ctrl_regs->tmr_ctrl, 0); - - ptp_qoriq_remove_debugfs(ptp_qoriq); - ptp_clock_unregister(ptp_qoriq->clock); - iounmap(ptp_qoriq->base); + ptp_qoriq_free(ptp_qoriq); release_resource(ptp_qoriq->rsrc); - free_irq(ptp_qoriq->irq, ptp_qoriq); kfree(ptp_qoriq); - return 0; } diff --git a/include/linux/fsl/ptp_qoriq.h b/include/linux/fsl/ptp_qoriq.h index 75e6f0523cb1..757aec385493 100644 --- a/include/linux/fsl/ptp_qoriq.h +++ b/include/linux/fsl/ptp_qoriq.h @@ -173,6 +173,9 @@ static inline void qoriq_write(unsigned __iomem *addr, u32 val) } irqreturn_t ptp_qoriq_isr(int irq, void *priv); +int ptp_qoriq_init(struct ptp_qoriq *ptp_qoriq, void __iomem *base, + const struct ptp_clock_info caps); +void ptp_qoriq_free(struct ptp_qoriq *ptp_qoriq); int ptp_qoriq_adjfine(struct ptp_clock_info *ptp, long scaled_ppm); int ptp_qoriq_adjtime(struct ptp_clock_info *ptp, s64 delta); int ptp_qoriq_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts); -- cgit v1.2.3 From f038ddf25b80be90e1af9439935bdb66fdbf5e28 Mon Sep 17 00:00:00 2001 From: Yangbo Lu Date: Tue, 12 Feb 2019 12:23:59 +0800 Subject: ptp_qoriq: add little enadian support There is QorIQ 1588 timer IP block on the new ENETC Ethernet controller. However it uses little endian mode which is different with before. This patch is to add little endian support for the driver by using "little-endian" dts node property. Signed-off-by: Yangbo Lu Signed-off-by: David S. Miller --- drivers/ptp/ptp_qoriq.c | 69 +++++++++++++++++++++++------------------ drivers/ptp/ptp_qoriq_debugfs.c | 12 +++---- include/linux/fsl/ptp_qoriq.h | 21 +++++++++---- 3 files changed, 60 insertions(+), 42 deletions(-) (limited to 'include/linux') diff --git a/drivers/ptp/ptp_qoriq.c b/drivers/ptp/ptp_qoriq.c index db4f929ea4e9..ed4dc398c57b 100644 --- a/drivers/ptp/ptp_qoriq.c +++ b/drivers/ptp/ptp_qoriq.c @@ -43,8 +43,8 @@ static u64 tmr_cnt_read(struct ptp_qoriq *ptp_qoriq) u64 ns; u32 lo, hi; - lo = qoriq_read(®s->ctrl_regs->tmr_cnt_l); - hi = qoriq_read(®s->ctrl_regs->tmr_cnt_h); + lo = ptp_qoriq->read(®s->ctrl_regs->tmr_cnt_l); + hi = ptp_qoriq->read(®s->ctrl_regs->tmr_cnt_h); ns = ((u64) hi) << 32; ns |= lo; return ns; @@ -57,8 +57,8 @@ static void tmr_cnt_write(struct ptp_qoriq *ptp_qoriq, u64 ns) u32 hi = ns >> 32; u32 lo = ns & 0xffffffff; - qoriq_write(®s->ctrl_regs->tmr_cnt_l, lo); - qoriq_write(®s->ctrl_regs->tmr_cnt_h, hi); + ptp_qoriq->write(®s->ctrl_regs->tmr_cnt_l, lo); + ptp_qoriq->write(®s->ctrl_regs->tmr_cnt_h, hi); } /* Caller must hold ptp_qoriq->lock. */ @@ -73,8 +73,8 @@ static void set_alarm(struct ptp_qoriq *ptp_qoriq) ns -= ptp_qoriq->tclk_period; hi = ns >> 32; lo = ns & 0xffffffff; - qoriq_write(®s->alarm_regs->tmr_alarm1_l, lo); - qoriq_write(®s->alarm_regs->tmr_alarm1_h, hi); + ptp_qoriq->write(®s->alarm_regs->tmr_alarm1_l, lo); + ptp_qoriq->write(®s->alarm_regs->tmr_alarm1_h, hi); } /* Caller must hold ptp_qoriq->lock. */ @@ -83,8 +83,8 @@ static void set_fipers(struct ptp_qoriq *ptp_qoriq) struct ptp_qoriq_registers *regs = &ptp_qoriq->regs; set_alarm(ptp_qoriq); - qoriq_write(®s->fiper_regs->tmr_fiper1, ptp_qoriq->tmr_fiper1); - qoriq_write(®s->fiper_regs->tmr_fiper2, ptp_qoriq->tmr_fiper2); + ptp_qoriq->write(®s->fiper_regs->tmr_fiper1, ptp_qoriq->tmr_fiper1); + ptp_qoriq->write(®s->fiper_regs->tmr_fiper2, ptp_qoriq->tmr_fiper2); } static int extts_clean_up(struct ptp_qoriq *ptp_qoriq, int index, @@ -115,8 +115,8 @@ static int extts_clean_up(struct ptp_qoriq *ptp_qoriq, int index, event.index = index; do { - lo = qoriq_read(reg_etts_l); - hi = qoriq_read(reg_etts_h); + lo = ptp_qoriq->read(reg_etts_l); + hi = ptp_qoriq->read(reg_etts_h); if (update_event) { event.timestamp = ((u64) hi) << 32; @@ -124,7 +124,7 @@ static int extts_clean_up(struct ptp_qoriq *ptp_qoriq, int index, ptp_clock_event(ptp_qoriq->clock, &event); } - stat = qoriq_read(®s->ctrl_regs->tmr_stat); + stat = ptp_qoriq->read(®s->ctrl_regs->tmr_stat); } while (ptp_qoriq->extts_fifo_support && (stat & valid)); return 0; @@ -144,8 +144,8 @@ irqreturn_t ptp_qoriq_isr(int irq, void *priv) spin_lock(&ptp_qoriq->lock); - val = qoriq_read(®s->ctrl_regs->tmr_tevent); - mask = qoriq_read(®s->ctrl_regs->tmr_temask); + val = ptp_qoriq->read(®s->ctrl_regs->tmr_tevent); + mask = ptp_qoriq->read(®s->ctrl_regs->tmr_temask); spin_unlock(&ptp_qoriq->lock); @@ -173,14 +173,14 @@ irqreturn_t ptp_qoriq_isr(int irq, void *priv) ns = ptp_qoriq->alarm_value + ptp_qoriq->alarm_interval; hi = ns >> 32; lo = ns & 0xffffffff; - qoriq_write(®s->alarm_regs->tmr_alarm2_l, lo); - qoriq_write(®s->alarm_regs->tmr_alarm2_h, hi); + ptp_qoriq->write(®s->alarm_regs->tmr_alarm2_l, lo); + ptp_qoriq->write(®s->alarm_regs->tmr_alarm2_h, hi); ptp_qoriq->alarm_value = ns; } else { spin_lock(&ptp_qoriq->lock); - mask = qoriq_read(®s->ctrl_regs->tmr_temask); + mask = ptp_qoriq->read(®s->ctrl_regs->tmr_temask); mask &= ~ALM2EN; - qoriq_write(®s->ctrl_regs->tmr_temask, mask); + ptp_qoriq->write(®s->ctrl_regs->tmr_temask, mask); spin_unlock(&ptp_qoriq->lock); ptp_qoriq->alarm_value = 0; ptp_qoriq->alarm_interval = 0; @@ -194,7 +194,7 @@ irqreturn_t ptp_qoriq_isr(int irq, void *priv) } if (ack) { - qoriq_write(®s->ctrl_regs->tmr_tevent, ack); + ptp_qoriq->write(®s->ctrl_regs->tmr_tevent, ack); return IRQ_HANDLED; } else return IRQ_NONE; @@ -229,7 +229,7 @@ int ptp_qoriq_adjfine(struct ptp_clock_info *ptp, long scaled_ppm) tmr_add = neg_adj ? tmr_add - diff : tmr_add + diff; - qoriq_write(®s->ctrl_regs->tmr_add, tmr_add); + ptp_qoriq->write(®s->ctrl_regs->tmr_add, tmr_add); return 0; } @@ -326,15 +326,15 @@ int ptp_qoriq_enable(struct ptp_clock_info *ptp, spin_lock_irqsave(&ptp_qoriq->lock, flags); - mask = qoriq_read(®s->ctrl_regs->tmr_temask); + mask = ptp_qoriq->read(®s->ctrl_regs->tmr_temask); if (on) { mask |= bit; - qoriq_write(®s->ctrl_regs->tmr_tevent, bit); + ptp_qoriq->write(®s->ctrl_regs->tmr_tevent, bit); } else { mask &= ~bit; } - qoriq_write(®s->ctrl_regs->tmr_temask, mask); + ptp_qoriq->write(®s->ctrl_regs->tmr_temask, mask); spin_unlock_irqrestore(&ptp_qoriq->lock, flags); return 0; @@ -496,6 +496,14 @@ int ptp_qoriq_init(struct ptp_qoriq *ptp_qoriq, void __iomem *base, return -ENODEV; } + if (of_property_read_bool(node, "little-endian")) { + ptp_qoriq->read = qoriq_read_le; + ptp_qoriq->write = qoriq_write_le; + } else { + ptp_qoriq->read = qoriq_read_be; + ptp_qoriq->write = qoriq_write_be; + } + if (of_device_is_compatible(node, "fsl,fman-ptp-timer")) { ptp_qoriq->regs.ctrl_regs = base + FMAN_CTRL_REGS_OFFSET; ptp_qoriq->regs.alarm_regs = base + FMAN_ALARM_REGS_OFFSET; @@ -519,13 +527,14 @@ int ptp_qoriq_init(struct ptp_qoriq *ptp_qoriq, void __iomem *base, spin_lock_irqsave(&ptp_qoriq->lock, flags); regs = &ptp_qoriq->regs; - qoriq_write(®s->ctrl_regs->tmr_ctrl, tmr_ctrl); - qoriq_write(®s->ctrl_regs->tmr_add, ptp_qoriq->tmr_add); - qoriq_write(®s->ctrl_regs->tmr_prsc, ptp_qoriq->tmr_prsc); - qoriq_write(®s->fiper_regs->tmr_fiper1, ptp_qoriq->tmr_fiper1); - qoriq_write(®s->fiper_regs->tmr_fiper2, ptp_qoriq->tmr_fiper2); + ptp_qoriq->write(®s->ctrl_regs->tmr_ctrl, tmr_ctrl); + ptp_qoriq->write(®s->ctrl_regs->tmr_add, ptp_qoriq->tmr_add); + ptp_qoriq->write(®s->ctrl_regs->tmr_prsc, ptp_qoriq->tmr_prsc); + ptp_qoriq->write(®s->fiper_regs->tmr_fiper1, ptp_qoriq->tmr_fiper1); + ptp_qoriq->write(®s->fiper_regs->tmr_fiper2, ptp_qoriq->tmr_fiper2); set_alarm(ptp_qoriq); - qoriq_write(®s->ctrl_regs->tmr_ctrl, tmr_ctrl|FIPERST|RTPE|TE|FRD); + ptp_qoriq->write(®s->ctrl_regs->tmr_ctrl, + tmr_ctrl|FIPERST|RTPE|TE|FRD); spin_unlock_irqrestore(&ptp_qoriq->lock, flags); @@ -543,8 +552,8 @@ void ptp_qoriq_free(struct ptp_qoriq *ptp_qoriq) { struct ptp_qoriq_registers *regs = &ptp_qoriq->regs; - qoriq_write(®s->ctrl_regs->tmr_temask, 0); - qoriq_write(®s->ctrl_regs->tmr_ctrl, 0); + ptp_qoriq->write(®s->ctrl_regs->tmr_temask, 0); + ptp_qoriq->write(®s->ctrl_regs->tmr_ctrl, 0); ptp_qoriq_remove_debugfs(ptp_qoriq); ptp_clock_unregister(ptp_qoriq->clock); diff --git a/drivers/ptp/ptp_qoriq_debugfs.c b/drivers/ptp/ptp_qoriq_debugfs.c index 3a70daf03727..e8dddcedf288 100644 --- a/drivers/ptp/ptp_qoriq_debugfs.c +++ b/drivers/ptp/ptp_qoriq_debugfs.c @@ -11,7 +11,7 @@ static int ptp_qoriq_fiper1_lpbk_get(void *data, u64 *val) struct ptp_qoriq_registers *regs = &ptp_qoriq->regs; u32 ctrl; - ctrl = qoriq_read(®s->ctrl_regs->tmr_ctrl); + ctrl = ptp_qoriq->read(®s->ctrl_regs->tmr_ctrl); *val = ctrl & PP1L ? 1 : 0; return 0; @@ -23,13 +23,13 @@ static int ptp_qoriq_fiper1_lpbk_set(void *data, u64 val) struct ptp_qoriq_registers *regs = &ptp_qoriq->regs; u32 ctrl; - ctrl = qoriq_read(®s->ctrl_regs->tmr_ctrl); + ctrl = ptp_qoriq->read(®s->ctrl_regs->tmr_ctrl); if (val == 0) ctrl &= ~PP1L; else ctrl |= PP1L; - qoriq_write(®s->ctrl_regs->tmr_ctrl, ctrl); + ptp_qoriq->write(®s->ctrl_regs->tmr_ctrl, ctrl); return 0; } @@ -42,7 +42,7 @@ static int ptp_qoriq_fiper2_lpbk_get(void *data, u64 *val) struct ptp_qoriq_registers *regs = &ptp_qoriq->regs; u32 ctrl; - ctrl = qoriq_read(®s->ctrl_regs->tmr_ctrl); + ctrl = ptp_qoriq->read(®s->ctrl_regs->tmr_ctrl); *val = ctrl & PP2L ? 1 : 0; return 0; @@ -54,13 +54,13 @@ static int ptp_qoriq_fiper2_lpbk_set(void *data, u64 val) struct ptp_qoriq_registers *regs = &ptp_qoriq->regs; u32 ctrl; - ctrl = qoriq_read(®s->ctrl_regs->tmr_ctrl); + ctrl = ptp_qoriq->read(®s->ctrl_regs->tmr_ctrl); if (val == 0) ctrl &= ~PP2L; else ctrl |= PP2L; - qoriq_write(®s->ctrl_regs->tmr_ctrl, ctrl); + ptp_qoriq->write(®s->ctrl_regs->tmr_ctrl, ctrl); return 0; } diff --git a/include/linux/fsl/ptp_qoriq.h b/include/linux/fsl/ptp_qoriq.h index 757aec385493..1f8bb6a6a121 100644 --- a/include/linux/fsl/ptp_qoriq.h +++ b/include/linux/fsl/ptp_qoriq.h @@ -157,21 +157,30 @@ struct ptp_qoriq { u32 cksel; u32 tmr_fiper1; u32 tmr_fiper2; + u32 (*read)(unsigned __iomem *addr); + void (*write)(unsigned __iomem *addr, u32 val); }; -static inline u32 qoriq_read(unsigned __iomem *addr) +static inline u32 qoriq_read_be(unsigned __iomem *addr) { - u32 val; - - val = ioread32be(addr); - return val; + return ioread32be(addr); } -static inline void qoriq_write(unsigned __iomem *addr, u32 val) +static inline void qoriq_write_be(unsigned __iomem *addr, u32 val) { iowrite32be(val, addr); } +static inline u32 qoriq_read_le(unsigned __iomem *addr) +{ + return ioread32(addr); +} + +static inline void qoriq_write_le(unsigned __iomem *addr, u32 val) +{ + iowrite32(val, addr); +} + irqreturn_t ptp_qoriq_isr(int irq, void *priv); int ptp_qoriq_init(struct ptp_qoriq *ptp_qoriq, void __iomem *base, const struct ptp_clock_info caps); -- cgit v1.2.3 From d4e176870bffde373d9688c54aad8f92b3394ba6 Mon Sep 17 00:00:00 2001 From: Yangbo Lu Date: Tue, 12 Feb 2019 12:24:01 +0800 Subject: ptp_qoriq: fix register memory map The 1588 timer on eTSEC Ethernet controller uses different register memory map with DPAA Ethernet controller. Now the new ENETC Ethernet controller uses same reigster memory map with DPAA. To support ENETC, let's use register memory map of DPAA/ENETC in default. Signed-off-by: Yangbo Lu Signed-off-by: David S. Miller --- drivers/ptp/ptp_qoriq.c | 11 ++++++----- include/linux/fsl/ptp_qoriq.h | 18 +++++++++--------- 2 files changed, 15 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/drivers/ptp/ptp_qoriq.c b/drivers/ptp/ptp_qoriq.c index ed4dc398c57b..42d3654f77f0 100644 --- a/drivers/ptp/ptp_qoriq.c +++ b/drivers/ptp/ptp_qoriq.c @@ -504,11 +504,12 @@ int ptp_qoriq_init(struct ptp_qoriq *ptp_qoriq, void __iomem *base, ptp_qoriq->write = qoriq_write_be; } - if (of_device_is_compatible(node, "fsl,fman-ptp-timer")) { - ptp_qoriq->regs.ctrl_regs = base + FMAN_CTRL_REGS_OFFSET; - ptp_qoriq->regs.alarm_regs = base + FMAN_ALARM_REGS_OFFSET; - ptp_qoriq->regs.fiper_regs = base + FMAN_FIPER_REGS_OFFSET; - ptp_qoriq->regs.etts_regs = base + FMAN_ETTS_REGS_OFFSET; + /* The eTSEC uses differnt memory map with DPAA/ENETC */ + if (of_device_is_compatible(node, "fsl,etsec-ptp")) { + ptp_qoriq->regs.ctrl_regs = base + ETSEC_CTRL_REGS_OFFSET; + ptp_qoriq->regs.alarm_regs = base + ETSEC_ALARM_REGS_OFFSET; + ptp_qoriq->regs.fiper_regs = base + ETSEC_FIPER_REGS_OFFSET; + ptp_qoriq->regs.etts_regs = base + ETSEC_ETTS_REGS_OFFSET; } else { ptp_qoriq->regs.ctrl_regs = base + CTRL_REGS_OFFSET; ptp_qoriq->regs.alarm_regs = base + ALARM_REGS_OFFSET; diff --git a/include/linux/fsl/ptp_qoriq.h b/include/linux/fsl/ptp_qoriq.h index 1f8bb6a6a121..f127adb71041 100644 --- a/include/linux/fsl/ptp_qoriq.h +++ b/include/linux/fsl/ptp_qoriq.h @@ -58,15 +58,15 @@ struct ptp_qoriq_registers { }; /* Offset definitions for the four register groups */ -#define CTRL_REGS_OFFSET 0x0 -#define ALARM_REGS_OFFSET 0x40 -#define FIPER_REGS_OFFSET 0x80 -#define ETTS_REGS_OFFSET 0xa0 - -#define FMAN_CTRL_REGS_OFFSET 0x80 -#define FMAN_ALARM_REGS_OFFSET 0xb8 -#define FMAN_FIPER_REGS_OFFSET 0xd0 -#define FMAN_ETTS_REGS_OFFSET 0xe0 +#define ETSEC_CTRL_REGS_OFFSET 0x0 +#define ETSEC_ALARM_REGS_OFFSET 0x40 +#define ETSEC_FIPER_REGS_OFFSET 0x80 +#define ETSEC_ETTS_REGS_OFFSET 0xa0 + +#define CTRL_REGS_OFFSET 0x80 +#define ALARM_REGS_OFFSET 0xb8 +#define FIPER_REGS_OFFSET 0xd0 +#define ETTS_REGS_OFFSET 0xe0 /* Bit definitions for the TMR_CTRL register */ -- cgit v1.2.3 From a4eaed9f9a895b16bb2c54e0ff6b3c99404fec92 Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Mon, 11 Feb 2019 15:25:26 +0100 Subject: net: phy: Mask-out non-compatible modes when setting the max-speed When setting a PHY's max speed using either the max-speed DT property or ethtool, we should mask-out all non-compatible modes according to the settings table, instead of just the 10/100BASET modes. Signed-off-by: Maxime Chevallier Suggested-by: Russell King Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/phy/phy-core.c | 45 +++++++++++++++++++++++++++++++++++++ drivers/net/phy/phy_device.c | 53 -------------------------------------------- include/linux/phy.h | 1 + 3 files changed, 46 insertions(+), 53 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/phy/phy-core.c b/drivers/net/phy/phy-core.c index cdea028d1328..855abf487279 100644 --- a/drivers/net/phy/phy-core.c +++ b/drivers/net/phy/phy-core.c @@ -4,6 +4,7 @@ */ #include #include +#include const char *phy_speed_to_str(int speed) { @@ -338,6 +339,50 @@ size_t phy_speeds(unsigned int *speeds, size_t size, return count; } +static int __set_phy_supported(struct phy_device *phydev, u32 max_speed) +{ + const struct phy_setting *p; + int i; + + for (i = 0, p = settings; i < ARRAY_SIZE(settings); i++, p++) { + if (p->speed > max_speed) + linkmode_clear_bit(p->bit, phydev->supported); + else + break; + } + + return 0; +} + +int phy_set_max_speed(struct phy_device *phydev, u32 max_speed) +{ + int err; + + err = __set_phy_supported(phydev, max_speed); + if (err) + return err; + + linkmode_copy(phydev->advertising, phydev->supported); + + return 0; +} +EXPORT_SYMBOL(phy_set_max_speed); + +void of_set_phy_supported(struct phy_device *phydev) +{ + struct device_node *node = phydev->mdio.dev.of_node; + u32 max_speed; + + if (!IS_ENABLED(CONFIG_OF_MDIO)) + return; + + if (!node) + return; + + if (!of_property_read_u32(node, "max-speed", &max_speed)) + __set_phy_supported(phydev, max_speed); +} + /** * phy_resolve_aneg_linkmode - resolve the advertisements into phy settings * @phydev: The phy_device struct diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 2c61282a2726..64497ec293e1 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -1949,44 +1949,6 @@ int genphy_loopback(struct phy_device *phydev, bool enable) } EXPORT_SYMBOL(genphy_loopback); -static int __set_phy_supported(struct phy_device *phydev, u32 max_speed) -{ - switch (max_speed) { - case SPEED_10: - linkmode_clear_bit(ETHTOOL_LINK_MODE_100baseT_Half_BIT, - phydev->supported); - linkmode_clear_bit(ETHTOOL_LINK_MODE_100baseT_Full_BIT, - phydev->supported); - /* fall through */ - case SPEED_100: - linkmode_clear_bit(ETHTOOL_LINK_MODE_1000baseT_Half_BIT, - phydev->supported); - linkmode_clear_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT, - phydev->supported); - break; - case SPEED_1000: - break; - default: - return -ENOTSUPP; - } - - return 0; -} - -int phy_set_max_speed(struct phy_device *phydev, u32 max_speed) -{ - int err; - - err = __set_phy_supported(phydev, max_speed); - if (err) - return err; - - linkmode_copy(phydev->advertising, phydev->supported); - - return 0; -} -EXPORT_SYMBOL(phy_set_max_speed); - /** * phy_remove_link_mode - Remove a supported link mode * @phydev: phy_device structure to remove link mode from @@ -2117,21 +2079,6 @@ bool phy_validate_pause(struct phy_device *phydev, } EXPORT_SYMBOL(phy_validate_pause); -static void of_set_phy_supported(struct phy_device *phydev) -{ - struct device_node *node = phydev->mdio.dev.of_node; - u32 max_speed; - - if (!IS_ENABLED(CONFIG_OF_MDIO)) - return; - - if (!node) - return; - - if (!of_property_read_u32(node, "max-speed", &max_speed)) - __set_phy_supported(phydev, max_speed); -} - static void of_set_phy_eee_broken(struct phy_device *phydev) { struct device_node *node = phydev->mdio.dev.of_node; diff --git a/include/linux/phy.h b/include/linux/phy.h index 378da9a6165e..20344c7744d8 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -673,6 +673,7 @@ phy_lookup_setting(int speed, int duplex, const unsigned long *mask, bool exact); size_t phy_speeds(unsigned int *speeds, size_t size, unsigned long *mask); +void of_set_phy_supported(struct phy_device *phydev); static inline bool __phy_is_started(struct phy_device *phydev) { -- cgit v1.2.3 From 3feb9b23bf4cbf9f34568035170c6f1c25416523 Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Mon, 11 Feb 2019 15:25:27 +0100 Subject: net: phy: Move of_set_phy_eee_broken to phy-core.c Since of_set_phy_supported was moved to phy-core.c, we can also move of_set_phy_eee_broken to the same location, so that we have all OF functions in the same place. This patch doesn't intend to introduce any change in behaviour. Signed-off-by: Maxime Chevallier Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/phy/phy-core.c | 27 +++++++++++++++++++++++++++ drivers/net/phy/phy_device.c | 28 ---------------------------- include/linux/phy.h | 1 + 3 files changed, 28 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/phy/phy-core.c b/drivers/net/phy/phy-core.c index 855abf487279..de58a59815d5 100644 --- a/drivers/net/phy/phy-core.c +++ b/drivers/net/phy/phy-core.c @@ -383,6 +383,33 @@ void of_set_phy_supported(struct phy_device *phydev) __set_phy_supported(phydev, max_speed); } +void of_set_phy_eee_broken(struct phy_device *phydev) +{ + struct device_node *node = phydev->mdio.dev.of_node; + u32 broken = 0; + + if (!IS_ENABLED(CONFIG_OF_MDIO)) + return; + + if (!node) + return; + + if (of_property_read_bool(node, "eee-broken-100tx")) + broken |= MDIO_EEE_100TX; + if (of_property_read_bool(node, "eee-broken-1000t")) + broken |= MDIO_EEE_1000T; + if (of_property_read_bool(node, "eee-broken-10gt")) + broken |= MDIO_EEE_10GT; + if (of_property_read_bool(node, "eee-broken-1000kx")) + broken |= MDIO_EEE_1000KX; + if (of_property_read_bool(node, "eee-broken-10gkx4")) + broken |= MDIO_EEE_10GKX4; + if (of_property_read_bool(node, "eee-broken-10gkr")) + broken |= MDIO_EEE_10GKR; + + phydev->eee_broken_modes = broken; +} + /** * phy_resolve_aneg_linkmode - resolve the advertisements into phy settings * @phydev: The phy_device struct diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 64497ec293e1..a752de2fff5e 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -30,7 +30,6 @@ #include #include #include -#include MODULE_DESCRIPTION("PHY library"); MODULE_AUTHOR("Andy Fleming"); @@ -2079,33 +2078,6 @@ bool phy_validate_pause(struct phy_device *phydev, } EXPORT_SYMBOL(phy_validate_pause); -static void of_set_phy_eee_broken(struct phy_device *phydev) -{ - struct device_node *node = phydev->mdio.dev.of_node; - u32 broken = 0; - - if (!IS_ENABLED(CONFIG_OF_MDIO)) - return; - - if (!node) - return; - - if (of_property_read_bool(node, "eee-broken-100tx")) - broken |= MDIO_EEE_100TX; - if (of_property_read_bool(node, "eee-broken-1000t")) - broken |= MDIO_EEE_1000T; - if (of_property_read_bool(node, "eee-broken-10gt")) - broken |= MDIO_EEE_10GT; - if (of_property_read_bool(node, "eee-broken-1000kx")) - broken |= MDIO_EEE_1000KX; - if (of_property_read_bool(node, "eee-broken-10gkx4")) - broken |= MDIO_EEE_10GKX4; - if (of_property_read_bool(node, "eee-broken-10gkr")) - broken |= MDIO_EEE_10GKR; - - phydev->eee_broken_modes = broken; -} - static bool phy_drv_supports_irq(struct phy_driver *phydrv) { return phydrv->config_intr && phydrv->ack_interrupt; diff --git a/include/linux/phy.h b/include/linux/phy.h index 20344c7744d8..1a1d93a2a906 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -674,6 +674,7 @@ phy_lookup_setting(int speed, int duplex, const unsigned long *mask, size_t phy_speeds(unsigned int *speeds, size_t size, unsigned long *mask); void of_set_phy_supported(struct phy_device *phydev); +void of_set_phy_eee_broken(struct phy_device *phydev); static inline bool __phy_is_started(struct phy_device *phydev) { -- cgit v1.2.3 From ac3f5533343f6ec7fa24a27f0ae22bbfd27e0b23 Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Mon, 11 Feb 2019 15:25:28 +0100 Subject: net: phy: Extract genphy_c45_pma_read_abilities from marvell10g Marvell 10G PHY driver has a generic way of initializing the supported link modes by reading the PHY's C45 PMA abilities. This can be made generic, since these registers are part of the 802.3 specifications. This commit extracts the config_init link_mode initialization code from marvell10g and uses it to introduce the genphy_c45_pma_read_abilities function. Only PMA modes are read, it's still up to the caller to set the Pause parameters. Signed-off-by: Maxime Chevallier Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/phy/marvell10g.c | 78 +++++--------------------------------------- drivers/net/phy/phy-c45.c | 74 +++++++++++++++++++++++++++++++++++++++++ include/linux/phy.h | 1 + 3 files changed, 83 insertions(+), 70 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/phy/marvell10g.c b/drivers/net/phy/marvell10g.c index 08362dc657cd..496805c0ddfe 100644 --- a/drivers/net/phy/marvell10g.c +++ b/drivers/net/phy/marvell10g.c @@ -233,8 +233,7 @@ static int mv3310_resume(struct phy_device *phydev) static int mv3310_config_init(struct phy_device *phydev) { - __ETHTOOL_DECLARE_LINK_MODE_MASK(supported) = { 0, }; - int val; + int ret, val; /* Check that the PHY interface type is compatible */ if (phydev->interface != PHY_INTERFACE_MODE_SGMII && @@ -243,8 +242,8 @@ static int mv3310_config_init(struct phy_device *phydev) phydev->interface != PHY_INTERFACE_MODE_10GKR) return -ENODEV; - __set_bit(ETHTOOL_LINK_MODE_Pause_BIT, supported); - __set_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, supported); + __set_bit(ETHTOOL_LINK_MODE_Pause_BIT, phydev->supported); + __set_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, phydev->supported); if (phydev->c45_ids.devices_in_package & MDIO_DEVS_AN) { val = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_STAT1); @@ -252,74 +251,13 @@ static int mv3310_config_init(struct phy_device *phydev) return val; if (val & MDIO_AN_STAT1_ABLE) - __set_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, supported); + __set_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, + phydev->supported); } - val = phy_read_mmd(phydev, MDIO_MMD_PMAPMD, MDIO_STAT2); - if (val < 0) - return val; - - /* Ethtool does not support the WAN mode bits */ - if (val & (MDIO_PMA_STAT2_10GBSR | MDIO_PMA_STAT2_10GBLR | - MDIO_PMA_STAT2_10GBER | MDIO_PMA_STAT2_10GBLX4 | - MDIO_PMA_STAT2_10GBSW | MDIO_PMA_STAT2_10GBLW | - MDIO_PMA_STAT2_10GBEW)) - __set_bit(ETHTOOL_LINK_MODE_FIBRE_BIT, supported); - if (val & MDIO_PMA_STAT2_10GBSR) - __set_bit(ETHTOOL_LINK_MODE_10000baseSR_Full_BIT, supported); - if (val & MDIO_PMA_STAT2_10GBLR) - __set_bit(ETHTOOL_LINK_MODE_10000baseLR_Full_BIT, supported); - if (val & MDIO_PMA_STAT2_10GBER) - __set_bit(ETHTOOL_LINK_MODE_10000baseER_Full_BIT, supported); - - if (val & MDIO_PMA_STAT2_EXTABLE) { - val = phy_read_mmd(phydev, MDIO_MMD_PMAPMD, MDIO_PMA_EXTABLE); - if (val < 0) - return val; - - if (val & (MDIO_PMA_EXTABLE_10GBT | MDIO_PMA_EXTABLE_1000BT | - MDIO_PMA_EXTABLE_100BTX | MDIO_PMA_EXTABLE_10BT)) - __set_bit(ETHTOOL_LINK_MODE_TP_BIT, supported); - if (val & MDIO_PMA_EXTABLE_10GBLRM) - __set_bit(ETHTOOL_LINK_MODE_FIBRE_BIT, supported); - if (val & (MDIO_PMA_EXTABLE_10GBKX4 | MDIO_PMA_EXTABLE_10GBKR | - MDIO_PMA_EXTABLE_1000BKX)) - __set_bit(ETHTOOL_LINK_MODE_Backplane_BIT, supported); - if (val & MDIO_PMA_EXTABLE_10GBLRM) - __set_bit(ETHTOOL_LINK_MODE_10000baseLRM_Full_BIT, - supported); - if (val & MDIO_PMA_EXTABLE_10GBT) - __set_bit(ETHTOOL_LINK_MODE_10000baseT_Full_BIT, - supported); - if (val & MDIO_PMA_EXTABLE_10GBKX4) - __set_bit(ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT, - supported); - if (val & MDIO_PMA_EXTABLE_10GBKR) - __set_bit(ETHTOOL_LINK_MODE_10000baseKR_Full_BIT, - supported); - if (val & MDIO_PMA_EXTABLE_1000BT) - __set_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT, - supported); - if (val & MDIO_PMA_EXTABLE_1000BKX) - __set_bit(ETHTOOL_LINK_MODE_1000baseKX_Full_BIT, - supported); - if (val & MDIO_PMA_EXTABLE_100BTX) { - __set_bit(ETHTOOL_LINK_MODE_100baseT_Full_BIT, - supported); - __set_bit(ETHTOOL_LINK_MODE_100baseT_Half_BIT, - supported); - } - if (val & MDIO_PMA_EXTABLE_10BT) { - __set_bit(ETHTOOL_LINK_MODE_10baseT_Full_BIT, - supported); - __set_bit(ETHTOOL_LINK_MODE_10baseT_Half_BIT, - supported); - } - } - - linkmode_copy(phydev->supported, supported); - linkmode_and(phydev->advertising, phydev->advertising, - phydev->supported); + ret = genphy_c45_pma_read_abilities(phydev); + if (ret) + return ret; return 0; } diff --git a/drivers/net/phy/phy-c45.c b/drivers/net/phy/phy-c45.c index eff9e5a4d831..6f028de4dae1 100644 --- a/drivers/net/phy/phy-c45.c +++ b/drivers/net/phy/phy-c45.c @@ -271,6 +271,80 @@ int genphy_c45_read_mdix(struct phy_device *phydev) } EXPORT_SYMBOL_GPL(genphy_c45_read_mdix); +/** + * genphy_c45_pma_read_abilities - read supported link modes from PMA + * @phydev: target phy_device struct + * + * Read the supported link modes from the PMA Status 2 (1.8) register. If bit + * 1.8.9 is set, the list of supported modes is build using the values in the + * PMA Extended Abilities (1.11) register, indicating 1000BASET an 10G related + * modes. If bit 1.11.14 is set, then the list is also extended with the modes + * in the 2.5G/5G PMA Extended register (1.21), indicating if 2.5GBASET and + * 5GBASET are supported. + */ +int genphy_c45_pma_read_abilities(struct phy_device *phydev) +{ + int val; + + val = phy_read_mmd(phydev, MDIO_MMD_PMAPMD, MDIO_STAT2); + if (val < 0) + return val; + + linkmode_mod_bit(ETHTOOL_LINK_MODE_10000baseSR_Full_BIT, + phydev->supported, + val & MDIO_PMA_STAT2_10GBSR); + + linkmode_mod_bit(ETHTOOL_LINK_MODE_10000baseLR_Full_BIT, + phydev->supported, + val & MDIO_PMA_STAT2_10GBLR); + + linkmode_mod_bit(ETHTOOL_LINK_MODE_10000baseER_Full_BIT, + phydev->supported, + val & MDIO_PMA_STAT2_10GBER); + + if (val & MDIO_PMA_STAT2_EXTABLE) { + val = phy_read_mmd(phydev, MDIO_MMD_PMAPMD, MDIO_PMA_EXTABLE); + if (val < 0) + return val; + + linkmode_mod_bit(ETHTOOL_LINK_MODE_10000baseLRM_Full_BIT, + phydev->supported, + val & MDIO_PMA_EXTABLE_10GBLRM); + linkmode_mod_bit(ETHTOOL_LINK_MODE_10000baseT_Full_BIT, + phydev->supported, + val & MDIO_PMA_EXTABLE_10GBT); + linkmode_mod_bit(ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT, + phydev->supported, + val & MDIO_PMA_EXTABLE_10GBKX4); + linkmode_mod_bit(ETHTOOL_LINK_MODE_10000baseKR_Full_BIT, + phydev->supported, + val & MDIO_PMA_EXTABLE_10GBKR); + linkmode_mod_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT, + phydev->supported, + val & MDIO_PMA_EXTABLE_1000BT); + linkmode_mod_bit(ETHTOOL_LINK_MODE_1000baseKX_Full_BIT, + phydev->supported, + val & MDIO_PMA_EXTABLE_1000BKX); + + linkmode_mod_bit(ETHTOOL_LINK_MODE_100baseT_Full_BIT, + phydev->supported, + val & MDIO_PMA_EXTABLE_100BTX); + linkmode_mod_bit(ETHTOOL_LINK_MODE_100baseT_Half_BIT, + phydev->supported, + val & MDIO_PMA_EXTABLE_100BTX); + + linkmode_mod_bit(ETHTOOL_LINK_MODE_10baseT_Full_BIT, + phydev->supported, + val & MDIO_PMA_EXTABLE_10BT); + linkmode_mod_bit(ETHTOOL_LINK_MODE_10baseT_Half_BIT, + phydev->supported, + val & MDIO_PMA_EXTABLE_10BT); + } + + return 0; +} +EXPORT_SYMBOL_GPL(genphy_c45_pma_read_abilities); + /* The gen10g_* functions are the old Clause 45 stub */ int gen10g_config_aneg(struct phy_device *phydev) diff --git a/include/linux/phy.h b/include/linux/phy.h index 1a1d93a2a906..177a330d84e5 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1116,6 +1116,7 @@ int genphy_c45_read_pma(struct phy_device *phydev); int genphy_c45_pma_setup_forced(struct phy_device *phydev); int genphy_c45_an_disable_aneg(struct phy_device *phydev); int genphy_c45_read_mdix(struct phy_device *phydev); +int genphy_c45_pma_read_abilities(struct phy_device *phydev); /* The gen10g_* functions are the old Clause 45 stub */ int gen10g_config_aneg(struct phy_device *phydev); -- cgit v1.2.3 From c25fff7171bebb76243ccc77f0f04aafa3db87be Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Wed, 13 Feb 2019 02:55:40 +0100 Subject: mm: add dma_addr_t to struct page The page_pool API is using page->private to store DMA addresses. As pointed out by David Miller we can't use that on 32-bit architectures with 64-bit DMA This patch adds a new dma_addr_t struct to allow storing DMA addresses Signed-off-by: Jesper Dangaard Brouer Signed-off-by: Ilias Apalodimas Acked-by: Andrew Morton Signed-off-by: David S. Miller --- include/linux/mm_types.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 2c471a2c43fa..0a36a22228e7 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -95,6 +95,13 @@ struct page { */ unsigned long private; }; + struct { /* page_pool used by netstack */ + /** + * @dma_addr: might require a 64-bit value even on + * 32-bit architectures. + */ + dma_addr_t dma_addr; + }; struct { /* slab, slob and slub */ union { struct list_head slab_list; /* uses lru */ -- cgit v1.2.3 From a2fc9d7e36f6d484d9be4a0a204400aaf6059544 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Wed, 13 Feb 2019 20:11:40 +0100 Subject: net: phy: don't use locking in phy_is_started Russell suggested to remove the locking from phy_is_started() because the read is atomic anyway and actually the locking may be more misleading. Fixes: 2b3e88ea6528 ("net: phy: improve phy state checking") Suggested-by: Russell King - ARM Linux admin Signed-off-by: Heiner Kallweit Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/phy.c | 11 +++++------ include/linux/phy.h | 15 +-------------- 2 files changed, 6 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index ca5e0c0f018c..602816d70281 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -553,7 +553,7 @@ int phy_start_aneg(struct phy_device *phydev) if (err < 0) goto out_unlock; - if (__phy_is_started(phydev)) { + if (phy_is_started(phydev)) { if (phydev->autoneg == AUTONEG_ENABLE) { err = phy_check_link_status(phydev); } else { @@ -709,7 +709,7 @@ void phy_stop_machine(struct phy_device *phydev) cancel_delayed_work_sync(&phydev->state_queue); mutex_lock(&phydev->lock); - if (__phy_is_started(phydev)) + if (phy_is_started(phydev)) phydev->state = PHY_UP; mutex_unlock(&phydev->lock); } @@ -839,15 +839,14 @@ EXPORT_SYMBOL(phy_stop_interrupts); */ void phy_stop(struct phy_device *phydev) { - mutex_lock(&phydev->lock); - - if (!__phy_is_started(phydev)) { + if (!phy_is_started(phydev)) { WARN(1, "called from state %s\n", phy_state_to_str(phydev->state)); - mutex_unlock(&phydev->lock); return; } + mutex_lock(&phydev->lock); + if (phy_interrupt_is_valid(phydev)) phy_disable_interrupts(phydev); diff --git a/include/linux/phy.h b/include/linux/phy.h index ef20aeea10cc..127fcc9c3778 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -674,26 +674,13 @@ phy_lookup_setting(int speed, int duplex, const unsigned long *mask, size_t phy_speeds(unsigned int *speeds, size_t size, unsigned long *mask); -static inline bool __phy_is_started(struct phy_device *phydev) -{ - WARN_ON(!mutex_is_locked(&phydev->lock)); - - return phydev->state >= PHY_UP; -} - /** * phy_is_started - Convenience function to check whether PHY is started * @phydev: The phy_device struct */ static inline bool phy_is_started(struct phy_device *phydev) { - bool started; - - mutex_lock(&phydev->lock); - started = __phy_is_started(phydev); - mutex_unlock(&phydev->lock); - - return started; + return phydev->state >= PHY_UP; } void phy_resolve_aneg_linkmode(struct phy_device *phydev); -- cgit v1.2.3 From 20bbf22a622178db71fb8bea5f9000d6f346185a Mon Sep 17 00:00:00 2001 From: Bodong Wang Date: Tue, 12 Feb 2019 22:55:32 -0800 Subject: net/mlx5: Use void pointer as the type in address_of macro Better to use void * and avoid unnecessary casts. This patch doesn't change any functionality. Signed-off-by: Bodong Wang Signed-off-by: Eli Cohen Reviewed-by: Or Gerlitz Reviewed-by: Daniel Jurgens Signed-off-by: Saeed Mahameed --- include/linux/mlx5/device.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 0845a227a7b2..46223efa1877 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -67,7 +67,7 @@ #define MLX5_UN_SZ_BYTES(typ) (sizeof(union mlx5_ifc_##typ##_bits) / 8) #define MLX5_UN_SZ_DW(typ) (sizeof(union mlx5_ifc_##typ##_bits) / 32) #define MLX5_BYTE_OFF(typ, fld) (__mlx5_bit_off(typ, fld) / 8) -#define MLX5_ADDR_OF(typ, p, fld) ((char *)(p) + MLX5_BYTE_OFF(typ, fld)) +#define MLX5_ADDR_OF(typ, p, fld) ((void *)((uint8_t *)(p) + MLX5_BYTE_OFF(typ, fld))) /* insert a value to a struct */ #define MLX5_SET(typ, p, fld, v) do { \ -- cgit v1.2.3 From 7e4c4330a3bc7f34f82b5bc370821e1e16d425fb Mon Sep 17 00:00:00 2001 From: Bodong Wang Date: Tue, 12 Feb 2019 22:55:33 -0800 Subject: net/mlx5: Use consistent vport num argument type Use u16 for vport number, which matches how hardware refers to this argument throughout commands. This patch doesn't change any functionality. Signed-off-by: Bodong Wang Signed-off-by: Eli Cohen Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 32 +++++++++++------------ drivers/net/ethernet/mellanox/mlx5/core/vport.c | 8 +++--- include/linux/mlx5/vport.h | 8 +++--- 3 files changed, 24 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index a44ea7b85614..d7382892e81c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -52,7 +52,7 @@ enum { struct vport_addr { struct l2addr_node node; u8 action; - u32 vport; + u16 vport; struct mlx5_flow_handle *flow_rule; bool mpfs; /* UC MAC was added to MPFs */ /* A flag indicating that mac was added due to mc promiscuous vport */ @@ -115,7 +115,7 @@ static int modify_esw_vport_context_cmd(struct mlx5_core_dev *dev, u16 vport, return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); } -static int modify_esw_vport_cvlan(struct mlx5_core_dev *dev, u32 vport, +static int modify_esw_vport_cvlan(struct mlx5_core_dev *dev, u16 vport, u16 vlan, u8 qos, u8 set_flags) { u32 in[MLX5_ST_SZ_DW(modify_esw_vport_context_in)] = {0}; @@ -152,7 +152,7 @@ static int modify_esw_vport_cvlan(struct mlx5_core_dev *dev, u32 vport, /* E-Switch FDB */ static struct mlx5_flow_handle * -__esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u32 vport, bool rx_rule, +__esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u16 vport, bool rx_rule, u8 mac_c[ETH_ALEN], u8 mac_v[ETH_ALEN]) { int match_header = (is_zero_ether_addr(mac_c) ? 0 : @@ -215,7 +215,7 @@ __esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u32 vport, bool rx_rule, } static struct mlx5_flow_handle * -esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u8 mac[ETH_ALEN], u32 vport) +esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u8 mac[ETH_ALEN], u16 vport) { u8 mac_c[ETH_ALEN]; @@ -224,7 +224,7 @@ esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u8 mac[ETH_ALEN], u32 vport) } static struct mlx5_flow_handle * -esw_fdb_set_vport_allmulti_rule(struct mlx5_eswitch *esw, u32 vport) +esw_fdb_set_vport_allmulti_rule(struct mlx5_eswitch *esw, u16 vport) { u8 mac_c[ETH_ALEN]; u8 mac_v[ETH_ALEN]; @@ -237,7 +237,7 @@ esw_fdb_set_vport_allmulti_rule(struct mlx5_eswitch *esw, u32 vport) } static struct mlx5_flow_handle * -esw_fdb_set_vport_promisc_rule(struct mlx5_eswitch *esw, u32 vport) +esw_fdb_set_vport_promisc_rule(struct mlx5_eswitch *esw, u16 vport) { u8 mac_c[ETH_ALEN]; u8 mac_v[ETH_ALEN]; @@ -377,7 +377,7 @@ typedef int (*vport_addr_action)(struct mlx5_eswitch *esw, static int esw_add_uc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) { u8 *mac = vaddr->node.addr; - u32 vport = vaddr->vport; + u16 vport = vaddr->vport; int err; /* Skip mlx5_mpfs_add_mac for PFs, @@ -409,7 +409,7 @@ fdb_add: static int esw_del_uc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) { u8 *mac = vaddr->node.addr; - u32 vport = vaddr->vport; + u16 vport = vaddr->vport; int err = 0; /* Skip mlx5_mpfs_del_mac for PFs, @@ -438,7 +438,7 @@ static void update_allmulti_vports(struct mlx5_eswitch *esw, struct esw_mc_addr *esw_mc) { u8 *mac = vaddr->node.addr; - u32 vport_idx = 0; + u16 vport_idx = 0; for (vport_idx = 0; vport_idx < esw->total_vports; vport_idx++) { struct mlx5_vport *vport = &esw->vports[vport_idx]; @@ -485,7 +485,7 @@ static int esw_add_mc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) struct hlist_head *hash = esw->mc_table; struct esw_mc_addr *esw_mc; u8 *mac = vaddr->node.addr; - u32 vport = vaddr->vport; + u16 vport = vaddr->vport; if (!esw->fdb_table.legacy.fdb) return 0; @@ -525,7 +525,7 @@ static int esw_del_mc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) struct hlist_head *hash = esw->mc_table; struct esw_mc_addr *esw_mc; u8 *mac = vaddr->node.addr; - u32 vport = vaddr->vport; + u16 vport = vaddr->vport; if (!esw->fdb_table.legacy.fdb) return 0; @@ -564,7 +564,7 @@ static int esw_del_mc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) /* Apply vport UC/MC list to HW l2 table and FDB table */ static void esw_apply_vport_addr_list(struct mlx5_eswitch *esw, - u32 vport_num, int list_type) + u16 vport_num, int list_type) { struct mlx5_vport *vport = &esw->vports[vport_num]; bool is_uc = list_type == MLX5_NVPRT_LIST_TYPE_UC; @@ -599,7 +599,7 @@ static void esw_apply_vport_addr_list(struct mlx5_eswitch *esw, /* Sync vport UC/MC list from vport context */ static void esw_update_vport_addr_list(struct mlx5_eswitch *esw, - u32 vport_num, int list_type) + u16 vport_num, int list_type) { struct mlx5_vport *vport = &esw->vports[vport_num]; bool is_uc = list_type == MLX5_NVPRT_LIST_TYPE_UC; @@ -686,7 +686,7 @@ out: /* Sync vport UC/MC list from vport context * Must be called after esw_update_vport_addr_list */ -static void esw_update_vport_mc_promisc(struct mlx5_eswitch *esw, u32 vport_num) +static void esw_update_vport_mc_promisc(struct mlx5_eswitch *esw, u16 vport_num) { struct mlx5_vport *vport = &esw->vports[vport_num]; struct l2addr_node *node; @@ -721,7 +721,7 @@ static void esw_update_vport_mc_promisc(struct mlx5_eswitch *esw, u32 vport_num) } /* Apply vport rx mode to HW FDB table */ -static void esw_apply_vport_rx_mode(struct mlx5_eswitch *esw, u32 vport_num, +static void esw_apply_vport_rx_mode(struct mlx5_eswitch *esw, u16 vport_num, bool promisc, bool mc_promisc) { struct esw_mc_addr *allmulti_addr = &esw->mc_promisc; @@ -764,7 +764,7 @@ promisc: } /* Sync vport rx mode from vport context */ -static void esw_update_vport_rx_mode(struct mlx5_eswitch *esw, u32 vport_num) +static void esw_update_vport_rx_mode(struct mlx5_eswitch *esw, u16 vport_num) { struct mlx5_vport *vport = &esw->vports[vport_num]; int promisc_all = 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index 9b150ce9d315..9a928eb48522 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -255,7 +255,7 @@ int mlx5_modify_nic_vport_mtu(struct mlx5_core_dev *mdev, u16 mtu) EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_mtu); int mlx5_query_nic_vport_mac_list(struct mlx5_core_dev *dev, - u32 vport, + u16 vport, enum mlx5_list_type list_type, u8 addr_list[][ETH_ALEN], int *list_size) @@ -373,7 +373,7 @@ int mlx5_modify_nic_vport_mac_list(struct mlx5_core_dev *dev, EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_mac_list); int mlx5_query_nic_vport_vlans(struct mlx5_core_dev *dev, - u32 vport, + u16 vport, u16 vlans[], int *size) { @@ -526,7 +526,7 @@ int mlx5_query_nic_vport_node_guid(struct mlx5_core_dev *mdev, u64 *node_guid) EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_node_guid); int mlx5_modify_nic_vport_node_guid(struct mlx5_core_dev *mdev, - u32 vport, u64 node_guid) + u16 vport, u64 node_guid) { int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in); void *nic_vport_context; @@ -827,7 +827,7 @@ int mlx5_query_hca_vport_node_guid(struct mlx5_core_dev *dev, EXPORT_SYMBOL_GPL(mlx5_query_hca_vport_node_guid); int mlx5_query_nic_vport_promisc(struct mlx5_core_dev *mdev, - u32 vport, + u16 vport, int *promisc_uc, int *promisc_mc, int *promisc_all) diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h index 9c694808c212..1654b911cdb2 100644 --- a/include/linux/mlx5/vport.h +++ b/include/linux/mlx5/vport.h @@ -60,7 +60,7 @@ int mlx5_query_nic_vport_system_image_guid(struct mlx5_core_dev *mdev, u64 *system_image_guid); int mlx5_query_nic_vport_node_guid(struct mlx5_core_dev *mdev, u64 *node_guid); int mlx5_modify_nic_vport_node_guid(struct mlx5_core_dev *mdev, - u32 vport, u64 node_guid); + u16 vport, u64 node_guid); int mlx5_query_nic_vport_qkey_viol_cntr(struct mlx5_core_dev *mdev, u16 *qkey_viol_cntr); int mlx5_query_hca_vport_gid(struct mlx5_core_dev *dev, u8 other_vport, @@ -78,7 +78,7 @@ int mlx5_query_hca_vport_system_image_guid(struct mlx5_core_dev *dev, int mlx5_query_hca_vport_node_guid(struct mlx5_core_dev *dev, u64 *node_guid); int mlx5_query_nic_vport_mac_list(struct mlx5_core_dev *dev, - u32 vport, + u16 vport, enum mlx5_list_type list_type, u8 addr_list[][ETH_ALEN], int *list_size); @@ -87,7 +87,7 @@ int mlx5_modify_nic_vport_mac_list(struct mlx5_core_dev *dev, u8 addr_list[][ETH_ALEN], int list_size); int mlx5_query_nic_vport_promisc(struct mlx5_core_dev *mdev, - u32 vport, + u16 vport, int *promisc_uc, int *promisc_mc, int *promisc_all); @@ -96,7 +96,7 @@ int mlx5_modify_nic_vport_promisc(struct mlx5_core_dev *mdev, int promisc_mc, int promisc_all); int mlx5_query_nic_vport_vlans(struct mlx5_core_dev *dev, - u32 vport, + u16 vport, u16 vlans[], int *size); int mlx5_modify_nic_vport_vlans(struct mlx5_core_dev *dev, -- cgit v1.2.3 From 591905ba96796e3b677b14fa79f27127bfaab4ab Mon Sep 17 00:00:00 2001 From: Bodong Wang Date: Tue, 12 Feb 2019 22:55:35 -0800 Subject: net/mlx5: Introduce Mellanox SmartNIC and modify page management logic Mellanox's SmartNIC combines embedded CPU(e.g, ARM) processing power with advanced network offloads to accelerate a multitude of security, networking and storage applications. With the introduction of the SmartNIC, there is a new PCI function called Embedded CPU Physical Function(ECPF). And it's possible for a PF to get its ICM pages from the ECPF PCI function. Driver shall identify if it is running on such a function by reading a bit in the initialization segment. When firmware asks for pages, it would issue a page request event specifying how many pages it requests and for which function. That driver responds with a manage_pages command providing the requested pages along with an indication for which function it is providing these pages. The encoding before this patch was as follows: function_id == 0: pages are requested for the function receiving the EQE. function_id != 0: pages are requested for VF identified by the function_id value A new one bit field in the EQE identifies that pages are requested for the ECPF. The notion of page_supplier can be introduced here and to support that, manage pages and query pages were modified so firmware can distinguish the following cases: 1. Function provides pages for itself 2. PF provides pages for its VF 3. ECPF provides pages to itself 4. ECPF provides pages for another function This distinction is possible through the introduction of the bit "embedded_cpu_function" in query_pages, manage_pages and page request EQE. Signed-off-by: Bodong Wang Signed-off-by: Eli Cohen Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/Makefile | 2 +- drivers/net/ethernet/mellanox/mlx5/core/ecpf.c | 9 ++++ drivers/net/ethernet/mellanox/mlx5/core/ecpf.h | 25 ++++++++++ drivers/net/ethernet/mellanox/mlx5/core/main.c | 2 + .../net/ethernet/mellanox/mlx5/core/mlx5_core.h | 2 +- .../net/ethernet/mellanox/mlx5/core/pagealloc.c | 54 +++++++++++++++------- drivers/net/ethernet/mellanox/mlx5/core/sriov.c | 2 +- include/linux/mlx5/device.h | 2 +- include/linux/mlx5/driver.h | 9 +++- include/linux/mlx5/mlx5_ifc.h | 12 +++-- 10 files changed, 94 insertions(+), 25 deletions(-) create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/ecpf.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/ecpf.h (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 0257731e6d42..07965350b903 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -35,7 +35,7 @@ mlx5_core-$(CONFIG_MLX5_ESWITCH) += en_rep.o en_tc.o en/tc_tun.o # # Core extra # -mlx5_core-$(CONFIG_MLX5_ESWITCH) += eswitch.o eswitch_offloads.o +mlx5_core-$(CONFIG_MLX5_ESWITCH) += eswitch.o eswitch_offloads.o ecpf.o mlx5_core-$(CONFIG_MLX5_MPFS) += lib/mpfs.o mlx5_core-$(CONFIG_VXLAN) += lib/vxlan.o mlx5_core-$(CONFIG_PTP_1588_CLOCK) += lib/clock.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c new file mode 100644 index 000000000000..28b8c5c5c8c7 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. */ + +#include "ecpf.h" + +bool mlx5_read_embedded_cpu(struct mlx5_core_dev *dev) +{ + return (ioread32be(&dev->iseg->initializing) >> MLX5_ECPU_BIT_NUM) & 1; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h new file mode 100644 index 000000000000..8b684f0ab48f --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. */ + +#ifndef __MLX5_ECPF_H__ +#define __MLX5_ECPF_H__ + +#include +#include "mlx5_core.h" + +#ifdef CONFIG_MLX5_ESWITCH + +enum { + MLX5_ECPU_BIT_NUM = 23, +}; + +bool mlx5_read_embedded_cpu(struct mlx5_core_dev *dev); + +#else /* CONFIG_MLX5_ESWITCH */ + +static inline bool +mlx5_read_embedded_cpu(struct mlx5_core_dev *dev) { return false; } + +#endif /* CONFIG_MLX5_ESWITCH */ + +#endif /* __MLX5_ECPF_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 6d45518edbdc..08a3da2a8358 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -65,6 +65,7 @@ #include "lib/vxlan.h" #include "lib/devcom.h" #include "diag/fw_tracer.h" +#include "ecpf.h" MODULE_AUTHOR("Eli Cohen "); MODULE_DESCRIPTION("Mellanox 5th generation network adapters (ConnectX series) core driver"); @@ -898,6 +899,7 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, struct pci_dev *pdev = dev->pdev; int err; + dev->caps.embedded_cpu = mlx5_read_embedded_cpu(dev); mutex_lock(&dev->intf_state_mutex); if (test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) { dev_warn(&dev->pdev->dev, "%s: interface is up, NOP\n", diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index c68dcea5985b..b127044293b1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -121,7 +121,7 @@ int mlx5_modify_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy, u32 modify_bitmask); int mlx5_destroy_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy, u32 element_id); -int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev); +int mlx5_wait_for_pages(struct mlx5_core_dev *dev, int *pages); u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev, struct ptp_system_timestamp *sts); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c index a83b517b0714..41025387ff2c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c @@ -48,6 +48,7 @@ enum { struct mlx5_pages_req { struct mlx5_core_dev *dev; u16 func_id; + u8 ec_function; s32 npages; struct work_struct work; }; @@ -143,6 +144,7 @@ static int mlx5_cmd_query_pages(struct mlx5_core_dev *dev, u16 *func_id, MLX5_SET(query_pages_in, in, op_mod, boot ? MLX5_QUERY_PAGES_IN_OP_MOD_BOOT_PAGES : MLX5_QUERY_PAGES_IN_OP_MOD_INIT_PAGES); + MLX5_SET(query_pages_in, in, embedded_cpu_function, mlx5_core_is_ecpf(dev)); err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); if (err) @@ -253,7 +255,8 @@ err_mapping: return err; } -static void page_notify_fail(struct mlx5_core_dev *dev, u16 func_id) +static void page_notify_fail(struct mlx5_core_dev *dev, u16 func_id, + bool ec_function) { u32 out[MLX5_ST_SZ_DW(manage_pages_out)] = {0}; u32 in[MLX5_ST_SZ_DW(manage_pages_in)] = {0}; @@ -262,6 +265,7 @@ static void page_notify_fail(struct mlx5_core_dev *dev, u16 func_id) MLX5_SET(manage_pages_in, in, opcode, MLX5_CMD_OP_MANAGE_PAGES); MLX5_SET(manage_pages_in, in, op_mod, MLX5_PAGES_CANT_GIVE); MLX5_SET(manage_pages_in, in, function_id, func_id); + MLX5_SET(manage_pages_in, in, embedded_cpu_function, ec_function); err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); if (err) @@ -270,7 +274,7 @@ static void page_notify_fail(struct mlx5_core_dev *dev, u16 func_id) } static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages, - int notify_fail) + int notify_fail, bool ec_function) { u32 out[MLX5_ST_SZ_DW(manage_pages_out)] = {0}; int inlen = MLX5_ST_SZ_BYTES(manage_pages_in); @@ -305,6 +309,7 @@ retry: MLX5_SET(manage_pages_in, in, op_mod, MLX5_PAGES_GIVE); MLX5_SET(manage_pages_in, in, function_id, func_id); MLX5_SET(manage_pages_in, in, input_num_entries, npages); + MLX5_SET(manage_pages_in, in, embedded_cpu_function, ec_function); err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); if (err) { @@ -316,8 +321,11 @@ retry: dev->priv.fw_pages += npages; if (func_id) dev->priv.vfs_pages += npages; + else if (mlx5_core_is_ecpf(dev) && !ec_function) + dev->priv.peer_pf_pages += npages; - mlx5_core_dbg(dev, "err %d\n", err); + mlx5_core_dbg(dev, "npages %d, ec_function %d, func_id 0x%x, err %d\n", + npages, ec_function, func_id, err); kvfree(in); return 0; @@ -328,7 +336,7 @@ out_4k: out_free: kvfree(in); if (notify_fail) - page_notify_fail(dev, func_id); + page_notify_fail(dev, func_id, ec_function); return err; } @@ -364,7 +372,7 @@ static int reclaim_pages_cmd(struct mlx5_core_dev *dev, } static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages, - int *nclaimed) + int *nclaimed, bool ec_function) { int outlen = MLX5_ST_SZ_BYTES(manage_pages_out); u32 in[MLX5_ST_SZ_DW(manage_pages_in)] = {0}; @@ -385,6 +393,7 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages, MLX5_SET(manage_pages_in, in, op_mod, MLX5_PAGES_TAKE); MLX5_SET(manage_pages_in, in, function_id, func_id); MLX5_SET(manage_pages_in, in, input_num_entries, npages); + MLX5_SET(manage_pages_in, in, embedded_cpu_function, ec_function); mlx5_core_dbg(dev, "npages %d, outlen %d\n", npages, outlen); err = reclaim_pages_cmd(dev, in, sizeof(in), out, outlen); @@ -410,6 +419,8 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages, dev->priv.fw_pages -= num_claimed; if (func_id) dev->priv.vfs_pages -= num_claimed; + else if (mlx5_core_is_ecpf(dev) && !ec_function) + dev->priv.peer_pf_pages -= num_claimed; out_free: kvfree(out); @@ -423,9 +434,10 @@ static void pages_work_handler(struct work_struct *work) int err = 0; if (req->npages < 0) - err = reclaim_pages(dev, req->func_id, -1 * req->npages, NULL); + err = reclaim_pages(dev, req->func_id, -1 * req->npages, NULL, + req->ec_function); else if (req->npages > 0) - err = give_pages(dev, req->func_id, req->npages, 1); + err = give_pages(dev, req->func_id, req->npages, 1, req->ec_function); if (err) mlx5_core_warn(dev, "%s fail %d\n", @@ -434,6 +446,10 @@ static void pages_work_handler(struct work_struct *work) kfree(req); } +enum { + EC_FUNCTION_MASK = 0x8000, +}; + static int req_pages_handler(struct notifier_block *nb, unsigned long type, void *data) { @@ -441,6 +457,7 @@ static int req_pages_handler(struct notifier_block *nb, struct mlx5_core_dev *dev; struct mlx5_priv *priv; struct mlx5_eqe *eqe; + bool ec_function; u16 func_id; s32 npages; @@ -450,6 +467,7 @@ static int req_pages_handler(struct notifier_block *nb, func_id = be16_to_cpu(eqe->data.req_pages.func_id); npages = be32_to_cpu(eqe->data.req_pages.num_pages); + ec_function = be16_to_cpu(eqe->data.req_pages.ec_function) & EC_FUNCTION_MASK; mlx5_core_dbg(dev, "page request for func 0x%x, npages %d\n", func_id, npages); req = kzalloc(sizeof(*req), GFP_ATOMIC); @@ -461,6 +479,7 @@ static int req_pages_handler(struct notifier_block *nb, req->dev = dev; req->func_id = func_id; req->npages = npages; + req->ec_function = ec_function; INIT_WORK(&req->work, pages_work_handler); queue_work(dev->priv.pg_wq, &req->work); return NOTIFY_OK; @@ -479,7 +498,7 @@ int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev, int boot) mlx5_core_dbg(dev, "requested %d %s pages for func_id 0x%x\n", npages, boot ? "boot" : "init", func_id); - return give_pages(dev, func_id, npages, 0); + return give_pages(dev, func_id, npages, 0, mlx5_core_is_ecpf(dev)); } enum { @@ -513,7 +532,7 @@ int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev) fwp = rb_entry(p, struct fw_page, rb_node); err = reclaim_pages(dev, fwp->func_id, optimal_reclaimed_pages(), - &nclaimed); + &nclaimed, mlx5_core_is_ecpf(dev)); if (err) { mlx5_core_warn(dev, "failed reclaiming pages (%d)\n", @@ -535,6 +554,9 @@ int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev) WARN(dev->priv.vfs_pages, "VFs FW pages counter is %d after reclaiming all pages\n", dev->priv.vfs_pages); + WARN(dev->priv.peer_pf_pages, + "Peer PF FW pages counter is %d after reclaiming all pages\n", + dev->priv.peer_pf_pages); return 0; } @@ -567,10 +589,10 @@ void mlx5_pagealloc_stop(struct mlx5_core_dev *dev) flush_workqueue(dev->priv.pg_wq); } -int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev) +int mlx5_wait_for_pages(struct mlx5_core_dev *dev, int *pages) { unsigned long end = jiffies + msecs_to_jiffies(MAX_RECLAIM_VFS_PAGES_TIME_MSECS); - int prev_vfs_pages = dev->priv.vfs_pages; + int prev_pages = *pages; /* In case of internal error we will free the pages manually later */ if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { @@ -578,16 +600,16 @@ int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev) return 0; } - mlx5_core_dbg(dev, "Waiting for %d pages from %s\n", prev_vfs_pages, + mlx5_core_dbg(dev, "Waiting for %d pages from %s\n", prev_pages, dev->priv.name); - while (dev->priv.vfs_pages) { + while (*pages) { if (time_after(jiffies, end)) { - mlx5_core_warn(dev, "aborting while there are %d pending pages\n", dev->priv.vfs_pages); + mlx5_core_warn(dev, "aborting while there are %d pending pages\n", *pages); return -ETIMEDOUT; } - if (dev->priv.vfs_pages < prev_vfs_pages) { + if (*pages < prev_pages) { end = jiffies + msecs_to_jiffies(MAX_RECLAIM_VFS_PAGES_TIME_MSECS); - prev_vfs_pages = dev->priv.vfs_pages; + prev_pages = *pages; } msleep(50); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c index 6e178030d8fb..7b23fa8d2d60 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c @@ -147,7 +147,7 @@ out: if (MLX5_ESWITCH_MANAGER(dev)) mlx5_eswitch_disable_sriov(dev->priv.eswitch); - if (mlx5_wait_for_vf_pages(dev)) + if (mlx5_wait_for_pages(dev, &dev->priv.vfs_pages)) mlx5_core_warn(dev, "timeout reclaiming VFs pages\n"); } diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 46223efa1877..f2070350f60a 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -591,7 +591,7 @@ struct mlx5_eqe_cmd { }; struct mlx5_eqe_page_req { - u8 rsvd0[2]; + __be16 ec_function; __be16 func_id; __be32 num_pages; __be32 rsvd1[5]; diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 039c9398614c..cce4e8293384 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -522,6 +522,7 @@ struct mlx5_priv { atomic_t reg_pages; struct list_head free_list; int vfs_pages; + int peer_pf_pages; struct mlx5_core_health health; @@ -652,6 +653,7 @@ struct mlx5_core_dev { u32 mcam[MLX5_ST_SZ_DW(mcam_reg)]; u32 fpga[MLX5_ST_SZ_DW(fpga_cap)]; u32 qcam[MLX5_ST_SZ_DW(qcam_reg)]; + u8 embedded_cpu; } caps; u64 sys_image_guid; phys_addr_t iseg_base; @@ -922,7 +924,7 @@ void mlx5_pagealloc_cleanup(struct mlx5_core_dev *dev); void mlx5_pagealloc_start(struct mlx5_core_dev *dev); void mlx5_pagealloc_stop(struct mlx5_core_dev *dev); void mlx5_core_req_pages_handler(struct mlx5_core_dev *dev, u16 func_id, - s32 npages); + s32 npages, bool ec_function); int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev, int boot); int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev); void mlx5_register_debugfs(void); @@ -1076,6 +1078,11 @@ static inline int mlx5_core_is_pf(struct mlx5_core_dev *dev) return !(dev->priv.pci_dev_data & MLX5_PCI_DEV_IS_VF); } +static inline bool mlx5_core_is_ecpf(struct mlx5_core_dev *dev) +{ + return dev->caps.embedded_cpu; +} + #define MLX5_TOTAL_VPORTS(mdev) (1 + pci_sriov_get_totalvfs((mdev)->pdev)) #define MLX5_VPORT_MANAGER(mdev) \ (MLX5_CAP_GEN(mdev, vport_group_manager) && \ diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index c5c679390fbd..46799b4c8859 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -4441,7 +4441,8 @@ struct mlx5_ifc_query_pages_out_bits { u8 syndrome[0x20]; - u8 reserved_at_40[0x10]; + u8 embedded_cpu_function[0x1]; + u8 reserved_at_41[0xf]; u8 function_id[0x10]; u8 num_pages[0x20]; @@ -4460,7 +4461,8 @@ struct mlx5_ifc_query_pages_in_bits { u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_at_40[0x10]; + u8 embedded_cpu_function[0x1]; + u8 reserved_at_41[0xf]; u8 function_id[0x10]; u8 reserved_at_60[0x20]; @@ -5880,7 +5882,8 @@ struct mlx5_ifc_manage_pages_in_bits { u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_at_40[0x10]; + u8 embedded_cpu_function[0x1]; + u8 reserved_at_41[0xf]; u8 function_id[0x10]; u8 input_num_entries[0x20]; @@ -8749,7 +8752,8 @@ struct mlx5_ifc_initial_seg_bits { u8 initializing[0x1]; u8 reserved_at_fe1[0x4]; u8 nic_interface_supported[0x3]; - u8 reserved_at_fe8[0x18]; + u8 embedded_cpu[0x1]; + u8 reserved_at_fe9[0x17]; struct mlx5_ifc_health_buffer_bits health_buffer; -- cgit v1.2.3 From 22e939a91dcb9bd4b773f2a0c0cb4eb016679b49 Mon Sep 17 00:00:00 2001 From: Bodong Wang Date: Tue, 12 Feb 2019 22:55:36 -0800 Subject: net/mlx5: Update enable HCA dependency With the introduction of ECPF, we require that the ECPF driver will aways call enable/disable HCA for that PF in the same way a PF does this for its VFs. The PF is still responsible for calling enable and disable HCA for its VFs. To distinguish between the ECPF executing enable/disable HCA for itself or for the PF, it sets the embedded CPU function bit in the input params struct of these commands. When the bit is cleared and function ID is zero, it refers to the peer PF. Signed-off-by: Bodong Wang Signed-off-by: Eli Cohen Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/ecpf.c | 76 ++++++++++++++++++++++++++ drivers/net/ethernet/mellanox/mlx5/core/ecpf.h | 4 ++ drivers/net/ethernet/mellanox/mlx5/core/main.c | 14 +++++ include/linux/mlx5/mlx5_ifc.h | 6 +- 4 files changed, 98 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c index 28b8c5c5c8c7..1bcf8b8f9713 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c @@ -7,3 +7,79 @@ bool mlx5_read_embedded_cpu(struct mlx5_core_dev *dev) { return (ioread32be(&dev->iseg->initializing) >> MLX5_ECPU_BIT_NUM) & 1; } + +static int mlx5_peer_pf_enable_hca(struct mlx5_core_dev *dev) +{ + u32 out[MLX5_ST_SZ_DW(enable_hca_out)] = {}; + u32 in[MLX5_ST_SZ_DW(enable_hca_in)] = {}; + + MLX5_SET(enable_hca_in, in, opcode, MLX5_CMD_OP_ENABLE_HCA); + MLX5_SET(enable_hca_in, in, function_id, 0); + MLX5_SET(enable_hca_in, in, embedded_cpu_function, 0); + return mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); +} + +static int mlx5_peer_pf_disable_hca(struct mlx5_core_dev *dev) +{ + u32 out[MLX5_ST_SZ_DW(disable_hca_out)] = {}; + u32 in[MLX5_ST_SZ_DW(disable_hca_in)] = {}; + + MLX5_SET(disable_hca_in, in, opcode, MLX5_CMD_OP_DISABLE_HCA); + MLX5_SET(disable_hca_in, in, function_id, 0); + MLX5_SET(enable_hca_in, in, embedded_cpu_function, 0); + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} + +static int mlx5_peer_pf_init(struct mlx5_core_dev *dev) +{ + int err; + + err = mlx5_peer_pf_enable_hca(dev); + if (err) + mlx5_core_err(dev, "Failed to enable peer PF HCA err(%d)\n", + err); + + return err; +} + +static void mlx5_peer_pf_cleanup(struct mlx5_core_dev *dev) +{ + int err; + + err = mlx5_peer_pf_disable_hca(dev); + if (err) { + mlx5_core_err(dev, "Failed to disable peer PF HCA err(%d)\n", + err); + return; + } + + err = mlx5_wait_for_pages(dev, &dev->priv.peer_pf_pages); + if (err) + mlx5_core_warn(dev, "Timeout reclaiming peer PF pages err(%d)\n", + err); +} + +int mlx5_ec_init(struct mlx5_core_dev *dev) +{ + int err = 0; + + if (!mlx5_core_is_ecpf(dev)) + return 0; + + /* ECPF shall enable HCA for peer PF in the same way a PF + * does this for its VFs. + */ + err = mlx5_peer_pf_init(dev); + if (err) + return err; + + return 0; +} + +void mlx5_ec_cleanup(struct mlx5_core_dev *dev) +{ + if (!mlx5_core_is_ecpf(dev)) + return; + + mlx5_peer_pf_cleanup(dev); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h index 8b684f0ab48f..d3d7a00a02ac 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h @@ -14,11 +14,15 @@ enum { }; bool mlx5_read_embedded_cpu(struct mlx5_core_dev *dev); +int mlx5_ec_init(struct mlx5_core_dev *dev); +void mlx5_ec_cleanup(struct mlx5_core_dev *dev); #else /* CONFIG_MLX5_ESWITCH */ static inline bool mlx5_read_embedded_cpu(struct mlx5_core_dev *dev) { return false; } +static inline int mlx5_ec_init(struct mlx5_core_dev *dev) { return 0; } +static inline void mlx5_ec_cleanup(struct mlx5_core_dev *dev) {} #endif /* CONFIG_MLX5_ESWITCH */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 08a3da2a8358..40d591c8e76c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -612,6 +612,8 @@ int mlx5_core_enable_hca(struct mlx5_core_dev *dev, u16 func_id) MLX5_SET(enable_hca_in, in, opcode, MLX5_CMD_OP_ENABLE_HCA); MLX5_SET(enable_hca_in, in, function_id, func_id); + MLX5_SET(enable_hca_in, in, embedded_cpu_function, + dev->caps.embedded_cpu); return mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); } @@ -622,6 +624,8 @@ int mlx5_core_disable_hca(struct mlx5_core_dev *dev, u16 func_id) MLX5_SET(disable_hca_in, in, opcode, MLX5_CMD_OP_DISABLE_HCA); MLX5_SET(disable_hca_in, in, function_id, func_id); + MLX5_SET(enable_hca_in, in, embedded_cpu_function, + dev->caps.embedded_cpu); return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } @@ -1071,6 +1075,12 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, goto err_sriov; } + err = mlx5_ec_init(dev); + if (err) { + dev_err(&pdev->dev, "Failed to init embedded CPU\n"); + goto err_ec; + } + if (mlx5_device_registered(dev)) { mlx5_attach_device(dev); } else { @@ -1088,6 +1098,9 @@ out: return 0; err_reg_dev: + mlx5_ec_cleanup(dev); + +err_ec: mlx5_sriov_detach(dev); err_sriov: @@ -1162,6 +1175,7 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, if (mlx5_device_registered(dev)) mlx5_detach_device(dev); + mlx5_ec_cleanup(dev); mlx5_sriov_detach(dev); mlx5_cleanup_fs(dev); mlx5_accel_ipsec_cleanup(dev); diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 46799b4c8859..1b6d5a563a3a 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -6061,7 +6061,8 @@ struct mlx5_ifc_enable_hca_in_bits { u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_at_40[0x10]; + u8 embedded_cpu_function[0x1]; + u8 reserved_at_41[0xf]; u8 function_id[0x10]; u8 reserved_at_60[0x20]; @@ -6105,7 +6106,8 @@ struct mlx5_ifc_disable_hca_in_bits { u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_at_40[0x10]; + u8 embedded_cpu_function[0x1]; + u8 reserved_at_41[0xf]; u8 function_id[0x10]; u8 reserved_at_60[0x20]; -- cgit v1.2.3 From c3a4e9f10714911486d09e7729195cc8fbedecd3 Mon Sep 17 00:00:00 2001 From: Bodong Wang Date: Tue, 12 Feb 2019 22:55:37 -0800 Subject: net/mlx5: Add query host params command The QUERY_HOST_PARAMS command is used by an Embedded CPU Physical Function (ECPF) driver to identify and retrieve information about the PF on the host side. E.g, number of virtual functions and PCI BDF. The number of VFs can be changed on the fly, a function is added to query current number of VFs and will be used in downstream patches. Signed-off-by: Bodong Wang Signed-off-by: Eli Cohen Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 2 ++ drivers/net/ethernet/mellanox/mlx5/core/ecpf.c | 27 +++++++++++++++++ drivers/net/ethernet/mellanox/mlx5/core/ecpf.h | 4 +++ include/linux/mlx5/mlx5_ifc.h | 41 ++++++++++++++++++++++++++ 4 files changed, 74 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index a25a8c6f938e..46d70eb2d2f7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -316,6 +316,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_DESTROY_GENERAL_OBJECT: case MLX5_CMD_OP_DEALLOC_MEMIC: case MLX5_CMD_OP_PAGE_FAULT_RESUME: + case MLX5_CMD_OP_QUERY_HOST_PARAMS: return MLX5_CMD_STAT_OK; case MLX5_CMD_OP_QUERY_HCA_CAP: @@ -627,6 +628,7 @@ const char *mlx5_command_str(int command) MLX5_COMMAND_STR_CASE(QUERY_MODIFY_HEADER_CONTEXT); MLX5_COMMAND_STR_CASE(ALLOC_MEMIC); MLX5_COMMAND_STR_CASE(DEALLOC_MEMIC); + MLX5_COMMAND_STR_CASE(QUERY_HOST_PARAMS); default: return "unknown command opcode"; } } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c index 1bcf8b8f9713..4746f2d28fb6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c @@ -83,3 +83,30 @@ void mlx5_ec_cleanup(struct mlx5_core_dev *dev) mlx5_peer_pf_cleanup(dev); } + +static int mlx5_query_host_params_context(struct mlx5_core_dev *dev, + u32 *out, int outlen) +{ + u32 in[MLX5_ST_SZ_DW(query_host_params_in)] = {}; + + MLX5_SET(query_host_params_in, in, opcode, + MLX5_CMD_OP_QUERY_HOST_PARAMS); + + return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen); +} + +int mlx5_query_host_params_num_vfs(struct mlx5_core_dev *dev, int *num_vf) +{ + u32 out[MLX5_ST_SZ_DW(query_host_params_out)] = {}; + int err; + + err = mlx5_query_host_params_context(dev, out, sizeof(out)); + if (err) + return err; + + *num_vf = MLX5_GET(query_host_params_out, out, + host_params_context.host_num_of_vfs); + mlx5_core_dbg(dev, "host_num_of_vfs %d\n", *num_vf); + + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h index d3d7a00a02ac..346372df218f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h @@ -16,6 +16,7 @@ enum { bool mlx5_read_embedded_cpu(struct mlx5_core_dev *dev); int mlx5_ec_init(struct mlx5_core_dev *dev); void mlx5_ec_cleanup(struct mlx5_core_dev *dev); +int mlx5_query_host_params_num_vfs(struct mlx5_core_dev *dev, int *num_vf); #else /* CONFIG_MLX5_ESWITCH */ @@ -23,6 +24,9 @@ static inline bool mlx5_read_embedded_cpu(struct mlx5_core_dev *dev) { return false; } static inline int mlx5_ec_init(struct mlx5_core_dev *dev) { return 0; } static inline void mlx5_ec_cleanup(struct mlx5_core_dev *dev) {} +static inline int +mlx5_query_host_params_num_vfs(struct mlx5_core_dev *dev, int *num_vf) +{ return -EOPNOTSUPP; } #endif /* CONFIG_MLX5_ESWITCH */ diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 1b6d5a563a3a..565046830559 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -142,6 +142,7 @@ enum { MLX5_CMD_OP_QUERY_XRQ_DC_PARAMS_ENTRY = 0x725, MLX5_CMD_OP_SET_XRQ_DC_PARAMS_ENTRY = 0x726, MLX5_CMD_OP_QUERY_XRQ_ERROR_PARAMS = 0x727, + MLX5_CMD_OP_QUERY_HOST_PARAMS = 0x740, MLX5_CMD_OP_QUERY_VPORT_STATE = 0x750, MLX5_CMD_OP_MODIFY_VPORT_STATE = 0x751, MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT = 0x752, @@ -9522,4 +9523,44 @@ struct mlx5_ifc_mtrc_ctrl_bits { u8 reserved_at_80[0x180]; }; +struct mlx5_ifc_host_params_context_bits { + u8 host_number[0x8]; + u8 reserved_at_8[0x8]; + u8 host_num_of_vfs[0x10]; + + u8 reserved_at_20[0x10]; + u8 host_pci_bus[0x10]; + + u8 reserved_at_40[0x10]; + u8 host_pci_device[0x10]; + + u8 reserved_at_60[0x10]; + u8 host_pci_function[0x10]; + + u8 reserved_at_80[0x180]; +}; + +struct mlx5_ifc_query_host_params_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0x40]; +}; + +struct mlx5_ifc_query_host_params_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x40]; + + struct mlx5_ifc_host_params_context_bits host_params_context; + + u8 reserved_at_280[0x180]; +}; + #endif /* MLX5_IFC_H */ -- cgit v1.2.3 From 7f0d11c7e0d08304de55b6a571a69166f3d54160 Mon Sep 17 00:00:00 2001 From: Bodong Wang Date: Tue, 12 Feb 2019 22:55:38 -0800 Subject: net/mlx5: Add host params change event In Embedded CPU (EC) configurations, the EC driver needs to know when the number of virtual functions change on the corresponding PF at the host side. This is required so the EC driver can create or destroy representor net devices that represent the VFs ports. Whenever a change in the number of VFs occurs, firmware will generate an event towards the EC which will trigger a work to complete the rest of the handling. The specifics of the handling will be introduced in a downstream patch. Signed-off-by: Bodong Wang Signed-off-by: Eli Cohen Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 3 +++ drivers/net/ethernet/mellanox/mlx5/core/events.c | 2 ++ include/linux/mlx5/device.h | 2 ++ include/linux/mlx5/driver.h | 5 +++++ 4 files changed, 12 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 7092457705a2..5c02f9291799 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -530,6 +530,9 @@ static u64 gather_async_events_mask(struct mlx5_core_dev *dev) if (MLX5_CAP_GEN(dev, max_num_of_monitor_counters)) async_event_mask |= (1ull << MLX5_EVENT_TYPE_MONITOR_COUNTER); + if (mlx5_core_is_ecpf_esw_manager(dev)) + async_event_mask |= (1ull << MLX5_EVENT_TYPE_HOST_PARAMS_CHANGE); + return async_event_mask; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/events.c b/drivers/net/ethernet/mellanox/mlx5/core/events.c index fbc42b7252a9..4f7f776d6332 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/events.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/events.c @@ -103,6 +103,8 @@ static const char *eqe_type_str(u8 type) return "MLX5_EVENT_TYPE_STALL_EVENT"; case MLX5_EVENT_TYPE_CMD: return "MLX5_EVENT_TYPE_CMD"; + case MLX5_EVENT_TYPE_HOST_PARAMS_CHANGE: + return "MLX5_EVENT_TYPE_HOST_PARAMS_CHANGE"; case MLX5_EVENT_TYPE_PAGE_REQUEST: return "MLX5_EVENT_TYPE_PAGE_REQUEST"; case MLX5_EVENT_TYPE_PAGE_FAULT: diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index f2070350f60a..f93a5598b942 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -342,6 +342,8 @@ enum mlx5_event { MLX5_EVENT_TYPE_PAGE_FAULT = 0xc, MLX5_EVENT_TYPE_NIC_VPORT_CHANGE = 0xd, + MLX5_EVENT_TYPE_HOST_PARAMS_CHANGE = 0xe, + MLX5_EVENT_TYPE_DCT_DRAINED = 0x1c, MLX5_EVENT_TYPE_FPGA_ERROR = 0x20, diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index cce4e8293384..151563a12fc2 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1083,6 +1083,11 @@ static inline bool mlx5_core_is_ecpf(struct mlx5_core_dev *dev) return dev->caps.embedded_cpu; } +static inline bool mlx5_core_is_ecpf_esw_manager(struct mlx5_core_dev *dev) +{ + return dev->caps.embedded_cpu && MLX5_CAP_GEN(dev, eswitch_manager); +} + #define MLX5_TOTAL_VPORTS(mdev) (1 + pci_sriov_get_totalvfs((mdev)->pdev)) #define MLX5_VPORT_MANAGER(mdev) \ (MLX5_CAP_GEN(mdev, vport_group_manager) && \ -- cgit v1.2.3 From feb393693316bd5de2c88a020f6ded51e3a4120b Mon Sep 17 00:00:00 2001 From: Bodong Wang Date: Tue, 12 Feb 2019 22:55:39 -0800 Subject: net/mlx5: Provide an alternative VF upper bound for ECPF ECPF doesn't support SR-IOV, but an ECPF E-Switch manager shall know the max VFs supported by its peer host PF in order to control those VF vports. The current driver implementation uses the total vfs quantity as provided by the pci sub-system for an upper bound of the VF vports the e-switch code needs to deal with. This obviously can't work as is on ECPF e-switch manager. For now, we use a hard coded value of 128 on such systems. Signed-off-by: Bodong Wang Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 151563a12fc2..46e0aa52a58a 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1088,7 +1088,16 @@ static inline bool mlx5_core_is_ecpf_esw_manager(struct mlx5_core_dev *dev) return dev->caps.embedded_cpu && MLX5_CAP_GEN(dev, eswitch_manager); } -#define MLX5_TOTAL_VPORTS(mdev) (1 + pci_sriov_get_totalvfs((mdev)->pdev)) +#define MLX5_HOST_PF_MAX_VFS (127u) +static inline u16 mlx5_core_max_vfs(struct mlx5_core_dev *dev) +{ + if (mlx5_core_is_ecpf_esw_manager(dev)) + return MLX5_HOST_PF_MAX_VFS; + else + return pci_sriov_get_totalvfs(dev->pdev); +} + +#define MLX5_TOTAL_VPORTS(mdev) (1 + mlx5_core_max_vfs(mdev)) #define MLX5_VPORT_MANAGER(mdev) \ (MLX5_CAP_GEN(mdev, vport_group_manager) && \ (MLX5_CAP_GEN(mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH) && \ -- cgit v1.2.3 From b05af6aacdb920dc3bfd27d53ade7f680d43265c Mon Sep 17 00:00:00 2001 From: Bodong Wang Date: Tue, 12 Feb 2019 22:55:40 -0800 Subject: net/mlx5: E-Switch, Normalize the name of uplink vport number Driver used to name uplink vport as FDB_UPLINK_VPORT, it's hard to comply with the same naming convention along with the introduction of other vports. Use MLX5_VPORT as the prefix for such vports and relocate the uplink vport definition to public header file for the benefits of both net and IB drivers. This patch doesn't change any functionality. Signed-off-by: Bodong Wang Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- drivers/infiniband/hw/mlx5/ib_rep.c | 4 ++-- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 22 +++++++++++----------- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 6 +++--- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 8 +++----- drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 2 -- .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 14 +++++++------- include/linux/mlx5/vport.h | 4 ++++ 7 files changed, 30 insertions(+), 30 deletions(-) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx5/ib_rep.c b/drivers/infiniband/hw/mlx5/ib_rep.c index 3fb22967c098..99cae9a10195 100644 --- a/drivers/infiniband/hw/mlx5/ib_rep.c +++ b/drivers/infiniband/hw/mlx5/ib_rep.c @@ -3,6 +3,7 @@ * Copyright (c) 2018 Mellanox Technologies. All rights reserved. */ +#include #include "ib_rep.h" #include "srq.h" @@ -48,11 +49,10 @@ static const struct mlx5_ib_profile vf_rep_profile = { static int mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) { -#define FDB_UPLINK_VPORT 0xffff const struct mlx5_ib_profile *profile; struct mlx5_ib_dev *ibdev; - if (rep->vport == FDB_UPLINK_VPORT) + if (rep->vport == MLX5_VPORT_UPLINK) profile = &uplink_rep_profile; else profile = &vf_rep_profile; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 96cc0c6a4014..c78d21b2501e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -152,7 +152,7 @@ static void mlx5e_rep_update_hw_counters(struct mlx5e_priv *priv) struct mlx5e_rep_priv *rpriv = priv->ppriv; struct mlx5_eswitch_rep *rep = rpriv->rep; - if (rep->vport == FDB_UPLINK_VPORT) + if (rep->vport == MLX5_VPORT_UPLINK) mlx5e_uplink_rep_update_hw_counters(priv); else mlx5e_vf_rep_update_hw_counters(priv); @@ -1207,7 +1207,7 @@ bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv) return false; rep = rpriv->rep; - return (rep->vport == FDB_UPLINK_VPORT); + return (rep->vport == MLX5_VPORT_UPLINK); } static bool mlx5e_rep_has_offload_stats(const struct net_device *dev, int attr_id) @@ -1343,7 +1343,7 @@ static void mlx5e_build_rep_params(struct net_device *netdev) params->sw_mtu = netdev->mtu; /* SQ */ - if (rep->vport == FDB_UPLINK_VPORT) + if (rep->vport == MLX5_VPORT_UPLINK) params->log_sq_size = MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE; else params->log_sq_size = MLX5E_REP_PARAMS_DEF_LOG_SQ_SIZE; @@ -1370,7 +1370,7 @@ static void mlx5e_build_rep_netdev(struct net_device *netdev) struct mlx5_eswitch_rep *rep = rpriv->rep; struct mlx5_core_dev *mdev = priv->mdev; - if (rep->vport == FDB_UPLINK_VPORT) { + if (rep->vport == MLX5_VPORT_UPLINK) { SET_NETDEV_DEV(netdev, &priv->mdev->pdev->dev); netdev->netdev_ops = &mlx5e_netdev_ops_uplink_rep; /* we want a persistent mac for the uplink rep */ @@ -1402,7 +1402,7 @@ static void mlx5e_build_rep_netdev(struct net_device *netdev) netdev->hw_features |= NETIF_F_TSO6; netdev->hw_features |= NETIF_F_RXCSUM; - if (rep->vport != FDB_UPLINK_VPORT) + if (rep->vport != MLX5_VPORT_UPLINK) netdev->features |= NETIF_F_VLAN_CHALLENGED; netdev->features |= netdev->hw_features; @@ -1555,7 +1555,7 @@ static int mlx5e_init_rep_tx(struct mlx5e_priv *priv) return err; } - if (rpriv->rep->vport == FDB_UPLINK_VPORT) { + if (rpriv->rep->vport == MLX5_VPORT_UPLINK) { uplink_priv = &rpriv->uplink_priv; /* init shared tc flow table */ @@ -1591,7 +1591,7 @@ static void mlx5e_cleanup_rep_tx(struct mlx5e_priv *priv) for (tc = 0; tc < priv->profile->max_tc; tc++) mlx5e_destroy_tis(priv->mdev, priv->tisn[tc]); - if (rpriv->rep->vport == FDB_UPLINK_VPORT) { + if (rpriv->rep->vport == MLX5_VPORT_UPLINK) { /* clean indirect TC block notifications */ unregister_netdevice_notifier(&rpriv->uplink_priv.netdevice_nb); mlx5e_rep_indr_clean_block_privs(rpriv); @@ -1710,7 +1710,7 @@ mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) rpriv->rep = rep; nch = mlx5e_get_max_num_channels(dev); - profile = (rep->vport == FDB_UPLINK_VPORT) ? &mlx5e_uplink_rep_profile : &mlx5e_vf_rep_profile; + profile = (rep->vport == MLX5_VPORT_UPLINK) ? &mlx5e_uplink_rep_profile : &mlx5e_vf_rep_profile; netdev = mlx5e_create_netdev(dev, profile, nch, rpriv); if (!netdev) { pr_warn("Failed to create representor netdev for vport %d\n", @@ -1723,7 +1723,7 @@ mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) rep->rep_if[REP_ETH].priv = rpriv; INIT_LIST_HEAD(&rpriv->vport_sqs_list); - if (rep->vport == FDB_UPLINK_VPORT) { + if (rep->vport == MLX5_VPORT_UPLINK) { err = mlx5e_create_mdev_resources(dev); if (err) goto err_destroy_netdev; @@ -1759,7 +1759,7 @@ err_detach_netdev: mlx5e_detach_netdev(netdev_priv(netdev)); err_destroy_mdev_resources: - if (rep->vport == FDB_UPLINK_VPORT) + if (rep->vport == MLX5_VPORT_UPLINK) mlx5e_destroy_mdev_resources(dev); err_destroy_netdev: @@ -1779,7 +1779,7 @@ mlx5e_vport_rep_unload(struct mlx5_eswitch_rep *rep) unregister_netdev(netdev); mlx5e_rep_neigh_cleanup(rpriv); mlx5e_detach_netdev(priv); - if (rep->vport == FDB_UPLINK_VPORT) + if (rep->vport == MLX5_VPORT_UPLINK) mlx5e_destroy_mdev_resources(priv->mdev); mlx5e_destroy_netdev(priv); kfree(ppriv); /* mlx5e_rep_priv */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index cae6c6d48984..1a73e661056a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1834,7 +1834,7 @@ static int parse_cls_flower(struct mlx5e_priv *priv, if (!err && (flow->flags & MLX5E_TC_FLOW_ESWITCH)) { rep = rpriv->rep; - if (rep->vport != FDB_UPLINK_VPORT && + if (rep->vport != MLX5_VPORT_UPLINK && (esw->offloads.inline_mode != MLX5_INLINE_MODE_NONE && esw->offloads.inline_mode < match_level)) { NL_SET_ERR_MSG_MOD(extack, @@ -2724,7 +2724,7 @@ static struct rhashtable *get_tc_ht(struct mlx5e_priv *priv, int flags) static bool is_peer_flow_needed(struct mlx5e_tc_flow *flow) { struct mlx5_esw_flow_attr *attr = flow->esw_attr; - bool is_rep_ingress = attr->in_rep->vport != FDB_UPLINK_VPORT && + bool is_rep_ingress = attr->in_rep->vport != MLX5_VPORT_UPLINK && flow->flags & MLX5E_TC_FLOW_INGRESS; bool act_is_encap = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT); @@ -2849,7 +2849,7 @@ static int mlx5e_tc_add_fdb_peer_flow(struct tc_cls_flower_offload *f, * original flow and packets redirected from uplink use the * peer mdev. */ - if (flow->esw_attr->in_rep->vport == FDB_UPLINK_VPORT) + if (flow->esw_attr->in_rep->vport == MLX5_VPORT_UPLINK) in_mdev = peer_priv->mdev; else in_mdev = priv->mdev; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index d7382892e81c..0db56b4b7009 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -40,8 +40,6 @@ #include "eswitch.h" #include "fs_core.h" -#define UPLINK_VPORT 0xFFFF - enum { MLX5_ACTION_NONE = 0, MLX5_ACTION_ADD = 1, @@ -188,7 +186,7 @@ __esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u16 vport, bool rx_rule, misc_parameters); mc_misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); - MLX5_SET(fte_match_set_misc, mv_misc, source_port, UPLINK_VPORT); + MLX5_SET(fte_match_set_misc, mv_misc, source_port, MLX5_VPORT_UPLINK); MLX5_SET_TO_ONES(fte_match_set_misc, mc_misc, source_port); } @@ -499,7 +497,7 @@ static int esw_add_mc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) return -ENOMEM; esw_mc->uplink_rule = /* Forward MC MAC to Uplink */ - esw_fdb_set_vport_rule(esw, mac, UPLINK_VPORT); + esw_fdb_set_vport_rule(esw, mac, MLX5_VPORT_UPLINK); /* Add this multicast mac to all the mc promiscuous vports */ update_allmulti_vports(esw, vaddr, esw_mc); @@ -736,7 +734,7 @@ static void esw_apply_vport_rx_mode(struct mlx5_eswitch *esw, u16 vport_num, if (!allmulti_addr->uplink_rule) allmulti_addr->uplink_rule = esw_fdb_set_vport_allmulti_rule(esw, - UPLINK_VPORT); + MLX5_VPORT_UPLINK); allmulti_addr->refcnt++; } else if (vport->allmulti_rule) { mlx5_del_flow_rules(vport->allmulti_rule); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 9c89eea9b2c3..94da74b1e6ea 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -49,8 +49,6 @@ #define MLX5_MAX_MC_PER_VPORT(dev) \ (1 << MLX5_CAP_GEN(dev, log_max_current_mc_list)) -#define FDB_UPLINK_VPORT 0xffff - #define MLX5_MIN_BW_SHARE 1 #define MLX5_RATE_TO_BW_SHARE(rate, divider, limit) \ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 53065b6ae593..b0b1267eab07 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -359,15 +359,15 @@ static int esw_add_vlan_action_check(struct mlx5_esw_flow_attr *attr, in_rep = attr->in_rep; out_rep = attr->dests[0].rep; - if (push && in_rep->vport == FDB_UPLINK_VPORT) + if (push && in_rep->vport == MLX5_VPORT_UPLINK) goto out_notsupp; - if (pop && out_rep->vport == FDB_UPLINK_VPORT) + if (pop && out_rep->vport == MLX5_VPORT_UPLINK) goto out_notsupp; /* vport has vlan push configured, can't offload VF --> wire rules w.o it */ if (!push && !pop && fwd) - if (in_rep->vlan && out_rep->vport == FDB_UPLINK_VPORT) + if (in_rep->vlan && out_rep->vport == MLX5_VPORT_UPLINK) goto out_notsupp; /* protects against (1) setting rules with different vlans to push and @@ -409,7 +409,7 @@ int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw, if (!push && !pop && fwd) { /* tracks VF --> wire rules without vlan push action */ - if (attr->dests[0].rep->vport == FDB_UPLINK_VPORT) { + if (attr->dests[0].rep->vport == MLX5_VPORT_UPLINK) { vport->vlan_refcount++; attr->vlan_handled = true; } @@ -469,7 +469,7 @@ int mlx5_eswitch_del_vlan_action(struct mlx5_eswitch *esw, if (!push && !pop && fwd) { /* tracks VF --> wire rules without vlan push action */ - if (attr->dests[0].rep->vport == FDB_UPLINK_VPORT) + if (attr->dests[0].rep->vport == MLX5_VPORT_UPLINK) vport->vlan_refcount--; return 0; @@ -1227,7 +1227,7 @@ int esw_offloads_init_reps(struct mlx5_eswitch *esw) ether_addr_copy(rep->hw_id, hw_id); } - offloads->vport_reps[0].vport = FDB_UPLINK_VPORT; + offloads->vport_reps[0].vport = MLX5_VPORT_UPLINK; return 0; } @@ -1811,7 +1811,7 @@ void *mlx5_eswitch_get_proto_dev(struct mlx5_eswitch *esw, struct mlx5_esw_offload *offloads = &esw->offloads; struct mlx5_eswitch_rep *rep; - if (vport == FDB_UPLINK_VPORT) + if (vport == MLX5_VPORT_UPLINK) vport = UPLINK_REP_INDEX; rep = &offloads->vport_reps[vport]; diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h index 1654b911cdb2..2e2928eacd97 100644 --- a/include/linux/mlx5/vport.h +++ b/include/linux/mlx5/vport.h @@ -42,6 +42,10 @@ enum { MLX5_CAP_INLINE_MODE_NOT_REQUIRED, }; +enum { + MLX5_VPORT_UPLINK = 0xffff +}; + u8 mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport); int mlx5_modify_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport, u8 state); -- cgit v1.2.3 From bf3e4d387daed36aad2cfd4f493b07714ac0cd5e Mon Sep 17 00:00:00 2001 From: Bodong Wang Date: Tue, 12 Feb 2019 22:55:41 -0800 Subject: net/mlx5: Relocate vport macros to the vport header file These are two macros in the driver general header which deal with the number of total vports and if a vport is vport manager. Such macros are vport entities, better to place them at the vport header file. This patch doesn't change any functionality. Signed-off-by: Bodong Wang Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 1 + drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 1 + include/linux/mlx5/driver.h | 6 ------ include/linux/mlx5/vport.h | 7 +++++++ 4 files changed, 9 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 5c02f9291799..bb6e5b5d9681 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #ifdef CONFIG_RFS_ACCEL diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 79f122b45def..b6a7bc8f667c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -32,6 +32,7 @@ #include #include +#include #include #include "mlx5_core.h" diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 46e0aa52a58a..c5454f985e1d 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1097,12 +1097,6 @@ static inline u16 mlx5_core_max_vfs(struct mlx5_core_dev *dev) return pci_sriov_get_totalvfs(dev->pdev); } -#define MLX5_TOTAL_VPORTS(mdev) (1 + mlx5_core_max_vfs(mdev)) -#define MLX5_VPORT_MANAGER(mdev) \ - (MLX5_CAP_GEN(mdev, vport_group_manager) && \ - (MLX5_CAP_GEN(mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH) && \ - mlx5_core_is_pf(mdev)) - static inline int mlx5_get_gid_table_len(u16 param) { if (param > 4) { diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h index 2e2928eacd97..28f47e868fbc 100644 --- a/include/linux/mlx5/vport.h +++ b/include/linux/mlx5/vport.h @@ -36,6 +36,13 @@ #include #include +#define MLX5_TOTAL_VPORTS(mdev) (1 + mlx5_core_max_vfs(mdev)) + +#define MLX5_VPORT_MANAGER(mdev) \ + (MLX5_CAP_GEN(mdev, vport_group_manager) && \ + (MLX5_CAP_GEN(mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH) && \ + mlx5_core_is_pf(mdev)) + enum { MLX5_CAP_INLINE_MODE_L2, MLX5_CAP_INLINE_MODE_VPORT_CONTEXT, -- cgit v1.2.3 From cd7e4186af9d968559852b4eeb1039b3419cc590 Mon Sep 17 00:00:00 2001 From: Bodong Wang Date: Tue, 12 Feb 2019 22:55:42 -0800 Subject: net/mlx5: E-Switch, Avoid magic numbers when initializing offloads mode When dealing with the offloads mode initialization, driver refers to the number of VFs and add magic number one (1) to take account of the uplink. This is not clear and will make the code less readable after adding other vports (e.g. host PF). As these are special vports compared to VF vports, add a helper macro to denote such special vports and eliminate the use of magic number. Moreover, when creating offloads flow table and groups, the driver reserves two more slots for UC and MC miss rules. Replace this magic number with a helper macro as well. This patch doesn't change any functionality. Signed-off-by: Bodong Wang Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 2 +- .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 25 +++++++++++++--------- include/linux/mlx5/vport.h | 4 +++- 3 files changed, 19 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 0db56b4b7009..49a9e3877d2c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1638,7 +1638,7 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) } else { mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_ETH); mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB); - err = esw_offloads_init(esw, nvfs + 1); + err = esw_offloads_init(esw, nvfs + MLX5_SPECIAL_VPORTS); } if (err) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index b0b1267eab07..1496e82b5108 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -46,6 +46,11 @@ enum { FDB_SLOW_PATH }; +/* There are two match-all miss flows, one for unicast dst mac and + * one for multicast. + */ +#define MLX5_ESW_MISS_FLOWS (2) + #define fdb_prio_table(esw, chain, prio, level) \ (esw)->fdb_table.offloads.fdb_prio[(chain)][(prio)][(level)] @@ -904,8 +909,8 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports) esw->fdb_table.offloads.fdb_left[i] = ESW_POOLS[i] <= fdb_max ? ESW_SIZE / ESW_POOLS[i] : 0; - table_size = nvports * MAX_SQ_NVPORTS + MAX_PF_SQ + 2 + - esw->total_vports; + table_size = nvports * MAX_SQ_NVPORTS + MAX_PF_SQ + + MLX5_ESW_MISS_FLOWS + esw->total_vports; /* create the slow path fdb with encap set, so further table instances * can be created at run time while VFs are probed if the FW allows that. @@ -999,7 +1004,8 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports) dmac[0] = 0x01; MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ix); - MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ix + 2); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, + ix + MLX5_ESW_MISS_FLOWS); g = mlx5_create_flow_group(fdb, flow_group_in); if (IS_ERR(g)) { @@ -1048,7 +1054,7 @@ static void esw_destroy_offloads_fdb_tables(struct mlx5_eswitch *esw) esw_destroy_offloads_fast_fdb_tables(esw); } -static int esw_create_offloads_table(struct mlx5_eswitch *esw) +static int esw_create_offloads_table(struct mlx5_eswitch *esw, int nvports) { struct mlx5_flow_table_attr ft_attr = {}; struct mlx5_core_dev *dev = esw->dev; @@ -1062,7 +1068,7 @@ static int esw_create_offloads_table(struct mlx5_eswitch *esw) return -EOPNOTSUPP; } - ft_attr.max_fte = dev->priv.sriov.num_vfs + 2; + ft_attr.max_fte = nvports + MLX5_ESW_MISS_FLOWS; ft_offloads = mlx5_create_flow_table(ns, &ft_attr); if (IS_ERR(ft_offloads)) { @@ -1082,16 +1088,15 @@ static void esw_destroy_offloads_table(struct mlx5_eswitch *esw) mlx5_destroy_flow_table(offloads->ft_offloads); } -static int esw_create_vport_rx_group(struct mlx5_eswitch *esw) +static int esw_create_vport_rx_group(struct mlx5_eswitch *esw, int nvports) { int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); struct mlx5_flow_group *g; - struct mlx5_priv *priv = &esw->dev->priv; u32 *flow_group_in; void *match_criteria, *misc; int err = 0; - int nvports = priv->sriov.num_vfs + 2; + nvports = nvports + MLX5_ESW_MISS_FLOWS; flow_group_in = kvzalloc(inlen, GFP_KERNEL); if (!flow_group_in) return -ENOMEM; @@ -1407,11 +1412,11 @@ int esw_offloads_init(struct mlx5_eswitch *esw, int nvports) if (err) return err; - err = esw_create_offloads_table(esw); + err = esw_create_offloads_table(esw, nvports); if (err) goto create_ft_err; - err = esw_create_vport_rx_group(esw); + err = esw_create_vport_rx_group(esw, nvports); if (err) goto create_fg_err; diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h index 28f47e868fbc..3bc05449ac39 100644 --- a/include/linux/mlx5/vport.h +++ b/include/linux/mlx5/vport.h @@ -36,7 +36,9 @@ #include #include -#define MLX5_TOTAL_VPORTS(mdev) (1 + mlx5_core_max_vfs(mdev)) +#define MLX5_VPORT_PF_PLACEHOLDER (1u) +#define MLX5_SPECIAL_VPORTS (MLX5_VPORT_PF_PLACEHOLDER) +#define MLX5_TOTAL_VPORTS(mdev) (MLX5_SPECIAL_VPORTS + mlx5_core_max_vfs(mdev)) #define MLX5_VPORT_MANAGER(mdev) \ (MLX5_CAP_GEN(mdev, vport_group_manager) && \ -- cgit v1.2.3 From bc4e12ffefdd886057eabe38135515690d0756a6 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Tue, 12 Feb 2019 22:55:43 -0800 Subject: net/mlx5: Refactor queries to speed fields in Port Type and Speed register This patch fascicles queries to speed related fields in Port Type and Speed register (PTYS) into a single API. I addition, this patch refactors functions which serves only Ethernet driver: remove the protocol type as an input parameter, move code from 'core' directory into 'en' directory and add 'eth' prefix to the function's name. The patch also encapsulates functions that are not used outside the Ethernet driver removes redundant include files. Signed-off-by: Aya Levin Reviewed-by: Eran Ben Elisha Signed-off-by: Saeed Mahameed --- drivers/infiniband/hw/mlx5/main.c | 6 +- drivers/net/ethernet/mellanox/mlx5/core/en/port.c | 75 +++++++++++++-- drivers/net/ethernet/mellanox/mlx5/core/en/port.h | 12 +++ .../net/ethernet/mellanox/mlx5/core/en_ethtool.c | 23 ++--- drivers/net/ethernet/mellanox/mlx5/core/port.c | 106 --------------------- include/linux/mlx5/port.h | 11 --- 6 files changed, 91 insertions(+), 142 deletions(-) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 87ce62e44898..efd08b41126c 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -393,6 +393,7 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num, struct ib_port_attr *props) { struct mlx5_ib_dev *dev = to_mdev(device); + u32 out[MLX5_ST_SZ_DW(ptys_reg)] = {0}; struct mlx5_core_dev *mdev; struct net_device *ndev, *upper; enum ib_mtu ndev_ib_mtu; @@ -416,10 +417,11 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num, /* Possible bad flows are checked before filling out props so in case * of an error it will still be zeroed out. */ - err = mlx5_query_port_eth_proto_oper(mdev, ð_prot_oper, - mdev_port_num); + err = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN, + mdev_port_num); if (err) goto out; + eth_prot_oper = MLX5_GET(ptys_reg, out, eth_proto_oper); props->active_width = IB_WIDTH_4X; props->active_speed = IB_SPEED_QDR; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c index 4a37713023be..9a1c2b2f87d8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c @@ -63,6 +63,67 @@ static const u32 mlx5e_link_speed[MLX5E_LINK_MODES_NUMBER] = { [MLX5E_50GBASE_KR2] = 50000, }; +int mlx5_port_query_eth_proto(struct mlx5_core_dev *dev, u8 port, + struct mlx5e_port_eth_proto *eproto) +{ + u32 out[MLX5_ST_SZ_DW(ptys_reg)]; + int err; + + if (!eproto) + return -EINVAL; + + err = mlx5_query_port_ptys(dev, out, sizeof(out), MLX5_PTYS_EN, port); + if (err) + return err; + + eproto->cap = MLX5_GET(ptys_reg, out, eth_proto_capability); + eproto->admin = MLX5_GET(ptys_reg, out, eth_proto_admin); + eproto->oper = MLX5_GET(ptys_reg, out, eth_proto_oper); + return 0; +} + +void mlx5_port_query_eth_autoneg(struct mlx5_core_dev *dev, u8 *an_status, + u8 *an_disable_cap, u8 *an_disable_admin) +{ + u32 out[MLX5_ST_SZ_DW(ptys_reg)]; + + *an_status = 0; + *an_disable_cap = 0; + *an_disable_admin = 0; + + if (mlx5_query_port_ptys(dev, out, sizeof(out), MLX5_PTYS_EN, 1)) + return; + + *an_status = MLX5_GET(ptys_reg, out, an_status); + *an_disable_cap = MLX5_GET(ptys_reg, out, an_disable_cap); + *an_disable_admin = MLX5_GET(ptys_reg, out, an_disable_admin); +} + +int mlx5_port_set_eth_ptys(struct mlx5_core_dev *dev, bool an_disable, + u32 proto_admin) +{ + u32 out[MLX5_ST_SZ_DW(ptys_reg)]; + u32 in[MLX5_ST_SZ_DW(ptys_reg)]; + u8 an_disable_admin; + u8 an_disable_cap; + u8 an_status; + + mlx5_port_query_eth_autoneg(dev, &an_status, &an_disable_cap, + &an_disable_admin); + if (!an_disable_cap && an_disable) + return -EPERM; + + memset(in, 0, sizeof(in)); + + MLX5_SET(ptys_reg, in, local_port, 1); + MLX5_SET(ptys_reg, in, an_disable_admin, an_disable); + MLX5_SET(ptys_reg, in, proto_mask, MLX5_PTYS_EN); + MLX5_SET(ptys_reg, in, eth_proto_admin, proto_admin); + + return mlx5_core_access_reg(dev, in, sizeof(in), out, + sizeof(out), MLX5_REG_PTYS, 0, 1); +} + u32 mlx5e_port_ptys2speed(u32 eth_proto_oper) { unsigned long temp = eth_proto_oper; @@ -78,16 +139,14 @@ u32 mlx5e_port_ptys2speed(u32 eth_proto_oper) int mlx5e_port_linkspeed(struct mlx5_core_dev *mdev, u32 *speed) { - u32 out[MLX5_ST_SZ_DW(ptys_reg)] = {}; - u32 eth_proto_oper; + struct mlx5e_port_eth_proto eproto; int err; - err = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN, 1); + err = mlx5_port_query_eth_proto(mdev, 1, &eproto); if (err) return err; - eth_proto_oper = MLX5_GET(ptys_reg, out, eth_proto_oper); - *speed = mlx5e_port_ptys2speed(eth_proto_oper); + *speed = mlx5e_port_ptys2speed(eproto.oper); if (!(*speed)) err = -EINVAL; @@ -96,17 +155,17 @@ int mlx5e_port_linkspeed(struct mlx5_core_dev *mdev, u32 *speed) int mlx5e_port_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed) { + struct mlx5e_port_eth_proto eproto; u32 max_speed = 0; - u32 proto_cap; int err; int i; - err = mlx5_query_port_proto_cap(mdev, &proto_cap, MLX5_PTYS_EN); + err = mlx5_port_query_eth_proto(mdev, 1, &eproto); if (err) return err; for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) - if (proto_cap & MLX5E_PROT_MASK(i)) + if (eproto.cap & MLX5E_PROT_MASK(i)) max_speed = max(max_speed, mlx5e_link_speed[i]); *speed = max_speed; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.h b/drivers/net/ethernet/mellanox/mlx5/core/en/port.h index cd2160b8c9bf..4bdab8be10af 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/port.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.h @@ -36,6 +36,18 @@ #include #include "en.h" +struct mlx5e_port_eth_proto { + u32 cap; + u32 admin; + u32 oper; +}; + +int mlx5_port_query_eth_proto(struct mlx5_core_dev *dev, u8 port, + struct mlx5e_port_eth_proto *eproto); +void mlx5_port_query_eth_autoneg(struct mlx5_core_dev *dev, u8 *an_status, + u8 *an_disable_cap, u8 *an_disable_admin); +int mlx5_port_set_eth_ptys(struct mlx5_core_dev *dev, bool an_disable, + u32 proto_admin); u32 mlx5e_port_ptys2speed(u32 eth_proto_oper); int mlx5e_port_linkspeed(struct mlx5_core_dev *mdev, u32 *speed); int mlx5e_port_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index c9df08133718..c29e141d72fb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -882,7 +882,7 @@ int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv, const struct ethtool_link_ksettings *link_ksettings) { struct mlx5_core_dev *mdev = priv->mdev; - u32 eth_proto_cap, eth_proto_admin; + struct mlx5e_port_eth_proto eproto; bool an_changes = false; u8 an_disable_admin; u8 an_disable_cap; @@ -898,14 +898,14 @@ int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv, mlx5e_ethtool2ptys_adver_link(link_ksettings->link_modes.advertising) : mlx5e_port_speed2linkmodes(speed); - err = mlx5_query_port_proto_cap(mdev, ð_proto_cap, MLX5_PTYS_EN); + err = mlx5_port_query_eth_proto(mdev, 1, &eproto); if (err) { - netdev_err(priv->netdev, "%s: query port eth proto cap failed: %d\n", + netdev_err(priv->netdev, "%s: query port eth proto failed: %d\n", __func__, err); goto out; } - link_modes = link_modes & eth_proto_cap; + link_modes = link_modes & eproto.cap; if (!link_modes) { netdev_err(priv->netdev, "%s: Not supported link mode(s) requested", __func__); @@ -913,24 +913,17 @@ int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv, goto out; } - err = mlx5_query_port_proto_admin(mdev, ð_proto_admin, MLX5_PTYS_EN); - if (err) { - netdev_err(priv->netdev, "%s: query port eth proto admin failed: %d\n", - __func__, err); - goto out; - } - - mlx5_query_port_autoneg(mdev, MLX5_PTYS_EN, &an_status, - &an_disable_cap, &an_disable_admin); + mlx5_port_query_eth_autoneg(mdev, &an_status, &an_disable_cap, + &an_disable_admin); an_disable = link_ksettings->base.autoneg == AUTONEG_DISABLE; an_changes = ((!an_disable && an_disable_admin) || (an_disable && !an_disable_admin)); - if (!an_changes && link_modes == eth_proto_admin) + if (!an_changes && link_modes == eproto.admin) goto out; - mlx5_set_port_ptys(mdev, an_disable, link_modes, MLX5_PTYS_EN); + mlx5_port_set_eth_ptys(mdev, an_disable, link_modes); mlx5_toggle_port_link(mdev); out: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c index 2b82f35f4c35..b81542820528 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c @@ -30,10 +30,7 @@ * SOFTWARE. */ -#include -#include #include -#include #include "mlx5_core.h" int mlx5_core_access_reg(struct mlx5_core_dev *dev, void *data_in, @@ -157,44 +154,6 @@ int mlx5_set_port_beacon(struct mlx5_core_dev *dev, u16 beacon_duration) sizeof(out), MLX5_REG_MLCR, 0, 1); } -int mlx5_query_port_proto_cap(struct mlx5_core_dev *dev, - u32 *proto_cap, int proto_mask) -{ - u32 out[MLX5_ST_SZ_DW(ptys_reg)]; - int err; - - err = mlx5_query_port_ptys(dev, out, sizeof(out), proto_mask, 1); - if (err) - return err; - - if (proto_mask == MLX5_PTYS_EN) - *proto_cap = MLX5_GET(ptys_reg, out, eth_proto_capability); - else - *proto_cap = MLX5_GET(ptys_reg, out, ib_proto_capability); - - return 0; -} -EXPORT_SYMBOL_GPL(mlx5_query_port_proto_cap); - -int mlx5_query_port_proto_admin(struct mlx5_core_dev *dev, - u32 *proto_admin, int proto_mask) -{ - u32 out[MLX5_ST_SZ_DW(ptys_reg)]; - int err; - - err = mlx5_query_port_ptys(dev, out, sizeof(out), proto_mask, 1); - if (err) - return err; - - if (proto_mask == MLX5_PTYS_EN) - *proto_admin = MLX5_GET(ptys_reg, out, eth_proto_admin); - else - *proto_admin = MLX5_GET(ptys_reg, out, ib_proto_admin); - - return 0; -} -EXPORT_SYMBOL_GPL(mlx5_query_port_proto_admin); - int mlx5_query_port_link_width_oper(struct mlx5_core_dev *dev, u8 *link_width_oper, u8 local_port) { @@ -211,23 +170,6 @@ int mlx5_query_port_link_width_oper(struct mlx5_core_dev *dev, } EXPORT_SYMBOL_GPL(mlx5_query_port_link_width_oper); -int mlx5_query_port_eth_proto_oper(struct mlx5_core_dev *dev, - u32 *proto_oper, u8 local_port) -{ - u32 out[MLX5_ST_SZ_DW(ptys_reg)]; - int err; - - err = mlx5_query_port_ptys(dev, out, sizeof(out), MLX5_PTYS_EN, - local_port); - if (err) - return err; - - *proto_oper = MLX5_GET(ptys_reg, out, eth_proto_oper); - - return 0; -} -EXPORT_SYMBOL(mlx5_query_port_eth_proto_oper); - int mlx5_query_port_ib_proto_oper(struct mlx5_core_dev *dev, u8 *proto_oper, u8 local_port) { @@ -245,35 +187,6 @@ int mlx5_query_port_ib_proto_oper(struct mlx5_core_dev *dev, } EXPORT_SYMBOL(mlx5_query_port_ib_proto_oper); -int mlx5_set_port_ptys(struct mlx5_core_dev *dev, bool an_disable, - u32 proto_admin, int proto_mask) -{ - u32 out[MLX5_ST_SZ_DW(ptys_reg)]; - u32 in[MLX5_ST_SZ_DW(ptys_reg)]; - u8 an_disable_admin; - u8 an_disable_cap; - u8 an_status; - - mlx5_query_port_autoneg(dev, proto_mask, &an_status, - &an_disable_cap, &an_disable_admin); - if (!an_disable_cap && an_disable) - return -EPERM; - - memset(in, 0, sizeof(in)); - - MLX5_SET(ptys_reg, in, local_port, 1); - MLX5_SET(ptys_reg, in, an_disable_admin, an_disable); - MLX5_SET(ptys_reg, in, proto_mask, proto_mask); - if (proto_mask == MLX5_PTYS_EN) - MLX5_SET(ptys_reg, in, eth_proto_admin, proto_admin); - else - MLX5_SET(ptys_reg, in, ib_proto_admin, proto_admin); - - return mlx5_core_access_reg(dev, in, sizeof(in), out, - sizeof(out), MLX5_REG_PTYS, 0, 1); -} -EXPORT_SYMBOL_GPL(mlx5_set_port_ptys); - /* This function should be used after setting a port register only */ void mlx5_toggle_port_link(struct mlx5_core_dev *dev) { @@ -606,25 +519,6 @@ int mlx5_query_port_pfc(struct mlx5_core_dev *dev, u8 *pfc_en_tx, u8 *pfc_en_rx) } EXPORT_SYMBOL_GPL(mlx5_query_port_pfc); -void mlx5_query_port_autoneg(struct mlx5_core_dev *dev, int proto_mask, - u8 *an_status, - u8 *an_disable_cap, u8 *an_disable_admin) -{ - u32 out[MLX5_ST_SZ_DW(ptys_reg)]; - - *an_status = 0; - *an_disable_cap = 0; - *an_disable_admin = 0; - - if (mlx5_query_port_ptys(dev, out, sizeof(out), proto_mask, 1)) - return; - - *an_status = MLX5_GET(ptys_reg, out, an_status); - *an_disable_cap = MLX5_GET(ptys_reg, out, an_disable_cap); - *an_disable_admin = MLX5_GET(ptys_reg, out, an_disable_admin); -} -EXPORT_SYMBOL_GPL(mlx5_query_port_autoneg); - int mlx5_max_tc(struct mlx5_core_dev *mdev) { u8 num_tc = MLX5_CAP_GEN(mdev, max_tc) ? : 8; diff --git a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h index bf4bc01ffb0c..5be7eefa6d75 100644 --- a/include/linux/mlx5/port.h +++ b/include/linux/mlx5/port.h @@ -110,27 +110,16 @@ enum mlx5e_connector_type { int mlx5_set_port_caps(struct mlx5_core_dev *dev, u8 port_num, u32 caps); int mlx5_query_port_ptys(struct mlx5_core_dev *dev, u32 *ptys, int ptys_size, int proto_mask, u8 local_port); -int mlx5_query_port_proto_cap(struct mlx5_core_dev *dev, - u32 *proto_cap, int proto_mask); -int mlx5_query_port_proto_admin(struct mlx5_core_dev *dev, - u32 *proto_admin, int proto_mask); int mlx5_query_port_link_width_oper(struct mlx5_core_dev *dev, u8 *link_width_oper, u8 local_port); int mlx5_query_port_ib_proto_oper(struct mlx5_core_dev *dev, u8 *proto_oper, u8 local_port); -int mlx5_query_port_eth_proto_oper(struct mlx5_core_dev *dev, - u32 *proto_oper, u8 local_port); -int mlx5_set_port_ptys(struct mlx5_core_dev *dev, bool an_disable, - u32 proto_admin, int proto_mask); void mlx5_toggle_port_link(struct mlx5_core_dev *dev); int mlx5_set_port_admin_status(struct mlx5_core_dev *dev, enum mlx5_port_status status); int mlx5_query_port_admin_status(struct mlx5_core_dev *dev, enum mlx5_port_status *status); int mlx5_set_port_beacon(struct mlx5_core_dev *dev, u16 beacon_duration); -void mlx5_query_port_autoneg(struct mlx5_core_dev *dev, int proto_mask, - u8 *an_status, - u8 *an_disable_cap, u8 *an_disable_admin); int mlx5_set_port_mtu(struct mlx5_core_dev *dev, u16 mtu, u8 port); void mlx5_query_port_max_mtu(struct mlx5_core_dev *dev, u16 *max_mtu, u8 port); -- cgit v1.2.3 From a0a899895692d4227cc55b087f5f47e185af7f23 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Tue, 12 Feb 2019 22:55:44 -0800 Subject: net/mlx5: Add new fields to Port Type and Speed register Register Port Type and Speed (PTYS) introduces three new fields extending the speed/protocols the can be reported and configured. Signed-off-by: Aya Levin Reviewed-by: Eran Ben Elisha Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 565046830559..5decffe565fb 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -7826,21 +7826,23 @@ struct mlx5_ifc_ptys_reg_bits { u8 proto_mask[0x3]; u8 an_status[0x4]; - u8 reserved_at_24[0x3c]; + u8 reserved_at_24[0x1c]; + + u8 ext_eth_proto_capability[0x20]; u8 eth_proto_capability[0x20]; u8 ib_link_width_capability[0x10]; u8 ib_proto_capability[0x10]; - u8 reserved_at_a0[0x20]; + u8 ext_eth_proto_admin[0x20]; u8 eth_proto_admin[0x20]; u8 ib_link_width_admin[0x10]; u8 ib_proto_admin[0x10]; - u8 reserved_at_100[0x20]; + u8 ext_eth_proto_oper[0x20]; u8 eth_proto_oper[0x20]; @@ -8289,7 +8291,9 @@ struct mlx5_ifc_mpegc_reg_bits { struct mlx5_ifc_pcam_enhanced_features_bits { u8 reserved_at_0[0x6d]; u8 rx_icrc_encapsulated_counter[0x1]; - u8 reserved_at_6e[0x8]; + u8 reserved_at_6e[0x4]; + u8 ptys_extended_ethernet[0x1]; + u8 reserved_at_73[0x3]; u8 pfcc_mask[0x1]; u8 reserved_at_77[0x3]; u8 per_lane_error_counters[0x1]; -- cgit v1.2.3 From a08b4ed1373dc59e3e15029bc6f135ba0f53c9a7 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Tue, 12 Feb 2019 22:55:45 -0800 Subject: net/mlx5: Add support to ext_* fields introduced in Port Type and Speed register This patch exposes new link modes (including 50Gbps per lane), and ext_* fields which describes the new link modes in Port Type and Speed register (PTYS). Access functions, translation functions (speed <-> HW bits) and link max speed function were modified. Signed-off-by: Aya Levin Reviewed-by: Eran Ben Elisha Signed-off-by: Saeed Mahameed --- drivers/infiniband/hw/mlx5/main.c | 3 +- drivers/net/ethernet/mellanox/mlx5/core/en/port.c | 85 ++++++++++++++++------ drivers/net/ethernet/mellanox/mlx5/core/en/port.h | 8 +- .../net/ethernet/mellanox/mlx5/core/en_ethtool.c | 9 ++- include/linux/mlx5/port.h | 19 +++++ 5 files changed, 94 insertions(+), 30 deletions(-) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index efd08b41126c..3677c00fa3bb 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -421,7 +421,8 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num, mdev_port_num); if (err) goto out; - eth_prot_oper = MLX5_GET(ptys_reg, out, eth_proto_oper); + eth_prot_oper = MLX5_GET_ETH_PROTO(ptys_reg, out, false, + eth_proto_oper); props->active_width = IB_WIDTH_4X; props->active_speed = IB_SPEED_QDR; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c index 9a1c2b2f87d8..122927f3a600 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c @@ -63,7 +63,31 @@ static const u32 mlx5e_link_speed[MLX5E_LINK_MODES_NUMBER] = { [MLX5E_50GBASE_KR2] = 50000, }; -int mlx5_port_query_eth_proto(struct mlx5_core_dev *dev, u8 port, +static const u32 mlx5e_ext_link_speed[MLX5E_EXT_LINK_MODES_NUMBER] = { + [MLX5E_SGMII_100M] = 100, + [MLX5E_1000BASE_X_SGMII] = 1000, + [MLX5E_5GBASE_R] = 5000, + [MLX5E_10GBASE_XFI_XAUI_1] = 10000, + [MLX5E_40GBASE_XLAUI_4_XLPPI_4] = 40000, + [MLX5E_25GAUI_1_25GBASE_CR_KR] = 25000, + [MLX5E_50GAUI_2_LAUI_2_50GBASE_CR2_KR2] = 50000, + [MLX5E_50GAUI_1_LAUI_1_50GBASE_CR_KR] = 50000, + [MLX5E_CAUI_4_100GBASE_CR4_KR4] = 100000, + [MLX5E_200GAUI_4_200GBASE_CR4_KR4] = 200000, + [MLX5E_400GAUI_8] = 400000, +}; + +static void mlx5e_port_get_speed_arr(struct mlx5_core_dev *mdev, + const u32 **arr, u32 *size) +{ + bool ext = MLX5_CAP_PCAM_FEATURE(mdev, ptys_extended_ethernet); + + *size = ext ? ARRAY_SIZE(mlx5e_ext_link_speed) : + ARRAY_SIZE(mlx5e_link_speed); + *arr = ext ? mlx5e_ext_link_speed : mlx5e_link_speed; +} + +int mlx5_port_query_eth_proto(struct mlx5_core_dev *dev, u8 port, bool ext, struct mlx5e_port_eth_proto *eproto) { u32 out[MLX5_ST_SZ_DW(ptys_reg)]; @@ -72,13 +96,17 @@ int mlx5_port_query_eth_proto(struct mlx5_core_dev *dev, u8 port, if (!eproto) return -EINVAL; + if (ext != MLX5_CAP_PCAM_FEATURE(dev, ptys_extended_ethernet)) + return -EOPNOTSUPP; + err = mlx5_query_port_ptys(dev, out, sizeof(out), MLX5_PTYS_EN, port); if (err) return err; - eproto->cap = MLX5_GET(ptys_reg, out, eth_proto_capability); - eproto->admin = MLX5_GET(ptys_reg, out, eth_proto_admin); - eproto->oper = MLX5_GET(ptys_reg, out, eth_proto_oper); + eproto->cap = MLX5_GET_ETH_PROTO(ptys_reg, out, ext, + eth_proto_capability); + eproto->admin = MLX5_GET_ETH_PROTO(ptys_reg, out, ext, eth_proto_admin); + eproto->oper = MLX5_GET_ETH_PROTO(ptys_reg, out, ext, eth_proto_oper); return 0; } @@ -100,7 +128,7 @@ void mlx5_port_query_eth_autoneg(struct mlx5_core_dev *dev, u8 *an_status, } int mlx5_port_set_eth_ptys(struct mlx5_core_dev *dev, bool an_disable, - u32 proto_admin) + u32 proto_admin, bool ext) { u32 out[MLX5_ST_SZ_DW(ptys_reg)]; u32 in[MLX5_ST_SZ_DW(ptys_reg)]; @@ -118,38 +146,46 @@ int mlx5_port_set_eth_ptys(struct mlx5_core_dev *dev, bool an_disable, MLX5_SET(ptys_reg, in, local_port, 1); MLX5_SET(ptys_reg, in, an_disable_admin, an_disable); MLX5_SET(ptys_reg, in, proto_mask, MLX5_PTYS_EN); - MLX5_SET(ptys_reg, in, eth_proto_admin, proto_admin); + if (ext) + MLX5_SET(ptys_reg, in, ext_eth_proto_admin, proto_admin); + else + MLX5_SET(ptys_reg, in, eth_proto_admin, proto_admin); return mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), MLX5_REG_PTYS, 0, 1); } -u32 mlx5e_port_ptys2speed(u32 eth_proto_oper) +u32 mlx5e_port_ptys2speed(struct mlx5_core_dev *mdev, u32 eth_proto_oper) { unsigned long temp = eth_proto_oper; + const u32 *table; u32 speed = 0; + u32 max_size; int i; - i = find_first_bit(&temp, MLX5E_LINK_MODES_NUMBER); - if (i < MLX5E_LINK_MODES_NUMBER) - speed = mlx5e_link_speed[i]; - + mlx5e_port_get_speed_arr(mdev, &table, &max_size); + i = find_first_bit(&temp, max_size); + if (i < max_size) + speed = table[i]; return speed; } int mlx5e_port_linkspeed(struct mlx5_core_dev *mdev, u32 *speed) { struct mlx5e_port_eth_proto eproto; + bool ext; int err; - err = mlx5_port_query_eth_proto(mdev, 1, &eproto); + ext = MLX5_CAP_PCAM_FEATURE(mdev, ptys_extended_ethernet); + err = mlx5_port_query_eth_proto(mdev, 1, ext, &eproto); if (err) - return err; + goto out; - *speed = mlx5e_port_ptys2speed(eproto.oper); + *speed = mlx5e_port_ptys2speed(mdev, eproto.oper); if (!(*speed)) err = -EINVAL; +out: return err; } @@ -157,31 +193,38 @@ int mlx5e_port_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed) { struct mlx5e_port_eth_proto eproto; u32 max_speed = 0; + const u32 *table; + u32 max_size; + bool ext; int err; int i; - err = mlx5_port_query_eth_proto(mdev, 1, &eproto); + ext = MLX5_CAP_PCAM_FEATURE(mdev, ptys_extended_ethernet); + err = mlx5_port_query_eth_proto(mdev, 1, ext, &eproto); if (err) return err; - for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) + mlx5e_port_get_speed_arr(mdev, &table, &max_size); + for (i = 0; i < max_size; ++i) if (eproto.cap & MLX5E_PROT_MASK(i)) - max_speed = max(max_speed, mlx5e_link_speed[i]); + max_speed = max(max_speed, table[i]); *speed = max_speed; return 0; } -u32 mlx5e_port_speed2linkmodes(u32 speed) +u32 mlx5e_port_speed2linkmodes(struct mlx5_core_dev *mdev, u32 speed) { u32 link_modes = 0; + const u32 *table; + u32 max_size; int i; - for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) { - if (mlx5e_link_speed[i] == speed) + mlx5e_port_get_speed_arr(mdev, &table, &max_size); + for (i = 0; i < max_size; ++i) { + if (table[i] == speed) link_modes |= MLX5E_PROT_MASK(i); } - return link_modes; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.h b/drivers/net/ethernet/mellanox/mlx5/core/en/port.h index 4bdab8be10af..70f536ec51c4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/port.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.h @@ -42,16 +42,16 @@ struct mlx5e_port_eth_proto { u32 oper; }; -int mlx5_port_query_eth_proto(struct mlx5_core_dev *dev, u8 port, +int mlx5_port_query_eth_proto(struct mlx5_core_dev *dev, u8 port, bool ext, struct mlx5e_port_eth_proto *eproto); void mlx5_port_query_eth_autoneg(struct mlx5_core_dev *dev, u8 *an_status, u8 *an_disable_cap, u8 *an_disable_admin); int mlx5_port_set_eth_ptys(struct mlx5_core_dev *dev, bool an_disable, - u32 proto_admin); -u32 mlx5e_port_ptys2speed(u32 eth_proto_oper); + u32 proto_admin, bool ext); +u32 mlx5e_port_ptys2speed(struct mlx5_core_dev *mdev, u32 eth_proto_oper); int mlx5e_port_linkspeed(struct mlx5_core_dev *mdev, u32 *speed); int mlx5e_port_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed); -u32 mlx5e_port_speed2linkmodes(u32 speed); +u32 mlx5e_port_speed2linkmodes(struct mlx5_core_dev *mdev, u32 speed); int mlx5e_port_query_pbmc(struct mlx5_core_dev *mdev, void *out); int mlx5e_port_set_pbmc(struct mlx5_core_dev *mdev, void *in); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index c29e141d72fb..8343cf7d292c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -695,13 +695,14 @@ static void get_speed_duplex(struct net_device *netdev, u32 eth_proto_oper, struct ethtool_link_ksettings *link_ksettings) { + struct mlx5e_priv *priv = netdev_priv(netdev); u32 speed = SPEED_UNKNOWN; u8 duplex = DUPLEX_UNKNOWN; if (!netif_carrier_ok(netdev)) goto out; - speed = mlx5e_port_ptys2speed(eth_proto_oper); + speed = mlx5e_port_ptys2speed(priv->mdev, eth_proto_oper); if (!speed) { speed = SPEED_UNKNOWN; goto out; @@ -896,9 +897,9 @@ int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv, link_modes = link_ksettings->base.autoneg == AUTONEG_ENABLE ? mlx5e_ethtool2ptys_adver_link(link_ksettings->link_modes.advertising) : - mlx5e_port_speed2linkmodes(speed); + mlx5e_port_speed2linkmodes(mdev, speed); - err = mlx5_port_query_eth_proto(mdev, 1, &eproto); + err = mlx5_port_query_eth_proto(mdev, 1, false, &eproto); if (err) { netdev_err(priv->netdev, "%s: query port eth proto failed: %d\n", __func__, err); @@ -923,7 +924,7 @@ int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv, if (!an_changes && link_modes == eproto.admin) goto out; - mlx5_port_set_eth_ptys(mdev, an_disable, link_modes); + mlx5_port_set_eth_ptys(mdev, an_disable, link_modes, false); mlx5_toggle_port_link(mdev); out: diff --git a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h index 5be7eefa6d75..814fa194663b 100644 --- a/include/linux/mlx5/port.h +++ b/include/linux/mlx5/port.h @@ -92,6 +92,22 @@ enum mlx5e_link_mode { MLX5E_LINK_MODES_NUMBER, }; +enum mlx5e_ext_link_mode { + MLX5E_SGMII_100M = 0, + MLX5E_1000BASE_X_SGMII = 1, + MLX5E_5GBASE_R = 3, + MLX5E_10GBASE_XFI_XAUI_1 = 4, + MLX5E_40GBASE_XLAUI_4_XLPPI_4 = 5, + MLX5E_25GAUI_1_25GBASE_CR_KR = 6, + MLX5E_50GAUI_2_LAUI_2_50GBASE_CR2_KR2 = 7, + MLX5E_50GAUI_1_LAUI_1_50GBASE_CR_KR = 8, + MLX5E_CAUI_4_100GBASE_CR4_KR4 = 9, + MLX5E_100GAUI_2_100GBASE_CR2_KR2 = 10, + MLX5E_200GAUI_4_200GBASE_CR4_KR4 = 12, + MLX5E_400GAUI_8 = 15, + MLX5E_EXT_LINK_MODES_NUMBER, +}; + enum mlx5e_connector_type { MLX5E_PORT_UNKNOWN = 0, MLX5E_PORT_NONE = 1, @@ -106,6 +122,9 @@ enum mlx5e_connector_type { }; #define MLX5E_PROT_MASK(link_mode) (1 << link_mode) +#define MLX5_GET_ETH_PROTO(reg, out, ext, field) \ + (ext ? MLX5_GET(reg, out, ext_##field) : \ + MLX5_GET(reg, out, field)) int mlx5_set_port_caps(struct mlx5_core_dev *dev, u8 port_num, u32 caps); int mlx5_query_port_ptys(struct mlx5_core_dev *dev, u32 *ptys, -- cgit v1.2.3 From f8ebfaf6684b03084858d8c55f81867e5171af08 Mon Sep 17 00:00:00 2001 From: Jan Sokolowski Date: Wed, 13 Feb 2019 18:07:29 +0100 Subject: net: bpf: remove XDP_QUERY_XSK_UMEM enumerator MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit c9b47cc1fabc ("xsk: fix bug when trying to use both copy and zero-copy on one queue id") moved the umem query code to the AF_XDP core, and therefore removed the need to query the netdevice for a umem. This patch removes XDP_QUERY_XSK_UMEM and all code that implement that behavior, which is just dead code. Signed-off-by: Jan Sokolowski Acked-by: Björn Töpel Signed-off-by: Daniel Borkmann --- drivers/net/ethernet/intel/i40e/i40e_main.c | 3 --- drivers/net/ethernet/intel/i40e/i40e_xsk.c | 28 ---------------------- drivers/net/ethernet/intel/i40e/i40e_xsk.h | 2 -- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 3 --- .../net/ethernet/intel/ixgbe/ixgbe_txrx_common.h | 2 -- drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c | 17 ------------- include/linux/netdevice.h | 7 +++--- 7 files changed, 3 insertions(+), 59 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 44856a84738d..5e74a5127849 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -12128,9 +12128,6 @@ static int i40e_xdp(struct net_device *dev, case XDP_QUERY_PROG: xdp->prog_id = vsi->xdp_prog ? vsi->xdp_prog->aux->id : 0; return 0; - case XDP_QUERY_XSK_UMEM: - return i40e_xsk_umem_query(vsi, &xdp->xsk.umem, - xdp->xsk.queue_id); case XDP_SETUP_XSK_UMEM: return i40e_xsk_umem_setup(vsi, xdp->xsk.umem, xdp->xsk.queue_id); diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c index 96d849460d9b..e190a2c2b9ff 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c +++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c @@ -154,34 +154,6 @@ static int i40e_xsk_umem_disable(struct i40e_vsi *vsi, u16 qid) return 0; } -/** - * i40e_xsk_umem_query - Queries a certain ring/qid for its UMEM - * @vsi: Current VSI - * @umem: UMEM associated to the ring, if any - * @qid: Rx ring to associate UMEM to - * - * This function will store, if any, the UMEM associated to certain ring. - * - * Returns 0 on success, <0 on failure - **/ -int i40e_xsk_umem_query(struct i40e_vsi *vsi, struct xdp_umem **umem, - u16 qid) -{ - struct net_device *netdev = vsi->netdev; - struct xdp_umem *queried_umem; - - if (vsi->type != I40E_VSI_MAIN) - return -EINVAL; - - queried_umem = xdp_get_umem_from_qid(netdev, qid); - - if (!queried_umem) - return -EINVAL; - - *umem = queried_umem; - return 0; -} - /** * i40e_xsk_umem_setup - Enable/disassociate a UMEM to/from a ring/qid * @vsi: Current VSI diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.h b/drivers/net/ethernet/intel/i40e/i40e_xsk.h index 9038c5d5cf08..8cc0a2e7d9a2 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_xsk.h +++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.h @@ -10,8 +10,6 @@ struct zero_copy_allocator; int i40e_queue_pair_disable(struct i40e_vsi *vsi, int queue_pair); int i40e_queue_pair_enable(struct i40e_vsi *vsi, int queue_pair); -int i40e_xsk_umem_query(struct i40e_vsi *vsi, struct xdp_umem **umem, - u16 qid); int i40e_xsk_umem_setup(struct i40e_vsi *vsi, struct xdp_umem *umem, u16 qid); void i40e_zca_free(struct zero_copy_allocator *alloc, unsigned long handle); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index b53087a980ef..38c430b94ae3 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -10280,9 +10280,6 @@ static int ixgbe_xdp(struct net_device *dev, struct netdev_bpf *xdp) xdp->prog_id = adapter->xdp_prog ? adapter->xdp_prog->aux->id : 0; return 0; - case XDP_QUERY_XSK_UMEM: - return ixgbe_xsk_umem_query(adapter, &xdp->xsk.umem, - xdp->xsk.queue_id); case XDP_SETUP_XSK_UMEM: return ixgbe_xsk_umem_setup(adapter, xdp->xsk.umem, xdp->xsk.queue_id); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_txrx_common.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_txrx_common.h index 53d4089f5644..d93a690aff74 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_txrx_common.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_txrx_common.h @@ -30,8 +30,6 @@ void ixgbe_txrx_ring_enable(struct ixgbe_adapter *adapter, int ring); struct xdp_umem *ixgbe_xsk_umem(struct ixgbe_adapter *adapter, struct ixgbe_ring *ring); -int ixgbe_xsk_umem_query(struct ixgbe_adapter *adapter, struct xdp_umem **umem, - u16 qid); int ixgbe_xsk_umem_setup(struct ixgbe_adapter *adapter, struct xdp_umem *umem, u16 qid); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c index 65c3e2c979d4..98870707b51a 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c @@ -174,23 +174,6 @@ static int ixgbe_xsk_umem_disable(struct ixgbe_adapter *adapter, u16 qid) return 0; } -int ixgbe_xsk_umem_query(struct ixgbe_adapter *adapter, struct xdp_umem **umem, - u16 qid) -{ - if (qid >= adapter->num_rx_queues) - return -EINVAL; - - if (adapter->xsk_umems) { - if (qid >= adapter->num_xsk_umems) - return -EINVAL; - *umem = adapter->xsk_umems[qid]; - return 0; - } - - *umem = NULL; - return 0; -} - int ixgbe_xsk_umem_setup(struct ixgbe_adapter *adapter, struct xdp_umem *umem, u16 qid) { diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 1d95e634f3fe..6aedaf1e9a25 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -868,7 +868,6 @@ enum bpf_netdev_command { /* BPF program for offload callbacks, invoked at program load time. */ BPF_OFFLOAD_MAP_ALLOC, BPF_OFFLOAD_MAP_FREE, - XDP_QUERY_XSK_UMEM, XDP_SETUP_XSK_UMEM, }; @@ -895,10 +894,10 @@ struct netdev_bpf { struct { struct bpf_offloaded_map *offmap; }; - /* XDP_QUERY_XSK_UMEM, XDP_SETUP_XSK_UMEM */ + /* XDP_SETUP_XSK_UMEM */ struct { - struct xdp_umem *umem; /* out for query*/ - u16 queue_id; /* in for query */ + struct xdp_umem *umem; + u16 queue_id; } xsk; }; }; -- cgit v1.2.3 From c0d9782f5b6d7157635ae2fd782a4b27d55a6013 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Fri, 8 Feb 2019 23:51:05 +0100 Subject: Compiler Attributes: add support for __copy (gcc >= 9) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit From the GCC manual: copy copy(function) The copy attribute applies the set of attributes with which function has been declared to the declaration of the function to which the attribute is applied. The attribute is designed for libraries that define aliases or function resolvers that are expected to specify the same set of attributes as their targets. The copy attribute can be used with functions, variables, or types. However, the kind of symbol to which the attribute is applied (either function or variable) must match the kind of symbol to which the argument refers. The copy attribute copies only syntactic and semantic attributes but not attributes that affect a symbol’s linkage or visibility such as alias, visibility, or weak. The deprecated attribute is also not copied. https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html The upcoming GCC 9 release extends the -Wmissing-attributes warnings (enabled by -Wall) to C and aliases: it warns when particular function attributes are missing in the aliases but not in their target, e.g.: void __cold f(void) {} void __alias("f") g(void); diagnoses: warning: 'g' specifies less restrictive attribute than its target 'f': 'cold' [-Wmissing-attributes] Using __copy(f) we can copy the __cold attribute from f to g: void __cold f(void) {} void __copy(f) __alias("f") g(void); This attribute is most useful to deal with situations where an alias is declared but we don't know the exact attributes the target has. For instance, in the kernel, the widely used module_init/exit macros define the init/cleanup_module aliases, but those cannot be marked always as __init/__exit since some modules do not have their functions marked as such. Suggested-by: Martin Sebor Reviewed-by: Nick Desaulniers Signed-off-by: Miguel Ojeda --- include/linux/compiler_attributes.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/compiler_attributes.h b/include/linux/compiler_attributes.h index 19f32b0c29af..6b318efd8a74 100644 --- a/include/linux/compiler_attributes.h +++ b/include/linux/compiler_attributes.h @@ -34,6 +34,7 @@ #ifndef __has_attribute # define __has_attribute(x) __GCC4_has_attribute_##x # define __GCC4_has_attribute___assume_aligned__ (__GNUC_MINOR__ >= 9) +# define __GCC4_has_attribute___copy__ 0 # define __GCC4_has_attribute___designated_init__ 0 # define __GCC4_has_attribute___externally_visible__ 1 # define __GCC4_has_attribute___noclone__ 1 @@ -100,6 +101,19 @@ */ #define __attribute_const__ __attribute__((__const__)) +/* + * Optional: only supported since gcc >= 9 + * Optional: not supported by clang + * Optional: not supported by icc + * + * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-copy-function-attribute + */ +#if __has_attribute(__copy__) +# define __copy(symbol) __attribute__((__copy__(symbol))) +#else +# define __copy(symbol) +#endif + /* * Don't. Just don't. See commit 771c035372a0 ("deprecate the '__deprecated' * attribute warnings entirely and for good") for more information. -- cgit v1.2.3 From a6e60d84989fa0e91db7f236eda40453b0e44afa Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Sat, 19 Jan 2019 20:59:34 +0100 Subject: include/linux/module.h: copy __init/__exit attrs to init/cleanup_module The upcoming GCC 9 release extends the -Wmissing-attributes warnings (enabled by -Wall) to C and aliases: it warns when particular function attributes are missing in the aliases but not in their target. In particular, it triggers for all the init/cleanup_module aliases in the kernel (defined by the module_init/exit macros), ending up being very noisy. These aliases point to the __init/__exit functions of a module, which are defined as __cold (among other attributes). However, the aliases themselves do not have the __cold attribute. Since the compiler behaves differently when compiling a __cold function as well as when compiling paths leading to calls to __cold functions, the warning is trying to point out the possibly-forgotten attribute in the alias. In order to keep the warning enabled, we decided to silence this case. Ideally, we would mark the aliases directly as __init/__exit. However, there are currently around 132 modules in the kernel which are missing __init/__exit in their init/cleanup functions (either because they are missing, or for other reasons, e.g. the functions being called from somewhere else); and a section mismatch is a hard error. A conservative alternative was to mark the aliases as __cold only. However, since we would like to eventually enforce __init/__exit to be always marked, we chose to use the new __copy function attribute (introduced by GCC 9 as well to deal with this). With it, we copy the attributes used by the target functions into the aliases. This way, functions that were not marked as __init/__exit won't have their aliases marked either, and therefore there won't be a section mismatch. Note that the warning would go away marking either the extern declaration, the definition, or both. However, we only mark the definition of the alias, since we do not want callers (which only see the declaration) to be compiled as if the function was __cold (and therefore the paths leading to those calls would be assumed to be unlikely). Link: https://lore.kernel.org/lkml/20190123173707.GA16603@gmail.com/ Link: https://lore.kernel.org/lkml/20190206175627.GA20399@gmail.com/ Suggested-by: Martin Sebor Acked-by: Jessica Yu Signed-off-by: Miguel Ojeda --- include/linux/module.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/module.h b/include/linux/module.h index 8fa38d3e7538..f5bc4c046461 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -129,13 +129,13 @@ extern void cleanup_module(void); #define module_init(initfn) \ static inline initcall_t __maybe_unused __inittest(void) \ { return initfn; } \ - int init_module(void) __attribute__((alias(#initfn))); + int init_module(void) __copy(initfn) __attribute__((alias(#initfn))); /* This is only required if you want to be unloadable. */ #define module_exit(exitfn) \ static inline exitcall_t __maybe_unused __exittest(void) \ { return exitfn; } \ - void cleanup_module(void) __attribute__((alias(#exitfn))); + void cleanup_module(void) __copy(exitfn) __attribute__((alias(#exitfn))); #endif -- cgit v1.2.3 From a1b3839ac4a4933c7c5167efd7b6b091130d11aa Mon Sep 17 00:00:00 2001 From: Bodong Wang Date: Thu, 8 Nov 2018 22:37:04 +0200 Subject: net/mlx5: E-Switch, Properly refer to the esw manager vport In SmartNIC mode, the eswitch manager is not necessarily the PF (vport 0). Use a helper function to get the correct eswitch manager vport number and cache on the eswitch instance for fast reference. Signed-off-by: Bodong Wang Signed-off-by: Eli Cohen Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 35 ++++++++++++++-------- drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 10 +++++++ .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 7 +++-- include/linux/mlx5/vport.h | 2 ++ 4 files changed, 39 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 05830696abd8..9c622749dbde 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -378,16 +378,16 @@ static int esw_add_uc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) u16 vport = vaddr->vport; int err; - /* Skip mlx5_mpfs_add_mac for PFs, - * it is already done by the PF netdev in mlx5e_execute_l2_action + /* Skip mlx5_mpfs_add_mac for eswitch_managers, + * it is already done by its netdev in mlx5e_execute_l2_action */ - if (!vport) + if (esw->manager_vport == vport) goto fdb_add; err = mlx5_mpfs_add_mac(esw->dev, mac); if (err) { esw_warn(esw->dev, - "Failed to add L2 table mac(%pM) for vport(%d), err(%d)\n", + "Failed to add L2 table mac(%pM) for vport(0x%x), err(%d)\n", mac, vport, err); return err; } @@ -410,10 +410,10 @@ static int esw_del_uc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) u16 vport = vaddr->vport; int err = 0; - /* Skip mlx5_mpfs_del_mac for PFs, - * it is already done by the PF netdev in mlx5e_execute_l2_action + /* Skip mlx5_mpfs_del_mac for eswitch managerss, + * it is already done by its netdev in mlx5e_execute_l2_action */ - if (!vport || !vaddr->mpfs) + if (!vaddr->mpfs || esw->manager_vport == vport) goto fdb_del; err = mlx5_mpfs_del_mac(esw->dev, mac); @@ -1457,15 +1457,22 @@ static void esw_apply_vport_conf(struct mlx5_eswitch *esw, { int vport_num = vport->vport; - if (!vport_num) + if (esw->manager_vport == vport_num) return; mlx5_modify_vport_admin_state(esw->dev, MLX5_VPORT_STATE_OP_MOD_ESW_VPORT, vport_num, vport->info.link_state); - mlx5_modify_nic_vport_mac_address(esw->dev, vport_num, vport->info.mac); - mlx5_modify_nic_vport_node_guid(esw->dev, vport_num, vport->info.node_guid); + + /* Host PF has its own mac/guid. */ + if (vport_num) { + mlx5_modify_nic_vport_mac_address(esw->dev, vport_num, + vport->info.mac); + mlx5_modify_nic_vport_node_guid(esw->dev, vport_num, + vport->info.node_guid); + } + modify_esw_vport_cvlan(esw->dev, vport_num, vport->info.vlan, vport->info.qos, (vport->info.vlan || vport->info.qos)); @@ -1537,8 +1544,11 @@ static void esw_enable_vport(struct mlx5_eswitch *esw, int vport_num, vport->enabled_events = enable_events; vport->enabled = true; - /* only PF is trusted by default */ - if (!vport_num) + /* Esw manager is trusted by default. Host PF (vport 0) is trusted as well + * in smartNIC as it's a vport group manager. + */ + if (esw->manager_vport == vport_num || + (!vport_num && mlx5_core_is_ecpf(esw->dev))) vport->info.trusted = true; esw_vport_change_handle_locked(vport); @@ -1733,6 +1743,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) return -ENOMEM; esw->dev = dev; + esw->manager_vport = mlx5_eswitch_manager_vport(dev); esw->work_queue = create_singlethread_workqueue("mlx5_esw_wq"); if (!esw->work_queue) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 0a3eee8746c1..959a9e28d08f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -38,6 +38,7 @@ #include #include #include +#include #include #include "lib/mpfs.h" @@ -204,6 +205,7 @@ struct mlx5_eswitch { struct mlx5_esw_offload offloads; int mode; int nvports; + u16 manager_vport; }; void esw_offloads_cleanup(struct mlx5_eswitch *esw, int nvports); @@ -363,6 +365,14 @@ bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0, #define esw_debug(dev, format, ...) \ mlx5_core_dbg_mask(dev, MLX5_DEBUG_ESWITCH_MASK, format, ##__VA_ARGS__) + +/* The returned number is valid only when the dev is eswitch manager. */ +static inline u16 mlx5_eswitch_manager_vport(struct mlx5_core_dev *dev) +{ + return mlx5_core_is_ecpf_esw_manager(dev) ? + MLX5_VPORT_ECPF : MLX5_VPORT_PF; +} + #else /* CONFIG_MLX5_ESWITCH */ /* eswitch API stubs */ static inline int mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 9128b45f3f37..af2c44d31357 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -522,7 +522,8 @@ mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, int vport, u32 sqn misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); MLX5_SET(fte_match_set_misc, misc, source_sqn, sqn); - MLX5_SET(fte_match_set_misc, misc, source_port, 0x0); /* source vport is 0 */ + /* source vport is the esw manager */ + MLX5_SET(fte_match_set_misc, misc, source_port, esw->manager_vport); misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_sqn); @@ -567,7 +568,7 @@ static void peer_miss_rules_setup(struct mlx5_core_dev *peer_dev, source_eswitch_owner_vhca_id); dest->type = MLX5_FLOW_DESTINATION_TYPE_VPORT; - dest->vport.num = 0; + dest->vport.num = peer_dev->priv.eswitch->manager_vport; dest->vport.vhca_id = MLX5_CAP_GEN(peer_dev, vhca_id); dest->vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID; } @@ -666,7 +667,7 @@ static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw) dmac_c[0] = 0x01; dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; - dest.vport.num = 0; + dest.vport.num = esw->manager_vport; flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, spec, diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h index 3bc05449ac39..b67bcc95ab5d 100644 --- a/include/linux/mlx5/vport.h +++ b/include/linux/mlx5/vport.h @@ -52,6 +52,8 @@ enum { }; enum { + MLX5_VPORT_PF = 0x0, + MLX5_VPORT_ECPF = 0xfffe, MLX5_VPORT_UPLINK = 0xffff }; -- cgit v1.2.3 From cbc44e76bfcdcaccd079487367593ee3f94d006d Mon Sep 17 00:00:00 2001 From: Bodong Wang Date: Fri, 1 Feb 2019 17:34:55 -0600 Subject: net/mlx5: E-Switch, Properly refer to host PF vport as other vport Commands referring to vports use the following scheme: 1. When referring to my own vport, put 0 in vport and 0 in other_vport. 2. When referring to another vport, put the vport number of the referred vport and put 1 in other_vport. It was assumed that driver is accessing other vport when vport number is greater than 0. With the above scheme, the case that ECPF eswitch manager is trying to access host PF vport will fall over with scheme 1 as the vport number is 0. This is apparently wrong as driver is trying to refer other vport. As such usage can only happen in the eswitch context, change relevant functions to provide other vport input properly. Signed-off-by: Bodong Wang Signed-off-by: Eli Cohen Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 6 ++++-- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 13 ++++++------- drivers/net/ethernet/mellanox/mlx5/core/vport.c | 10 ++++------ include/linux/mlx5/vport.h | 4 ++-- 4 files changed, 16 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 685f1975be58..f84889bbe2a0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -1083,7 +1083,8 @@ static int mlx5e_vf_rep_open(struct net_device *dev) if (!mlx5_modify_vport_admin_state(priv->mdev, MLX5_VPORT_STATE_OP_MOD_ESW_VPORT, - rep->vport, MLX5_VPORT_ADMIN_STATE_UP)) + rep->vport, 1, + MLX5_VPORT_ADMIN_STATE_UP)) netif_carrier_on(dev); unlock: @@ -1101,7 +1102,8 @@ static int mlx5e_vf_rep_close(struct net_device *dev) mutex_lock(&priv->state_lock); mlx5_modify_vport_admin_state(priv->mdev, MLX5_VPORT_STATE_OP_MOD_ESW_VPORT, - rep->vport, MLX5_VPORT_ADMIN_STATE_DOWN); + rep->vport, 1, + MLX5_VPORT_ADMIN_STATE_DOWN); ret = mlx5e_close_locked(dev); mutex_unlock(&priv->state_lock); return ret; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 9c622749dbde..648c743cc947 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1462,7 +1462,7 @@ static void esw_apply_vport_conf(struct mlx5_eswitch *esw, mlx5_modify_vport_admin_state(esw->dev, MLX5_VPORT_STATE_OP_MOD_ESW_VPORT, - vport_num, + vport_num, 1, vport->info.link_state); /* Host PF has its own mac/guid. */ @@ -1581,10 +1581,10 @@ static void esw_disable_vport(struct mlx5_eswitch *esw, int vport_num) esw_vport_change_handle_locked(vport); vport->enabled_events = 0; esw_vport_disable_qos(esw, vport_num); - if (vport_num && esw->mode == SRIOV_LEGACY) { + if (esw->mode == SRIOV_LEGACY) { mlx5_modify_vport_admin_state(esw->dev, MLX5_VPORT_STATE_OP_MOD_ESW_VPORT, - vport_num, + vport_num, 1, MLX5_VPORT_ADMIN_STATE_DOWN); esw_vport_disable_egress_acl(esw, vport); esw_vport_disable_ingress_acl(esw, vport); @@ -1875,7 +1875,7 @@ int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw, err = mlx5_modify_vport_admin_state(esw->dev, MLX5_VPORT_STATE_OP_MOD_ESW_VPORT, - vport, link_state); + vport, 1, link_state); if (err) { mlx5_core_warn(esw->dev, "Failed to set vport %d link state, err = %d", @@ -2137,7 +2137,7 @@ static int mlx5_eswitch_query_vport_drop_stats(struct mlx5_core_dev *dev, !MLX5_CAP_GEN(dev, transmit_discard_vport_down)) return 0; - err = mlx5_query_vport_down_stats(dev, vport_idx, + err = mlx5_query_vport_down_stats(dev, vport_idx, 1, &rx_discard_vport_down, &tx_discard_vport_down); if (err) @@ -2174,8 +2174,7 @@ int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw, MLX5_CMD_OP_QUERY_VPORT_COUNTER); MLX5_SET(query_vport_counter_in, in, op_mod, 0); MLX5_SET(query_vport_counter_in, in, vport_number, vport); - if (vport) - MLX5_SET(query_vport_counter_in, in, other_vport, 1); + MLX5_SET(query_vport_counter_in, in, other_vport, 1); memset(out, 0, outlen); err = mlx5_cmd_exec(esw->dev, in, sizeof(in), out, outlen); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index 9a928eb48522..ef95feca9961 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -64,7 +64,7 @@ u8 mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport) } int mlx5_modify_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod, - u16 vport, u8 state) + u16 vport, u8 other_vport, u8 state) { u32 in[MLX5_ST_SZ_DW(modify_vport_state_in)] = {0}; u32 out[MLX5_ST_SZ_DW(modify_vport_state_out)] = {0}; @@ -73,8 +73,7 @@ int mlx5_modify_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod, MLX5_CMD_OP_MODIFY_VPORT_STATE); MLX5_SET(modify_vport_state_in, in, op_mod, opmod); MLX5_SET(modify_vport_state_in, in, vport_number, vport); - if (vport) - MLX5_SET(modify_vport_state_in, in, other_vport, 1); + MLX5_SET(modify_vport_state_in, in, other_vport, other_vport); MLX5_SET(modify_vport_state_in, in, admin_state, state); return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); @@ -1057,7 +1056,7 @@ free: EXPORT_SYMBOL_GPL(mlx5_core_query_vport_counter); int mlx5_query_vport_down_stats(struct mlx5_core_dev *mdev, u16 vport, - u64 *rx_discard_vport_down, + u8 other_vport, u64 *rx_discard_vport_down, u64 *tx_discard_vport_down) { u32 out[MLX5_ST_SZ_DW(query_vnic_env_out)] = {0}; @@ -1068,8 +1067,7 @@ int mlx5_query_vport_down_stats(struct mlx5_core_dev *mdev, u16 vport, MLX5_CMD_OP_QUERY_VNIC_ENV); MLX5_SET(query_vnic_env_in, in, op_mod, 0); MLX5_SET(query_vnic_env_in, in, vport_number, vport); - if (vport) - MLX5_SET(query_vnic_env_in, in, other_vport, 1); + MLX5_SET(query_vnic_env_in, in, other_vport, other_vport); err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); if (err) diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h index b67bcc95ab5d..b7edcb1dadd8 100644 --- a/include/linux/mlx5/vport.h +++ b/include/linux/mlx5/vport.h @@ -59,7 +59,7 @@ enum { u8 mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport); int mlx5_modify_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod, - u16 vport, u8 state); + u16 vport, u8 other_vport, u8 state); int mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev, u16 vport, u8 *addr); int mlx5_query_nic_vport_min_inline(struct mlx5_core_dev *mdev, @@ -121,7 +121,7 @@ int mlx5_modify_nic_vport_vlans(struct mlx5_core_dev *dev, int mlx5_nic_vport_enable_roce(struct mlx5_core_dev *mdev); int mlx5_nic_vport_disable_roce(struct mlx5_core_dev *mdev); int mlx5_query_vport_down_stats(struct mlx5_core_dev *mdev, u16 vport, - u64 *rx_discard_vport_down, + u8 other_vport, u64 *rx_discard_vport_down, u64 *tx_discard_vport_down); int mlx5_core_query_vport_counter(struct mlx5_core_dev *dev, u8 other_vport, int vf, u8 port_num, void *out, -- cgit v1.2.3 From c9b99abcf232f69ddff158b1f313fd7d2654414b Mon Sep 17 00:00:00 2001 From: Bodong Wang Date: Thu, 31 Jan 2019 14:40:53 -0600 Subject: net/mlx5: E-Switch, Split VF and special vports for offloads mode When driver is entering offloads mode, there are two major tasks to do: initialize flow steering and create representors. Flow steering should make sure enough flow table/group spaces are reserved for all reps. Representors will be created in a group, all or none. With the introduction of ECPF, flow steering should still reserve the same spaces. But, the representors are not always loaded/unloaded in a single piece. Once ECPF is in offloads mode, it will get the number of VF changing event from host PF. In such scenario, only the VF reps should be loaded/unloaded, not the reps for special vports (such as the uplink vport). Thus, when entering offloads mode, driver should specify the total number of reps, and the number of VF reps separately. When leaving offloads mode, the cleanup should use the information self-contained in eswitch such as number of VFs. This patch doesn't change any functionality. Signed-off-by: Bodong Wang Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 7 ++- drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 5 +- .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 57 ++++++++++++++++------ include/linux/mlx5/vport.h | 1 + 4 files changed, 48 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 648c743cc947..be6c2931d2a0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1641,7 +1641,8 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) } else { mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_ETH); mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB); - err = esw_offloads_init(esw, nvfs + MLX5_SPECIAL_VPORTS); + err = esw_offloads_init(esw, nvfs, + nvfs + MLX5_SPECIAL_VPORTS); } if (err) @@ -1683,7 +1684,6 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) { struct esw_mc_addr *mc_promisc; int old_mode; - int nvports; int i; if (!ESW_ALLOWED(esw) || esw->mode == SRIOV_NONE) @@ -1693,7 +1693,6 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) esw->enabled_vports, esw->mode); mc_promisc = &esw->mc_promisc; - nvports = esw->enabled_vports; if (esw->mode == SRIOV_LEGACY) mlx5_eq_notifier_unregister(esw->dev, &esw->nb); @@ -1709,7 +1708,7 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) if (esw->mode == SRIOV_LEGACY) esw_destroy_legacy_fdb_table(esw); else if (esw->mode == SRIOV_OFFLOADS) - esw_offloads_cleanup(esw, nvports); + esw_offloads_cleanup(esw); old_mode = esw->mode; esw->mode = SRIOV_NONE; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 959a9e28d08f..fd845e6c44d5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -208,8 +208,9 @@ struct mlx5_eswitch { u16 manager_vport; }; -void esw_offloads_cleanup(struct mlx5_eswitch *esw, int nvports); -int esw_offloads_init(struct mlx5_eswitch *esw, int nvports); +void esw_offloads_cleanup(struct mlx5_eswitch *esw); +int esw_offloads_init(struct mlx5_eswitch *esw, int vf_nvports, + int total_nvports); void esw_offloads_cleanup_reps(struct mlx5_eswitch *esw); int esw_offloads_init_reps(struct mlx5_eswitch *esw); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 19969d487a01..14f7ad67cfe4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -54,6 +54,8 @@ enum { #define fdb_prio_table(esw, chain, prio, level) \ (esw)->fdb_table.offloads.fdb_prio[(chain)][(prio)][(level)] +#define UPLINK_REP_INDEX 0 + static struct mlx5_flow_table * esw_get_prio_table(struct mlx5_eswitch *esw, u32 chain, u16 prio, int level); static void @@ -1239,19 +1241,28 @@ int esw_offloads_init_reps(struct mlx5_eswitch *esw) return 0; } +static void __esw_offloads_unload_rep(struct mlx5_eswitch *esw, + struct mlx5_eswitch_rep *rep, u8 rep_type) +{ + if (!rep->rep_if[rep_type].valid) + return; + + rep->rep_if[rep_type].unload(rep); +} + static void esw_offloads_unload_reps_type(struct mlx5_eswitch *esw, int nvports, u8 rep_type) { struct mlx5_eswitch_rep *rep; int vport; - for (vport = nvports - 1; vport >= 0; vport--) { + for (vport = nvports; vport >= MLX5_VPORT_FIRST_VF; vport--) { rep = &esw->offloads.vport_reps[vport]; - if (!rep->rep_if[rep_type].valid) - continue; - - rep->rep_if[rep_type].unload(rep); + __esw_offloads_unload_rep(esw, rep, rep_type); } + + rep = &esw->offloads.vport_reps[UPLINK_REP_INDEX]; + __esw_offloads_unload_rep(esw, rep, rep_type); } static void esw_offloads_unload_reps(struct mlx5_eswitch *esw, int nvports) @@ -1262,6 +1273,15 @@ static void esw_offloads_unload_reps(struct mlx5_eswitch *esw, int nvports) esw_offloads_unload_reps_type(esw, nvports, rep_type); } +static int __esw_offloads_load_rep(struct mlx5_eswitch *esw, + struct mlx5_eswitch_rep *rep, u8 rep_type) +{ + if (!rep->rep_if[rep_type].valid) + return 0; + + return rep->rep_if[rep_type].load(esw->dev, rep); +} + static int esw_offloads_load_reps_type(struct mlx5_eswitch *esw, int nvports, u8 rep_type) { @@ -1269,12 +1289,14 @@ static int esw_offloads_load_reps_type(struct mlx5_eswitch *esw, int nvports, int vport; int err; - for (vport = 0; vport < nvports; vport++) { - rep = &esw->offloads.vport_reps[vport]; - if (!rep->rep_if[rep_type].valid) - continue; + rep = &esw->offloads.vport_reps[UPLINK_REP_INDEX]; + err = __esw_offloads_load_rep(esw, rep, rep_type); + if (err) + goto out; - err = rep->rep_if[rep_type].load(esw->dev, rep); + for (vport = MLX5_VPORT_FIRST_VF; vport <= nvports; vport++) { + rep = &esw->offloads.vport_reps[vport]; + err = __esw_offloads_load_rep(esw, rep, rep_type); if (err) goto err_reps; } @@ -1283,6 +1305,7 @@ static int esw_offloads_load_reps_type(struct mlx5_eswitch *esw, int nvports, err_reps: esw_offloads_unload_reps_type(esw, vport, rep_type); +out: return err; } @@ -1440,17 +1463,18 @@ static void esw_offloads_steering_cleanup(struct mlx5_eswitch *esw) esw_destroy_offloads_fdb_tables(esw); } -int esw_offloads_init(struct mlx5_eswitch *esw, int nvports) +int esw_offloads_init(struct mlx5_eswitch *esw, int vf_nvports, + int total_nvports) { int err; mutex_init(&esw->fdb_table.offloads.fdb_prio_lock); - err = esw_offloads_steering_init(esw, nvports); + err = esw_offloads_steering_init(esw, total_nvports); if (err) return err; - err = esw_offloads_load_reps(esw, nvports); + err = esw_offloads_load_reps(esw, vf_nvports); if (err) goto err_reps; @@ -1481,10 +1505,12 @@ static int esw_offloads_stop(struct mlx5_eswitch *esw, return err; } -void esw_offloads_cleanup(struct mlx5_eswitch *esw, int nvports) +void esw_offloads_cleanup(struct mlx5_eswitch *esw) { + u16 num_vfs = esw->dev->priv.sriov.num_vfs; + esw_offloads_devcom_cleanup(esw); - esw_offloads_unload_reps(esw, nvports); + esw_offloads_unload_reps(esw, num_vfs); esw_offloads_steering_cleanup(esw); } @@ -1822,7 +1848,6 @@ EXPORT_SYMBOL(mlx5_eswitch_unregister_vport_rep); void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type) { -#define UPLINK_REP_INDEX 0 struct mlx5_esw_offload *offloads = &esw->offloads; struct mlx5_eswitch_rep *rep; diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h index b7edcb1dadd8..755aeea19e1c 100644 --- a/include/linux/mlx5/vport.h +++ b/include/linux/mlx5/vport.h @@ -53,6 +53,7 @@ enum { enum { MLX5_VPORT_PF = 0x0, + MLX5_VPORT_FIRST_VF = 0x1, MLX5_VPORT_ECPF = 0xfffe, MLX5_VPORT_UPLINK = 0xffff }; -- cgit v1.2.3 From f121e0ea9586b2c937bf1ff9a0b682dc6424ce1d Mon Sep 17 00:00:00 2001 From: Bodong Wang Date: Tue, 29 Jan 2019 21:48:31 -0600 Subject: net/mlx5: E-Switch, Add state to eswitch vport representors Currently the eswitch vport reps have a valid indicator, which is set on register and unset on unregister. However, a rep can be loaded or not loaded when doing unregister, current driver checks if the vport of that rep is enabled as a flag to imply the rep is loaded. However, for ECPF, this is not valid as the host PF will enable the vports for its VFs instead. Add three states: {unregistered, registered, loaded}, with the following state changes across different operations: create: (none) -> unregistered reg: unregistered -> registered load: registered -> loaded unload: loaded -> registered unreg: registered -> unregistered Note that the state shall only be updated inside eswitch driver rather than individual drivers such as ETH or IB. Signed-off-by: Bodong Wang Signed-off-by: Or Gerlitz Suggested-by: Mark Bloch Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 31 +++++++++++++++------- include/linux/mlx5/eswitch.h | 8 +++++- 2 files changed, 29 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 4979c7ee0ad7..c6c9dad69ba8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -364,7 +364,7 @@ static int esw_set_global_vlan_pop(struct mlx5_eswitch *esw, u8 val) esw_debug(esw->dev, "%s applying global %s policy\n", __func__, val ? "pop" : "none"); for (vf_vport = 1; vf_vport < esw->enabled_vports; vf_vport++) { rep = &esw->offloads.vport_reps[vf_vport]; - if (!rep->rep_if[REP_ETH].valid) + if (rep->rep_if[REP_ETH].state != REP_LOADED) continue; err = __mlx5_eswitch_set_vport_vlan(esw, rep->vport, 0, 0, val); @@ -1256,7 +1256,7 @@ int esw_offloads_init_reps(struct mlx5_eswitch *esw) struct mlx5_core_dev *dev = esw->dev; struct mlx5_esw_offload *offloads; struct mlx5_eswitch_rep *rep; - u8 hw_id[ETH_ALEN]; + u8 hw_id[ETH_ALEN], rep_type; int vport; esw->offloads.vport_reps = kcalloc(total_vfs, @@ -1271,6 +1271,9 @@ int esw_offloads_init_reps(struct mlx5_eswitch *esw) mlx5_esw_for_all_reps(esw, vport, rep) { rep->vport = vport; ether_addr_copy(rep->hw_id, hw_id); + + for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++) + rep->rep_if[rep_type].state = REP_UNREGISTERED; } offloads->vport_reps[0].vport = MLX5_VPORT_UPLINK; @@ -1281,10 +1284,11 @@ int esw_offloads_init_reps(struct mlx5_eswitch *esw) static void __esw_offloads_unload_rep(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep, u8 rep_type) { - if (!rep->rep_if[rep_type].valid) + if (rep->rep_if[rep_type].state != REP_LOADED) return; rep->rep_if[rep_type].unload(rep); + rep->rep_if[rep_type].state = REP_REGISTERED; } static void esw_offloads_unload_reps_type(struct mlx5_eswitch *esw, int nvports, @@ -1311,10 +1315,18 @@ static void esw_offloads_unload_reps(struct mlx5_eswitch *esw, int nvports) static int __esw_offloads_load_rep(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep, u8 rep_type) { - if (!rep->rep_if[rep_type].valid) + int err = 0; + + if (rep->rep_if[rep_type].state != REP_REGISTERED) return 0; - return rep->rep_if[rep_type].load(esw->dev, rep); + err = rep->rep_if[rep_type].load(esw->dev, rep); + if (err) + return err; + + rep->rep_if[rep_type].state = REP_LOADED; + + return 0; } static int esw_offloads_load_reps_type(struct mlx5_eswitch *esw, int nvports, @@ -1861,7 +1873,7 @@ void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw, rep_if->get_proto_dev = __rep_if->get_proto_dev; rep_if->priv = __rep_if->priv; - rep_if->valid = true; + rep_if->state = REP_REGISTERED; } EXPORT_SYMBOL(mlx5_eswitch_register_vport_rep); @@ -1873,10 +1885,11 @@ void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw, rep = &offloads->vport_reps[vport_index]; - if (esw->mode == SRIOV_OFFLOADS && esw->vports[vport_index].enabled) + if (esw->mode == SRIOV_OFFLOADS && + rep->rep_if[rep_type].state == REP_LOADED) rep->rep_if[rep_type].unload(rep); - rep->rep_if[rep_type].valid = false; + rep->rep_if[rep_type].state = REP_UNREGISTERED; } EXPORT_SYMBOL(mlx5_eswitch_unregister_vport_rep); @@ -1896,7 +1909,7 @@ void *mlx5_eswitch_get_proto_dev(struct mlx5_eswitch *esw, rep = mlx5_eswitch_get_rep(esw, vport); - if (rep->rep_if[rep_type].valid && + if (rep->rep_if[rep_type].state == REP_LOADED && rep->rep_if[rep_type].get_proto_dev) return rep->rep_if[rep_type].get_proto_dev(rep); return NULL; diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h index fab5121ffb8f..e3dbc1bc0917 100644 --- a/include/linux/mlx5/eswitch.h +++ b/include/linux/mlx5/eswitch.h @@ -22,6 +22,12 @@ enum { NUM_REP_TYPES, }; +enum { + REP_UNREGISTERED, + REP_REGISTERED, + REP_LOADED, +}; + struct mlx5_eswitch_rep; struct mlx5_eswitch_rep_if { int (*load)(struct mlx5_core_dev *dev, @@ -29,7 +35,7 @@ struct mlx5_eswitch_rep_if { void (*unload)(struct mlx5_eswitch_rep *rep); void *(*get_proto_dev)(struct mlx5_eswitch_rep *rep); void *priv; - bool valid; + u8 state; }; struct mlx5_eswitch_rep { -- cgit v1.2.3 From f8e8fa0262eaf544490a11746c524333158ef0b6 Mon Sep 17 00:00:00 2001 From: Bodong Wang Date: Thu, 31 Jan 2019 17:42:57 -0600 Subject: net/mlx5: E-Switch, Centralize repersentor reg/unreg to eswitch driver Eswitch has two users: IB and ETH. They both register repersentors when mlx5 interface is added, and unregister the repersentors when mlx5 interface is removed. Ideally, each driver should only deal with the entities which are unique to itself. However, current IB and ETH drivers have to perform the following eswitch operations: 1. When registering, specify how many vports to register. This number is the same for both drivers which is the total available vport numbers. 2. When unregistering, specify the number of registered vports to do unregister. Also, unload the repersentors which are already loaded. It's unnecessary for eswitch driver to hands out the control of above operations to individual driver users, as they're not unique to each driver. Instead, such operations should be centralized to eswitch driver. This consolidates eswitch control flow, and simplified IB and ETH driver. This patch doesn't change any functionality. Signed-off-by: Bodong Wang Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- drivers/infiniband/hw/mlx5/ib_rep.c | 20 ++++------ drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 19 +++------ .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 45 +++++++++++----------- include/linux/mlx5/eswitch.h | 11 ++---- 4 files changed, 39 insertions(+), 56 deletions(-) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx5/ib_rep.c b/drivers/infiniband/hw/mlx5/ib_rep.c index 99cae9a10195..4700cffb5a00 100644 --- a/drivers/infiniband/hw/mlx5/ib_rep.c +++ b/drivers/infiniband/hw/mlx5/ib_rep.c @@ -95,26 +95,20 @@ static void *mlx5_ib_vport_get_proto_dev(struct mlx5_eswitch_rep *rep) void mlx5_ib_register_vport_reps(struct mlx5_core_dev *mdev) { struct mlx5_eswitch *esw = mdev->priv.eswitch; - int total_vports = MLX5_TOTAL_VPORTS(mdev); struct mlx5_eswitch_rep_if rep_if = {}; - int vport; - - for (vport = 0; vport < total_vports; vport++) { - rep_if.load = mlx5_ib_vport_rep_load; - rep_if.unload = mlx5_ib_vport_rep_unload; - rep_if.get_proto_dev = mlx5_ib_vport_get_proto_dev; - mlx5_eswitch_register_vport_rep(esw, vport, &rep_if, REP_IB); - } + + rep_if.load = mlx5_ib_vport_rep_load; + rep_if.unload = mlx5_ib_vport_rep_unload; + rep_if.get_proto_dev = mlx5_ib_vport_get_proto_dev; + + mlx5_eswitch_register_vport_reps(esw, &rep_if, REP_IB); } void mlx5_ib_unregister_vport_reps(struct mlx5_core_dev *mdev) { struct mlx5_eswitch *esw = mdev->priv.eswitch; - int total_vports = MLX5_TOTAL_VPORTS(mdev); - int vport; - for (vport = total_vports - 1; vport >= 0; vport--) - mlx5_eswitch_unregister_vport_rep(esw, vport, REP_IB); + mlx5_eswitch_unregister_vport_reps(esw, REP_IB); } u8 mlx5_ib_eswitch_mode(struct mlx5_eswitch *esw) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index f84889bbe2a0..287d48e5b073 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -1798,25 +1798,18 @@ static void *mlx5e_vport_rep_get_proto_dev(struct mlx5_eswitch_rep *rep) void mlx5e_rep_register_vport_reps(struct mlx5_core_dev *mdev) { struct mlx5_eswitch *esw = mdev->priv.eswitch; - int total_vfs = MLX5_TOTAL_VPORTS(mdev); - int vport; + struct mlx5_eswitch_rep_if rep_if = {}; - for (vport = 0; vport < total_vfs; vport++) { - struct mlx5_eswitch_rep_if rep_if = {}; + rep_if.load = mlx5e_vport_rep_load; + rep_if.unload = mlx5e_vport_rep_unload; + rep_if.get_proto_dev = mlx5e_vport_rep_get_proto_dev; - rep_if.load = mlx5e_vport_rep_load; - rep_if.unload = mlx5e_vport_rep_unload; - rep_if.get_proto_dev = mlx5e_vport_rep_get_proto_dev; - mlx5_eswitch_register_vport_rep(esw, vport, &rep_if, REP_ETH); - } + mlx5_eswitch_register_vport_reps(esw, &rep_if, REP_ETH); } void mlx5e_rep_unregister_vport_reps(struct mlx5_core_dev *mdev) { struct mlx5_eswitch *esw = mdev->priv.eswitch; - int total_vfs = MLX5_TOTAL_VPORTS(mdev); - int vport; - for (vport = total_vfs - 1; vport >= 0; vport--) - mlx5_eswitch_unregister_vport_rep(esw, vport, REP_ETH); + mlx5_eswitch_unregister_vport_reps(esw, REP_ETH); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 7131d41796fb..b702b56c457e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -1923,40 +1923,39 @@ int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, u8 *encap) return 0; } -void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw, - int vport_index, - struct mlx5_eswitch_rep_if *__rep_if, - u8 rep_type) +void mlx5_eswitch_register_vport_reps(struct mlx5_eswitch *esw, + struct mlx5_eswitch_rep_if *__rep_if, + u8 rep_type) { - struct mlx5_esw_offload *offloads = &esw->offloads; struct mlx5_eswitch_rep_if *rep_if; + struct mlx5_eswitch_rep *rep; + int i; - rep_if = &offloads->vport_reps[vport_index].rep_if[rep_type]; - - rep_if->load = __rep_if->load; - rep_if->unload = __rep_if->unload; - rep_if->get_proto_dev = __rep_if->get_proto_dev; - rep_if->priv = __rep_if->priv; + mlx5_esw_for_all_reps(esw, i, rep) { + rep_if = &rep->rep_if[rep_type]; + rep_if->load = __rep_if->load; + rep_if->unload = __rep_if->unload; + rep_if->get_proto_dev = __rep_if->get_proto_dev; + rep_if->priv = __rep_if->priv; - rep_if->state = REP_REGISTERED; + rep_if->state = REP_REGISTERED; + } } -EXPORT_SYMBOL(mlx5_eswitch_register_vport_rep); +EXPORT_SYMBOL(mlx5_eswitch_register_vport_reps); -void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw, - int vport_index, u8 rep_type) +void mlx5_eswitch_unregister_vport_reps(struct mlx5_eswitch *esw, u8 rep_type) { - struct mlx5_esw_offload *offloads = &esw->offloads; + u16 max_vf = mlx5_core_max_vfs(esw->dev); struct mlx5_eswitch_rep *rep; + int i; - rep = &offloads->vport_reps[vport_index]; - - if (esw->mode == SRIOV_OFFLOADS && - rep->rep_if[rep_type].state == REP_LOADED) - rep->rep_if[rep_type].unload(rep); + if (esw->mode == SRIOV_OFFLOADS) + __unload_reps_all_vport(esw, max_vf, rep_type); - rep->rep_if[rep_type].state = REP_UNREGISTERED; + mlx5_esw_for_all_reps(esw, i, rep) + rep->rep_if[rep_type].state = REP_UNREGISTERED; } -EXPORT_SYMBOL(mlx5_eswitch_unregister_vport_rep); +EXPORT_SYMBOL(mlx5_eswitch_unregister_vport_reps); void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type) { diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h index e3dbc1bc0917..96d8435421de 100644 --- a/include/linux/mlx5/eswitch.h +++ b/include/linux/mlx5/eswitch.h @@ -46,13 +46,10 @@ struct mlx5_eswitch_rep { u32 vlan_refcount; }; -void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw, - int vport_index, - struct mlx5_eswitch_rep_if *rep_if, - u8 rep_type); -void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw, - int vport_index, - u8 rep_type); +void mlx5_eswitch_register_vport_reps(struct mlx5_eswitch *esw, + struct mlx5_eswitch_rep_if *rep_if, + u8 rep_type); +void mlx5_eswitch_unregister_vport_reps(struct mlx5_eswitch *esw, u8 rep_type); void *mlx5_eswitch_get_proto_dev(struct mlx5_eswitch *esw, int vport, u8 rep_type); -- cgit v1.2.3 From 5ae5162066d8e59e365678a9e76fc4d8f6b78d40 Mon Sep 17 00:00:00 2001 From: Bodong Wang Date: Fri, 14 Dec 2018 09:33:22 -0600 Subject: net/mlx5: E-Switch, Assign a different position for uplink rep and vport In offloads mode, the current implementation puts the uplink representor at index zero of the vport reps array. It is not "natural" to place it at index 0 since we want to put the representor for vport 0 at index 0 with the introduction of SmartNIC. A separate patch will handle the case whether a rep is needed for vport 0 (PF vport). So, we want to have a different placeholder for uplink vport and representor. It was placed at the end of vport and rep array. Since vport number can no longer act as an index into the vport or representors arrays, use functions to map vport numbers to indices when accessing the vports or representors arrays, and vice versa. Signed-off-by: Bodong Wang Signed-off-by: Eli Cohen Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 11 +++++----- drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 24 ++++++++++++++++++++++ .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 11 ++-------- include/linux/mlx5/vport.h | 11 +++++++--- 4 files changed, 40 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index d1454f18c0a7..bb7f72467df9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -84,8 +84,10 @@ enum { static struct mlx5_vport *mlx5_eswitch_get_vport(struct mlx5_eswitch *esw, u16 vport_num) { + u16 idx = mlx5_eswitch_vport_num_to_index(esw, vport_num); + WARN_ON(vport_num > esw->total_vports - 1); - return &esw->vports[vport_num]; + return &esw->vports[idx]; } static int arm_vport_context_events_cmd(struct mlx5_core_dev *dev, u16 vport, @@ -1756,8 +1758,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) int total_vports = MLX5_TOTAL_VPORTS(dev); struct mlx5_eswitch *esw; struct mlx5_vport *vport; - int vport_num; - int err; + int err, i; if (!MLX5_VPORT_MANAGER(dev)) return 0; @@ -1798,8 +1799,8 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) hash_init(esw->offloads.mod_hdr_tbl); mutex_init(&esw->state_lock); - mlx5_esw_for_all_vports(esw, vport_num, vport) { - vport->vport = vport_num; + mlx5_esw_for_all_vports(esw, i, vport) { + vport->vport = mlx5_eswitch_index_to_vport_num(esw, i); vport->info.link_state = MLX5_VPORT_ADMIN_STATE_AUTO; vport->dev = dev; INIT_WORK(&vport->vport_change_handler, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index fd845e6c44d5..2951c1296c3e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -374,6 +374,30 @@ static inline u16 mlx5_eswitch_manager_vport(struct mlx5_core_dev *dev) MLX5_VPORT_ECPF : MLX5_VPORT_PF; } +static inline int mlx5_eswitch_uplink_idx(struct mlx5_eswitch *esw) +{ + /* Uplink always locate at the last element of the array.*/ + return esw->total_vports - 1; +} + +static inline int mlx5_eswitch_vport_num_to_index(struct mlx5_eswitch *esw, + u16 vport_num) +{ + if (vport_num == MLX5_VPORT_UPLINK) + return mlx5_eswitch_uplink_idx(esw); + + return vport_num; +} + +static inline int mlx5_eswitch_index_to_vport_num(struct mlx5_eswitch *esw, + int index) +{ + if (index == mlx5_eswitch_uplink_idx(esw)) + return MLX5_VPORT_UPLINK; + + return index; +} + #else /* CONFIG_MLX5_ESWITCH */ /* eswitch API stubs */ static inline int mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index b702b56c457e..e787e9212174 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -85,10 +85,7 @@ enum { static struct mlx5_eswitch_rep *mlx5_eswitch_get_rep(struct mlx5_eswitch *esw, u16 vport_num) { - u16 idx = vport_num; - - if (vport_num == MLX5_VPORT_UPLINK) - idx = UPLINK_REP_INDEX; + u16 idx = mlx5_eswitch_vport_num_to_index(esw, vport_num); WARN_ON(idx > esw->total_vports - 1); return &esw->offloads.vport_reps[idx]; @@ -1254,7 +1251,6 @@ int esw_offloads_init_reps(struct mlx5_eswitch *esw) { int total_vfs = MLX5_TOTAL_VPORTS(esw->dev); struct mlx5_core_dev *dev = esw->dev; - struct mlx5_esw_offload *offloads; struct mlx5_eswitch_rep *rep; u8 hw_id[ETH_ALEN], rep_type; int vport; @@ -1265,19 +1261,16 @@ int esw_offloads_init_reps(struct mlx5_eswitch *esw) if (!esw->offloads.vport_reps) return -ENOMEM; - offloads = &esw->offloads; mlx5_query_nic_vport_mac_address(dev, 0, hw_id); mlx5_esw_for_all_reps(esw, vport, rep) { - rep->vport = vport; + rep->vport = mlx5_eswitch_index_to_vport_num(esw, vport); ether_addr_copy(rep->hw_id, hw_id); for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++) rep->rep_if[rep_type].state = REP_UNREGISTERED; } - offloads->vport_reps[0].vport = MLX5_VPORT_UPLINK; - return 0; } diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h index 755aeea19e1c..134248c02786 100644 --- a/include/linux/mlx5/vport.h +++ b/include/linux/mlx5/vport.h @@ -36,9 +36,14 @@ #include #include -#define MLX5_VPORT_PF_PLACEHOLDER (1u) -#define MLX5_SPECIAL_VPORTS (MLX5_VPORT_PF_PLACEHOLDER) -#define MLX5_TOTAL_VPORTS(mdev) (MLX5_SPECIAL_VPORTS + mlx5_core_max_vfs(mdev)) +#define MLX5_VPORT_PF_PLACEHOLDER (1u) +#define MLX5_VPORT_UPLINK_PLACEHOLDER (1u) + +#define MLX5_SPECIAL_VPORTS (MLX5_VPORT_PF_PLACEHOLDER + \ + MLX5_VPORT_UPLINK_PLACEHOLDER) + +#define MLX5_TOTAL_VPORTS(mdev) (MLX5_SPECIAL_VPORTS + \ + mlx5_core_max_vfs(mdev)) #define MLX5_VPORT_MANAGER(mdev) \ (MLX5_CAP_GEN(mdev, vport_group_manager) && \ -- cgit v1.2.3 From 81cd229c294e2e416e9161d9286d34f3aaf19348 Mon Sep 17 00:00:00 2001 From: Bodong Wang Date: Mon, 10 Dec 2018 11:59:33 -0600 Subject: net/mlx5: E-Switch, Consider ECPF vport depends on eswitch ownership ECPF connects to the eswitch through vport 0xfffe. ECPF may or may not be the eswitch manager depending on firmware configuration. 1. If ECPF is eswitch manager: ECPF will take over the eswitch manager responsibility. A rep of the host PF shall be created at the ECPF side for the eswitch manager to control. 2. If ECPF is not eswitch manager: host PF will be the eswitch manager, ECPF acts similar as a VF to the host PF. Host PF will be aware of the ECPF vport presence and control it's rep. Signed-off-by: Bodong Wang Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 8 ++- drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 15 ++++ .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 79 +++++++++++++++++++++- include/linux/mlx5/driver.h | 5 ++ include/linux/mlx5/mlx5_ifc.h | 3 +- include/linux/mlx5/vport.h | 8 ++- 6 files changed, 110 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index bb7f72467df9..d2ab1ee19b2a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1667,7 +1667,7 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_ETH); mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB); err = esw_offloads_init(esw, nvfs, - nvfs + MLX5_SPECIAL_VPORTS); + nvfs + MLX5_SPECIAL_VPORTS(esw->dev)); } if (err) @@ -1687,6 +1687,12 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_PF); esw_enable_vport(esw, vport, enabled_events); + /* Enable ECPF vports */ + if (mlx5_ecpf_vport_exists(esw->dev)) { + vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_ECPF); + esw_enable_vport(esw, vport, enabled_events); + } + /* Enable VF vports */ mlx5_esw_for_each_vf_vport(esw, i, vport, nvfs) esw_enable_vport(esw, vport, enabled_events); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 2951c1296c3e..2baa0d71380c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -380,9 +380,20 @@ static inline int mlx5_eswitch_uplink_idx(struct mlx5_eswitch *esw) return esw->total_vports - 1; } +static inline int mlx5_eswitch_ecpf_idx(struct mlx5_eswitch *esw) +{ + return esw->total_vports - 2; +} + static inline int mlx5_eswitch_vport_num_to_index(struct mlx5_eswitch *esw, u16 vport_num) { + if (vport_num == MLX5_VPORT_ECPF) { + if (!mlx5_ecpf_vport_exists(esw->dev)) + esw_warn(esw->dev, "ECPF vport doesn't exist!\n"); + return mlx5_eswitch_ecpf_idx(esw); + } + if (vport_num == MLX5_VPORT_UPLINK) return mlx5_eswitch_uplink_idx(esw); @@ -392,6 +403,10 @@ static inline int mlx5_eswitch_vport_num_to_index(struct mlx5_eswitch *esw, static inline int mlx5_eswitch_index_to_vport_num(struct mlx5_eswitch *esw, int index) { + if (index == mlx5_eswitch_ecpf_idx(esw) && + mlx5_ecpf_vport_exists(esw->dev)) + return MLX5_VPORT_ECPF; + if (index == mlx5_eswitch_uplink_idx(esw)) return MLX5_VPORT_UPLINK; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index e787e9212174..84a33f8e3350 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -639,14 +639,35 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw, misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); + if (mlx5_core_is_ecpf_esw_manager(esw->dev)) { + MLX5_SET(fte_match_set_misc, misc, source_port, MLX5_VPORT_PF); + flow = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, + spec, &flow_act, &dest, 1); + if (IS_ERR(flow)) { + err = PTR_ERR(flow); + goto add_pf_flow_err; + } + flows[MLX5_VPORT_PF] = flow; + } + + if (mlx5_ecpf_vport_exists(esw->dev)) { + MLX5_SET(fte_match_set_misc, misc, source_port, MLX5_VPORT_ECPF); + flow = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, + spec, &flow_act, &dest, 1); + if (IS_ERR(flow)) { + err = PTR_ERR(flow); + goto add_ecpf_flow_err; + } + flows[mlx5_eswitch_ecpf_idx(esw)] = flow; + } + mlx5_esw_for_each_vf_vport(esw, i, mlx5_core_max_vfs(esw->dev)) { MLX5_SET(fte_match_set_misc, misc, source_port, i); flow = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, spec, &flow_act, &dest, 1); if (IS_ERR(flow)) { err = PTR_ERR(flow); - esw_warn(esw->dev, "FDB: Failed to add peer miss flow rule err %d\n", err); - goto add_flow_err; + goto add_vf_flow_err; } flows[i] = flow; } @@ -656,10 +677,18 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw, kvfree(spec); return 0; -add_flow_err: +add_vf_flow_err: nvports = --i; mlx5_esw_for_each_vf_vport_reverse(esw, i, nvports) mlx5_del_flow_rules(flows[i]); + + if (mlx5_ecpf_vport_exists(esw->dev)) + mlx5_del_flow_rules(flows[mlx5_eswitch_ecpf_idx(esw)]); +add_ecpf_flow_err: + if (mlx5_core_is_ecpf_esw_manager(esw->dev)) + mlx5_del_flow_rules(flows[MLX5_VPORT_PF]); +add_pf_flow_err: + esw_warn(esw->dev, "FDB: Failed to add peer miss flow rule err %d\n", err); kvfree(flows); alloc_flows_err: kvfree(spec); @@ -676,6 +705,12 @@ static void esw_del_fdb_peer_miss_rules(struct mlx5_eswitch *esw) mlx5_esw_for_each_vf_vport_reverse(esw, i, mlx5_core_max_vfs(esw->dev)) mlx5_del_flow_rules(flows[i]); + if (mlx5_ecpf_vport_exists(esw->dev)) + mlx5_del_flow_rules(flows[mlx5_eswitch_ecpf_idx(esw)]); + + if (mlx5_core_is_ecpf_esw_manager(esw->dev)) + mlx5_del_flow_rules(flows[MLX5_VPORT_PF]); + kvfree(flows); } @@ -1288,6 +1323,16 @@ static void __unload_reps_special_vport(struct mlx5_eswitch *esw, u8 rep_type) { struct mlx5_eswitch_rep *rep; + if (mlx5_ecpf_vport_exists(esw->dev)) { + rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_ECPF); + __esw_offloads_unload_rep(esw, rep, rep_type); + } + + if (mlx5_core_is_ecpf_esw_manager(esw->dev)) { + rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_PF); + __esw_offloads_unload_rep(esw, rep, rep_type); + } + rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_UPLINK); __esw_offloads_unload_rep(esw, rep, rep_type); } @@ -1351,6 +1396,34 @@ static int __load_reps_special_vport(struct mlx5_eswitch *esw, u8 rep_type) rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_UPLINK); err = __esw_offloads_load_rep(esw, rep, rep_type); + if (err) + return err; + + if (mlx5_core_is_ecpf_esw_manager(esw->dev)) { + rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_PF); + err = __esw_offloads_load_rep(esw, rep, rep_type); + if (err) + goto err_pf; + } + + if (mlx5_ecpf_vport_exists(esw->dev)) { + rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_ECPF); + err = __esw_offloads_load_rep(esw, rep, rep_type); + if (err) + goto err_ecpf; + } + + return 0; + +err_ecpf: + if (mlx5_core_is_ecpf_esw_manager(esw->dev)) { + rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_PF); + __esw_offloads_unload_rep(esw, rep, rep_type); + } + +err_pf: + rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_UPLINK); + __esw_offloads_unload_rep(esw, rep, rep_type); return err; } diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index c5454f985e1d..c2de50f02b33 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1088,6 +1088,11 @@ static inline bool mlx5_core_is_ecpf_esw_manager(struct mlx5_core_dev *dev) return dev->caps.embedded_cpu && MLX5_CAP_GEN(dev, eswitch_manager); } +static inline bool mlx5_ecpf_vport_exists(struct mlx5_core_dev *dev) +{ + return mlx5_core_is_pf(dev) && MLX5_CAP_ESW(dev, ecpf_vport_exists); +} + #define MLX5_HOST_PF_MAX_VFS (127u) static inline u16 mlx5_core_max_vfs(struct mlx5_core_dev *dev) { diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 5decffe565fb..b7bb774b57b0 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -631,7 +631,8 @@ struct mlx5_ifc_e_switch_cap_bits { u8 vport_svlan_insert[0x1]; u8 vport_cvlan_insert_if_not_exist[0x1]; u8 vport_cvlan_insert_overwrite[0x1]; - u8 reserved_at_5[0x17]; + u8 reserved_at_5[0x16]; + u8 ecpf_vport_exists[0x1]; u8 counter_eswitch_affinity[0x1]; u8 merged_eswitch[0x1]; u8 nic_vport_node_guid_modify[0x1]; diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h index 134248c02786..0eef548b9946 100644 --- a/include/linux/mlx5/vport.h +++ b/include/linux/mlx5/vport.h @@ -38,11 +38,13 @@ #define MLX5_VPORT_PF_PLACEHOLDER (1u) #define MLX5_VPORT_UPLINK_PLACEHOLDER (1u) +#define MLX5_VPORT_ECPF_PLACEHOLDER(mdev) (mlx5_ecpf_vport_exists(mdev)) -#define MLX5_SPECIAL_VPORTS (MLX5_VPORT_PF_PLACEHOLDER + \ - MLX5_VPORT_UPLINK_PLACEHOLDER) +#define MLX5_SPECIAL_VPORTS(mdev) (MLX5_VPORT_PF_PLACEHOLDER + \ + MLX5_VPORT_UPLINK_PLACEHOLDER + \ + MLX5_VPORT_ECPF_PLACEHOLDER(mdev)) -#define MLX5_TOTAL_VPORTS(mdev) (MLX5_SPECIAL_VPORTS + \ +#define MLX5_TOTAL_VPORTS(mdev) (MLX5_SPECIAL_VPORTS(mdev) + \ mlx5_core_max_vfs(mdev)) #define MLX5_VPORT_MANAGER(mdev) \ -- cgit v1.2.3 From 3b89ea9c5902acccdbbdec307c85edd1bf52515e Mon Sep 17 00:00:00 2001 From: Hauke Mehrtens Date: Fri, 15 Feb 2019 17:58:54 +0100 Subject: net: Fix for_each_netdev_feature on Big endian The features attribute is of type u64 and stored in the native endianes on the system. The for_each_set_bit() macro takes a pointer to a 32 bit array and goes over the bits in this area. On little Endian systems this also works with an u64 as the most significant bit is on the highest address, but on big endian the words are swapped. When we expect bit 15 here we get bit 47 (15 + 32). This patch converts it more or less to its own for_each_set_bit() implementation which works on 64 bit integers directly. This is then completely in host endianness and should work like expected. Fixes: fd867d51f ("net/core: generic support for disabling netdev features down stack") Signed-off-by: Hauke Mehrtens Signed-off-by: David S. Miller --- include/linux/netdev_features.h | 23 +++++++++++++++++++++-- net/core/dev.c | 4 ++-- 2 files changed, 23 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index 2b2a6dce1630..fce28562bed2 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -11,6 +11,7 @@ #define _LINUX_NETDEV_FEATURES_H #include +#include typedef u64 netdev_features_t; @@ -154,8 +155,26 @@ enum { #define NETIF_F_HW_TLS_TX __NETIF_F(HW_TLS_TX) #define NETIF_F_HW_TLS_RX __NETIF_F(HW_TLS_RX) -#define for_each_netdev_feature(mask_addr, bit) \ - for_each_set_bit(bit, (unsigned long *)mask_addr, NETDEV_FEATURE_COUNT) +/* Finds the next feature with the highest number of the range of start till 0. + */ +static inline int find_next_netdev_feature(u64 feature, unsigned long start) +{ + /* like BITMAP_LAST_WORD_MASK() for u64 + * this sets the most significant 64 - start to 0. + */ + feature &= ~0ULL >> (-start & ((sizeof(feature) * 8) - 1)); + + return fls64(feature) - 1; +} + +/* This goes for the MSB to the LSB through the set feature bits, + * mask_addr should be a u64 and bit an int + */ +#define for_each_netdev_feature(mask_addr, bit) \ + for ((bit) = find_next_netdev_feature((mask_addr), \ + NETDEV_FEATURE_COUNT); \ + (bit) >= 0; \ + (bit) = find_next_netdev_feature((mask_addr), (bit) - 1)) /* Features valid for ethtool to change */ /* = all defined minus driver/device-class-related */ diff --git a/net/core/dev.c b/net/core/dev.c index 8e276e0192a1..5d03889502eb 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -8152,7 +8152,7 @@ static netdev_features_t netdev_sync_upper_features(struct net_device *lower, netdev_features_t feature; int feature_bit; - for_each_netdev_feature(&upper_disables, feature_bit) { + for_each_netdev_feature(upper_disables, feature_bit) { feature = __NETIF_F_BIT(feature_bit); if (!(upper->wanted_features & feature) && (features & feature)) { @@ -8172,7 +8172,7 @@ static void netdev_sync_lower_features(struct net_device *upper, netdev_features_t feature; int feature_bit; - for_each_netdev_feature(&upper_disables, feature_bit) { + for_each_netdev_feature(upper_disables, feature_bit) { feature = __NETIF_F_BIT(feature_bit); if (!(features & feature) && (lower->features & feature)) { netdev_dbg(upper, "Disabling feature %pNF on lower dev %s.\n", -- cgit v1.2.3 From d5be7f632bad0f489879eed0ff4b99bd7fe0b74c Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Fri, 15 Feb 2019 12:15:47 -0500 Subject: net: validate untrusted gso packets without csum offload Syzkaller again found a path to a kernel crash through bad gso input. By building an excessively large packet to cause an skb field to wrap. If VIRTIO_NET_HDR_F_NEEDS_CSUM was set this would have been dropped in skb_partial_csum_set. GSO packets that do not set checksum offload are suspicious and rare. Most callers of virtio_net_hdr_to_skb already pass them to skb_probe_transport_header. Move that test forward, change it to detect parse failure and drop packets on failure as those cleary are not one of the legitimate VIRTIO_NET_HDR_GSO types. Fixes: bfd5f4a3d605 ("packet: Add GSO/csum offload support.") Fixes: f43798c27684 ("tun: Allow GSO using virtio_net_hdr") Reported-by: syzbot Signed-off-by: Willem de Bruijn Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/skbuff.h | 2 +- include/linux/virtio_net.h | 9 +++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 95d25b010a25..4c1c82a5678c 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2434,7 +2434,7 @@ static inline void skb_probe_transport_header(struct sk_buff *skb, if (skb_flow_dissect_flow_keys_basic(skb, &keys, NULL, 0, 0, 0, 0)) skb_set_transport_header(skb, keys.control.thoff); - else + else if (offset_hint >= 0) skb_set_transport_header(skb, offset_hint); } diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index cb462f9ab7dd..71f2394abbf7 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -57,6 +57,15 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb, if (!skb_partial_csum_set(skb, start, off)) return -EINVAL; + } else { + /* gso packets without NEEDS_CSUM do not set transport_offset. + * probe and drop if does not match one of the above types. + */ + if (gso_type) { + skb_probe_transport_header(skb, -1); + if (!skb_transport_header_was_set(skb)) + return -EINVAL; + } } if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) { -- cgit v1.2.3 From 8a5b403d71affa098009cc3dff1b2c45113021ad Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 15 Feb 2019 13:33:32 +0100 Subject: arm64, mm, efi: Account for GICv3 LPI tables in static memblock reserve table In the irqchip and EFI code, we have what basically amounts to a quirk to work around a peculiarity in the GICv3 architecture, which permits the system memory address of LPI tables to be programmable only once after a CPU reset. This means kexec kernels must use the same memory as the first kernel, and thus ensure that this memory has not been given out for other purposes by the time the ITS init code runs, which is not very early for secondary CPUs. On systems with many CPUs, these reservations could overflow the memblock reservation table, and this was addressed in commit: eff896288872 ("efi/arm: Defer persistent reservations until after paging_init()") However, this turns out to have made things worse, since the allocation of page tables and heap space for the resized memblock reservation table itself may overwrite the regions we are attempting to reserve, which may cause all kinds of corruption, also considering that the ITS will still be poking bits into that memory in response to incoming MSIs. So instead, let's grow the static memblock reservation table on such systems so it can accommodate these reservations at an earlier time. This will permit us to revert the above commit in a subsequent patch. [ mingo: Minor cleanups. ] Signed-off-by: Ard Biesheuvel Acked-by: Mike Rapoport Acked-by: Will Deacon Acked-by: Marc Zyngier Cc: Andrew Morton Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-arm-kernel@lists.infradead.org Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/20190215123333.21209-2-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar --- arch/arm64/include/asm/memory.h | 11 +++++++++++ include/linux/memblock.h | 3 --- mm/memblock.c | 11 +++++++++-- 3 files changed, 20 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index e1ec947e7c0c..0c656850eeea 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -332,6 +332,17 @@ static inline void *phys_to_virt(phys_addr_t x) #define virt_addr_valid(kaddr) \ (_virt_addr_is_linear(kaddr) && _virt_addr_valid(kaddr)) +/* + * Given that the GIC architecture permits ITS implementations that can only be + * configured with a LPI table address once, GICv3 systems with many CPUs may + * end up reserving a lot of different regions after a kexec for their LPI + * tables (one per CPU), as we are forced to reuse the same memory after kexec + * (and thus reserve it persistently with EFI beforehand) + */ +#if defined(CONFIG_EFI) && defined(CONFIG_ARM_GIC_V3_ITS) +# define INIT_MEMBLOCK_RESERVED_REGIONS (INIT_MEMBLOCK_REGIONS + NR_CPUS + 1) +#endif + #include #endif diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 64c41cf45590..859b55b66db2 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -29,9 +29,6 @@ extern unsigned long max_pfn; */ extern unsigned long long max_possible_pfn; -#define INIT_MEMBLOCK_REGIONS 128 -#define INIT_PHYSMEM_REGIONS 4 - /** * enum memblock_flags - definition of memory region attributes * @MEMBLOCK_NONE: no special request diff --git a/mm/memblock.c b/mm/memblock.c index 022d4cbb3618..ea31045ba704 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -26,6 +26,13 @@ #include "internal.h" +#define INIT_MEMBLOCK_REGIONS 128 +#define INIT_PHYSMEM_REGIONS 4 + +#ifndef INIT_MEMBLOCK_RESERVED_REGIONS +# define INIT_MEMBLOCK_RESERVED_REGIONS INIT_MEMBLOCK_REGIONS +#endif + /** * DOC: memblock overview * @@ -92,7 +99,7 @@ unsigned long max_pfn; unsigned long long max_possible_pfn; static struct memblock_region memblock_memory_init_regions[INIT_MEMBLOCK_REGIONS] __initdata_memblock; -static struct memblock_region memblock_reserved_init_regions[INIT_MEMBLOCK_REGIONS] __initdata_memblock; +static struct memblock_region memblock_reserved_init_regions[INIT_MEMBLOCK_RESERVED_REGIONS] __initdata_memblock; #ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP static struct memblock_region memblock_physmem_init_regions[INIT_PHYSMEM_REGIONS] __initdata_memblock; #endif @@ -105,7 +112,7 @@ struct memblock memblock __initdata_memblock = { .reserved.regions = memblock_reserved_init_regions, .reserved.cnt = 1, /* empty dummy entry */ - .reserved.max = INIT_MEMBLOCK_REGIONS, + .reserved.max = INIT_MEMBLOCK_RESERVED_REGIONS, .reserved.name = "reserved", #ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP -- cgit v1.2.3 From 582a32e708823e5957fd73ccd78dc4a9e49d21ea Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 15 Feb 2019 13:33:33 +0100 Subject: efi/arm: Revert "Defer persistent reservations until after paging_init()" This reverts commit eff896288872d687d9662000ec9ae11b6d61766f, which deferred the processing of persistent memory reservations to a point where the memory may have already been allocated and overwritten, defeating the purpose. Signed-off-by: Ard Biesheuvel Acked-by: Will Deacon Cc: Linus Torvalds Cc: Marc Zyngier Cc: Mike Rapoport Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-arm-kernel@lists.infradead.org Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/20190215123333.21209-3-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar --- arch/arm64/kernel/setup.c | 1 - drivers/firmware/efi/efi.c | 4 ---- drivers/firmware/efi/libstub/arm-stub.c | 3 --- include/linux/efi.h | 7 ------- 4 files changed, 15 deletions(-) (limited to 'include/linux') diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 4b0e1231625c..d09ec76f08cf 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -313,7 +313,6 @@ void __init setup_arch(char **cmdline_p) arm64_memblock_init(); paging_init(); - efi_apply_persistent_mem_reservations(); acpi_table_upgrade(); diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index 4c46ff6f2242..55b77c576c42 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -592,11 +592,7 @@ int __init efi_config_parse_tables(void *config_tables, int count, int sz, early_memunmap(tbl, sizeof(*tbl)); } - return 0; -} -int __init efi_apply_persistent_mem_reservations(void) -{ if (efi.mem_reserve != EFI_INVALID_TABLE_ADDR) { unsigned long prsv = efi.mem_reserve; diff --git a/drivers/firmware/efi/libstub/arm-stub.c b/drivers/firmware/efi/libstub/arm-stub.c index eee42d5e25ee..c037c6c5d0b7 100644 --- a/drivers/firmware/efi/libstub/arm-stub.c +++ b/drivers/firmware/efi/libstub/arm-stub.c @@ -75,9 +75,6 @@ void install_memreserve_table(efi_system_table_t *sys_table_arg) efi_guid_t memreserve_table_guid = LINUX_EFI_MEMRESERVE_TABLE_GUID; efi_status_t status; - if (IS_ENABLED(CONFIG_ARM)) - return; - status = efi_call_early(allocate_pool, EFI_LOADER_DATA, sizeof(*rsv), (void **)&rsv); if (status != EFI_SUCCESS) { diff --git a/include/linux/efi.h b/include/linux/efi.h index 45ff763fba76..28604a8d0aa9 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1198,8 +1198,6 @@ static inline bool efi_enabled(int feature) extern void efi_reboot(enum reboot_mode reboot_mode, const char *__unused); extern bool efi_is_table_address(unsigned long phys_addr); - -extern int efi_apply_persistent_mem_reservations(void); #else static inline bool efi_enabled(int feature) { @@ -1218,11 +1216,6 @@ static inline bool efi_is_table_address(unsigned long phys_addr) { return false; } - -static inline int efi_apply_persistent_mem_reservations(void) -{ - return 0; -} #endif extern int efi_status_to_err(efi_status_t status); -- cgit v1.2.3 From 8681ef1f3d295bd3600315325f3b3396d76d02f6 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 16 Feb 2019 13:44:39 -0800 Subject: net: Add header for usage of fls64() Fixes: 3b89ea9c5902 ("net: Fix for_each_netdev_feature on Big endian") Suggested-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdev_features.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index fce28562bed2..4c76fe2c8488 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -11,6 +11,7 @@ #define _LINUX_NETDEV_FEATURES_H #include +#include #include typedef u64 netdev_features_t; -- cgit v1.2.3 From 744e458aebf8dc7f33eee9af61aeb0145de921a6 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sun, 17 Feb 2019 10:28:33 +0100 Subject: net: phy: add helper linkmode_adv_to_mii_10gbt_adv_t Add a helper linkmode_adv_to_mii_10gbt_adv_t(), similar to linkmode_adv_to_mii_adv_t. Signed-off-by: Heiner Kallweit Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/mdio.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mdio.h b/include/linux/mdio.h index bfa7114167d7..dd46828b4c47 100644 --- a/include/linux/mdio.h +++ b/include/linux/mdio.h @@ -261,6 +261,31 @@ static inline u16 ethtool_adv_to_mmd_eee_adv_t(u32 adv) return reg; } +/** + * linkmode_adv_to_mii_10gbt_adv_t + * @advertising: the linkmode advertisement settings + * + * A small helper function that translates linkmode advertisement + * settings to phy autonegotiation advertisements for the C45 + * 10GBASE-T AN CONTROL (7.32) register. + */ +static inline u32 linkmode_adv_to_mii_10gbt_adv_t(unsigned long *advertising) +{ + u32 result = 0; + + if (linkmode_test_bit(ETHTOOL_LINK_MODE_2500baseT_Full_BIT, + advertising)) + result |= MDIO_AN_10GBT_CTRL_ADV2_5G; + if (linkmode_test_bit(ETHTOOL_LINK_MODE_5000baseT_Full_BIT, + advertising)) + result |= MDIO_AN_10GBT_CTRL_ADV5G; + if (linkmode_test_bit(ETHTOOL_LINK_MODE_10000baseT_Full_BIT, + advertising)) + result |= MDIO_AN_10GBT_CTRL_ADV10G; + + return result; +} + int __mdiobus_read(struct mii_bus *bus, int addr, u32 regnum); int __mdiobus_write(struct mii_bus *bus, int addr, u32 regnum, u16 val); -- cgit v1.2.3 From 9a5dc8af441668a3db7fdcd927cb288be62c0a2e Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sun, 17 Feb 2019 10:29:19 +0100 Subject: net: phy: add genphy_c45_an_config_aneg C45 configuration of 10/100 and multi-giga bit auto negotiation advertisement is standardized. Configuration of 1000Base-T however appears to be vendor specific. Move the generic code out of the Marvell driver into the common phy-c45.c file. v2: - change function name to genphy_c45_an_config_aneg Signed-off-by: Andrew Lunn [hkallweit1@gmail.com: use new helper linkmode_adv_to_mii_10gbt_adv_t and split patch] Signed-off-by: Heiner Kallweit Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/phy-c45.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/phy.h | 1 + 2 files changed, 45 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/phy/phy-c45.c b/drivers/net/phy/phy-c45.c index 7af5fa81daf6..e17672bd180e 100644 --- a/drivers/net/phy/phy-c45.c +++ b/drivers/net/phy/phy-c45.c @@ -74,6 +74,50 @@ int genphy_c45_pma_setup_forced(struct phy_device *phydev) } EXPORT_SYMBOL_GPL(genphy_c45_pma_setup_forced); +/** + * genphy_c45_an_config_aneg - configure advertisement registers + * @phydev: target phy_device struct + * + * Configure advertisement registers based on modes set in phydev->advertising + * + * Returns negative errno code on failure, 0 if advertisement didn't change, + * or 1 if advertised modes changed. + */ +int genphy_c45_an_config_aneg(struct phy_device *phydev) +{ + int changed = 0, ret; + u32 adv; + + linkmode_and(phydev->advertising, phydev->advertising, + phydev->supported); + + adv = linkmode_adv_to_mii_adv_t(phydev->advertising); + + ret = phy_modify_mmd(phydev, MDIO_MMD_AN, MDIO_AN_ADVERTISE, + ADVERTISE_ALL | ADVERTISE_100BASE4 | + ADVERTISE_PAUSE_CAP | ADVERTISE_PAUSE_ASYM, + adv); + if (ret < 0) + return ret; + if (ret > 0) + changed = 1; + + adv = linkmode_adv_to_mii_10gbt_adv_t(phydev->advertising); + + ret = phy_modify_mmd(phydev, MDIO_MMD_AN, MDIO_AN_10GBT_CTRL, + MDIO_AN_10GBT_CTRL_ADV10G | + MDIO_AN_10GBT_CTRL_ADV5G | + MDIO_AN_10GBT_CTRL_ADV2_5G, + adv); + if (ret < 0) + return ret; + if (ret > 0) + changed = 1; + + return changed; +} +EXPORT_SYMBOL_GPL(genphy_c45_an_config_aneg); + /** * genphy_c45_an_disable_aneg - disable auto-negotiation * @phydev: target phy_device struct diff --git a/include/linux/phy.h b/include/linux/phy.h index bf1070c2a53b..3db507e68191 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1101,6 +1101,7 @@ int genphy_c45_read_link(struct phy_device *phydev); int genphy_c45_read_lpa(struct phy_device *phydev); int genphy_c45_read_pma(struct phy_device *phydev); int genphy_c45_pma_setup_forced(struct phy_device *phydev); +int genphy_c45_an_config_aneg(struct phy_device *phydev); int genphy_c45_an_disable_aneg(struct phy_device *phydev); int genphy_c45_read_mdix(struct phy_device *phydev); int genphy_c45_pma_read_abilities(struct phy_device *phydev); -- cgit v1.2.3 From 58ecf2688cc9b44d2e8f830c16212edbeaef4dce Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Sat, 16 Feb 2019 10:37:56 +0800 Subject: ptr_ring: remove duplicated include from ptr_ring.h Remove duplicated include. Signed-off-by: YueHaibing Signed-off-by: David S. Miller --- include/linux/ptr_ring.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ptr_ring.h b/include/linux/ptr_ring.h index 186cd8e970c7..8da46ac44a2e 100644 --- a/include/linux/ptr_ring.h +++ b/include/linux/ptr_ring.h @@ -26,7 +26,6 @@ #include #include #include -#include #include #include #endif -- cgit v1.2.3 From 9004a14cb688c69194002aa2fadda4433c3b79fb Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sat, 16 Feb 2019 17:26:05 +0100 Subject: net: phy: add helper mii_10gbt_stat_mod_linkmode_lpa_t Similar to the existing helpers for the Clause 22 registers add helper mii_10gbt_stat_mod_linkmode_lpa_t. Note that this helper is defined in linux/mdio.h, not like the Clause 22 helpers in linux/mii.h. Reason is that the Clause 45 register constants are defined in uapi/linux/mdio.h. And uapi/linux/mdio.h includes linux/mii.h before defining the C45 register constants. v2: - remove helpers that don't have users in this series Signed-off-by: Heiner Kallweit Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/mdio.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mdio.h b/include/linux/mdio.h index dd46828b4c47..3e99ae3ed87f 100644 --- a/include/linux/mdio.h +++ b/include/linux/mdio.h @@ -286,6 +286,25 @@ static inline u32 linkmode_adv_to_mii_10gbt_adv_t(unsigned long *advertising) return result; } +/** + * mii_10gbt_stat_mod_linkmode_lpa_t + * @advertising: target the linkmode advertisement settings + * @adv: value of the C45 10GBASE-T AN STATUS register + * + * A small helper function that translates C45 10GBASE-T AN STATUS register bits + * to linkmode advertisement settings. Other bits in advertising aren't changed. + */ +static inline void mii_10gbt_stat_mod_linkmode_lpa_t(unsigned long *advertising, + u32 lpa) +{ + linkmode_mod_bit(ETHTOOL_LINK_MODE_2500baseT_Full_BIT, + advertising, lpa & MDIO_AN_10GBT_STAT_LP2_5G); + linkmode_mod_bit(ETHTOOL_LINK_MODE_5000baseT_Full_BIT, + advertising, lpa & MDIO_AN_10GBT_STAT_LP5G); + linkmode_mod_bit(ETHTOOL_LINK_MODE_10000baseT_Full_BIT, + advertising, lpa & MDIO_AN_10GBT_STAT_LP10G); +} + int __mdiobus_read(struct mii_bus *bus, int addr, u32 regnum); int __mdiobus_write(struct mii_bus *bus, int addr, u32 regnum, u16 val); -- cgit v1.2.3 From 58066ac9d7f5dcde4ef08c03b7e127f0522d9ea0 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 19 Feb 2019 14:21:20 +0000 Subject: ptp_qoriq: don't pass a large struct by value but instead pass it by reference Passing the struct ptp_clock_info caps by parameter is passing over 130 bytes of data by value on the stack. Optimize this by passing it by reference instead. Also shinks the object code size: Before: text data bss dec hex filename 12596 2160 64 14820 39e4 drivers/ptp/ptp_qoriq.o After: text data bss dec hex filename 12567 2160 64 14791 39c7 drivers/ptp/ptp_qoriq.o Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/enetc/enetc_ptp.c | 2 +- drivers/ptp/ptp_qoriq.c | 6 +++--- include/linux/fsl/ptp_qoriq.h | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/freescale/enetc/enetc_ptp.c b/drivers/net/ethernet/freescale/enetc/enetc_ptp.c index dc2f58a7c9e5..8c1497e7d9c5 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_ptp.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_ptp.c @@ -92,7 +92,7 @@ static int enetc_ptp_probe(struct pci_dev *pdev, ptp_qoriq->dev = &pdev->dev; - err = ptp_qoriq_init(ptp_qoriq, base, enetc_ptp_caps); + err = ptp_qoriq_init(ptp_qoriq, base, &enetc_ptp_caps); if (err) goto err_no_clock; diff --git a/drivers/ptp/ptp_qoriq.c b/drivers/ptp/ptp_qoriq.c index 42d3654f77f0..53775362aac6 100644 --- a/drivers/ptp/ptp_qoriq.c +++ b/drivers/ptp/ptp_qoriq.c @@ -459,7 +459,7 @@ static int ptp_qoriq_auto_config(struct ptp_qoriq *ptp_qoriq, } int ptp_qoriq_init(struct ptp_qoriq *ptp_qoriq, void __iomem *base, - const struct ptp_clock_info caps) + const struct ptp_clock_info *caps) { struct device_node *node = ptp_qoriq->dev->of_node; struct ptp_qoriq_registers *regs; @@ -468,7 +468,7 @@ int ptp_qoriq_init(struct ptp_qoriq *ptp_qoriq, void __iomem *base, u32 tmr_ctrl; ptp_qoriq->base = base; - ptp_qoriq->caps = caps; + ptp_qoriq->caps = *caps; if (of_property_read_u32(node, "fsl,cksel", &ptp_qoriq->cksel)) ptp_qoriq->cksel = DEFAULT_CKSEL; @@ -605,7 +605,7 @@ static int ptp_qoriq_probe(struct platform_device *dev) goto no_ioremap; } - err = ptp_qoriq_init(ptp_qoriq, base, ptp_qoriq_caps); + err = ptp_qoriq_init(ptp_qoriq, base, &ptp_qoriq_caps); if (err) goto no_clock; diff --git a/include/linux/fsl/ptp_qoriq.h b/include/linux/fsl/ptp_qoriq.h index f127adb71041..992bf9fa1729 100644 --- a/include/linux/fsl/ptp_qoriq.h +++ b/include/linux/fsl/ptp_qoriq.h @@ -183,7 +183,7 @@ static inline void qoriq_write_le(unsigned __iomem *addr, u32 val) irqreturn_t ptp_qoriq_isr(int irq, void *priv); int ptp_qoriq_init(struct ptp_qoriq *ptp_qoriq, void __iomem *base, - const struct ptp_clock_info caps); + const struct ptp_clock_info *caps); void ptp_qoriq_free(struct ptp_qoriq *ptp_qoriq); int ptp_qoriq_adjfine(struct ptp_clock_info *ptp, long scaled_ppm); int ptp_qoriq_adjtime(struct ptp_clock_info *ptp, s64 delta); -- cgit v1.2.3 From cd34499cacf3c34e2e094ff2a347b9378417970c Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Mon, 18 Feb 2019 21:26:58 +0100 Subject: net: phy: export genphy_config_eee_advert We want to use this function in phy-c45.c too, therefore export it. Signed-off-by: Heiner Kallweit Signed-off-by: David S. Miller --- drivers/net/phy/phy_device.c | 3 ++- include/linux/phy.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 4bb3b6c2894e..49fdd1ee798e 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -1575,7 +1575,7 @@ static int genphy_config_advert(struct phy_device *phydev) * efficent ethernet modes. Returns 0 if the PHY's advertisement hasn't * changed, and 1 if it has changed. */ -static int genphy_config_eee_advert(struct phy_device *phydev) +int genphy_config_eee_advert(struct phy_device *phydev) { int err; @@ -1588,6 +1588,7 @@ static int genphy_config_eee_advert(struct phy_device *phydev) /* If the call failed, we assume that EEE is not supported */ return err < 0 ? 0 : err; } +EXPORT_SYMBOL(genphy_config_eee_advert); /** * genphy_setup_forced - configures/forces speed/duplex from @phydev diff --git a/include/linux/phy.h b/include/linux/phy.h index 3db507e68191..761131de4971 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1077,6 +1077,7 @@ void phy_attached_info(struct phy_device *phydev); int genphy_config_init(struct phy_device *phydev); int genphy_setup_forced(struct phy_device *phydev); int genphy_restart_aneg(struct phy_device *phydev); +int genphy_config_eee_advert(struct phy_device *phydev); int genphy_config_aneg(struct phy_device *phydev); int genphy_aneg_done(struct phy_device *phydev); int genphy_update_link(struct phy_device *phydev); -- cgit v1.2.3 From 1af9f16840e920c6193490ae58371fc5cc28bb01 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Mon, 18 Feb 2019 21:27:18 +0100 Subject: net: phy: add genphy_c45_check_and_restart_aneg This function will be used by config_aneg callback implementations of PHY drivers and allows to reduce boilerplate code. Signed-off-by: Heiner Kallweit Signed-off-by: David S. Miller --- drivers/net/phy/phy-c45.c | 30 ++++++++++++++++++++++++++++++ include/linux/phy.h | 1 + 2 files changed, 31 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/phy/phy-c45.c b/drivers/net/phy/phy-c45.c index 2f5721430e05..fc3173cc078b 100644 --- a/drivers/net/phy/phy-c45.c +++ b/drivers/net/phy/phy-c45.c @@ -156,6 +156,36 @@ int genphy_c45_restart_aneg(struct phy_device *phydev) } EXPORT_SYMBOL_GPL(genphy_c45_restart_aneg); +/** + * genphy_c45_check_and_restart_aneg - Enable and restart auto-negotiation + * @phydev: target phy_device struct + * @restart: whether aneg restart is requested + * + * This assumes that the auto-negotiation MMD is present. + * + * Check, and restart auto-negotiation if needed. + */ +int genphy_c45_check_and_restart_aneg(struct phy_device *phydev, bool restart) +{ + int ret = 0; + + if (!restart) { + /* Configure and restart aneg if it wasn't set before */ + ret = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_CTRL1); + if (ret < 0) + return ret; + + if (!(ret & MDIO_AN_CTRL1_ENABLE)) + restart = true; + } + + if (restart) + ret = genphy_c45_restart_aneg(phydev); + + return ret; +} +EXPORT_SYMBOL_GPL(genphy_c45_check_and_restart_aneg); + /** * genphy_c45_aneg_done - return auto-negotiation complete status * @phydev: target phy_device struct diff --git a/include/linux/phy.h b/include/linux/phy.h index 761131de4971..8e9fc576472b 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1097,6 +1097,7 @@ int genphy_write_mmd_unsupported(struct phy_device *phdev, int devnum, /* Clause 45 PHY */ int genphy_c45_restart_aneg(struct phy_device *phydev); +int genphy_c45_check_and_restart_aneg(struct phy_device *phydev, bool restart); int genphy_c45_aneg_done(struct phy_device *phydev); int genphy_c45_read_link(struct phy_device *phydev); int genphy_c45_read_lpa(struct phy_device *phydev); -- cgit v1.2.3