| author | Martin KaFai Lau <martin.lau@kernel.org> | 2023-01-23 09:38:12 -0800 |
| committer | Martin KaFai Lau <martin.lau@kernel.org> | 2023-01-23 10:00:26 -0800 |
| commit | 74d23931f4096efcdef1bc12cc828585e49e99ce (patch) | |
| tree | 1d83c9253620d168e33cb43ef23cac0d7a0e3ebe /include | |
| parent | 84150795a49ae26cf8096517b543f4cd2ed5e87f (diff) | |
| parent | 297a3f1241550f6969f65a5efeee9162241daae5 (diff) | |
Merge branch 'xdp: hints via kfuncs'
Stanislav Fomichev says:
====================
Please see the first patch in the series for the overall
design and use-cases.
See the following email from Toke for the per-packet metadata overhead:
https://lore.kernel.org/bpf/20221206024554.3826186-1-sdf@google.com/T/#m49d48ea08d525ec88360c7d14c4d34fb0e45e798
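[Editor's note: as a quick illustration of the consumer side, here is a minimal sketch of an XDP program calling the two metadata kfuncs this series adds (bpf_xdp_metadata_rx_timestamp/bpf_xdp_metadata_rx_hash). It mirrors what the xdp_metadata selftest does, with error handling trimmed; it is a sketch, not the authoritative example from the series.]

```c
/* Minimal sketch: consume RX hints from inside an XDP program.
 * The program must be loaded as device-bound (see BPF_F_XDP_DEV_BOUND_ONLY
 * in the uapi hunk below) so the verifier can resolve these
 * driver-specific kfuncs.
 */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

extern int bpf_xdp_metadata_rx_timestamp(const struct xdp_md *ctx,
					 __u64 *timestamp) __ksym;
extern int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx,
				    __u32 *hash) __ksym;

SEC("xdp")
int rx_hints(struct xdp_md *ctx)
{
	__u64 ts;
	__u32 hash;

	/* Each kfunc returns 0 on success; a device may expose one hint
	 * but not the other, so check both independently. */
	if (!bpf_xdp_metadata_rx_timestamp(ctx, &ts))
		bpf_printk("rx timestamp: %llu", ts);
	if (!bpf_xdp_metadata_rx_hash(ctx, &hash))
		bpf_printk("rx hash: 0x%x", hash);

	return XDP_PASS;
}

char _license[] SEC("license") = "GPL";
```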
Recent changes:
- Keep new functions in en/xdp.c, do 'extern mlx5_xdp_metadata_ops' (Tariq)
- Remove mxbuf pointer and use xsk_buff_to_mxbuf (Tariq)
- Clarify xdp_buff vs 'XDP frame' (Jesper)
- Explicitly mention that AF_XDP RX descriptor lacks metadata size (Jesper)
- Drop libbpf_flags/xdp_flags from selftests and use ifindex instead
of ifname (due to recent xsk.h refactoring)
Prior art (to record pros/cons for different approaches):
- Stable UAPI approach:
https://lore.kernel.org/bpf/20220628194812.1453059-1-alexandr.lobakin@intel.com/
- Metadata+BTF_ID approach:
https://lore.kernel.org/bpf/166256538687.1434226.15760041133601409770.stgit@firesoul/
- v7:
https://lore.kernel.org/bpf/20230112003230.3779451-1-sdf@google.com/
- v6:
https://lore.kernel.org/bpf/20230104215949.529093-1-sdf@google.com/
- v5:
https://lore.kernel.org/bpf/20221220222043.3348718-1-sdf@google.com/
- v4:
https://lore.kernel.org/bpf/20221213023605.737383-1-sdf@google.com/
- v3:
https://lore.kernel.org/bpf/20221206024554.3826186-1-sdf@google.com/
- v2:
https://lore.kernel.org/bpf/20221121182552.2152891-1-sdf@google.com/
- v1:
https://lore.kernel.org/bpf/20221115030210.3159213-1-sdf@google.com/
- kfuncs v2 RFC:
https://lore.kernel.org/bpf/20221027200019.4106375-1-sdf@google.com/
- kfuncs v1 RFC:
https://lore.kernel.org/bpf/20221104032532.1615099-1-sdf@google.com/
Cc: John Fastabend <john.fastabend@gmail.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Martin KaFai Lau <martin.lau@linux.dev>
Cc: Jakub Kicinski <kuba@kernel.org>
Cc: Willem de Bruijn <willemb@google.com>
Cc: Jesper Dangaard Brouer <brouer@redhat.com>
Cc: Anatoly Burakov <anatoly.burakov@intel.com>
Cc: Alexander Lobakin <alexandr.lobakin@intel.com>
Cc: Magnus Karlsson <magnus.karlsson@gmail.com>
Cc: Maryam Tahhan <mtahhan@redhat.com>
Cc: xdp-hints@xdp-project.net
Cc: netdev@vger.kernel.org
Stanislav Fomichev (13):
bpf: Document XDP RX metadata
bpf: Rename bpf_{prog,map}_is_dev_bound to is_offloaded
bpf: Move offload initialization into late_initcall
bpf: Reshuffle some parts of bpf/offload.c
bpf: Introduce device-bound XDP programs
selftests/bpf: Update expected test_offload.py messages
bpf: XDP metadata RX kfuncs
veth: Introduce veth_xdp_buff wrapper for xdp_buff
veth: Support RX XDP metadata
selftests/bpf: Verify xdp_metadata xdp->af_xdp path
net/mlx4_en: Introduce wrapper for xdp_buff
net/mlx4_en: Support RX XDP metadata
selftests/bpf: Simple program to dump XDP RX metadata
====================
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
Diffstat (limited to 'include')
| -rw-r--r-- | include/linux/bpf.h | 61 |
| -rw-r--r-- | include/linux/netdevice.h | 8 |
| -rw-r--r-- | include/net/xdp.h | 21 |
| -rw-r--r-- | include/net/xsk_buff_pool.h | 5 |
| -rw-r--r-- | include/uapi/linux/bpf.h | 5 |
5 files changed, 92 insertions, 8 deletions
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index ae7771c7d750..ad4bb36d4c10 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1261,7 +1261,8 @@ struct bpf_prog_aux {
 	enum bpf_prog_type saved_dst_prog_type;
 	enum bpf_attach_type saved_dst_attach_type;
 	bool verifier_zext; /* Zero extensions has been inserted by verifier. */
-	bool offload_requested;
+	bool dev_bound; /* Program is bound to the netdev. */
+	bool offload_requested; /* Program is bound and offloaded to the netdev. */
 	bool attach_btf_trace; /* true if attaching to BTF-enabled raw tp */
 	bool func_proto_unreliable;
 	bool sleepable;
@@ -2451,7 +2452,7 @@ void __bpf_free_used_maps(struct bpf_prog_aux *aux,
 bool bpf_prog_get_ok(struct bpf_prog *, enum bpf_prog_type *, bool);
 
 int bpf_prog_offload_compile(struct bpf_prog *prog);
-void bpf_prog_offload_destroy(struct bpf_prog *prog);
+void bpf_prog_dev_bound_destroy(struct bpf_prog *prog);
 int bpf_prog_offload_info_fill(struct bpf_prog_info *info,
 			       struct bpf_prog *prog);
 
@@ -2479,14 +2480,26 @@ bool bpf_offload_dev_match(struct bpf_prog *prog, struct net_device *netdev);
 void unpriv_ebpf_notify(int new_state);
 
 #if defined(CONFIG_NET) && defined(CONFIG_BPF_SYSCALL)
-int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr);
+int bpf_dev_bound_kfunc_check(struct bpf_verifier_log *log,
+			      struct bpf_prog_aux *prog_aux);
+void *bpf_dev_bound_resolve_kfunc(struct bpf_prog *prog, u32 func_id);
+int bpf_prog_dev_bound_init(struct bpf_prog *prog, union bpf_attr *attr);
+int bpf_prog_dev_bound_inherit(struct bpf_prog *new_prog, struct bpf_prog *old_prog);
+void bpf_dev_bound_netdev_unregister(struct net_device *dev);
 
 static inline bool bpf_prog_is_dev_bound(const struct bpf_prog_aux *aux)
 {
+	return aux->dev_bound;
+}
+
+static inline bool bpf_prog_is_offloaded(const struct bpf_prog_aux *aux)
+{
 	return aux->offload_requested;
 }
 
-static inline bool bpf_map_is_dev_bound(struct bpf_map *map)
+bool bpf_prog_dev_bound_match(const struct bpf_prog *lhs, const struct bpf_prog *rhs);
+
+static inline bool bpf_map_is_offloaded(struct bpf_map *map)
 {
 	return unlikely(map->ops == &bpf_map_offload_ops);
 }
@@ -2507,18 +2520,50 @@ void sock_map_unhash(struct sock *sk);
 void sock_map_destroy(struct sock *sk);
 void sock_map_close(struct sock *sk, long timeout);
 #else
-static inline int bpf_prog_offload_init(struct bpf_prog *prog,
-					union bpf_attr *attr)
+static inline int bpf_dev_bound_kfunc_check(struct bpf_verifier_log *log,
+					    struct bpf_prog_aux *prog_aux)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline void *bpf_dev_bound_resolve_kfunc(struct bpf_prog *prog,
+						u32 func_id)
+{
+	return NULL;
+}
+
+static inline int bpf_prog_dev_bound_init(struct bpf_prog *prog,
+					  union bpf_attr *attr)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int bpf_prog_dev_bound_inherit(struct bpf_prog *new_prog,
+					     struct bpf_prog *old_prog)
 {
 	return -EOPNOTSUPP;
 }
 
-static inline bool bpf_prog_is_dev_bound(struct bpf_prog_aux *aux)
+static inline void bpf_dev_bound_netdev_unregister(struct net_device *dev)
+{
+}
+
+static inline bool bpf_prog_is_dev_bound(const struct bpf_prog_aux *aux)
+{
+	return false;
+}
+
+static inline bool bpf_prog_is_offloaded(struct bpf_prog_aux *aux)
+{
+	return false;
+}
+
+static inline bool bpf_prog_dev_bound_match(const struct bpf_prog *lhs, const struct bpf_prog *rhs)
 {
 	return false;
 }
 
-static inline bool bpf_map_is_dev_bound(struct bpf_map *map)
+static inline bool bpf_map_is_offloaded(struct bpf_map *map)
 {
 	return false;
 }
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index aad12a179e54..90f2be194bc5 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -74,6 +74,7 @@ struct udp_tunnel_nic_info;
 struct udp_tunnel_nic;
 struct bpf_prog;
 struct xdp_buff;
+struct xdp_md;
 
 void synchronize_net(void);
 void netdev_set_default_ethtool_ops(struct net_device *dev,
@@ -1618,6 +1619,11 @@ struct net_device_ops {
 						  bool cycles);
 };
 
+struct xdp_metadata_ops {
+	int	(*xmo_rx_timestamp)(const struct xdp_md *ctx, u64 *timestamp);
+	int	(*xmo_rx_hash)(const struct xdp_md *ctx, u32 *hash);
+};
+
 /**
  * enum netdev_priv_flags - &struct net_device priv_flags
  *
@@ -1801,6 +1807,7 @@ enum netdev_ml_priv_type {
  *
  *	@netdev_ops:	Includes several pointers to callbacks,
  *			if one wants to override the ndo_*() functions
+ *	@xdp_metadata_ops:	Includes pointers to XDP metadata callbacks.
  *	@ethtool_ops:	Management operations
  *	@l3mdev_ops:	Layer 3 master device operations
  *	@ndisc_ops:	Includes callbacks for different IPv6 neighbour
@@ -2050,6 +2057,7 @@ struct net_device {
 	unsigned int		flags;
 	unsigned long long	priv_flags;
 	const struct net_device_ops *netdev_ops;
+	const struct xdp_metadata_ops *xdp_metadata_ops;
 	int			ifindex;
 	unsigned short		gflags;
 	unsigned short		hard_header_len;
diff --git a/include/net/xdp.h b/include/net/xdp.h
index 55dbc68bfffc..91292aa13bc0 100644
--- a/include/net/xdp.h
+++ b/include/net/xdp.h
@@ -409,4 +409,25 @@ void xdp_attachment_setup(struct xdp_attachment_info *info,
 
 #define DEV_MAP_BULK_SIZE XDP_BULK_QUEUE_SIZE
 
+#define XDP_METADATA_KFUNC_xxx	\
+	XDP_METADATA_KFUNC(XDP_METADATA_KFUNC_RX_TIMESTAMP, \
+			   bpf_xdp_metadata_rx_timestamp) \
+	XDP_METADATA_KFUNC(XDP_METADATA_KFUNC_RX_HASH, \
+			   bpf_xdp_metadata_rx_hash) \
+
+enum {
+#define XDP_METADATA_KFUNC(name, _) name,
+XDP_METADATA_KFUNC_xxx
+#undef XDP_METADATA_KFUNC
+MAX_XDP_METADATA_KFUNC,
+};
+
+#ifdef CONFIG_NET
+u32 bpf_xdp_metadata_kfunc_id(int id);
+bool bpf_dev_bound_kfunc_id(u32 btf_id);
+#else
+static inline u32 bpf_xdp_metadata_kfunc_id(int id) { return 0; }
+static inline bool bpf_dev_bound_kfunc_id(u32 btf_id) { return false; }
+#endif
+
 #endif /* __LINUX_NET_XDP_H__ */
diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h
index f787c3f524b0..3e952e569418 100644
--- a/include/net/xsk_buff_pool.h
+++ b/include/net/xsk_buff_pool.h
@@ -19,8 +19,11 @@ struct xdp_sock;
 struct device;
 struct page;
 
+#define XSK_PRIV_MAX 24
+
 struct xdp_buff_xsk {
 	struct xdp_buff xdp;
+	u8 cb[XSK_PRIV_MAX];
 	dma_addr_t dma;
 	dma_addr_t frame_dma;
 	struct xsk_buff_pool *pool;
@@ -28,6 +31,8 @@ struct xdp_buff_xsk {
 	struct list_head free_list_node;
 };
 
+#define XSK_CHECK_PRIV_TYPE(t) BUILD_BUG_ON(sizeof(t) > offsetofend(struct xdp_buff_xsk, cb))
+
 struct xsk_dma_map {
 	dma_addr_t *dma_pages;
 	struct device *dev;
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index adae5b168f9d..ba0f0cfb5e42 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1156,6 +1156,11 @@ enum bpf_link_type {
  */
 #define BPF_F_XDP_HAS_FRAGS	(1U << 5)
 
+/* If BPF_F_XDP_DEV_BOUND_ONLY is used in BPF_PROG_LOAD command, the loaded
+ * program becomes device-bound but can access XDP metadata.
+ */
+#define BPF_F_XDP_DEV_BOUND_ONLY	(1U << 6)
+
 /* link_create.kprobe_multi.flags used in LINK_CREATE command for
  * BPF_TRACE_KPROBE_MULTI attach type to create return probe.
  */
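
[Editor's note: to tie the hunks above together, a device advertises RX hints by populating the new struct xdp_metadata_ops and pointing netdev->xdp_metadata_ops at it. The sketch below is hypothetical: the mydrv_* names and the mydrv_xdp_buff wrapper are invented for illustration (the veth and mlx4 patches in this series are the real implementations). It shows the shape of the contract: the xdp_md pointer handed to the callback is actually the driver's own xdp_buff, so a wrapper struct lets the callback reach per-frame descriptor data.]

```c
/* Hypothetical driver-side sketch of the xdp_metadata_ops contract.
 * mydrv_* identifiers are invented for illustration only.
 */
#include <linux/errno.h>
#include <linux/netdevice.h>
#include <net/xdp.h>
#include <net/xsk_buff_pool.h>

struct mydrv_xdp_buff {
	struct xdp_buff xdp;	/* must be first: ctx below points at it */
	u64 rx_timestamp;	/* filled from the RX completion descriptor */
	u32 rx_hash;
	bool rx_hash_valid;
};

static int mydrv_xmo_rx_timestamp(const struct xdp_md *ctx, u64 *timestamp)
{
	const struct mydrv_xdp_buff *buf = (const void *)ctx;

	if (!buf->rx_timestamp)
		return -ENODATA;	/* hint not available for this frame */

	*timestamp = buf->rx_timestamp;
	return 0;
}

static int mydrv_xmo_rx_hash(const struct xdp_md *ctx, u32 *hash)
{
	const struct mydrv_xdp_buff *buf = (const void *)ctx;

	if (!buf->rx_hash_valid)
		return -ENODATA;

	*hash = buf->rx_hash;
	return 0;
}

static const struct xdp_metadata_ops mydrv_xdp_metadata_ops = {
	.xmo_rx_timestamp	= mydrv_xmo_rx_timestamp,
	.xmo_rx_hash		= mydrv_xmo_rx_hash,
};

static void mydrv_register_xdp_metadata(struct net_device *netdev)
{
	/* Only relevant for zero-copy AF_XDP, where the wrapper is overlaid
	 * on xdp_buff_xsk; that is what the new cb[XSK_PRIV_MAX] area and
	 * XSK_CHECK_PRIV_TYPE() above are for. */
	XSK_CHECK_PRIV_TYPE(struct mydrv_xdp_buff);

	netdev->xdp_metadata_ops = &mydrv_xdp_metadata_ops;
}
```

[On the consumer side, the new BPF_F_XDP_DEV_BOUND_ONLY load flag from the last hunk makes the program device-bound without forcing full offload; the selftests in this series set it, together with the target ifindex, through libbpf (bpf_program__set_flags()/bpf_program__set_ifindex()) before loading the metadata-reading program.]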
