From 81f6bf81270ce1052b5cd4d60b9edc40cd5ceefa Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Wed, 26 Jul 2017 17:32:07 -0700 Subject: bpf: testing: fix devmap tests Apparently through one of my revisions of the initial patches series I lost the devmap test. We can add more testing later but for now lets fix the simple one we have. Fixes: 546ac1ffb70d "bpf: add devmap, a map for storing net device references" Reported-by: Jakub Kicinski Signed-off-by: John Fastabend Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- tools/include/uapi/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'tools/include/uapi/linux') diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index ce2988be4f0e..1579cab49717 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -104,6 +104,7 @@ enum bpf_map_type { BPF_MAP_TYPE_LPM_TRIE, BPF_MAP_TYPE_ARRAY_OF_MAPS, BPF_MAP_TYPE_HASH_OF_MAPS, + BPF_MAP_TYPE_DEVMAP, }; enum bpf_prog_type { -- cgit v1.2.3 From 996139e801fd145bc44b70b4f4bfa621d626f948 Mon Sep 17 00:00:00 2001 From: William Tu Date: Mon, 7 Aug 2017 13:14:42 -0700 Subject: selftests: bpf: add a test for XDP redirect Add test for xdp_redirect by creating two namespaces with two veth peers, then forward packets in-between. Signed-off-by: William Tu Cc: Daniel Borkmann Cc: John Fastabend Acked-by: Daniel Borkmann Acked-by: John Fastabend Signed-off-by: David S. 
Miller --- tools/include/uapi/linux/bpf.h | 3 +- tools/testing/selftests/bpf/Makefile | 4 +- tools/testing/selftests/bpf/test_xdp_redirect.c | 28 ++++++++++++ tools/testing/selftests/bpf/test_xdp_redirect.sh | 54 ++++++++++++++++++++++++ 4 files changed, 86 insertions(+), 3 deletions(-) create mode 100644 tools/testing/selftests/bpf/test_xdp_redirect.c create mode 100755 tools/testing/selftests/bpf/test_xdp_redirect.sh (limited to 'tools/include/uapi/linux') diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 1579cab49717..8d9bfcca3fe4 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -592,7 +592,8 @@ union bpf_attr { FN(get_socket_uid), \ FN(set_hash), \ FN(setsockopt), \ - FN(skb_adjust_room), + FN(skb_adjust_room), \ + FN(redirect_map), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 153c3a181a4c..3c2e67da4b41 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -15,9 +15,9 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test test_align TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o \ - test_pkt_md_access.o + test_pkt_md_access.o test_xdp_redirect.o -TEST_PROGS := test_kmod.sh +TEST_PROGS := test_kmod.sh test_xdp_redirect.sh include ../lib.mk diff --git a/tools/testing/selftests/bpf/test_xdp_redirect.c b/tools/testing/selftests/bpf/test_xdp_redirect.c new file mode 100644 index 000000000000..ef9e704be140 --- /dev/null +++ b/tools/testing/selftests/bpf/test_xdp_redirect.c @@ -0,0 +1,28 @@ +/* Copyright (c) 2017 VMware + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#include +#include "bpf_helpers.h" + +int _version SEC("version") = 1; + +SEC("redirect_to_111") +int xdp_redirect_to_111(struct xdp_md *xdp) +{ + return bpf_redirect(111, 0); +} +SEC("redirect_to_222") +int xdp_redirect_to_222(struct xdp_md *xdp) +{ + return bpf_redirect(222, 0); +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_xdp_redirect.sh b/tools/testing/selftests/bpf/test_xdp_redirect.sh new file mode 100755 index 000000000000..d8c73ed6e040 --- /dev/null +++ b/tools/testing/selftests/bpf/test_xdp_redirect.sh @@ -0,0 +1,54 @@ +#!/bin/sh +# Create 2 namespaces with two veth peers, and +# forward packets in-between using generic XDP +# +# NS1(veth11) NS2(veth22) +# | | +# | | +# (veth1, ------ (veth2, +# id:111) id:222) +# | xdp forwarding | +# ------------------ + +cleanup() +{ + if [ "$?" 
= "0" ]; then + echo "selftests: test_xdp_redirect [PASS]"; + else + echo "selftests: test_xdp_redirect [FAILED]"; + fi + + set +e + ip netns del ns1 2> /dev/null + ip netns del ns2 2> /dev/null +} + +set -e + +ip netns add ns1 +ip netns add ns2 + +trap cleanup 0 2 3 6 9 + +ip link add veth1 index 111 type veth peer name veth11 +ip link add veth2 index 222 type veth peer name veth22 + +ip link set veth11 netns ns1 +ip link set veth22 netns ns2 + +ip link set veth1 up +ip link set veth2 up + +ip netns exec ns1 ip addr add 10.1.1.11/24 dev veth11 +ip netns exec ns2 ip addr add 10.1.1.22/24 dev veth22 + +ip netns exec ns1 ip link set dev veth11 up +ip netns exec ns2 ip link set dev veth22 up + +ip link set dev veth1 xdpgeneric obj test_xdp_redirect.o sec redirect_to_222 +ip link set dev veth2 xdpgeneric obj test_xdp_redirect.o sec redirect_to_111 + +ip netns exec ns1 ping -c 1 10.1.1.22 +ip netns exec ns2 ping -c 1 10.1.1.11 + +exit 0 -- cgit v1.2.3 From 92b31a9af73b3a3fc801899335d6c47966351830 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 10 Aug 2017 01:39:55 +0200 Subject: bpf: add BPF_J{LT,LE,SLT,SLE} instructions Currently, eBPF only understands BPF_JGT (>), BPF_JGE (>=), BPF_JSGT (s>), BPF_JSGE (s>=) instructions, this means that particularly *JLT/*JLE counterparts involving immediates need to be rewritten from e.g. X < [IMM] by swapping arguments into [IMM] > X, meaning the immediate first is required to be loaded into a register Y := [IMM], such that then we can compare with Y > X. Note that the destination operand is always required to be a register. This has the downside of having unnecessarily increased register pressure, meaning complex program would need to spill other registers temporarily to stack in order to obtain an unused register for the [IMM]. Loading to registers will thus also affect state pruning since we need to account for that register use and potentially those registers that had to be spilled/filled again. 
As a consequence, slightly more stack space might have been used due to spilling, and BPF programs are a bit longer due to extra code involving the register load and potentially required spill/fills. Thus, add BPF_JLT (<), BPF_JLE (<=), BPF_JSLT (s<), BPF_JSLE (s<=) counterparts to the eBPF instruction set. Modifying LLVM to remove the NegateCC() workaround in a PoC patch at [1] and allowing it to also emit the new instructions resulted in cilium's BPF programs that are injected into the fast-path having a reduced program length in the range of 2-3% (e.g. accumulated main and tail call sections from one of the object files reduced from 4864 to 4729 insns), reduced complexity in the range of 10-30% (e.g. accumulated sections reduced in one of the cases from 116432 to 88428 insns), and reduced stack usage in the range of 1-5% (e.g. accumulated sections from one of the object files reduced from 824 to 784b). The modification for LLVM will be incorporated in a backwards-compatible way. The plan is for LLVM to have i) a target-specific option to offer a possibility to explicitly enable the extension by the user (as we have with -m target-specific extensions today for various CPU insns), and ii) have the kernel checked for the presence of the extensions and enable them transparently when the user is selecting more aggressive options such as -march=native in a bpf target context. (Other frontends generating BPF byte code, e.g. ply, can probe the kernel directly for their code generation.) [1] https://github.com/borkmann/llvm/tree/bpf-insns Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. 
Miller --- Documentation/networking/filter.txt | 4 + include/uapi/linux/bpf.h | 5 + kernel/bpf/core.c | 60 ++++++ lib/test_bpf.c | 364 ++++++++++++++++++++++++++++++++++++ net/core/filter.c | 21 ++- tools/include/uapi/linux/bpf.h | 5 + 6 files changed, 455 insertions(+), 4 deletions(-) (limited to 'tools/include/uapi/linux') diff --git a/Documentation/networking/filter.txt b/Documentation/networking/filter.txt index d0fdba7d66e2..6a0df8df6c43 100644 --- a/Documentation/networking/filter.txt +++ b/Documentation/networking/filter.txt @@ -906,6 +906,10 @@ If BPF_CLASS(code) == BPF_JMP, BPF_OP(code) is one of: BPF_JSGE 0x70 /* eBPF only: signed '>=' */ BPF_CALL 0x80 /* eBPF only: function call */ BPF_EXIT 0x90 /* eBPF only: function return */ + BPF_JLT 0xa0 /* eBPF only: unsigned '<' */ + BPF_JLE 0xb0 /* eBPF only: unsigned '<=' */ + BPF_JSLT 0xc0 /* eBPF only: signed '<' */ + BPF_JSLE 0xd0 /* eBPF only: signed '<=' */ So BPF_ADD | BPF_X | BPF_ALU means 32-bit addition in both classic BPF and eBPF. There are only two registers in classic BPF, so it means A += X. 
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 1d06be1569b1..91da8371a2d0 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -30,9 +30,14 @@ #define BPF_FROM_LE BPF_TO_LE #define BPF_FROM_BE BPF_TO_BE +/* jmp encodings */ #define BPF_JNE 0x50 /* jump != */ +#define BPF_JLT 0xa0 /* LT is unsigned, '<' */ +#define BPF_JLE 0xb0 /* LE is unsigned, '<=' */ #define BPF_JSGT 0x60 /* SGT is signed '>', GT in x86 */ #define BPF_JSGE 0x70 /* SGE is signed '>=', GE in x86 */ +#define BPF_JSLT 0xc0 /* SLT is signed, '<' */ +#define BPF_JSLE 0xd0 /* SLE is signed, '<=' */ #define BPF_CALL 0x80 /* function call */ #define BPF_EXIT 0x90 /* function return */ diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index ad5f55922a13..c69e7f5bfde7 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -595,9 +595,13 @@ static int bpf_jit_blind_insn(const struct bpf_insn *from, case BPF_JMP | BPF_JEQ | BPF_K: case BPF_JMP | BPF_JNE | BPF_K: case BPF_JMP | BPF_JGT | BPF_K: + case BPF_JMP | BPF_JLT | BPF_K: case BPF_JMP | BPF_JGE | BPF_K: + case BPF_JMP | BPF_JLE | BPF_K: case BPF_JMP | BPF_JSGT | BPF_K: + case BPF_JMP | BPF_JSLT | BPF_K: case BPF_JMP | BPF_JSGE | BPF_K: + case BPF_JMP | BPF_JSLE | BPF_K: case BPF_JMP | BPF_JSET | BPF_K: /* Accommodate for extra offset in case of a backjump. 
*/ off = from->off; @@ -833,12 +837,20 @@ static unsigned int ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn, [BPF_JMP | BPF_JNE | BPF_K] = &&JMP_JNE_K, [BPF_JMP | BPF_JGT | BPF_X] = &&JMP_JGT_X, [BPF_JMP | BPF_JGT | BPF_K] = &&JMP_JGT_K, + [BPF_JMP | BPF_JLT | BPF_X] = &&JMP_JLT_X, + [BPF_JMP | BPF_JLT | BPF_K] = &&JMP_JLT_K, [BPF_JMP | BPF_JGE | BPF_X] = &&JMP_JGE_X, [BPF_JMP | BPF_JGE | BPF_K] = &&JMP_JGE_K, + [BPF_JMP | BPF_JLE | BPF_X] = &&JMP_JLE_X, + [BPF_JMP | BPF_JLE | BPF_K] = &&JMP_JLE_K, [BPF_JMP | BPF_JSGT | BPF_X] = &&JMP_JSGT_X, [BPF_JMP | BPF_JSGT | BPF_K] = &&JMP_JSGT_K, + [BPF_JMP | BPF_JSLT | BPF_X] = &&JMP_JSLT_X, + [BPF_JMP | BPF_JSLT | BPF_K] = &&JMP_JSLT_K, [BPF_JMP | BPF_JSGE | BPF_X] = &&JMP_JSGE_X, [BPF_JMP | BPF_JSGE | BPF_K] = &&JMP_JSGE_K, + [BPF_JMP | BPF_JSLE | BPF_X] = &&JMP_JSLE_X, + [BPF_JMP | BPF_JSLE | BPF_K] = &&JMP_JSLE_K, [BPF_JMP | BPF_JSET | BPF_X] = &&JMP_JSET_X, [BPF_JMP | BPF_JSET | BPF_K] = &&JMP_JSET_K, /* Program return */ @@ -1073,6 +1085,18 @@ out: CONT_JMP; } CONT; + JMP_JLT_X: + if (DST < SRC) { + insn += insn->off; + CONT_JMP; + } + CONT; + JMP_JLT_K: + if (DST < IMM) { + insn += insn->off; + CONT_JMP; + } + CONT; JMP_JGE_X: if (DST >= SRC) { insn += insn->off; @@ -1085,6 +1109,18 @@ out: CONT_JMP; } CONT; + JMP_JLE_X: + if (DST <= SRC) { + insn += insn->off; + CONT_JMP; + } + CONT; + JMP_JLE_K: + if (DST <= IMM) { + insn += insn->off; + CONT_JMP; + } + CONT; JMP_JSGT_X: if (((s64) DST) > ((s64) SRC)) { insn += insn->off; @@ -1097,6 +1133,18 @@ out: CONT_JMP; } CONT; + JMP_JSLT_X: + if (((s64) DST) < ((s64) SRC)) { + insn += insn->off; + CONT_JMP; + } + CONT; + JMP_JSLT_K: + if (((s64) DST) < ((s64) IMM)) { + insn += insn->off; + CONT_JMP; + } + CONT; JMP_JSGE_X: if (((s64) DST) >= ((s64) SRC)) { insn += insn->off; @@ -1109,6 +1157,18 @@ out: CONT_JMP; } CONT; + JMP_JSLE_X: + if (((s64) DST) <= ((s64) SRC)) { + insn += insn->off; + CONT_JMP; + } + CONT; + JMP_JSLE_K: + if (((s64) DST) <= ((s64) IMM)) { + 
insn += insn->off; + CONT_JMP; + } + CONT; JMP_JSET_X: if (DST & SRC) { insn += insn->off; diff --git a/lib/test_bpf.c b/lib/test_bpf.c index d9d5a410955c..aa8812ae6776 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -951,6 +951,32 @@ static struct bpf_test tests[] = { { 4, 4, 4, 3, 3 }, { { 2, 0 }, { 3, 1 }, { 4, MAX_K } }, }, + { + "JGE (jt 0), test 1", + .u.insns = { + BPF_STMT(BPF_LDX | BPF_LEN, 0), + BPF_STMT(BPF_LD | BPF_B | BPF_ABS, 2), + BPF_JUMP(BPF_JMP | BPF_JGE | BPF_X, 0, 0, 1), + BPF_STMT(BPF_RET | BPF_K, 1), + BPF_STMT(BPF_RET | BPF_K, MAX_K) + }, + CLASSIC, + { 4, 4, 4, 3, 3 }, + { { 2, 0 }, { 3, 1 }, { 4, 1 } }, + }, + { + "JGE (jt 0), test 2", + .u.insns = { + BPF_STMT(BPF_LDX | BPF_LEN, 0), + BPF_STMT(BPF_LD | BPF_B | BPF_ABS, 2), + BPF_JUMP(BPF_JMP | BPF_JGE | BPF_X, 0, 0, 1), + BPF_STMT(BPF_RET | BPF_K, 1), + BPF_STMT(BPF_RET | BPF_K, MAX_K) + }, + CLASSIC, + { 4, 4, 5, 3, 3 }, + { { 4, 1 }, { 5, 1 }, { 6, MAX_K } }, + }, { "JGE", .u.insns = { @@ -4492,6 +4518,35 @@ static struct bpf_test tests[] = { { }, { { 0, 1 } }, }, + /* BPF_JMP | BPF_JSLT | BPF_K */ + { + "JMP_JSLT_K: Signed jump: if (-2 < -1) return 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, 0xfffffffffffffffeLL), + BPF_JMP_IMM(BPF_JSLT, R1, -1, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "JMP_JSLT_K: Signed jump: if (-1 < -1) return 0", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_LD_IMM64(R1, 0xffffffffffffffffLL), + BPF_JMP_IMM(BPF_JSLT, R1, -1, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, /* BPF_JMP | BPF_JSGT | BPF_K */ { "JMP_JSGT_K: Signed jump: if (-1 > -2) return 1", @@ -4521,6 +4576,73 @@ static struct bpf_test tests[] = { { }, { { 0, 1 } }, }, + /* BPF_JMP | BPF_JSLE | BPF_K */ + { + "JMP_JSLE_K: Signed jump: if (-2 <= -1) return 1", + .u.insns_int = { + 
BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, 0xfffffffffffffffeLL), + BPF_JMP_IMM(BPF_JSLE, R1, -1, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "JMP_JSLE_K: Signed jump: if (-1 <= -1) return 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, 0xffffffffffffffffLL), + BPF_JMP_IMM(BPF_JSLE, R1, -1, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "JMP_JSLE_K: Signed jump: value walk 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, 3), + BPF_JMP_IMM(BPF_JSLE, R1, 0, 6), + BPF_ALU64_IMM(BPF_SUB, R1, 1), + BPF_JMP_IMM(BPF_JSLE, R1, 0, 4), + BPF_ALU64_IMM(BPF_SUB, R1, 1), + BPF_JMP_IMM(BPF_JSLE, R1, 0, 2), + BPF_ALU64_IMM(BPF_SUB, R1, 1), + BPF_JMP_IMM(BPF_JSLE, R1, 0, 1), + BPF_EXIT_INSN(), /* bad exit */ + BPF_ALU32_IMM(BPF_MOV, R0, 1), /* good exit */ + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "JMP_JSLE_K: Signed jump: value walk 2", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, 3), + BPF_JMP_IMM(BPF_JSLE, R1, 0, 4), + BPF_ALU64_IMM(BPF_SUB, R1, 2), + BPF_JMP_IMM(BPF_JSLE, R1, 0, 2), + BPF_ALU64_IMM(BPF_SUB, R1, 2), + BPF_JMP_IMM(BPF_JSLE, R1, 0, 1), + BPF_EXIT_INSN(), /* bad exit */ + BPF_ALU32_IMM(BPF_MOV, R0, 1), /* good exit */ + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, /* BPF_JMP | BPF_JSGE | BPF_K */ { "JMP_JSGE_K: Signed jump: if (-1 >= -2) return 1", @@ -4617,6 +4739,35 @@ static struct bpf_test tests[] = { { }, { { 0, 1 } }, }, + /* BPF_JMP | BPF_JLT | BPF_K */ + { + "JMP_JLT_K: if (2 < 3) return 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, 2), + BPF_JMP_IMM(BPF_JLT, R1, 3, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "JMP_JGT_K: Unsigned jump: if (1 < -1) return 1", + 
.u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, 1), + BPF_JMP_IMM(BPF_JLT, R1, -1, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, /* BPF_JMP | BPF_JGE | BPF_K */ { "JMP_JGE_K: if (3 >= 2) return 1", @@ -4632,6 +4783,21 @@ static struct bpf_test tests[] = { { }, { { 0, 1 } }, }, + /* BPF_JMP | BPF_JLE | BPF_K */ + { + "JMP_JLE_K: if (2 <= 3) return 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, 2), + BPF_JMP_IMM(BPF_JLE, R1, 3, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, /* BPF_JMP | BPF_JGT | BPF_K jump backwards */ { "JMP_JGT_K: if (3 > 2) return 1 (jump backwards)", @@ -4662,6 +4828,36 @@ static struct bpf_test tests[] = { { }, { { 0, 1 } }, }, + /* BPF_JMP | BPF_JLT | BPF_K jump backwards */ + { + "JMP_JGT_K: if (2 < 3) return 1 (jump backwards)", + .u.insns_int = { + BPF_JMP_IMM(BPF_JA, 0, 0, 2), /* goto start */ + BPF_ALU32_IMM(BPF_MOV, R0, 1), /* out: */ + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 0), /* start: */ + BPF_LD_IMM64(R1, 2), /* note: this takes 2 insns */ + BPF_JMP_IMM(BPF_JLT, R1, 3, -6), /* goto out */ + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "JMP_JLE_K: if (3 <= 3) return 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, 3), + BPF_JMP_IMM(BPF_JLE, R1, 3, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, /* BPF_JMP | BPF_JNE | BPF_K */ { "JMP_JNE_K: if (3 != 2) return 1", @@ -4752,6 +4948,37 @@ static struct bpf_test tests[] = { { }, { { 0, 1 } }, }, + /* BPF_JMP | BPF_JSLT | BPF_X */ + { + "JMP_JSLT_X: Signed jump: if (-2 < -1) return 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, -1), + BPF_LD_IMM64(R2, -2), + BPF_JMP_REG(BPF_JSLT, R2, R1, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + 
BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "JMP_JSLT_X: Signed jump: if (-1 < -1) return 0", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_LD_IMM64(R1, -1), + BPF_LD_IMM64(R2, -1), + BPF_JMP_REG(BPF_JSLT, R1, R2, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, /* BPF_JMP | BPF_JSGE | BPF_X */ { "JMP_JSGE_X: Signed jump: if (-1 >= -2) return 1", @@ -4783,6 +5010,37 @@ static struct bpf_test tests[] = { { }, { { 0, 1 } }, }, + /* BPF_JMP | BPF_JSLE | BPF_X */ + { + "JMP_JSLE_X: Signed jump: if (-2 <= -1) return 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, -1), + BPF_LD_IMM64(R2, -2), + BPF_JMP_REG(BPF_JSLE, R2, R1, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "JMP_JSLE_X: Signed jump: if (-1 <= -1) return 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, -1), + BPF_LD_IMM64(R2, -1), + BPF_JMP_REG(BPF_JSLE, R1, R2, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, /* BPF_JMP | BPF_JGT | BPF_X */ { "JMP_JGT_X: if (3 > 2) return 1", @@ -4814,6 +5072,37 @@ static struct bpf_test tests[] = { { }, { { 0, 1 } }, }, + /* BPF_JMP | BPF_JLT | BPF_X */ + { + "JMP_JLT_X: if (2 < 3) return 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, 3), + BPF_LD_IMM64(R2, 2), + BPF_JMP_REG(BPF_JLT, R2, R1, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "JMP_JLT_X: Unsigned jump: if (1 < -1) return 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, -1), + BPF_LD_IMM64(R2, 1), + BPF_JMP_REG(BPF_JLT, R2, R1, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, /* BPF_JMP | BPF_JGE | BPF_X */ { 
"JMP_JGE_X: if (3 >= 2) return 1", @@ -4845,6 +5134,37 @@ static struct bpf_test tests[] = { { }, { { 0, 1 } }, }, + /* BPF_JMP | BPF_JLE | BPF_X */ + { + "JMP_JLE_X: if (2 <= 3) return 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, 3), + BPF_LD_IMM64(R2, 2), + BPF_JMP_REG(BPF_JLE, R2, R1, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "JMP_JLE_X: if (3 <= 3) return 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, 3), + BPF_LD_IMM64(R2, 3), + BPF_JMP_REG(BPF_JLE, R1, R2, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, { /* Mainly testing JIT + imm64 here. */ "JMP_JGE_X: ldimm64 test 1", @@ -4890,6 +5210,50 @@ static struct bpf_test tests[] = { { }, { { 0, 1 } }, }, + { + "JMP_JLE_X: ldimm64 test 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, 3), + BPF_LD_IMM64(R2, 2), + BPF_JMP_REG(BPF_JLE, R2, R1, 2), + BPF_LD_IMM64(R0, 0xffffffffffffffffULL), + BPF_LD_IMM64(R0, 0xeeeeeeeeeeeeeeeeULL), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xeeeeeeeeU } }, + }, + { + "JMP_JLE_X: ldimm64 test 2", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, 3), + BPF_LD_IMM64(R2, 2), + BPF_JMP_REG(BPF_JLE, R2, R1, 0), + BPF_LD_IMM64(R0, 0xffffffffffffffffULL), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xffffffffU } }, + }, + { + "JMP_JLE_X: ldimm64 test 3", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_LD_IMM64(R1, 3), + BPF_LD_IMM64(R2, 2), + BPF_JMP_REG(BPF_JLE, R2, R1, 4), + BPF_LD_IMM64(R0, 0xffffffffffffffffULL), + BPF_LD_IMM64(R0, 0xeeeeeeeeeeeeeeeeULL), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, /* BPF_JMP | BPF_JNE | BPF_X */ { "JMP_JNE_X: if (3 != 2) return 1", diff --git a/net/core/filter.c b/net/core/filter.c index 78d00933dbe7..5afe3ac191ec 100644 --- a/net/core/filter.c +++ 
b/net/core/filter.c @@ -514,14 +514,27 @@ do_pass: break; } - /* Convert JEQ into JNE when 'jump_true' is next insn. */ - if (fp->jt == 0 && BPF_OP(fp->code) == BPF_JEQ) { - insn->code = BPF_JMP | BPF_JNE | bpf_src; + /* Convert some jumps when 'jump_true' is next insn. */ + if (fp->jt == 0) { + switch (BPF_OP(fp->code)) { + case BPF_JEQ: + insn->code = BPF_JMP | BPF_JNE | bpf_src; + break; + case BPF_JGT: + insn->code = BPF_JMP | BPF_JLE | bpf_src; + break; + case BPF_JGE: + insn->code = BPF_JMP | BPF_JLT | bpf_src; + break; + default: + goto jmp_rest; + } + target = i + fp->jf + 1; BPF_EMIT_JMP; break; } - +jmp_rest: /* Other jumps are mapped into two insns: Jxx and JA. */ target = i + fp->jt + 1; insn->code = BPF_JMP | BPF_OP(fp->code) | bpf_src; diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 8d9bfcca3fe4..bf3b2e230455 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -30,9 +30,14 @@ #define BPF_FROM_LE BPF_TO_LE #define BPF_FROM_BE BPF_TO_BE +/* jmp encodings */ #define BPF_JNE 0x50 /* jump != */ +#define BPF_JLT 0xa0 /* LT is unsigned, '<' */ +#define BPF_JLE 0xb0 /* LE is unsigned, '<=' */ #define BPF_JSGT 0x60 /* SGT is signed '>', GT in x86 */ #define BPF_JSGE 0x70 /* SGE is signed '>=', GE in x86 */ +#define BPF_JSLT 0xc0 /* SLT is signed, '<' */ +#define BPF_JSLE 0xd0 /* SLE is signed, '<=' */ #define BPF_CALL 0x80 /* function call */ #define BPF_EXIT 0x90 /* function return */ -- cgit v1.2.3 From 69e8cc134bcbf0ccfcf852c400b8e6788d1d0038 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Tue, 15 Aug 2017 22:33:32 -0700 Subject: bpf: sockmap sample program This program binds a program to a cgroup and then matches hard coded IP addresses and adds these to a sockmap. This will receive messages from the backend and send them to the client. client:X <---> frontend:10000 client:X <---> backend:10001 To keep things simple this is only designed for 1:1 connections using hard coded values. 
A more complete example would allow many backends and clients. To run, # sockmap Signed-off-by: John Fastabend Signed-off-by: David S. Miller --- samples/bpf/bpf_load.c | 8 +- samples/sockmap/Makefile | 78 ++++++++ samples/sockmap/sockmap_kern.c | 110 ++++++++++++ samples/sockmap/sockmap_user.c | 286 ++++++++++++++++++++++++++++++ tools/include/uapi/linux/bpf.h | 46 ++++- tools/lib/bpf/bpf.c | 14 +- tools/lib/bpf/bpf.h | 4 + tools/testing/selftests/bpf/bpf_helpers.h | 7 + 8 files changed, 547 insertions(+), 6 deletions(-) create mode 100644 samples/sockmap/Makefile create mode 100644 samples/sockmap/sockmap_kern.c create mode 100644 samples/sockmap/sockmap_user.c (limited to 'tools/include/uapi/linux') diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c index 899f40310bc3..a8552b8a2ab6 100644 --- a/samples/bpf/bpf_load.c +++ b/samples/bpf/bpf_load.c @@ -65,6 +65,7 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size) bool is_cgroup_skb = strncmp(event, "cgroup/skb", 10) == 0; bool is_cgroup_sk = strncmp(event, "cgroup/sock", 11) == 0; bool is_sockops = strncmp(event, "sockops", 7) == 0; + bool is_sk_skb = strncmp(event, "sk_skb", 6) == 0; size_t insns_cnt = size / sizeof(struct bpf_insn); enum bpf_prog_type prog_type; char buf[256]; @@ -92,6 +93,8 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size) prog_type = BPF_PROG_TYPE_CGROUP_SOCK; } else if (is_sockops) { prog_type = BPF_PROG_TYPE_SOCK_OPS; + } else if (is_sk_skb) { + prog_type = BPF_PROG_TYPE_SK_SKB; } else { printf("Unknown event '%s'\n", event); return -1; @@ -109,7 +112,7 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size) if (is_xdp || is_perf_event || is_cgroup_skb || is_cgroup_sk) return 0; - if (is_socket || is_sockops) { + if (is_socket || is_sockops || is_sk_skb) { if (is_socket) event += 6; else @@ -567,7 +570,8 @@ static int do_load_bpf_file(const char *path, fixup_map_cb fixup_map) memcmp(shname, 
"perf_event", 10) == 0 || memcmp(shname, "socket", 6) == 0 || memcmp(shname, "cgroup/", 7) == 0 || - memcmp(shname, "sockops", 7) == 0) { + memcmp(shname, "sockops", 7) == 0 || + memcmp(shname, "sk_skb", 6) == 0) { ret = load_and_attach(shname, data->d_buf, data->d_size); if (ret != 0) diff --git a/samples/sockmap/Makefile b/samples/sockmap/Makefile new file mode 100644 index 000000000000..9291ab8e0f8c --- /dev/null +++ b/samples/sockmap/Makefile @@ -0,0 +1,78 @@ +# kbuild trick to avoid linker error. Can be omitted if a module is built. +obj- := dummy.o + +# List of programs to build +hostprogs-y := sockmap + +# Libbpf dependencies +LIBBPF := ../../tools/lib/bpf/bpf.o + +HOSTCFLAGS += -I$(objtree)/usr/include +HOSTCFLAGS += -I$(srctree)/tools/lib/ +HOSTCFLAGS += -I$(srctree)/tools/testing/selftests/bpf/ +HOSTCFLAGS += -I$(srctree)/tools/lib/ -I$(srctree)/tools/include +HOSTCFLAGS += -I$(srctree)/tools/perf + +sockmap-objs := ../bpf/bpf_load.o $(LIBBPF) sockmap_user.o + +# Tell kbuild to always build the programs +always := $(hostprogs-y) +always += sockmap_kern.o + +HOSTLOADLIBES_sockmap += -lelf -lpthread + +# Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline: +# make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang +LLC ?= llc +CLANG ?= clang + +# Trick to allow make to be run from this directory +all: + $(MAKE) -C ../../ $(CURDIR)/ + +clean: + $(MAKE) -C ../../ M=$(CURDIR) clean + @rm -f *~ + +$(obj)/syscall_nrs.s: $(src)/syscall_nrs.c + $(call if_changed_dep,cc_s_c) + +$(obj)/syscall_nrs.h: $(obj)/syscall_nrs.s FORCE + $(call filechk,offsets,__SYSCALL_NRS_H__) + +clean-files += syscall_nrs.h + +FORCE: + + +# Verify LLVM compiler tools are available and bpf target is supported by llc +.PHONY: verify_cmds verify_target_bpf $(CLANG) $(LLC) + +verify_cmds: $(CLANG) $(LLC) + @for TOOL in $^ ; do \ + if ! 
(which -- "$${TOOL}" > /dev/null 2>&1); then \ + echo "*** ERROR: Cannot find LLVM tool $${TOOL}" ;\ + exit 1; \ + else true; fi; \ + done + +verify_target_bpf: verify_cmds + @if ! (${LLC} -march=bpf -mattr=help > /dev/null 2>&1); then \ + echo "*** ERROR: LLVM (${LLC}) does not support 'bpf' target" ;\ + echo " NOTICE: LLVM version >= 3.7.1 required" ;\ + exit 2; \ + else true; fi + +$(src)/*.c: verify_target_bpf + +# asm/sysreg.h - inline assembly used by it is incompatible with llvm. +# But, there is no easy way to fix it, so just exclude it since it is +# useless for BPF samples. +$(obj)/%.o: $(src)/%.c + $(CLANG) $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(EXTRA_CFLAGS) -I$(obj) \ + -D__KERNEL__ -D__ASM_SYSREG_H -Wno-unused-value -Wno-pointer-sign \ + -Wno-compare-distinct-pointer-types \ + -Wno-gnu-variable-sized-type-not-at-end \ + -Wno-address-of-packed-member -Wno-tautological-compare \ + -Wno-unknown-warning-option \ + -O2 -emit-llvm -c $< -o -| $(LLC) -march=bpf -filetype=obj -o $@ diff --git a/samples/sockmap/sockmap_kern.c b/samples/sockmap/sockmap_kern.c new file mode 100644 index 000000000000..6ff986f7059b --- /dev/null +++ b/samples/sockmap/sockmap_kern.c @@ -0,0 +1,110 @@ +/* Copyright (c) 2017 Covalent IO, Inc. http://covalent.io + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#include +#include +#include +#include +#include "../../tools/testing/selftests/bpf/bpf_helpers.h" +#include "../../tools/testing/selftests/bpf/bpf_endian.h" + +/* Sockmap sample program connects a client and a backend together + * using cgroups. 
+ * + * client:X <---> frontend:10000 client:X <---> backend:10001 + * + * For simplicity we hard code values here and bind 1:1. The hard + * coded values are part of the setup in sockmap.sh script that + * is associated with this BPF program. + * + * The bpf_printk is verbose and prints information as connections + * are established and verdicts are decided. + */ + +#define bpf_printk(fmt, ...) \ +({ \ + char ____fmt[] = fmt; \ + bpf_trace_printk(____fmt, sizeof(____fmt), \ + ##__VA_ARGS__); \ +}) + +struct bpf_map_def SEC("maps") sock_map = { + .type = BPF_MAP_TYPE_SOCKMAP, + .key_size = sizeof(int), + .value_size = sizeof(int), + .max_entries = 20, +}; + +SEC("sk_skb1") +int bpf_prog1(struct __sk_buff *skb) +{ + return skb->len; +} + +SEC("sk_skb2") +int bpf_prog2(struct __sk_buff *skb) +{ + __u32 lport = skb->local_port; + __u32 rport = skb->remote_port; + int ret = 0; + + if (lport == 10000) + ret = 10; + else + ret = 1; + + bpf_printk("sockmap: %d -> %d @ %d\n", lport, bpf_ntohl(rport), ret); + return bpf_sk_redirect_map(&sock_map, ret, 0); +} + +SEC("sockops") +int bpf_sockmap(struct bpf_sock_ops *skops) +{ + __u32 lport, rport; + int op, err = 0, index, key, ret; + + + op = (int) skops->op; + + switch (op) { + case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB: + lport = skops->local_port; + rport = skops->remote_port; + + if (lport == 10000) { + ret = 1; + err = bpf_sock_map_update(skops, &sock_map, &ret, + BPF_NOEXIST, + BPF_SOCKMAP_STRPARSER); + bpf_printk("passive(%i -> %i) map ctx update err: %d\n", + lport, bpf_ntohl(rport), err); + } + break; + case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB: + lport = skops->local_port; + rport = skops->remote_port; + + if (bpf_ntohl(rport) == 10001) { + ret = 10; + err = bpf_sock_map_update(skops, &sock_map, &ret, + BPF_NOEXIST, + BPF_SOCKMAP_STRPARSER); + bpf_printk("active(%i -> %i) map ctx update err: %d\n", + lport, bpf_ntohl(rport), err); + } + break; + default: + break; + } + + return 0; +} +char _license[] SEC("license") = "GPL"; 
diff --git a/samples/sockmap/sockmap_user.c b/samples/sockmap/sockmap_user.c new file mode 100644 index 000000000000..fb78f5abefb4 --- /dev/null +++ b/samples/sockmap/sockmap_user.c @@ -0,0 +1,286 @@ +/* Copyright (c) 2017 Covalent IO, Inc. http://covalent.io + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "../bpf/bpf_load.h" +#include "../bpf/bpf_util.h" +#include "../bpf/libbpf.h" + +int running; +void running_handler(int a); + +/* randomly selected ports for testing on lo */ +#define S1_PORT 10000 +#define S2_PORT 10001 + +static int sockmap_test_sockets(int rate, int dot) +{ + int i, sc, err, max_fd, one = 1; + int s1, s2, c1, c2, p1, p2; + struct sockaddr_in addr; + struct timeval timeout; + char buf[1024] = {0}; + int *fds[4] = {&s1, &s2, &c1, &c2}; + fd_set w; + + s1 = s2 = p1 = p2 = c1 = c2 = 0; + + /* Init sockets */ + for (i = 0; i < 4; i++) { + *fds[i] = socket(AF_INET, SOCK_STREAM, 0); + if (*fds[i] < 0) { + perror("socket s1 failed()"); + err = *fds[i]; + goto out; + } + } + + /* Allow reuse */ + for (i = 0; i < 2; i++) { + err = setsockopt(*fds[i], SOL_SOCKET, SO_REUSEADDR, + (char *)&one, sizeof(one)); + if (err) { + perror("setsockopt failed()"); + goto out; + } + } + + /* Non-blocking sockets */ + for (i = 0; i < 4; i++) { + err = ioctl(*fds[i], FIONBIO, (char *)&one); + if (err < 0) { + perror("ioctl s1 
failed()"); + goto out; + } + } + + /* Bind server sockets */ + memset(&addr, 0, sizeof(struct sockaddr_in)); + addr.sin_family = AF_INET; + addr.sin_addr.s_addr = inet_addr("127.0.0.1"); + + addr.sin_port = htons(S1_PORT); + err = bind(s1, (struct sockaddr *)&addr, sizeof(addr)); + if (err < 0) { + perror("bind s1 failed()\n"); + goto out; + } + + addr.sin_port = htons(S2_PORT); + err = bind(s2, (struct sockaddr *)&addr, sizeof(addr)); + if (err < 0) { + perror("bind s2 failed()\n"); + goto out; + } + + /* Listen server sockets */ + addr.sin_port = htons(S1_PORT); + err = listen(s1, 32); + if (err < 0) { + perror("listen s1 failed()\n"); + goto out; + } + + addr.sin_port = htons(S2_PORT); + err = listen(s2, 32); + if (err < 0) { + perror("listen s1 failed()\n"); + goto out; + } + + /* Initiate Connect */ + addr.sin_port = htons(S1_PORT); + err = connect(c1, (struct sockaddr *)&addr, sizeof(addr)); + if (err < 0 && errno != EINPROGRESS) { + perror("connect c1 failed()\n"); + goto out; + } + + addr.sin_port = htons(S2_PORT); + err = connect(c2, (struct sockaddr *)&addr, sizeof(addr)); + if (err < 0 && errno != EINPROGRESS) { + perror("connect c2 failed()\n"); + goto out; + } + + /* Accept Connecrtions */ + p1 = accept(s1, NULL, NULL); + if (p1 < 0) { + perror("accept s1 failed()\n"); + goto out; + } + + p2 = accept(s2, NULL, NULL); + if (p2 < 0) { + perror("accept s1 failed()\n"); + goto out; + } + + max_fd = p2; + timeout.tv_sec = 10; + timeout.tv_usec = 0; + + printf("connected sockets: c1 <-> p1, c2 <-> p2\n"); + printf("cgroups binding: c1(%i) <-> s1(%i) - - - c2(%i) <-> s2(%i)\n", + c1, s1, c2, s2); + + /* Ping/Pong data from client to server */ + sc = send(c1, buf, sizeof(buf), 0); + if (sc < 0) { + perror("send failed()\n"); + goto out; + } + + do { + int s, rc, i; + + /* FD sets */ + FD_ZERO(&w); + FD_SET(c1, &w); + FD_SET(c2, &w); + FD_SET(p1, &w); + FD_SET(p2, &w); + + s = select(max_fd + 1, &w, NULL, NULL, &timeout); + if (s == -1) { + perror("select()"); 
+ break; + } else if (!s) { + fprintf(stderr, "unexpected timeout\n"); + break; + } + + for (i = 0; i <= max_fd && s > 0; ++i) { + if (!FD_ISSET(i, &w)) + continue; + + s--; + + rc = recv(i, buf, sizeof(buf), 0); + if (rc < 0) { + if (errno != EWOULDBLOCK) { + perror("recv failed()\n"); + break; + } + } + + if (rc == 0) { + close(i); + break; + } + + sc = send(i, buf, rc, 0); + if (sc < 0) { + perror("send failed()\n"); + break; + } + } + sleep(rate); + if (dot) { + printf("."); + fflush(stdout); + + } + } while (running); + +out: + close(s1); + close(s2); + close(p1); + close(p2); + close(c1); + close(c2); + return err; +} + +int main(int argc, char **argv) +{ + int rate = 1, dot = 1; + char filename[256]; + int err, cg_fd; + char *cg_path; + + cg_path = argv[argc - 1]; + snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + + running = 1; + + /* catch SIGINT */ + signal(SIGINT, running_handler); + + if (load_bpf_file(filename)) { + fprintf(stderr, "load_bpf_file: (%s) %s\n", + filename, strerror(errno)); + return 1; + } + + /* Cgroup configuration */ + cg_fd = open(cg_path, O_DIRECTORY, O_RDONLY); + if (cg_fd < 0) { + fprintf(stderr, "ERROR: (%i) open cg path failed: %s\n", + cg_fd, cg_path); + return cg_fd; + } + + /* Attach programs to sockmap */ + err = __bpf_prog_attach(prog_fd[0], prog_fd[1], map_fd[0], + BPF_CGROUP_SMAP_INGRESS, 0); + if (err) { + fprintf(stderr, "ERROR: bpf_prog_attach (sockmap): %d (%s)\n", + err, strerror(errno)); + return err; + } + + /* Attach to cgroups */ + err = bpf_prog_attach(prog_fd[2], cg_fd, BPF_CGROUP_SOCK_OPS, 0); + if (err) { + fprintf(stderr, "ERROR: bpf_prog_attach (groups): %d (%s)\n", + err, strerror(errno)); + return err; + } + + err = sockmap_test_sockets(rate, dot); + if (err) { + fprintf(stderr, "ERROR: test socket failed: %d\n", err); + return err; + } + return 0; +} + +void running_handler(int a) +{ + running = 0; +} diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 
bf3b2e230455..2d97dd27c8f6 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -110,6 +110,7 @@ enum bpf_map_type { BPF_MAP_TYPE_ARRAY_OF_MAPS, BPF_MAP_TYPE_HASH_OF_MAPS, BPF_MAP_TYPE_DEVMAP, + BPF_MAP_TYPE_SOCKMAP, }; enum bpf_prog_type { @@ -127,6 +128,7 @@ enum bpf_prog_type { BPF_PROG_TYPE_LWT_OUT, BPF_PROG_TYPE_LWT_XMIT, BPF_PROG_TYPE_SOCK_OPS, + BPF_PROG_TYPE_SK_SKB, }; enum bpf_attach_type { @@ -134,11 +136,18 @@ enum bpf_attach_type { BPF_CGROUP_INET_EGRESS, BPF_CGROUP_INET_SOCK_CREATE, BPF_CGROUP_SOCK_OPS, + BPF_CGROUP_SMAP_INGRESS, __MAX_BPF_ATTACH_TYPE }; #define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE +enum bpf_sockmap_flags { + BPF_SOCKMAP_UNSPEC, + BPF_SOCKMAP_STRPARSER, + __MAX_BPF_SOCKMAP_FLAG +}; + /* If BPF_F_ALLOW_OVERRIDE flag is used in BPF_PROG_ATTACH command * to the given target_fd cgroup the descendent cgroup will be able to * override effective bpf program that was inherited from this cgroup @@ -210,6 +219,7 @@ union bpf_attr { __u32 attach_bpf_fd; /* eBPF program to attach */ __u32 attach_type; __u32 attach_flags; + __u32 attach_bpf_fd2; }; struct { /* anonymous struct used by BPF_PROG_TEST_RUN command */ @@ -545,6 +555,23 @@ union bpf_attr { * @mode: operation mode (enum bpf_adj_room_mode) * @flags: reserved for future use * Return: 0 on success or negative error code + * + * int bpf_sk_redirect_map(map, key, flags) + * Redirect skb to a sock in map using key as a lookup key for the + * sock in map. 
+ * @map: pointer to sockmap + * @key: key to lookup sock in map + * @flags: reserved for future use + * Return: SK_REDIRECT + * + * int bpf_sock_map_update(skops, map, key, flags, map_flags) + * @skops: pointer to bpf_sock_ops + * @map: pointer to sockmap to update + * @key: key to insert/update sock in map + * @flags: same flags as map update elem + * @map_flags: sock map specific flags + * bit 1: Enable strparser + * other bits: reserved */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -598,7 +625,9 @@ union bpf_attr { FN(set_hash), \ FN(setsockopt), \ FN(skb_adjust_room), \ - FN(redirect_map), + FN(redirect_map), \ + FN(sk_redirect_map), \ + FN(sock_map_update), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call @@ -675,6 +704,15 @@ struct __sk_buff { __u32 data; __u32 data_end; __u32 napi_id; + + /* accessed by BPF_PROG_TYPE_sk_skb types */ + __u32 family; + __u32 remote_ip4; /* Stored in network byte order */ + __u32 local_ip4; /* Stored in network byte order */ + __u32 remote_ip6[4]; /* Stored in network byte order */ + __u32 local_ip6[4]; /* Stored in network byte order */ + __u32 remote_port; /* Stored in network byte order */ + __u32 local_port; /* stored in host byte order */ }; struct bpf_tunnel_key { @@ -734,6 +772,12 @@ struct xdp_md { __u32 data_end; }; +enum sk_action { + SK_ABORTED = 0, + SK_DROP, + SK_REDIRECT, +}; + #define BPF_TAG_SIZE 8 struct bpf_prog_info { diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index e5bbb090bf88..77660157a684 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -211,20 +211,28 @@ int bpf_obj_get(const char *pathname) return sys_bpf(BPF_OBJ_GET, &attr, sizeof(attr)); } -int bpf_prog_attach(int prog_fd, int target_fd, enum bpf_attach_type type, - unsigned int flags) +int __bpf_prog_attach(int prog_fd1, int prog_fd2, int target_fd, + enum bpf_attach_type type, + unsigned int flags) { union bpf_attr attr; bzero(&attr, sizeof(attr)); 
attr.target_fd = target_fd; - attr.attach_bpf_fd = prog_fd; + attr.attach_bpf_fd = prog_fd1; + attr.attach_bpf_fd2 = prog_fd2; attr.attach_type = type; attr.attach_flags = flags; return sys_bpf(BPF_PROG_ATTACH, &attr, sizeof(attr)); } +int bpf_prog_attach(int prog_fd, int target_fd, enum bpf_attach_type type, + unsigned int flags) +{ + return __bpf_prog_attach(prog_fd, 0, target_fd, type, flags); +} + int bpf_prog_detach(int target_fd, enum bpf_attach_type type) { union bpf_attr attr; diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 418c86e69bcb..eaee585c1cea 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -50,6 +50,10 @@ int bpf_obj_pin(int fd, const char *pathname); int bpf_obj_get(const char *pathname); int bpf_prog_attach(int prog_fd, int attachable_fd, enum bpf_attach_type type, unsigned int flags); +int __bpf_prog_attach(int prog1, int prog2, + int attachable_fd, + enum bpf_attach_type type, + unsigned int flags); int bpf_prog_detach(int attachable_fd, enum bpf_attach_type type); int bpf_prog_test_run(int prog_fd, int repeat, void *data, __u32 size, void *data_out, __u32 *size_out, __u32 *retval, diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h index acbd60519467..73092d4a898e 100644 --- a/tools/testing/selftests/bpf/bpf_helpers.h +++ b/tools/testing/selftests/bpf/bpf_helpers.h @@ -65,6 +65,13 @@ static int (*bpf_xdp_adjust_head)(void *ctx, int offset) = static int (*bpf_setsockopt)(void *ctx, int level, int optname, void *optval, int optlen) = (void *) BPF_FUNC_setsockopt; +static int (*bpf_sk_redirect_map)(void *map, int key, int flags) = + (void *) BPF_FUNC_sk_redirect_map; +static int (*bpf_sock_map_update)(void *map, void *key, void *value, + unsigned long long flags, + unsigned long long map_lags) = + (void *) BPF_FUNC_sock_map_update; + /* llvm builtin functions that eBPF C program may use to * emit BPF_LD_ABS and BPF_LD_IND instructions -- cgit v1.2.3 From 
ad17d0e6c708805bf9e6686eb747cc528b702e67 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Fri, 18 Aug 2017 11:28:01 -0700 Subject: bpf: Allow numa selection in INNER_LRU_HASH_PREALLOC test of map_perf_test This patch makes the needed changes to allow each process of the INNER_LRU_HASH_PREALLOC test to provide its numa node id when creating the lru map. Signed-off-by: Martin KaFai Lau Acked-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- samples/bpf/bpf_load.c | 21 ++++++++++++-------- samples/bpf/bpf_load.h | 1 + samples/bpf/map_perf_test_kern.c | 2 ++ samples/bpf/map_perf_test_user.c | 12 +++++++++--- tools/include/uapi/linux/bpf.h | 10 +++++++++- tools/lib/bpf/bpf.c | 32 +++++++++++++++++++++++++++---- tools/lib/bpf/bpf.h | 6 ++++++ tools/testing/selftests/bpf/bpf_helpers.h | 1 + 8 files changed, 69 insertions(+), 16 deletions(-) (limited to 'tools/include/uapi/linux') diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c index a8552b8a2ab6..6aa50098dfb8 100644 --- a/samples/bpf/bpf_load.c +++ b/samples/bpf/bpf_load.c @@ -201,7 +201,7 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size) static int load_maps(struct bpf_map_data *maps, int nr_maps, fixup_map_cb fixup_map) { - int i; + int i, numa_node; for (i = 0; i < nr_maps; i++) { if (fixup_map) { @@ -213,21 +213,26 @@ static int load_maps(struct bpf_map_data *maps, int nr_maps, } } + numa_node = maps[i].def.map_flags & BPF_F_NUMA_NODE ? 
+ maps[i].def.numa_node : -1; + if (maps[i].def.type == BPF_MAP_TYPE_ARRAY_OF_MAPS || maps[i].def.type == BPF_MAP_TYPE_HASH_OF_MAPS) { int inner_map_fd = map_fd[maps[i].def.inner_map_idx]; - map_fd[i] = bpf_create_map_in_map(maps[i].def.type, + map_fd[i] = bpf_create_map_in_map_node(maps[i].def.type, maps[i].def.key_size, inner_map_fd, maps[i].def.max_entries, - maps[i].def.map_flags); + maps[i].def.map_flags, + numa_node); } else { - map_fd[i] = bpf_create_map(maps[i].def.type, - maps[i].def.key_size, - maps[i].def.value_size, - maps[i].def.max_entries, - maps[i].def.map_flags); + map_fd[i] = bpf_create_map_node(maps[i].def.type, + maps[i].def.key_size, + maps[i].def.value_size, + maps[i].def.max_entries, + maps[i].def.map_flags, + numa_node); } if (map_fd[i] < 0) { printf("failed to create a map: %d %s\n", diff --git a/samples/bpf/bpf_load.h b/samples/bpf/bpf_load.h index ca0563d04744..453e3226b4ce 100644 --- a/samples/bpf/bpf_load.h +++ b/samples/bpf/bpf_load.h @@ -13,6 +13,7 @@ struct bpf_map_def { unsigned int max_entries; unsigned int map_flags; unsigned int inner_map_idx; + unsigned int numa_node; }; struct bpf_map_data { diff --git a/samples/bpf/map_perf_test_kern.c b/samples/bpf/map_perf_test_kern.c index 245165817fbe..ca3b22ed577a 100644 --- a/samples/bpf/map_perf_test_kern.c +++ b/samples/bpf/map_perf_test_kern.c @@ -40,6 +40,8 @@ struct bpf_map_def SEC("maps") inner_lru_hash_map = { .key_size = sizeof(u32), .value_size = sizeof(long), .max_entries = MAX_ENTRIES, + .map_flags = BPF_F_NUMA_NODE, + .numa_node = 0, }; struct bpf_map_def SEC("maps") array_of_lru_hashs = { diff --git a/samples/bpf/map_perf_test_user.c b/samples/bpf/map_perf_test_user.c index 1a8894b5ac51..bccbf8478e43 100644 --- a/samples/bpf/map_perf_test_user.c +++ b/samples/bpf/map_perf_test_user.c @@ -97,14 +97,20 @@ static void do_test_lru(enum test_type test, int cpu) if (test == INNER_LRU_HASH_PREALLOC) { int outer_fd = map_fd[array_of_lru_hashs_idx]; + unsigned int mycpu, mynode; 
assert(cpu < MAX_NR_CPUS); if (cpu) { + ret = syscall(__NR_getcpu, &mycpu, &mynode, NULL); + assert(!ret); + inner_lru_map_fds[cpu] = - bpf_create_map(BPF_MAP_TYPE_LRU_HASH, - sizeof(uint32_t), sizeof(long), - inner_lru_hash_size, 0); + bpf_create_map_node(BPF_MAP_TYPE_LRU_HASH, + sizeof(uint32_t), + sizeof(long), + inner_lru_hash_size, 0, + mynode); if (inner_lru_map_fds[cpu] == -1) { printf("cannot create BPF_MAP_TYPE_LRU_HASH %s(%d)\n", strerror(errno), errno); diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 2d97dd27c8f6..f8f6377fd541 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -168,6 +168,7 @@ enum bpf_sockmap_flags { #define BPF_NOEXIST 1 /* create new element if it didn't exist */ #define BPF_EXIST 2 /* update existing element */ +/* flags for BPF_MAP_CREATE command */ #define BPF_F_NO_PREALLOC (1U << 0) /* Instead of having one common LRU list in the * BPF_MAP_TYPE_LRU_[PERCPU_]HASH map, use a percpu LRU list @@ -176,6 +177,8 @@ enum bpf_sockmap_flags { * across different LRU lists. */ #define BPF_F_NO_COMMON_LRU (1U << 1) +/* Specify numa node during map creation */ +#define BPF_F_NUMA_NODE (1U << 2) union bpf_attr { struct { /* anonymous struct used by BPF_MAP_CREATE command */ @@ -183,8 +186,13 @@ union bpf_attr { __u32 key_size; /* size of key in bytes */ __u32 value_size; /* size of value in bytes */ __u32 max_entries; /* max number of entries in a map */ - __u32 map_flags; /* prealloc or not */ + __u32 map_flags; /* BPF_MAP_CREATE related + * flags defined above. + */ __u32 inner_map_fd; /* fd pointing to the inner map */ + __u32 numa_node; /* numa node (effective only if + * BPF_F_NUMA_NODE is set). 
+ */ }; struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */ diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 77660157a684..a0717610b116 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -57,8 +57,9 @@ static inline int sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr, return syscall(__NR_bpf, cmd, attr, size); } -int bpf_create_map(enum bpf_map_type map_type, int key_size, - int value_size, int max_entries, __u32 map_flags) +int bpf_create_map_node(enum bpf_map_type map_type, int key_size, + int value_size, int max_entries, __u32 map_flags, + int node) { union bpf_attr attr; @@ -69,12 +70,24 @@ int bpf_create_map(enum bpf_map_type map_type, int key_size, attr.value_size = value_size; attr.max_entries = max_entries; attr.map_flags = map_flags; + if (node >= 0) { + attr.map_flags |= BPF_F_NUMA_NODE; + attr.numa_node = node; + } return sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr)); } -int bpf_create_map_in_map(enum bpf_map_type map_type, int key_size, - int inner_map_fd, int max_entries, __u32 map_flags) +int bpf_create_map(enum bpf_map_type map_type, int key_size, + int value_size, int max_entries, __u32 map_flags) +{ + return bpf_create_map_node(map_type, key_size, value_size, + max_entries, map_flags, -1); +} + +int bpf_create_map_in_map_node(enum bpf_map_type map_type, int key_size, + int inner_map_fd, int max_entries, + __u32 map_flags, int node) { union bpf_attr attr; @@ -86,10 +99,21 @@ int bpf_create_map_in_map(enum bpf_map_type map_type, int key_size, attr.inner_map_fd = inner_map_fd; attr.max_entries = max_entries; attr.map_flags = map_flags; + if (node >= 0) { + attr.map_flags |= BPF_F_NUMA_NODE; + attr.numa_node = node; + } return sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr)); } +int bpf_create_map_in_map(enum bpf_map_type map_type, int key_size, + int inner_map_fd, int max_entries, __u32 map_flags) +{ + return bpf_create_map_in_map_node(map_type, key_size, inner_map_fd, + max_entries, map_flags, -1); +} + int 
bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns, size_t insns_cnt, const char *license, __u32 kern_version, char *log_buf, size_t log_buf_sz) diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index eaee585c1cea..90e9d4e85d08 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -24,8 +24,14 @@ #include #include +int bpf_create_map_node(enum bpf_map_type map_type, int key_size, + int value_size, int max_entries, __u32 map_flags, + int node); int bpf_create_map(enum bpf_map_type map_type, int key_size, int value_size, int max_entries, __u32 map_flags); +int bpf_create_map_in_map_node(enum bpf_map_type map_type, int key_size, + int inner_map_fd, int max_entries, + __u32 map_flags, int node); int bpf_create_map_in_map(enum bpf_map_type map_type, int key_size, int inner_map_fd, int max_entries, __u32 map_flags); diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h index 73092d4a898e..98f3be26d390 100644 --- a/tools/testing/selftests/bpf/bpf_helpers.h +++ b/tools/testing/selftests/bpf/bpf_helpers.h @@ -94,6 +94,7 @@ struct bpf_map_def { unsigned int max_entries; unsigned int map_flags; unsigned int inner_map_idx; + unsigned int numa_node; }; static int (*bpf_skb_load_bytes)(void *ctx, int off, void *to, int len) = -- cgit v1.2.3 From 464bc0fd6273d518aee79fbd37211dd9bc35d863 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Mon, 28 Aug 2017 07:10:04 -0700 Subject: bpf: convert sockmap field attach_bpf_fd2 to type In the initial sockmap API we provided strparser and verdict programs using a single attach command by extending the attach API with the attach_bpf_fd2 field. However, if we add other programs in the future we will be adding a field for every new possible type, attach_bpf_fd(3,4,..). This seems a bit clumsy for an API. So let's push the programs using two new type fields.
BPF_SK_SKB_STREAM_PARSER BPF_SK_SKB_STREAM_VERDICT This has the advantage of having a readable name and can easily be extended in the future. Updates to samples and sockmap included here also generalize tests slightly to support upcoming patch for multiple map support. Signed-off-by: John Fastabend Fixes: 174a79ff9515 ("bpf: sockmap with sk redirect support") Suggested-by: Alexei Starovoitov Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf.h | 10 +- include/uapi/linux/bpf.h | 9 +- kernel/bpf/sockmap.c | 25 ++-- kernel/bpf/syscall.c | 38 ++---- samples/sockmap/sockmap_kern.c | 6 +- samples/sockmap/sockmap_user.c | 12 +- tools/include/uapi/linux/bpf.h | 9 +- tools/lib/bpf/bpf.c | 14 +-- tools/lib/bpf/bpf.h | 4 - tools/testing/selftests/bpf/bpf_helpers.h | 3 +- tools/testing/selftests/bpf/sockmap_parse_prog.c | 2 +- tools/testing/selftests/bpf/sockmap_verdict_prog.c | 2 +- tools/testing/selftests/bpf/test_maps.c | 133 +++++++++------------ 13 files changed, 116 insertions(+), 151 deletions(-) (limited to 'tools/include/uapi/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 830f472d8df5..c2cb1b5c094e 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -39,8 +39,6 @@ struct bpf_map_ops { void (*map_fd_put_ptr)(void *ptr); u32 (*map_gen_lookup)(struct bpf_map *map, struct bpf_insn *insn_buf); u32 (*map_fd_sys_lookup_elem)(void *ptr); - int (*map_attach)(struct bpf_map *map, - struct bpf_prog *p1, struct bpf_prog *p2); }; struct bpf_map { @@ -387,11 +385,19 @@ static inline void __dev_map_flush(struct bpf_map *map) #if defined(CONFIG_STREAM_PARSER) && defined(CONFIG_BPF_SYSCALL) struct sock *__sock_map_lookup_elem(struct bpf_map *map, u32 key); +int sock_map_attach_prog(struct bpf_map *map, struct bpf_prog *prog, u32 type); #else static inline struct sock *__sock_map_lookup_elem(struct bpf_map *map, u32 key) { return NULL; } + +static inline int sock_map_attach_prog(struct bpf_map *map, + struct bpf_prog *prog, 
+ u32 type) +{ + return -EOPNOTSUPP; +} #endif /* verifier prototypes for helper functions called from eBPF programs */ diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 843818dff96d..97227be3690c 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -136,7 +136,8 @@ enum bpf_attach_type { BPF_CGROUP_INET_EGRESS, BPF_CGROUP_INET_SOCK_CREATE, BPF_CGROUP_SOCK_OPS, - BPF_CGROUP_SMAP_INGRESS, + BPF_SK_SKB_STREAM_PARSER, + BPF_SK_SKB_STREAM_VERDICT, __MAX_BPF_ATTACH_TYPE }; @@ -224,7 +225,6 @@ union bpf_attr { __u32 attach_bpf_fd; /* eBPF program to attach */ __u32 attach_type; __u32 attach_flags; - __u32 attach_bpf_fd2; }; struct { /* anonymous struct used by BPF_PROG_TEST_RUN command */ @@ -580,14 +580,11 @@ union bpf_attr { * @flags: reserved for future use * Return: SK_REDIRECT * - * int bpf_sock_map_update(skops, map, key, flags, map_flags) + * int bpf_sock_map_update(skops, map, key, flags) * @skops: pointer to bpf_sock_ops * @map: pointer to sockmap to update * @key: key to insert/update sock in map * @flags: same flags as map update elem - * @map_flags: sock map specific flags - * bit 1: Enable strparser - * other bits: reserved */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c index 617c239590c2..cf570d108fd5 100644 --- a/kernel/bpf/sockmap.c +++ b/kernel/bpf/sockmap.c @@ -723,20 +723,24 @@ out: return err; } -static int sock_map_attach_prog(struct bpf_map *map, - struct bpf_prog *parse, - struct bpf_prog *verdict) +int sock_map_attach_prog(struct bpf_map *map, struct bpf_prog *prog, u32 type) { struct bpf_stab *stab = container_of(map, struct bpf_stab, map); - struct bpf_prog *_parse, *_verdict; + struct bpf_prog *orig; - _parse = xchg(&stab->bpf_parse, parse); - _verdict = xchg(&stab->bpf_verdict, verdict); + switch (type) { + case BPF_SK_SKB_STREAM_PARSER: + orig = xchg(&stab->bpf_parse, prog); + break; + case BPF_SK_SKB_STREAM_VERDICT: + orig = 
xchg(&stab->bpf_verdict, prog); + break; + default: + return -EOPNOTSUPP; + } - if (_parse) - bpf_prog_put(_parse); - if (_verdict) - bpf_prog_put(_verdict); + if (orig) + bpf_prog_put(orig); return 0; } @@ -777,7 +781,6 @@ const struct bpf_map_ops sock_map_ops = { .map_get_next_key = sock_map_get_next_key, .map_update_elem = sock_map_update_elem, .map_delete_elem = sock_map_delete_elem, - .map_attach = sock_map_attach_prog, }; BPF_CALL_5(bpf_sock_map_update, struct bpf_sock_ops_kern *, bpf_sock, diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 9378f3ba2cbf..021a05d9d800 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1093,12 +1093,12 @@ static int bpf_obj_get(const union bpf_attr *attr) #ifdef CONFIG_CGROUP_BPF -#define BPF_PROG_ATTACH_LAST_FIELD attach_bpf_fd2 +#define BPF_PROG_ATTACH_LAST_FIELD attach_flags -static int sockmap_get_from_fd(const union bpf_attr *attr, int ptype) +static int sockmap_get_from_fd(const union bpf_attr *attr) { - struct bpf_prog *prog1, *prog2; int ufd = attr->target_fd; + struct bpf_prog *prog; struct bpf_map *map; struct fd f; int err; @@ -1108,29 +1108,16 @@ static int sockmap_get_from_fd(const union bpf_attr *attr, int ptype) if (IS_ERR(map)) return PTR_ERR(map); - if (!map->ops->map_attach) { - fdput(f); - return -EOPNOTSUPP; - } - - prog1 = bpf_prog_get_type(attr->attach_bpf_fd, ptype); - if (IS_ERR(prog1)) { + prog = bpf_prog_get_type(attr->attach_bpf_fd, BPF_PROG_TYPE_SK_SKB); + if (IS_ERR(prog)) { fdput(f); - return PTR_ERR(prog1); - } - - prog2 = bpf_prog_get_type(attr->attach_bpf_fd2, ptype); - if (IS_ERR(prog2)) { - fdput(f); - bpf_prog_put(prog1); - return PTR_ERR(prog2); + return PTR_ERR(prog); } - err = map->ops->map_attach(map, prog1, prog2); + err = sock_map_attach_prog(map, prog, attr->attach_type); if (err) { fdput(f); - bpf_prog_put(prog1); - bpf_prog_put(prog2); + bpf_prog_put(prog); return err; } @@ -1165,16 +1152,13 @@ static int bpf_prog_attach(const union bpf_attr *attr) case 
BPF_CGROUP_SOCK_OPS: ptype = BPF_PROG_TYPE_SOCK_OPS; break; - case BPF_CGROUP_SMAP_INGRESS: - ptype = BPF_PROG_TYPE_SK_SKB; - break; + case BPF_SK_SKB_STREAM_PARSER: + case BPF_SK_SKB_STREAM_VERDICT: + return sockmap_get_from_fd(attr); default: return -EINVAL; } - if (attr->attach_type == BPF_CGROUP_SMAP_INGRESS) - return sockmap_get_from_fd(attr, ptype); - prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype); if (IS_ERR(prog)) return PTR_ERR(prog); diff --git a/samples/sockmap/sockmap_kern.c b/samples/sockmap/sockmap_kern.c index 6ff986f7059b..f9b38ef82dc2 100644 --- a/samples/sockmap/sockmap_kern.c +++ b/samples/sockmap/sockmap_kern.c @@ -82,8 +82,7 @@ int bpf_sockmap(struct bpf_sock_ops *skops) if (lport == 10000) { ret = 1; err = bpf_sock_map_update(skops, &sock_map, &ret, - BPF_NOEXIST, - BPF_SOCKMAP_STRPARSER); + BPF_NOEXIST); bpf_printk("passive(%i -> %i) map ctx update err: %d\n", lport, bpf_ntohl(rport), err); } @@ -95,8 +94,7 @@ int bpf_sockmap(struct bpf_sock_ops *skops) if (bpf_ntohl(rport) == 10001) { ret = 10; err = bpf_sock_map_update(skops, &sock_map, &ret, - BPF_NOEXIST, - BPF_SOCKMAP_STRPARSER); + BPF_NOEXIST); bpf_printk("active(%i -> %i) map ctx update err: %d\n", lport, bpf_ntohl(rport), err); } diff --git a/samples/sockmap/sockmap_user.c b/samples/sockmap/sockmap_user.c index fb78f5abefb4..7cc9d228216f 100644 --- a/samples/sockmap/sockmap_user.c +++ b/samples/sockmap/sockmap_user.c @@ -256,8 +256,16 @@ int main(int argc, char **argv) } /* Attach programs to sockmap */ - err = __bpf_prog_attach(prog_fd[0], prog_fd[1], map_fd[0], - BPF_CGROUP_SMAP_INGRESS, 0); + err = bpf_prog_attach(prog_fd[0], map_fd[0], + BPF_SK_SKB_STREAM_PARSER, 0); + if (err) { + fprintf(stderr, "ERROR: bpf_prog_attach (sockmap): %d (%s)\n", + err, strerror(errno)); + return err; + } + + err = bpf_prog_attach(prog_fd[1], map_fd[0], + BPF_SK_SKB_STREAM_VERDICT, 0); if (err) { fprintf(stderr, "ERROR: bpf_prog_attach (sockmap): %d (%s)\n", err, strerror(errno)); diff --git 
a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index f8f6377fd541..09ac590eefb1 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -136,7 +136,8 @@ enum bpf_attach_type { BPF_CGROUP_INET_EGRESS, BPF_CGROUP_INET_SOCK_CREATE, BPF_CGROUP_SOCK_OPS, - BPF_CGROUP_SMAP_INGRESS, + BPF_SK_SKB_STREAM_PARSER, + BPF_SK_SKB_STREAM_VERDICT, __MAX_BPF_ATTACH_TYPE }; @@ -227,7 +228,6 @@ union bpf_attr { __u32 attach_bpf_fd; /* eBPF program to attach */ __u32 attach_type; __u32 attach_flags; - __u32 attach_bpf_fd2; }; struct { /* anonymous struct used by BPF_PROG_TEST_RUN command */ @@ -572,14 +572,11 @@ union bpf_attr { * @flags: reserved for future use * Return: SK_REDIRECT * - * int bpf_sock_map_update(skops, map, key, flags, map_flags) + * int bpf_sock_map_update(skops, map, key, flags) * @skops: pointer to bpf_sock_ops * @map: pointer to sockmap to update * @key: key to insert/update sock in map * @flags: same flags as map update elem - * @map_flags: sock map specific flags - * bit 1: Enable strparser - * other bits: reserved */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index a0717610b116..1d6907d379c9 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -235,28 +235,20 @@ int bpf_obj_get(const char *pathname) return sys_bpf(BPF_OBJ_GET, &attr, sizeof(attr)); } -int __bpf_prog_attach(int prog_fd1, int prog_fd2, int target_fd, - enum bpf_attach_type type, - unsigned int flags) +int bpf_prog_attach(int prog_fd, int target_fd, enum bpf_attach_type type, + unsigned int flags) { union bpf_attr attr; bzero(&attr, sizeof(attr)); attr.target_fd = target_fd; - attr.attach_bpf_fd = prog_fd1; - attr.attach_bpf_fd2 = prog_fd2; + attr.attach_bpf_fd = prog_fd; attr.attach_type = type; attr.attach_flags = flags; return sys_bpf(BPF_PROG_ATTACH, &attr, sizeof(attr)); } -int bpf_prog_attach(int prog_fd, int target_fd, enum bpf_attach_type type, - unsigned int flags) -{ - 
return __bpf_prog_attach(prog_fd, 0, target_fd, type, flags); -} - int bpf_prog_detach(int target_fd, enum bpf_attach_type type) { union bpf_attr attr; diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 90e9d4e85d08..b8ea5843c39e 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -56,10 +56,6 @@ int bpf_obj_pin(int fd, const char *pathname); int bpf_obj_get(const char *pathname); int bpf_prog_attach(int prog_fd, int attachable_fd, enum bpf_attach_type type, unsigned int flags); -int __bpf_prog_attach(int prog1, int prog2, - int attachable_fd, - enum bpf_attach_type type, - unsigned int flags); int bpf_prog_detach(int attachable_fd, enum bpf_attach_type type); int bpf_prog_test_run(int prog_fd, int repeat, void *data, __u32 size, void *data_out, __u32 *size_out, __u32 *retval, diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h index 98f3be26d390..36fb9161b34a 100644 --- a/tools/testing/selftests/bpf/bpf_helpers.h +++ b/tools/testing/selftests/bpf/bpf_helpers.h @@ -68,8 +68,7 @@ static int (*bpf_setsockopt)(void *ctx, int level, int optname, void *optval, static int (*bpf_sk_redirect_map)(void *map, int key, int flags) = (void *) BPF_FUNC_sk_redirect_map; static int (*bpf_sock_map_update)(void *map, void *key, void *value, - unsigned long long flags, - unsigned long long map_lags) = + unsigned long long flags) = (void *) BPF_FUNC_sock_map_update; diff --git a/tools/testing/selftests/bpf/sockmap_parse_prog.c b/tools/testing/selftests/bpf/sockmap_parse_prog.c index 8b5453158399..710f43f42dc4 100644 --- a/tools/testing/selftests/bpf/sockmap_parse_prog.c +++ b/tools/testing/selftests/bpf/sockmap_parse_prog.c @@ -30,7 +30,7 @@ int bpf_prog1(struct __sk_buff *skb) */ d[0] = 1; - bpf_printk("data[0] = (%u): local_port %i remote %i\n", + bpf_printk("parse: data[0] = (%u): local_port %i remote %i\n", d[0], lport, bpf_ntohl(rport)); return skb->len; } diff --git 
a/tools/testing/selftests/bpf/sockmap_verdict_prog.c b/tools/testing/selftests/bpf/sockmap_verdict_prog.c index d5f9447b3808..0573c1db2519 100644 --- a/tools/testing/selftests/bpf/sockmap_verdict_prog.c +++ b/tools/testing/selftests/bpf/sockmap_verdict_prog.c @@ -40,7 +40,7 @@ int bpf_prog2(struct __sk_buff *skb) d[6] = 0xe; d[7] = 0xf; - bpf_printk("data[0] = (%u): local_port %i remote %i\n", + bpf_printk("verdict: data[0] = (%u): local_port %i remote %i redirect 5\n", d[0], lport, bpf_ntohl(rport)); return bpf_sk_redirect_map(&sock_map, 5, 0); } diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c index 40b2d1faf02b..6df6e6257424 100644 --- a/tools/testing/selftests/bpf/test_maps.c +++ b/tools/testing/selftests/bpf/test_maps.c @@ -547,20 +547,26 @@ static void test_sockmap(int task, void *data) goto out_sockmap; } - /* Nothing attached so these should fail */ + /* Test update without programs */ for (i = 0; i < 6; i++) { err = bpf_map_update_elem(fd, &i, &sfd[i], BPF_ANY); - if (!err) { - printf("Failed invalid update sockmap '%i:%i'\n", + if (err) { + printf("Failed noprog update sockmap '%i:%i'\n", i, sfd[i]); goto out_sockmap; } } /* Test attaching bad fds */ - err = __bpf_prog_attach(-1, -2, fd, BPF_CGROUP_SMAP_INGRESS, 0); + err = bpf_prog_attach(-1, fd, BPF_SK_SKB_STREAM_PARSER, 0); if (!err) { - printf("Failed invalid prog attach\n"); + printf("Failed invalid parser prog attach\n"); + goto out_sockmap; + } + + err = bpf_prog_attach(-1, fd, BPF_SK_SKB_STREAM_VERDICT, 0); + if (!err) { + printf("Failed invalid verdict prog attach\n"); goto out_sockmap; } @@ -591,14 +597,21 @@ static void test_sockmap(int task, void *data) goto out_sockmap; } - err = __bpf_prog_attach(parse_prog, verdict_prog, map_fd, - BPF_CGROUP_SMAP_INGRESS, 0); + err = bpf_prog_attach(parse_prog, map_fd, + BPF_SK_SKB_STREAM_PARSER, 0); + if (err) { + printf("Failed bpf prog attach\n"); + goto out_sockmap; + } + + err = 
bpf_prog_attach(verdict_prog, map_fd, + BPF_SK_SKB_STREAM_VERDICT, 0); if (err) { printf("Failed bpf prog attach\n"); goto out_sockmap; } - /* Test map update elem */ + /* Test map update elem afterwards fd lives in fd and map_fd */ for (i = 0; i < 6; i++) { err = bpf_map_update_elem(map_fd, &i, &sfd[i], BPF_ANY); if (err) { @@ -649,96 +662,68 @@ static void test_sockmap(int task, void *data) goto out_sockmap; } - /* Delete the reset of the elems include some NULL elems */ - for (i = 0; i < 6; i++) { - err = bpf_map_delete_elem(map_fd, &i); - if (err && (i == 0 || i == 1 || i >= 4)) { - printf("Failed delete sockmap %i '%i:%i'\n", - err, i, sfd[i]); - goto out_sockmap; - } else if (!err && (i == 2 || i == 3)) { - printf("Failed null delete sockmap %i '%i:%i'\n", - err, i, sfd[i]); - goto out_sockmap; - } - } - - /* Test having multiple SMAPs open and active on same fds */ - err = __bpf_prog_attach(parse_prog, verdict_prog, fd, - BPF_CGROUP_SMAP_INGRESS, 0); - if (err) { - printf("Failed fd bpf prog attach\n"); - goto out_sockmap; - } - - for (i = 0; i < 6; i++) { - err = bpf_map_update_elem(fd, &i, &sfd[i], BPF_ANY); - if (err) { - printf("Failed fd update sockmap %i '%i:%i'\n", - err, i, sfd[i]); - goto out_sockmap; - } - } - - /* Test duplicate socket add of NOEXIST, ANY and EXIST */ - i = 0; + /* Push fd into same slot */ + i = 2; err = bpf_map_update_elem(fd, &i, &sfd[i], BPF_NOEXIST); if (!err) { - printf("Failed BPF_NOEXIST create\n"); + printf("Failed allowed sockmap dup slot BPF_NOEXIST\n"); goto out_sockmap; } err = bpf_map_update_elem(fd, &i, &sfd[i], BPF_ANY); if (err) { - printf("Failed sockmap update BPF_ANY\n"); + printf("Failed sockmap update new slot BPF_ANY\n"); goto out_sockmap; } err = bpf_map_update_elem(fd, &i, &sfd[i], BPF_EXIST); if (err) { - printf("Failed sockmap update BPF_EXIST\n"); + printf("Failed sockmap update new slot BPF_EXIST\n"); goto out_sockmap; } - /* The above were pushing fd into same slot try different slot now */ - i = 2; - 
err = bpf_map_update_elem(fd, &i, &sfd[i], BPF_NOEXIST); - if (!err) { - printf("Failed BPF_NOEXIST create\n"); - goto out_sockmap; + /* Delete the elems without programs */ + for (i = 0; i < 6; i++) { + err = bpf_map_delete_elem(fd, &i); + if (err) { + printf("Failed delete sockmap %i '%i:%i'\n", + err, i, sfd[i]); + } } - err = bpf_map_update_elem(fd, &i, &sfd[i], BPF_ANY); + /* Test having multiple maps open and set with programs on same fds */ + err = bpf_prog_attach(parse_prog, fd, + BPF_SK_SKB_STREAM_PARSER, 0); if (err) { - printf("Failed sockmap update BPF_ANY\n"); + printf("Failed fd bpf parse prog attach\n"); goto out_sockmap; } - - err = bpf_map_update_elem(fd, &i, &sfd[i], BPF_EXIST); + err = bpf_prog_attach(verdict_prog, fd, + BPF_SK_SKB_STREAM_VERDICT, 0); if (err) { - printf("Failed sockmap update BPF_EXIST\n"); + printf("Failed fd bpf verdict prog attach\n"); goto out_sockmap; } - /* Try pushing fd into different map, this is not allowed at the - * moment. Which programs would we use? 
- */ - err = bpf_map_update_elem(map_fd, &i, &sfd[i], BPF_NOEXIST); - if (!err) { - printf("Failed BPF_NOEXIST create\n"); - goto out_sockmap; - } - - err = bpf_map_update_elem(map_fd, &i, &sfd[i], BPF_ANY); - if (!err) { - printf("Failed sockmap update BPF_ANY\n"); - goto out_sockmap; - } - - err = bpf_map_update_elem(map_fd, &i, &sfd[i], BPF_EXIST); - if (!err) { - printf("Failed sockmap update BPF_EXIST\n"); - goto out_sockmap; + for (i = 4; i < 6; i++) { + err = bpf_map_update_elem(fd, &i, &sfd[i], BPF_ANY); + if (!err) { + printf("Failed allowed duplicate programs in update ANY sockmap %i '%i:%i'\n", + err, i, sfd[i]); + goto out_sockmap; + } + err = bpf_map_update_elem(fd, &i, &sfd[i], BPF_NOEXIST); + if (!err) { + printf("Failed allowed duplicate program in update NOEXIST sockmap %i '%i:%i'\n", + err, i, sfd[i]); + goto out_sockmap; + } + err = bpf_map_update_elem(fd, &i, &sfd[i], BPF_EXIST); + if (!err) { + printf("Failed allowed duplicate program in update EXIST sockmap %i '%i:%i'\n", + err, i, sfd[i]); + goto out_sockmap; + } } /* Test map close sockets */ -- cgit v1.2.3