From d7626b5acff9227e2a65da636a53e09bdafdc0aa Mon Sep 17 00:00:00 2001 From: Tuong Lien Date: Tue, 26 May 2020 16:38:34 +0700 Subject: tipc: introduce Gap ACK blocks for broadcast link As achieved through commit 9195948fbf34 ("tipc: improve TIPC throughput by Gap ACK blocks"), we apply the same mechanism for the broadcast link as well. The 'Gap ACK blocks' data field in a 'PROTOCOL/STATE_MSG' will consist of two parts built for both the broadcast and unicast types:

       31                       16 15                        0
      +-------------+-------------+-------------+-------------+
      |  bgack_cnt  |  ugack_cnt  |            len            |
      +-------------+-------------+-------------+-------------+  -
      |            gap            |            ack            |   |
      +-------------+-------------+-------------+-------------+    > bc gacks
      :                           :                           :   |
      +-------------+-------------+-------------+-------------+  -
      |            gap            |            ack            |   |
      +-------------+-------------+-------------+-------------+    > uc gacks
      :                           :                           :   |
      +-------------+-------------+-------------+-------------+  -

which is "automatically" backward-compatible. We also increase the max number of Gap ACK blocks to 128, allowing up to 64 blocks per type (total buffer size = 516 bytes). Besides, the 'tipc_link_advance_transmq()' function is refactored so that it is applicable to both the unicast and broadcast cases now, so some old functions can be removed and the code is optimized. With the patch, TIPC broadcast is more robust regardless of packet loss or disorder, latency, ... in the underlying network. Its performance is boosted significantly. For example, an experiment with a 5% packet loss rate gives the following results: $ time tipc-pipe --mc --rdm --data_size 123 --data_num 1500000 real 0m 42.46s user 0m 1.16s sys 0m 17.67s Without the patch: $ time tipc-pipe --mc --rdm --data_size 123 --data_num 1500000 real 8m 27.94s user 0m 0.55s sys 0m 2.38s Acked-by: Ying Xue Acked-by: Jon Maloy Signed-off-by: Tuong Lien Signed-off-by: David S. 
Miller --- net/tipc/node.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'net/tipc/node.c') diff --git a/net/tipc/node.c b/net/tipc/node.c index 803a3a6d0f50..6a49b3eeaae9 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -2071,10 +2071,16 @@ rcv: le = &n->links[bearer_id]; /* Ensure broadcast reception is in synch with peer's send state */ - if (unlikely(usr == LINK_PROTOCOL)) + if (unlikely(usr == LINK_PROTOCOL)) { + if (unlikely(skb_linearize(skb))) { + tipc_node_put(n); + goto discard; + } + hdr = buf_msg(skb); tipc_node_bc_sync_rcv(n, hdr, bearer_id, &xmitq); - else if (unlikely(tipc_link_acked(n->bc_entry.link) != bc_ack)) + } else if (unlikely(tipc_link_acked(n->bc_entry.link) != bc_ack)) { tipc_bcast_ack_rcv(net, n->bc_entry.link, hdr); + } /* Receive packet directly if conditions permit */ tipc_node_read_lock(n); -- cgit v1.2.3 From a91d55d162b86fb983b88f44296149752db7efbd Mon Sep 17 00:00:00 2001 From: Tuong Lien Date: Tue, 26 May 2020 16:38:36 +0700 Subject: tipc: enable broadcast retrans via unicast In some environments, broadcast traffic is suppressed at a high rate (i.e. a kind of bandwidth limit setting). When it is applied, TIPC broadcast can still run successfully. However, when it comes to a high load, some packets will be dropped first and TIPC tries to retransmit them but the packet retransmission is intentionally broadcast too, which makes things worse and is not helpful at all. This commit enables the broadcast retransmission via unicast which only retransmits packets to the specific peer that has really reported a gap, i.e. not broadcasting to all nodes in the cluster, so it will prevent the retransmissions from being suppressed, and also reduce some overheads on the other peers due to duplicates, finally improving the overall TIPC broadcast performance. Note: the functionality can be turned on/off via the sysctl file: echo 1 > /proc/sys/net/tipc/bc_retruni echo 0 > /proc/sys/net/tipc/bc_retruni Default is '0', i.e. 
the broadcast retransmission still works as usual. Acked-by: Ying Xue Acked-by: Jon Maloy Signed-off-by: Tuong Lien Signed-off-by: David S. Miller --- net/tipc/bcast.c | 11 ++++++++--- net/tipc/bcast.h | 4 +++- net/tipc/link.c | 10 ++++++---- net/tipc/link.h | 3 ++- net/tipc/node.c | 2 +- net/tipc/sysctl.c | 9 ++++++++- 6 files changed, 28 insertions(+), 11 deletions(-) (limited to 'net/tipc/node.c') diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 3ce690a96ee9..50a16f8bebd9 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -46,6 +46,7 @@ #define BCLINK_WIN_MIN 32 /* bcast minimum link window size */ const char tipc_bclink_name[] = "broadcast-link"; +unsigned long sysctl_tipc_bc_retruni __read_mostly; /** * struct tipc_bc_base - base structure for keeping broadcast send state @@ -474,7 +475,7 @@ void tipc_bcast_ack_rcv(struct net *net, struct tipc_link *l, __skb_queue_head_init(&xmitq); tipc_bcast_lock(net); - tipc_link_bc_ack_rcv(l, acked, 0, NULL, &xmitq); + tipc_link_bc_ack_rcv(l, acked, 0, NULL, &xmitq, NULL); tipc_bcast_unlock(net); tipc_bcbase_xmit(net, &xmitq); @@ -489,7 +490,8 @@ void tipc_bcast_ack_rcv(struct net *net, struct tipc_link *l, * RCU is locked, no other locks set */ int tipc_bcast_sync_rcv(struct net *net, struct tipc_link *l, - struct tipc_msg *hdr) + struct tipc_msg *hdr, + struct sk_buff_head *retrq) { struct sk_buff_head *inputq = &tipc_bc_base(net)->inputq; struct tipc_gap_ack_blks *ga; @@ -503,8 +505,11 @@ int tipc_bcast_sync_rcv(struct net *net, struct tipc_link *l, tipc_link_bc_init_rcv(l, hdr); } else if (!msg_bc_ack_invalid(hdr)) { tipc_get_gap_ack_blks(&ga, l, hdr, false); + if (!sysctl_tipc_bc_retruni) + retrq = &xmitq; rc = tipc_link_bc_ack_rcv(l, msg_bcast_ack(hdr), - msg_bc_gap(hdr), ga, &xmitq); + msg_bc_gap(hdr), ga, &xmitq, + retrq); rc |= tipc_link_bc_sync_rcv(l, hdr, &xmitq); } tipc_bcast_unlock(net); diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h index 9e847d9617d3..97d3cf9d3e4d 100644 --- a/net/tipc/bcast.h 
+++ b/net/tipc/bcast.h @@ -45,6 +45,7 @@ struct tipc_nl_msg; struct tipc_nlist; struct tipc_nitem; extern const char tipc_bclink_name[]; +extern unsigned long sysctl_tipc_bc_retruni; #define TIPC_METHOD_EXPIRE msecs_to_jiffies(5000) @@ -93,7 +94,8 @@ int tipc_bcast_rcv(struct net *net, struct tipc_link *l, struct sk_buff *skb); void tipc_bcast_ack_rcv(struct net *net, struct tipc_link *l, struct tipc_msg *hdr); int tipc_bcast_sync_rcv(struct net *net, struct tipc_link *l, - struct tipc_msg *hdr); + struct tipc_msg *hdr, + struct sk_buff_head *retrq); int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg); int tipc_nl_bc_link_set(struct net *net, struct nlattr *attrs[]); int tipc_bclink_reset_stats(struct net *net); diff --git a/net/tipc/link.c b/net/tipc/link.c index 288c5670cfa5..af352391e2ab 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -375,7 +375,7 @@ void tipc_link_remove_bc_peer(struct tipc_link *snd_l, snd_l->ackers--; rcv_l->bc_peer_is_up = true; rcv_l->state = LINK_ESTABLISHED; - tipc_link_bc_ack_rcv(rcv_l, ack, 0, NULL, xmitq); + tipc_link_bc_ack_rcv(rcv_l, ack, 0, NULL, xmitq, NULL); trace_tipc_link_reset(rcv_l, TIPC_DUMP_ALL, "bclink removed!"); tipc_link_reset(rcv_l); rcv_l->state = LINK_RESET; @@ -2400,7 +2400,8 @@ int tipc_link_bc_sync_rcv(struct tipc_link *l, struct tipc_msg *hdr, int tipc_link_bc_ack_rcv(struct tipc_link *r, u16 acked, u16 gap, struct tipc_gap_ack_blks *ga, - struct sk_buff_head *xmitq) + struct sk_buff_head *xmitq, + struct sk_buff_head *retrq) { struct tipc_link *l = r->bc_sndlink; bool unused = false; @@ -2413,7 +2414,7 @@ int tipc_link_bc_ack_rcv(struct tipc_link *r, u16 acked, u16 gap, return 0; trace_tipc_link_bc_ack(r, acked, gap, &l->transmq); - tipc_link_advance_transmq(l, r, acked, gap, ga, xmitq, &unused, &rc); + tipc_link_advance_transmq(l, r, acked, gap, ga, retrq, &unused, &rc); tipc_link_advance_backlog(l, xmitq); if (unlikely(!skb_queue_empty(&l->wakeupq))) @@ -2447,7 +2448,8 @@ int 
tipc_link_bc_nack_rcv(struct tipc_link *l, struct sk_buff *skb, return 0; if (dnode == tipc_own_addr(l->net)) { - rc = tipc_link_bc_ack_rcv(l, acked, to - acked, NULL, xmitq); + rc = tipc_link_bc_ack_rcv(l, acked, to - acked, NULL, xmitq, + xmitq); l->stats.recv_nacks++; return rc; } diff --git a/net/tipc/link.h b/net/tipc/link.h index 0a0fa7350722..4d0768cf91d5 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -147,7 +147,8 @@ u16 tipc_get_gap_ack_blks(struct tipc_gap_ack_blks **ga, struct tipc_link *l, struct tipc_msg *hdr, bool uc); int tipc_link_bc_ack_rcv(struct tipc_link *l, u16 acked, u16 gap, struct tipc_gap_ack_blks *ga, - struct sk_buff_head *xmitq); + struct sk_buff_head *xmitq, + struct sk_buff_head *retrq); void tipc_link_build_bc_sync_msg(struct tipc_link *l, struct sk_buff_head *xmitq); void tipc_link_bc_init_rcv(struct tipc_link *l, struct tipc_msg *hdr); diff --git a/net/tipc/node.c b/net/tipc/node.c index 6a49b3eeaae9..548207fdec15 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -1772,7 +1772,7 @@ static void tipc_node_bc_sync_rcv(struct tipc_node *n, struct tipc_msg *hdr, struct tipc_link *ucl; int rc; - rc = tipc_bcast_sync_rcv(n->net, n->bc_entry.link, hdr); + rc = tipc_bcast_sync_rcv(n->net, n->bc_entry.link, hdr, xmitq); if (rc & TIPC_LINK_DOWN_EVT) { tipc_node_reset_links(n); diff --git a/net/tipc/sysctl.c b/net/tipc/sysctl.c index 58ab3d6dcdce..97a6264a2993 100644 --- a/net/tipc/sysctl.c +++ b/net/tipc/sysctl.c @@ -36,7 +36,7 @@ #include "core.h" #include "trace.h" #include "crypto.h" - +#include "bcast.h" #include static struct ctl_table_header *tipc_ctl_hdr; @@ -75,6 +75,13 @@ static struct ctl_table tipc_table[] = { .extra1 = SYSCTL_ONE, }, #endif + { + .procname = "bc_retruni", + .data = &sysctl_tipc_bc_retruni, + .maxlen = sizeof(sysctl_tipc_bc_retruni), + .mode = 0644, + .proc_handler = proc_doulongvec_minmax, + }, {} }; -- cgit v1.2.3 From 03b6fefd9bb4844c75faeb10df8496794e2fd5da Mon Sep 17 00:00:00 2001 From: Tuong Lien 
Date: Tue, 26 May 2020 16:38:37 +0700 Subject: tipc: add support for broadcast rcv stats dumping This commit enables dumping the statistics of a broadcast-receiver link like the traditional 'broadcast-link' one (which is for broadcast-sender). The link dumping can be triggered via netlink (e.g. the iproute2/tipc tool) by the link flag - 'TIPC_NLA_LINK_BROADCAST' as the indicator. The name of a broadcast-receiver link of a specific peer will be in the format: 'broadcast-link:<peer-id>'. For example: Link Window:50 packets RX packets:7841 fragments:2408/440 bundles:0/0 TX packets:0 fragments:0/0 bundles:0/0 RX naks:0 defs:124 dups:0 TX naks:21 acks:0 retrans:0 Congestion link:0 Send queue max:0 avg:0 In addition, the broadcast-receiver link statistics can be reset in the usual way via netlink by specifying that link name in the command. Note: the 'tipc_link_name_ext()' is removed because the link name can now be retrieved simply via the 'l->name'. Acked-by: Ying Xue Acked-by: Jon Maloy Signed-off-by: Tuong Lien Signed-off-by: David S. 
Miller --- net/tipc/bcast.c | 6 ++--- net/tipc/bcast.h | 5 +++-- net/tipc/link.c | 65 +++++++++++++++++++++++++++--------------------------- net/tipc/link.h | 3 +-- net/tipc/msg.c | 9 ++++---- net/tipc/msg.h | 2 +- net/tipc/netlink.c | 2 +- net/tipc/node.c | 61 +++++++++++++++++++++++++++++++++++++++++++------- net/tipc/trace.h | 4 ++-- 9 files changed, 101 insertions(+), 56 deletions(-) (limited to 'net/tipc/node.c') diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 50a16f8bebd9..383f87bc1061 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -563,10 +563,8 @@ void tipc_bcast_remove_peer(struct net *net, struct tipc_link *rcv_l) tipc_sk_rcv(net, inputq); } -int tipc_bclink_reset_stats(struct net *net) +int tipc_bclink_reset_stats(struct net *net, struct tipc_link *l) { - struct tipc_link *l = tipc_bc_sndlink(net); - if (!l) return -ENOPROTOOPT; @@ -694,7 +692,7 @@ int tipc_bcast_init(struct net *net) tn->bcbase = bb; spin_lock_init(&tipc_net(net)->bclock); - if (!tipc_link_bc_create(net, 0, 0, + if (!tipc_link_bc_create(net, 0, 0, NULL, FB_MTU, BCLINK_WIN_DEFAULT, BCLINK_WIN_DEFAULT, diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h index 97d3cf9d3e4d..4240c95188b1 100644 --- a/net/tipc/bcast.h +++ b/net/tipc/bcast.h @@ -96,9 +96,10 @@ void tipc_bcast_ack_rcv(struct net *net, struct tipc_link *l, int tipc_bcast_sync_rcv(struct net *net, struct tipc_link *l, struct tipc_msg *hdr, struct sk_buff_head *retrq); -int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg); +int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg, + struct tipc_link *bcl); int tipc_nl_bc_link_set(struct net *net, struct nlattr *attrs[]); -int tipc_bclink_reset_stats(struct net *net); +int tipc_bclink_reset_stats(struct net *net, struct tipc_link *l); u32 tipc_bcast_get_broadcast_mode(struct net *net); u32 tipc_bcast_get_broadcast_ratio(struct net *net); diff --git a/net/tipc/link.c b/net/tipc/link.c index af352391e2ab..ee3b8d0576b8 100644 --- a/net/tipc/link.c 
+++ b/net/tipc/link.c @@ -539,7 +539,7 @@ bool tipc_link_create(struct net *net, char *if_name, int bearer_id, * * Returns true if link was created, otherwise false */ -bool tipc_link_bc_create(struct net *net, u32 ownnode, u32 peer, +bool tipc_link_bc_create(struct net *net, u32 ownnode, u32 peer, u8 *peer_id, int mtu, u32 min_win, u32 max_win, u16 peer_caps, struct sk_buff_head *inputq, struct sk_buff_head *namedq, @@ -554,7 +554,18 @@ bool tipc_link_bc_create(struct net *net, u32 ownnode, u32 peer, return false; l = *link; - strcpy(l->name, tipc_bclink_name); + if (peer_id) { + char peer_str[NODE_ID_STR_LEN] = {0,}; + + tipc_nodeid2string(peer_str, peer_id); + if (strlen(peer_str) > 16) + sprintf(peer_str, "%x", peer); + /* Broadcast receiver link name: "broadcast-link:" */ + snprintf(l->name, sizeof(l->name), "%s:%s", tipc_bclink_name, + peer_str); + } else { + strcpy(l->name, tipc_bclink_name); + } trace_tipc_link_reset(l, TIPC_DUMP_ALL, "bclink created!"); tipc_link_reset(l); l->state = LINK_RESET; @@ -1412,11 +1423,8 @@ static u8 __tipc_build_gap_ack_blks(struct tipc_gap_ack_blks *ga, gacks[n].ack = htons(expect - 1); gacks[n].gap = htons(seqno - expect); if (++n >= MAX_GAP_ACK_BLKS / 2) { - char buf[TIPC_MAX_LINK_NAME]; - pr_info_ratelimited("Gacks on %s: %d, ql: %d!\n", - tipc_link_name_ext(l, buf), - n, + l->name, n, skb_queue_len(&l->deferdq)); return n; } @@ -1587,6 +1595,8 @@ release: _skb->priority = TC_PRIO_CONTROL; __skb_queue_tail(xmitq, _skb); l->stats.retransmitted++; + if (!is_uc) + r->stats.retransmitted++; *retransmitted = true; /* Increase actual retrans counter & mark first time */ if (!TIPC_SKB_CB(skb)->retr_cnt++) @@ -1753,7 +1763,8 @@ int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb, /* Defer delivery if sequence gap */ if (unlikely(seqno != rcv_nxt)) { - __tipc_skb_queue_sorted(defq, seqno, skb); + if (!__tipc_skb_queue_sorted(defq, seqno, skb)) + l->stats.duplicates++; rc |= tipc_link_build_nack_msg(l, xmitq); break; } @@ 
-1787,15 +1798,15 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, int tolerance, int priority, struct sk_buff_head *xmitq) { + struct tipc_mon_state *mstate = &l->mon_state; + struct sk_buff_head *dfq = &l->deferdq; struct tipc_link *bcl = l->bc_rcvlink; - struct sk_buff *skb; struct tipc_msg *hdr; - struct sk_buff_head *dfq = &l->deferdq; + struct sk_buff *skb; bool node_up = link_is_up(bcl); - struct tipc_mon_state *mstate = &l->mon_state; + u16 glen = 0, bc_rcvgap = 0; int dlen = 0; void *data; - u16 glen = 0; /* Don't send protocol message during reset or link failover */ if (tipc_link_is_blocked(l)) @@ -1833,7 +1844,8 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, if (l->peer_caps & TIPC_LINK_PROTO_SEQNO) msg_set_seqno(hdr, l->snd_nxt_state++); msg_set_seq_gap(hdr, rcvgap); - msg_set_bc_gap(hdr, link_bc_rcv_gap(bcl)); + bc_rcvgap = link_bc_rcv_gap(bcl); + msg_set_bc_gap(hdr, bc_rcvgap); msg_set_probe(hdr, probe); msg_set_is_keepalive(hdr, probe || probe_reply); if (l->peer_caps & TIPC_GAP_ACK_BLOCK) @@ -1858,6 +1870,8 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, l->stats.sent_probes++; if (rcvgap) l->stats.sent_nacks++; + if (bc_rcvgap) + bcl->stats.sent_nacks++; skb->priority = TC_PRIO_CONTROL; __skb_queue_tail(xmitq, skb); trace_tipc_proto_build(skb, false, l->name); @@ -2358,8 +2372,6 @@ int tipc_link_bc_sync_rcv(struct tipc_link *l, struct tipc_msg *hdr, if (!l->bc_peer_is_up) return rc; - l->stats.recv_nacks++; - /* Ignore if peers_snd_nxt goes beyond receive window */ if (more(peers_snd_nxt, l->rcv_nxt + l->window)) return rc; @@ -2410,6 +2422,11 @@ int tipc_link_bc_ack_rcv(struct tipc_link *r, u16 acked, u16 gap, if (!link_is_up(r) || !r->bc_peer_is_up) return 0; + if (gap) { + l->stats.recv_nacks++; + r->stats.recv_nacks++; + } + if (less(acked, r->acked) || (acked == r->acked && !gap && !ga)) return 0; @@ -2721,16 +2738,15 @@ msg_full: 
return -EMSGSIZE; } -int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg) +int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg, + struct tipc_link *bcl) { int err; void *hdr; struct nlattr *attrs; struct nlattr *prop; - struct tipc_net *tn = net_generic(net, tipc_net_id); u32 bc_mode = tipc_bcast_get_broadcast_mode(net); u32 bc_ratio = tipc_bcast_get_broadcast_ratio(net); - struct tipc_link *bcl = tn->bcl; if (!bcl) return 0; @@ -2817,21 +2833,6 @@ void tipc_link_set_abort_limit(struct tipc_link *l, u32 limit) l->abort_limit = limit; } -char *tipc_link_name_ext(struct tipc_link *l, char *buf) -{ - if (!l) - scnprintf(buf, TIPC_MAX_LINK_NAME, "null"); - else if (link_is_bc_sndlink(l)) - scnprintf(buf, TIPC_MAX_LINK_NAME, "broadcast-sender"); - else if (link_is_bc_rcvlink(l)) - scnprintf(buf, TIPC_MAX_LINK_NAME, - "broadcast-receiver, peer %x", l->addr); - else - memcpy(buf, l->name, TIPC_MAX_LINK_NAME); - - return buf; -} - /** * tipc_link_dump - dump TIPC link data * @l: tipc link to be dumped diff --git a/net/tipc/link.h b/net/tipc/link.h index 4d0768cf91d5..fc07232c9a12 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -80,7 +80,7 @@ bool tipc_link_create(struct net *net, char *if_name, int bearer_id, struct sk_buff_head *inputq, struct sk_buff_head *namedq, struct tipc_link **link); -bool tipc_link_bc_create(struct net *net, u32 ownnode, u32 peer, +bool tipc_link_bc_create(struct net *net, u32 ownnode, u32 peer, u8 *peer_id, int mtu, u32 min_win, u32 max_win, u16 peer_caps, struct sk_buff_head *inputq, struct sk_buff_head *namedq, @@ -111,7 +111,6 @@ u16 tipc_link_rcv_nxt(struct tipc_link *l); u16 tipc_link_acked(struct tipc_link *l); u32 tipc_link_id(struct tipc_link *l); char *tipc_link_name(struct tipc_link *l); -char *tipc_link_name_ext(struct tipc_link *l, char *buf); u32 tipc_link_state(struct tipc_link *l); char tipc_link_plane(struct tipc_link *l); int tipc_link_prio(struct tipc_link *l); diff --git a/net/tipc/msg.c 
b/net/tipc/msg.c index 4d0e0bdd997b..c69fb99163fc 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -825,19 +825,19 @@ bool tipc_msg_pskb_copy(u32 dst, struct sk_buff_head *msg, * @seqno: sequence number of buffer to add * @skb: buffer to add */ -void __tipc_skb_queue_sorted(struct sk_buff_head *list, u16 seqno, +bool __tipc_skb_queue_sorted(struct sk_buff_head *list, u16 seqno, struct sk_buff *skb) { struct sk_buff *_skb, *tmp; if (skb_queue_empty(list) || less(seqno, buf_seqno(skb_peek(list)))) { __skb_queue_head(list, skb); - return; + return true; } if (more(seqno, buf_seqno(skb_peek_tail(list)))) { __skb_queue_tail(list, skb); - return; + return true; } skb_queue_walk_safe(list, _skb, tmp) { @@ -846,9 +846,10 @@ void __tipc_skb_queue_sorted(struct sk_buff_head *list, u16 seqno, if (seqno == buf_seqno(_skb)) break; __skb_queue_before(list, _skb, skb); - return; + return true; } kfree_skb(skb); + return false; } void tipc_skb_reject(struct net *net, int err, struct sk_buff *skb, diff --git a/net/tipc/msg.h b/net/tipc/msg.h index ca5f8689a33b..cd4281779468 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -1145,7 +1145,7 @@ bool tipc_msg_assemble(struct sk_buff_head *list); bool tipc_msg_reassemble(struct sk_buff_head *list, struct sk_buff_head *rcvq); bool tipc_msg_pskb_copy(u32 dst, struct sk_buff_head *msg, struct sk_buff_head *cpy); -void __tipc_skb_queue_sorted(struct sk_buff_head *list, u16 seqno, +bool __tipc_skb_queue_sorted(struct sk_buff_head *list, u16 seqno, struct sk_buff *skb); bool tipc_msg_skb_clone(struct sk_buff_head *msg, struct sk_buff_head *cpy); diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c index bb9862410e68..c4aee6247d55 100644 --- a/net/tipc/netlink.c +++ b/net/tipc/netlink.c @@ -188,7 +188,7 @@ static const struct genl_ops tipc_genl_v2_ops[] = { }, { .cmd = TIPC_NL_LINK_GET, - .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, + .validate = GENL_DONT_VALIDATE_STRICT, .doit = tipc_nl_node_get_link, .dumpit = 
tipc_nl_node_dump_link, }, diff --git a/net/tipc/node.c b/net/tipc/node.c index 548207fdec15..0312fb181d94 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -1138,7 +1138,7 @@ void tipc_node_check_dest(struct net *net, u32 addr, if (unlikely(!n->bc_entry.link)) { snd_l = tipc_bc_sndlink(net); if (!tipc_link_bc_create(net, tipc_own_addr(net), - addr, U16_MAX, + addr, peer_id, U16_MAX, tipc_link_min_win(snd_l), tipc_link_max_win(snd_l), n->capabilities, @@ -2435,7 +2435,7 @@ int tipc_nl_node_get_link(struct sk_buff *skb, struct genl_info *info) return -ENOMEM; if (strcmp(name, tipc_bclink_name) == 0) { - err = tipc_nl_add_bc_link(net, &msg); + err = tipc_nl_add_bc_link(net, &msg, tipc_net(net)->bcl); if (err) goto err_free; } else { @@ -2479,6 +2479,7 @@ int tipc_nl_node_reset_link_stats(struct sk_buff *skb, struct genl_info *info) struct tipc_node *node; struct nlattr *attrs[TIPC_NLA_LINK_MAX + 1]; struct net *net = sock_net(skb->sk); + struct tipc_net *tn = tipc_net(net); struct tipc_link_entry *le; if (!info->attrs[TIPC_NLA_LINK]) @@ -2495,11 +2496,26 @@ int tipc_nl_node_reset_link_stats(struct sk_buff *skb, struct genl_info *info) link_name = nla_data(attrs[TIPC_NLA_LINK_NAME]); - if (strcmp(link_name, tipc_bclink_name) == 0) { - err = tipc_bclink_reset_stats(net); + err = -EINVAL; + if (!strcmp(link_name, tipc_bclink_name)) { + err = tipc_bclink_reset_stats(net, tipc_bc_sndlink(net)); if (err) return err; return 0; + } else if (strstr(link_name, tipc_bclink_name)) { + rcu_read_lock(); + list_for_each_entry_rcu(node, &tn->node_list, list) { + tipc_node_read_lock(node); + link = node->bc_entry.link; + if (link && !strcmp(link_name, tipc_link_name(link))) { + err = tipc_bclink_reset_stats(net, link); + tipc_node_read_unlock(node); + break; + } + tipc_node_read_unlock(node); + } + rcu_read_unlock(); + return err; } node = tipc_node_find_by_name(net, link_name, &bearer_id); @@ -2523,7 +2539,8 @@ int tipc_nl_node_reset_link_stats(struct sk_buff *skb, struct 
genl_info *info) /* Caller should hold node lock */ static int __tipc_nl_add_node_links(struct net *net, struct tipc_nl_msg *msg, - struct tipc_node *node, u32 *prev_link) + struct tipc_node *node, u32 *prev_link, + bool bc_link) { u32 i; int err; @@ -2539,6 +2556,14 @@ static int __tipc_nl_add_node_links(struct net *net, struct tipc_nl_msg *msg, if (err) return err; } + + if (bc_link) { + *prev_link = i; + err = tipc_nl_add_bc_link(net, msg, node->bc_entry.link); + if (err) + return err; + } + *prev_link = 0; return 0; @@ -2547,17 +2572,36 @@ static int __tipc_nl_add_node_links(struct net *net, struct tipc_nl_msg *msg, int tipc_nl_node_dump_link(struct sk_buff *skb, struct netlink_callback *cb) { struct net *net = sock_net(skb->sk); + struct nlattr **attrs = genl_dumpit_info(cb)->attrs; + struct nlattr *link[TIPC_NLA_LINK_MAX + 1]; struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_node *node; struct tipc_nl_msg msg; u32 prev_node = cb->args[0]; u32 prev_link = cb->args[1]; int done = cb->args[2]; + bool bc_link = cb->args[3]; int err; if (done) return 0; + if (!prev_node) { + /* Check if broadcast-receiver links dumping is needed */ + if (attrs && attrs[TIPC_NLA_LINK]) { + err = nla_parse_nested_deprecated(link, + TIPC_NLA_LINK_MAX, + attrs[TIPC_NLA_LINK], + tipc_nl_link_policy, + NULL); + if (unlikely(err)) + return err; + if (unlikely(!link[TIPC_NLA_LINK_BROADCAST])) + return -EINVAL; + bc_link = true; + } + } + msg.skb = skb; msg.portid = NETLINK_CB(cb->skb).portid; msg.seq = cb->nlh->nlmsg_seq; @@ -2581,7 +2625,7 @@ int tipc_nl_node_dump_link(struct sk_buff *skb, struct netlink_callback *cb) list) { tipc_node_read_lock(node); err = __tipc_nl_add_node_links(net, &msg, node, - &prev_link); + &prev_link, bc_link); tipc_node_read_unlock(node); if (err) goto out; @@ -2589,14 +2633,14 @@ int tipc_nl_node_dump_link(struct sk_buff *skb, struct netlink_callback *cb) prev_node = node->addr; } } else { - err = tipc_nl_add_bc_link(net, &msg); + err = 
tipc_nl_add_bc_link(net, &msg, tn->bcl); if (err) goto out; list_for_each_entry_rcu(node, &tn->node_list, list) { tipc_node_read_lock(node); err = __tipc_nl_add_node_links(net, &msg, node, - &prev_link); + &prev_link, bc_link); tipc_node_read_unlock(node); if (err) goto out; @@ -2611,6 +2655,7 @@ out: cb->args[0] = prev_node; cb->args[1] = prev_link; cb->args[2] = done; + cb->args[3] = bc_link; return skb->len; } diff --git a/net/tipc/trace.h b/net/tipc/trace.h index e7535ab75255..04af83f0500c 100644 --- a/net/tipc/trace.h +++ b/net/tipc/trace.h @@ -255,7 +255,7 @@ DECLARE_EVENT_CLASS(tipc_link_class, TP_fast_assign( __assign_str(header, header); - tipc_link_name_ext(l, __entry->name); + memcpy(__entry->name, tipc_link_name(l), TIPC_MAX_LINK_NAME); tipc_link_dump(l, dqueues, __get_str(buf)); ), @@ -295,7 +295,7 @@ DECLARE_EVENT_CLASS(tipc_link_transmq_class, ), TP_fast_assign( - tipc_link_name_ext(r, __entry->name); + memcpy(__entry->name, tipc_link_name(r), TIPC_MAX_LINK_NAME); __entry->from = f; __entry->to = t; __entry->len = skb_queue_len(tq); -- cgit v1.2.3 From 049fa17f7ae6b0971ad41b761479962facafea4b Mon Sep 17 00:00:00 2001 From: Tuong Lien Date: Tue, 2 Jun 2020 11:46:40 +0700 Subject: Revert "tipc: Fix potential tipc_node refcnt leak in tipc_rcv" This reverts commit de058420767df21e2b6b0f3bb36d1616fb962032. There is no actual tipc_node refcnt leak as stated in the above commit. The refcnt is held carefully for the case of an asynchronous decryption (i.e. -EINPROGRESS/-EBUSY and skb = NULL is returned), so that the node object cannot be freed in the meantime. The counter will be re-balanced when the operation's callback arrives with the decrypted buffer if any. In other cases, e.g. a synchronous crypto operation, the counter will be decreased immediately when it is done. Now with that commit, a kernel panic will occur when there is no node found (i.e. n = NULL) in the 'tipc_rcv()' or when the node object is released prematurely. 
This commit solves the issues by reverting the said commit, while keeping the one valid case, in which 'skb_linearize()' fails. Acked-by: Jon Maloy Signed-off-by: Tuong Lien Tested-by: Hoang Le Signed-off-by: David S. Miller --- net/tipc/node.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net/tipc/node.c') diff --git a/net/tipc/node.c b/net/tipc/node.c index 0312fb181d94..a4c2816c3746 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -2038,7 +2038,6 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b) n = tipc_node_find_by_id(net, ehdr->id); } tipc_crypto_rcv(net, (n) ? n->crypto_rx : NULL, &skb, b); - tipc_node_put(n); if (!skb) return; -- cgit v1.2.3