From b4ff4f0419ae5db83553fab79d03a89c10d540a8 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 18 Jul 2007 15:46:06 -0700 Subject: [NETLINK]: allocate group bitmaps dynamically Allow changing the number of groups for a netlink family after it has been created, use RCU to protect the listeners bitmap keeping netlink_has_listeners() lock-free. Signed-off-by: Johannes Berg Acked-by: Patrick McHardy Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/linux/netlink.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux/netlink.h') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 2e23353c28a5..b971ddd24090 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -161,6 +161,7 @@ extern struct sock *netlink_kernel_create(int unit, unsigned int groups, void (*input)(struct sock *sk, int len), struct mutex *cb_mutex, struct module *module); +extern int netlink_change_ngroups(struct sock *sk, unsigned int groups); extern void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err); extern int netlink_has_listeners(struct sock *sk, unsigned int group); extern int netlink_unicast(struct sock *ssk, struct sk_buff *skb, __u32 pid, int nonblock); -- cgit v1.2.3 From 84659eb529b33572bb3f8c94e0978bd5d084bc7e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 18 Jul 2007 15:47:05 -0700 Subject: [NETLIKN]: Allow removing multicast groups. Allow kicking listeners out of a multicast group when necessary (for example if that group is going to be removed.) Signed-off-by: Johannes Berg Acked-by: Patrick McHardy Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/linux/netlink.h | 1 + net/netlink/af_netlink.c | 57 ++++++++++++++++++++++++++++++++++++++---------- 2 files changed, 46 insertions(+), 12 deletions(-) (limited to 'include/linux/netlink.h') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index b971ddd24090..83d8239f0cce 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -162,6 +162,7 @@ extern struct sock *netlink_kernel_create(int unit, unsigned int groups, struct mutex *cb_mutex, struct module *module); extern int netlink_change_ngroups(struct sock *sk, unsigned int groups); +extern void netlink_clear_multicast_users(struct sock *sk, unsigned int group); extern void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err); extern int netlink_has_listeners(struct sock *sk, unsigned int group); extern int netlink_unicast(struct sock *ssk, struct sk_buff *skb, __u32 pid, int nonblock); diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index c386eaf6ad5b..5681ce3aebca 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1027,6 +1027,23 @@ void netlink_set_err(struct sock *ssk, u32 pid, u32 group, int code) read_unlock(&nl_table_lock); } +/* must be called with netlink table grabbed */ +static void netlink_update_socket_mc(struct netlink_sock *nlk, + unsigned int group, + int is_new) +{ + int old, new = !!is_new, subscriptions; + + old = test_bit(group - 1, nlk->groups); + subscriptions = nlk->subscriptions - old + new; + if (new) + __set_bit(group - 1, nlk->groups); + else + __clear_bit(group - 1, nlk->groups); + netlink_update_subscriptions(&nlk->sk, subscriptions); + netlink_update_listeners(&nlk->sk); +} + static int netlink_setsockopt(struct socket *sock, int level, int optname, char __user *optval, int optlen) { @@ -1052,9 +1069,6 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname, break; case NETLINK_ADD_MEMBERSHIP: case NETLINK_DROP_MEMBERSHIP: { - unsigned int subscriptions; - int old, new = optname == NETLINK_ADD_MEMBERSHIP ? 1 : 0; - if (!netlink_capable(sock, NL_NONROOT_RECV)) return -EPERM; err = netlink_realloc_groups(sk); @@ -1063,14 +1077,8 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname, if (!val || val - 1 >= nlk->ngroups) return -EINVAL; netlink_table_grab(); - old = test_bit(val - 1, nlk->groups); - subscriptions = nlk->subscriptions - old + new; - if (new) - __set_bit(val - 1, nlk->groups); - else - __clear_bit(val - 1, nlk->groups); - netlink_update_subscriptions(sk, subscriptions); - netlink_update_listeners(sk); + netlink_update_socket_mc(nlk, val, + optname == NETLINK_ADD_MEMBERSHIP); netlink_table_ungrab(); err = 0; break; @@ -1351,7 +1359,9 @@ out_sock_release: * * This changes the number of multicast groups that are available * on a certain netlink family. Note that it is not possible to - * change the number of groups to below 32. + * change the number of groups to below 32. Also note that it does + * not implicitly call netlink_clear_multicast_users() when the + * number of groups is reduced. * * @sk: The kernel netlink socket, as returned by netlink_kernel_create(). * @groups: The new number of groups. @@ -1386,6 +1396,29 @@ int netlink_change_ngroups(struct sock *sk, unsigned int groups) } EXPORT_SYMBOL(netlink_change_ngroups); +/** + * netlink_clear_multicast_users - kick off multicast listeners + * + * This function removes all listeners from the given group. + * @ksk: The kernel netlink socket, as returned by + * netlink_kernel_create(). + * @group: The multicast group to clear. + */ +void netlink_clear_multicast_users(struct sock *ksk, unsigned int group) +{ + struct sock *sk; + struct hlist_node *node; + struct netlink_table *tbl = &nl_table[ksk->sk_protocol]; + + netlink_table_grab(); + + sk_for_each_bound(sk, node, &tbl->mc_list) + netlink_update_socket_mc(nlk_sk(sk), group, 0); + + netlink_table_ungrab(); +} +EXPORT_SYMBOL(netlink_clear_multicast_users); + void netlink_set_nonroot(int protocol, unsigned int flags) { if ((unsigned int)protocol < MAX_LINKS) -- cgit v1.2.3 From b4b510290b056b86611757ce1175a230f1080f53 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 12 Sep 2007 13:05:38 +0200 Subject: [NET]: Support multiple network namespaces with netlink Each netlink socket will live in exactly one network namespace, this includes the controlling kernel sockets. This patch updates all of the existing netlink protocols to only support the initial network namespace. Request by clients in other namespaces will get -ECONREFUSED. As they would if the kernel did not have the support for that netlink protocol compiled in. As each netlink protocol is updated to be multiple network namespace safe it can register multiple kernel sockets to acquire a presence in the rest of the network namespaces. The implementation in af_netlink is a simple filter implementation at hash table insertion and hash table look up time. Signed-off-by: Eric W. Biederman Signed-off-by: David S. Miller --- drivers/connector/connector.c | 2 +- drivers/scsi/scsi_netlink.c | 2 +- drivers/scsi/scsi_transport_iscsi.c | 2 +- fs/ecryptfs/netlink.c | 2 +- include/linux/netlink.h | 6 ++- kernel/audit.c | 4 +- lib/kobject_uevent.c | 5 +- net/bridge/netfilter/ebt_ulog.c | 5 +- net/core/rtnetlink.c | 4 +- net/decnet/netfilter/dn_rtmsg.c | 3 +- net/ipv4/fib_frontend.c | 4 +- net/ipv4/inet_diag.c | 4 +- net/ipv4/netfilter/ip_queue.c | 6 +-- net/ipv4/netfilter/ipt_ULOG.c | 3 +- net/ipv6/netfilter/ip6_queue.c | 6 +-- net/netfilter/nfnetlink.c | 2 +- net/netfilter/nfnetlink_log.c | 3 +- net/netfilter/nfnetlink_queue.c | 3 +- net/netlink/af_netlink.c | 104 +++++++++++++++++++++++++++--------- net/netlink/genetlink.c | 4 +- net/xfrm/xfrm_user.c | 2 +- security/selinux/netlink.c | 5 +- 22 files changed, 121 insertions(+), 60 deletions(-) (limited to 'include/linux/netlink.h') diff --git a/drivers/connector/connector.c b/drivers/connector/connector.c index a7b9e9bb3e8d..569070997cc1 100644 --- a/drivers/connector/connector.c +++ b/drivers/connector/connector.c @@ -446,7 +446,7 @@ static int __devinit cn_init(void) dev->id.idx = cn_idx; dev->id.val = cn_val; - dev->nls = netlink_kernel_create(NETLINK_CONNECTOR, + dev->nls = netlink_kernel_create(&init_net, NETLINK_CONNECTOR, CN_NETLINK_USERS + 0xf, dev->input, NULL, THIS_MODULE); if (!dev->nls) diff --git a/drivers/scsi/scsi_netlink.c b/drivers/scsi/scsi_netlink.c index 4bf9aa547c78..163acf6ad2d3 100644 --- a/drivers/scsi/scsi_netlink.c +++ b/drivers/scsi/scsi_netlink.c @@ -167,7 +167,7 @@ scsi_netlink_init(void) return; } - scsi_nl_sock = netlink_kernel_create(NETLINK_SCSITRANSPORT, + scsi_nl_sock = netlink_kernel_create(&init_net, NETLINK_SCSITRANSPORT, SCSI_NL_GRP_CNT, scsi_nl_rcv, NULL, THIS_MODULE); if (!scsi_nl_sock) { diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index 34c1860a259d..4916f01230dc 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -1523,7 +1523,7 @@ static __init int iscsi_transport_init(void) if (err) goto unregister_conn_class; - nls = netlink_kernel_create(NETLINK_ISCSI, 1, iscsi_if_rx, NULL, + nls = netlink_kernel_create(&init_net, NETLINK_ISCSI, 1, iscsi_if_rx, NULL, THIS_MODULE); if (!nls) { err = -ENOBUFS; diff --git a/fs/ecryptfs/netlink.c b/fs/ecryptfs/netlink.c index fe9186312d7c..056519cd92bc 100644 --- a/fs/ecryptfs/netlink.c +++ b/fs/ecryptfs/netlink.c @@ -227,7 +227,7 @@ int ecryptfs_init_netlink(void) { int rc; - ecryptfs_nl_sock = netlink_kernel_create(NETLINK_ECRYPTFS, 0, + ecryptfs_nl_sock = netlink_kernel_create(&init_net, NETLINK_ECRYPTFS, 0, ecryptfs_receive_nl_message, NULL, THIS_MODULE); if (!ecryptfs_nl_sock) { diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 83d8239f0cce..d2843ae4a83a 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -27,6 +27,8 @@ #define MAX_LINKS 32 +struct net; + struct sockaddr_nl { sa_family_t nl_family; /* AF_NETLINK */ @@ -157,7 +159,8 @@ struct netlink_skb_parms #define NETLINK_CREDS(skb) (&NETLINK_CB((skb)).creds) -extern struct sock *netlink_kernel_create(int unit, unsigned int groups, +extern struct sock *netlink_kernel_create(struct net *net, + int unit,unsigned int groups, void (*input)(struct sock *sk, int len), struct mutex *cb_mutex, struct module *module); @@ -206,6 +209,7 @@ struct netlink_callback struct netlink_notify { + struct net *net; int pid; int protocol; }; diff --git a/kernel/audit.c b/kernel/audit.c index eb0f9165b401..f3c390f6c0b4 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -876,8 +876,8 @@ static int __init audit_init(void) printk(KERN_INFO "audit: initializing netlink socket (%s)\n", audit_default ? "enabled" : "disabled"); - audit_sock = netlink_kernel_create(NETLINK_AUDIT, 0, audit_receive, - NULL, THIS_MODULE); + audit_sock = netlink_kernel_create(&init_net, NETLINK_AUDIT, 0, + audit_receive, NULL, THIS_MODULE); if (!audit_sock) audit_panic("cannot initialize netlink socket"); else diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c index df02814699d7..e06a8dcec0f0 100644 --- a/lib/kobject_uevent.c +++ b/lib/kobject_uevent.c @@ -280,9 +280,8 @@ EXPORT_SYMBOL_GPL(add_uevent_var); #if defined(CONFIG_NET) static int __init kobject_uevent_init(void) { - uevent_sock = netlink_kernel_create(NETLINK_KOBJECT_UEVENT, 1, NULL, - NULL, THIS_MODULE); - + uevent_sock = netlink_kernel_create(&init_net, NETLINK_KOBJECT_UEVENT, + 1, NULL, NULL, THIS_MODULE); if (!uevent_sock) { printk(KERN_ERR "kobject_uevent: unable to create netlink socket!\n"); diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c index 204c968fa86d..e7cfd30bac75 100644 --- a/net/bridge/netfilter/ebt_ulog.c +++ b/net/bridge/netfilter/ebt_ulog.c @@ -300,8 +300,9 @@ static int __init ebt_ulog_init(void) spin_lock_init(&ulog_buffers[i].lock); } - ebtulognl = netlink_kernel_create(NETLINK_NFLOG, EBT_ULOG_MAXNLGROUPS, - NULL, NULL, THIS_MODULE); + ebtulognl = netlink_kernel_create(&init_net, NETLINK_NFLOG, + EBT_ULOG_MAXNLGROUPS, NULL, NULL, + THIS_MODULE); if (!ebtulognl) ret = -ENOMEM; else if ((ret = ebt_register_watcher(&ulog))) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 41859508bedd..416768d1e0cd 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1327,8 +1327,8 @@ void __init rtnetlink_init(void) if (!rta_buf) panic("rtnetlink_init: cannot allocate rta_buf\n"); - rtnl = netlink_kernel_create(NETLINK_ROUTE, RTNLGRP_MAX, rtnetlink_rcv, - &rtnl_mutex, THIS_MODULE); + rtnl = netlink_kernel_create(&init_net, NETLINK_ROUTE, RTNLGRP_MAX, + rtnetlink_rcv, &rtnl_mutex, THIS_MODULE); if (rtnl == NULL) panic("rtnetlink_init: cannot initialize rtnetlink\n"); netlink_set_nonroot(NETLINK_ROUTE, NL_NONROOT_RECV); diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c index 696234688cf6..ebb38feb4df3 100644 --- a/net/decnet/netfilter/dn_rtmsg.c +++ b/net/decnet/netfilter/dn_rtmsg.c @@ -137,7 +137,8 @@ static int __init dn_rtmsg_init(void) { int rv = 0; - dnrmg = netlink_kernel_create(NETLINK_DNRTMSG, DNRNG_NLGRP_MAX, + dnrmg = netlink_kernel_create(&init_net, + NETLINK_DNRTMSG, DNRNG_NLGRP_MAX, dnrmg_receive_user_sk, NULL, THIS_MODULE); if (dnrmg == NULL) { printk(KERN_ERR "dn_rtmsg: Cannot create netlink socket"); diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index cefb55ec3d62..140bf7a8d877 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -816,8 +816,8 @@ static void nl_fib_input(struct sock *sk, int len) static void nl_fib_lookup_init(void) { - netlink_kernel_create(NETLINK_FIB_LOOKUP, 0, nl_fib_input, NULL, - THIS_MODULE); + netlink_kernel_create(&init_net, NETLINK_FIB_LOOKUP, 0, nl_fib_input, + NULL, THIS_MODULE); } static void fib_disable_ip(struct net_device *dev, int force) diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 686ddd62f71a..031cc4856b49 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -897,8 +897,8 @@ static int __init inet_diag_init(void) if (!inet_diag_table) goto out; - idiagnl = netlink_kernel_create(NETLINK_INET_DIAG, 0, inet_diag_rcv, - NULL, THIS_MODULE); + idiagnl = netlink_kernel_create(&init_net, NETLINK_INET_DIAG, 0, + inet_diag_rcv, NULL, THIS_MODULE); if (idiagnl == NULL) goto out_free_table; err = 0; diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index d91856097f25..82fda92e6b97 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -579,7 +579,7 @@ ipq_rcv_nl_event(struct notifier_block *this, if (event == NETLINK_URELEASE && n->protocol == NETLINK_FIREWALL && n->pid) { write_lock_bh(&queue_lock); - if (n->pid == peer_pid) + if ((n->net == &init_net) && (n->pid == peer_pid)) __ipq_reset(); write_unlock_bh(&queue_lock); } @@ -671,8 +671,8 @@ static int __init ip_queue_init(void) struct proc_dir_entry *proc; netlink_register_notifier(&ipq_nl_notifier); - ipqnl = netlink_kernel_create(NETLINK_FIREWALL, 0, ipq_rcv_sk, - NULL, THIS_MODULE); + ipqnl = netlink_kernel_create(&init_net, NETLINK_FIREWALL, 0, + ipq_rcv_sk, NULL, THIS_MODULE); if (ipqnl == NULL) { printk(KERN_ERR "ip_queue: failed to create netlink socket\n"); goto cleanup_netlink_notifier; diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c index 6ca43e4ca7e3..c636d6d63574 100644 --- a/net/ipv4/netfilter/ipt_ULOG.c +++ b/net/ipv4/netfilter/ipt_ULOG.c @@ -409,7 +409,8 @@ static int __init ipt_ulog_init(void) for (i = 0; i < ULOG_MAXNLGROUPS; i++) setup_timer(&ulog_buffers[i].timer, ulog_timer, i); - nflognl = netlink_kernel_create(NETLINK_NFLOG, ULOG_MAXNLGROUPS, NULL, + nflognl = netlink_kernel_create(&init_net, + NETLINK_NFLOG, ULOG_MAXNLGROUPS, NULL, NULL, THIS_MODULE); if (!nflognl) return -ENOMEM; diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c index 64536a3ef2f6..2f5a52453834 100644 --- a/net/ipv6/netfilter/ip6_queue.c +++ b/net/ipv6/netfilter/ip6_queue.c @@ -569,7 +569,7 @@ ipq_rcv_nl_event(struct notifier_block *this, if (event == NETLINK_URELEASE && n->protocol == NETLINK_IP6_FW && n->pid) { write_lock_bh(&queue_lock); - if (n->pid == peer_pid) + if ((n->net == &init_net) && (n->pid == peer_pid)) __ipq_reset(); write_unlock_bh(&queue_lock); } @@ -661,8 +661,8 @@ static int __init ip6_queue_init(void) struct proc_dir_entry *proc; netlink_register_notifier(&ipq_nl_notifier); - ipqnl = netlink_kernel_create(NETLINK_IP6_FW, 0, ipq_rcv_sk, NULL, - THIS_MODULE); + ipqnl = netlink_kernel_create(&init_net, NETLINK_IP6_FW, 0, ipq_rcv_sk, + NULL, THIS_MODULE); if (ipqnl == NULL) { printk(KERN_ERR "ip6_queue: failed to create netlink socket\n"); goto cleanup_netlink_notifier; diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 8797e6953ef2..fa974e8e0ce6 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -264,7 +264,7 @@ static int __init nfnetlink_init(void) { printk("Netfilter messages via NETLINK v%s.\n", nfversion); - nfnl = netlink_kernel_create(NETLINK_NETFILTER, NFNLGRP_MAX, + nfnl = netlink_kernel_create(&init_net, NETLINK_NETFILTER, NFNLGRP_MAX, nfnetlink_rcv, NULL, THIS_MODULE); if (!nfnl) { printk(KERN_ERR "cannot initialize nfnetlink!\n"); diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 2351533a8507..8e4001b8f764 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -706,7 +706,8 @@ nfulnl_rcv_nl_event(struct notifier_block *this, hlist_for_each_entry_safe(inst, tmp, t2, head, hlist) { UDEBUG("node = %p\n", inst); - if (n->pid == inst->peer_pid) + if ((n->net == &init_net) && + (n->pid == inst->peer_pid)) __instance_destroy(inst); } } diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 5a8e8ff76641..c97369f48db7 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -765,7 +765,8 @@ nfqnl_rcv_nl_event(struct notifier_block *this, struct hlist_head *head = &instance_table[i]; hlist_for_each_entry_safe(inst, tmp, t2, head, hlist) { - if (n->pid == inst->peer_pid) + if ((n->net == &init_net) && + (n->pid == inst->peer_pid)) __instance_destroy(inst); } } diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 406a493300d8..3029f865cd61 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -211,7 +211,7 @@ netlink_unlock_table(void) wake_up(&nl_table_wait); } -static __inline__ struct sock *netlink_lookup(int protocol, u32 pid) +static __inline__ struct sock *netlink_lookup(struct net *net, int protocol, u32 pid) { struct nl_pid_hash *hash = &nl_table[protocol].hash; struct hlist_head *head; @@ -221,7 +221,7 @@ static __inline__ struct sock *netlink_lookup(int protocol, u32 pid) read_lock(&nl_table_lock); head = nl_pid_hashfn(hash, pid); sk_for_each(sk, node, head) { - if (nlk_sk(sk)->pid == pid) { + if ((sk->sk_net == net) && (nlk_sk(sk)->pid == pid)) { sock_hold(sk); goto found; } @@ -328,7 +328,7 @@ netlink_update_listeners(struct sock *sk) * makes sure updates are visible before bind or setsockopt return. */ } -static int netlink_insert(struct sock *sk, u32 pid) +static int netlink_insert(struct sock *sk, struct net *net, u32 pid) { struct nl_pid_hash *hash = &nl_table[sk->sk_protocol].hash; struct hlist_head *head; @@ -341,7 +341,7 @@ static int netlink_insert(struct sock *sk, u32 pid) head = nl_pid_hashfn(hash, pid); len = 0; sk_for_each(osk, node, head) { - if (nlk_sk(osk)->pid == pid) + if ((osk->sk_net == net) && (nlk_sk(osk)->pid == pid)) break; len++; } @@ -419,9 +419,6 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol) struct netlink_sock *nlk; int err = 0; - if (net != &init_net) - return -EAFNOSUPPORT; - sock->state = SS_UNCONNECTED; if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM) @@ -481,6 +478,7 @@ static int netlink_release(struct socket *sock) if (nlk->pid && !nlk->subscriptions) { struct netlink_notify n = { + .net = sk->sk_net, .protocol = sk->sk_protocol, .pid = nlk->pid, }; @@ -509,6 +507,7 @@ static int netlink_release(struct socket *sock) static int netlink_autobind(struct socket *sock) { struct sock *sk = sock->sk; + struct net *net = sk->sk_net; struct nl_pid_hash *hash = &nl_table[sk->sk_protocol].hash; struct hlist_head *head; struct sock *osk; @@ -522,6 +521,8 @@ retry: netlink_table_grab(); head = nl_pid_hashfn(hash, pid); sk_for_each(osk, node, head) { + if ((osk->sk_net != net)) + continue; if (nlk_sk(osk)->pid == pid) { /* Bind collision, search negative pid values. */ pid = rover--; @@ -533,7 +534,7 @@ retry: } netlink_table_ungrab(); - err = netlink_insert(sk, pid); + err = netlink_insert(sk, net, pid); if (err == -EADDRINUSE) goto retry; @@ -598,6 +599,7 @@ static int netlink_realloc_groups(struct sock *sk) static int netlink_bind(struct socket *sock, struct sockaddr *addr, int addr_len) { struct sock *sk = sock->sk; + struct net *net = sk->sk_net; struct netlink_sock *nlk = nlk_sk(sk); struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr; int err; @@ -619,7 +621,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, int addr_len return -EINVAL; } else { err = nladdr->nl_pid ? - netlink_insert(sk, nladdr->nl_pid) : + netlink_insert(sk, net, nladdr->nl_pid) : netlink_autobind(sock); if (err) return err; @@ -703,10 +705,12 @@ static void netlink_overrun(struct sock *sk) static struct sock *netlink_getsockbypid(struct sock *ssk, u32 pid) { int protocol = ssk->sk_protocol; + struct net *net; struct sock *sock; struct netlink_sock *nlk; - sock = netlink_lookup(protocol, pid); + net = ssk->sk_net; + sock = netlink_lookup(net, protocol, pid); if (!sock) return ERR_PTR(-ECONNREFUSED); @@ -887,6 +891,7 @@ static __inline__ int netlink_broadcast_deliver(struct sock *sk, struct sk_buff struct netlink_broadcast_data { struct sock *exclude_sk; + struct net *net; u32 pid; u32 group; int failure; @@ -909,6 +914,9 @@ static inline int do_one_broadcast(struct sock *sk, !test_bit(p->group - 1, nlk->groups)) goto out; + if ((sk->sk_net != p->net)) + goto out; + if (p->failure) { netlink_overrun(sk); goto out; @@ -947,6 +955,7 @@ out: int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid, u32 group, gfp_t allocation) { + struct net *net = ssk->sk_net; struct netlink_broadcast_data info; struct hlist_node *node; struct sock *sk; @@ -954,6 +963,7 @@ int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid, skb = netlink_trim(skb, allocation); info.exclude_sk = ssk; + info.net = net; info.pid = pid; info.group = group; info.failure = 0; @@ -1002,6 +1012,9 @@ static inline int do_one_set_err(struct sock *sk, if (sk == p->exclude_sk) goto out; + if (sk->sk_net != p->exclude_sk->sk_net) + goto out; + if (nlk->pid == p->pid || p->group - 1 >= nlk->ngroups || !test_bit(p->group - 1, nlk->groups)) goto out; @@ -1304,7 +1317,7 @@ static void netlink_data_ready(struct sock *sk, int len) */ struct sock * -netlink_kernel_create(int unit, unsigned int groups, +netlink_kernel_create(struct net *net, int unit, unsigned int groups, void (*input)(struct sock *sk, int len), struct mutex *cb_mutex, struct module *module) { @@ -1321,7 +1334,7 @@ netlink_kernel_create(int unit, unsigned int groups, if (sock_create_lite(PF_NETLINK, SOCK_DGRAM, unit, &sock)) return NULL; - if (__netlink_create(&init_net, sock, cb_mutex, unit) < 0) + if (__netlink_create(net, sock, cb_mutex, unit) < 0) goto out_sock_release; if (groups < 32) @@ -1336,18 +1349,20 @@ netlink_kernel_create(int unit, unsigned int groups, if (input) nlk_sk(sk)->data_ready = input; - if (netlink_insert(sk, 0)) + if (netlink_insert(sk, net, 0)) goto out_sock_release; nlk = nlk_sk(sk); nlk->flags |= NETLINK_KERNEL_SOCKET; netlink_table_grab(); - nl_table[unit].groups = groups; - nl_table[unit].listeners = listeners; - nl_table[unit].cb_mutex = cb_mutex; - nl_table[unit].module = module; - nl_table[unit].registered = 1; + if (!nl_table[unit].registered) { + nl_table[unit].groups = groups; + nl_table[unit].listeners = listeners; + nl_table[unit].cb_mutex = cb_mutex; + nl_table[unit].module = module; + nl_table[unit].registered = 1; + } netlink_table_ungrab(); return sk; @@ -1513,7 +1528,7 @@ int netlink_dump_start(struct sock *ssk, struct sk_buff *skb, atomic_inc(&skb->users); cb->skb = skb; - sk = netlink_lookup(ssk->sk_protocol, NETLINK_CB(skb).pid); + sk = netlink_lookup(ssk->sk_net, ssk->sk_protocol, NETLINK_CB(skb).pid); if (sk == NULL) { netlink_destroy_callback(cb); return -ECONNREFUSED; @@ -1555,7 +1570,8 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err) if (!skb) { struct sock *sk; - sk = netlink_lookup(in_skb->sk->sk_protocol, + sk = netlink_lookup(in_skb->sk->sk_net, + in_skb->sk->sk_protocol, NETLINK_CB(in_skb).pid); if (sk) { sk->sk_err = ENOBUFS; @@ -1706,6 +1722,7 @@ int nlmsg_notify(struct sock *sk, struct sk_buff *skb, u32 pid, #ifdef CONFIG_PROC_FS struct nl_seq_iter { + struct net *net; int link; int hash_idx; }; @@ -1723,6 +1740,8 @@ static struct sock *netlink_seq_socket_idx(struct seq_file *seq, loff_t pos) for (j = 0; j <= hash->mask; j++) { sk_for_each(s, node, &hash->table[j]) { + if (iter->net != s->sk_net) + continue; if (off == pos) { iter->link = i; iter->hash_idx = j; @@ -1752,11 +1771,14 @@ static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos) if (v == SEQ_START_TOKEN) return netlink_seq_socket_idx(seq, 0); - s = sk_next(v); + iter = seq->private; + s = v; + do { + s = sk_next(s); + } while (s && (iter->net != s->sk_net)); if (s) return s; - iter = seq->private; i = iter->link; j = iter->hash_idx + 1; @@ -1765,6 +1787,8 @@ static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos) for (; j <= hash->mask; j++) { s = sk_head(&hash->table[j]); + while (s && (iter->net != s->sk_net)) + s = sk_next(s); if (s) { iter->link = i; iter->hash_idx = j; @@ -1835,15 +1859,24 @@ static int netlink_seq_open(struct inode *inode, struct file *file) seq = file->private_data; seq->private = iter; + iter->net = get_net(PROC_NET(inode)); return 0; } +static int netlink_seq_release(struct inode *inode, struct file *file) +{ + struct seq_file *seq = file->private_data; + struct nl_seq_iter *iter = seq->private; + put_net(iter->net); + return seq_release_private(inode, file); +} + static const struct file_operations netlink_seq_fops = { .owner = THIS_MODULE, .open = netlink_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_private, + .release = netlink_seq_release, }; #endif @@ -1885,6 +1918,27 @@ static struct net_proto_family netlink_family_ops = { .owner = THIS_MODULE, /* for consistency 8) */ }; +static int netlink_net_init(struct net *net) +{ +#ifdef CONFIG_PROC_FS + if (!proc_net_fops_create(net, "netlink", 0, &netlink_seq_fops)) + return -ENOMEM; +#endif + return 0; +} + +static void netlink_net_exit(struct net *net) +{ +#ifdef CONFIG_PROC_FS + proc_net_remove(net, "netlink"); +#endif +} + +static struct pernet_operations netlink_net_ops = { + .init = netlink_net_init, + .exit = netlink_net_exit, +}; + static int __init netlink_proto_init(void) { struct sk_buff *dummy_skb; @@ -1930,9 +1984,7 @@ static int __init netlink_proto_init(void) } sock_register(&netlink_family_ops); -#ifdef CONFIG_PROC_FS - proc_net_fops_create(&init_net, "netlink", 0, &netlink_seq_fops); -#endif + register_pernet_subsys(&netlink_net_ops); /* The netlink device handler may be needed early. */ rtnetlink_init(); out: diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 8c11ca4a2121..af8fe26815fa 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -782,8 +782,8 @@ static int __init genl_init(void) netlink_set_nonroot(NETLINK_GENERIC, NL_NONROOT_RECV); /* we'll bump the group number right afterwards */ - genl_sock = netlink_kernel_create(NETLINK_GENERIC, 0, genl_rcv, - NULL, THIS_MODULE); + genl_sock = netlink_kernel_create(&init_net, NETLINK_GENERIC, 0, + genl_rcv, NULL, THIS_MODULE); if (genl_sock == NULL) panic("GENL: Cannot initialize generic netlink\n"); diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 0d81c0f23919..1f8e7c22ddbd 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -2399,7 +2399,7 @@ static int __init xfrm_user_init(void) printk(KERN_INFO "Initializing XFRM netlink socket\n"); - nlsk = netlink_kernel_create(NETLINK_XFRM, XFRMNLGRP_MAX, + nlsk = netlink_kernel_create(&init_net, NETLINK_XFRM, XFRMNLGRP_MAX, xfrm_netlink_rcv, NULL, THIS_MODULE); if (nlsk == NULL) return -ENOMEM; diff --git a/security/selinux/netlink.c b/security/selinux/netlink.c index f49046de63a2..b59871d74dad 100644 --- a/security/selinux/netlink.c +++ b/security/selinux/netlink.c @@ -17,6 +17,7 @@ #include #include #include +#include static struct sock *selnl; @@ -104,8 +105,8 @@ void selnl_notify_policyload(u32 seqno) static int __init selnl_init(void) { - selnl = netlink_kernel_create(NETLINK_SELINUX, SELNLGRP_MAX, NULL, NULL, - THIS_MODULE); + selnl = netlink_kernel_create(&init_net, NETLINK_SELINUX, + SELNLGRP_MAX, NULL, NULL, THIS_MODULE); if (selnl == NULL) panic("SELinux: Cannot create netlink socket."); netlink_set_nonroot(NETLINK_SELINUX, NL_NONROOT_RECV); -- cgit v1.2.3 From 8f4c1f9b049df3be11090f1c2c4738700302acae Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Wed, 12 Sep 2007 14:44:36 +0200 Subject: [NETLINK]: Introduce nested and byteorder flag to netlink attribute This change allows the generic attribute interface to be used within the netfilter subsystem where this flag was initially introduced. The byte-order flag is yet unused, it's intended use is to allow automatic byte order convertions for all atomic types. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/netlink.h | 14 ++++++++++++++ include/net/netlink.h | 9 +++++++++ net/ipv4/fib_frontend.c | 2 +- net/ipv4/fib_semantics.c | 2 +- net/ipv6/route.c | 2 +- net/netlabel/netlabel_cipso_v4.c | 14 +++++++------- net/netlink/attr.c | 10 +++++----- 7 files changed, 38 insertions(+), 15 deletions(-) (limited to 'include/linux/netlink.h') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index d2843ae4a83a..e638256ce472 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -131,6 +131,20 @@ struct nlattr __u16 nla_type; }; +/* + * nla_type (16 bits) + * +---+---+-------------------------------+ + * | N | O | Attribute Type | + * +---+---+-------------------------------+ + * N := Carries nested attributes + * O := Payload stored in network byte order + * + * Note: The N and O flag are mutually exclusive. + */ +#define NLA_F_NESTED (1 << 15) +#define NLA_F_NET_BYTEORDER (1 << 14) +#define NLA_TYPE_MASK ~(NLA_F_NESTED | NLA_F_NET_BYTEORDER) + #define NLA_ALIGNTO 4 #define NLA_ALIGN(len) (((len) + NLA_ALIGNTO - 1) & ~(NLA_ALIGNTO - 1)) #define NLA_HDRLEN ((int) NLA_ALIGN(sizeof(struct nlattr))) diff --git a/include/net/netlink.h b/include/net/netlink.h index d7b824be5422..695e613a207b 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -666,6 +666,15 @@ static inline int nla_padlen(int payload) return nla_total_size(payload) - nla_attr_size(payload); } +/** + * nla_type - attribute type + * @nla: netlink attribute + */ +static inline int nla_type(const struct nlattr *nla) +{ + return nla->nla_type & NLA_TYPE_MASK; +} + /** * nla_data - head of payload * @nla: netlink attribute diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index df17aab193b4..f823ca34cb12 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -487,7 +487,7 @@ static int rtm_to_fib_config(struct sk_buff *skb, struct nlmsghdr *nlh, } nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) { - switch (attr->nla_type) { + switch (nla_type(attr)) { case RTA_DST: cfg->fc_dst = nla_get_be32(attr); break; diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index d30fb68d5f4e..1351a2617dce 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -743,7 +743,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg) int remaining; nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) { - int type = nla->nla_type; + int type = nla_type(nla); if (type) { if (type > RTAX_MAX) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 5bdd9d4010fe..104070e92cea 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1279,7 +1279,7 @@ install_route: int remaining; nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) { - int type = nla->nla_type; + int type = nla_type(nla); if (type) { if (type > RTAX_MAX) { diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c index c060e3f991f1..ba0ca8d3f77d 100644 --- a/net/netlabel/netlabel_cipso_v4.c +++ b/net/netlabel/netlabel_cipso_v4.c @@ -130,7 +130,7 @@ static int netlbl_cipsov4_add_common(struct genl_info *info, return -EINVAL; nla_for_each_nested(nla, info->attrs[NLBL_CIPSOV4_A_TAGLST], nla_rem) - if (nla->nla_type == NLBL_CIPSOV4_A_TAG) { + if (nla_type(nla) == NLBL_CIPSOV4_A_TAG) { if (iter >= CIPSO_V4_TAG_MAXCNT) return -EINVAL; doi_def->tags[iter++] = nla_get_u8(nla); @@ -192,13 +192,13 @@ static int netlbl_cipsov4_add_std(struct genl_info *info) nla_for_each_nested(nla_a, info->attrs[NLBL_CIPSOV4_A_MLSLVLLST], nla_a_rem) - if (nla_a->nla_type == NLBL_CIPSOV4_A_MLSLVL) { + if (nla_type(nla_a) == NLBL_CIPSOV4_A_MLSLVL) { if (nla_validate_nested(nla_a, NLBL_CIPSOV4_A_MAX, netlbl_cipsov4_genl_policy) != 0) goto add_std_failure; nla_for_each_nested(nla_b, nla_a, nla_b_rem) - switch (nla_b->nla_type) { + switch (nla_type(nla_b)) { case NLBL_CIPSOV4_A_MLSLVLLOC: if (nla_get_u32(nla_b) > CIPSO_V4_MAX_LOC_LVLS) @@ -240,7 +240,7 @@ static int netlbl_cipsov4_add_std(struct genl_info *info) nla_for_each_nested(nla_a, info->attrs[NLBL_CIPSOV4_A_MLSLVLLST], nla_a_rem) - if (nla_a->nla_type == NLBL_CIPSOV4_A_MLSLVL) { + if (nla_type(nla_a) == NLBL_CIPSOV4_A_MLSLVL) { struct nlattr *lvl_loc; struct nlattr *lvl_rem; @@ -265,13 +265,13 @@ static int netlbl_cipsov4_add_std(struct genl_info *info) nla_for_each_nested(nla_a, info->attrs[NLBL_CIPSOV4_A_MLSCATLST], nla_a_rem) - if (nla_a->nla_type == NLBL_CIPSOV4_A_MLSCAT) { + if (nla_type(nla_a) == NLBL_CIPSOV4_A_MLSCAT) { if (nla_validate_nested(nla_a, NLBL_CIPSOV4_A_MAX, netlbl_cipsov4_genl_policy) != 0) goto add_std_failure; nla_for_each_nested(nla_b, nla_a, nla_b_rem) - switch (nla_b->nla_type) { + switch (nla_type(nla_b)) { case NLBL_CIPSOV4_A_MLSCATLOC: if (nla_get_u32(nla_b) > CIPSO_V4_MAX_LOC_CATS) @@ -315,7 +315,7 @@ static int netlbl_cipsov4_add_std(struct genl_info *info) nla_for_each_nested(nla_a, info->attrs[NLBL_CIPSOV4_A_MLSCATLST], nla_a_rem) - if (nla_a->nla_type == NLBL_CIPSOV4_A_MLSCAT) { + if (nla_type(nla_a) == NLBL_CIPSOV4_A_MLSCAT) { struct nlattr *cat_loc; struct nlattr *cat_rem; diff --git a/net/netlink/attr.c b/net/netlink/attr.c index e4d7bed99c2e..ec39d12c2423 100644 --- a/net/netlink/attr.c +++ b/net/netlink/attr.c @@ -27,12 +27,12 @@ static int validate_nla(struct nlattr *nla, int maxtype, const struct nla_policy *policy) { const struct nla_policy *pt; - int minlen = 0, attrlen = nla_len(nla); + int minlen = 0, attrlen = nla_len(nla), type = nla_type(nla); - if (nla->nla_type <= 0 || nla->nla_type > maxtype) + if (type <= 0 || type > maxtype) return 0; - pt = &policy[nla->nla_type]; + pt = &policy[type]; BUG_ON(pt->type > NLA_TYPE_MAX); @@ -149,7 +149,7 @@ int nla_parse(struct nlattr *tb[], int maxtype, struct nlattr *head, int len, memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1)); nla_for_each_attr(nla, head, len, rem) { - u16 type = nla->nla_type; + u16 type = nla_type(nla); if (type > 0 && type <= maxtype) { if (policy) { @@ -185,7 +185,7 @@ struct nlattr *nla_find(struct nlattr *head, int len, int attrtype) int rem; nla_for_each_attr(nla, head, len, rem) - if (nla->nla_type == attrtype) + if (nla_type(nla) == attrtype) return nla; return NULL; -- cgit v1.2.3 From 7ee015e0fa3c856416e9477aac4b850ec6f09017 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Wed, 10 Oct 2007 21:14:03 -0700 Subject: [NET]: cleanup 3rd argument in netlink_sendskb netlink_sendskb does not use third argument. Clean it and save a couple of bytes. Signed-off-by: Denis V. Lunev Acked-by: Alexey Kuznetsov Signed-off-by: David S. Miller --- include/linux/netlink.h | 2 +- ipc/mqueue.c | 5 ++--- net/netlink/af_netlink.c | 4 ++-- 3 files changed, 5 insertions(+), 6 deletions(-) (limited to 'include/linux/netlink.h') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index e638256ce472..7b552b6c2c19 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -194,7 +194,7 @@ struct sock *netlink_getsockbyfilp(struct file *filp); int netlink_attachskb(struct sock *sk, struct sk_buff *skb, int nonblock, long timeo, struct sock *ssk); void netlink_detachskb(struct sock *sk, struct sk_buff *skb); -int netlink_sendskb(struct sock *sk, struct sk_buff *skb, int protocol); +int netlink_sendskb(struct sock *sk, struct sk_buff *skb); /* * skb should fit one page. This choice is good for headerless malloc. diff --git a/ipc/mqueue.c b/ipc/mqueue.c index 145d5a0d299f..24df3347ad4b 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -521,8 +521,7 @@ static void __do_notify(struct mqueue_inode_info *info) break; case SIGEV_THREAD: set_cookie(info->notify_cookie, NOTIFY_WOKENUP); - netlink_sendskb(info->notify_sock, - info->notify_cookie, 0); + netlink_sendskb(info->notify_sock, info->notify_cookie); break; } /* after notification unregisters process */ @@ -568,7 +567,7 @@ static void remove_notification(struct mqueue_inode_info *info) if (info->notify_owner != NULL && info->notify.sigev_notify == SIGEV_THREAD) { set_cookie(info->notify_cookie, NOTIFY_REMOVED); - netlink_sendskb(info->notify_sock, info->notify_cookie, 0); + netlink_sendskb(info->notify_sock, info->notify_cookie); } put_pid(info->notify_owner); info->notify_owner = NULL; diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index f934f54fbfd5..a5bd63ca86bc 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -791,7 +791,7 @@ int netlink_attachskb(struct sock *sk, struct sk_buff *skb, int nonblock, return 0; } -int netlink_sendskb(struct sock *sk, struct sk_buff *skb, int protocol) +int netlink_sendskb(struct sock *sk, struct sk_buff *skb) { int len = skb->len; @@ -853,7 +853,7 @@ retry: if (err) return err; - return netlink_sendskb(sk, skb, ssk->sk_protocol); + return netlink_sendskb(sk, skb); } int netlink_has_listeners(struct sock *sk, unsigned int group) -- cgit v1.2.3 From cd40b7d3983c708aabe3d3008ec64ffce56d33b0 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Wed, 10 Oct 2007 21:15:29 -0700 Subject: [NET]: make netlink user -> kernel interface synchronious This patch make processing netlink user -> kernel messages synchronious. This change was inspired by the talk with Alexey Kuznetsov about current netlink messages processing. He says that he was badly wrong when introduced asynchronious user -> kernel communication. The call netlink_unicast is the only path to send message to the kernel netlink socket. But, unfortunately, it is also used to send data to the user. Before this change the user message has been attached to the socket queue and sk->sk_data_ready was called. The process has been blocked until all pending messages were processed. The bad thing is that this processing may occur in the arbitrary process context. This patch changes nlk->data_ready callback to get 1 skb and force packet processing right in the netlink_unicast. Kernel -> user path in netlink_unicast remains untouched. EINTR processing for in netlink_run_queue was changed. It forces rtnl_lock drop, but the process remains in the cycle until the message will be fully processed. So, there is no need to use this kludges now. Signed-off-by: Denis V. Lunev Acked-by: Alexey Kuznetsov Signed-off-by: David S. Miller --- drivers/connector/connector.c | 14 +--- drivers/scsi/scsi_netlink.c | 25 +----- drivers/scsi/scsi_transport_iscsi.c | 82 +++++++++---------- fs/ecryptfs/netlink.c | 14 +--- include/linux/connector.h | 2 +- include/linux/netlink.h | 2 +- include/net/netlink.h | 6 +- kernel/audit.c | 12 +-- net/core/rtnetlink.c | 12 +-- net/decnet/netfilter/dn_rtmsg.c | 14 +--- net/ipv4/fib_frontend.c | 9 ++- net/ipv4/inet_diag.c | 12 +-- net/ipv4/netfilter/ip_queue.c | 17 +--- net/ipv6/netfilter/ip6_queue.c | 19 ++--- net/netfilter/nfnetlink.c | 12 +-- net/netlink/af_netlink.c | 152 +++++++++++------------------------- net/netlink/genetlink.c | 12 +-- net/xfrm/xfrm_user.c | 13 +-- 18 files changed, 130 insertions(+), 299 deletions(-) (limited to 'include/linux/netlink.h') diff --git a/drivers/connector/connector.c b/drivers/connector/connector.c index 569070997cc1..0e328d387af4 100644 --- a/drivers/connector/connector.c +++ b/drivers/connector/connector.c @@ -234,18 +234,6 @@ out: kfree_skb(__skb); } -/* - * Netlink socket input callback - dequeues the skbs and calls the - * main netlink receiving function. - */ -static void cn_input(struct sock *sk, int len) -{ - struct sk_buff *skb; - - while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) - cn_rx_skb(skb); -} - /* * Notification routing. * @@ -442,7 +430,7 @@ static int __devinit cn_init(void) struct cn_dev *dev = &cdev; int err; - dev->input = cn_input; + dev->input = cn_rx_skb; dev->id.idx = cn_idx; dev->id.val = cn_val; diff --git a/drivers/scsi/scsi_netlink.c b/drivers/scsi/scsi_netlink.c index 163acf6ad2d3..40579edca101 100644 --- a/drivers/scsi/scsi_netlink.c +++ b/drivers/scsi/scsi_netlink.c @@ -64,7 +64,7 @@ scsi_nl_rcv_msg(struct sk_buff *skb) if (nlh->nlmsg_type != SCSI_TRANSPORT_MSG) { err = -EBADMSG; - goto next_msg; + return; } hdr = NLMSG_DATA(nlh); @@ -98,27 +98,6 @@ next_msg: } -/** - * scsi_nl_rcv_msg - - * Receive handler for a socket. Extracts a received message buffer from - * the socket, and starts message processing. - * - * @sk: socket - * @len: unused - * - **/ -static void -scsi_nl_rcv(struct sock *sk, int len) -{ - struct sk_buff *skb; - - while ((skb = skb_dequeue(&sk->sk_receive_queue))) { - scsi_nl_rcv_msg(skb); - kfree_skb(skb); - } -} - - /** * scsi_nl_rcv_event - * Event handler for a netlink socket. @@ -168,7 +147,7 @@ scsi_netlink_init(void) } scsi_nl_sock = netlink_kernel_create(&init_net, NETLINK_SCSITRANSPORT, - SCSI_NL_GRP_CNT, scsi_nl_rcv, NULL, + SCSI_NL_GRP_CNT, scsi_nl_rcv_msg, NULL, THIS_MODULE); if (!scsi_nl_sock) { printk(KERN_ERR "%s: register of recieve handler failed\n", diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index 4916f01230dc..5428d15f23c6 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -1097,61 +1097,49 @@ iscsi_if_recv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) } /* - * Get message from skb (based on rtnetlink_rcv_skb). Each message is - * processed by iscsi_if_recv_msg. Malformed skbs with wrong lengths or - * invalid creds are discarded silently. + * Get message from skb. Each message is processed by iscsi_if_recv_msg. + * Malformed skbs with wrong lengths or invalid creds are not processed. */ static void -iscsi_if_rx(struct sock *sk, int len) +iscsi_if_rx(struct sk_buff *skb) { - struct sk_buff *skb; - mutex_lock(&rx_queue_mutex); - while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) { - if (NETLINK_CREDS(skb)->uid) { - skb_pull(skb, skb->len); - goto free_skb; + while (skb->len >= NLMSG_SPACE(0)) { + int err; + uint32_t rlen; + struct nlmsghdr *nlh; + struct iscsi_uevent *ev; + + nlh = nlmsg_hdr(skb); + if (nlh->nlmsg_len < sizeof(*nlh) || + skb->len < nlh->nlmsg_len) { + break; } - while (skb->len >= NLMSG_SPACE(0)) { - int err; - uint32_t rlen; - struct nlmsghdr *nlh; - struct iscsi_uevent *ev; + ev = NLMSG_DATA(nlh); + rlen = NLMSG_ALIGN(nlh->nlmsg_len); + if (rlen > skb->len) + rlen = skb->len; - nlh = nlmsg_hdr(skb); - if (nlh->nlmsg_len < sizeof(*nlh) || - skb->len < nlh->nlmsg_len) { - break; - } - - ev = NLMSG_DATA(nlh); - rlen = NLMSG_ALIGN(nlh->nlmsg_len); - if (rlen > skb->len) - rlen = skb->len; - - err = iscsi_if_recv_msg(skb, nlh); - if (err) { - ev->type = ISCSI_KEVENT_IF_ERROR; - ev->iferror = err; - } - do { - /* - * special case for GET_STATS: - * on success - sending reply and stats from - * inside of if_recv_msg(), - * on error - fall through. - */ - if (ev->type == ISCSI_UEVENT_GET_STATS && !err) - break; - err = iscsi_if_send_reply( - NETLINK_CREDS(skb)->pid, nlh->nlmsg_seq, - nlh->nlmsg_type, 0, 0, ev, sizeof(*ev)); - } while (err < 0 && err != -ECONNREFUSED); - skb_pull(skb, rlen); + err = iscsi_if_recv_msg(skb, nlh); + if (err) { + ev->type = ISCSI_KEVENT_IF_ERROR; + ev->iferror = err; } -free_skb: - kfree_skb(skb); + do { + /* + * special case for GET_STATS: + * on success - sending reply and stats from + * inside of if_recv_msg(), + * on error - fall through. + */ + if (ev->type == ISCSI_UEVENT_GET_STATS && !err) + break; + err = iscsi_if_send_reply( + NETLINK_CREDS(skb)->pid, nlh->nlmsg_seq, + nlh->nlmsg_type, 0, 0, ev, sizeof(*ev)); + } while (err < 0 && err != -ECONNREFUSED); + skb_pull(skb, rlen); } mutex_unlock(&rx_queue_mutex); } diff --git a/fs/ecryptfs/netlink.c b/fs/ecryptfs/netlink.c index 056519cd92bc..9aa345121e09 100644 --- a/fs/ecryptfs/netlink.c +++ b/fs/ecryptfs/netlink.c @@ -165,22 +165,10 @@ static int ecryptfs_process_nl_quit(struct sk_buff *skb) * it to its desired netlink context element and wake up the process * that is waiting for a response. */ -static void ecryptfs_receive_nl_message(struct sock *sk, int len) +static void ecryptfs_receive_nl_message(struct sk_buff *skb) { - struct sk_buff *skb; struct nlmsghdr *nlh; - int rc = 0; /* skb_recv_datagram requires this */ -receive: - skb = skb_recv_datagram(sk, 0, 0, &rc); - if (rc == -EINTR) - goto receive; - else if (rc < 0) { - ecryptfs_printk(KERN_ERR, "Error occurred while " - "receiving eCryptfs netlink message; " - "rc = [%d]\n", rc); - return; - } nlh = nlmsg_hdr(skb); if (!NLMSG_OK(nlh, skb->len)) { ecryptfs_printk(KERN_ERR, "Received corrupt netlink " diff --git a/include/linux/connector.h b/include/linux/connector.h index 10eb56b2940a..b62f823e90cf 100644 --- a/include/linux/connector.h +++ b/include/linux/connector.h @@ -153,7 +153,7 @@ struct cn_dev { u32 seq, groups; struct sock *nls; - void (*input) (struct sock * sk, int len); + void (*input) (struct sk_buff *skb); struct cn_queue_dev *cbdev; }; diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 7b552b6c2c19..7c1f3b1d2ee5 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -175,7 +175,7 @@ struct netlink_skb_parms extern struct sock *netlink_kernel_create(struct net *net, int unit,unsigned int groups, - void (*input)(struct sock *sk, int len), + void (*input)(struct sk_buff *skb), struct mutex *cb_mutex, struct module *module); extern int netlink_change_ngroups(struct sock *sk, unsigned int groups); diff --git a/include/net/netlink.h b/include/net/netlink.h index 1afd3e837d23..9298218c07f9 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -220,9 +220,9 @@ struct nl_info { u32 pid; }; -extern unsigned int netlink_run_queue(struct sock *sk, unsigned int qlen, - int (*cb)(struct sk_buff *, - struct nlmsghdr *)); +extern int netlink_rcv_skb(struct sk_buff *skb, + int (*cb)(struct sk_buff *, + struct nlmsghdr *)); extern int nlmsg_notify(struct sock *sk, struct sk_buff *skb, u32 pid, unsigned int group, int report, gfp_t flags); diff --git a/kernel/audit.c b/kernel/audit.c index f3c390f6c0b4..2924251a6547 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -847,18 +847,10 @@ static void audit_receive_skb(struct sk_buff *skb) } /* Receive messages from netlink socket. */ -static void audit_receive(struct sock *sk, int length) +static void audit_receive(struct sk_buff *skb) { - struct sk_buff *skb; - unsigned int qlen; - mutex_lock(&audit_cmd_mutex); - - for (qlen = skb_queue_len(&sk->sk_receive_queue); qlen; qlen--) { - skb = skb_dequeue(&sk->sk_receive_queue); - audit_receive_skb(skb); - kfree_skb(skb); - } + audit_receive_skb(skb); mutex_unlock(&audit_cmd_mutex); } diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 471d2d9f8eae..1072d16696c3 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1312,15 +1312,11 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) return doit(skb, nlh, (void *)&rta_buf[0]); } -static void rtnetlink_rcv(struct sock *sk, int len) +static void rtnetlink_rcv(struct sk_buff *skb) { - unsigned int qlen = 0; - - do { - rtnl_lock(); - qlen = netlink_run_queue(sk, qlen, &rtnetlink_rcv_msg); - rtnl_unlock(); - } while (qlen); + rtnl_lock(); + netlink_rcv_skb(skb, &rtnetlink_rcv_msg); + rtnl_unlock(); } static int rtnetlink_event(struct notifier_block *this, unsigned long event, void *ptr) diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c index ebb38feb4df3..f7fba7721e63 100644 --- a/net/decnet/netfilter/dn_rtmsg.c +++ b/net/decnet/netfilter/dn_rtmsg.c @@ -115,17 +115,6 @@ static inline void dnrmg_receive_user_skb(struct sk_buff *skb) RCV_SKB_FAIL(-EINVAL); } -static void dnrmg_receive_user_sk(struct sock *sk, int len) -{ - struct sk_buff *skb; - unsigned int qlen = skb_queue_len(&sk->sk_receive_queue); - - for (; qlen && (skb = skb_dequeue(&sk->sk_receive_queue)); qlen--) { - dnrmg_receive_user_skb(skb); - kfree_skb(skb); - } -} - static struct nf_hook_ops dnrmg_ops = { .hook = dnrmg_hook, .pf = PF_DECnet, @@ -139,7 +128,8 @@ static int __init dn_rtmsg_init(void) dnrmg = netlink_kernel_create(&init_net, NETLINK_DNRTMSG, DNRNG_NLGRP_MAX, - dnrmg_receive_user_sk, NULL, THIS_MODULE); + dnrmg_receive_user_skb, + NULL, THIS_MODULE); if (dnrmg == NULL) { printk(KERN_ERR "dn_rtmsg: Cannot create netlink socket"); return -ENOMEM; diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index f823ca34cb12..a5cba2349605 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -62,6 +62,9 @@ static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ]; #define FIB_TABLE_HASHSZ 256 static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ]; +static struct sock *fibnl = NULL; + + struct fib_table *fib_new_table(u32 id) { struct fib_table *tb; @@ -811,13 +814,13 @@ static void nl_fib_input(struct sock *sk, int len) pid = NETLINK_CB(skb).pid; /* pid of sending process */ NETLINK_CB(skb).pid = 0; /* from kernel */ NETLINK_CB(skb).dst_group = 0; /* unicast */ - netlink_unicast(sk, skb, pid, MSG_DONTWAIT); + netlink_unicast(fibnl, skb, pid, MSG_DONTWAIT); } static void nl_fib_lookup_init(void) { - netlink_kernel_create(&init_net, NETLINK_FIB_LOOKUP, 0, nl_fib_input, - NULL, THIS_MODULE); + fibnl = netlink_kernel_create(&init_net, NETLINK_FIB_LOOKUP, 0, + nl_fib_input, NULL, THIS_MODULE); } static void fib_disable_ip(struct net_device *dev, int force) diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index b04a6ee5a9a1..7eb83ebed2ec 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -839,15 +839,11 @@ static int inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) static DEFINE_MUTEX(inet_diag_mutex); -static void inet_diag_rcv(struct sock *sk, int len) +static void inet_diag_rcv(struct sk_buff *skb) { - unsigned int qlen = 0; - - do { - mutex_lock(&inet_diag_mutex); - qlen = netlink_run_queue(sk, qlen, &inet_diag_rcv_msg); - mutex_unlock(&inet_diag_mutex); - } while (qlen); + mutex_lock(&inet_diag_mutex); + netlink_rcv_skb(skb, &inet_diag_rcv_msg); + mutex_unlock(&inet_diag_mutex); } static DEFINE_SPINLOCK(inet_diag_register_lock); diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index aaa3f5c56761..23cbfc7c80fd 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -475,7 +475,7 @@ ipq_dev_drop(int ifindex) #define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0) static inline void -ipq_rcv_skb(struct sk_buff *skb) +__ipq_rcv_skb(struct sk_buff *skb) { int status, type, pid, flags, nlmsglen, skblen; struct nlmsghdr *nlh; @@ -533,19 +533,10 @@ ipq_rcv_skb(struct sk_buff *skb) } static void -ipq_rcv_sk(struct sock *sk, int len) +ipq_rcv_skb(struct sk_buff *skb) { - struct sk_buff *skb; - unsigned int qlen; - mutex_lock(&ipqnl_mutex); - - for (qlen = skb_queue_len(&sk->sk_receive_queue); qlen; qlen--) { - skb = skb_dequeue(&sk->sk_receive_queue); - ipq_rcv_skb(skb); - kfree_skb(skb); - } - + __ipq_rcv_skb(skb); mutex_unlock(&ipqnl_mutex); } @@ -670,7 +661,7 @@ static int __init ip_queue_init(void) netlink_register_notifier(&ipq_nl_notifier); ipqnl = netlink_kernel_create(&init_net, NETLINK_FIREWALL, 0, - ipq_rcv_sk, NULL, THIS_MODULE); + ipq_rcv_skb, NULL, THIS_MODULE); if (ipqnl == NULL) { printk(KERN_ERR "ip_queue: failed to create netlink socket\n"); goto cleanup_netlink_notifier; diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c index c75f467a8f51..0473145ac534 100644 --- a/net/ipv6/netfilter/ip6_queue.c +++ b/net/ipv6/netfilter/ip6_queue.c @@ -464,7 +464,7 @@ ipq_dev_drop(int ifindex) #define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0) static inline void -ipq_rcv_skb(struct sk_buff *skb) +__ipq_rcv_skb(struct sk_buff *skb) { int status, type, pid, flags, nlmsglen, skblen; struct nlmsghdr *nlh; @@ -522,19 +522,10 @@ ipq_rcv_skb(struct sk_buff *skb) } static void -ipq_rcv_sk(struct sock *sk, int len) +ipq_rcv_skb(struct sk_buff *skb) { - struct sk_buff *skb; - unsigned int qlen; - mutex_lock(&ipqnl_mutex); - - for (qlen = skb_queue_len(&sk->sk_receive_queue); qlen; qlen--) { - skb = skb_dequeue(&sk->sk_receive_queue); - ipq_rcv_skb(skb); - kfree_skb(skb); - } - + __ipq_rcv_skb(skb); mutex_unlock(&ipqnl_mutex); } @@ -658,8 +649,8 @@ static int __init ip6_queue_init(void) struct proc_dir_entry *proc; netlink_register_notifier(&ipq_nl_notifier); - ipqnl = netlink_kernel_create(&init_net, NETLINK_IP6_FW, 0, ipq_rcv_sk, - NULL, THIS_MODULE); + ipqnl = netlink_kernel_create(&init_net, NETLINK_IP6_FW, 0, + ipq_rcv_skb, NULL, THIS_MODULE); if (ipqnl == NULL) { printk(KERN_ERR "ip6_queue: failed to create netlink socket\n"); goto cleanup_netlink_notifier; diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 99775af19ff4..2128542995f7 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -169,15 +169,11 @@ static int nfnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) } } -static void nfnetlink_rcv(struct sock *sk, int len) +static void nfnetlink_rcv(struct sk_buff *skb) { - unsigned int qlen = 0; - - do { - nfnl_lock(); - qlen = netlink_run_queue(sk, qlen, nfnetlink_rcv_msg); - nfnl_unlock(); - } while (qlen); + nfnl_lock(); + netlink_rcv_skb(skb, &nfnetlink_rcv_msg); + nfnl_unlock(); } static void __exit nfnetlink_exit(void) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 4ce7dcbcb6ef..c776bcd9f825 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -80,7 +80,7 @@ struct netlink_sock { struct netlink_callback *cb; struct mutex *cb_mutex; struct mutex cb_def_mutex; - void (*data_ready)(struct sock *sk, int bytes); + void (*netlink_rcv)(struct sk_buff *skb); struct module *module; }; @@ -127,7 +127,6 @@ static DECLARE_WAIT_QUEUE_HEAD(nl_table_wait); static int netlink_dump(struct sock *sk); static void netlink_destroy_callback(struct netlink_callback *cb); -static void netlink_queue_skip(struct nlmsghdr *nlh, struct sk_buff *skb); static DEFINE_RWLOCK(nl_table_lock); static atomic_t nl_table_users = ATOMIC_INIT(0); @@ -709,21 +708,17 @@ static void netlink_overrun(struct sock *sk) static struct sock *netlink_getsockbypid(struct sock *ssk, u32 pid) { - int protocol = ssk->sk_protocol; - struct net *net; struct sock *sock; struct netlink_sock *nlk; - net = ssk->sk_net; - sock = netlink_lookup(net, protocol, pid); + sock = netlink_lookup(ssk->sk_net, ssk->sk_protocol, pid); if (!sock) return ERR_PTR(-ECONNREFUSED); /* Don't bother queuing skb if kernel socket has no input function */ nlk = nlk_sk(sock); - if ((netlink_is_kernel(sock) && !nlk->data_ready) || - (sock->sk_state == NETLINK_CONNECTED && - nlk->dst_pid != nlk_sk(ssk)->pid)) { + if (sock->sk_state == NETLINK_CONNECTED && + nlk->dst_pid != nlk_sk(ssk)->pid) { sock_put(sock); return ERR_PTR(-ECONNREFUSED); } @@ -837,7 +832,34 @@ static inline struct sk_buff *netlink_trim(struct sk_buff *skb, return skb; } -int netlink_unicast(struct sock *ssk, struct sk_buff *skb, u32 pid, int nonblock) +static inline void netlink_rcv_wake(struct sock *sk) +{ + struct netlink_sock *nlk = nlk_sk(sk); + + if (skb_queue_empty(&sk->sk_receive_queue)) + clear_bit(0, &nlk->state); + if (!test_bit(0, &nlk->state)) + wake_up_interruptible(&nlk->wait); +} + +static inline int netlink_unicast_kernel(struct sock *sk, struct sk_buff *skb) +{ + int ret; + struct netlink_sock *nlk = nlk_sk(sk); + + ret = -ECONNREFUSED; + if (nlk->netlink_rcv != NULL) { + ret = skb->len; + skb_set_owner_r(skb, sk); + nlk->netlink_rcv(skb); + } + kfree_skb(skb); + sock_put(sk); + return ret; +} + +int netlink_unicast(struct sock *ssk, struct sk_buff *skb, + u32 pid, int nonblock) { struct sock *sk; int err; @@ -852,6 +874,9 @@ retry: kfree_skb(skb); return PTR_ERR(sk); } + if (netlink_is_kernel(sk)) + return netlink_unicast_kernel(sk, skb); + err = netlink_attachskb(sk, skb, nonblock, timeo, ssk); if (err == 1) goto retry; @@ -1151,16 +1176,6 @@ static void netlink_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb) put_cmsg(msg, SOL_NETLINK, NETLINK_PKTINFO, sizeof(info), &info); } -static inline void netlink_rcv_wake(struct sock *sk) -{ - struct netlink_sock *nlk = nlk_sk(sk); - - if (skb_queue_empty(&sk->sk_receive_queue)) - clear_bit(0, &nlk->state); - if (!test_bit(0, &nlk->state)) - wake_up_interruptible(&nlk->wait); -} - static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, struct msghdr *msg, size_t len) { @@ -1308,11 +1323,7 @@ out: static void netlink_data_ready(struct sock *sk, int len) { - struct netlink_sock *nlk = nlk_sk(sk); - - if (nlk->data_ready) - nlk->data_ready(sk, len); - netlink_rcv_wake(sk); + BUG(); } /* @@ -1323,7 +1334,7 @@ static void netlink_data_ready(struct sock *sk, int len) struct sock * netlink_kernel_create(struct net *net, int unit, unsigned int groups, - void (*input)(struct sock *sk, int len), + void (*input)(struct sk_buff *skb), struct mutex *cb_mutex, struct module *module) { struct socket *sock; @@ -1352,7 +1363,7 @@ netlink_kernel_create(struct net *net, int unit, unsigned int groups, sk = sock->sk; sk->sk_data_ready = netlink_data_ready; if (input) - nlk_sk(sk)->data_ready = input; + nlk_sk(sk)->netlink_rcv = input; if (netlink_insert(sk, net, 0)) goto out_sock_release; @@ -1552,12 +1563,7 @@ int netlink_dump_start(struct sock *ssk, struct sk_buff *skb, netlink_dump(sk); sock_put(sk); - - /* We successfully started a dump, by returning -EINTR we - * signal the queue mangement to interrupt processing of - * any netlink messages so userspace gets a chance to read - * the results. */ - return -EINTR; + return 0; } void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err) @@ -1594,13 +1600,15 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err) netlink_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT); } -static int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *, +int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *, struct nlmsghdr *)) { struct nlmsghdr *nlh; int err; while (skb->len >= nlmsg_total_size(0)) { + int msglen; + nlh = nlmsg_hdr(skb); err = 0; @@ -1616,85 +1624,19 @@ static int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *, goto skip; err = cb(skb, nlh); - if (err == -EINTR) { - /* Not an error, but we interrupt processing */ - netlink_queue_skip(nlh, skb); - return err; - } skip: if (nlh->nlmsg_flags & NLM_F_ACK || err) netlink_ack(skb, nlh, err); - netlink_queue_skip(nlh, skb); + msglen = NLMSG_ALIGN(nlh->nlmsg_len); + if (msglen > skb->len) + msglen = skb->len; + skb_pull(skb, msglen); } return 0; } -/** - * nelink_run_queue - Process netlink receive queue. - * @sk: Netlink socket containing the queue - * @qlen: Initial queue length - * @cb: Callback function invoked for each netlink message found - * - * Processes as much as there was in the queue upon entry and invokes - * a callback function for each netlink message found. The callback - * function may refuse a message by returning a negative error code - * but setting the error pointer to 0 in which case this function - * returns with a qlen != 0. - * - * qlen must be initialized to 0 before the initial entry, afterwards - * the function may be called repeatedly until the returned qlen is 0. - * - * The callback function may return -EINTR to signal that processing - * of netlink messages shall be interrupted. In this case the message - * currently being processed will NOT be requeued onto the receive - * queue. - */ -unsigned int netlink_run_queue(struct sock *sk, unsigned int qlen, - int (*cb)(struct sk_buff *, struct nlmsghdr *)) -{ - struct sk_buff *skb; - - if (!qlen || qlen > skb_queue_len(&sk->sk_receive_queue)) - qlen = skb_queue_len(&sk->sk_receive_queue); - - for (; qlen; qlen--) { - skb = skb_dequeue(&sk->sk_receive_queue); - if (netlink_rcv_skb(skb, cb)) { - if (skb->len) - skb_queue_head(&sk->sk_receive_queue, skb); - else { - kfree_skb(skb); - qlen--; - } - break; - } - - kfree_skb(skb); - } - - return qlen; -} - -/** - * netlink_queue_skip - Skip netlink message while processing queue. - * @nlh: Netlink message to be skipped - * @skb: Socket buffer containing the netlink messages. - * - * Pulls the given netlink message off the socket buffer so the next - * call to netlink_queue_run() will not reconsider the message. - */ -static void netlink_queue_skip(struct nlmsghdr *nlh, struct sk_buff *skb) -{ - int msglen = NLMSG_ALIGN(nlh->nlmsg_len); - - if (msglen > skb->len) - msglen = skb->len; - - skb_pull(skb, msglen); -} - /** * nlmsg_notify - send a notification netlink message * @sk: netlink socket to use @@ -1998,7 +1940,7 @@ panic: core_initcall(netlink_proto_init); EXPORT_SYMBOL(netlink_ack); -EXPORT_SYMBOL(netlink_run_queue); +EXPORT_SYMBOL(netlink_rcv_skb); EXPORT_SYMBOL(netlink_broadcast); EXPORT_SYMBOL(netlink_dump_start); EXPORT_SYMBOL(netlink_kernel_create); diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 3f1104dc128b..150579a21469 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -470,15 +470,11 @@ static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) return ops->doit(skb, &info); } -static void genl_rcv(struct sock *sk, int len) +static void genl_rcv(struct sk_buff *skb) { - unsigned int qlen = 0; - - do { - genl_lock(); - qlen = netlink_run_queue(sk, qlen, genl_rcv_msg); - genl_unlock(); - } while (qlen && genl_sock && genl_sock->sk_receive_queue.qlen); + genl_lock(); + netlink_rcv_skb(skb, &genl_rcv_msg); + genl_unlock(); } /************************************************************************** diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 5238f6a8dfad..d41588d101d0 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1895,16 +1895,11 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) return link->doit(skb, nlh, attrs); } -static void xfrm_netlink_rcv(struct sock *sk, int len) +static void xfrm_netlink_rcv(struct sk_buff *skb) { - unsigned int qlen = 0; - - do { - mutex_lock(&xfrm_cfg_mutex); - qlen = netlink_run_queue(sk, qlen, &xfrm_user_rcv_msg); - mutex_unlock(&xfrm_cfg_mutex); - - } while (qlen); + mutex_lock(&xfrm_cfg_mutex); + netlink_rcv_skb(skb, &xfrm_user_rcv_msg); + mutex_unlock(&xfrm_cfg_mutex); } static inline size_t xfrm_expire_msgsize(void) -- cgit v1.2.3