summaryrefslogtreecommitdiff
path: root/include/linux/net.h
AgeCommit message (Collapse)Author
2008-08-26Fix userspace export of <linux/net.h>David Woodhouse
Including <linux/fcntl.h> in the user-visible part of this header has caused build regressions with headers from 2.6.27-rc. Move it down to the #ifdef __KERNEL__ part, which is the only place it's needed. Move some other kernel-only things down there too, while we're at it. Signed-off-by: David Woodhouse <David.Woodhouse@intel.com> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-07-25printk ratelimiting rewriteDave Young
All ratelimit user use same jiffies and burst params, so some messages (callbacks) will be lost. For example: a call printk_ratelimit(5 * HZ, 1) b call printk_ratelimit(5 * HZ, 1) before the 5*HZ timeout of a, then b will will be supressed. - rewrite __ratelimit, and use a ratelimit_state as parameter. Thanks for hints from andrew. - Add WARN_ON_RATELIMIT, update rcupreempt.h - remove __printk_ratelimit - use __ratelimit in net_ratelimit Signed-off-by: Dave Young <hidave.darkstar@gmail.com> Cc: "David S. Miller" <davem@davemloft.net> Cc: "Paul E. McKenney" <paulmck@us.ibm.com> Cc: Dave Young <hidave.darkstar@gmail.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-07-24flag parameters: NONBLOCK in socket and socketpairUlrich Drepper
This patch introduces support for the SOCK_NONBLOCK flag in socket, socketpair, and paccept. To do this the internal function sock_attach_fd gets an additional parameter which it uses to set the appropriate flag for the file descriptor. Given that in modern, scalable programs almost all socket connections are non-blocking and the minimal additional cost for the new functionality I see no reason not to add this code. The following test must be adjusted for architectures other than x86 and x86-64 and in case the syscall numbers changed. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #include <fcntl.h> #include <pthread.h> #include <stdio.h> #include <unistd.h> #include <netinet/in.h> #include <sys/socket.h> #include <sys/syscall.h> #ifndef __NR_paccept # ifdef __x86_64__ # define __NR_paccept 288 # elif defined __i386__ # define SYS_PACCEPT 18 # define USE_SOCKETCALL 1 # else # error "need __NR_paccept" # endif #endif #ifdef USE_SOCKETCALL # define paccept(fd, addr, addrlen, mask, flags) \ ({ long args[6] = { \ (long) fd, (long) addr, (long) addrlen, (long) mask, 8, (long) flags }; \ syscall (__NR_socketcall, SYS_PACCEPT, args); }) #else # define paccept(fd, addr, addrlen, mask, flags) \ syscall (__NR_paccept, fd, addr, addrlen, mask, 8, flags) #endif #define PORT 57392 #define SOCK_NONBLOCK O_NONBLOCK static pthread_barrier_t b; static void * tf (void *arg) { pthread_barrier_wait (&b); int s = socket (AF_INET, SOCK_STREAM, 0); struct sockaddr_in sin; sin.sin_family = AF_INET; sin.sin_addr.s_addr = htonl (INADDR_LOOPBACK); sin.sin_port = htons (PORT); connect (s, (const struct sockaddr *) &sin, sizeof (sin)); close (s); pthread_barrier_wait (&b); pthread_barrier_wait (&b); s = socket (AF_INET, SOCK_STREAM, 0); sin.sin_port = htons (PORT); connect (s, (const struct sockaddr *) &sin, sizeof (sin)); close (s); pthread_barrier_wait (&b); return NULL; } int main (void) { int fd; fd = socket (PF_INET, SOCK_STREAM, 0); if (fd == -1) { puts ("socket(0) failed"); return 1; } int fl = fcntl (fd, F_GETFL); if (fl == -1) { puts ("fcntl failed"); return 1; } if (fl & O_NONBLOCK) { puts ("socket(0) set non-blocking mode"); return 1; } close (fd); fd = socket (PF_INET, SOCK_STREAM|SOCK_NONBLOCK, 0); if (fd == -1) { puts ("socket(SOCK_NONBLOCK) failed"); return 1; } fl = fcntl (fd, F_GETFL); if (fl == -1) { puts ("fcntl failed"); return 1; } if ((fl & O_NONBLOCK) == 0) { puts ("socket(SOCK_NONBLOCK) does not set non-blocking mode"); return 1; } close (fd); int fds[2]; if (socketpair (PF_UNIX, SOCK_STREAM, 0, fds) == -1) { puts ("socketpair(0) failed"); return 1; } for (int i = 0; i < 2; ++i) { fl = fcntl (fds[i], F_GETFL); if (fl == -1) { puts ("fcntl failed"); return 1; } if (fl & O_NONBLOCK) { printf ("socketpair(0) set non-blocking mode for fds[%d]\n", i); return 1; } close (fds[i]); } if (socketpair (PF_UNIX, SOCK_STREAM|SOCK_NONBLOCK, 0, fds) == -1) { puts ("socketpair(SOCK_NONBLOCK) failed"); return 1; } for (int i = 0; i < 2; ++i) { fl = fcntl (fds[i], F_GETFL); if (fl == -1) { puts ("fcntl failed"); return 1; } if ((fl & O_NONBLOCK) == 0) { printf ("socketpair(SOCK_NONBLOCK) does not set non-blocking mode for fds[%d]\n", i); return 1; } close (fds[i]); } pthread_barrier_init (&b, NULL, 2); struct sockaddr_in sin; pthread_t th; if (pthread_create (&th, NULL, tf, NULL) != 0) { puts ("pthread_create failed"); return 1; } int s = socket (AF_INET, SOCK_STREAM, 0); int reuse = 1; setsockopt (s, SOL_SOCKET, SO_REUSEADDR, &reuse, sizeof (reuse)); sin.sin_family = AF_INET; sin.sin_addr.s_addr = htonl (INADDR_LOOPBACK); sin.sin_port = htons (PORT); bind (s, (struct sockaddr *) &sin, sizeof (sin)); listen (s, SOMAXCONN); pthread_barrier_wait (&b); int s2 = paccept (s, NULL, 0, NULL, 0); if (s2 < 0) { puts ("paccept(0) failed"); return 1; } fl = fcntl (s2, F_GETFL); if (fl & O_NONBLOCK) { puts ("paccept(0) set non-blocking mode"); return 1; } close (s2); close (s); pthread_barrier_wait (&b); s = socket (AF_INET, SOCK_STREAM, 0); sin.sin_port = htons (PORT); setsockopt (s, SOL_SOCKET, SO_REUSEADDR, &reuse, sizeof (reuse)); bind (s, (struct sockaddr *) &sin, sizeof (sin)); listen (s, SOMAXCONN); pthread_barrier_wait (&b); s2 = paccept (s, NULL, 0, NULL, SOCK_NONBLOCK); if (s2 < 0) { puts ("paccept(SOCK_NONBLOCK) failed"); return 1; } fl = fcntl (s2, F_GETFL); if ((fl & O_NONBLOCK) == 0) { puts ("paccept(SOCK_NONBLOCK) does not set non-blocking mode"); return 1; } close (s2); close (s); pthread_barrier_wait (&b); puts ("OK"); return 0; } ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Signed-off-by: Ulrich Drepper <drepper@redhat.com> Acked-by: Davide Libenzi <davidel@xmailserver.org> Cc: Michael Kerrisk <mtk.manpages@googlemail.com> Cc: "David S. Miller" <davem@davemloft.net> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-07-24flag parameters: paccept w/out set_restore_sigmaskUlrich Drepper
Some platforms do not have support to restore the signal mask in the return path from a syscall. For those platforms syscalls like pselect are not defined at all. This is, I think, not a good choice for paccept() since paccept() adds more value on top of accept() than just the signal mask handling. Therefore this patch defines a scaled down version of the sys_paccept function for those platforms. It returns -EINVAL in case the signal mask is non-NULL but behaves the same otherwise. Note that I explicitly included <linux/thread_info.h>. I saw that it is currently included but indirectly two levels down. There is too much risk in relying on this. The header might change and then suddenly the function definition would change without anyone immediately noticing. Signed-off-by: Ulrich Drepper <drepper@redhat.com> Cc: Davide Libenzi <davidel@xmailserver.org> Cc: Michael Kerrisk <mtk.manpages@googlemail.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-07-24flag parameters: pacceptUlrich Drepper
This patch is by far the most complex in the series. It adds a new syscall paccept. This syscall differs from accept in that it adds (at the userlevel) two additional parameters: - a signal mask - a flags value The flags parameter can be used to set flag like SOCK_CLOEXEC. This is imlpemented here as well. Some people argued that this is a property which should be inherited from the file desriptor for the server but this is against POSIX. Additionally, we really want the signal mask parameter as well (similar to pselect, ppoll, etc). So an interface change in inevitable. The flag value is the same as for socket and socketpair. I think diverging here will only create confusion. Similar to the filesystem interfaces where the use of the O_* constants differs, it is acceptable here. The signal mask is handled as for pselect etc. The mask is temporarily installed for the thread and removed before the call returns. I modeled the code after pselect. If there is a problem it's likely also in pselect. For architectures which use socketcall I maintained this interface instead of adding a system call. The symmetry shouldn't be broken. The following test must be adjusted for architectures other than x86 and x86-64 and in case the syscall numbers changed. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #include <errno.h> #include <fcntl.h> #include <pthread.h> #include <signal.h> #include <stdio.h> #include <unistd.h> #include <netinet/in.h> #include <sys/socket.h> #include <sys/syscall.h> #ifndef __NR_paccept # ifdef __x86_64__ # define __NR_paccept 288 # elif defined __i386__ # define SYS_PACCEPT 18 # define USE_SOCKETCALL 1 # else # error "need __NR_paccept" # endif #endif #ifdef USE_SOCKETCALL # define paccept(fd, addr, addrlen, mask, flags) \ ({ long args[6] = { \ (long) fd, (long) addr, (long) addrlen, (long) mask, 8, (long) flags }; \ syscall (__NR_socketcall, SYS_PACCEPT, args); }) #else # define paccept(fd, addr, addrlen, mask, flags) \ syscall (__NR_paccept, fd, addr, addrlen, mask, 8, flags) #endif #define PORT 57392 #define SOCK_CLOEXEC O_CLOEXEC static pthread_barrier_t b; static void * tf (void *arg) { pthread_barrier_wait (&b); int s = socket (AF_INET, SOCK_STREAM, 0); struct sockaddr_in sin; sin.sin_family = AF_INET; sin.sin_addr.s_addr = htonl (INADDR_LOOPBACK); sin.sin_port = htons (PORT); connect (s, (const struct sockaddr *) &sin, sizeof (sin)); close (s); pthread_barrier_wait (&b); s = socket (AF_INET, SOCK_STREAM, 0); sin.sin_port = htons (PORT); connect (s, (const struct sockaddr *) &sin, sizeof (sin)); close (s); pthread_barrier_wait (&b); pthread_barrier_wait (&b); sleep (2); pthread_kill ((pthread_t) arg, SIGUSR1); return NULL; } static void handler (int s) { } int main (void) { pthread_barrier_init (&b, NULL, 2); struct sockaddr_in sin; pthread_t th; if (pthread_create (&th, NULL, tf, (void *) pthread_self ()) != 0) { puts ("pthread_create failed"); return 1; } int s = socket (AF_INET, SOCK_STREAM, 0); int reuse = 1; setsockopt (s, SOL_SOCKET, SO_REUSEADDR, &reuse, sizeof (reuse)); sin.sin_family = AF_INET; sin.sin_addr.s_addr = htonl (INADDR_LOOPBACK); sin.sin_port = htons (PORT); bind (s, (struct sockaddr *) &sin, sizeof (sin)); listen (s, SOMAXCONN); pthread_barrier_wait (&b); int s2 = paccept (s, NULL, 0, NULL, 0); if (s2 < 0) { puts ("paccept(0) failed"); return 1; } int coe = fcntl (s2, F_GETFD); if (coe & FD_CLOEXEC) { puts ("paccept(0) set close-on-exec-flag"); return 1; } close (s2); pthread_barrier_wait (&b); s2 = paccept (s, NULL, 0, NULL, SOCK_CLOEXEC); if (s2 < 0) { puts ("paccept(SOCK_CLOEXEC) failed"); return 1; } coe = fcntl (s2, F_GETFD); if ((coe & FD_CLOEXEC) == 0) { puts ("paccept(SOCK_CLOEXEC) does not set close-on-exec flag"); return 1; } close (s2); pthread_barrier_wait (&b); struct sigaction sa; sa.sa_handler = handler; sa.sa_flags = 0; sigemptyset (&sa.sa_mask); sigaction (SIGUSR1, &sa, NULL); sigset_t ss; pthread_sigmask (SIG_SETMASK, NULL, &ss); sigaddset (&ss, SIGUSR1); pthread_sigmask (SIG_SETMASK, &ss, NULL); sigdelset (&ss, SIGUSR1); alarm (4); pthread_barrier_wait (&b); errno = 0 ; s2 = paccept (s, NULL, 0, &ss, 0); if (s2 != -1 || errno != EINTR) { puts ("paccept did not fail with EINTR"); return 1; } close (s); puts ("OK"); return 0; } ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ [akpm@linux-foundation.org: make it compile] [akpm@linux-foundation.org: add sys_ni stub] Signed-off-by: Ulrich Drepper <drepper@redhat.com> Acked-by: Davide Libenzi <davidel@xmailserver.org> Cc: Michael Kerrisk <mtk.manpages@googlemail.com> Cc: <linux-arch@vger.kernel.org> Cc: "David S. Miller" <davem@davemloft.net> Cc: Roland McGrath <roland@redhat.com> Cc: Kyle McMartin <kyle@mcmartin.ca> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-07-24flag parameters: socket and socketpairUlrich Drepper
This patch adds support for flag values which are ORed to the type passwd to socket and socketpair. The additional code is minimal. The flag values in this implementation can and must match the O_* flags. This avoids overhead in the conversion. The internal functions sock_alloc_fd and sock_map_fd get a new parameters and all callers are changed. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #include <fcntl.h> #include <stdio.h> #include <unistd.h> #include <netinet/in.h> #include <sys/socket.h> #define PORT 57392 /* For Linux these must be the same. */ #define SOCK_CLOEXEC O_CLOEXEC int main (void) { int fd; fd = socket (PF_INET, SOCK_STREAM, 0); if (fd == -1) { puts ("socket(0) failed"); return 1; } int coe = fcntl (fd, F_GETFD); if (coe == -1) { puts ("fcntl failed"); return 1; } if (coe & FD_CLOEXEC) { puts ("socket(0) set close-on-exec flag"); return 1; } close (fd); fd = socket (PF_INET, SOCK_STREAM|SOCK_CLOEXEC, 0); if (fd == -1) { puts ("socket(SOCK_CLOEXEC) failed"); return 1; } coe = fcntl (fd, F_GETFD); if (coe == -1) { puts ("fcntl failed"); return 1; } if ((coe & FD_CLOEXEC) == 0) { puts ("socket(SOCK_CLOEXEC) does not set close-on-exec flag"); return 1; } close (fd); int fds[2]; if (socketpair (PF_UNIX, SOCK_STREAM, 0, fds) == -1) { puts ("socketpair(0) failed"); return 1; } for (int i = 0; i < 2; ++i) { coe = fcntl (fds[i], F_GETFD); if (coe == -1) { puts ("fcntl failed"); return 1; } if (coe & FD_CLOEXEC) { printf ("socketpair(0) set close-on-exec flag for fds[%d]\n", i); return 1; } close (fds[i]); } if (socketpair (PF_UNIX, SOCK_STREAM|SOCK_CLOEXEC, 0, fds) == -1) { puts ("socketpair(SOCK_CLOEXEC) failed"); return 1; } for (int i = 0; i < 2; ++i) { coe = fcntl (fds[i], F_GETFD); if (coe == -1) { puts ("fcntl failed"); return 1; } if ((coe & FD_CLOEXEC) == 0) { printf ("socketpair(SOCK_CLOEXEC) does not set close-on-exec flag for fds[%d]\n", i); return 1; } close (fds[i]); } puts ("OK"); return 0; } ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Signed-off-by: Ulrich Drepper <drepper@redhat.com> Acked-by: Davide Libenzi <davidel@xmailserver.org> Cc: Michael Kerrisk <mtk.manpages@googlemail.com> Cc: "David S. Miller" <davem@davemloft.net> Cc: Ralf Baechle <ralf@linux-mips.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-07-08net: remove padding from struct socket on 64bit & increase objects/cacheRichard Kennedy
remove padding from struct socket reducing its size by 8 bytes. This allows more objects/cache in sock_inode_cache 12 objects/cache when cacheline size is 128 (generic x86_64) Signed-off-by: Richard Kennedy <richard@rsk.demon.co.uk> Signed-off-by: David S. Miller <davem@davemloft.net>
2008-03-22[NET]: NPROTO is redundant; it's equal to AF_MAX/PF_MAX.Rusty Russell
DaveM pointed out NPROTO exposed to userspace, so keep it around, just make sure it stays in sync. Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> Signed-off-by: David S. Miller <davem@davemloft.net>
2008-01-28[NET]: Remove the empty net_tablePavel Emelyanov
I have removed all the entries from this table (core_table, ipv4_table and tr_table), so now we can safely drop it. Signed-off-by: Pavel Emelyanov <xemul@openvz.org> Signed-off-by: David S. Miller <davem@davemloft.net>
2008-01-28[NET]: Name magic constants in sock_wake_async()Pavel Emelyanov
The sock_wake_async() performs a bit different actions depending on "how" argument. Unfortunately this argument ony has numerical magic values. I propose to give names to their constants to help people reading this function callers understand what's going on without looking into this function all the time. I suppose this is 2.6.25 material, but if it's not (or the naming seems poor/bad/awful), I can rework it against the current net-2.6 tree. Signed-off-by: Pavel Emelyanov <xemul@openvz.org> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> Signed-off-by: David S. Miller <davem@davemloft.net>
2008-01-28[TCP]: Splice receive support.Jens Axboe
Support for network splice receive. Signed-off-by: Jens Axboe <jens.axboe@oracle.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2007-11-12[NET]: Add the helper kernel_sock_shutdown()Trond Myklebust
...and fix a couple of bugs in the NBD, CIFS and OCFS2 socket handlers. Looking at the sock->op->shutdown() handlers, it looks as if all of them take a SHUT_RD/SHUT_WR/SHUT_RDWR argument instead of the RCV_SHUTDOWN/SEND_SHUTDOWN arguments. Add a helper, and then define the SHUT_* enum to ensure that kernel users of shutdown() don't get confused. Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com> Acked-by: Mark Fasheh <mark.fasheh@oracle.com> Acked-by: David Howells <dhowells@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2007-10-22[INET]: Let inet_diag and friends autoloadJean Delvare
By adding module aliases to inet_diag, tcp_diag and dccp_diag, we let them load automatically as needed. This makes tools like "ss" run faster. Signed-off-by: Jean Delvare <jdelvare@suse.de> Signed-off-by: David S. Miller <davem@davemloft.net>
2007-10-10[NET]: Make socket creation namespace safe.Eric W. Biederman
This patch passes in the namespace a new socket should be created in and has the socket code do the appropriate reference counting. By virtue of this all socket create methods are touched. In addition the socket create methods are modified so that they will fail if you attempt to create a socket in a non-default network namespace. Failing if we attempt to create a socket outside of the default network namespace ensures that as we incrementally make the network stack network namespace aware we will not export functionality that someone has not audited and made certain is network namespace safe. Allowing us to partially enable network namespaces before all of the exotic protocols are supported. Any protocol layers I have missed will fail to compile because I now pass an extra parameter into the socket creation code. [ Integrated AF_IUCV build fixes from Andrew Morton... -DaveM ] Signed-off-by: Eric W. Biederman <ebiederm@xmission.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2007-04-26[AF_RXRPC]: Provide secure RxRPC sockets for use by userspace and kernel bothDavid Howells
Provide AF_RXRPC sockets that can be used to talk to AFS servers, or serve answers to AFS clients. KerberosIV security is fully supported. The patches and some example test programs can be found in: http://people.redhat.com/~dhowells/rxrpc/ This will eventually replace the old implementation of kernel-only RxRPC currently resident in net/rxrpc/. Signed-off-by: David Howells <dhowells@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2007-02-08[S390]: Add AF_IUCV socket supportJennifer Hunt
From: Jennifer Hunt <jenhunt@us.ibm.com> This patch adds AF_IUCV socket support. Signed-off-by: Frank Pavlic <fpavlic@de.ibm.com> Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2006-12-31[NET]: Don't export linux/random.h outside __KERNEL__.David Woodhouse
Don't add it there please; add it lower down inside the existing #ifdef __KERNEL__. You just made the _userspace_ net.h include random.h, which then fails to compile unless <asm/types.h> was already included. Signed-off-by: David Woodhouse <dwmw2@infradead.org> Signed-off-by: David S. Miller <davem@davemloft.net>
2006-12-02[NET]: Annotate net_srandom().Al Viro
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk> Signed-off-by: David S. Miller <davem@davemloft.net>
2006-10-17[PATCH] rename net_random to random32Stephen Hemminger
Make net_random() more widely available by calling it random32 akpm: hopefully this will permit the removal of carta_random32. That needs confirmation from Stephane - this code looks somewhat more computationally expensive, and has a different (ie: callee-stateful) interface. [akpm@osdl.org: lots of build fixes, cleanups] Signed-off-by: Stephen Hemminger <shemminger@osdl.org> Signed-off-by: David S. Miller <davem@davemloft.net> Cc: Stephane Eranian <eranian@hpl.hp.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-09-22[NET]: sock_register interface changesStephen Hemminger
The sock_register() doesn't change the family, so the protocols can define it read-only. No caller ever checks return value from sock_unregister() Signed-off-by: Stephen Hemminger <shemminger@osdl.org> Signed-off-by: David S. Miller <davem@davemloft.net>
2006-09-22[NET]: drop unused elements from net_proto_familyStephen Hemminger
Three values in net_proto_family are defined but never used. Signed-off-by: Stephen Hemminger <shemminger@osdl.org> Signed-off-by: David S. Miller <davem@davemloft.net>
2006-09-22[NET]: Round out in-kernel sockets APISridhar Samudrala
This patch implements wrapper functions that provide a convenient way to access the sockets API for in-kernel users like sunrpc, cifs & ocfs2 etc and any future users. Signed-off-by: Sridhar Samudrala <sri@us.ibm.com> Acked-by: James Morris <jmorris@namei.org> Signed-off-by: David S. Miller <davem@davemloft.net>
2006-06-29[AF_UNIX]: Datagram getpeersecCatherine Zhang
This patch implements an API whereby an application can determine the label of its peer's Unix datagram sockets via the auxiliary data mechanism of recvmsg. Patch purpose: This patch enables a security-aware application to retrieve the security context of the peer of a Unix datagram socket. The application can then use this security context to determine the security context for processing on behalf of the peer who sent the packet. Patch design and implementation: The design and implementation is very similar to the UDP case for INET sockets. Basically we build upon the existing Unix domain socket API for retrieving user credentials. Linux offers the API for obtaining user credentials via ancillary messages (i.e., out of band/control messages that are bundled together with a normal message). To retrieve the security context, the application first indicates to the kernel such desire by setting the SO_PASSSEC option via getsockopt. Then the application retrieves the security context using the auxiliary data mechanism. An example server application for Unix datagram socket should look like this: toggle = 1; toggle_len = sizeof(toggle); setsockopt(sockfd, SOL_SOCKET, SO_PASSSEC, &toggle, &toggle_len); recvmsg(sockfd, &msg_hdr, 0); if (msg_hdr.msg_controllen > sizeof(struct cmsghdr)) { cmsg_hdr = CMSG_FIRSTHDR(&msg_hdr); if (cmsg_hdr->cmsg_len <= CMSG_LEN(sizeof(scontext)) && cmsg_hdr->cmsg_level == SOL_SOCKET && cmsg_hdr->cmsg_type == SCM_SECURITY) { memcpy(&scontext, CMSG_DATA(cmsg_hdr), sizeof(scontext)); } } sock_setsockopt is enhanced with a new socket option SOCK_PASSSEC to allow a server socket to receive security context of the peer. Testing: We have tested the patch by setting up Unix datagram client and server applications. We verified that the server can retrieve the security context using the auxiliary data mechanism of recvmsg. Signed-off-by: Catherine Zhang <cxzhang@watson.ibm.com> Acked-by: Acked-by: James Morris <jmorris@namei.org> Signed-off-by: David S. Miller <davem@davemloft.net>
2006-04-26Don't include linux/config.h from anywhere else in include/David Woodhouse
Signed-off-by: David Woodhouse <dwmw2@infradead.org>
2006-04-25Don't include <linux/stringify> from user-visible part of linux/net.hDavid Woodhouse
Signed-off-by: David Woodhouse <dwmw2@infradead.org>
2006-03-21[NET]: allow 32 bit socket ioctl in 64 bit kernelShaun Pereira
Since the register_ioctl32_conversion() patch in the kernel is now obsolete, provide another method to allow 32 bit user space ioctls to reach the kernel. Signed-off-by: Shaun Pereira <spereira@tusc.com.au> Acked-by: Arnd Bergmann <arnd@arndb.de> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: David S. Miller <davem@davemloft.net>
2006-03-20[NET]: {get|set}sockopt compatibility layerDmitry Mishin
This patch extends {get|set}sockopt compatibility layer in order to move protocol specific parts to their place and avoid huge universal net/compat.c file in the future. Signed-off-by: Dmitry Mishin <dim@openvz.org> Signed-off-by: David S. Miller <davem@davemloft.net>
2006-01-03[NET]: move struct proto_ops to constEric Dumazet
I noticed that some of 'struct proto_ops' used in the kernel may share a cache line used by locks or other heavily modified data. (default linker alignement is 32 bytes, and L1_CACHE_LINE is 64 or 128 at least) This patch makes sure a 'struct proto_ops' can be declared as const, so that all cpus can share all parts of it without false sharing. This is not mandatory : a driver can still use a read/write structure if it needs to (and eventually a __read_mostly) I made a global stubstitute to change all existing occurences to make them const. This should reduce the possibility of false sharing on SMP, and speedup some socket system calls. Signed-off-by: Eric Dumazet <dada1@cosmosbay.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2005-11-07[PATCH] kernel-doc: fix some kernel-api warningsRandy Dunlap
Fix various warnings in kernel-doc: Warning(linux-2614-rc4//include/linux/net.h:89): Enum value 'SOCK_DCCP' not described in enum 'sock_type' usercopy.c: should use !E instead of !I for exported symbols: Warning(linux-2614-rc4//arch/i386/lib/usercopy.c): no structured comments found fs.h does not need to use !E since it has no exported symbols: Warning(linux-2614-rc4//include/linux/fs.h:1182): No description found for parameter 'find_exported_dentry' Warning(linux-2614-rc4//include/linux/fs.h): no structured comments found irq/manage.c should use !E for its exported symbols: Warning(linux-2614-rc4//kernel/irq/manage.c): no structured comments found macmodes.c should use !E for its exported symbols: Warning(linux-2614-rc4//drivers/video/macmodes.c): no structured comments found Signed-off-by: Randy Dunlap <rdunlap@xenotime.net> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-08-29[NET]: Fix sparse warningsArnaldo Carvalho de Melo
Of this type, mostly: CHECK net/ipv6/netfilter.c net/ipv6/netfilter.c:96:12: warning: symbol 'ipv6_netfilter_init' was not declared. Should it be static? net/ipv6/netfilter.c:101:6: warning: symbol 'ipv6_netfilter_fini' was not declared. Should it be static? Signed-off-by: Arnaldo Carvalho de Melo <acme@mandriva.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2005-08-29[DCCP]: Initial implementationArnaldo Carvalho de Melo
Development to this point was done on a subversion repository at: http://oops.ghostprotocols.net:81/cgi-bin/viewcvs.cgi/dccp-2.6/ This repository will be kept at this site for the foreseable future, so that interested parties can see the history of this code, attributions, etc. If I ever decide to take this offline I'll provide the full history at some other suitable place. Signed-off-by: Arnaldo Carvalho de Melo <acme@ghostprotocols.net> Signed-off-by: David S. Miller <davem@davemloft.net>
2005-08-29[NETLINK]: Add properly module refcounting for kernel netlink sockets.Harald Welte
- Remove bogus code for compiling netlink as module - Add module refcounting support for modules implementing a netlink protocol - Add support for autoloading modules that implement a netlink protocol as soon as someone opens a socket for that protocol Signed-off-by: Harald Welte <laforge@netfilter.org> Signed-off-by: David S. Miller <davem@davemloft.net>
2005-05-23[NET]: Kill stray reference to sock->passcred.David S. Miller
That struct member was deleted, but a comment was not updated to reflect this. Signed-off-by: David S. Miller <davem@davemloft.net>
2005-05-05[PATCH] update Ross Biro bouncing email addressJesper Juhl
Ross moved. Remove the bad email address so people will find the correct one in ./CREDITS. Signed-off-by: Jesper Juhl <juhl-lkml@dif.dk> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-05-01[PATCH] DocBook: changes and extensions to the kernel documentationPavel Pisa
I have recompiled Linux kernel 2.6.11.5 documentation for me and our university students again. The documentation could be extended for more sources which are equipped by structured comments for recent 2.6 kernels. I have tried to proceed with that task. I have done that more times from 2.6.0 time and it gets boring to do same changes again and again. Linux kernel compiles after changes for i386 and ARM targets. I have added references to some more files into kernel-api book, I have added some section names as well. So please, check that changes do not break something and that categories are not too much skewed. I have changed kernel-doc to accept "fastcall" and "asmlinkage" words reserved by kernel convention. Most of the other changes are modifications in the comments to make kernel-doc happy, accept some parameters description and do not bail out on errors. Changed <pid> to @pid in the description, moved some #ifdef before comments to correct function to comments bindings, etc. You can see result of the modified documentation build at http://cmp.felk.cvut.cz/~pisa/linux/lkdb-2.6.11.tar.gz Some more sources are ready to be included into kernel-doc generated documentation. Sources has been added into kernel-api for now. Some more section names added and probably some more chaos introduced as result of quick cleanup work. Signed-off-by: Pavel Pisa <pisa@cmp.felk.cvut.cz> Signed-off-by: Martin Waitz <tali@admingilde.org> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-03-09[NET]: Passcred cleanup in struct sockHerbert Pƶtzl
struct socket uses a 'bool' (unsigned char) to 'flag' the pass-credential option for sockets (which can be easily replaced by a flag) Signed-off-by: David S. Miller <davem@davemloft.net>
2004-12-27[NET]: No need to export sock_readv_writevAdrian Bunk
Signed-off-by: Adrian Bunk <bunk@stusta.de> Signed-off-by: David S. Miller <davem@davemloft.net>
2004-10-25[NET]: Remove dead socket layer exports.Christoph Hellwig
Signed-off-by: David S. Miller <davem@davemloft.net>
2004-09-15[SOCKET] make enum socket_type be arch overridableArnaldo Carvalho de Melo
To cope with MIPS, that has SOCK_STREAM and SOCK_DGRAM values swapped to deal with binary compat. Signed-off-by: Arnaldo Carvalho de Melo <acme@conectiva.com.br> Signed-off-by: Ralf Baechle <ralf@linux-mips.org> Signed-off-by: David S. Miller <davem@redhat.com>
2004-09-06[NET]: Move SOCK_foo types into linux/net.hArnaldo Carvalho de Melo
Every arch defines them the same without exception and with this we only need to update one spot when adding new socket types. Signed-off-by: Arnaldo Carvalho de Melo <acme@conectiva.com.br> Signed-off-by: David S. Miller <davem@davemloft.net>
2004-08-15[NET]: Enhanced version of net_random().Stephen Hemminger
Here is another alternative, using tansworthe generator. It uses percpu state. The one small semantic change is the net_srandom() only affects the current cpu's seed. The problem was that having it change all cpu's seed would mean adding locking and the only user's today are a couple of places that feed in mac address to try make sure address resolution to collide. Signed-off-by: Stephen Hemminger <shemminger@osdl.org> Signed-off-by: David S. Miller <davem@redhat.com>
2004-07-15[PATCH] sparse: beginning of iovec cleanups - infrastructureAlexander Viro
Beginning of iovec cleanups - added two helpers (kernel_{send,recv}msg) that do sock_sendmsg/sock_recvmsg with kvec instead of iovec; basically, they were abstracted from earlier afs patch. They take kvec/length of kvec as separate arguments, do set_fs(), stick kvec into msghdr and call sock_...msg(). The next group of patches will switch network filesystems to use of kvec for kernel data + use of these helpers. Basically, the same thing we'd done for afs.
2004-05-30[PATCH] sparse: missed setsockopt wrappersAlexander Viro
Here's the tail of setsockopt I've missed - for old protocol families we have wrappers around those methods.
2004-05-08[NET]: Add sock_create_lite()James Morris
The purpose of this is to allow sockets created by the kernel in this way to be passed through the LSM socket creation hooks and be labeled and mediated in the same manner as other sockets. This patches addresses a class of potential issues with LSMs, where such sockets will not be labeled correctly (if at all), or mediated during creation. Under SELinux, it fixes a specific bug where RPC sockets created by the kernel during TCP NFS serving are unlabeled.
2004-05-08[NET]: Add sock_create_kern()James Morris
Under SELinux, and potentially other LSMs, we need to be able to distinguish between user sockets and kernel sockets. For SELinux specifically, kernel sockets need to be specially labeled during creation, then bypass access control checks (they are controlled by the kernel itself and not subject to SELinux mediation). This addresses a class of potential issues in SELinux where, for example, a TCP NFS session times out, then the kernel re-establishes an RPC connection upon further user activity. We do not want such kernel created sockets to be labeled with user security contexts. sock_create() and sock_create_kern() are wrapper functions, which seems semantically clearer to me than e.g. adding a flag to sock_create(). If you prefer the latter, then let me know. The patch also adds an argument to the LSM socket creation functions indicating whether the socket being created is a kernel socket or not.
2004-01-08[NET]: Use size_t for size argument in {send,recv}msg callchain.Stephen Hemminger
2003-09-01[NET]: Use MODULE_ALIAS() in network families.Rusty Russell
Previously, default aliases were hardwired into modutils. Now they should be inside the modules, using MODULE_ALIAS() (they will be overridden by any user alias).
2003-05-12Add user pointer annotations to socket, file IO and signalLinus Torvalds
handling. This pointed out a bug in x86 sys_rt_sigreturn(), btw.
2003-05-02o net: improve the current module infrastructureArnaldo Carvalho de Melo
As per discussions in netdev we'll probably be moving to a brand new scheme, but this set of changesets have been discussed and are an improvement to the current situation and were already done prior to this thread happening.
2003-04-22o net: module refcounting for sk_alloc/sk_freeArnaldo Carvalho de Melo
I had to move the rtnetlink_init and init_netlink calls to af_netlink init time, so that the sk_alloc called down the rtnetlink_init callchain is done after the PF_NETLINK net_proto_family is sock_registered, and because of that the af_netlink init function call had to be moved to earlier by means of subsys_initcall (DaveM's suggestion).