diff options
| author | Neil Brown <neilb@cse.unsw.edu.au> | 2002-10-11 05:40:05 -0700 |
|---|---|---|
| committer | Linus Torvalds <torvalds@home.transmeta.com> | 2002-10-11 05:40:05 -0700 |
| commit | 04ffa5502c5cd8b1f1b1b0ea4425600fa4258660 (patch) | |
| tree | 9d9d87c9403e4f7cb5405dde47f594988b717fb9 | |
| parent | ea2212232c15d28b58e817d374c7986b0190d7d4 (diff) | |
[PATCH] kNFSd: Create files: /proc/net/rpc/$CACHENAME/channel for communicating cache updates with kernel
Each cache gets it's own 'channel' at
/proc/net/rpc/$CACHENAME/channel
Reads from the file will return all pending requests, one at a time.
select will block when at end of file.
writes will pass full lines in to be processed.
| -rw-r--r-- | Documentation/rpc-cache.txt | 49 | ||||
| -rw-r--r-- | include/linux/sunrpc/cache.h | 7 | ||||
| -rw-r--r-- | include/linux/sunrpc/stats.h | 2 | ||||
| -rw-r--r-- | net/sunrpc/cache.c | 296 | ||||
| -rw-r--r-- | net/sunrpc/stats.c | 2 |
5 files changed, 355 insertions, 1 deletions
diff --git a/Documentation/rpc-cache.txt b/Documentation/rpc-cache.txt index a71548d8617b..ccfe5587f81b 100644 --- a/Documentation/rpc-cache.txt +++ b/Documentation/rpc-cache.txt @@ -90,3 +90,52 @@ item does become valid, the deferred copy of the request will be revisited (->revisit). It is expected that this method will reschedule the request for processing. + +Populating a cache +------------------ + +Each cache has a name, and when the cache is registered, a directory +with that name is created in /proc/net/rpc + +This directory contains a file called 'channel' which is a channel +for communicating between kernel and user for populating the cache. +This directory may later contain other files of interacting +with the cache. + +The 'channel' works a bit like a datagram socket. Each 'write' is +passed as a whole to the cache for parsing and interpretation. +Each cache can treat the write requests differently, but it is +expected that a message written will contain: + - a key + - an expiry time + - a content. +with the intention that an item in the cache with the give key +should be create or updated to have the given content, and the +expiry time should be set on that item. + +Reading from a channel is a bit more interesting. When a cache +lookup fail, or when it suceeds but finds an entry that may soon +expiry, a request is lodged for that cache item to be updated by +user-space. These requests appear in the channel file. + +Successive reads will return successive requests. +If there are no more requests to return, read will return EOF, but a +select or poll for read will block waiting for another request to be +added. + +Thus a user-space helper is likely to: + open the channel. + select for readable + read a request + write a response + loop. + +If it dies and needs to be restarted, any requests that have not be +answered will still appear in the file and will be read by the new +instance of the helper. + +Each cache should define a "cache_parse" method which takes a message +written from user-space and processes it. It should return an error +(which propagates back to the write syscall) or 0. + + diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index ef69dfa21817..c501845cac0a 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -15,6 +15,7 @@ #include <linux/slab.h> #include <asm/atomic.h> +#include <linux/proc_fs.h> /* * Each cache requires: @@ -74,6 +75,8 @@ struct cache_detail { /* request and update functions for interaction with userspace * will go here */ + int (*cache_parse)(struct cache_detail *, + char *buf, int len); /* fields below this comment are for internal use * and should not be touched by cache owners @@ -83,6 +86,10 @@ struct cache_detail { struct list_head others; time_t nextcheck; int entries; + + /* fields for communication over channel */ + struct list_head queue; + struct proc_dir_entry *proc_ent; }; diff --git a/include/linux/sunrpc/stats.h b/include/linux/sunrpc/stats.h index 13c7989d67d7..8553d06059bd 100644 --- a/include/linux/sunrpc/stats.h +++ b/include/linux/sunrpc/stats.h @@ -57,6 +57,8 @@ int svc_proc_read(char *, char **, off_t, int, int *, void *); void svc_proc_zero(struct svc_program *); +extern struct proc_dir_entry *proc_net_rpc; + #else static inline void svc_proc_unregister(const char *p) {} diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 38d854d29528..0ec038f86c8f 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -18,9 +18,14 @@ #include <linux/sched.h> #include <linux/kmod.h> #include <linux/list.h> +#include <linux/module.h> #include <asm/uaccess.h> +#include <linux/poll.h> +#include <linux/proc_fs.h> +#include <asm/ioctls.h> #include <linux/sunrpc/types.h> #include <linux/sunrpc/cache.h> +#include <linux/sunrpc/stats.h> #define RPCDBG_FACILITY RPCDBG_CACHE @@ -128,9 +133,25 @@ static spinlock_t cache_list_lock = SPIN_LOCK_UNLOCKED; static struct cache_detail *current_detail; static int current_index; +struct file_operations cache_file_operations; + void cache_register(struct cache_detail *cd) { + cd->proc_ent = proc_mkdir(cd->name, proc_net_rpc); + if (cd->proc_ent) { + struct proc_dir_entry *p; + cd->proc_ent->owner = THIS_MODULE; + + p = create_proc_entry("channel", S_IFREG|S_IRUSR|S_IWUSR, + cd->proc_ent); + if (p) { + p->proc_fops = &cache_file_operations; + p->owner = THIS_MODULE; + p->data = cd; + } + } rwlock_init(&cd->hash_lock); + INIT_LIST_HEAD(&cd->queue); spin_lock(&cache_list_lock); cd->nextcheck = 0; cd->entries = 0; @@ -153,6 +174,10 @@ int cache_unregister(struct cache_detail *cd) list_del_init(&cd->others); write_unlock(&cd->hash_lock); spin_unlock(&cache_list_lock); + if (cd->proc_ent) { + cd->proc_ent = NULL; + remove_proc_entry(cd->name, proc_net_rpc); + } return 0; } @@ -390,3 +415,274 @@ void cache_revisit_request(struct cache_head *item) dreq->revisit(dreq, 0); } } + +/* + * communicate with user-space + * + * We have a magic /proc file - /proc/sunrpc/cache + * On read, you get a full request, or block + * On write, an update request is processed + * Poll works if anything to read, and always allows write + * + * Implemented by linked list of requests. Each open file has + * a ->private that also exists in this list. New request are added + * to the end and may wakeup and preceeding readers. + * New readers are added to the head. If, on read, an item is found with + * CACHE_UPCALLING clear, we free it from the list. + * + */ + +static spinlock_t queue_lock = SPIN_LOCK_UNLOCKED; +static DECLARE_MUTEX(queue_io_sem); + +struct cache_queue { + struct list_head list; + int reader; /* if 0, then request */ +}; +struct cache_request { + struct cache_queue q; + struct cache_head *item; + char * buf; + int len; + int readers; +}; +struct cache_reader { + struct cache_queue q; + int offset; /* if non-0, we have a refcnt on next request */ + char *page; +}; + +static ssize_t +cache_read(struct file *filp, char *buf, size_t count, loff_t *ppos) +{ + struct cache_reader *rp = filp->private_data; + struct cache_request *rq; + struct cache_detail *cd = PDE(filp->f_dentry->d_inode)->data; + int err; + + if (ppos != &filp->f_pos) + return -ESPIPE; + + if (count == 0) + return 0; + + down(&queue_io_sem); /* protect against multiple concurrent + * readers on this file */ + again: + spin_lock(&queue_lock); + /* need to find next request */ + while (rp->q.list.next != &cd->queue && + list_entry(rp->q.list.next, struct cache_queue, list) + ->reader) { + struct list_head *next = rp->q.list.next; + list_move(&rp->q.list, next); + } + if (rp->q.list.next == &cd->queue) { + spin_unlock(&queue_lock); + up(&queue_io_sem); + if (rp->offset) + BUG(); + return 0; + } + rq = container_of(rp->q.list.next, struct cache_request, q.list); + if (rq->q.reader) BUG(); + if (rp->offset == 0) + rq->readers++; + spin_unlock(&queue_lock); + + if (rp->offset == 0 && !test_bit(CACHE_PENDING, &rq->item->flags)) { + err = -EAGAIN; + spin_lock(&queue_lock); + list_move(&rp->q.list, &rq->q.list); + spin_unlock(&queue_lock); + } else { + if (rp->offset + count > rq->len) + count = rq->len - rp->offset; + err = -EFAULT; + if (copy_to_user(buf, rq->buf + rp->offset, count)) + goto out; + rp->offset += count; + if (rp->offset >= rq->len) { + rp->offset = 0; + spin_lock(&queue_lock); + list_move(&rp->q.list, &rq->q.list); + spin_unlock(&queue_lock); + } + err = 0; + } + out: + if (rp->offset == 0) { + /* need to release rq */ + spin_lock(&queue_lock); + rq->readers--; + if (rq->readers == 0 && + !test_bit(CACHE_PENDING, &rq->item->flags)) { + list_del(&rq->q.list); + spin_unlock(&queue_lock); + cd->cache_put(rq->item, cd); + kfree(rq->buf); + kfree(rq); + } else + spin_unlock(&queue_lock); + } + if (err == -EAGAIN) + goto again; + up(&queue_io_sem); + return err ? err : count; +} + +static ssize_t +cache_write(struct file *filp, const char *buf, size_t count, + loff_t *ppos) +{ + int err; + struct cache_reader *rp = filp->private_data; + struct cache_detail *cd = PDE(filp->f_dentry->d_inode)->data; + + if (ppos != &filp->f_pos) + return -ESPIPE; + + if (count == 0) + return 0; + if (count > PAGE_SIZE) + return -EINVAL; + + down(&queue_io_sem); + + if (rp->page == NULL) { + rp->page = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (rp->page == NULL) { + up(&queue_io_sem); + return -ENOMEM; + } + } + + if (copy_from_user(rp->page, buf, count)) { + up(&queue_io_sem); + return -EFAULT; + } + if (count < PAGE_SIZE) + rp->page[count] = '\0'; + if (cd->cache_parse) + err = cd->cache_parse(cd, rp->page, count); + else + err = -EINVAL; + + up(&queue_io_sem); + return err ? err : count; +} + +static DECLARE_WAIT_QUEUE_HEAD(queue_wait); + +static unsigned int +cache_poll(struct file *filp, poll_table *wait) +{ + unsigned int mask; + struct cache_reader *rp = filp->private_data; + struct cache_queue *cq; + struct cache_detail *cd = PDE(filp->f_dentry->d_inode)->data; + + poll_wait(filp, &queue_wait, wait); + + /* alway allow write */ + mask = POLL_OUT | POLLWRNORM; + spin_lock(&queue_lock); + + for (cq= &rp->q; &cq->list != &cd->queue; + cq = list_entry(cq->list.next, struct cache_queue, list)) + if (!cq->reader) { + mask |= POLLIN | POLLRDNORM; + break; + } + spin_unlock(&queue_lock); + return mask; +} + +static int +cache_ioctl(struct inode *ino, struct file *filp, + unsigned int cmd, unsigned long arg) +{ + int len = 0; + struct cache_reader *rp = filp->private_data; + struct cache_queue *cq; + struct cache_detail *cd = PDE(ino)->data; + + if (cmd != FIONREAD) + return -EINVAL; + spin_lock(&queue_lock); + + /* only find the length remaining in current request, + * or the length of the next request + */ + for (cq= &rp->q; &cq->list != &cd->queue; + cq = list_entry(cq->list.next, struct cache_queue, list)) + if (!cq->reader) { + struct cache_request *cr = + container_of(cq, struct cache_request, q); + len = cr->len - rp->offset; + break; + } + spin_unlock(&queue_lock); + + return put_user(len, (int *)arg); +} + +static int +cache_open(struct inode *inode, struct file *filp) +{ + struct cache_reader *rp; + struct cache_detail *cd = PDE(inode)->data; + + rp = kmalloc(sizeof(*rp), GFP_KERNEL); + if (!rp) + return -ENOMEM; + rp->page = NULL; + rp->offset = 0; + rp->q.reader = 1; + spin_lock(&queue_lock); + list_add(&rp->q.list, &cd->queue); + spin_unlock(&queue_lock); + filp->private_data = rp; + return 0; +} + +static int +cache_release(struct inode *inode, struct file *filp) +{ + struct cache_reader *rp = filp->private_data; + struct cache_detail *cd = PDE(inode)->data; + + spin_lock(&queue_lock); + if (rp->offset) { + struct cache_queue *cq; + for (cq= &rp->q; &cq->list != &cd->queue; + cq = list_entry(cq->list.next, struct cache_queue, list)) + if (!cq->reader) { + container_of(cq, struct cache_request, q) + ->readers--; + break; + } + rp->offset = 0; + } + list_del(&rp->q.list); + spin_unlock(&queue_lock); + + if (rp->page) + kfree(rp->page); + + filp->private_data = NULL; + kfree(rp); + return 0; +} + + + +struct file_operations cache_file_operations = { + .llseek = no_llseek, + .read = cache_read, + .write = cache_write, + .poll = cache_poll, + .ioctl = cache_ioctl, /* for FIONREAD */ + .open = cache_open, + .release = cache_release, +}; diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c index 2cca105cdb76..ad7a1d700d27 100644 --- a/net/sunrpc/stats.c +++ b/net/sunrpc/stats.c @@ -25,7 +25,7 @@ #define RPCDBG_FACILITY RPCDBG_MISC -static struct proc_dir_entry *proc_net_rpc = NULL; +struct proc_dir_entry *proc_net_rpc = NULL; /* * Get RPC client stats |
