From c8383054506c77b814489c09877b5db83fd4abf2 Mon Sep 17 00:00:00 2001 From: Jeffle Xu Date: Mon, 25 Apr 2022 20:21:24 +0800 Subject: cachefiles: notify the user daemon when looking up cookie Fscache/CacheFiles used to serve as a local cache for a remote networking fs. A new on-demand read mode will be introduced for CacheFiles, which can boost the scenario where on-demand read semantics are needed, e.g. container image distribution. The essential difference between these two modes is seen when a cache miss occurs: In the original mode, the netfs will fetch the data from the remote server and then write it to the cache file; in on-demand read mode, fetching the data and writing it into the cache is delegated to a user daemon. As the first step, notify the user daemon when looking up cookie. In this case, an anonymous fd is sent to the user daemon, through which the user daemon can write the fetched data to the cache file. Since the user daemon may move the anonymous fd around, e.g. through dup(), an object ID uniquely identifying the cache file is also attached. Also add one advisory flag (FSCACHE_ADV_WANT_CACHE_SIZE) suggesting that the cache file size shall be retrieved at runtime. This helps the scenario where one cache file contains multiple netfs files, e.g. for the purpose of deduplication. In this case, netfs itself has no idea the size of the cache file, whilst the user daemon should give the hint on it. Signed-off-by: Jeffle Xu Link: https://lore.kernel.org/r/20220509074028.74954-3-jefflexu@linux.alibaba.com Acked-by: David Howells Signed-off-by: Gao Xiang --- include/uapi/linux/cachefiles.h | 50 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100644 include/uapi/linux/cachefiles.h (limited to 'include/uapi/linux/cachefiles.h') diff --git a/include/uapi/linux/cachefiles.h b/include/uapi/linux/cachefiles.h new file mode 100644 index 000000000000..521f2fe4fe9c --- /dev/null +++ b/include/uapi/linux/cachefiles.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _LINUX_CACHEFILES_H +#define _LINUX_CACHEFILES_H + +#include + +/* + * Fscache ensures that the maximum length of cookie key is 255. The volume key + * is controlled by netfs, and generally no bigger than 255. + */ +#define CACHEFILES_MSG_MAX_SIZE 1024 + +enum cachefiles_opcode { + CACHEFILES_OP_OPEN, +}; + +/* + * Message Header + * + * @msg_id a unique ID identifying this message + * @opcode message type, CACHEFILE_OP_* + * @len message length, including message header and following data + * @object_id a unique ID identifying a cache file + * @data message type specific payload + */ +struct cachefiles_msg { + __u32 msg_id; + __u32 opcode; + __u32 len; + __u32 object_id; + __u8 data[]; +}; + +/* + * @data contains the volume_key followed directly by the cookie_key. volume_key + * is a NUL-terminated string; @volume_key_size indicates the size of the volume + * key in bytes. cookie_key is binary data, which is netfs specific; + * @cookie_key_size indicates the size of the cookie key in bytes. + * + * @fd identifies an anon_fd referring to the cache file. + */ +struct cachefiles_open { + __u32 volume_key_size; + __u32 cookie_key_size; + __u32 fd; + __u32 flags; + __u8 data[]; +}; + +#endif -- cgit v1.2.3 From 324b954ac80cff0d11ddb6bde9b6631e45e98620 Mon Sep 17 00:00:00 2001 From: Jeffle Xu Date: Mon, 25 Apr 2022 20:21:26 +0800 Subject: cachefiles: notify the user daemon when withdrawing cookie Notify the user daemon that cookie is going to be withdrawn, providing a hint that the associated anonymous fd can be closed. Be noted that this is only a hint. The user daemon may close the associated anonymous fd when receiving the CLOSE request, then it will receive another anonymous fd when the cookie gets looked up. Or it may ignore the CLOSE request, and keep writing data through the anonymous fd. However the next time the cookie gets looked up, the user daemon will still receive another new anonymous fd. Signed-off-by: Jeffle Xu Acked-by: David Howells Link: https://lore.kernel.org/r/20220425122143.56815-5-jefflexu@linux.alibaba.com Signed-off-by: Gao Xiang --- fs/cachefiles/interface.c | 2 ++ fs/cachefiles/internal.h | 5 +++++ fs/cachefiles/ondemand.c | 38 ++++++++++++++++++++++++++++++++++++++ include/uapi/linux/cachefiles.h | 1 + 4 files changed, 46 insertions(+) (limited to 'include/uapi/linux/cachefiles.h') diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c index ae93cee9d25d..a69073a1d3f0 100644 --- a/fs/cachefiles/interface.c +++ b/fs/cachefiles/interface.c @@ -362,6 +362,8 @@ static void cachefiles_withdraw_cookie(struct fscache_cookie *cookie) spin_unlock(&cache->object_list_lock); } + cachefiles_ondemand_clean_object(object); + if (object->file) { cachefiles_begin_secure(cache, &saved_cred); cachefiles_clean_up_object(object, cache); diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h index e5c612888f84..da388ba127eb 100644 --- a/fs/cachefiles/internal.h +++ b/fs/cachefiles/internal.h @@ -290,6 +290,7 @@ extern int cachefiles_ondemand_copen(struct cachefiles_cache *cache, char *args); extern int cachefiles_ondemand_init_object(struct cachefiles_object *object); +extern void cachefiles_ondemand_clean_object(struct cachefiles_object *object); #else static inline ssize_t cachefiles_ondemand_daemon_read(struct cachefiles_cache *cache, @@ -302,6 +303,10 @@ static inline int cachefiles_ondemand_init_object(struct cachefiles_object *obje { return 0; } + +static inline void cachefiles_ondemand_clean_object(struct cachefiles_object *object) +{ +} #endif /* diff --git a/fs/cachefiles/ondemand.c b/fs/cachefiles/ondemand.c index 7946ee6c40be..11b1c15ac697 100644 --- a/fs/cachefiles/ondemand.c +++ b/fs/cachefiles/ondemand.c @@ -229,6 +229,12 @@ ssize_t cachefiles_ondemand_daemon_read(struct cachefiles_cache *cache, goto err_put_fd; } + /* CLOSE request has no reply */ + if (msg->opcode == CACHEFILES_OP_CLOSE) { + xa_erase(&cache->reqs, id); + complete(&req->done); + } + return n; err_put_fd: @@ -300,6 +306,13 @@ static int cachefiles_ondemand_send_req(struct cachefiles_object *object, /* coupled with the barrier in cachefiles_flush_reqs() */ smp_mb(); + if (opcode != CACHEFILES_OP_OPEN && object->ondemand_id <= 0) { + WARN_ON_ONCE(object->ondemand_id == 0); + xas_unlock(&xas); + ret = -EIO; + goto out; + } + xas.xa_index = 0; xas_find_marked(&xas, UINT_MAX, XA_FREE_MARK); if (xas.xa_node == XAS_RESTART) @@ -356,6 +369,25 @@ static int cachefiles_ondemand_init_open_req(struct cachefiles_req *req, return 0; } +static int cachefiles_ondemand_init_close_req(struct cachefiles_req *req, + void *private) +{ + struct cachefiles_object *object = req->object; + int object_id = object->ondemand_id; + + /* + * It's possible that object id is still 0 if the cookie looking up + * phase failed before OPEN request has ever been sent. Also avoid + * sending CLOSE request for CACHEFILES_ONDEMAND_ID_CLOSED, which means + * anon_fd has already been closed. + */ + if (object_id <= 0) + return -ENOENT; + + req->msg.object_id = object_id; + return 0; +} + int cachefiles_ondemand_init_object(struct cachefiles_object *object) { struct fscache_cookie *cookie = object->cookie; @@ -379,3 +411,9 @@ int cachefiles_ondemand_init_object(struct cachefiles_object *object) return cachefiles_ondemand_send_req(object, CACHEFILES_OP_OPEN, data_len, cachefiles_ondemand_init_open_req, NULL); } + +void cachefiles_ondemand_clean_object(struct cachefiles_object *object) +{ + cachefiles_ondemand_send_req(object, CACHEFILES_OP_CLOSE, 0, + cachefiles_ondemand_init_close_req, NULL); +} diff --git a/include/uapi/linux/cachefiles.h b/include/uapi/linux/cachefiles.h index 521f2fe4fe9c..37a0071037c8 100644 --- a/include/uapi/linux/cachefiles.h +++ b/include/uapi/linux/cachefiles.h @@ -12,6 +12,7 @@ enum cachefiles_opcode { CACHEFILES_OP_OPEN, + CACHEFILES_OP_CLOSE, }; /* -- cgit v1.2.3 From 9032b6e8589f269743984aac53e82e4835be16dc Mon Sep 17 00:00:00 2001 From: Jeffle Xu Date: Mon, 25 Apr 2022 20:21:27 +0800 Subject: cachefiles: implement on-demand read Implement the data plane of on-demand read mode. The early implementation [1] place the entry to cachefiles_ondemand_read() in fscache_read(). However, fscache_read() can only detect if the requested file range is fully cache miss, whilst we need to notify the user daemon as long as there's a hole inside the requested file range. Thus the entry is now placed in cachefiles_prepare_read(). When working in on-demand read mode, once a hole detected, the read routine will send a READ request to the user daemon. The user daemon needs to fetch the data and write it to the cache file. After sending the READ request, the read routine will hang there, until the READ request is handled by the user daemon. Then it will retry to read from the same file range. If no progress encountered, the read routine will fail then. A new NETFS_SREQ_ONDEMAND flag is introduced to indicate that on-demand read should be done when a cache miss encountered. [1] https://lore.kernel.org/all/20220406075612.60298-6-jefflexu@linux.alibaba.com/ #v8 Signed-off-by: Jeffle Xu Acked-by: David Howells Link: https://lore.kernel.org/r/20220425122143.56815-6-jefflexu@linux.alibaba.com Signed-off-by: Gao Xiang --- fs/cachefiles/internal.h | 9 +++++ fs/cachefiles/io.c | 15 ++++++-- fs/cachefiles/ondemand.c | 77 +++++++++++++++++++++++++++++++++++++++++ include/linux/netfs.h | 1 + include/uapi/linux/cachefiles.h | 17 +++++++++ 5 files changed, 117 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux/cachefiles.h') diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h index da388ba127eb..6cba2c6de2f9 100644 --- a/fs/cachefiles/internal.h +++ b/fs/cachefiles/internal.h @@ -292,6 +292,9 @@ extern int cachefiles_ondemand_copen(struct cachefiles_cache *cache, extern int cachefiles_ondemand_init_object(struct cachefiles_object *object); extern void cachefiles_ondemand_clean_object(struct cachefiles_object *object); +extern int cachefiles_ondemand_read(struct cachefiles_object *object, + loff_t pos, size_t len); + #else static inline ssize_t cachefiles_ondemand_daemon_read(struct cachefiles_cache *cache, char __user *_buffer, size_t buflen) @@ -307,6 +310,12 @@ static inline int cachefiles_ondemand_init_object(struct cachefiles_object *obje static inline void cachefiles_ondemand_clean_object(struct cachefiles_object *object) { } + +static inline int cachefiles_ondemand_read(struct cachefiles_object *object, + loff_t pos, size_t len) +{ + return -EOPNOTSUPP; +} #endif /* diff --git a/fs/cachefiles/io.c b/fs/cachefiles/io.c index 50a14e8f0aac..000a28f46e59 100644 --- a/fs/cachefiles/io.c +++ b/fs/cachefiles/io.c @@ -403,6 +403,7 @@ static enum netfs_io_source cachefiles_prepare_read(struct netfs_io_subrequest * enum netfs_io_source ret = NETFS_DOWNLOAD_FROM_SERVER; loff_t off, to; ino_t ino = file ? file_inode(file)->i_ino : 0; + int rc; _enter("%zx @%llx/%llx", subreq->len, subreq->start, i_size); @@ -415,7 +416,8 @@ static enum netfs_io_source cachefiles_prepare_read(struct netfs_io_subrequest * if (test_bit(FSCACHE_COOKIE_NO_DATA_TO_READ, &cookie->flags)) { __set_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags); why = cachefiles_trace_read_no_data; - goto out_no_object; + if (!test_bit(NETFS_SREQ_ONDEMAND, &subreq->flags)) + goto out_no_object; } /* The object and the file may be being created in the background. */ @@ -432,7 +434,7 @@ static enum netfs_io_source cachefiles_prepare_read(struct netfs_io_subrequest * object = cachefiles_cres_object(cres); cache = object->volume->cache; cachefiles_begin_secure(cache, &saved_cred); - +retry: off = cachefiles_inject_read_error(); if (off == 0) off = vfs_llseek(file, subreq->start, SEEK_DATA); @@ -483,6 +485,15 @@ static enum netfs_io_source cachefiles_prepare_read(struct netfs_io_subrequest * download_and_store: __set_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags); + if (test_bit(NETFS_SREQ_ONDEMAND, &subreq->flags)) { + rc = cachefiles_ondemand_read(object, subreq->start, + subreq->len); + if (!rc) { + __clear_bit(NETFS_SREQ_ONDEMAND, &subreq->flags); + goto retry; + } + ret = NETFS_INVALID_READ; + } out: cachefiles_end_secure(cache, saved_cred); out_no_object: diff --git a/fs/cachefiles/ondemand.c b/fs/cachefiles/ondemand.c index 11b1c15ac697..3470d4e8f0cb 100644 --- a/fs/cachefiles/ondemand.c +++ b/fs/cachefiles/ondemand.c @@ -10,8 +10,25 @@ static int cachefiles_ondemand_fd_release(struct inode *inode, struct cachefiles_object *object = file->private_data; struct cachefiles_cache *cache = object->volume->cache; int object_id = object->ondemand_id; + struct cachefiles_req *req; + XA_STATE(xas, &cache->reqs, 0); + xa_lock(&cache->reqs); object->ondemand_id = CACHEFILES_ONDEMAND_ID_CLOSED; + + /* + * Flush all pending READ requests since their completion depends on + * anon_fd. + */ + xas_for_each(&xas, req, ULONG_MAX) { + if (req->msg.opcode == CACHEFILES_OP_READ) { + req->error = -EIO; + complete(&req->done); + xas_store(&xas, NULL); + } + } + xa_unlock(&cache->reqs); + xa_erase(&cache->ondemand_ids, object_id); cachefiles_put_object(object, cachefiles_obj_put_ondemand_fd); cachefiles_put_unbind_pincount(cache); @@ -57,11 +74,35 @@ static loff_t cachefiles_ondemand_fd_llseek(struct file *filp, loff_t pos, return vfs_llseek(file, pos, whence); } +static long cachefiles_ondemand_fd_ioctl(struct file *filp, unsigned int ioctl, + unsigned long arg) +{ + struct cachefiles_object *object = filp->private_data; + struct cachefiles_cache *cache = object->volume->cache; + struct cachefiles_req *req; + unsigned long id; + + if (ioctl != CACHEFILES_IOC_READ_COMPLETE) + return -EINVAL; + + if (!test_bit(CACHEFILES_ONDEMAND_MODE, &cache->flags)) + return -EOPNOTSUPP; + + id = arg; + req = xa_erase(&cache->reqs, id); + if (!req) + return -EINVAL; + + complete(&req->done); + return 0; +} + static const struct file_operations cachefiles_ondemand_fd_fops = { .owner = THIS_MODULE, .release = cachefiles_ondemand_fd_release, .write_iter = cachefiles_ondemand_fd_write_iter, .llseek = cachefiles_ondemand_fd_llseek, + .unlocked_ioctl = cachefiles_ondemand_fd_ioctl, }; /* @@ -388,6 +429,32 @@ static int cachefiles_ondemand_init_close_req(struct cachefiles_req *req, return 0; } +struct cachefiles_read_ctx { + loff_t off; + size_t len; +}; + +static int cachefiles_ondemand_init_read_req(struct cachefiles_req *req, + void *private) +{ + struct cachefiles_object *object = req->object; + struct cachefiles_read *load = (void *)req->msg.data; + struct cachefiles_read_ctx *read_ctx = private; + int object_id = object->ondemand_id; + + /* Stop enqueuing requests when daemon has closed anon_fd. */ + if (object_id <= 0) { + WARN_ON_ONCE(object_id == 0); + pr_info_once("READ: anonymous fd closed prematurely.\n"); + return -EIO; + } + + req->msg.object_id = object_id; + load->off = read_ctx->off; + load->len = read_ctx->len; + return 0; +} + int cachefiles_ondemand_init_object(struct cachefiles_object *object) { struct fscache_cookie *cookie = object->cookie; @@ -417,3 +484,13 @@ void cachefiles_ondemand_clean_object(struct cachefiles_object *object) cachefiles_ondemand_send_req(object, CACHEFILES_OP_CLOSE, 0, cachefiles_ondemand_init_close_req, NULL); } + +int cachefiles_ondemand_read(struct cachefiles_object *object, + loff_t pos, size_t len) +{ + struct cachefiles_read_ctx read_ctx = {pos, len}; + + return cachefiles_ondemand_send_req(object, CACHEFILES_OP_READ, + sizeof(struct cachefiles_read), + cachefiles_ondemand_init_read_req, &read_ctx); +} diff --git a/include/linux/netfs.h b/include/linux/netfs.h index c7bf1eaf51d5..057d04efaf79 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -159,6 +159,7 @@ struct netfs_io_subrequest { #define NETFS_SREQ_SHORT_IO 2 /* Set if the I/O was short */ #define NETFS_SREQ_SEEK_DATA_READ 3 /* Set if ->read() should SEEK_DATA first */ #define NETFS_SREQ_NO_PROGRESS 4 /* Set if we didn't manage to read any data */ +#define NETFS_SREQ_ONDEMAND 5 /* Set if it's from on-demand read mode */ }; enum netfs_io_origin { diff --git a/include/uapi/linux/cachefiles.h b/include/uapi/linux/cachefiles.h index 37a0071037c8..78caa73e5343 100644 --- a/include/uapi/linux/cachefiles.h +++ b/include/uapi/linux/cachefiles.h @@ -3,6 +3,7 @@ #define _LINUX_CACHEFILES_H #include +#include /* * Fscache ensures that the maximum length of cookie key is 255. The volume key @@ -13,6 +14,7 @@ enum cachefiles_opcode { CACHEFILES_OP_OPEN, CACHEFILES_OP_CLOSE, + CACHEFILES_OP_READ, }; /* @@ -48,4 +50,19 @@ struct cachefiles_open { __u8 data[]; }; +/* + * @off indicates the starting offset of the requested file range + * @len indicates the length of the requested file range + */ +struct cachefiles_read { + __u64 off; + __u64 len; +}; + +/* + * Reply for READ request + * @arg for this ioctl is the @id field of READ request. + */ +#define CACHEFILES_IOC_READ_COMPLETE _IOW(0x98, 1, int) + #endif -- cgit v1.2.3