From 7e86845a0346efc95fddaa97ce5cd6a8bda8c71c Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 4 Jun 2024 15:45:24 -0400 Subject: rpcrdma: Implement generic device removal Commit e87a911fed07 ("nvme-rdma: use ib_client API to detect device removal") explains the benefits of handling device removal outside of the CM event handler. Sketch in an IB device removal notification mechanism that can be used by both the client and server side RPC-over-RDMA transport implementations. Suggested-by: Sagi Grimberg Signed-off-by: Chuck Lever Reviewed-by: Sagi Grimberg Signed-off-by: Anna Schumaker --- include/linux/sunrpc/rdma_rn.h | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 include/linux/sunrpc/rdma_rn.h (limited to 'include/linux') diff --git a/include/linux/sunrpc/rdma_rn.h b/include/linux/sunrpc/rdma_rn.h new file mode 100644 index 000000000000..7d032ca057af --- /dev/null +++ b/include/linux/sunrpc/rdma_rn.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * * Copyright (c) 2024, Oracle and/or its affiliates. 
+ */ + +#ifndef _LINUX_SUNRPC_RDMA_RN_H +#define _LINUX_SUNRPC_RDMA_RN_H + +#include <rdma/ib_verbs.h> + +/** + * rpcrdma_notification - request removal notification + */ +struct rpcrdma_notification { + void (*rn_done)(struct rpcrdma_notification *rn); + u32 rn_index; +}; + +int rpcrdma_rn_register(struct ib_device *device, + struct rpcrdma_notification *rn, + void (*done)(struct rpcrdma_notification *rn)); +void rpcrdma_rn_unregister(struct ib_device *device, + struct rpcrdma_notification *rn); +int rpcrdma_ib_client_register(void); +void rpcrdma_ib_client_unregister(void); + +#endif /* _LINUX_SUNRPC_RDMA_RN_H */ -- cgit v1.2.3 From 820620516993c151c88e9b59edc79ed12d1c31cd Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 16 Jun 2024 21:21:19 -0400 Subject: NFSv4: Clean up open delegation return structure Instead of having the fields open coded in the struct nfs_openres, add a separate structure for them so that we can reuse that code for the WANT_DELEGATION case. Signed-off-by: Trond Myklebust Signed-off-by: Lance Shelton Reviewed-by: Jeff Layton Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 30 ++++++++++++++++++------------ fs/nfs/nfs4xdr.c | 38 +++++++++++++++++++------------------- include/linux/nfs_xdr.h | 21 +++++++++++++++++---- 3 files changed, 54 insertions(+), 35 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index a691fa10b3e9..7a74dc1bcfbd 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1960,6 +1960,13 @@ nfs4_opendata_check_deleg(struct nfs4_opendata *data, struct nfs4_state *state) struct nfs_delegation *delegation; int delegation_flags = 0; + switch (data->o_res.delegation.open_delegation_type) { + case NFS4_OPEN_DELEGATE_READ: + case NFS4_OPEN_DELEGATE_WRITE: + break; + default: + return; + }; rcu_read_lock(); delegation = rcu_dereference(NFS_I(state->inode)->delegation); if (delegation) @@ -1979,19 +1986,19 @@ nfs4_opendata_check_deleg(struct nfs4_opendata 
*data, struct nfs4_state *state) if ((delegation_flags & 1UL<<NFS_DELEGATION_NEED_RECLAIM) == 0) nfs_inode_set_delegation(state->inode, data->owner->so_cred, - data->o_res.delegation_type, - &data->o_res.delegation, - data->o_res.pagemod_limit); + data->o_res.delegation.type, + &data->o_res.delegation.stateid, + data->o_res.delegation.pagemod_limit); else nfs_inode_reclaim_delegation(state->inode, data->owner->so_cred, - data->o_res.delegation_type, - &data->o_res.delegation, - data->o_res.pagemod_limit); + data->o_res.delegation.type, + &data->o_res.delegation.stateid, + data->o_res.delegation.pagemod_limit); - if (data->o_res.do_recall) + if (data->o_res.delegation.do_recall) nfs_async_inode_return_delegation(state->inode, - &data->o_res.delegation); + &data->o_res.delegation.stateid); } /* @@ -2015,8 +2022,7 @@ _nfs4_opendata_reclaim_to_nfs4_state(struct nfs4_opendata *data) if (ret) return ERR_PTR(ret); - if (data->o_res.delegation_type != 0) - nfs4_opendata_check_deleg(data, state); + nfs4_opendata_check_deleg(data, state); if (!update_open_stateid(state, &data->o_res.stateid, NULL, data->o_arg.fmode)) @@ -2083,7 +2089,7 @@ _nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data) if (IS_ERR(state)) goto out; - if (data->o_res.delegation_type != 0) + if (data->o_res.delegation.type != 0) nfs4_opendata_check_deleg(data, state); if (!update_open_stateid(state, &data->o_res.stateid, NULL, data->o_arg.fmode)) { @@ -3111,7 +3117,7 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata, case NFS4_OPEN_CLAIM_DELEGATE_PREV: if (!opendata->rpc_done) break; - if (opendata->o_res.delegation_type != 0) + if (opendata->o_res.delegation.type != 0) dir_verifier = nfs_save_change_attribute(dir); nfs_set_verifier(dentry, dir_verifier); } diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 1416099dfcd1..119061da5298 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -5148,13 +5148,12 @@ static int decode_space_limit(struct xdr_stream *xdr, } static int decode_rw_delegation(struct xdr_stream *xdr, - uint32_t 
delegation_type, - struct nfs_openres *res) + struct nfs4_open_delegation *res) { __be32 *p; int status; - status = decode_delegation_stateid(xdr, &res->delegation); + status = decode_delegation_stateid(xdr, &res->stateid); if (unlikely(status)) return status; p = xdr_inline_decode(xdr, 4); @@ -5162,52 +5161,53 @@ static int decode_rw_delegation(struct xdr_stream *xdr, return -EIO; res->do_recall = be32_to_cpup(p); - switch (delegation_type) { + switch (res->open_delegation_type) { case NFS4_OPEN_DELEGATE_READ: - res->delegation_type = FMODE_READ; + res->type = FMODE_READ; break; case NFS4_OPEN_DELEGATE_WRITE: - res->delegation_type = FMODE_WRITE|FMODE_READ; + res->type = FMODE_WRITE|FMODE_READ; if (decode_space_limit(xdr, &res->pagemod_limit) < 0) return -EIO; } return decode_ace(xdr, NULL); } -static int decode_no_delegation(struct xdr_stream *xdr, struct nfs_openres *res) +static int decode_no_delegation(struct xdr_stream *xdr, + struct nfs4_open_delegation *res) { __be32 *p; - uint32_t why_no_delegation; p = xdr_inline_decode(xdr, 4); if (unlikely(!p)) return -EIO; - why_no_delegation = be32_to_cpup(p); - switch (why_no_delegation) { + res->why_no_delegation = be32_to_cpup(p); + switch (res->why_no_delegation) { case WND4_CONTENTION: case WND4_RESOURCE: - xdr_inline_decode(xdr, 4); - /* Ignore for now */ + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + return -EIO; + res->will_notify = be32_to_cpup(p); } return 0; } -static int decode_delegation(struct xdr_stream *xdr, struct nfs_openres *res) +static int decode_delegation(struct xdr_stream *xdr, + struct nfs4_open_delegation *res) { __be32 *p; - uint32_t delegation_type; p = xdr_inline_decode(xdr, 4); if (unlikely(!p)) return -EIO; - delegation_type = be32_to_cpup(p); - res->delegation_type = 0; - switch (delegation_type) { + res->open_delegation_type = be32_to_cpup(p); + switch (res->open_delegation_type) { case NFS4_OPEN_DELEGATE_NONE: return 0; case NFS4_OPEN_DELEGATE_READ: case 
NFS4_OPEN_DELEGATE_WRITE: - return decode_rw_delegation(xdr, delegation_type, res); + return decode_rw_delegation(xdr, res); case NFS4_OPEN_DELEGATE_NONE_EXT: return decode_no_delegation(xdr, res); } @@ -5248,7 +5248,7 @@ static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res) for (; i < NFS4_BITMAP_SIZE; i++) res->attrset[i] = 0; - return decode_delegation(xdr, res); + return decode_delegation(xdr, &res->delegation); xdr_error: dprintk("%s: Bitmap too large! Length = %u\n", __func__, bmlen); return -EIO; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index d09b9773b20c..682559e19d9d 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -449,6 +449,22 @@ struct stateowner_id { __u32 uniquifier; }; +struct nfs4_open_delegation { + __u32 open_delegation_type; + union { + struct { + fmode_t type; + __u32 do_recall; + nfs4_stateid stateid; + unsigned long pagemod_limit; + }; + struct { + __u32 why_no_delegation; + __u32 will_notify; + }; + }; +}; + /* * Arguments to the open call. */ @@ -490,13 +506,10 @@ struct nfs_openres { struct nfs_fattr * f_attr; struct nfs_seqid * seqid; const struct nfs_server *server; - fmode_t delegation_type; - nfs4_stateid delegation; - unsigned long pagemod_limit; - __u32 do_recall; __u32 attrset[NFS4_BITMAP_SIZE]; struct nfs4_string *owner; struct nfs4_string *group_owner; + struct nfs4_open_delegation delegation; __u32 access_request; __u32 access_supported; __u32 access_result; -- cgit v1.2.3 From 6a68aed602d7b770552c7f890d0c30c53725c7b8 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 16 Jun 2024 21:21:21 -0400 Subject: NFSv4: Add new attribute delegation definitions Add the attribute delegation XDR definitions from the spec. 
Signed-off-by: Tom Haynes Signed-off-by: Trond Myklebust Signed-off-by: Lance Shelton Reviewed-by: Jeff Layton Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 2 +- include/linux/nfs4.h | 9 +++++++++ include/uapi/linux/nfs4.h | 2 ++ 3 files changed, 12 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 7f294085e887..90df37f3866a 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3852,7 +3852,7 @@ static void nfs4_close_context(struct nfs_open_context *ctx, int is_sync) #define FATTR4_WORD1_NFS40_MASK (2*FATTR4_WORD1_MOUNTED_ON_FILEID - 1UL) #define FATTR4_WORD2_NFS41_MASK (2*FATTR4_WORD2_SUPPATTR_EXCLCREAT - 1UL) -#define FATTR4_WORD2_NFS42_MASK (2*FATTR4_WORD2_XATTR_SUPPORT - 1UL) +#define FATTR4_WORD2_NFS42_MASK (2*FATTR4_WORD2_TIME_DELEG_MODIFY - 1UL) static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle) { diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 0d896ce296ce..c074e0ac390f 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -367,6 +367,8 @@ enum open_delegation_type4 { NFS4_OPEN_DELEGATE_READ = 1, NFS4_OPEN_DELEGATE_WRITE = 2, NFS4_OPEN_DELEGATE_NONE_EXT = 3, /* 4.1 */ + NFS4_OPEN_DELEGATE_READ_ATTRS_DELEG = 4, + NFS4_OPEN_DELEGATE_WRITE_ATTRS_DELEG = 5, }; enum why_no_delegation4 { /* new to v4.1 */ @@ -507,6 +509,11 @@ enum { FATTR4_XATTR_SUPPORT = 82, }; +enum { + FATTR4_TIME_DELEG_ACCESS = 84, + FATTR4_TIME_DELEG_MODIFY = 85, +}; + /* * The following internal definitions enable processing the above * attribute bits within 32-bit word boundaries. 
@@ -586,6 +593,8 @@ enum { #define FATTR4_WORD2_SECURITY_LABEL BIT(FATTR4_SEC_LABEL - 64) #define FATTR4_WORD2_MODE_UMASK BIT(FATTR4_MODE_UMASK - 64) #define FATTR4_WORD2_XATTR_SUPPORT BIT(FATTR4_XATTR_SUPPORT - 64) +#define FATTR4_WORD2_TIME_DELEG_ACCESS BIT(FATTR4_TIME_DELEG_ACCESS - 64) +#define FATTR4_WORD2_TIME_DELEG_MODIFY BIT(FATTR4_TIME_DELEG_MODIFY - 64) /* MDS threshold bitmap bits */ #define THRESHOLD_RD (1UL << 0) diff --git a/include/uapi/linux/nfs4.h b/include/uapi/linux/nfs4.h index 1d2043708bf1..afd7e32906c3 100644 --- a/include/uapi/linux/nfs4.h +++ b/include/uapi/linux/nfs4.h @@ -69,6 +69,8 @@ #define NFS4_SHARE_SIGNAL_DELEG_WHEN_RESRC_AVAIL 0x10000 #define NFS4_SHARE_PUSH_DELEG_WHEN_UNCONTENDED 0x20000 +#define NFS4_SHARE_WANT_DELEG_TIMESTAMPS 0x100000 + #define NFS4_CDFC4_FORE 0x1 #define NFS4_CDFC4_BACK 0x2 #define NFS4_CDFC4_BOTH 0x3 -- cgit v1.2.3 From 90f9ae74422d7e7447cb0ea21e1227142b58c52a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 16 Jun 2024 21:21:22 -0400 Subject: NFSv4: Plumb in XDR support for the new delegation-only setattr op We want to send the updated atime and mtime as part of the delegreturn compound. Add a special structure to hold those variables. 
Signed-off-by: Trond Myklebust Signed-off-by: Lance Shelton Reviewed-by: Jeff Layton Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 25 ++++++++++++++++++++++++ fs/nfs/nfs4xdr.c | 51 +++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/nfs_xdr.h | 10 ++++++++++ 3 files changed, 86 insertions(+) (limited to 'include/linux') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 90df37f3866a..af0758210162 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -6575,6 +6575,7 @@ struct nfs4_delegreturndata { u32 roc_barrier; bool roc; } lr; + struct nfs4_delegattr sattr; struct nfs_fattr fattr; int rpc_status; struct inode *inode; @@ -6599,6 +6600,30 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata) &data->res.lr_ret) == -EAGAIN) goto out_restart; + if (data->args.sattr_args && task->tk_status != 0) { + switch(data->res.sattr_ret) { + case 0: + data->args.sattr_args = NULL; + data->res.sattr_res = false; + break; + case -NFS4ERR_ADMIN_REVOKED: + case -NFS4ERR_DELEG_REVOKED: + case -NFS4ERR_EXPIRED: + case -NFS4ERR_BAD_STATEID: + /* Let the main handler below do stateid recovery */ + break; + case -NFS4ERR_OLD_STATEID: + if (nfs4_refresh_delegation_stateid(&data->stateid, + data->inode)) + goto out_restart; + fallthrough; + default: + data->args.sattr_args = NULL; + data->res.sattr_res = false; + goto out_restart; + } + } + switch (task->tk_status) { case 0: renew_lease(data->res.server, data->timestamp); diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 119061da5298..4c22b865b9c9 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -224,6 +224,11 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req, encode_attrs_maxsz) #define decode_setattr_maxsz (op_decode_hdr_maxsz + \ nfs4_fattr_bitmap_maxsz) +#define encode_delegattr_maxsz (op_encode_hdr_maxsz + \ + encode_stateid_maxsz + \ + nfs4_fattr_bitmap_maxsz + \ + 2*nfstime4_maxsz) +#define decode_delegattr_maxsz 
(decode_setattr_maxsz) #define encode_read_maxsz (op_encode_hdr_maxsz + \ encode_stateid_maxsz + 3) #define decode_read_maxsz (op_decode_hdr_maxsz + 2 + pagepad_maxsz) @@ -758,12 +763,14 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req, encode_sequence_maxsz + \ encode_putfh_maxsz + \ encode_layoutreturn_maxsz + \ + encode_delegattr_maxsz + \ encode_delegreturn_maxsz + \ encode_getattr_maxsz) #define NFS4_dec_delegreturn_sz (compound_decode_hdr_maxsz + \ decode_sequence_maxsz + \ decode_putfh_maxsz + \ decode_layoutreturn_maxsz + \ + decode_delegattr_maxsz + \ decode_delegreturn_maxsz + \ decode_getattr_maxsz) #define NFS4_enc_getacl_sz (compound_encode_hdr_maxsz + \ @@ -1735,6 +1742,33 @@ static void encode_setattr(struct xdr_stream *xdr, const struct nfs_setattrargs server->attr_bitmask); } +static void encode_delegattr(struct xdr_stream *xdr, + const nfs4_stateid *stateid, + const struct nfs4_delegattr *attr, + struct compound_hdr *hdr) +{ + uint32_t bitmap[3] = { 0 }; + uint32_t len = 0; + __be32 *p; + + encode_op_hdr(xdr, OP_SETATTR, encode_delegattr_maxsz, hdr); + encode_nfs4_stateid(xdr, stateid); + if (attr->atime_set) { + bitmap[2] |= FATTR4_WORD2_TIME_DELEG_ACCESS; + len += (nfstime4_maxsz << 2); + } + if (attr->mtime_set) { + bitmap[2] |= FATTR4_WORD2_TIME_DELEG_MODIFY; + len += (nfstime4_maxsz << 2); + } + xdr_encode_bitmap4(xdr, bitmap, ARRAY_SIZE(bitmap)); + xdr_stream_encode_opaque_inline(xdr, (void **)&p, len); + if (bitmap[2] & FATTR4_WORD2_TIME_DELEG_ACCESS) + p = xdr_encode_nfstime4(p, &attr->atime); + if (bitmap[2] & FATTR4_WORD2_TIME_DELEG_MODIFY) + p = xdr_encode_nfstime4(p, &attr->mtime); +} + static void encode_setclientid(struct xdr_stream *xdr, const struct nfs4_setclientid *setclientid, struct compound_hdr *hdr) { __be32 *p; @@ -2812,6 +2846,8 @@ static void nfs4_xdr_enc_delegreturn(struct rpc_rqst *req, encode_putfh(xdr, args->fhandle, &hdr); if (args->lr_args) encode_layoutreturn(xdr, args->lr_args, &hdr); + 
if (args->sattr_args) + encode_delegattr(xdr, args->stateid, args->sattr_args, &hdr); if (args->bitmask) encode_getfattr(xdr, args->bitmask, &hdr); encode_delegreturn(xdr, args->stateid, &hdr); @@ -5163,9 +5199,11 @@ static int decode_rw_delegation(struct xdr_stream *xdr, switch (res->open_delegation_type) { case NFS4_OPEN_DELEGATE_READ: + case NFS4_OPEN_DELEGATE_READ_ATTRS_DELEG: res->type = FMODE_READ; break; case NFS4_OPEN_DELEGATE_WRITE: + case NFS4_OPEN_DELEGATE_WRITE_ATTRS_DELEG: res->type = FMODE_WRITE|FMODE_READ; if (decode_space_limit(xdr, &res->pagemod_limit) < 0) return -EIO; @@ -5207,6 +5245,8 @@ static int decode_delegation(struct xdr_stream *xdr, return 0; case NFS4_OPEN_DELEGATE_READ: case NFS4_OPEN_DELEGATE_WRITE: + case NFS4_OPEN_DELEGATE_READ_ATTRS_DELEG: + case NFS4_OPEN_DELEGATE_WRITE_ATTRS_DELEG: return decode_rw_delegation(xdr, res); case NFS4_OPEN_DELEGATE_NONE_EXT: return decode_no_delegation(xdr, res); @@ -5480,6 +5520,11 @@ static int decode_setattr(struct xdr_stream *xdr) return -EIO; } +static int decode_delegattr(struct xdr_stream *xdr) +{ + return decode_setattr(xdr); +} + static int decode_setclientid(struct xdr_stream *xdr, struct nfs4_setclientid_res *res) { __be32 *p; @@ -7052,6 +7097,12 @@ static int nfs4_xdr_dec_delegreturn(struct rpc_rqst *rqstp, if (status) goto out; } + if (res->sattr_res) { + status = decode_delegattr(xdr); + res->sattr_ret = status; + if (status) + goto out; + } if (res->fattr) { status = decode_getfattr(xdr, res->fattr, res->server); if (status != 0) diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 682559e19d9d..f40be64ce942 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -622,6 +622,13 @@ struct nfs_release_lockowner_res { struct nfs4_sequence_res seq_res; }; +struct nfs4_delegattr { + struct timespec64 atime; + struct timespec64 mtime; + bool atime_set; + bool mtime_set; +}; + struct nfs4_delegreturnargs { struct nfs4_sequence_args seq_args; const struct nfs_fh 
*fhandle; @@ -629,6 +636,7 @@ struct nfs4_delegreturnargs { const u32 *bitmask; u32 bitmask_store[NFS_BITMASK_SZ]; struct nfs4_layoutreturn_args *lr_args; + struct nfs4_delegattr *sattr_args; }; struct nfs4_delegreturnres { @@ -637,6 +645,8 @@ struct nfs4_delegreturnres { struct nfs_server *server; struct nfs4_layoutreturn_res *lr_res; int lr_ret; + bool sattr_res; + int sattr_ret; }; /* -- cgit v1.2.3 From 4201916f2ab13577d45876f4bc784be55e4a83da Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 16 Jun 2024 21:21:24 -0400 Subject: NFSv4: Add a flags argument to the 'have_delegation' callback This argument will be used to allow the caller to specify whether or not they need to know that this is an attribute delegation. Signed-off-by: Trond Myklebust Signed-off-by: Lance Shelton Reviewed-by: Jeff Layton Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/delegation.c | 26 +++++++++++++------------- fs/nfs/delegation.h | 16 +++++++++++++--- fs/nfs/dir.c | 2 +- fs/nfs/file.c | 4 ++-- fs/nfs/inode.c | 7 +++---- fs/nfs/nfs3proc.c | 2 +- fs/nfs/nfs4proc.c | 14 +++++++------- fs/nfs/proc.c | 2 +- fs/nfs/write.c | 2 +- include/linux/nfs_xdr.h | 2 +- 10 files changed, 43 insertions(+), 34 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 6bace5fece04..6fdffd25cb2b 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -82,11 +82,10 @@ static void nfs_mark_return_delegation(struct nfs_server *server, set_bit(NFS4CLNT_DELEGRETURN, &server->nfs_client->cl_state); } -static bool -nfs4_is_valid_delegation(const struct nfs_delegation *delegation, - fmode_t flags) +static bool nfs4_is_valid_delegation(const struct nfs_delegation *delegation, + fmode_t type) { - if (delegation != NULL && (delegation->type & flags) == flags && + if (delegation != NULL && (delegation->type & type) == type && !test_bit(NFS_DELEGATION_REVOKED, &delegation->flags) && !test_bit(NFS_DELEGATION_RETURNING, 
&delegation->flags)) return true; @@ -103,16 +102,16 @@ struct nfs_delegation *nfs4_get_valid_delegation(const struct inode *inode) return NULL; } -static int -nfs4_do_check_delegation(struct inode *inode, fmode_t flags, bool mark) +static int nfs4_do_check_delegation(struct inode *inode, fmode_t type, + int flags, bool mark) { struct nfs_delegation *delegation; int ret = 0; - flags &= FMODE_READ|FMODE_WRITE; + type &= FMODE_READ|FMODE_WRITE; rcu_read_lock(); delegation = rcu_dereference(NFS_I(inode)->delegation); - if (nfs4_is_valid_delegation(delegation, flags)) { + if (nfs4_is_valid_delegation(delegation, type)) { if (mark) nfs_mark_delegation_referenced(delegation); ret = 1; @@ -124,22 +123,23 @@ nfs4_do_check_delegation(struct inode *inode, fmode_t flags, bool mark) * nfs4_have_delegation - check if inode has a delegation, mark it * NFS_DELEGATION_REFERENCED if there is one. * @inode: inode to check - * @flags: delegation types to check for + * @type: delegation types to check for + * @flags: various modifiers * * Returns one if inode has the indicated delegation, otherwise zero. */ -int nfs4_have_delegation(struct inode *inode, fmode_t flags) +int nfs4_have_delegation(struct inode *inode, fmode_t type, int flags) { - return nfs4_do_check_delegation(inode, flags, true); + return nfs4_do_check_delegation(inode, type, flags, true); } /* * nfs4_check_delegation - check if inode has a delegation, do not mark * NFS_DELEGATION_REFERENCED if it has one. 
*/ -int nfs4_check_delegation(struct inode *inode, fmode_t flags) +int nfs4_check_delegation(struct inode *inode, fmode_t type) { - return nfs4_do_check_delegation(inode, flags, false); + return nfs4_do_check_delegation(inode, type, 0, false); } static int nfs_delegation_claim_locks(struct nfs4_state *state, const nfs4_stateid *stateid) diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h index a6f495d012cf..257b3d726043 100644 --- a/fs/nfs/delegation.h +++ b/fs/nfs/delegation.h @@ -75,8 +75,8 @@ bool nfs4_refresh_delegation_stateid(nfs4_stateid *dst, struct inode *inode); struct nfs_delegation *nfs4_get_valid_delegation(const struct inode *inode); void nfs_mark_delegation_referenced(struct nfs_delegation *delegation); -int nfs4_have_delegation(struct inode *inode, fmode_t flags); -int nfs4_check_delegation(struct inode *inode, fmode_t flags); +int nfs4_have_delegation(struct inode *inode, fmode_t type, int flags); +int nfs4_check_delegation(struct inode *inode, fmode_t type); bool nfs4_delegation_flush_on_close(const struct inode *inode); void nfs_inode_find_delegation_state_and_recover(struct inode *inode, const nfs4_stateid *stateid); @@ -84,9 +84,19 @@ int nfs4_inode_make_writeable(struct inode *inode); #endif +static inline int nfs_have_read_or_write_delegation(struct inode *inode) +{ + return NFS_PROTO(inode)->have_delegation(inode, FMODE_READ, 0); +} + +static inline int nfs_have_write_delegation(struct inode *inode) +{ + return NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE, 0); +} + static inline int nfs_have_delegated_attributes(struct inode *inode) { - return NFS_PROTO(inode)->have_delegation(inode, FMODE_READ); + return NFS_PROTO(inode)->have_delegation(inode, FMODE_READ, 0); } #endif diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 07a7be27182e..4cb97ef41350 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1437,7 +1437,7 @@ static void nfs_set_verifier_locked(struct dentry *dentry, unsigned long verf) if (!dir || !nfs_verify_change_attribute(dir, 
verf)) return; - if (inode && NFS_PROTO(inode)->have_delegation(inode, FMODE_READ)) + if (inode && NFS_PROTO(inode)->have_delegation(inode, FMODE_READ, 0)) nfs_set_verifier_delegated(&verf); dentry->d_time = verf; } diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 7f1295475a90..834e612262e6 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -732,7 +732,7 @@ do_getlk(struct file *filp, int cmd, struct file_lock *fl, int is_local) } fl->c.flc_type = saved_type; - if (NFS_PROTO(inode)->have_delegation(inode, FMODE_READ)) + if (nfs_have_read_or_write_delegation(inode)) goto out_noconflict; if (is_local) @@ -815,7 +815,7 @@ do_setlk(struct file *filp, int cmd, struct file_lock *fl, int is_local) * This makes locking act as a cache coherency point. */ nfs_sync_mapping(filp->f_mapping); - if (!NFS_PROTO(inode)->have_delegation(inode, FMODE_READ)) { + if (!nfs_have_read_or_write_delegation(inode)) { nfs_zap_caches(inode); if (mapping_mapped(filp->f_mapping)) nfs_revalidate_mapping(inode, filp->f_mapping); diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index d81c5bcc64e3..f1bfe453aa84 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -190,9 +190,8 @@ static bool nfs_has_xattr_cache(const struct nfs_inode *nfsi) void nfs_set_cache_invalid(struct inode *inode, unsigned long flags) { struct nfs_inode *nfsi = NFS_I(inode); - bool have_delegation = NFS_PROTO(inode)->have_delegation(inode, FMODE_READ); - if (have_delegation) { + if (nfs_have_delegated_attributes(inode)) { if (!(flags & NFS_INO_REVAL_FORCED)) flags &= ~(NFS_INO_INVALID_MODE | NFS_INO_INVALID_OTHER | @@ -1013,7 +1012,7 @@ void nfs_close_context(struct nfs_open_context *ctx, int is_sync) if (!is_sync) return; inode = d_inode(ctx->dentry); - if (NFS_PROTO(inode)->have_delegation(inode, FMODE_READ)) + if (nfs_have_read_or_write_delegation(inode)) return; nfsi = NFS_I(inode); if (inode->i_mapping->nrpages == 0) @@ -1483,7 +1482,7 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat 
unsigned long invalid = 0; struct timespec64 ts; - if (NFS_PROTO(inode)->have_delegation(inode, FMODE_READ)) + if (nfs_have_delegated_attributes(inode)) return 0; if (!(fattr->valid & NFS_ATTR_FATTR_FILEID)) { diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 74bda639a7cf..cab6c73d25d6 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -979,7 +979,7 @@ nfs3_proc_lock(struct file *filp, int cmd, struct file_lock *fl) return status; } -static int nfs3_have_delegation(struct inode *inode, fmode_t flags) +static int nfs3_have_delegation(struct inode *inode, fmode_t type, int flags) { return 0; } diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index af0758210162..4455ee510c2f 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -293,7 +293,7 @@ static void nfs4_bitmap_copy_adjust(__u32 *dst, const __u32 *src, unsigned long cache_validity; memcpy(dst, src, NFS4_BITMASK_SZ*sizeof(*dst)); - if (!inode || !nfs4_have_delegation(inode, FMODE_READ)) + if (!inode || !nfs_have_read_or_write_delegation(inode)) return; cache_validity = READ_ONCE(NFS_I(inode)->cache_validity) | flags; @@ -1264,7 +1264,7 @@ nfs4_update_changeattr_locked(struct inode *inode, if (S_ISDIR(inode->i_mode)) nfs_force_lookup_revalidate(inode); - if (!NFS_PROTO(inode)->have_delegation(inode, FMODE_READ)) + if (!nfs_have_delegated_attributes(inode)) cache_validity |= NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL | NFS_INO_INVALID_SIZE | NFS_INO_INVALID_OTHER | @@ -3700,7 +3700,7 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) if (calldata->arg.fmode == 0 || calldata->arg.fmode == FMODE_READ) { /* Close-to-open cache consistency revalidation */ - if (!nfs4_have_delegation(inode, FMODE_READ)) { + if (!nfs4_have_delegation(inode, FMODE_READ, 0)) { nfs4_bitmask_set(calldata->arg.bitmask_store, server->cache_consistency_bitmask, inode, 0); @@ -4638,7 +4638,7 @@ static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry }; int status = 0; - if 
(!nfs4_have_delegation(inode, FMODE_READ)) { + if (!nfs4_have_delegation(inode, FMODE_READ, 0)) { res.fattr = nfs_alloc_fattr(); if (res.fattr == NULL) return -ENOMEM; @@ -5607,7 +5607,7 @@ bool nfs4_write_need_cache_consistency_data(struct nfs_pgio_header *hdr) /* Otherwise, request attributes if and only if we don't hold * a delegation */ - return nfs4_have_delegation(hdr->inode, FMODE_READ) == 0; + return nfs4_have_delegation(hdr->inode, FMODE_READ, 0) == 0; } void nfs4_bitmask_set(__u32 bitmask[], const __u32 src[], @@ -7654,10 +7654,10 @@ static int nfs4_add_lease(struct file *file, int arg, struct file_lease **lease, int ret; /* No delegation, no lease */ - if (!nfs4_have_delegation(inode, type)) + if (!nfs4_have_delegation(inode, type, 0)) return -EAGAIN; ret = generic_setlease(file, arg, lease, priv); - if (ret || nfs4_have_delegation(inode, type)) + if (ret || nfs4_have_delegation(inode, type, 0)) return ret; /* We raced with a delegation return */ nfs4_delete_lease(file, priv); diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index d105e5b2659d..995cc42b0fa0 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -687,7 +687,7 @@ out_einval: return -EINVAL; } -static int nfs_have_delegation(struct inode *inode, fmode_t flags) +static int nfs_have_delegation(struct inode *inode, fmode_t type, int flags) { return 0; } diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 2329cbb0e446..be19ac3110d1 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1320,7 +1320,7 @@ static int nfs_can_extend_write(struct file *file, struct folio *folio, return 0; if (!nfs_folio_write_uptodate(folio, pagelen)) return 0; - if (NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE)) + if (nfs_have_write_delegation(inode)) return 1; if (!flctx || (list_empty_careful(&flctx->flc_flock) && list_empty_careful(&flctx->flc_posix))) diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index f40be64ce942..51611583af51 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ 
-1830,7 +1830,7 @@ struct nfs_rpc_ops { int open_flags, struct iattr *iattr, int *); - int (*have_delegation)(struct inode *, fmode_t); + int (*have_delegation)(struct inode *, fmode_t, int); struct nfs_client *(*alloc_client) (const struct nfs_client_initdata *); struct nfs_client *(*init_client) (struct nfs_client *, const struct nfs_client_initdata *); -- cgit v1.2.3 From 86e1c54d152e2799671e149d6e4d1c1a4a2be857 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 16 Jun 2024 21:21:26 -0400 Subject: NFSv4: Add recovery of attribute delegations After a reboot of the NFSv4.2 server, the recovery code needs to specify whether the delegation to be recovered is an attribute delegation or not. Signed-off-by: Trond Myklebust Signed-off-by: Lance Shelton Reviewed-by: Jeff Layton Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 18 +++++++++++++++--- fs/nfs/nfs4xdr.c | 18 ++++++++---------- include/linux/nfs_xdr.h | 2 +- 3 files changed, 24 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index f4215dcf3614..34182a3c38a7 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2225,7 +2225,7 @@ static int _nfs4_do_open_reclaim(struct nfs_open_context *ctx, struct nfs4_state { struct nfs_delegation *delegation; struct nfs4_opendata *opendata; - fmode_t delegation_type = 0; + u32 delegation_type = NFS4_OPEN_DELEGATE_NONE; int status; opendata = nfs4_open_recoverdata_alloc(ctx, state, @@ -2234,8 +2234,20 @@ static int _nfs4_do_open_reclaim(struct nfs_open_context *ctx, struct nfs4_state return PTR_ERR(opendata); rcu_read_lock(); delegation = rcu_dereference(NFS_I(state->inode)->delegation); - if (delegation != NULL && test_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags) != 0) - delegation_type = delegation->type; + if (delegation != NULL && test_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags) != 0) { + switch(delegation->type) { + case FMODE_READ: + 
delegation_type = NFS4_OPEN_DELEGATE_READ; + if (test_bit(NFS_DELEGATION_DELEGTIME, &delegation->flags)) + delegation_type = NFS4_OPEN_DELEGATE_READ_ATTRS_DELEG; + break; + case FMODE_WRITE: + case FMODE_READ|FMODE_WRITE: + delegation_type = NFS4_OPEN_DELEGATE_WRITE; + if (test_bit(NFS_DELEGATION_DELEGTIME, &delegation->flags)) + delegation_type = NFS4_OPEN_DELEGATE_WRITE_ATTRS_DELEG; + } + } rcu_read_unlock(); opendata->o_arg.u.delegation_type = delegation_type; status = nfs4_open_recover(opendata, state); diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 4c22b865b9c9..e160a275ad4a 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -1475,20 +1475,18 @@ static void encode_opentype(struct xdr_stream *xdr, const struct nfs_openargs *a } } -static inline void encode_delegation_type(struct xdr_stream *xdr, fmode_t delegation_type) +static inline void encode_delegation_type(struct xdr_stream *xdr, u32 delegation_type) { __be32 *p; p = reserve_space(xdr, 4); switch (delegation_type) { - case 0: - *p = cpu_to_be32(NFS4_OPEN_DELEGATE_NONE); - break; - case FMODE_READ: - *p = cpu_to_be32(NFS4_OPEN_DELEGATE_READ); - break; - case FMODE_WRITE|FMODE_READ: - *p = cpu_to_be32(NFS4_OPEN_DELEGATE_WRITE); + case NFS4_OPEN_DELEGATE_NONE: + case NFS4_OPEN_DELEGATE_READ: + case NFS4_OPEN_DELEGATE_WRITE: + case NFS4_OPEN_DELEGATE_READ_ATTRS_DELEG: + case NFS4_OPEN_DELEGATE_WRITE_ATTRS_DELEG: + *p = cpu_to_be32(delegation_type); break; default: BUG(); @@ -1504,7 +1502,7 @@ static inline void encode_claim_null(struct xdr_stream *xdr, const struct qstr * encode_string(xdr, name->len, name->name); } -static inline void encode_claim_previous(struct xdr_stream *xdr, fmode_t type) +static inline void encode_claim_previous(struct xdr_stream *xdr, u32 type) { __be32 *p; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 51611583af51..d8cfa956d24c 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -484,7 +484,7 @@ struct nfs_openargs { nfs4_verifier 
verifier; /* EXCLUSIVE */ }; nfs4_stateid delegation; /* CLAIM_DELEGATE_CUR */ - fmode_t delegation_type; /* CLAIM_PREVIOUS */ + __u32 delegation_type; /* CLAIM_PREVIOUS */ } u; const struct qstr * name; const struct nfs_server *server; /* Needed for ID mapping */ -- cgit v1.2.3 From dcb3c20f741993efbd95be78aab93fac29516323 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 16 Jun 2024 21:21:27 -0400 Subject: NFSv4: Add a capability for delegated attributes Cache whether or not the server may have support for delegated attributes in a capability flag. Signed-off-by: Trond Myklebust Signed-off-by: Lance Shelton Reviewed-by: Jeff Layton Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 2 ++ include/linux/nfs_fs_sb.h | 1 + 2 files changed, 3 insertions(+) (limited to 'include/linux') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 34182a3c38a7..03835c8a180f 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3930,6 +3930,8 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f #endif if (res.attr_bitmask[0] & FATTR4_WORD0_FS_LOCATIONS) server->caps |= NFS_CAP_FS_LOCATIONS; + if (res.attr_bitmask[2] & FATTR4_WORD2_TIME_DELEG_MODIFY) + server->caps |= NFS_CAP_DELEGTIME; if (!(res.attr_bitmask[0] & FATTR4_WORD0_FILEID)) server->fattr_valid &= ~NFS_ATTR_FATTR_FILEID; if (!(res.attr_bitmask[1] & FATTR4_WORD1_MODE)) diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 92de074e63b9..5a76a87cd924 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -278,6 +278,7 @@ struct nfs_server { #define NFS_CAP_LGOPEN (1U << 5) #define NFS_CAP_CASE_INSENSITIVE (1U << 6) #define NFS_CAP_CASE_PRESERVING (1U << 7) +#define NFS_CAP_DELEGTIME (1U << 13) #define NFS_CAP_POSIX_LOCK (1U << 14) #define NFS_CAP_UIDGID_NOMAP (1U << 15) #define NFS_CAP_STATEID_NFSV41 (1U << 16) -- cgit v1.2.3 From 707f13b3d081ea811c40014e7b61f2c487ee9a4f Mon Sep 17 00:00:00 2001 From: 
Trond Myklebust Date: Sun, 16 Jun 2024 21:21:32 -0400 Subject: NFSv4: Add support for the FATTR4_OPEN_ARGUMENTS attribute Query the server for the OPEN arguments that it supports so that we can figure out which extensions we can use. Signed-off-by: Trond Myklebust Signed-off-by: Lance Shelton Reviewed-by: Jeff Layton Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 20 ++++++++++++++++++-- fs/nfs/nfs4xdr.c | 24 ++++++++++++++++++++++++ include/linux/nfs4.h | 2 ++ include/linux/nfs_xdr.h | 9 +++++++++ 4 files changed, 53 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index cbd340cd825e..ae91492e9521 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3885,11 +3885,14 @@ static void nfs4_close_context(struct nfs_open_context *ctx, int is_sync) #define FATTR4_WORD1_NFS40_MASK (2*FATTR4_WORD1_MOUNTED_ON_FILEID - 1UL) #define FATTR4_WORD2_NFS41_MASK (2*FATTR4_WORD2_SUPPATTR_EXCLCREAT - 1UL) -#define FATTR4_WORD2_NFS42_MASK (2*FATTR4_WORD2_TIME_DELEG_MODIFY - 1UL) +#define FATTR4_WORD2_NFS42_MASK (2*FATTR4_WORD2_OPEN_ARGUMENTS - 1UL) static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle) { - u32 bitmask[3] = {}, minorversion = server->nfs_client->cl_minorversion; + u32 minorversion = server->nfs_client->cl_minorversion; + u32 bitmask[3] = { + [0] = FATTR4_WORD0_SUPPORTED_ATTRS, + }; struct nfs4_server_caps_arg args = { .fhandle = fhandle, .bitmask = bitmask, @@ -3915,6 +3918,14 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0); if (status == 0) { + bitmask[0] = (FATTR4_WORD0_SUPPORTED_ATTRS | + FATTR4_WORD0_FH_EXPIRE_TYPE | + FATTR4_WORD0_LINK_SUPPORT | + FATTR4_WORD0_SYMLINK_SUPPORT | + FATTR4_WORD0_ACLSUPPORT | + FATTR4_WORD0_CASE_INSENSITIVE | + FATTR4_WORD0_CASE_PRESERVING) & + res.attr_bitmask[0]; /* Sanity 
check the server answers */ switch (minorversion) { case 0: @@ -3923,9 +3934,14 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f break; case 1: res.attr_bitmask[2] &= FATTR4_WORD2_NFS41_MASK; + bitmask[2] = FATTR4_WORD2_SUPPATTR_EXCLCREAT & + res.attr_bitmask[2]; break; case 2: res.attr_bitmask[2] &= FATTR4_WORD2_NFS42_MASK; + bitmask[2] = (FATTR4_WORD2_SUPPATTR_EXCLCREAT | + FATTR4_WORD2_OPEN_ARGUMENTS) & + res.attr_bitmask[2]; } memcpy(server->attr_bitmask, res.attr_bitmask, sizeof(server->attr_bitmask)); server->caps &= ~(NFS_CAP_ACLS | NFS_CAP_HARDLINKS | diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index e160a275ad4a..98aab2c324c9 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -4337,6 +4337,28 @@ static int decode_attr_xattrsupport(struct xdr_stream *xdr, uint32_t *bitmap, return 0; } +static int decode_attr_open_arguments(struct xdr_stream *xdr, uint32_t *bitmap, + struct nfs4_open_caps *res) +{ + memset(res, 0, sizeof(*res)); + if (unlikely(bitmap[2] & (FATTR4_WORD2_OPEN_ARGUMENTS - 1U))) + return -EIO; + if (likely(bitmap[2] & FATTR4_WORD2_OPEN_ARGUMENTS)) { + if (decode_bitmap4(xdr, res->oa_share_access, ARRAY_SIZE(res->oa_share_access)) < 0) + return -EIO; + if (decode_bitmap4(xdr, res->oa_share_deny, ARRAY_SIZE(res->oa_share_deny)) < 0) + return -EIO; + if (decode_bitmap4(xdr, res->oa_share_access_want, ARRAY_SIZE(res->oa_share_access_want)) < 0) + return -EIO; + if (decode_bitmap4(xdr, res->oa_open_claim, ARRAY_SIZE(res->oa_open_claim)) < 0) + return -EIO; + if (decode_bitmap4(xdr, res->oa_createmode, ARRAY_SIZE(res->oa_createmode)) < 0) + return -EIO; + bitmap[2] &= ~FATTR4_WORD2_OPEN_ARGUMENTS; + } + return 0; +} + static int verify_attr_len(struct xdr_stream *xdr, unsigned int savep, uint32_t attrlen) { unsigned int attrwords = XDR_QUADLEN(attrlen); @@ -4511,6 +4533,8 @@ static int decode_server_caps(struct xdr_stream *xdr, struct nfs4_server_caps_re if ((status = decode_attr_exclcreat_supported(xdr, 
bitmap, res->exclcreat_bitmask)) != 0) goto xdr_error; + if ((status = decode_attr_open_arguments(xdr, bitmap, &res->open_caps)) != 0) + goto xdr_error; status = verify_attr_len(xdr, savep, attrlen); xdr_error: dprintk("%s: xdr returned %d!\n", __func__, -status); diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index c074e0ac390f..f9df88091c6d 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -512,6 +512,7 @@ enum { enum { FATTR4_TIME_DELEG_ACCESS = 84, FATTR4_TIME_DELEG_MODIFY = 85, + FATTR4_OPEN_ARGUMENTS = 86, }; /* @@ -595,6 +596,7 @@ enum { #define FATTR4_WORD2_XATTR_SUPPORT BIT(FATTR4_XATTR_SUPPORT - 64) #define FATTR4_WORD2_TIME_DELEG_ACCESS BIT(FATTR4_TIME_DELEG_ACCESS - 64) #define FATTR4_WORD2_TIME_DELEG_MODIFY BIT(FATTR4_TIME_DELEG_MODIFY - 64) +#define FATTR4_WORD2_OPEN_ARGUMENTS BIT(FATTR4_OPEN_ARGUMENTS - 64) /* MDS threshold bitmap bits */ #define THRESHOLD_RD (1UL << 0) diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index d8cfa956d24c..af510a7ec46a 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1213,6 +1213,14 @@ struct nfs4_statfs_res { struct nfs_fsstat *fsstat; }; +struct nfs4_open_caps { + u32 oa_share_access[1]; + u32 oa_share_deny[1]; + u32 oa_share_access_want[1]; + u32 oa_open_claim[1]; + u32 oa_createmode[1]; +}; + struct nfs4_server_caps_arg { struct nfs4_sequence_args seq_args; struct nfs_fh *fhandle; @@ -1229,6 +1237,7 @@ struct nfs4_server_caps_res { u32 fh_expire_type; u32 case_insensitive; u32 case_preserving; + struct nfs4_open_caps open_caps; }; #define NFS4_PATHNAME_MAXCOMPONENTS 512 -- cgit v1.2.3 From d2a00cceb93a4df2afaceb836297628d0aeec7ad Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 16 Jun 2024 21:21:33 -0400 Subject: NFSv4: Detect support for OPEN4_SHARE_ACCESS_WANT_OPEN_XOR_DELEGATION If the server supports the NFSv4.2 protocol extension to optimise away returning a stateid when it returns a delegation, then we cache that information in another 
capability flag. Signed-off-by: Trond Myklebust Signed-off-by: Lance Shelton Reviewed-by: Jeff Layton Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 4 ++++ include/linux/nfs_fs_sb.h | 1 + include/uapi/linux/nfs4.h | 2 ++ 3 files changed, 7 insertions(+) (limited to 'include/linux') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index ae91492e9521..adf4fc8610f6 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3990,6 +3990,10 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f sizeof(server->attr_bitmask)); server->attr_bitmask_nl[2] &= ~FATTR4_WORD2_SECURITY_LABEL; + if (res.open_caps.oa_share_access_want[0] & + NFS4_SHARE_WANT_OPEN_XOR_DELEGATION) + server->caps |= NFS_CAP_OPEN_XOR; + memcpy(server->cache_consistency_bitmask, res.attr_bitmask, sizeof(server->cache_consistency_bitmask)); server->cache_consistency_bitmask[0] &= FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE; server->cache_consistency_bitmask[1] &= FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY; diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 5a76a87cd924..fe5b1a8bd723 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -278,6 +278,7 @@ struct nfs_server { #define NFS_CAP_LGOPEN (1U << 5) #define NFS_CAP_CASE_INSENSITIVE (1U << 6) #define NFS_CAP_CASE_PRESERVING (1U << 7) +#define NFS_CAP_OPEN_XOR (1U << 12) #define NFS_CAP_DELEGTIME (1U << 13) #define NFS_CAP_POSIX_LOCK (1U << 14) #define NFS_CAP_UIDGID_NOMAP (1U << 15) diff --git a/include/uapi/linux/nfs4.h b/include/uapi/linux/nfs4.h index afd7e32906c3..caf4db2fcbb9 100644 --- a/include/uapi/linux/nfs4.h +++ b/include/uapi/linux/nfs4.h @@ -46,6 +46,7 @@ #define NFS4_OPEN_RESULT_CONFIRM 0x0002 #define NFS4_OPEN_RESULT_LOCKTYPE_POSIX 0x0004 #define NFS4_OPEN_RESULT_PRESERVE_UNLINKED 0x0008 +#define NFS4_OPEN_RESULT_NO_OPEN_STATEID 0x0010 #define NFS4_OPEN_RESULT_MAY_NOTIFY_LOCK 0x0020 #define NFS4_SHARE_ACCESS_MASK 0x000F @@ 
-70,6 +71,7 @@ #define NFS4_SHARE_PUSH_DELEG_WHEN_UNCONTENDED 0x20000 #define NFS4_SHARE_WANT_DELEG_TIMESTAMPS 0x100000 +#define NFS4_SHARE_WANT_OPEN_XOR_DELEGATION 0x200000 #define NFS4_CDFC4_FORE 0x1 #define NFS4_CDFC4_BACK 0x2 -- cgit v1.2.3 From adb4b42d19aea91826621a8d0bac94cf2c08f8bc Mon Sep 17 00:00:00 2001 From: Lance Shelton Date: Sun, 16 Jun 2024 21:21:36 -0400 Subject: Return the delegation when deleting sillyrenamed files Add a callback to return the delegation in order to allow generic NFS code to return the delegation when appropriate. Signed-off-by: Trond Myklebust Signed-off-by: Lance Shelton Reviewed-by: Jeff Layton Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/nfs3proc.c | 8 ++++++++ fs/nfs/nfs4proc.c | 1 + fs/nfs/proc.c | 8 ++++++++ fs/nfs/unlink.c | 2 ++ include/linux/nfs_xdr.h | 1 + 5 files changed, 20 insertions(+) (limited to 'include/linux') diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index cab6c73d25d6..1566163c6d85 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -984,6 +984,13 @@ static int nfs3_have_delegation(struct inode *inode, fmode_t type, int flags) return 0; } +static int nfs3_return_delegation(struct inode *inode) +{ + if (S_ISREG(inode->i_mode)) + nfs_wb_all(inode); + return 0; +} + static const struct inode_operations nfs3_dir_inode_operations = { .create = nfs_create, .atomic_open = nfs_atomic_open_v23, @@ -1062,6 +1069,7 @@ const struct nfs_rpc_ops nfs_v3_clientops = { .clear_acl_cache = forget_all_cached_acls, .close_context = nfs_close_context, .have_delegation = nfs3_have_delegation, + .return_delegation = nfs3_return_delegation, .alloc_client = nfs_alloc_client, .init_client = nfs_init_client, .free_client = nfs_free_client, diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index b1376571f6ef..9376b5031acf 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -10871,6 +10871,7 @@ const struct nfs_rpc_ops nfs_v4_clientops = { .close_context = nfs4_close_context, .open_context 
= nfs4_atomic_open, .have_delegation = nfs4_have_delegation, + .return_delegation = nfs4_inode_return_delegation, .alloc_client = nfs4_alloc_client, .init_client = nfs4_init_client, .free_client = nfs4_free_client, diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index 995cc42b0fa0..6c09cd090c34 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -692,6 +692,13 @@ static int nfs_have_delegation(struct inode *inode, fmode_t type, int flags) return 0; } +static int nfs_return_delegation(struct inode *inode) +{ + if (S_ISREG(inode->i_mode)) + nfs_wb_all(inode); + return 0; +} + static const struct inode_operations nfs_dir_inode_operations = { .create = nfs_create, .lookup = nfs_lookup, @@ -757,6 +764,7 @@ const struct nfs_rpc_ops nfs_v2_clientops = { .lock_check_bounds = nfs_lock_check_bounds, .close_context = nfs_close_context, .have_delegation = nfs_have_delegation, + .return_delegation = nfs_return_delegation, .alloc_client = nfs_alloc_client, .init_client = nfs_init_client, .free_client = nfs_free_client, diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c index 0110299643a2..bf77399696a7 100644 --- a/fs/nfs/unlink.c +++ b/fs/nfs/unlink.c @@ -232,6 +232,8 @@ nfs_complete_unlink(struct dentry *dentry, struct inode *inode) dentry->d_fsdata = NULL; spin_unlock(&dentry->d_lock); + NFS_PROTO(inode)->return_delegation(inode); + if (NFS_STALE(inode) || !nfs_call_unlink(dentry, inode, data)) nfs_free_unlinkdata(data); } diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index af510a7ec46a..01efacae4634 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1840,6 +1840,7 @@ struct nfs_rpc_ops { struct iattr *iattr, int *); int (*have_delegation)(struct inode *, fmode_t, int); + int (*return_delegation)(struct inode *); struct nfs_client *(*alloc_client) (const struct nfs_client_initdata *); struct nfs_client *(*init_client) (struct nfs_client *, const struct nfs_client_initdata *); -- cgit v1.2.3 From 924cf3c91fe29c7ebd1b9d56e10f513c1bd7d4bd Mon Sep 17 
00:00:00 2001 From: Trond Myklebust Date: Thu, 13 Jun 2024 01:00:46 -0400 Subject: NFSv4.1: constify the stateid argument in nfs41_test_stateid() Signed-off-by: Trond Myklebust Reviewed-by: Jeff Layton Signed-off-by: Anna Schumaker --- fs/nfs/nfs4_fs.h | 3 ++- fs/nfs/nfs4proc.c | 24 ++++++++++++------------ fs/nfs/nfs4xdr.c | 2 +- include/linux/nfs_xdr.h | 2 +- 4 files changed, 16 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 7024230f0d1d..c2045a2a9d0f 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -67,7 +67,8 @@ struct nfs4_minor_version_ops { void (*free_lock_state)(struct nfs_server *, struct nfs4_lock_state *); int (*test_and_free_expired)(struct nfs_server *, - nfs4_stateid *, const struct cred *); + const nfs4_stateid *, + const struct cred *); struct nfs_seqid * (*alloc_seqid)(struct nfs_seqid_counter *, gfp_t); void (*session_trunk)(struct rpc_clnt *clnt, diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 26758acba3a6..db585a6a8f0d 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -103,10 +103,10 @@ static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, const struct cred *cred, struct nfs4_slot *slot, bool is_privileged); -static int nfs41_test_stateid(struct nfs_server *, nfs4_stateid *, - const struct cred *); +static int nfs41_test_stateid(struct nfs_server *, const nfs4_stateid *, + const struct cred *); static int nfs41_free_stateid(struct nfs_server *, const nfs4_stateid *, - const struct cred *, bool); + const struct cred *, bool); #endif #ifdef CONFIG_NFS_V4_SECURITY_LABEL @@ -2875,16 +2875,16 @@ static int nfs40_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *st } static int nfs40_test_and_free_expired_stateid(struct nfs_server *server, - nfs4_stateid *stateid, - const struct cred *cred) + const nfs4_stateid *stateid, + const struct cred *cred) { return -NFS4ERR_BAD_STATEID; } #if defined(CONFIG_NFS_V4_1) static int 
nfs41_test_and_free_expired_stateid(struct nfs_server *server, - nfs4_stateid *stateid, - const struct cred *cred) + const nfs4_stateid *stateid, + const struct cred *cred) { int status; @@ -10386,12 +10386,12 @@ out: } static int _nfs41_test_stateid(struct nfs_server *server, - nfs4_stateid *stateid, - const struct cred *cred) + const nfs4_stateid *stateid, + const struct cred *cred) { int status; struct nfs41_test_stateid_args args = { - .stateid = stateid, + .stateid = *stateid, }; struct nfs41_test_stateid_res res; struct rpc_message msg = { @@ -10447,8 +10447,8 @@ static void nfs4_handle_delay_or_session_error(struct nfs_server *server, * failed or the state ID is not currently valid. */ static int nfs41_test_stateid(struct nfs_server *server, - nfs4_stateid *stateid, - const struct cred *cred) + const nfs4_stateid *stateid, + const struct cred *cred) { struct nfs4_exception exception = { .interruptible = true, diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 98aab2c324c9..4bf7d5c09282 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -2137,7 +2137,7 @@ static void encode_test_stateid(struct xdr_stream *xdr, { encode_op_hdr(xdr, OP_TEST_STATEID, decode_test_stateid_maxsz, hdr); encode_uint32(xdr, 1); - encode_nfs4_stateid(xdr, args->stateid); + encode_nfs4_stateid(xdr, &args->stateid); } static void encode_free_stateid(struct xdr_stream *xdr, diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 01efacae4634..45623af3e7b8 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1438,7 +1438,7 @@ struct nfs41_secinfo_no_name_args { struct nfs41_test_stateid_args { struct nfs4_sequence_args seq_args; - nfs4_stateid *stateid; + nfs4_stateid stateid; }; struct nfs41_test_stateid_res { -- cgit v1.2.3 From 5468fc8298a97ca504aca8ac0c7b9e6fd7c1b588 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 13 Jun 2024 01:00:55 -0400 Subject: NFSv4/pNFS: Do layout state recovery upon reboot Some pNFS implementations, such as 
flexible files, want the client to send the layout stats and layout errors that may have incurred while the metadata server was booting. To do so, the client sends a layoutreturn with an all-zero stateid while the server is in grace during reboot recovery. Signed-off-by: Trond Myklebust Reviewed-by: Jeff Layton Signed-off-by: Anna Schumaker --- fs/nfs/flexfilelayout/flexfilelayout.c | 2 +- fs/nfs/nfs4state.c | 4 +- fs/nfs/pnfs.c | 106 ++++++++++++++++++++++++++++++--- fs/nfs/pnfs.h | 6 ++ include/linux/nfs_fs_sb.h | 1 + 5 files changed, 110 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 24188af56d5b..39ba9f4208aa 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -2548,7 +2548,7 @@ ff_layout_set_layoutdriver(struct nfs_server *server, const struct nfs_fh *dummy) { #if IS_ENABLED(CONFIG_NFS_V4_2) - server->caps |= NFS_CAP_LAYOUTSTATS; + server->caps |= NFS_CAP_LAYOUTSTATS | NFS_CAP_REBOOT_LAYOUTRETURN; #endif return 0; } diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 5b452411e8fd..877f682b45f2 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1863,6 +1863,7 @@ static void nfs4_state_end_reclaim_reboot(struct nfs_client *clp) if (!nfs4_state_clear_reclaim_reboot(clp)) return; + pnfs_destroy_all_layouts(clp); ops = clp->cl_mvops->reboot_recovery_ops; cred = nfs4_get_clid_cred(clp); err = nfs4_reclaim_complete(clp, ops, cred); @@ -2068,7 +2069,6 @@ static int nfs4_establish_lease(struct nfs_client *clp) put_cred(cred); if (status != 0) return status; - pnfs_destroy_all_layouts(clp); return 0; } @@ -2680,6 +2680,8 @@ static void nfs4_state_manager(struct nfs_client *clp) section = "reclaim reboot"; status = nfs4_do_reclaim(clp, clp->cl_mvops->reboot_recovery_ops); + if (status == 0) + status = pnfs_layout_handle_reboot(clp); if (status == -EAGAIN) continue; if (status < 0) diff --git 
a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 31df5fae7acb..aa698481bec8 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -61,6 +61,7 @@ static void pnfs_free_returned_lsegs(struct pnfs_layout_hdr *lo, u32 seq); static bool pnfs_lseg_dec_and_remove_zero(struct pnfs_layout_segment *lseg, struct list_head *tmp_list); +static int pnfs_layout_return_on_reboot(struct pnfs_layout_hdr *lo); /* Return the registered pnfs layout driver module matching given id */ static struct pnfs_layoutdriver_type * @@ -937,25 +938,37 @@ restart: return pnfs_layout_free_bulk_destroy_list(&layout_list, mode); } -int pnfs_layout_destroy_byclid(struct nfs_client *clp, - enum pnfs_layout_destroy_mode mode) +static void pnfs_layout_build_destroy_list_byclient(struct nfs_client *clp, + struct list_head *list) { struct nfs_server *server; - LIST_HEAD(layout_list); spin_lock(&clp->cl_lock); rcu_read_lock(); restart: list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { - if (pnfs_layout_bulk_destroy_byserver_locked(clp, - server, - &layout_list) != 0) + if (pnfs_layout_bulk_destroy_byserver_locked(clp, server, + list) != 0) goto restart; } rcu_read_unlock(); spin_unlock(&clp->cl_lock); +} - return pnfs_layout_free_bulk_destroy_list(&layout_list, mode); +static int pnfs_layout_do_destroy_byclid(struct nfs_client *clp, + struct list_head *list, + enum pnfs_layout_destroy_mode mode) +{ + pnfs_layout_build_destroy_list_byclient(clp, list); + return pnfs_layout_free_bulk_destroy_list(list, mode); +} + +int pnfs_layout_destroy_byclid(struct nfs_client *clp, + enum pnfs_layout_destroy_mode mode) +{ + LIST_HEAD(layout_list); + + return pnfs_layout_do_destroy_byclid(clp, &layout_list, mode); } /* @@ -971,6 +984,67 @@ pnfs_destroy_all_layouts(struct nfs_client *clp) pnfs_layout_destroy_byclid(clp, PNFS_LAYOUT_INVALIDATE); } +static void pnfs_layout_build_recover_list_byclient(struct nfs_client *clp, + struct list_head *list) +{ + struct nfs_server *server; + + spin_lock(&clp->cl_lock); + 
rcu_read_lock(); +restart: + list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { + if (!(server->caps & NFS_CAP_REBOOT_LAYOUTRETURN)) + continue; + if (pnfs_layout_bulk_destroy_byserver_locked(clp, server, + list) != 0) + goto restart; + } + rcu_read_unlock(); + spin_unlock(&clp->cl_lock); +} + +static int pnfs_layout_bulk_list_reboot(struct list_head *list) +{ + struct pnfs_layout_hdr *lo; + struct nfs_server *server; + int ret; + + list_for_each_entry(lo, list, plh_bulk_destroy) { + server = NFS_SERVER(lo->plh_inode); + ret = pnfs_layout_return_on_reboot(lo); + switch (ret) { + case 0: + continue; + case -NFS4ERR_BAD_STATEID: + server->caps &= ~NFS_CAP_REBOOT_LAYOUTRETURN; + break; + case -NFS4ERR_NO_GRACE: + break; + default: + goto err; + } + break; + } + return 0; +err: + return ret; +} + +int pnfs_layout_handle_reboot(struct nfs_client *clp) +{ + LIST_HEAD(list); + int ret = 0, ret2; + + pnfs_layout_build_recover_list_byclient(clp, &list); + if (!list_empty(&list)) + ret = pnfs_layout_bulk_list_reboot(&list); + ret2 = pnfs_layout_do_destroy_byclid(clp, &list, + PNFS_LAYOUT_INVALIDATE); + if (!ret) + ret = ret2; + return (ret == 0) ? 
0 : -EAGAIN; +} + static void pnfs_set_layout_cred(struct pnfs_layout_hdr *lo, const struct cred *cred) { @@ -1445,6 +1519,24 @@ pnfs_commit_and_return_layout(struct inode *inode) return ret; } +static int pnfs_layout_return_on_reboot(struct pnfs_layout_hdr *lo) +{ + struct inode *inode = lo->plh_inode; + const struct cred *cred; + + spin_lock(&inode->i_lock); + if (!pnfs_layout_is_valid(lo)) { + spin_unlock(&inode->i_lock); + return 0; + } + cred = get_cred(lo->plh_lc_cred); + pnfs_get_layout_hdr(lo); + spin_unlock(&inode->i_lock); + + return pnfs_send_layoutreturn(lo, &zero_stateid, &cred, IOMODE_ANY, + PNFS_FL_LAYOUTRETURN_PRIVILEGED); +} + bool pnfs_roc(struct inode *ino, struct nfs4_layoutreturn_args *args, struct nfs4_layoutreturn_res *res, diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index d192feb346b4..bb5142b4e67a 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -356,6 +356,7 @@ void pnfs_error_mark_layout_for_return(struct inode *inode, struct pnfs_layout_segment *lseg); void pnfs_layout_return_unused_byclid(struct nfs_client *clp, enum pnfs_iomode iomode); +int pnfs_layout_handle_reboot(struct nfs_client *clp); /* nfs4_deviceid_flags */ enum { @@ -737,6 +738,11 @@ static inline void pnfs_destroy_layout_final(struct nfs_inode *nfsi) { } +static inline int pnfs_layout_handle_reboot(struct nfs_client *clp) +{ + return 0; +} + static inline struct pnfs_layout_segment * pnfs_get_lseg(struct pnfs_layout_segment *lseg) { diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index fe5b1a8bd723..ba9df1848b35 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -278,6 +278,7 @@ struct nfs_server { #define NFS_CAP_LGOPEN (1U << 5) #define NFS_CAP_CASE_INSENSITIVE (1U << 6) #define NFS_CAP_CASE_PRESERVING (1U << 7) +#define NFS_CAP_REBOOT_LAYOUTRETURN (1U << 8) #define NFS_CAP_OPEN_XOR (1U << 12) #define NFS_CAP_DELEGTIME (1U << 13) #define NFS_CAP_POSIX_LOCK (1U << 14) -- cgit v1.2.3 From 2f1f31042ef07719a0d5cb4784b8a32d20c13110 Mon 
Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 1 Jul 2024 12:50:48 +0200 Subject: nfs: Block on write congestion Commit 6df25e58532b ("nfs: remove reliance on bdi congestion") introduced an NFS-private solution for limiting the number of writes outstanding against a particular server. Unlike previous bdi congestion this algorithm actually works and limits number of outstanding writeback pages to nfs_congestion_kb which scales with amount of client's memory and is capped at 256 MB. As a result some workloads such as random buffered writes over NFS got slower (from ~170 MB/s to ~126 MB/s). The fio command to reproduce is: fio --direct=0 --ioengine=sync --thread --invalidate=1 --group_reporting=1 --runtime=300 --fallocate=posix --ramp_time=10 --new_group --rw=randwrite --size=64256m --numjobs=4 --bs=4k --fsync_on_close=1 --end_fsync=1 This happens because the client sends ~256 MB worth of dirty pages to the server and any further background writeback request is ignored until the number of writeback pages gets below the threshold of 192 MB. By the time this happens and the client decides to trigger another round of writeback, the server often has no pages to write and the disk is idle. To fix this problem, make the client react faster to eased congestion of the server by blocking and waiting for congestion to resolve instead of aborting writeback. This improves the random 4k buffered write throughput to 184 MB/s. 
Reviewed-by: Sagi Grimberg Reviewed-by: Jeff Layton Signed-off-by: Jan Kara Signed-off-by: Anna Schumaker --- fs/nfs/client.c | 1 + fs/nfs/write.c | 15 +++++++++++---- include/linux/nfs_fs_sb.h | 1 + 3 files changed, 13 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 3b252dceebf5..8286edd6062d 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -994,6 +994,7 @@ struct nfs_server *nfs_alloc_server(void) server->change_attr_type = NFS4_CHANGE_TYPE_IS_UNDEFINED; + init_waitqueue_head(&server->write_congestion_wait); atomic_long_set(&server->writeback, 0); ida_init(&server->openowner_id); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 4321cdc581bb..81845ab2e00a 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -425,8 +425,10 @@ static void nfs_folio_end_writeback(struct folio *folio) folio_end_writeback(folio); if (atomic_long_dec_return(&nfss->writeback) < - NFS_CONGESTION_OFF_THRESH) + NFS_CONGESTION_OFF_THRESH) { nfss->write_congested = 0; + wake_up_all(&nfss->write_congestion_wait); + } } static void nfs_page_end_writeback(struct nfs_page *req) @@ -700,12 +702,17 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc) struct nfs_pageio_descriptor pgio; struct nfs_io_completion *ioc = NULL; unsigned int mntflags = NFS_SERVER(inode)->flags; + struct nfs_server *nfss = NFS_SERVER(inode); int priority = 0; int err; - if (wbc->sync_mode == WB_SYNC_NONE && - NFS_SERVER(inode)->write_congested) - return 0; + /* Wait with writeback until write congestion eases */ + if (wbc->sync_mode == WB_SYNC_NONE && nfss->write_congested) { + err = wait_event_killable(nfss->write_congestion_wait, + nfss->write_congested == 0); + if (err) + return err; + } nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES); diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index ba9df1848b35..1df86ab98c77 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -140,6 +140,7 @@ 
struct nfs_server { struct rpc_clnt * client_acl; /* ACL RPC client handle */ struct nlm_host *nlm_host; /* NLM client handle */ struct nfs_iostats __percpu *io_stats; /* I/O statistics */ + wait_queue_head_t write_congestion_wait; /* wait until write congestion eases */ atomic_long_t writeback; /* number of writeback pages */ unsigned int write_congested;/* flag set when writeback gets too high */ unsigned int flags; /* various flags */ -- cgit v1.2.3 From 7e8e78a0ba00c88f0ded86de64bdddc82e06b196 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 1 Jul 2024 07:26:48 +0200 Subject: nfs: remove dead code for the old swap over NFS implementation Remove the code testing folio_test_swapcache either explicitly or implicitly in pagemap.h headers, as is now handled using the direct I/O path and not the buffered I/O path that these helpers are located in. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Anna Schumaker --- fs/nfs/file.c | 6 +- fs/nfs/filelayout/filelayout.c | 1 - fs/nfs/fscache.c | 2 +- fs/nfs/internal.h | 8 +-- fs/nfs/pagelist.c | 2 +- fs/nfs/pnfs.h | 22 ------- fs/nfs/pnfs_nfs.c | 47 --------------- fs/nfs/read.c | 2 +- fs/nfs/write.c | 128 ++++++++--------------------------------- include/linux/nfs_page.h | 4 +- 10 files changed, 36 insertions(+), 186 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 834e612262e6..0e2f87120cb8 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -427,7 +427,7 @@ static int nfs_write_end(struct file *file, struct address_space *mapping, static void nfs_invalidate_folio(struct folio *folio, size_t offset, size_t length) { - struct inode *inode = folio_file_mapping(folio)->host; + struct inode *inode = folio->mapping->host; dfprintk(PAGECACHE, "NFS: invalidate_folio(%lu, %zu, %zu)\n", folio->index, offset, length); @@ -454,7 +454,7 @@ static bool nfs_release_folio(struct folio *folio, gfp_t gfp) if ((current_gfp_context(gfp) & GFP_KERNEL) != GFP_KERNEL 
|| current_is_kswapd()) return false; - if (nfs_wb_folio(folio_file_mapping(folio)->host, folio) < 0) + if (nfs_wb_folio(folio->mapping->host, folio) < 0) return false; } return nfs_fscache_release_folio(folio, gfp); @@ -606,7 +606,7 @@ static vm_fault_t nfs_vm_page_mkwrite(struct vm_fault *vmf) TASK_KILLABLE|TASK_FREEZABLE_UNSAFE); folio_lock(folio); - mapping = folio_file_mapping(folio); + mapping = folio->mapping; if (mapping != inode->i_mapping) goto out_unlock; diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c index 29d84dc66ca3..b6e9aeaf4ce2 100644 --- a/fs/nfs/filelayout/filelayout.c +++ b/fs/nfs/filelayout/filelayout.c @@ -1110,7 +1110,6 @@ static const struct pnfs_commit_ops filelayout_commit_ops = { .clear_request_commit = pnfs_generic_clear_request_commit, .scan_commit_lists = pnfs_generic_scan_commit_lists, .recover_commit_reqs = pnfs_generic_recover_commit_reqs, - .search_commit_reqs = pnfs_generic_search_commit_reqs, .commit_pagelist = filelayout_commit_pagelist, }; diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c index ddc1ee031955..7202ce84d0eb 100644 --- a/fs/nfs/fscache.c +++ b/fs/nfs/fscache.c @@ -341,7 +341,7 @@ void nfs_netfs_initiate_read(struct nfs_pgio_header *hdr) int nfs_netfs_folio_unlock(struct folio *folio) { - struct inode *inode = folio_file_mapping(folio)->host; + struct inode *inode = folio->mapping->host; /* * If fscache is enabled, netfs will unlock pages. 
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 9f0f4534744b..87ebc4608c31 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -785,7 +785,7 @@ static inline void nfs_folio_mark_unstable(struct folio *folio, struct nfs_commit_info *cinfo) { if (folio && !cinfo->dreq) { - struct inode *inode = folio_file_mapping(folio)->host; + struct inode *inode = folio->mapping->host; long nr = folio_nr_pages(folio); /* This page is really still in write-back - just that the @@ -803,7 +803,7 @@ static inline void nfs_folio_mark_unstable(struct folio *folio, static inline unsigned int nfs_page_length(struct page *page) { - loff_t i_size = i_size_read(page_file_mapping(page)->host); + loff_t i_size = i_size_read(page->mapping->host); if (i_size > 0) { pgoff_t index = page_index(page); @@ -821,10 +821,10 @@ unsigned int nfs_page_length(struct page *page) */ static inline size_t nfs_folio_length(struct folio *folio) { - loff_t i_size = i_size_read(folio_file_mapping(folio)->host); + loff_t i_size = i_size_read(folio->mapping->host); if (i_size > 0) { - pgoff_t index = folio_index(folio) >> folio_order(folio); + pgoff_t index = folio->index >> folio_order(folio); pgoff_t end_index = (i_size - 1) >> folio_shift(folio); if (index < end_index) return folio_size(folio); diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 040b6b79c75e..3b006bcbcc87 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -569,7 +569,7 @@ struct nfs_page *nfs_page_create_from_folio(struct nfs_open_context *ctx, if (IS_ERR(l_ctx)) return ERR_CAST(l_ctx); - ret = nfs_page_create(l_ctx, offset, folio_index(folio), offset, count); + ret = nfs_page_create(l_ctx, offset, folio->index, offset, count); if (!IS_ERR(ret)) { nfs_page_assign_folio(ret, folio); nfs_page_group_init(ret, NULL); diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index bb5142b4e67a..1fc40afcbf1f 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -199,8 +199,6 @@ struct pnfs_commit_ops { int max); void (*recover_commit_reqs) 
(struct list_head *list, struct nfs_commit_info *cinfo); - struct nfs_page * (*search_commit_reqs)(struct nfs_commit_info *cinfo, - struct folio *folio); }; struct pnfs_layout_hdr { @@ -409,8 +407,6 @@ void pnfs_generic_prepare_to_resend_writes(struct nfs_commit_data *data); void pnfs_generic_rw_release(void *data); void pnfs_generic_recover_commit_reqs(struct list_head *dst, struct nfs_commit_info *cinfo); -struct nfs_page *pnfs_generic_search_commit_reqs(struct nfs_commit_info *cinfo, - struct folio *folio); int pnfs_generic_commit_pagelist(struct inode *inode, struct list_head *mds_pages, int how, @@ -570,17 +566,6 @@ pnfs_recover_commit_reqs(struct list_head *head, struct nfs_commit_info *cinfo) fl_cinfo->ops->recover_commit_reqs(head, cinfo); } -static inline struct nfs_page * -pnfs_search_commit_reqs(struct inode *inode, struct nfs_commit_info *cinfo, - struct folio *folio) -{ - struct pnfs_ds_commit_info *fl_cinfo = cinfo->ds; - - if (!fl_cinfo->ops || !fl_cinfo->ops->search_commit_reqs) - return NULL; - return fl_cinfo->ops->search_commit_reqs(cinfo, folio); -} - /* Should the pNFS client commit and return the layout upon a setattr */ static inline bool pnfs_ld_layoutret_on_setattr(struct inode *inode) @@ -882,13 +867,6 @@ pnfs_recover_commit_reqs(struct list_head *head, struct nfs_commit_info *cinfo) { } -static inline struct nfs_page * -pnfs_search_commit_reqs(struct inode *inode, struct nfs_commit_info *cinfo, - struct folio *folio) -{ - return NULL; -} - static inline int pnfs_layoutcommit_inode(struct inode *inode, bool sync) { return 0; diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index 88e061bd711b..a74ee69a2fa6 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -351,53 +351,6 @@ void pnfs_generic_recover_commit_reqs(struct list_head *dst, } EXPORT_SYMBOL_GPL(pnfs_generic_recover_commit_reqs); -static struct nfs_page * -pnfs_bucket_search_commit_reqs(struct pnfs_commit_bucket *buckets, - unsigned int nbuckets, struct folio *folio) -{ - 
struct nfs_page *req; - struct pnfs_commit_bucket *b; - unsigned int i; - - /* Linearly search the commit lists for each bucket until a matching - * request is found */ - for (i = 0, b = buckets; i < nbuckets; i++, b++) { - list_for_each_entry(req, &b->written, wb_list) { - if (nfs_page_to_folio(req) == folio) - return req->wb_head; - } - list_for_each_entry(req, &b->committing, wb_list) { - if (nfs_page_to_folio(req) == folio) - return req->wb_head; - } - } - return NULL; -} - -/* pnfs_generic_search_commit_reqs - Search lists in @cinfo for the head request - * for @folio - * @cinfo - commit info for current inode - * @folio - page to search for matching head request - * - * Return: the head request if one is found, otherwise %NULL. - */ -struct nfs_page *pnfs_generic_search_commit_reqs(struct nfs_commit_info *cinfo, - struct folio *folio) -{ - struct pnfs_ds_commit_info *fl_cinfo = cinfo->ds; - struct pnfs_commit_array *array; - struct nfs_page *req; - - list_for_each_entry(array, &fl_cinfo->commits, cinfo_list) { - req = pnfs_bucket_search_commit_reqs(array->buckets, - array->nbuckets, folio); - if (req) - return req; - } - return NULL; -} -EXPORT_SYMBOL_GPL(pnfs_generic_search_commit_reqs); - static struct pnfs_layout_segment * pnfs_bucket_get_committing(struct list_head *head, struct pnfs_commit_bucket *bucket, diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 1b0e06c11983..036ede4875ca 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -289,7 +289,7 @@ int nfs_read_add_folio(struct nfs_pageio_descriptor *pgio, struct nfs_open_context *ctx, struct folio *folio) { - struct inode *inode = folio_file_mapping(folio)->host; + struct inode *inode = folio->mapping->host; struct nfs_server *server = NFS_SERVER(inode); size_t fsize = folio_size(folio); unsigned int rsize = server->rsize; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 81845ab2e00a..dc432f581fa7 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -63,9 +63,6 @@ static void 
nfs_clear_request_commit(struct nfs_commit_info *cinfo, struct nfs_page *req); static void nfs_init_cinfo_from_inode(struct nfs_commit_info *cinfo, struct inode *inode); -static struct nfs_page * -nfs_page_search_commits_for_head_request_locked(struct nfs_inode *nfsi, - struct folio *folio); static struct kmem_cache *nfs_wdata_cachep; static mempool_t *nfs_wdata_mempool; @@ -178,16 +175,16 @@ static struct nfs_page *nfs_folio_private_request(struct folio *folio) } /** - * nfs_folio_find_private_request - find head request associated with a folio + * nfs_folio_find_head_request - find head request associated with a folio * @folio: pointer to folio * * must be called while holding the inode lock. * * returns matching head request with reference held, or NULL if not found. */ -static struct nfs_page *nfs_folio_find_private_request(struct folio *folio) +static struct nfs_page *nfs_folio_find_head_request(struct folio *folio) { - struct address_space *mapping = folio_file_mapping(folio); + struct address_space *mapping = folio->mapping; struct nfs_page *req; if (!folio_test_private(folio)) @@ -202,45 +199,9 @@ static struct nfs_page *nfs_folio_find_private_request(struct folio *folio) return req; } -static struct nfs_page *nfs_folio_find_swap_request(struct folio *folio) -{ - struct inode *inode = folio_file_mapping(folio)->host; - struct nfs_inode *nfsi = NFS_I(inode); - struct nfs_page *req = NULL; - if (!folio_test_swapcache(folio)) - return NULL; - mutex_lock(&nfsi->commit_mutex); - if (folio_test_swapcache(folio)) { - req = nfs_page_search_commits_for_head_request_locked(nfsi, - folio); - if (req) { - WARN_ON_ONCE(req->wb_head != req); - kref_get(&req->wb_kref); - } - } - mutex_unlock(&nfsi->commit_mutex); - return req; -} - -/** - * nfs_folio_find_head_request - find head request associated with a folio - * @folio: pointer to folio - * - * returns matching head request with reference held, or NULL if not found. 
- */ -static struct nfs_page *nfs_folio_find_head_request(struct folio *folio) -{ - struct nfs_page *req; - - req = nfs_folio_find_private_request(folio); - if (!req) - req = nfs_folio_find_swap_request(folio); - return req; -} - static struct nfs_page *nfs_folio_find_and_lock_request(struct folio *folio) { - struct inode *inode = folio_file_mapping(folio)->host; + struct inode *inode = folio->mapping->host; struct nfs_page *req, *head; int ret; @@ -261,8 +222,6 @@ static struct nfs_page *nfs_folio_find_and_lock_request(struct folio *folio) /* Ensure that nobody removed the request before we locked it */ if (head == nfs_folio_private_request(folio)) break; - if (folio_test_swapcache(folio)) - break; nfs_unlock_and_release_request(head); } return head; @@ -272,14 +231,14 @@ static struct nfs_page *nfs_folio_find_and_lock_request(struct folio *folio) static void nfs_grow_file(struct folio *folio, unsigned int offset, unsigned int count) { - struct inode *inode = folio_file_mapping(folio)->host; + struct inode *inode = folio->mapping->host; loff_t end, i_size; pgoff_t end_index; spin_lock(&inode->i_lock); i_size = i_size_read(inode); end_index = ((i_size - 1) >> folio_shift(folio)) << folio_order(folio); - if (i_size > 0 && folio_index(folio) < end_index) + if (i_size > 0 && folio->index < end_index) goto out; end = folio_file_pos(folio) + (loff_t)offset + (loff_t)count; if (i_size >= end) @@ -311,7 +270,7 @@ static void nfs_set_pageerror(struct address_space *mapping) static void nfs_mapping_set_error(struct folio *folio, int error) { - struct address_space *mapping = folio_file_mapping(folio); + struct address_space *mapping = folio->mapping; folio_set_error(folio); filemap_set_wb_err(mapping, error); @@ -412,7 +371,7 @@ int nfs_congestion_kb; static void nfs_folio_set_writeback(struct folio *folio) { - struct nfs_server *nfss = NFS_SERVER(folio_file_mapping(folio)->host); + struct nfs_server *nfss = NFS_SERVER(folio->mapping->host); folio_start_writeback(folio); if 
(atomic_long_inc_return(&nfss->writeback) > NFS_CONGESTION_ON_THRESH) @@ -421,7 +380,7 @@ static void nfs_folio_set_writeback(struct folio *folio) static void nfs_folio_end_writeback(struct folio *folio) { - struct nfs_server *nfss = NFS_SERVER(folio_file_mapping(folio)->host); + struct nfs_server *nfss = NFS_SERVER(folio->mapping->host); folio_end_writeback(folio); if (atomic_long_dec_return(&nfss->writeback) < @@ -569,7 +528,7 @@ void nfs_join_page_group(struct nfs_page *head, struct nfs_commit_info *cinfo, */ static struct nfs_page *nfs_lock_and_join_requests(struct folio *folio) { - struct inode *inode = folio_file_mapping(folio)->host; + struct inode *inode = folio->mapping->host; struct nfs_page *head; struct nfs_commit_info cinfo; int ret; @@ -645,7 +604,7 @@ static int nfs_page_async_flush(struct folio *folio, nfs_redirty_request(req); pgio->pg_error = 0; } else - nfs_add_stats(folio_file_mapping(folio)->host, + nfs_add_stats(folio->mapping->host, NFSIOS_WRITEPAGES, 1); out: return ret; @@ -657,7 +616,7 @@ out_launder: static int nfs_do_writepage(struct folio *folio, struct writeback_control *wbc, struct nfs_pageio_descriptor *pgio) { - nfs_pageio_cond_complete(pgio, folio_index(folio)); + nfs_pageio_cond_complete(pgio, folio->index); return nfs_page_async_flush(folio, wbc, pgio); } @@ -668,7 +627,7 @@ static int nfs_writepage_locked(struct folio *folio, struct writeback_control *wbc) { struct nfs_pageio_descriptor pgio; - struct inode *inode = folio_file_mapping(folio)->host; + struct inode *inode = folio->mapping->host; int err; nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE); @@ -751,24 +710,17 @@ out_err: static void nfs_inode_add_request(struct nfs_page *req) { struct folio *folio = nfs_page_to_folio(req); - struct address_space *mapping = folio_file_mapping(folio); + struct address_space *mapping = folio->mapping; struct nfs_inode *nfsi = NFS_I(mapping->host); WARN_ON_ONCE(req->wb_this_page != req); /* Lock the request! 
*/ nfs_lock_request(req); - - /* - * Swap-space should not get truncated. Hence no need to plug the race - * with invalidate/truncate. - */ spin_lock(&mapping->i_private_lock); - if (likely(!folio_test_swapcache(folio))) { - set_bit(PG_MAPPED, &req->wb_flags); - folio_set_private(folio); - folio->private = req; - } + set_bit(PG_MAPPED, &req->wb_flags); + folio_set_private(folio); + folio->private = req; spin_unlock(&mapping->i_private_lock); atomic_long_inc(&nfsi->nrequests); /* this a head request for a page group - mark it as having an @@ -788,10 +740,10 @@ static void nfs_inode_remove_request(struct nfs_page *req) if (nfs_page_group_sync_on_bit(req, PG_REMOVE)) { struct folio *folio = nfs_page_to_folio(req->wb_head); - struct address_space *mapping = folio_file_mapping(folio); + struct address_space *mapping = folio->mapping; spin_lock(&mapping->i_private_lock); - if (likely(folio && !folio_test_swapcache(folio))) { + if (likely(folio)) { folio->private = NULL; folio_clear_private(folio); clear_bit(PG_MAPPED, &req->wb_head->wb_flags); @@ -812,38 +764,6 @@ static void nfs_mark_request_dirty(struct nfs_page *req) filemap_dirty_folio(folio_mapping(folio), folio); } -/* - * nfs_page_search_commits_for_head_request_locked - * - * Search through commit lists on @inode for the head request for @folio. - * Must be called while holding the inode (which is cinfo) lock. - * - * Returns the head request if found, or NULL if not found. 
- */ -static struct nfs_page * -nfs_page_search_commits_for_head_request_locked(struct nfs_inode *nfsi, - struct folio *folio) -{ - struct nfs_page *freq, *t; - struct nfs_commit_info cinfo; - struct inode *inode = &nfsi->vfs_inode; - - nfs_init_cinfo_from_inode(&cinfo, inode); - - /* search through pnfs commit lists */ - freq = pnfs_search_commit_reqs(inode, &cinfo, folio); - if (freq) - return freq->wb_head; - - /* Linearly search the commit list for the correct request */ - list_for_each_entry_safe(freq, t, &cinfo.mds->list, wb_list) { - if (nfs_page_to_folio(freq) == folio) - return freq->wb_head; - } - - return NULL; -} - /** * nfs_request_add_commit_list_locked - add request to a commit list * @req: pointer to a struct nfs_page @@ -950,7 +870,7 @@ static void nfs_folio_clear_commit(struct folio *folio) long nr = folio_nr_pages(folio); node_stat_mod_folio(folio, NR_WRITEBACK, -nr); - wb_stat_mod(&inode_to_bdi(folio_file_mapping(folio)->host)->wb, + wb_stat_mod(&inode_to_bdi(folio->mapping->host)->wb, WB_WRITEBACK, -nr); } } @@ -1135,7 +1055,7 @@ out_flushme: */ nfs_mark_request_dirty(req); nfs_unlock_and_release_request(req); - error = nfs_wb_folio(folio_file_mapping(folio)->host, folio); + error = nfs_wb_folio(folio->mapping->host, folio); return (error < 0) ? 
ERR_PTR(error) : NULL; } @@ -1211,7 +1131,7 @@ int nfs_flush_incompatible(struct file *file, struct folio *folio) nfs_release_request(req); if (!do_flush) return 0; - status = nfs_wb_folio(folio_file_mapping(folio)->host, folio); + status = nfs_wb_folio(folio->mapping->host, folio); } while (status == 0); return status; } @@ -1285,7 +1205,7 @@ out: */ static bool nfs_folio_write_uptodate(struct folio *folio, unsigned int pagelen) { - struct inode *inode = folio_file_mapping(folio)->host; + struct inode *inode = folio->mapping->host; struct nfs_inode *nfsi = NFS_I(inode); if (nfs_have_delegated_attributes(inode)) @@ -1363,7 +1283,7 @@ int nfs_update_folio(struct file *file, struct folio *folio, unsigned int offset, unsigned int count) { struct nfs_open_context *ctx = nfs_file_open_context(file); - struct address_space *mapping = folio_file_mapping(folio); + struct address_space *mapping = folio->mapping; struct inode *inode = mapping->host; unsigned int pagelen = nfs_folio_length(folio); int status = 0; diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 1c315f854ea8..7bc31df457ea 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -208,8 +208,8 @@ static inline struct inode *nfs_page_to_inode(const struct nfs_page *req) struct folio *folio = nfs_page_to_folio(req); if (folio == NULL) - return page_file_mapping(req->wb_page)->host; - return folio_file_mapping(folio)->host; + return req->wb_page->mapping->host; + return folio->mapping->host; } /** -- cgit v1.2.3 From 9eb7c484db1ae993648fc9b9d48a295f4d99afb8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 1 Jul 2024 07:26:50 +0200 Subject: nfs: simplify nfs_folio_find_and_lock_request nfs_folio_find_and_lock_request and the nfs_page_group_lock_head helper called by it spend quite some effort to deal with head vs subrequests. But given that only the head request can be stashed in the folio private data, none of that is required. 
Fold the locking logic from nfs_page_group_lock_head into nfs_folio_find_and_lock_request and simplify the result based on the invariant that we always find the head request in the folio private data. Signed-off-by: Christoph Hellwig Signed-off-by: Anna Schumaker --- fs/nfs/pagelist.c | 19 ------------------- fs/nfs/write.c | 38 +++++++++++++++++++++----------------- include/linux/nfs_page.h | 1 - 3 files changed, 21 insertions(+), 37 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 3b006bcbcc87..e48cc69a2361 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -187,25 +187,6 @@ nfs_async_iocounter_wait(struct rpc_task *task, struct nfs_lock_context *l_ctx) } EXPORT_SYMBOL_GPL(nfs_async_iocounter_wait); -/* - * nfs_page_lock_head_request - page lock the head of the page group - * @req: any member of the page group - */ -struct nfs_page * -nfs_page_group_lock_head(struct nfs_page *req) -{ - struct nfs_page *head = req->wb_head; - - while (!nfs_lock_request(head)) { - int ret = nfs_wait_on_request(head); - if (ret < 0) - return ERR_PTR(ret); - } - if (head != req) - kref_get(&head->wb_kref); - return head; -} - /* * nfs_unroll_locks - unlock all newly locked reqs and wait on @req * @head: head request of page group, must be holding head lock diff --git a/fs/nfs/write.c b/fs/nfs/write.c index a56bb49af55a..69336bca26f5 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -197,28 +197,32 @@ static struct nfs_page *nfs_folio_find_head_request(struct folio *folio) static struct nfs_page *nfs_folio_find_and_lock_request(struct folio *folio) { struct inode *inode = folio->mapping->host; - struct nfs_page *req, *head; + struct nfs_page *head; int ret; - for (;;) { - req = nfs_folio_find_head_request(folio); - if (!req) - return req; - head = nfs_page_group_lock_head(req); - if (head != req) - nfs_release_request(req); - if (IS_ERR(head)) - return head; - ret = nfs_cancel_remove_inode(head, inode); - if (ret < 0) { - 
nfs_unlock_and_release_request(head); +retry: + head = nfs_folio_find_head_request(folio); + if (!head) + return NULL; + + while (!nfs_lock_request(head)) { + ret = nfs_wait_on_request(head); + if (ret < 0) return ERR_PTR(ret); - } - /* Ensure that nobody removed the request before we locked it */ - if (head == folio->private) - break; + } + + /* Ensure that nobody removed the request before we locked it */ + if (head != folio->private) { nfs_unlock_and_release_request(head); + goto retry; } + + ret = nfs_cancel_remove_inode(head, inode); + if (ret < 0) { + nfs_unlock_and_release_request(head); + return ERR_PTR(ret); + } + return head; } diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 7bc31df457ea..e799d93626f1 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -155,7 +155,6 @@ extern size_t nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, extern int nfs_wait_on_request(struct nfs_page *); extern void nfs_unlock_request(struct nfs_page *req); extern void nfs_unlock_and_release_request(struct nfs_page *); -extern struct nfs_page *nfs_page_group_lock_head(struct nfs_page *req); extern int nfs_page_group_lock_subrequests(struct nfs_page *head); extern void nfs_join_page_group(struct nfs_page *head, struct nfs_commit_info *cinfo, -- cgit v1.2.3 From 25edbcac6e32eab345e470d56ca9974a577b878b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 1 Jul 2024 07:26:52 +0200 Subject: nfs: fold nfs_page_group_lock_subrequests into nfs_lock_and_join_requests Fold nfs_page_group_lock_subrequests into nfs_lock_and_join_requests to prepare for future changes to this code, and move the helpers to write.c as well. 
Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Anna Schumaker --- fs/nfs/pagelist.c | 77 ------------------------------------------------ fs/nfs/write.c | 67 +++++++++++++++++++++++++++++++++++++++-- include/linux/nfs_page.h | 1 - 3 files changed, 64 insertions(+), 81 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index e48cc69a2361..fa7971072900 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -187,83 +187,6 @@ nfs_async_iocounter_wait(struct rpc_task *task, struct nfs_lock_context *l_ctx) } EXPORT_SYMBOL_GPL(nfs_async_iocounter_wait); -/* - * nfs_unroll_locks - unlock all newly locked reqs and wait on @req - * @head: head request of page group, must be holding head lock - * @req: request that couldn't lock and needs to wait on the req bit lock - * - * This is a helper function for nfs_lock_and_join_requests - * returns 0 on success, < 0 on error. - */ -static void -nfs_unroll_locks(struct nfs_page *head, struct nfs_page *req) -{ - struct nfs_page *tmp; - - /* relinquish all the locks successfully grabbed this run */ - for (tmp = head->wb_this_page ; tmp != req; tmp = tmp->wb_this_page) { - if (!kref_read(&tmp->wb_kref)) - continue; - nfs_unlock_and_release_request(tmp); - } -} - -/* - * nfs_page_group_lock_subreq - try to lock a subrequest - * @head: head request of page group - * @subreq: request to lock - * - * This is a helper function for nfs_lock_and_join_requests which - * must be called with the head request and page group both locked. - * On error, it returns with the page group unlocked. 
- */ -static int -nfs_page_group_lock_subreq(struct nfs_page *head, struct nfs_page *subreq) -{ - int ret; - - if (!kref_get_unless_zero(&subreq->wb_kref)) - return 0; - while (!nfs_lock_request(subreq)) { - nfs_page_group_unlock(head); - ret = nfs_wait_on_request(subreq); - if (!ret) - ret = nfs_page_group_lock(head); - if (ret < 0) { - nfs_unroll_locks(head, subreq); - nfs_release_request(subreq); - return ret; - } - } - return 0; -} - -/* - * nfs_page_group_lock_subrequests - try to lock the subrequests - * @head: head request of page group - * - * This is a helper function for nfs_lock_and_join_requests which - * must be called with the head request locked. - */ -int nfs_page_group_lock_subrequests(struct nfs_page *head) -{ - struct nfs_page *subreq; - int ret; - - ret = nfs_page_group_lock(head); - if (ret < 0) - return ret; - /* lock each request in the page group */ - for (subreq = head->wb_this_page; subreq != head; - subreq = subreq->wb_this_page) { - ret = nfs_page_group_lock_subreq(head, subreq); - if (ret < 0) - return ret; - } - nfs_page_group_unlock(head); - return 0; -} - /* * nfs_page_set_headlock - set the request PG_HEADLOCK * @req: request that is to be locked diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 93833f1dcbad..0fe9d7bf34db 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -478,6 +478,57 @@ void nfs_join_page_group(struct nfs_page *head, struct nfs_commit_info *cinfo, nfs_destroy_unlinked_subrequests(destroy_list, head, inode); } +/* + * nfs_unroll_locks - unlock all newly locked reqs and wait on @req + * @head: head request of page group, must be holding head lock + * @req: request that couldn't lock and needs to wait on the req bit lock + * + * This is a helper function for nfs_lock_and_join_requests + * returns 0 on success, < 0 on error. 
+ */ +static void +nfs_unroll_locks(struct nfs_page *head, struct nfs_page *req) +{ + struct nfs_page *tmp; + + /* relinquish all the locks successfully grabbed this run */ + for (tmp = head->wb_this_page ; tmp != req; tmp = tmp->wb_this_page) { + if (!kref_read(&tmp->wb_kref)) + continue; + nfs_unlock_and_release_request(tmp); + } +} + +/* + * nfs_page_group_lock_subreq - try to lock a subrequest + * @head: head request of page group + * @subreq: request to lock + * + * This is a helper function for nfs_lock_and_join_requests which + * must be called with the head request and page group both locked. + * On error, it returns with the page group unlocked. + */ +static int +nfs_page_group_lock_subreq(struct nfs_page *head, struct nfs_page *subreq) +{ + int ret; + + if (!kref_get_unless_zero(&subreq->wb_kref)) + return 0; + while (!nfs_lock_request(subreq)) { + nfs_page_group_unlock(head); + ret = nfs_wait_on_request(subreq); + if (!ret) + ret = nfs_page_group_lock(head); + if (ret < 0) { + nfs_unroll_locks(head, subreq); + nfs_release_request(subreq); + return ret; + } + } + return 0; +} + /* * nfs_lock_and_join_requests - join all subreqs to the head req * @folio: the folio used to lookup the "page group" of nfs_page structures @@ -496,7 +547,7 @@ void nfs_join_page_group(struct nfs_page *head, struct nfs_commit_info *cinfo, static struct nfs_page *nfs_lock_and_join_requests(struct folio *folio) { struct inode *inode = folio->mapping->host; - struct nfs_page *head; + struct nfs_page *head, *subreq; struct nfs_commit_info cinfo; int ret; @@ -526,11 +577,21 @@ retry: if (ret < 0) goto out_unlock; - /* lock each request in the page group */ - ret = nfs_page_group_lock_subrequests(head); + ret = nfs_page_group_lock(head); if (ret < 0) goto out_unlock; + /* lock each request in the page group */ + for (subreq = head->wb_this_page; + subreq != head; + subreq = subreq->wb_this_page) { + ret = nfs_page_group_lock_subreq(head, subreq); + if (ret < 0) + goto out_unlock; + } + 
+ nfs_page_group_unlock(head); + nfs_init_cinfo_from_inode(&cinfo, inode); nfs_join_page_group(head, &cinfo, inode); return head; diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index e799d93626f1..63eed97a18ad 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -155,7 +155,6 @@ extern size_t nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, extern int nfs_wait_on_request(struct nfs_page *); extern void nfs_unlock_request(struct nfs_page *req); extern void nfs_unlock_and_release_request(struct nfs_page *); -extern int nfs_page_group_lock_subrequests(struct nfs_page *head); extern void nfs_join_page_group(struct nfs_page *head, struct nfs_commit_info *cinfo, struct inode *inode); -- cgit v1.2.3 From f1b7c7552cbcf89e56b15ff481f3d19b53046291 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 1 Jul 2024 07:26:53 +0200 Subject: nfs: move nfs_wait_on_request to write.c nfs_wait_on_request is now only used in write.c. Move it there and mark it static. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Anna Schumaker --- fs/nfs/pagelist.c | 19 ------------------- fs/nfs/write.c | 17 +++++++++++++++++ include/linux/nfs_page.h | 1 - 3 files changed, 17 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index fa7971072900..04124f226665 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -598,25 +598,6 @@ void nfs_release_request(struct nfs_page *req) } EXPORT_SYMBOL_GPL(nfs_release_request); -/** - * nfs_wait_on_request - Wait for a request to complete. - * @req: request to wait upon. - * - * Interruptible by fatal signals only. - * The user is responsible for holding a count on the request. 
- */ -int -nfs_wait_on_request(struct nfs_page *req) -{ - if (!test_bit(PG_BUSY, &req->wb_flags)) - return 0; - set_bit(PG_CONTENDED2, &req->wb_flags); - smp_mb__after_atomic(); - return wait_on_bit_io(&req->wb_flags, PG_BUSY, - TASK_UNINTERRUPTIBLE); -} -EXPORT_SYMBOL_GPL(nfs_wait_on_request); - /* * nfs_generic_pg_test - determine if requests can be coalesced * @desc: pointer to descriptor diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 0fe9d7bf34db..089c242efac0 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -478,6 +478,23 @@ void nfs_join_page_group(struct nfs_page *head, struct nfs_commit_info *cinfo, nfs_destroy_unlinked_subrequests(destroy_list, head, inode); } +/** + * nfs_wait_on_request - Wait for a request to complete. + * @req: request to wait upon. + * + * Interruptible by fatal signals only. + * The user is responsible for holding a count on the request. + */ +static int nfs_wait_on_request(struct nfs_page *req) +{ + if (!test_bit(PG_BUSY, &req->wb_flags)) + return 0; + set_bit(PG_CONTENDED2, &req->wb_flags); + smp_mb__after_atomic(); + return wait_on_bit_io(&req->wb_flags, PG_BUSY, + TASK_UNINTERRUPTIBLE); +} + /* * nfs_unroll_locks - unlock all newly locked reqs and wait on @req * @head: head request of page group, must be holding head lock diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 63eed97a18ad..169b4ae30ff4 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -152,7 +152,6 @@ extern void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *, pgoff_t); extern size_t nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, struct nfs_page *prev, struct nfs_page *req); -extern int nfs_wait_on_request(struct nfs_page *); extern void nfs_unlock_request(struct nfs_page *req); extern void nfs_unlock_and_release_request(struct nfs_page *); extern void nfs_join_page_group(struct nfs_page *head, -- cgit v1.2.3