From bd4928ec799b31c492eb63f9f4a0c1e0bb4bb3f7 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 17 Nov 2025 15:28:17 -0500 Subject: NFS: Avoid changing nlink when file removes and attribute updates race If a file removal races with another operation that updates its attributes, then skip the change to nlink, and just mark the attributes as being stale. Reported-by: Aiden Lambert Fixes: 59a707b0d42e ("NFS: Ensure we revalidate the inode correctly after remove or rename") Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index ea9f6ca8f30f..d557b0443e8b 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1894,13 +1894,15 @@ static int nfs_dentry_delete(const struct dentry *dentry) } /* Ensure that we revalidate inode->i_nlink */ -static void nfs_drop_nlink(struct inode *inode) +static void nfs_drop_nlink(struct inode *inode, unsigned long gencount) { + struct nfs_inode *nfsi = NFS_I(inode); + spin_lock(&inode->i_lock); /* drop the inode if we're reasonably sure this is the last link */ - if (inode->i_nlink > 0) + if (inode->i_nlink > 0 && gencount == nfsi->attr_gencount) drop_nlink(inode); - NFS_I(inode)->attr_gencount = nfs_inc_attr_generation_counter(); + nfsi->attr_gencount = nfs_inc_attr_generation_counter(); nfs_set_cache_invalid( inode, NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_CTIME | NFS_INO_INVALID_NLINK); @@ -1914,8 +1916,9 @@ static void nfs_drop_nlink(struct inode *inode) static void nfs_dentry_iput(struct dentry *dentry, struct inode *inode) { if (dentry->d_flags & DCACHE_NFSFS_RENAMED) { + unsigned long gencount = READ_ONCE(NFS_I(inode)->attr_gencount); nfs_complete_unlink(dentry, inode); - nfs_drop_nlink(inode); + nfs_drop_nlink(inode, gencount); } iput(inode); } @@ -2507,9 +2510,11 @@ static int nfs_safe_remove(struct dentry *dentry) trace_nfs_remove_enter(dir, dentry); if (inode != NULL) { + unsigned long gencount = 
READ_ONCE(NFS_I(inode)->attr_gencount); + error = NFS_PROTO(dir)->remove(dir, dentry); if (error == 0) - nfs_drop_nlink(inode); + nfs_drop_nlink(inode, gencount); } else error = NFS_PROTO(dir)->remove(dir, dentry); if (error == -ENOENT) @@ -2709,6 +2714,7 @@ int nfs_rename(struct mnt_idmap *idmap, struct inode *old_dir, { struct inode *old_inode = d_inode(old_dentry); struct inode *new_inode = d_inode(new_dentry); + unsigned long new_gencount = 0; struct dentry *dentry = NULL; struct rpc_task *task; bool must_unblock = false; @@ -2761,6 +2767,7 @@ int nfs_rename(struct mnt_idmap *idmap, struct inode *old_dir, } else { block_revalidate(new_dentry); must_unblock = true; + new_gencount = NFS_I(new_inode)->attr_gencount; spin_unlock(&new_dentry->d_lock); } @@ -2800,7 +2807,7 @@ out: new_dir, new_dentry, error); if (!error) { if (new_inode != NULL) - nfs_drop_nlink(new_inode); + nfs_drop_nlink(new_inode, new_gencount); /* * The d_move() should be here instead of in an async RPC completion * handler because we need the proper locks to move the dentry. If -- cgit v1.2.3 From 9bd545539b233725a3416801f7c374bff0327d6e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 19 Nov 2025 08:36:16 -0500 Subject: NFS: Initialise verifiers for visible dentries in readdir and lookup Ensure that the verifiers are initialised before calling d_splice_alias() in both nfs_prime_dcache() and nfs_lookup(). 
Reported-by: Michael Stoler Fixes: a1147b8281bd ("NFS: Fix up directory verifier races") Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index d557b0443e8b..2eead7e85be5 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -789,16 +789,17 @@ again: goto out; } + nfs_set_verifier(dentry, dir_verifier); inode = nfs_fhget(dentry->d_sb, entry->fh, entry->fattr); alias = d_splice_alias(inode, dentry); d_lookup_done(dentry); if (alias) { if (IS_ERR(alias)) goto out; + nfs_set_verifier(alias, dir_verifier); dput(dentry); dentry = alias; } - nfs_set_verifier(dentry, dir_verifier); trace_nfs_readdir_lookup(d_inode(parent), dentry, 0); out: dput(dentry); @@ -1994,13 +1995,14 @@ struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned in nfs_lookup_advise_force_readdirplus(dir, flags); no_entry: + nfs_set_verifier(dentry, dir_verifier); res = d_splice_alias(inode, dentry); if (res != NULL) { if (IS_ERR(res)) goto out; + nfs_set_verifier(res, dir_verifier); dentry = res; } - nfs_set_verifier(dentry, dir_verifier); out: trace_nfs_lookup_exit(dir, dentry, flags, PTR_ERR_OR_ZERO(res)); nfs_free_fattr(fattr); -- cgit v1.2.3 From 518c32a1bc4f8df1a8442ee8cdfea3e2fcff20a0 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 19 Nov 2025 08:39:50 -0500 Subject: NFS: Initialise verifiers for visible dentries in nfs_atomic_open() Ensure that the verifiers are initialised before calling d_splice_alias() in nfs_atomic_open(). 
Reported-by: Michael Stoler Fixes: 809fd143de88 ("NFSv4: Ensure nfs_atomic_open set the dentry verifier on ENOENT") Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 2eead7e85be5..3b8250ee0141 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -2144,12 +2144,12 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry, d_drop(dentry); switch (err) { case -ENOENT: - d_splice_alias(NULL, dentry); if (nfs_server_capable(dir, NFS_CAP_CASE_INSENSITIVE)) dir_verifier = inode_peek_iversion_raw(dir); else dir_verifier = nfs_save_change_attribute(dir); nfs_set_verifier(dentry, dir_verifier); + d_splice_alias(NULL, dentry); break; case -EISDIR: case -ENOTDIR: -- cgit v1.2.3 From 0f900f11002ff52391fc2aa4a75e59f26ed1c242 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 19 Nov 2025 08:43:21 -0500 Subject: NFS: Initialise verifiers for visible dentries in _nfs4_open_and_get_state Ensure that the verifiers are initialised before calling d_splice_alias() in _nfs4_open_and_get_state(). 
Reported-by: Michael Stoler Fixes: cf5b4059ba71 ("NFSv4: Fix races between open and dentry revalidation") Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 93c6ce04332b..6f4e14fb7b9b 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3174,18 +3174,6 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata, if (opendata->o_res.rflags & NFS4_OPEN_RESULT_PRESERVE_UNLINKED) set_bit(NFS_INO_PRESERVE_UNLINKED, &NFS_I(state->inode)->flags); - dentry = opendata->dentry; - if (d_really_is_negative(dentry)) { - struct dentry *alias; - d_drop(dentry); - alias = d_splice_alias(igrab(state->inode), dentry); - /* d_splice_alias() can't fail here - it's a non-directory */ - if (alias) { - dput(ctx->dentry); - ctx->dentry = dentry = alias; - } - } - switch(opendata->o_arg.claim) { default: break; @@ -3196,7 +3184,20 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata, break; if (opendata->o_res.delegation.type != 0) dir_verifier = nfs_save_change_attribute(dir); - nfs_set_verifier(dentry, dir_verifier); + } + + dentry = opendata->dentry; + nfs_set_verifier(dentry, dir_verifier); + if (d_really_is_negative(dentry)) { + struct dentry *alias; + d_drop(dentry); + alias = d_splice_alias(igrab(state->inode), dentry); + /* d_splice_alias() can't fail here - it's a non-directory */ + if (alias) { + dput(ctx->dentry); + nfs_set_verifier(alias, dir_verifier); + ctx->dentry = dentry = alias; + } } /* Parse layoutget results before we check for access */ -- cgit v1.2.3 From 2e47c3cc64b44b0b06cd68c2801db92ff143f2b2 Mon Sep 17 00:00:00 2001 From: Scott Mayhew Date: Mon, 3 Nov 2025 10:44:15 -0500 Subject: NFSv4: ensure the open stateid seqid doesn't go backwards We have observed an NFSv4 client receiving a LOCK reply with a status of NFS4ERR_OLD_STATEID and subsequently retrying the LOCK request with an earlier seqid 
value in the stateid. As this was for a new lockowner, that would imply that nfs_set_open_stateid_locked() had updated the open stateid seqid with an earlier value. Looking at nfs_set_open_stateid_locked(), if the incoming seqid is out of sequence, the task will sleep on the state->waitq for up to 5 seconds. If the task waits for the full 5 seconds, then after finishing the wait it'll update the open stateid seqid with whatever value the incoming seqid has. If there are multiple waiters in this scenario, then the last one to perform said update may not be the one with the highest seqid. Add a check to ensure that the seqid can only be incremented, and add a tracepoint to indicate when old seqids are skipped. Signed-off-by: Scott Mayhew Reviewed-by: Benjamin Coddington Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 13 +++++++++++-- fs/nfs/nfs4trace.h | 1 + 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 6f4e14fb7b9b..3b436ba2ed3b 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1780,8 +1780,17 @@ static void nfs_set_open_stateid_locked(struct nfs4_state *state, if (nfs_stateid_is_sequential(state, stateid)) break; - if (status) - break; + if (status) { + if (nfs4_stateid_match_other(stateid, &state->open_stateid) && + !nfs4_stateid_is_newer(stateid, &state->open_stateid)) { + trace_nfs4_open_stateid_update_skip(state->inode, + stateid, status); + return; + } else { + break; + } + } + /* Rely on seqids for serialisation with NFSv4.0 */ if (!nfs4_has_session(NFS_SERVER(state->inode)->nfs_client)) break; diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h index 9776d220cec3..6285128e631a 100644 --- a/fs/nfs/nfs4trace.h +++ b/fs/nfs/nfs4trace.h @@ -1353,6 +1353,7 @@ DEFINE_NFS4_INODE_STATEID_EVENT(nfs4_setattr); DEFINE_NFS4_INODE_STATEID_EVENT(nfs4_delegreturn); DEFINE_NFS4_INODE_STATEID_EVENT(nfs4_open_stateid_update); DEFINE_NFS4_INODE_STATEID_EVENT(nfs4_open_stateid_update_wait); 
+DEFINE_NFS4_INODE_STATEID_EVENT(nfs4_open_stateid_update_skip); DEFINE_NFS4_INODE_STATEID_EVENT(nfs4_close_stateid_update_wait); DECLARE_EVENT_CLASS(nfs4_getattr_event, -- cgit v1.2.3 From 8936ff47367e7ef659db6cfd70fb3dd060cc702a Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Tue, 4 Nov 2025 17:29:24 -0500 Subject: NFSv4.1: pass transport for callback shutdown When we are setting up the 4.1 callback server, we pass in the appropriate rpc_xprt transport pointer with which to associate the callback server structure. Similarly, pass in the rpc_xprt pointer for when we are shutting down the callback. This will be used to make sure that we free the server structure and then clear the rpc_xprt's bc_server pointer in a safe manner. Signed-off-by: Olga Kornievskaia Signed-off-by: Trond Myklebust --- fs/nfs/callback.c | 2 +- fs/nfs/callback.h | 3 ++- fs/nfs/nfs4client.c | 9 +++++++-- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index c8b837006bb2..8b674ee093a6 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -258,7 +258,7 @@ err_start: /* * Kill the callback thread if it's no longer being used. 
*/ -void nfs_callback_down(int minorversion, struct net *net) +void nfs_callback_down(int minorversion, struct net *net, struct rpc_xprt *xprt) { struct nfs_callback_data *cb_info = &nfs_callback_info[minorversion]; struct svc_serv *serv; diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h index 154a6ed1299f..8809f93d82c0 100644 --- a/fs/nfs/callback.h +++ b/fs/nfs/callback.h @@ -188,7 +188,8 @@ extern __be32 nfs4_callback_recall(void *argp, void *resp, struct cb_process_state *cps); #if IS_ENABLED(CONFIG_NFS_V4) extern int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt); -extern void nfs_callback_down(int minorversion, struct net *net); +extern void nfs_callback_down(int minorversion, struct net *net, + struct rpc_xprt *xprt); #endif /* CONFIG_NFS_V4 */ /* * nfs41: Callbacks are expected to not cause substantial latency, diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 3a4baed993c9..4e972f85d0ca 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -281,8 +281,13 @@ error: */ static void nfs4_destroy_callback(struct nfs_client *clp) { - if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state)) - nfs_callback_down(clp->cl_mvops->minor_version, clp->cl_net); + if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state)) { + struct rpc_xprt *xprt; + + xprt = rcu_dereference_raw(clp->cl_rpcclient->cl_xprt); + nfs_callback_down(clp->cl_mvops->minor_version, clp->cl_net, + xprt); + } } static void nfs4_shutdown_client(struct nfs_client *clp) -- cgit v1.2.3 From 441244d4273a8037b265fd254dfdaca5fa736ee2 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Tue, 4 Nov 2025 17:29:25 -0500 Subject: SUNRPC: cleanup common code in backchannel request Create a helper function for common code between rdma and tcp backchannel handling of the backchannel request. Make sure that access is protected by the bc_pa_lock lock. 
Signed-off-by: Olga Kornievskaia Signed-off-by: Trond Myklebust --- include/linux/sunrpc/bc_xprt.h | 1 + net/sunrpc/backchannel_rqst.c | 19 ++++++++++++++++--- net/sunrpc/xprtrdma/backchannel.c | 8 ++------ 3 files changed, 19 insertions(+), 9 deletions(-) diff --git a/include/linux/sunrpc/bc_xprt.h b/include/linux/sunrpc/bc_xprt.h index f22bf915dcf6..178f34ad8db6 100644 --- a/include/linux/sunrpc/bc_xprt.h +++ b/include/linux/sunrpc/bc_xprt.h @@ -25,6 +25,7 @@ void xprt_init_bc_request(struct rpc_rqst *req, struct rpc_task *task, void xprt_free_bc_request(struct rpc_rqst *req); int xprt_setup_backchannel(struct rpc_xprt *, unsigned int min_reqs); void xprt_destroy_backchannel(struct rpc_xprt *, unsigned int max_reqs); +void xprt_enqueue_bc_request(struct rpc_rqst *req); /* Socket backchannel transport methods */ int xprt_setup_bc(struct rpc_xprt *xprt, unsigned int min_reqs); diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c index caa94cf57123..efddea0f4b8b 100644 --- a/net/sunrpc/backchannel_rqst.c +++ b/net/sunrpc/backchannel_rqst.c @@ -354,7 +354,6 @@ found: void xprt_complete_bc_request(struct rpc_rqst *req, uint32_t copied) { struct rpc_xprt *xprt = req->rq_xprt; - struct svc_serv *bc_serv = xprt->bc_serv; spin_lock(&xprt->bc_pa_lock); list_del(&req->rq_bc_pa_list); @@ -365,7 +364,21 @@ void xprt_complete_bc_request(struct rpc_rqst *req, uint32_t copied) set_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state); dprintk("RPC: add callback request to list\n"); + xprt_enqueue_bc_request(req); +} + +void xprt_enqueue_bc_request(struct rpc_rqst *req) +{ + struct rpc_xprt *xprt = req->rq_xprt; + struct svc_serv *bc_serv; + xprt_get(xprt); - lwq_enqueue(&req->rq_bc_list, &bc_serv->sv_cb_list); - svc_pool_wake_idle_thread(&bc_serv->sv_pools[0]); + spin_lock(&xprt->bc_pa_lock); + bc_serv = xprt->bc_serv; + if (bc_serv) { + lwq_enqueue(&req->rq_bc_list, &bc_serv->sv_cb_list); + svc_pool_wake_idle_thread(&bc_serv->sv_pools[0]); + } + 
spin_unlock(&xprt->bc_pa_lock); } +EXPORT_SYMBOL_GPL(xprt_enqueue_bc_request); diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c index 8c817e755262..2f0f9618dd05 100644 --- a/net/sunrpc/xprtrdma/backchannel.c +++ b/net/sunrpc/xprtrdma/backchannel.c @@ -9,6 +9,7 @@ #include #include #include +#include #include "xprt_rdma.h" #include @@ -220,7 +221,6 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep) { struct rpc_xprt *xprt = &r_xprt->rx_xprt; - struct svc_serv *bc_serv; struct rpcrdma_req *req; struct rpc_rqst *rqst; struct xdr_buf *buf; @@ -261,11 +261,7 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt, trace_xprtrdma_cb_call(r_xprt, rqst); /* Queue rqst for ULP's callback service */ - bc_serv = xprt->bc_serv; - xprt_get(xprt); - lwq_enqueue(&rqst->rq_bc_list, &bc_serv->sv_cb_list); - - svc_pool_wake_idle_thread(&bc_serv->sv_pools[0]); + xprt_enqueue_bc_request(rqst); r_xprt->rx_stats.bcall_count++; return; -- cgit v1.2.3 From 6f8b26c90a4d645fd5c944c41a6f0fd61ec27c50 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Tue, 4 Nov 2025 17:29:26 -0500 Subject: SUNRPC: new helper function for stopping backchannel server Create a new backchannel function to stop the backchannel server and clear the bc_serv in transport protected under the bc_pa_lock. 
Signed-off-by: Olga Kornievskaia Signed-off-by: Trond Myklebust --- include/linux/sunrpc/bc_xprt.h | 6 ++++++ net/sunrpc/backchannel_rqst.c | 16 ++++++++++++++++ 2 files changed, 22 insertions(+) diff --git a/include/linux/sunrpc/bc_xprt.h b/include/linux/sunrpc/bc_xprt.h index 178f34ad8db6..98939cb664cf 100644 --- a/include/linux/sunrpc/bc_xprt.h +++ b/include/linux/sunrpc/bc_xprt.h @@ -32,6 +32,7 @@ int xprt_setup_bc(struct rpc_xprt *xprt, unsigned int min_reqs); void xprt_destroy_bc(struct rpc_xprt *xprt, unsigned int max_reqs); void xprt_free_bc_rqst(struct rpc_rqst *req); unsigned int xprt_bc_max_slots(struct rpc_xprt *xprt); +void xprt_svc_destroy_nullify_bc(struct rpc_xprt *xprt, struct svc_serv **serv); /* * Determine if a shared backchannel is in use @@ -69,5 +70,10 @@ static inline void set_bc_enabled(struct svc_serv *serv) static inline void xprt_free_bc_request(struct rpc_rqst *req) { } + +static inline void xprt_svc_destroy_nullify_bc(struct rpc_xprt *xprt, struct svc_serv **serv) +{ + svc_destroy(serv); +} #endif /* CONFIG_SUNRPC_BACKCHANNEL */ #endif /* _LINUX_SUNRPC_BC_XPRT_H */ diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c index efddea0f4b8b..68b1fcdea8f0 100644 --- a/net/sunrpc/backchannel_rqst.c +++ b/net/sunrpc/backchannel_rqst.c @@ -24,6 +24,22 @@ unsigned int xprt_bc_max_slots(struct rpc_xprt *xprt) return BC_MAX_SLOTS; } +/* + * Helper function to nullify backchannel server pointer in transport. + * We need to synchronize setting the pointer to NULL (done so after + * the backchannel server is shutdown) with the usage of that pointer + * by the backchannel request processing routines + * xprt_complete_bc_request() and rpcrdma_bc_receive_call(). 
+ */ +void xprt_svc_destroy_nullify_bc(struct rpc_xprt *xprt, struct svc_serv **serv) +{ + spin_lock(&xprt->bc_pa_lock); + svc_destroy(serv); + xprt->bc_serv = NULL; + spin_unlock(&xprt->bc_pa_lock); +} +EXPORT_SYMBOL_GPL(xprt_svc_destroy_nullify_bc); + /* * Helper routines that track the number of preallocation elements * on the transport. -- cgit v1.2.3 From 9e9fdd0ad0fba799dbae7ecfd167199885fb63a1 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Tue, 4 Nov 2025 17:29:27 -0500 Subject: NFSv4.1: protect destroying and nullifying bc_serv structure When we are shutting down the client, we free the callback server structure and then at a later point we free the transport used by the client. Yet, it's possible that after the callback server is freed, the transport receives a backchannel request at which point we can dereference freed memory. Instead, do the freeing of the bc server and the nullifying of bc_serv under the lock. Signed-off-by: Olga Kornievskaia Signed-off-by: Trond Myklebust --- fs/nfs/callback.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 8b674ee093a6..fabda0f6ec1a 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -270,7 +270,7 @@ void nfs_callback_down(int minorversion, struct net *net, struct rpc_xprt *xprt) if (cb_info->users == 0) { svc_set_num_threads(serv, NULL, 0); dprintk("nfs_callback_down: service destroyed\n"); - svc_destroy(&cb_info->serv); + xprt_svc_destroy_nullify_bc(xprt, &cb_info->serv); } mutex_unlock(&nfs_callback_mutex); } -- cgit v1.2.3 From e0f8058f2cb56de0b7572f51cd563ca5debce746 Mon Sep 17 00:00:00 2001 From: Jonathan Curley Date: Wed, 12 Nov 2025 18:02:42 +0000 Subject: NFSv4/pNFS: Clear NFS_INO_LAYOUTCOMMIT in pnfs_mark_layout_stateid_invalid Fixes a crash when layout is null during this call stack: write_inode -> nfs4_write_inode -> pnfs_layoutcommit_inode pnfs_set_layoutcommit relies on the lseg refcount to keep the layout around. 
Need to clear NFS_INO_LAYOUTCOMMIT otherwise we might attempt to reference a null layout. Fixes: fe1cf9469d7bc ("pNFS: Clear all layout segment state in pnfs_mark_layout_stateid_invalid") Signed-off-by: Jonathan Curley Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index a3135b5af7ee..7ce2e840217c 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -464,6 +464,7 @@ pnfs_mark_layout_stateid_invalid(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg, *next; set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags); + clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(lo->plh_inode)->flags); list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list) pnfs_clear_lseg_state(lseg, lseg_list); pnfs_clear_layoutreturn_info(lo); -- cgit v1.2.3 From 130ae65c01862e1ed30ef5ff2258990d7628f360 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Tue, 4 Nov 2025 10:06:41 -0500 Subject: NFS: Add support for sending GDD_GETATTR I add this to the existing GETATTR compound as an optional extra step that we can send if the "dir_deleg" flag is set to 'true'. Actually enabling this value will happen in a later patch. 
Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/nfs4xdr.c | 106 ++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/nfs_xdr.h | 7 ++++ 2 files changed, 113 insertions(+) diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 1d0e6c10f921..b6fe30577fab 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -393,6 +393,20 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req, XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + 5) #define encode_reclaim_complete_maxsz (op_encode_hdr_maxsz + 4) #define decode_reclaim_complete_maxsz (op_decode_hdr_maxsz + 4) +#define encode_get_dir_deleg_maxsz (op_encode_hdr_maxsz + \ + 4 /* gdda_signal_deleg_avail */ + \ + 8 /* gdda_notification_types */ + \ + nfstime4_maxsz /* gdda_child_attr_delay */ + \ + nfstime4_maxsz /* gdda_dir_attr_delay */ + \ + nfs4_fattr_bitmap_maxsz /* gdda_child_attributes */ + \ + nfs4_fattr_bitmap_maxsz /* gdda_dir_attributes */) +#define decode_get_dir_deleg_maxsz (op_decode_hdr_maxsz + \ + 4 /* gddrnf_status */ + \ + encode_verifier_maxsz /* gddr_cookieverf */ + \ + encode_stateid_maxsz /* gddr_stateid */ + \ + 8 /* gddr_notification */ + \ + nfs4_fattr_maxsz /* gddr_child_attributes */ + \ + nfs4_fattr_maxsz /* gddr_dir_attributes */) #define encode_getdeviceinfo_maxsz (op_encode_hdr_maxsz + \ XDR_QUADLEN(NFS4_DEVICEID4_SIZE) + \ 1 /* layout type */ + \ @@ -444,6 +458,8 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req, #else /* CONFIG_NFS_V4_1 */ #define encode_sequence_maxsz 0 #define decode_sequence_maxsz 0 +#define encode_get_dir_deleg_maxsz 0 +#define decode_get_dir_deleg_maxsz 0 #define encode_layoutreturn_maxsz 0 #define decode_layoutreturn_maxsz 0 #define encode_layoutget_maxsz 0 @@ -631,11 +647,13 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req, #define NFS4_enc_getattr_sz (compound_encode_hdr_maxsz + \ encode_sequence_maxsz + \ encode_putfh_maxsz + \ + encode_get_dir_deleg_maxsz + \ 
encode_getattr_maxsz + \ encode_renew_maxsz) #define NFS4_dec_getattr_sz (compound_decode_hdr_maxsz + \ decode_sequence_maxsz + \ decode_putfh_maxsz + \ + decode_get_dir_deleg_maxsz + \ decode_getattr_maxsz + \ decode_renew_maxsz) #define NFS4_enc_lookup_sz (compound_encode_hdr_maxsz + \ @@ -2007,6 +2025,33 @@ static void encode_sequence(struct xdr_stream *xdr, } #ifdef CONFIG_NFS_V4_1 +static void +encode_get_dir_delegation(struct xdr_stream *xdr, struct compound_hdr *hdr) +{ + struct timespec64 ts = { 0, 0 }; + u32 notifications[1] = { 0 }; + u32 attributes[1] = { 0 }; + __be32 *p; + + encode_op_hdr(xdr, OP_GET_DIR_DELEGATION, decode_get_dir_deleg_maxsz, hdr); + + /* We don't handle CB_RECALLABLE_OBJ_AVAIL yet. */ + xdr_stream_encode_bool(xdr, false); + + xdr_encode_bitmap4(xdr, notifications, ARRAY_SIZE(notifications)); + + /* Request no delay on attribute updates */ + p = reserve_space(xdr, 12 + 12); + p = xdr_encode_nfstime4(p, &ts); + xdr_encode_nfstime4(p, &ts); + + /* Requested child attributes */ + xdr_encode_bitmap4(xdr, attributes, ARRAY_SIZE(attributes)); + + /* Requested dir attributes */ + xdr_encode_bitmap4(xdr, attributes, ARRAY_SIZE(attributes)); +} + static void encode_getdeviceinfo(struct xdr_stream *xdr, const struct nfs4_getdeviceinfo_args *args, @@ -2142,6 +2187,11 @@ static void encode_free_stateid(struct xdr_stream *xdr, encode_nfs4_stateid(xdr, &args->stateid); } #else +static inline void +encode_get_dir_delegation(struct xdr_stream *xdr, struct compound_hdr *hdr) +{ +} + static inline void encode_layoutreturn(struct xdr_stream *xdr, const struct nfs4_layoutreturn_args *args, @@ -2356,6 +2406,8 @@ static void nfs4_xdr_enc_getattr(struct rpc_rqst *req, struct xdr_stream *xdr, encode_compound_hdr(xdr, req, &hdr); encode_sequence(xdr, &args->seq_args, &hdr); encode_putfh(xdr, args->fh, &hdr); + if (args->get_dir_deleg) + encode_get_dir_delegation(xdr, &hdr); encode_getfattr(xdr, args->bitmask, &hdr); encode_nops(&hdr); } @@ -5994,6 +6046,49 @@ 
static int decode_layout_stateid(struct xdr_stream *xdr, nfs4_stateid *stateid) return decode_stateid(xdr, stateid); } +static int decode_get_dir_delegation(struct xdr_stream *xdr, + struct nfs4_getattr_res *res) +{ + struct nfs4_gdd_res *gdd_res = res->gdd_res; + nfs4_verifier cookieverf; + u32 bitmap[1]; + int status; + + status = decode_op_hdr(xdr, OP_GET_DIR_DELEGATION); + if (status) + return status; + + if (xdr_stream_decode_u32(xdr, &gdd_res->status)) + return -EIO; + + if (gdd_res->status == GDD4_UNAVAIL) + return xdr_inline_decode(xdr, 4) ? 0 : -EIO; + + status = decode_verifier(xdr, &cookieverf); + if (status) + return status; + + status = decode_delegation_stateid(xdr, &gdd_res->deleg); + if (status) + return status; + + /* Decode supported notification types. */ + status = decode_bitmap4(xdr, bitmap, ARRAY_SIZE(bitmap)); + if (status < 0) + return status; + + /* Decode supported child attributes. */ + status = decode_bitmap4(xdr, bitmap, ARRAY_SIZE(bitmap)); + if (status < 0) + return status; + + /* Decode supported attributes. 
*/ + status = decode_bitmap4(xdr, bitmap, ARRAY_SIZE(bitmap)); + if (status < 0) + return status; + return 0; +} + static int decode_getdeviceinfo(struct xdr_stream *xdr, struct nfs4_getdeviceinfo_res *res) { @@ -6208,6 +6303,12 @@ static int decode_free_stateid(struct xdr_stream *xdr, return res->status; } #else +static int decode_get_dir_delegation(struct xdr_stream *xdr, + struct nfs4_getattr_res *res) +{ + return 0; +} + static inline int decode_layoutreturn(struct xdr_stream *xdr, struct nfs4_layoutreturn_res *res) @@ -6525,6 +6626,11 @@ static int nfs4_xdr_dec_getattr(struct rpc_rqst *rqstp, struct xdr_stream *xdr, status = decode_putfh(xdr); if (status) goto out; + if (res->gdd_res) { + status = decode_get_dir_delegation(xdr, res); + if (status) + goto out; + } status = decode_getfattr(xdr, res->fattr, res->server); out: return status; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 31463286402f..8bf6cba96c46 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1092,12 +1092,19 @@ struct nfs4_getattr_arg { struct nfs4_sequence_args seq_args; const struct nfs_fh * fh; const u32 * bitmask; + bool get_dir_deleg; +}; + +struct nfs4_gdd_res { + u32 status; + nfs4_stateid deleg; }; struct nfs4_getattr_res { struct nfs4_sequence_res seq_res; const struct nfs_server * server; struct nfs_fattr * fattr; + struct nfs4_gdd_res * gdd_res; }; struct nfs4_link_arg { -- cgit v1.2.3 From 156b0948293362b036caf49e6e4d97cae30201de Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Tue, 4 Nov 2025 10:06:42 -0500 Subject: NFS: Request a directory delegation on ACCESS, CREATE, and UNLINK This patch adds a new flag: NFS_INO_REQ_DIR_DELEG to signal that a directory wants to request a directory delegation the next time it does a GETATTR. I have the client request a directory delegation when doing an access, create, or unlink call since these calls indicate that a user is working with a directory. 
Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/delegation.c | 1 + fs/nfs/delegation.h | 6 ++++++ fs/nfs/nfs4proc.c | 55 +++++++++++++++++++++++++++++++++++++++++++---- include/linux/nfs_fs.h | 1 + include/linux/nfs_fs_sb.h | 1 + 5 files changed, 60 insertions(+), 4 deletions(-) diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 9d3a5f29f17f..b4c192f00e94 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -379,6 +379,7 @@ nfs_detach_delegation_locked(struct nfs_inode *nfsi, delegation->inode = NULL; rcu_assign_pointer(nfsi->delegation, NULL); spin_unlock(&delegation->lock); + clear_bit(NFS_INO_REQ_DIR_DELEG, &nfsi->flags); return delegation; } diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h index 08ec2e9c68a4..def50e8a83bf 100644 --- a/fs/nfs/delegation.h +++ b/fs/nfs/delegation.h @@ -124,6 +124,12 @@ static inline int nfs_have_delegated_mtime(struct inode *inode) NFS_DELEGATION_FLAG_TIME); } +static inline void nfs_request_directory_delegation(struct inode *inode) +{ + if (S_ISDIR(inode->i_mode)) + set_bit(NFS_INO_REQ_DIR_DELEG, &NFS_I(inode)->flags); +} + int nfs4_delegation_hash_alloc(struct nfs_server *server); #endif diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 3b436ba2ed3b..99edc1d8d7aa 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4470,6 +4470,28 @@ out: return status; } +#if IS_ENABLED(CONFIG_NFS_V4_1) +static bool should_request_dir_deleg(struct inode *inode) +{ + if (!inode) + return false; + if (!S_ISDIR(inode->i_mode)) + return false; + if (!nfs_server_capable(inode, NFS_CAP_DIR_DELEG)) + return false; + if (!test_and_clear_bit(NFS_INO_REQ_DIR_DELEG, &(NFS_I(inode)->flags))) + return false; + if (nfs4_have_delegation(inode, FMODE_READ, 0)) + return false; + return true; +} +#else +static bool should_request_dir_deleg(struct inode *inode) +{ + return false; +} +#endif /* CONFIG_NFS_V4_1 */ + static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct 
nfs_fattr *fattr, struct inode *inode) { @@ -4487,7 +4509,9 @@ static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, .rpc_argp = &args, .rpc_resp = &res, }; + struct nfs4_gdd_res gdd_res; unsigned short task_flags = 0; + int status; if (nfs4_has_session(server->nfs_client)) task_flags = RPC_TASK_MOVEABLE; @@ -4496,11 +4520,26 @@ static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, if (inode && (server->flags & NFS_MOUNT_SOFTREVAL)) task_flags |= RPC_TASK_TIMEOUT; + args.get_dir_deleg = should_request_dir_deleg(inode); + if (args.get_dir_deleg) + res.gdd_res = &gdd_res; + nfs4_bitmap_copy_adjust(bitmask, nfs4_bitmask(server, fattr->label), inode, 0); nfs_fattr_init(fattr); nfs4_init_sequence(&args.seq_args, &res.seq_res, 0, 0); - return nfs4_do_call_sync(server->client, server, &msg, - &args.seq_args, &res.seq_res, task_flags); + + status = nfs4_do_call_sync(server->client, server, &msg, + &args.seq_args, &res.seq_res, task_flags); + if (args.get_dir_deleg) { + if (status == -EOPNOTSUPP) { + server->caps &= ~NFS_CAP_DIR_DELEG; + } else if (status == 0 && gdd_res.status == GDD4_OK) { + status = nfs_inode_set_delegation(inode, current_cred(), + FMODE_READ, &gdd_res.deleg, + 0, NFS4_OPEN_DELEGATE_READ); + } + } + return status; } int nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, @@ -4513,8 +4552,10 @@ int nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, do { err = _nfs4_proc_getattr(server, fhandle, fattr, inode); trace_nfs4_getattr(server, fhandle, fattr, err); - err = nfs4_handle_exception(server, err, - &exception); + if (err == -EOPNOTSUPP) + exception.retry = true; + else + err = nfs4_handle_exception(server, err, &exception); } while (exception.retry); return err; } @@ -4778,6 +4819,7 @@ static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry int status = 0; if (!nfs4_have_delegation(inode, FMODE_READ, 0)) { + 
nfs_request_directory_delegation(inode); res.fattr = nfs_alloc_fattr(); if (res.fattr == NULL) return -ENOMEM; @@ -4885,6 +4927,8 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, ilabel = nfs4_label_init_security(dir, dentry, sattr, &l); + nfs_request_directory_delegation(dir); + if (!(server->attr_bitmask[2] & FATTR4_WORD2_MODE_UMASK)) sattr->ia_mode &= ~current_umask(); state = nfs4_do_open(dir, ctx, flags, sattr, ilabel, NULL); @@ -4981,6 +5025,7 @@ static void nfs4_proc_unlink_setup(struct rpc_message *msg, nfs4_init_sequence(&args->seq_args, &res->seq_res, 1, 0); nfs_fattr_init(res->dir_attr); + nfs_request_directory_delegation(d_inode(dentry->d_parent)); if (inode) { nfs4_inode_return_delegation(inode); @@ -10832,6 +10877,7 @@ static const struct nfs4_minor_version_ops nfs_v4_1_minor_ops = { .minor_version = 1, .init_caps = NFS_CAP_READDIRPLUS | NFS_CAP_ATOMIC_OPEN + | NFS_CAP_DIR_DELEG | NFS_CAP_POSIX_LOCK | NFS_CAP_STATEID_NFSV41 | NFS_CAP_ATOMIC_OPEN_V1 @@ -10858,6 +10904,7 @@ static const struct nfs4_minor_version_ops nfs_v4_2_minor_ops = { .minor_version = 2, .init_caps = NFS_CAP_READDIRPLUS | NFS_CAP_ATOMIC_OPEN + | NFS_CAP_DIR_DELEG | NFS_CAP_POSIX_LOCK | NFS_CAP_STATEID_NFSV41 | NFS_CAP_ATOMIC_OPEN_V1 diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index c585939b6cd6..a6624edb7226 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -344,6 +344,7 @@ struct nfs4_copy_state { #define NFS_INO_LAYOUTCOMMITTING (10) /* layoutcommit inflight */ #define NFS_INO_LAYOUTSTATS (11) /* layoutstats inflight */ #define NFS_INO_ODIRECT (12) /* I/O setting is O_DIRECT */ +#define NFS_INO_REQ_DIR_DELEG (13) /* Request a directory delegation */ static inline struct nfs_inode *NFS_I(const struct inode *inode) { diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index d30c0245031c..4ba04de6b1ca 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -305,6 +305,7 @@ struct 
nfs_server { #define NFS_CAP_REBOOT_LAYOUTRETURN (1U << 8) #define NFS_CAP_OFFLOAD_STATUS (1U << 9) #define NFS_CAP_ZERO_RANGE (1U << 10) +#define NFS_CAP_DIR_DELEG (1U << 11) #define NFS_CAP_OPEN_XOR (1U << 12) #define NFS_CAP_DELEGTIME (1U << 13) #define NFS_CAP_POSIX_LOCK (1U << 14) -- cgit v1.2.3 From 2da211670782637fd2d4fbba06f91d1e7c70dc0c Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Tue, 4 Nov 2025 10:06:43 -0500 Subject: NFS: Request a directory delegation during RENAME If we notice that we're renaming a file within a directory then we take that as a sign that the user is working with the current directory and may want a delegation to avoid extra revalidations when possible. The nfs_request_directory_delegation() function exists within the NFS v4 module, so I add an extra flag to rename_setup() to indicate if a dentry is being renamed within the same parent directory. Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/nfs3proc.c | 3 ++- fs/nfs/nfs4proc.c | 5 ++++- fs/nfs/proc.c | 3 ++- fs/nfs/unlink.c | 3 ++- include/linux/nfs_xdr.h | 3 ++- 5 files changed, 12 insertions(+), 5 deletions(-) diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index a4cb67573aa7..1181f9cc6dbd 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -483,7 +483,8 @@ nfs3_proc_unlink_done(struct rpc_task *task, struct inode *dir) static void nfs3_proc_rename_setup(struct rpc_message *msg, struct dentry *old_dentry, - struct dentry *new_dentry) + struct dentry *new_dentry, + struct inode *same_parent) { msg->rpc_proc = &nfs3_procedures[NFS3PROC_RENAME]; } diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 99edc1d8d7aa..6691a44866b6 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5060,7 +5060,8 @@ static int nfs4_proc_unlink_done(struct rpc_task *task, struct inode *dir) static void nfs4_proc_rename_setup(struct rpc_message *msg, struct dentry *old_dentry, - struct dentry *new_dentry) + struct dentry *new_dentry, + struct inode 
*same_parent) { struct nfs_renameargs *arg = msg->rpc_argp; struct nfs_renameres *res = msg->rpc_resp; @@ -5071,6 +5072,8 @@ static void nfs4_proc_rename_setup(struct rpc_message *msg, nfs4_inode_make_writeable(old_inode); if (new_inode) nfs4_inode_return_delegation(new_inode); + if (same_parent) + nfs_request_directory_delegation(same_parent); msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENAME]; res->server = NFS_SB(old_dentry->d_sb); nfs4_init_sequence(&arg->seq_args, &res->seq_res, 1, 0); diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index 63e71310b9f6..39df80e4ae6f 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -353,7 +353,8 @@ static int nfs_proc_unlink_done(struct rpc_task *task, struct inode *dir) static void nfs_proc_rename_setup(struct rpc_message *msg, struct dentry *old_dentry, - struct dentry *new_dentry) + struct dentry *new_dentry, + struct inode *same_parent) { msg->rpc_proc = &nfs_procedures[NFSPROC_RENAME]; } diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c index b55467911648..4db818c0f9dd 100644 --- a/fs/nfs/unlink.c +++ b/fs/nfs/unlink.c @@ -390,7 +390,8 @@ nfs_async_rename(struct inode *old_dir, struct inode *new_dir, nfs_sb_active(old_dir->i_sb); - NFS_PROTO(data->old_dir)->rename_setup(&msg, old_dentry, new_dentry); + NFS_PROTO(data->old_dir)->rename_setup(&msg, old_dentry, new_dentry, + old_dir == new_dir ? 
old_dir : NULL); return rpc_run_task(&task_setup_data); } diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 8bf6cba96c46..79fe2dfb470f 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1808,7 +1808,8 @@ struct nfs_rpc_ops { int (*unlink_done) (struct rpc_task *, struct inode *); void (*rename_setup) (struct rpc_message *msg, struct dentry *old_dentry, - struct dentry *new_dentry); + struct dentry *new_dentry, + struct inode *same_parent); void (*rename_rpc_prepare)(struct rpc_task *task, struct nfs_renamedata *); int (*rename_done) (struct rpc_task *task, struct inode *old_dir, struct inode *new_dir); int (*link) (struct inode *, struct inode *, const struct qstr *); -- cgit v1.2.3 From 669c0580ac3757cad4dd16fd7dcb08cfc2abda56 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Tue, 4 Nov 2025 10:06:44 -0500 Subject: NFS: Shortcut lookup revalidations if we have a directory delegation Holding a directory delegation means we know that nobody else has modified the directory on the server, so we can take a few revalidation shortcuts. 
Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/delegation.h | 5 +++++ fs/nfs/dir.c | 19 +++++++++++++++++++ fs/nfs/inode.c | 3 +++ 3 files changed, 27 insertions(+) diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h index def50e8a83bf..8968f62bf438 100644 --- a/fs/nfs/delegation.h +++ b/fs/nfs/delegation.h @@ -130,6 +130,11 @@ static inline void nfs_request_directory_delegation(struct inode *inode) set_bit(NFS_INO_REQ_DIR_DELEG, &NFS_I(inode)->flags); } +static inline bool nfs_have_directory_delegation(struct inode *inode) +{ + return S_ISDIR(inode->i_mode) && nfs_have_delegated_attributes(inode); +} + int nfs4_delegation_hash_alloc(struct nfs_server *server); #endif diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 3b8250ee0141..23a78a742b61 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1515,6 +1515,15 @@ static int nfs_check_verifier(struct inode *dir, struct dentry *dentry, return 0; if (!nfs_dentry_verify_change(dir, dentry)) return 0; + + /* + * If we have a directory delegation then we don't need to revalidate + * the directory. The delegation will either get recalled or we will + * receive a notification when it changes. 
+ */ + if (nfs_have_directory_delegation(dir)) + return 0; + /* Revalidate nfsi->cache_change_attribute before we declare a match */ if (nfs_mapping_need_revalidate_inode(dir)) { if (rcu_walk) @@ -2207,6 +2216,13 @@ no_open: } EXPORT_SYMBOL_GPL(nfs_atomic_open); +static int +nfs_lookup_revalidate_delegated_parent(struct inode *dir, struct dentry *dentry, + struct inode *inode) +{ + return nfs_lookup_revalidate_done(dir, dentry, inode, 1); +} + static int nfs4_lookup_revalidate(struct inode *dir, const struct qstr *name, struct dentry *dentry, unsigned int flags) @@ -2234,6 +2250,9 @@ nfs4_lookup_revalidate(struct inode *dir, const struct qstr *name, if (nfs_verifier_is_delegated(dentry)) return nfs_lookup_revalidate_delegated(dir, dentry, inode); + if (nfs_have_directory_delegation(dir)) + return nfs_lookup_revalidate_delegated_parent(dir, dentry, inode); + /* NFS only supports OPEN on regular files */ if (!S_ISREG(inode->i_mode)) goto full_reval; diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 13ad70fc00d8..2060adb3b0c5 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1389,6 +1389,9 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) status = pnfs_sync_inode(inode, false); if (status) goto out; + } else if (nfs_have_directory_delegation(inode)) { + status = 0; + goto out; } status = -ENOMEM; -- cgit v1.2.3 From b6d2a520f4638c2e7d3f2f11946918946941be18 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Tue, 4 Nov 2025 10:06:45 -0500 Subject: NFS: Add a module option to disable directory delegations When this option is disabled then the client will not request directory delegations or check if we have one during the revalidation paths. 
Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/delegation.c | 7 +++++++ fs/nfs/delegation.h | 2 ++ fs/nfs/nfs4proc.c | 2 ++ 3 files changed, 11 insertions(+) diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index b4c192f00e94..2248e3ad089a 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -30,6 +30,11 @@ static unsigned nfs_delegation_watermark = NFS_DEFAULT_DELEGATION_WATERMARK; module_param_named(delegation_watermark, nfs_delegation_watermark, uint, 0644); +bool directory_delegations = true; +module_param(directory_delegations, bool, 0644); +MODULE_PARM_DESC(directory_delegations, + "Enable the use of directory delegations, defaults to on."); + static struct hlist_head *nfs_delegation_hash(struct nfs_server *server, const struct nfs_fh *fhandle) { @@ -143,6 +148,8 @@ static int nfs4_do_check_delegation(struct inode *inode, fmode_t type, */ int nfs4_have_delegation(struct inode *inode, fmode_t type, int flags) { + if (S_ISDIR(inode->i_mode) && !directory_delegations) + nfs_inode_evict_delegation(inode); return nfs4_do_check_delegation(inode, type, flags, true); } diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h index 8968f62bf438..46d866adb5c2 100644 --- a/fs/nfs/delegation.h +++ b/fs/nfs/delegation.h @@ -124,6 +124,8 @@ static inline int nfs_have_delegated_mtime(struct inode *inode) NFS_DELEGATION_FLAG_TIME); } +extern bool directory_delegations; + static inline void nfs_request_directory_delegation(struct inode *inode) { if (S_ISDIR(inode->i_mode)) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 6691a44866b6..c53ddb185aa3 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4473,6 +4473,8 @@ out: #if IS_ENABLED(CONFIG_NFS_V4_1) static bool should_request_dir_deleg(struct inode *inode) { + if (!directory_delegations) + return false; if (!inode) return false; if (!S_ISDIR(inode->i_mode)) -- cgit v1.2.3 From 400fa37afbb11a601c204b72af0f0e5bc2db695c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: 
Fri, 28 Nov 2025 13:39:07 -0500 Subject: Revert "nfs: ignore SB_RDONLY when remounting nfs" This reverts commit 80c4de6ab44c14e910117a02f2f8241ffc6ec54a. Silently ignoring the "ro" and "rw" mount options causes user confusion, and regressions. Reported-by: Alkis Georgopoulos Cc: Li Lingfeng Fixes: 80c4de6ab44c ("nfs: ignore SB_RDONLY when remounting nfs") Signed-off-by: Trond Myklebust --- fs/nfs/super.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 72dee6f3050e..527000f5d150 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1051,16 +1051,6 @@ int nfs_reconfigure(struct fs_context *fc) sync_filesystem(sb); - /* - * The SB_RDONLY flag has been removed from the superblock during - * mounts to prevent interference between different filesystems. - * Similarly, it is also necessary to ignore the SB_RDONLY flag - * during reconfiguration; otherwise, it may also result in the - * creation of redundant superblocks when mounting a directory with - * different rw and ro flags multiple times. - */ - fc->sb_flags_mask &= ~SB_RDONLY; - /* * Userspace mount programs that send binary options generally send * them populated with default values. We have no way to know which -- cgit v1.2.3 From d216b698d44e33417ad4cc796cb04ccddbb8c0ee Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 28 Nov 2025 13:39:38 -0500 Subject: Revert "nfs: clear SB_RDONLY before getting superblock" This reverts commit 8cd9b785943c57a136536250da80ba1eb6f8eb18. Silently ignoring the "ro" and "rw" mount options causes user confusion, and regressions. 
Reported-by: Alkis Georgopoulos Cc: Li Lingfeng Fixes: 8cd9b785943c ("nfs: clear SB_RDONLY before getting superblock") Signed-off-by: Trond Myklebust --- fs/nfs/super.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 527000f5d150..9b9464e70a7f 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1308,17 +1308,8 @@ int nfs_get_tree_common(struct fs_context *fc) if (IS_ERR(server)) return PTR_ERR(server); - /* - * When NFS_MOUNT_UNSHARED is not set, NFS forces the sharing of a - * superblock among each filesystem that mounts sub-directories - * belonging to a single exported root path. - * To prevent interference between different filesystems, the - * SB_RDONLY flag should be removed from the superblock. - */ if (server->flags & NFS_MOUNT_UNSHARED) compare_super = NULL; - else - fc->sb_flags &= ~SB_RDONLY; /* -o noac implies -o sync */ if (server->flags & NFS_MOUNT_NOAC) -- cgit v1.2.3 From d4a26d34f1946142f9d32e540490e4926ae9a46b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 28 Nov 2025 13:39:45 -0500 Subject: Revert "nfs: ignore SB_RDONLY when mounting nfs" This reverts commit 52cb7f8f177878b4f22397b9c4d2c8f743766be3. Silently ignoring the "ro" and "rw" mount options causes user confusion, and regressions. 
Reported-by: Alkis Georgopoulos Cc: Li Lingfeng Fixes: 52cb7f8f1778 ("nfs: ignore SB_RDONLY when mounting nfs") Signed-off-by: Trond Myklebust --- fs/nfs/internal.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 2ecd38e1d17a..ffd382aa31ac 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -13,7 +13,7 @@ #include #include -#define NFS_SB_MASK (SB_NOSUID|SB_NODEV|SB_NOEXEC|SB_SYNCHRONOUS) +#define NFS_SB_MASK (SB_RDONLY|SB_NOSUID|SB_NODEV|SB_NOEXEC|SB_SYNCHRONOUS) extern const struct export_operations nfs_export_ops; -- cgit v1.2.3 From 8675c69816e4276b979ff475ee5fac4688f80125 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 28 Nov 2025 14:22:44 -0500 Subject: NFS: Automounted filesystems should inherit ro,noexec,nodev,sync flags When a filesystem is being automounted, it needs to preserve the user-set superblock mount options, such as the "ro" flag. Reported-by: Li Lingfeng Link: https://lore.kernel.org/all/20240604112636.236517-3-lilingfeng@huaweicloud.com/ Fixes: f2aedb713c28 ("NFS: Add fs_context support.") Signed-off-by: Trond Myklebust --- fs/nfs/namespace.c | 6 ++++++ fs/nfs/super.c | 4 ---- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index 5a4d193da1a9..dca055676c4f 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -149,6 +149,7 @@ struct vfsmount *nfs_d_automount(struct path *path) struct vfsmount *mnt = ERR_PTR(-ENOMEM); struct nfs_server *server = NFS_SB(path->dentry->d_sb); struct nfs_client *client = server->nfs_client; + unsigned long s_flags = path->dentry->d_sb->s_flags; int timeout = READ_ONCE(nfs_mountpoint_expiry_timeout); int ret; @@ -174,6 +175,11 @@ struct vfsmount *nfs_d_automount(struct path *path) fc->net_ns = get_net(client->cl_net); } + /* Inherit the flags covered by NFS_SB_MASK */ + fc->sb_flags_mask |= NFS_SB_MASK; + fc->sb_flags &= ~NFS_SB_MASK; + fc->sb_flags |= s_flags & NFS_SB_MASK; + /* 
for submounts we want the same server; referrals will reassign */ memcpy(&ctx->nfs_server._address, &client->cl_addr, client->cl_addrlen); ctx->nfs_server.addrlen = client->cl_addrlen; diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 9b9464e70a7f..66413133b43e 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1315,10 +1315,6 @@ int nfs_get_tree_common(struct fs_context *fc) if (server->flags & NFS_MOUNT_NOAC) fc->sb_flags |= SB_SYNCHRONOUS; - if (ctx->clone_data.sb) - if (ctx->clone_data.sb->s_flags & SB_SYNCHRONOUS) - fc->sb_flags |= SB_SYNCHRONOUS; - /* Get a superblock - note that we may end up sharing one that already exists */ fc->s_fs_info = server; s = sget_fc(fc, compare_super, nfs_set_super); -- cgit v1.2.3 From 2b092175f5e301cdaa935093edfef2be9defb6df Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 28 Nov 2025 16:06:41 -0500 Subject: NFS: Fix inheritance of the block sizes when automounting Only inherit the block sizes that were actually specified as mount parameters for the parent mount. 
Fixes: 62a55d088cd8 ("NFS: Additional refactoring for fs_context conversion") Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 21 +++++++++++++++++---- fs/nfs/internal.h | 1 - fs/nfs/namespace.c | 5 ++++- fs/nfs/nfs4client.c | 18 ++++++++++++++---- fs/nfs/super.c | 10 +++------- include/linux/nfs_fs_sb.h | 5 +++++ 6 files changed, 43 insertions(+), 17 deletions(-) diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 54699299d5b1..2aaea9c98c2c 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -784,10 +784,18 @@ static int nfs_init_server(struct nfs_server *server, server->fattr_valid = NFS_ATTR_FATTR_V4; } - if (ctx->rsize) + if (ctx->bsize) { + server->bsize = ctx->bsize; + server->automount_inherit |= NFS_AUTOMOUNT_INHERIT_BSIZE; + } + if (ctx->rsize) { server->rsize = nfs_io_size(ctx->rsize, clp->cl_proto); - if (ctx->wsize) + server->automount_inherit |= NFS_AUTOMOUNT_INHERIT_RSIZE; + } + if (ctx->wsize) { server->wsize = nfs_io_size(ctx->wsize, clp->cl_proto); + server->automount_inherit |= NFS_AUTOMOUNT_INHERIT_WSIZE; + } server->acregmin = ctx->acregmin * HZ; server->acregmax = ctx->acregmax * HZ; @@ -977,8 +985,13 @@ EXPORT_SYMBOL_GPL(nfs_probe_server); void nfs_server_copy_userdata(struct nfs_server *target, struct nfs_server *source) { target->flags = source->flags; - target->rsize = source->rsize; - target->wsize = source->wsize; + target->automount_inherit = source->automount_inherit; + if (source->automount_inherit & NFS_AUTOMOUNT_INHERIT_BSIZE) + target->bsize = source->bsize; + if (source->automount_inherit & NFS_AUTOMOUNT_INHERIT_RSIZE) + target->rsize = source->rsize; + if (source->automount_inherit & NFS_AUTOMOUNT_INHERIT_WSIZE) + target->wsize = source->wsize; target->acregmin = source->acregmin; target->acregmax = source->acregmax; target->acdirmin = source->acdirmin; diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index ffd382aa31ac..2e596244799f 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -152,7 +152,6 @@ struct 
nfs_fs_context { struct super_block *sb; struct dentry *dentry; struct nfs_fattr *fattr; - unsigned int inherited_bsize; } clone_data; }; diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index dca055676c4f..9e4d94f41fc6 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -190,6 +190,10 @@ struct vfsmount *nfs_d_automount(struct path *path) ctx->nfs_mod = client->cl_nfs_mod; get_nfs_version(ctx->nfs_mod); + /* Inherit block sizes if they were specified as mount parameters */ + if (server->automount_inherit & NFS_AUTOMOUNT_INHERIT_BSIZE) + ctx->bsize = server->bsize; + ret = client->rpc_ops->submount(fc, server); if (ret < 0) { mnt = ERR_PTR(ret); @@ -289,7 +293,6 @@ int nfs_do_submount(struct fs_context *fc) return -ENOMEM; ctx->internal = true; - ctx->clone_data.inherited_bsize = ctx->clone_data.sb->s_blocksize_bits; p = nfs_devname(dentry, buffer, 4096); if (IS_ERR(p)) { diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 4e972f85d0ca..96bccefbe2cb 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -1179,10 +1179,20 @@ static int nfs4_init_server(struct nfs_server *server, struct fs_context *fc) if (error < 0) return error; - if (ctx->rsize) - server->rsize = nfs_io_size(ctx->rsize, server->nfs_client->cl_proto); - if (ctx->wsize) - server->wsize = nfs_io_size(ctx->wsize, server->nfs_client->cl_proto); + if (ctx->bsize) { + server->bsize = ctx->bsize; + server->automount_inherit |= NFS_AUTOMOUNT_INHERIT_BSIZE; + } + if (ctx->rsize) { + server->rsize = + nfs_io_size(ctx->rsize, server->nfs_client->cl_proto); + server->automount_inherit |= NFS_AUTOMOUNT_INHERIT_RSIZE; + } + if (ctx->wsize) { + server->wsize = + nfs_io_size(ctx->wsize, server->nfs_client->cl_proto); + server->automount_inherit |= NFS_AUTOMOUNT_INHERIT_WSIZE; + } server->acregmin = ctx->acregmin * HZ; server->acregmax = ctx->acregmax * HZ; diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 66413133b43e..57d372db03b9 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ 
-1091,8 +1091,9 @@ static void nfs_fill_super(struct super_block *sb, struct nfs_fs_context *ctx) sb->s_blocksize = 0; sb->s_xattr = server->nfs_client->cl_nfs_mod->xattr; sb->s_op = server->nfs_client->cl_nfs_mod->sops; - if (ctx->bsize) - sb->s_blocksize = nfs_block_size(ctx->bsize, &sb->s_blocksize_bits); + if (server->bsize) + sb->s_blocksize = + nfs_block_size(server->bsize, &sb->s_blocksize_bits); switch (server->nfs_client->rpc_ops->version) { case 2: @@ -1338,13 +1339,8 @@ int nfs_get_tree_common(struct fs_context *fc) } if (!s->s_root) { - unsigned bsize = ctx->clone_data.inherited_bsize; /* initial superblock/root creation */ nfs_fill_super(s, ctx); - if (bsize) { - s->s_blocksize_bits = bsize; - s->s_blocksize = 1U << bsize; - } error = nfs_get_cache_cookie(s, ctx); if (error < 0) goto error_splat_super; diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 4ba04de6b1ca..c58b870f31ee 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -172,6 +172,11 @@ struct nfs_server { #define NFS_MOUNT_FORCE_RDIRPLUS 0x20000000 #define NFS_MOUNT_NETUNREACH_FATAL 0x40000000 + unsigned int automount_inherit; /* Properties inherited by automount */ +#define NFS_AUTOMOUNT_INHERIT_BSIZE 0x0001 +#define NFS_AUTOMOUNT_INHERIT_RSIZE 0x0002 +#define NFS_AUTOMOUNT_INHERIT_WSIZE 0x0004 + unsigned int caps; /* server capabilities */ __u64 fattr_valid; /* Valid attributes */ unsigned int rsize; /* read size */ -- cgit v1.2.3 From a2a8fc27dd668e7562b5326b5ed2f1604cb1e2e9 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 28 Nov 2025 18:56:46 -0500 Subject: NFS: Fix up the automount fs_context to use the correct cred When automounting, the fs_context should be fixed up to use the cred from the parent filesystem, since the operation is just extending the namespace. Authorisation to enter that namespace will already have been provided by the preceding lookup. 
Signed-off-by: Trond Myklebust --- fs/nfs/namespace.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index 9e4d94f41fc6..af9be0c5f516 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -170,6 +170,11 @@ struct vfsmount *nfs_d_automount(struct path *path) if (!ctx->clone_data.fattr) goto out_fc; + if (fc->cred != server->cred) { + put_cred(fc->cred); + fc->cred = get_cred(server->cred); + } + if (fc->net_ns != client->cl_net) { put_net(fc->net_ns); fc->net_ns = get_net(client->cl_net); -- cgit v1.2.3 From f50d0328d02fe38ba196a73c143e5d87e341d4f7 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Wed, 26 Nov 2025 01:01:26 -0500 Subject: nfs/localio: remove alignment size checking in nfs_is_local_dio_possible This check to ensure dio_offset_align isn't larger than PAGE_SIZE is no longer relevant (older iterations of NFS Direct allocated misaligned head and tail pages, but the current code no longer does, so this check isn't needed). Fixes: c817248fc831 ("nfs/localio: add proper O_DIRECT support for READ and WRITE") Signed-off-by: Mike Snitzer Signed-off-by: Trond Myklebust --- fs/nfs/localio.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/nfs/localio.c b/fs/nfs/localio.c index 656976b4f42c..512d9c5ff608 100644 --- a/fs/nfs/localio.c +++ b/fs/nfs/localio.c @@ -339,8 +339,6 @@ nfs_is_local_dio_possible(struct nfs_local_kiocb *iocb, int rw, if (unlikely(!nf_dio_mem_align || !nf_dio_offset_align)) return false; - if (unlikely(nf_dio_offset_align > PAGE_SIZE)) - return false; if (unlikely(len < nf_dio_offset_align)) return false; -- cgit v1.2.3 From 0b873de2c02f9cc655bef6bee0eb9e404126ed6c Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Wed, 26 Nov 2025 01:01:27 -0500 Subject: nfs/localio: remove 61 byte hole from needless ____cacheline_aligned struct nfs_local_kiocb used ____cacheline_aligned on its iters[] array and as the structure evolved it caused a 61 byte hole to form.
Fix this by removing ____cacheline_aligned and reordering iters[] before iter_is_dio_aligned[]. Fixes: 6a218b9c3183 ("nfs/localio: do not issue misaligned DIO out-of-order") Signed-off-by: Mike Snitzer Signed-off-by: Trond Myklebust --- fs/nfs/localio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/localio.c b/fs/nfs/localio.c index 512d9c5ff608..b98bb292fef0 100644 --- a/fs/nfs/localio.c +++ b/fs/nfs/localio.c @@ -43,8 +43,8 @@ struct nfs_local_kiocb { size_t end_len; short int end_iter_index; atomic_t n_iters; + struct iov_iter iters[NFSLOCAL_MAX_IOS]; bool iter_is_dio_aligned[NFSLOCAL_MAX_IOS]; - struct iov_iter iters[NFSLOCAL_MAX_IOS] ____cacheline_aligned; /* End mostly DIO-specific members */ }; -- cgit v1.2.3 From bd3b04b46c7a9940989ff4b29376e899e93d3a4a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 3 Dec 2025 11:17:25 -0500 Subject: NFSv4: Handle NFS4ERR_NOTSUPP errors for directory delegations The error NFS4ERR_NOTSUPP will be returned for operations that are legal, but not supported by the server. 
Fixes: 156b09482933 ("NFS: Request a directory delegation on ACCESS, CREATE, and UNLINK") Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index c53ddb185aa3..ec1ce593dea2 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4533,12 +4533,17 @@ static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, status = nfs4_do_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, task_flags); if (args.get_dir_deleg) { - if (status == -EOPNOTSUPP) { + switch (status) { + case 0: + if (gdd_res.status != GDD4_OK) + break; + status = nfs_inode_set_delegation( + inode, current_cred(), FMODE_READ, + &gdd_res.deleg, 0, NFS4_OPEN_DELEGATE_READ); + break; + case -ENOTSUPP: + case -EOPNOTSUPP: server->caps &= ~NFS_CAP_DIR_DELEG; - } else if (status == 0 && gdd_res.status == GDD4_OK) { - status = nfs_inode_set_delegation(inode, current_cred(), - FMODE_READ, &gdd_res.deleg, - 0, NFS4_OPEN_DELEGATE_READ); } } return status; @@ -4554,10 +4559,14 @@ int nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, do { err = _nfs4_proc_getattr(server, fhandle, fattr, inode); trace_nfs4_getattr(server, fhandle, fattr, err); - if (err == -EOPNOTSUPP) - exception.retry = true; - else + switch (err) { + default: err = nfs4_handle_exception(server, err, &exception); + break; + case -ENOTSUPP: + case -EOPNOTSUPP: + exception.retry = true; + } } while (exception.retry); return err; } -- cgit v1.2.3