From c58c844187df61ef7cc103d0abb5dd6198bcfcd6 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 18 Mar 2013 19:45:14 -0400 Subject: NFS: Don't accept more reads/writes if the open context recovery failed If the state recovery failed, we want to ensure that the application doesn't try to use the same file descriptor for more reads or writes. Signed-off-by: Trond Myklebust --- include/linux/nfs_fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 1cc25682b20b..f6b1956f3c86 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -77,6 +77,7 @@ struct nfs_open_context { unsigned long flags; #define NFS_CONTEXT_ERROR_WRITE (0) #define NFS_CONTEXT_RESEND_WRITES (1) +#define NFS_CONTEXT_BAD (2) int error; struct list_head list; -- cgit v1.2.3 From 9b20614988199fb03580b335a28250922e902098 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 17 Mar 2013 15:52:00 -0400 Subject: NFSv4: The stateid must remain the same for replayed RPC calls If we replay a READ or WRITE call, we should not be changing the stateid. Currently, we may end up doing so, because the stateid is only selected at xdr encode time. This patch ensures that we select the stateid after we get an NFSv4.1 session slot, and that we keep that same stateid across retries. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4_fs.h | 4 ++++ fs/nfs/nfs4filelayout.c | 14 ++++++++++---- fs/nfs/nfs4proc.c | 27 +++++++++++++++++++++++---- fs/nfs/nfs4xdr.c | 28 ++-------------------------- include/linux/nfs_xdr.h | 2 ++ 5 files changed, 41 insertions(+), 34 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 9ce90135bf22..8309e98c44f9 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -234,6 +234,10 @@ extern struct rpc_clnt *nfs4_proc_lookup_mountpoint(struct inode *, struct qstr extern int nfs4_proc_secinfo(struct inode *, const struct qstr *, struct nfs4_secinfo_flavors *); extern int nfs4_release_lockowner(struct nfs4_lock_state *); extern const struct xattr_handler *nfs4_xattr_handlers[]; +extern void nfs4_set_rw_stateid(nfs4_stateid *stateid, + const struct nfs_open_context *ctx, + const struct nfs_lock_context *l_ctx, + fmode_t fmode); #if defined(CONFIG_NFS_V4_1) static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *server) diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 4ba32e23eddd..22d10623f5ee 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -317,10 +317,13 @@ static void filelayout_read_prepare(struct rpc_task *task, void *data) } rdata->read_done_cb = filelayout_read_done_cb; - nfs41_setup_sequence(rdata->ds_clp->cl_session, + if (nfs41_setup_sequence(rdata->ds_clp->cl_session, &rdata->args.seq_args, &rdata->res.seq_res, - task); + task)) + return; + nfs4_set_rw_stateid(&rdata->args.stateid, rdata->args.context, + rdata->args.lock_context, FMODE_READ); } static void filelayout_read_call_done(struct rpc_task *task, void *data) @@ -421,10 +424,13 @@ static void filelayout_write_prepare(struct rpc_task *task, void *data) rpc_exit(task, 0); return; } - nfs41_setup_sequence(wdata->ds_clp->cl_session, + if (nfs41_setup_sequence(wdata->ds_clp->cl_session, &wdata->args.seq_args, &wdata->res.seq_res, - task); + task)) + return; + nfs4_set_rw_stateid(&wdata->args.stateid, wdata->args.context, + wdata->args.lock_context, FMODE_WRITE); } static void filelayout_write_call_done(struct rpc_task *task, void *data) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index c3bbb6c53d61..26176ce3d96a 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3454,6 +3454,19 @@ static int nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, return err; } +void nfs4_set_rw_stateid(nfs4_stateid *stateid, + const struct nfs_open_context *ctx, + const struct nfs_lock_context *l_ctx, + fmode_t fmode) +{ + const struct nfs_lockowner *lockowner = NULL; + + if (l_ctx != NULL) + lockowner = &l_ctx->lockowner; + nfs4_select_rw_stateid(stateid, ctx->state, fmode, lockowner); +} +EXPORT_SYMBOL_GPL(nfs4_set_rw_stateid); + void __nfs4_read_done_cb(struct nfs_read_data *data) { nfs_invalidate_atime(data->header->inode); @@ -3496,10 +3509,13 @@ static void nfs4_proc_read_setup(struct nfs_read_data *data, struct rpc_message static void nfs4_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data) { - nfs4_setup_sequence(NFS_SERVER(data->header->inode), + if (nfs4_setup_sequence(NFS_SERVER(data->header->inode), &data->args.seq_args, &data->res.seq_res, - task); + task)) + return; + nfs4_set_rw_stateid(&data->args.stateid, data->args.context, + data->args.lock_context, FMODE_READ); } static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_write_data *data) @@ -3560,10 +3576,13 @@ static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_messag static void nfs4_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data) { - nfs4_setup_sequence(NFS_SERVER(data->header->inode), + if (nfs4_setup_sequence(NFS_SERVER(data->header->inode), &data->args.seq_args, &data->res.seq_res, - task); + task)) + return; + nfs4_set_rw_stateid(&data->args.stateid, data->args.context, + data->args.lock_context, FMODE_WRITE); } static void nfs4_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data) diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index e3edda554ac7..9d328777b4c1 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -1506,35 +1506,12 @@ static void encode_putrootfh(struct xdr_stream *xdr, struct compound_hdr *hdr) encode_op_hdr(xdr, OP_PUTROOTFH, decode_putrootfh_maxsz, hdr); } -static void encode_open_stateid(struct xdr_stream *xdr, - const struct nfs_open_context *ctx, - const struct nfs_lock_context *l_ctx, - fmode_t fmode, - int zero_seqid) -{ - nfs4_stateid stateid; - - if (ctx->state != NULL) { - const struct nfs_lockowner *lockowner = NULL; - - if (l_ctx != NULL) - lockowner = &l_ctx->lockowner; - nfs4_select_rw_stateid(&stateid, ctx->state, - fmode, lockowner); - if (zero_seqid) - stateid.seqid = 0; - encode_nfs4_stateid(xdr, &stateid); - } else - encode_nfs4_stateid(xdr, &zero_stateid); -} - static void encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args, struct compound_hdr *hdr) { __be32 *p; encode_op_hdr(xdr, OP_READ, decode_read_maxsz, hdr); - encode_open_stateid(xdr, args->context, args->lock_context, - FMODE_READ, hdr->minorversion); + encode_nfs4_stateid(xdr, &args->stateid); p = reserve_space(xdr, 12); p = xdr_encode_hyper(p, args->offset); @@ -1670,8 +1647,7 @@ static void encode_write(struct xdr_stream *xdr, const struct nfs_writeargs *arg __be32 *p; encode_op_hdr(xdr, OP_WRITE, decode_write_maxsz, hdr); - encode_open_stateid(xdr, args->context, args->lock_context, - FMODE_WRITE, hdr->minorversion); + encode_nfs4_stateid(xdr, &args->stateid); p = reserve_space(xdr, 16); p = xdr_encode_hyper(p, args->offset); diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 4b993d358dad..90a4aa190b43 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -486,6 +486,7 @@ struct nfs_readargs { struct nfs_fh * fh; struct nfs_open_context *context; struct nfs_lock_context *lock_context; + nfs4_stateid stateid; __u64 offset; __u32 count; unsigned int pgbase; @@ -507,6 +508,7 @@ struct nfs_writeargs { struct nfs_fh * fh; struct nfs_open_context *context; struct nfs_lock_context *lock_context; + nfs4_stateid stateid; __u64 offset; __u32 count; enum nfs3_stable_how stable; -- cgit v1.2.3 From 3b66486c4c7136f8d4bbe1306d581fadc6bce4c7 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 17 Mar 2013 15:31:15 -0400 Subject: NFSv4.1: Select the "most recent locking state" for read/write/setattr stateids Follow the practice described in section 8.2.2 of RFC5661: When sending a read/write or setattr stateid, set the seqid field to zero in order to signal that the NFS server should apply the most recent locking state. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 3 ++- fs/nfs/nfs4state.c | 2 ++ include/linux/nfs_fs_sb.h | 1 + 3 files changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 22c80f9ed824..625a729ebcca 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -6841,7 +6841,8 @@ static const struct nfs4_minor_version_ops nfs_v4_1_minor_ops = { .init_caps = NFS_CAP_READDIRPLUS | NFS_CAP_ATOMIC_OPEN | NFS_CAP_CHANGE_ATTR - | NFS_CAP_POSIX_LOCK, + | NFS_CAP_POSIX_LOCK + | NFS_CAP_STATEID_NFSV41, .call_sync = nfs4_call_sync_sequence, .match_stateid = nfs41_match_stateid, .find_root_sec = nfs41_find_root_sec, diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 4e95bd72f480..685b1e953ed8 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1053,6 +1053,8 @@ int nfs4_select_rw_stateid(nfs4_stateid *dst, struct nfs4_state *state, goto out; ret = nfs4_copy_open_stateid(dst, state); out: + if (nfs_server_capable(state->inode, NFS_CAP_STATEID_NFSV41)) + dst->seqid = 0; return ret; } diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 6c6ed153a9b4..74c9e52c9338 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -197,5 +197,6 @@ struct nfs_server { #define NFS_CAP_MTIME (1U << 13) #define NFS_CAP_POSIX_LOCK (1U << 14) #define NFS_CAP_UIDGID_NOMAP (1U << 15) +#define NFS_CAP_STATEID_NFSV41 (1U << 16) #endif -- cgit v1.2.3 From 49f9a0fafd844c32f2abada047c0b9a5ba0d6255 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 15 Mar 2013 16:44:28 -0400 Subject: NFSv4.1: Enable open-by-filehandle Sometimes, we actually _want_ to do open-by-filehandle, for instance when recovering opens after a network partition, or when called from nfs4_file_open. Enable that functionality using a new capability NFS_CAP_ATOMIC_OPEN_V1, and which is only enabled for NFSv4.1 servers that support it. Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 2 ++ fs/nfs/nfs4proc.c | 53 ++++++++++++++++++++++++++++++++++++++++------- include/linux/nfs_fs_sb.h | 1 + 3 files changed, 49 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index f23f455be42b..e093e73178b7 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1486,6 +1486,8 @@ static int nfs4_lookup_revalidate(struct dentry *dentry, unsigned int flags) goto no_open; if (d_mountpoint(dentry)) goto no_open; + if (NFS_SB(dentry->d_sb)->caps & NFS_CAP_ATOMIC_OPEN_V1) + goto no_open; inode = dentry->d_inode; parent = dget_parent(dentry); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 7bbb06f280fc..732b76f703d6 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -767,6 +767,35 @@ struct nfs4_opendata { int cancelled; }; +static bool nfs4_clear_cap_atomic_open_v1(struct nfs_server *server, + int err, struct nfs4_exception *exception) +{ + if (err != -EINVAL) + return false; + if (!(server->caps & NFS_CAP_ATOMIC_OPEN_V1)) + return false; + server->caps &= ~NFS_CAP_ATOMIC_OPEN_V1; + exception->retry = 1; + return true; +} + +static enum open_claim_type4 +nfs4_map_atomic_open_claim(struct nfs_server *server, + enum open_claim_type4 claim) +{ + if (server->caps & NFS_CAP_ATOMIC_OPEN_V1) + return claim; + switch (claim) { + default: + return claim; + case NFS4_OPEN_CLAIM_FH: + return NFS4_OPEN_CLAIM_NULL; + case NFS4_OPEN_CLAIM_DELEG_CUR_FH: + return NFS4_OPEN_CLAIM_DELEGATE_CUR; + case NFS4_OPEN_CLAIM_DELEG_PREV_FH: + return NFS4_OPEN_CLAIM_DELEGATE_PREV; + } +} static void nfs4_init_opendata_res(struct nfs4_opendata *p) { @@ -818,8 +847,8 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, p->o_arg.server = server; p->o_arg.bitmask = server->attr_bitmask; p->o_arg.open_bitmap = &nfs4_fattr_bitmap[0]; - p->o_arg.claim = claim; - switch (claim) { + p->o_arg.claim = nfs4_map_atomic_open_claim(server, claim); + switch (p->o_arg.claim) { case NFS4_OPEN_CLAIM_NULL: case NFS4_OPEN_CLAIM_DELEGATE_CUR: case NFS4_OPEN_CLAIM_DELEGATE_PREV: @@ -1326,6 +1355,8 @@ static int nfs4_do_open_reclaim(struct nfs_open_context *ctx, struct nfs4_state int err; do { err = _nfs4_do_open_reclaim(ctx, state); + if (nfs4_clear_cap_atomic_open_v1(server, err, &exception)) + continue; if (err != -NFS4ERR_DELAY) break; nfs4_handle_exception(server, err, &exception); @@ -1741,7 +1772,7 @@ static int _nfs4_open_expired(struct nfs_open_context *ctx, struct nfs4_state *s int ret; opendata = nfs4_open_recoverdata_alloc(ctx, state, - NFS4_OPEN_CLAIM_NULL); + NFS4_OPEN_CLAIM_FH); if (IS_ERR(opendata)) return PTR_ERR(opendata); ret = nfs4_open_recover(opendata, state); @@ -1759,6 +1790,8 @@ static int nfs4_do_open_expired(struct nfs_open_context *ctx, struct nfs4_state do { err = _nfs4_open_expired(ctx, state); + if (nfs4_clear_cap_atomic_open_v1(server, err, &exception)) + continue; switch (err) { default: goto out; @@ -1926,6 +1959,7 @@ static int _nfs4_do_open(struct inode *dir, struct nfs4_state *state = NULL; struct nfs_server *server = NFS_SERVER(dir); struct nfs4_opendata *opendata; + enum open_claim_type4 claim = NFS4_OPEN_CLAIM_NULL; int status; /* Protect against reboot recovery conflicts */ @@ -1941,9 +1975,10 @@ static int _nfs4_do_open(struct inode *dir, if (dentry->d_inode != NULL) nfs4_return_incompatible_delegation(dentry->d_inode, fmode); status = -ENOMEM; + if (dentry->d_inode) + claim = NFS4_OPEN_CLAIM_FH; opendata = nfs4_opendata_alloc(dentry, sp, fmode, flags, sattr, - NFS4_OPEN_CLAIM_NULL, - GFP_KERNEL); + claim, GFP_KERNEL); if (opendata == NULL) goto err_put_state_owner; @@ -2001,6 +2036,7 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir, struct rpc_cred *cred, struct nfs4_threshold **ctx_th) { + struct nfs_server *server = NFS_SERVER(dir); struct nfs4_exception exception = { }; struct nfs4_state *res; int status; @@ -2044,7 +2080,9 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir, exception.retry = 1; continue; } - res = ERR_PTR(nfs4_handle_exception(NFS_SERVER(dir), + if (nfs4_clear_cap_atomic_open_v1(server, status, &exception)) + continue; + res = ERR_PTR(nfs4_handle_exception(server, status, &exception)); } while (exception.retry); return res; @@ -6858,7 +6896,8 @@ static const struct nfs4_minor_version_ops nfs_v4_1_minor_ops = { | NFS_CAP_ATOMIC_OPEN | NFS_CAP_CHANGE_ATTR | NFS_CAP_POSIX_LOCK - | NFS_CAP_STATEID_NFSV41, + | NFS_CAP_STATEID_NFSV41 + | NFS_CAP_ATOMIC_OPEN_V1, .call_sync = nfs4_call_sync_sequence, .match_stateid = nfs41_match_stateid, .find_root_sec = nfs41_find_root_sec, diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 74c9e52c9338..d8fdfdc7a8fe 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -198,5 +198,6 @@ struct nfs_server { #define NFS_CAP_POSIX_LOCK (1U << 14) #define NFS_CAP_UIDGID_NOMAP (1U << 15) #define NFS_CAP_STATEID_NFSV41 (1U << 16) +#define NFS_CAP_ATOMIC_OPEN_V1 (1U << 17) #endif -- cgit v1.2.3 From 577b42327d707fbe7166aad6902c2eeee6a65015 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 8 Apr 2013 21:38:12 -0400 Subject: NFS: Add functionality to allow waiting on all outstanding reads to complete This will later allow NFS locking code to wait for readahead to complete before releasing byte range locks. Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 1 + fs/nfs/internal.h | 7 +++++++ fs/nfs/pagelist.c | 51 ++++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/nfs_fs.h | 7 +++++++ 4 files changed, 66 insertions(+) (limited to 'include/linux') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 55b840f05ab2..c1c7a9d78722 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -561,6 +561,7 @@ static void nfs_init_lock_context(struct nfs_lock_context *l_ctx) l_ctx->lockowner.l_owner = current->files; l_ctx->lockowner.l_pid = current->tgid; INIT_LIST_HEAD(&l_ctx->list); + nfs_iocounter_init(&l_ctx->io_count); } static struct nfs_lock_context *__nfs_find_lock_context(struct nfs_open_context *ctx) diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 541c9ebdbc5a..91e59a39fc08 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -229,6 +229,13 @@ extern void nfs_pgheader_init(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr, void (*release)(struct nfs_pgio_header *hdr)); void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos); +int nfs_iocounter_wait(struct nfs_io_counter *c); + +static inline void nfs_iocounter_init(struct nfs_io_counter *c) +{ + c->flags = 0; + atomic_set(&c->io_count, 0); +} /* nfs2xdr.c */ extern struct rpc_procinfo nfs_procedures[]; diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 7f0933086b36..29cfb7ade121 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -84,6 +84,55 @@ nfs_page_free(struct nfs_page *p) kmem_cache_free(nfs_page_cachep, p); } +static void +nfs_iocounter_inc(struct nfs_io_counter *c) +{ + atomic_inc(&c->io_count); +} + +static void +nfs_iocounter_dec(struct nfs_io_counter *c) +{ + if (atomic_dec_and_test(&c->io_count)) { + clear_bit(NFS_IO_INPROGRESS, &c->flags); + smp_mb__after_clear_bit(); + wake_up_bit(&c->flags, NFS_IO_INPROGRESS); + } +} + +static int +__nfs_iocounter_wait(struct nfs_io_counter *c) +{ + wait_queue_head_t *wq = bit_waitqueue(&c->flags, NFS_IO_INPROGRESS); + DEFINE_WAIT_BIT(q, &c->flags, NFS_IO_INPROGRESS); + int ret = 0; + + do { + prepare_to_wait(wq, &q.wait, TASK_KILLABLE); + set_bit(NFS_IO_INPROGRESS, &c->flags); + if (atomic_read(&c->io_count) == 0) + break; + ret = nfs_wait_bit_killable(&c->flags); + } while (atomic_read(&c->io_count) != 0); + finish_wait(wq, &q.wait); + return ret; +} + +/** + * nfs_iocounter_wait - wait for i/o to complete + * @c: nfs_io_counter to use + * + * returns -ERESTARTSYS if interrupted by a fatal signal. + * Otherwise returns 0 once the io_count hits 0. + */ +int +nfs_iocounter_wait(struct nfs_io_counter *c) +{ + if (atomic_read(&c->io_count) == 0) + return 0; + return __nfs_iocounter_wait(c); +} + /** * nfs_create_request - Create an NFS read/write request. * @ctx: open context to use @@ -118,6 +167,7 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode, return ERR_CAST(l_ctx); } req->wb_lock_context = l_ctx; + nfs_iocounter_inc(&l_ctx->io_count); /* Initialize the request struct. Initially, we assume a * long write-back delay. This will be adjusted in @@ -177,6 +227,7 @@ static void nfs_clear_request(struct nfs_page *req) req->wb_page = NULL; } if (l_ctx != NULL) { + nfs_iocounter_dec(&l_ctx->io_count); nfs_put_lock_context(l_ctx); req->wb_lock_context = NULL; } diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index f6b1956f3c86..fc01d5cb4cf1 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -59,11 +59,18 @@ struct nfs_lockowner { pid_t l_pid; }; +#define NFS_IO_INPROGRESS 0 +struct nfs_io_counter { + unsigned long flags; + atomic_t io_count; +}; + struct nfs_lock_context { atomic_t count; struct list_head list; struct nfs_open_context *open_context; struct nfs_lockowner lockowner; + struct nfs_io_counter io_count; }; struct nfs4_state; -- cgit v1.2.3 From ba60eb25ff6be6f8e60488cdfd454e5c612bce60 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 14 Apr 2013 10:49:37 -0400 Subject: SUNRPC: Fix a livelock problem in the xprt->backlog queue This patch ensures that we throttle new RPC requests if there are requests already waiting in the xprt->backlog queue. The reason for doing this is to fix livelock issues that can occur when an existing (high priority) task is waiting in the backlog queue, gets woken up by xprt_free_slot(), but a new task then steals the slot. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 2 ++ net/sunrpc/clnt.c | 17 ++++++++++++- net/sunrpc/xprt.c | 61 ++++++++++++++++++++++++++++++++++++++++++--- 3 files changed, 76 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 30834be03011..12815f66a9c4 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -279,6 +279,7 @@ struct xprt_class { struct rpc_xprt *xprt_create_transport(struct xprt_create *args); void xprt_connect(struct rpc_task *task); void xprt_reserve(struct rpc_task *task); +void xprt_retry_reserve(struct rpc_task *task); int xprt_reserve_xprt(struct rpc_xprt *xprt, struct rpc_task *task); int xprt_reserve_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task); void xprt_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task); @@ -334,6 +335,7 @@ int xs_swapper(struct rpc_xprt *xprt, int enable); #define XPRT_CLOSING (6) #define XPRT_CONNECTION_ABORT (7) #define XPRT_CONNECTION_CLOSE (8) +#define XPRT_CONGESTED (9) static inline void xprt_set_connected(struct rpc_xprt *xprt) { diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index b95a0a2d5eea..a80ee9b80dcf 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1306,6 +1306,8 @@ call_reserve(struct rpc_task *task) xprt_reserve(task); } +static void call_retry_reserve(struct rpc_task *task); + /* * 1b. Grok the result of xprt_reserve() */ @@ -1347,7 +1349,7 @@ call_reserveresult(struct rpc_task *task) case -ENOMEM: rpc_delay(task, HZ >> 2); case -EAGAIN: /* woken up; retry */ - task->tk_action = call_reserve; + task->tk_action = call_retry_reserve; return; case -EIO: /* probably a shutdown */ break; @@ -1359,6 +1361,19 @@ call_reserveresult(struct rpc_task *task) rpc_exit(task, status); } +/* + * 1c. Retry reserving an RPC call slot + */ +static void +call_retry_reserve(struct rpc_task *task) +{ + dprint_status(task); + + task->tk_status = 0; + task->tk_action = call_reserveresult; + xprt_retry_reserve(task); +} + /* * 2. Bind and/or refresh the credentials */ diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index b7478d5e7ffd..745fca3cfd36 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -948,6 +948,34 @@ void xprt_transmit(struct rpc_task *task) spin_unlock_bh(&xprt->transport_lock); } +static void xprt_add_backlog(struct rpc_xprt *xprt, struct rpc_task *task) +{ + set_bit(XPRT_CONGESTED, &xprt->state); + rpc_sleep_on(&xprt->backlog, task, NULL); +} + +static void xprt_wake_up_backlog(struct rpc_xprt *xprt) +{ + if (rpc_wake_up_next(&xprt->backlog) == NULL) + clear_bit(XPRT_CONGESTED, &xprt->state); +} + +static bool xprt_throttle_congested(struct rpc_xprt *xprt, struct rpc_task *task) +{ + bool ret = false; + + if (!test_bit(XPRT_CONGESTED, &xprt->state)) + goto out; + spin_lock(&xprt->reserve_lock); + if (test_bit(XPRT_CONGESTED, &xprt->state)) { + rpc_sleep_on(&xprt->backlog, task, NULL); + ret = true; + } + spin_unlock(&xprt->reserve_lock); +out: + return ret; +} + static struct rpc_rqst *xprt_dynamic_alloc_slot(struct rpc_xprt *xprt, gfp_t gfp_flags) { struct rpc_rqst *req = ERR_PTR(-EAGAIN); @@ -992,7 +1020,7 @@ void xprt_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task) task->tk_status = -ENOMEM; break; case -EAGAIN: - rpc_sleep_on(&xprt->backlog, task, NULL); + xprt_add_backlog(xprt, task); dprintk("RPC: waiting for request slot\n"); default: task->tk_status = -EAGAIN; @@ -1028,7 +1056,7 @@ static void xprt_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req) memset(req, 0, sizeof(*req)); /* mark unused */ list_add(&req->rq_list, &xprt->free); } - rpc_wake_up_next(&xprt->backlog); + xprt_wake_up_backlog(xprt); spin_unlock(&xprt->reserve_lock); } @@ -1092,13 +1120,40 @@ EXPORT_SYMBOL_GPL(xprt_free); * xprt_reserve - allocate an RPC request slot * @task: RPC task requesting a slot allocation * - * If no more slots are available, place the task on the transport's + * If the transport is marked as being congested, or if no more + * slots are available, place the task on the transport's * backlog queue. */ void xprt_reserve(struct rpc_task *task) { struct rpc_xprt *xprt; + task->tk_status = 0; + if (task->tk_rqstp != NULL) + return; + + task->tk_timeout = 0; + task->tk_status = -EAGAIN; + rcu_read_lock(); + xprt = rcu_dereference(task->tk_client->cl_xprt); + if (!xprt_throttle_congested(xprt, task)) + xprt->ops->alloc_slot(xprt, task); + rcu_read_unlock(); +} + +/** + * xprt_retry_reserve - allocate an RPC request slot + * @task: RPC task requesting a slot allocation + * + * If no more slots are available, place the task on the transport's + * backlog queue. + * Note that the only difference with xprt_reserve is that we now + * ignore the value of the XPRT_CONGESTED flag. + */ +void xprt_retry_reserve(struct rpc_task *task) +{ + struct rpc_xprt *xprt; + task->tk_status = 0; if (task->tk_rqstp != NULL) return; -- cgit v1.2.3 From b7993cebb841b0da7a33e9d5ce301a9fd3209165 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 14 Apr 2013 11:42:00 -0400 Subject: SUNRPC: Allow rpc_create() to request that TCP slots be unlimited This is mainly for use by NFSv4.1, where the session negotiation ultimately wants to decide how many RPC slots we can fill. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/clnt.h | 1 + include/linux/sunrpc/xprt.h | 3 +++ net/sunrpc/clnt.c | 2 ++ net/sunrpc/xprtsock.c | 6 +++++- 4 files changed, 11 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 2cf4ffaa3cd4..e7d492ce7c18 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -124,6 +124,7 @@ struct rpc_create_args { #define RPC_CLNT_CREATE_NOPING (1UL << 4) #define RPC_CLNT_CREATE_DISCRTRY (1UL << 5) #define RPC_CLNT_CREATE_QUIET (1UL << 6) +#define RPC_CLNT_CREATE_INFINITE_SLOTS (1UL << 7) struct rpc_clnt *rpc_create(struct rpc_create_args *args); struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *, diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 12815f66a9c4..ff5392421cb2 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -255,6 +255,8 @@ static inline int bc_prealloc(struct rpc_rqst *req) } #endif /* CONFIG_SUNRPC_BACKCHANNEL */ +#define XPRT_CREATE_INFINITE_SLOTS (1U) + struct xprt_create { int ident; /* XPRT_TRANSPORT identifier */ struct net * net; @@ -263,6 +265,7 @@ struct xprt_create { size_t addrlen; const char *servername; struct svc_xprt *bc_xprt; /* NFSv4.1 backchannel */ + unsigned int flags; }; struct xprt_class { diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index a80ee9b80dcf..651245aa829a 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -414,6 +414,8 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args) }; char servername[48]; + if (args->flags & RPC_CLNT_CREATE_INFINITE_SLOTS) + xprtargs.flags |= XPRT_CREATE_INFINITE_SLOTS; /* * If the caller chooses not to specify a hostname, whip * up a string representation of the passed-in address. diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 3081620cb02c..726e702b7a29 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2762,9 +2762,13 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args) struct rpc_xprt *xprt; struct sock_xprt *transport; struct rpc_xprt *ret; + unsigned int max_slot_table_size = xprt_max_tcp_slot_table_entries; + + if (args->flags & XPRT_CREATE_INFINITE_SLOTS) + max_slot_table_size = RPC_MAX_SLOT_TABLE_LIMIT; xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries, - xprt_max_tcp_slot_table_entries); + max_slot_table_size); if (IS_ERR(xprt)) return xprt; transport = container_of(xprt, struct sock_xprt, xprt); -- cgit v1.2.3 From 98f98cf571b7b1f34683455a61dae9f35e7222a1 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 14 Apr 2013 11:49:51 -0400 Subject: NFSv4.1: Set the RPC_CLNT_CREATE_INFINITE_SLOTS flag for NFSv4.1 transports This ensures that the RPC layer doesn't override the NFS session negotiation. Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 2 ++ fs/nfs/nfs4client.c | 2 ++ include/linux/nfs_fs_sb.h | 1 + 3 files changed, 5 insertions(+) (limited to 'include/linux') diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 84d8eae203a7..c513b0cc835f 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -593,6 +593,8 @@ int nfs_create_rpc_client(struct nfs_client *clp, args.flags |= RPC_CLNT_CREATE_DISCRTRY; if (test_bit(NFS_CS_NORESVPORT, &clp->cl_flags)) args.flags |= RPC_CLNT_CREATE_NONPRIVPORT; + if (test_bit(NFS_CS_INFINITE_SLOTS, &clp->cl_flags)) + args.flags |= RPC_CLNT_CREATE_INFINITE_SLOTS; if (!IS_ERR(clp->cl_rpcclient)) return 0; diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 17b34b2da2df..f4d4d4ec6bf7 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -198,6 +198,8 @@ struct nfs_client *nfs4_init_client(struct nfs_client *clp, /* Check NFS protocol revision and initialize RPC op vector */ clp->rpc_ops = &nfs_v4_clientops; + if (clp->cl_minorversion != 0) + __set_bit(NFS_CS_INFINITE_SLOTS, &clp->cl_flags); __set_bit(NFS_CS_DISCRTRY, &clp->cl_flags); error = nfs_create_rpc_client(clp, timeparms, authflavour); if (error < 0) diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index d8fdfdc7a8fe..3b7fa2abecca 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -40,6 +40,7 @@ struct nfs_client { #define NFS_CS_NORESVPORT 0 /* - use ephemeral src port */ #define NFS_CS_DISCRTRY 1 /* - disconnect on RPC retry */ #define NFS_CS_MIGRATION 2 /* - transparent state migr */ +#define NFS_CS_INFINITE_SLOTS 3 /* - don't limit TCP slots */ struct sockaddr_storage cl_addr; /* server identifier */ size_t cl_addrlen; char * cl_hostname; /* hostname of server */ -- cgit v1.2.3 From 549b19cc9f31e8fdda317625d564bac0052a3328 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 16 Apr 2013 18:42:34 -0400 Subject: NFSv4: Record the OPEN create mode used in the nfs4_opendata structure If we're doing NFSv4.1 against a server that has persistent sessions, then we should not need to call SETATTR in order to reset the file attributes immediately after doing an exclusive create. Note that since the create mode depends on the type of session that has been negotiated with the server, we should not choose the mode until after we've got a session slot. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 16 ++++++++++++++-- fs/nfs/nfs4xdr.c | 37 ++++++++++++++++--------------------- include/linux/nfs_xdr.h | 1 + 3 files changed, 31 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index af05df3c7c79..282d9fa6994a 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1525,6 +1525,7 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata) { struct nfs4_opendata *data = calldata; struct nfs4_state_owner *sp = data->owner; + struct nfs_client *clp = sp->so_server->nfs_client; if (nfs_wait_on_sequence(data->o_arg.seqid, task) != 0) goto out_wait; @@ -1545,7 +1546,7 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata) rcu_read_unlock(); } /* Update client id. */ - data->o_arg.clientid = sp->so_server->nfs_client->cl_clientid; + data->o_arg.clientid = clp->cl_clientid; if (data->o_arg.claim == NFS4_OPEN_CLAIM_PREVIOUS) { task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_NOATTR]; data->o_arg.open_bitmap = &nfs4_open_noattr_bitmap[0]; @@ -1557,6 +1558,16 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata) &data->o_res.seq_res, task) != 0) nfs_release_seqid(data->o_arg.seqid); + + /* Set the create mode (note dependency on the session type) */ + data->o_arg.createmode = NFS4_CREATE_UNCHECKED; + if (data->o_arg.open_flags & O_EXCL) { + data->o_arg.createmode = NFS4_CREATE_EXCLUSIVE; + if (nfs4_has_persistent_session(clp)) + data->o_arg.createmode = NFS4_CREATE_GUARDED; + else if (clp->cl_mvops->minor_version > 0) + data->o_arg.createmode = NFS4_CREATE_EXCLUSIVE4_1; + } return; unlock_no_action: rcu_read_unlock(); @@ -2000,7 +2011,8 @@ static int _nfs4_do_open(struct inode *dir, if (status != 0) goto err_opendata_put; - if (opendata->o_arg.open_flags & O_EXCL) { + if ((opendata->o_arg.open_flags & O_EXCL) && + (opendata->o_arg.createmode != NFS4_CREATE_GUARDED)) { nfs4_exclusive_attrset(opendata, sattr); nfs_fattr_init(opendata->o_res.f_attr); diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 0b744895b9e1..fef71cbec501 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -1366,33 +1366,28 @@ static inline void encode_openhdr(struct xdr_stream *xdr, const struct nfs_opena static inline void encode_createmode(struct xdr_stream *xdr, const struct nfs_openargs *arg) { + struct iattr dummy; __be32 *p; - struct nfs_client *clp; p = reserve_space(xdr, 4); - switch(arg->open_flags & O_EXCL) { - case 0: + switch(arg->createmode) { + case NFS4_CREATE_UNCHECKED: *p = cpu_to_be32(NFS4_CREATE_UNCHECKED); encode_attrs(xdr, arg->u.attrs, arg->server); break; - default: - clp = arg->server->nfs_client; - if (clp->cl_mvops->minor_version > 0) { - if (nfs4_has_persistent_session(clp)) { - *p = cpu_to_be32(NFS4_CREATE_GUARDED); - encode_attrs(xdr, arg->u.attrs, arg->server); - } else { - struct iattr dummy; - - *p = cpu_to_be32(NFS4_CREATE_EXCLUSIVE4_1); - encode_nfs4_verifier(xdr, &arg->u.verifier); - dummy.ia_valid = 0; - encode_attrs(xdr, &dummy, arg->server); - } - } else { - *p = cpu_to_be32(NFS4_CREATE_EXCLUSIVE); - encode_nfs4_verifier(xdr, &arg->u.verifier); - } + case NFS4_CREATE_GUARDED: + *p = cpu_to_be32(NFS4_CREATE_GUARDED); + encode_attrs(xdr, arg->u.attrs, arg->server); + break; + case NFS4_CREATE_EXCLUSIVE: + *p = cpu_to_be32(NFS4_CREATE_EXCLUSIVE); + encode_nfs4_verifier(xdr, &arg->u.verifier); + break; + case NFS4_CREATE_EXCLUSIVE4_1: + *p = cpu_to_be32(NFS4_CREATE_EXCLUSIVE4_1); + encode_nfs4_verifier(xdr, &arg->u.verifier); + dummy.ia_valid = 0; + encode_attrs(xdr, &dummy, arg->server); } } diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 90a4aa190b43..bdc100f66dfb 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -349,6 +349,7 @@ struct nfs_openargs { const u32 * bitmask; const u32 * open_bitmap; __u32 claim; + enum createmode4 createmode; }; struct nfs_openres { -- cgit v1.2.3