diff options
Diffstat (limited to 'fs/userfaultfd.c')
| -rw-r--r-- | fs/userfaultfd.c | 66 | 
1 files changed, 56 insertions, 10 deletions
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index ef4b48d1ea42..1c713fd5b3e6 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -588,6 +588,12 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
 			break;
 		if (ACCESS_ONCE(ctx->released) ||
 		    fatal_signal_pending(current)) {
+			/*
+			 * &ewq->wq may be queued in fork_event, but
+			 * __remove_wait_queue ignores the head
+			 * parameter. It would be a problem if it
+			 * didn't.
+			 */
 			__remove_wait_queue(&ctx->event_wqh, &ewq->wq);
 			if (ewq->msg.event == UFFD_EVENT_FORK) {
 				struct userfaultfd_ctx *new;
@@ -1061,6 +1067,12 @@ static ssize_t userfaultfd_ctx_read(struct userfaultfd_ctx *ctx, int no_wait,
 					(unsigned long)
 					uwq->msg.arg.reserved.reserved1;
 				list_move(&uwq->wq.entry, &fork_event);
+				/*
+				 * fork_nctx can be freed as soon as
+				 * we drop the lock, unless we take a
+				 * reference on it.
+				 */
+				userfaultfd_ctx_get(fork_nctx);
 				spin_unlock(&ctx->event_wqh.lock);
 				ret = 0;
 				break;
@@ -1091,19 +1103,53 @@ static ssize_t userfaultfd_ctx_read(struct userfaultfd_ctx *ctx, int no_wait,
 
 	if (!ret && msg->event == UFFD_EVENT_FORK) {
 		ret = resolve_userfault_fork(ctx, fork_nctx, msg);
+		spin_lock(&ctx->event_wqh.lock);
+		if (!list_empty(&fork_event)) {
+			/*
+			 * The fork thread didn't abort, so we can
+			 * drop the temporary refcount.
+			 */
+			userfaultfd_ctx_put(fork_nctx);
+
+			uwq = list_first_entry(&fork_event,
+					       typeof(*uwq),
+					       wq.entry);
+			/*
+			 * If fork_event list wasn't empty and in turn
+			 * the event wasn't already released by fork
+			 * (the event is allocated on fork kernel
+			 * stack), put the event back to its place in
+			 * the event_wq. fork_event head will be freed
+			 * as soon as we return so the event cannot
+			 * stay queued there no matter the current
+			 * "ret" value.
+			 */
+			list_del(&uwq->wq.entry);
+			__add_wait_queue(&ctx->event_wqh, &uwq->wq);
 
-		if (!ret) {
-			spin_lock(&ctx->event_wqh.lock);
-			if (!list_empty(&fork_event)) {
-				uwq = list_first_entry(&fork_event,
-						       typeof(*uwq),
-						       wq.entry);
-				list_del(&uwq->wq.entry);
-				__add_wait_queue(&ctx->event_wqh, &uwq->wq);
+			/*
+			 * Leave the event in the waitqueue and report
+			 * error to userland if we failed to resolve
+			 * the userfault fork.
+			 */
+			if (likely(!ret))
 				userfaultfd_event_complete(ctx, uwq);
-			}
-			spin_unlock(&ctx->event_wqh.lock);
+		} else {
+			/*
+			 * Here the fork thread aborted and the
+			 * refcount from the fork thread on fork_nctx
+			 * has already been released. We still hold
+			 * the reference we took before releasing the
+			 * lock above. If resolve_userfault_fork
+			 * failed we've to drop it because the
+			 * fork_nctx has to be freed in such case. If
+			 * it succeeded we'll hold it because the new
+			 * uffd references it.
+			 */
+			if (ret)
+				userfaultfd_ctx_put(fork_nctx);
 		}
+		spin_unlock(&ctx->event_wqh.lock);
 	}
 
 	return ret;
