diff options
| author | Radim Krčmář <rkrcmar@redhat.com> | 2018-02-01 15:04:17 +0100 | 
|---|---|---|
| committer | Radim Krčmář <rkrcmar@redhat.com> | 2018-02-01 15:04:17 +0100 | 
| commit | 7bf14c28ee776be567855bd39ed8ff795ea19f55 (patch) | |
| tree | 6113748c673e85fccc2c56c050697789c00c6bc2 /fs/btrfs/super.c | |
| parent | 87cedc6be55954c6efd6eca2e694132513f65a2a (diff) | |
| parent | 5fa4ec9cb2e6679e2f828033726f758ea314b9c5 (diff) | |
Merge branch 'x86/hyperv' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Topic branch for stable KVM clocksource under Hyper-V.
Thanks to Christoffer Dall for resolving the ARM conflict.
Diffstat (limited to 'fs/btrfs/super.c')
| -rw-r--r-- | fs/btrfs/super.c | 348 | 
1 files changed, 187 insertions, 161 deletions
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 3a4dce153645..6e71a2a78363 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -61,12 +61,21 @@  #include "tests/btrfs-tests.h"  #include "qgroup.h" -#include "backref.h"  #define CREATE_TRACE_POINTS  #include <trace/events/btrfs.h>  static const struct super_operations btrfs_super_ops; + +/* + * Types for mounting the default subvolume and a subvolume explicitly + * requested by subvol=/path. That way the callchain is straightforward and we + * don't have to play tricks with the mount options and recursive calls to + * btrfs_mount. + * + * The new btrfs_root_fs_type also servers as a tag for the bdev_holder. + */  static struct file_system_type btrfs_fs_type; +static struct file_system_type btrfs_root_fs_type;  static int btrfs_remount(struct super_block *sb, int *flags, char *data); @@ -98,30 +107,6 @@ const char *btrfs_decode_error(int errno)  	return errstr;  } -/* btrfs handle error by forcing the filesystem readonly */ -static void btrfs_handle_error(struct btrfs_fs_info *fs_info) -{ -	struct super_block *sb = fs_info->sb; - -	if (sb_rdonly(sb)) -		return; - -	if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) { -		sb->s_flags |= SB_RDONLY; -		btrfs_info(fs_info, "forced readonly"); -		/* -		 * Note that a running device replace operation is not -		 * canceled here although there is no way to update -		 * the progress. It would add the risk of a deadlock, -		 * therefore the canceling is omitted. The only penalty -		 * is that some I/O remains active until the procedure -		 * completes. The next time when the filesystem is -		 * mounted writeable again, the device replace -		 * operation continues. -		 */ -	} -} -  /*   * __btrfs_handle_fs_error decodes expected errors from the caller and   * invokes the approciate error response. 
@@ -168,8 +153,23 @@ void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function  	set_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state);  	/* Don't go through full error handling during mount */ -	if (sb->s_flags & SB_BORN) -		btrfs_handle_error(fs_info); +	if (!(sb->s_flags & SB_BORN)) +		return; + +	if (sb_rdonly(sb)) +		return; + +	/* btrfs handle error by forcing the filesystem readonly */ +	sb->s_flags |= SB_RDONLY; +	btrfs_info(fs_info, "forced readonly"); +	/* +	 * Note that a running device replace operation is not canceled here +	 * although there is no way to update the progress. It would add the +	 * risk of a deadlock, therefore the canceling is omitted. The only +	 * penalty is that some I/O remains active until the procedure +	 * completes. The next time when the filesystem is mounted writeable +	 * again, the device replace operation continues. +	 */  }  #ifdef CONFIG_PRINTK @@ -405,7 +405,7 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,  			unsigned long new_flags)  {  	substring_t args[MAX_OPT_ARGS]; -	char *p, *num, *orig = NULL; +	char *p, *num;  	u64 cache_gen;  	int intarg;  	int ret = 0; @@ -428,16 +428,6 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,  	if (!options)  		goto check; -	/* -	 * strsep changes the string, duplicate it because parse_options -	 * gets called twice -	 */ -	options = kstrdup(options, GFP_KERNEL); -	if (!options) -		return -ENOMEM; - -	orig = options; -  	while ((p = strsep(&options, ",")) != NULL) {  		int token;  		if (!*p) @@ -454,7 +444,8 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,  		case Opt_subvolrootid:  		case Opt_device:  			/* -			 * These are parsed by btrfs_parse_early_options +			 * These are parsed by btrfs_parse_subvol_options +			 * and btrfs_parse_early_options  			 * and can be happily ignored here.  			 
*/  			break; @@ -877,7 +868,6 @@ out:  		btrfs_info(info, "disk space caching is enabled");  	if (!ret && btrfs_test_opt(info, FREE_SPACE_TREE))  		btrfs_info(info, "using free space tree"); -	kfree(orig);  	return ret;  } @@ -888,11 +878,60 @@ out:   * only when we need to allocate a new super block.   */  static int btrfs_parse_early_options(const char *options, fmode_t flags, -		void *holder, char **subvol_name, u64 *subvol_objectid, -		struct btrfs_fs_devices **fs_devices) +		void *holder, struct btrfs_fs_devices **fs_devices)  {  	substring_t args[MAX_OPT_ARGS];  	char *device_name, *opts, *orig, *p; +	int error = 0; + +	if (!options) +		return 0; + +	/* +	 * strsep changes the string, duplicate it because btrfs_parse_options +	 * gets called later +	 */ +	opts = kstrdup(options, GFP_KERNEL); +	if (!opts) +		return -ENOMEM; +	orig = opts; + +	while ((p = strsep(&opts, ",")) != NULL) { +		int token; + +		if (!*p) +			continue; + +		token = match_token(p, tokens, args); +		if (token == Opt_device) { +			device_name = match_strdup(&args[0]); +			if (!device_name) { +				error = -ENOMEM; +				goto out; +			} +			error = btrfs_scan_one_device(device_name, +					flags, holder, fs_devices); +			kfree(device_name); +			if (error) +				goto out; +		} +	} + +out: +	kfree(orig); +	return error; +} + +/* + * Parse mount options that are related to subvolume id + * + * The value is later passed to mount_subvol() + */ +static int btrfs_parse_subvol_options(const char *options, fmode_t flags, +		char **subvol_name, u64 *subvol_objectid) +{ +	substring_t args[MAX_OPT_ARGS]; +	char *opts, *orig, *p;  	char *num = NULL;  	int error = 0; @@ -900,8 +939,8 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags,  		return 0;  	/* -	 * strsep changes the string, duplicate it because parse_options -	 * gets called twice +	 * strsep changes the string, duplicate it because +	 * btrfs_parse_early_options gets called later  	 */  	opts = kstrdup(options, 
GFP_KERNEL);  	if (!opts) @@ -940,18 +979,6 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags,  		case Opt_subvolrootid:  			pr_warn("BTRFS: 'subvolrootid' mount option is deprecated and has no effect\n");  			break; -		case Opt_device: -			device_name = match_strdup(&args[0]); -			if (!device_name) { -				error = -ENOMEM; -				goto out; -			} -			error = btrfs_scan_one_device(device_name, -					flags, holder, fs_devices); -			kfree(device_name); -			if (error) -				goto out; -			break;  		default:  			break;  		} @@ -1243,7 +1270,7 @@ int btrfs_sync_fs(struct super_block *sb, int wait)  static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)  {  	struct btrfs_fs_info *info = btrfs_sb(dentry->d_sb); -	char *compress_type; +	const char *compress_type;  	if (btrfs_test_opt(info, DEGRADED))  		seq_puts(seq, ",degraded"); @@ -1259,12 +1286,7 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)  					     num_online_cpus() + 2, 8))  		seq_printf(seq, ",thread_pool=%d", info->thread_pool_size);  	if (btrfs_test_opt(info, COMPRESS)) { -		if (info->compress_type == BTRFS_COMPRESS_ZLIB) -			compress_type = "zlib"; -		else if (info->compress_type == BTRFS_COMPRESS_LZO) -			compress_type = "lzo"; -		else -			compress_type = "zstd"; +		compress_type = btrfs_compress_type2str(info->compress_type);  		if (btrfs_test_opt(info, FORCE_COMPRESS))  			seq_printf(seq, ",compress-force=%s", compress_type);  		else @@ -1365,86 +1387,12 @@ static inline int is_subvolume_inode(struct inode *inode)  	return 0;  } -/* - * This will add subvolid=0 to the argument string while removing any subvol= - * and subvolid= arguments to make sure we get the top-level root for path - * walking to the subvol we want. - */ -static char *setup_root_args(char *args) -{ -	char *buf, *dst, *sep; - -	if (!args) -		return kstrdup("subvolid=0", GFP_KERNEL); - -	/* The worst case is that we add ",subvolid=0" to the end. 
*/ -	buf = dst = kmalloc(strlen(args) + strlen(",subvolid=0") + 1, -			GFP_KERNEL); -	if (!buf) -		return NULL; - -	while (1) { -		sep = strchrnul(args, ','); -		if (!strstarts(args, "subvol=") && -		    !strstarts(args, "subvolid=")) { -			memcpy(dst, args, sep - args); -			dst += sep - args; -			*dst++ = ','; -		} -		if (*sep) -			args = sep + 1; -		else -			break; -	} -	strcpy(dst, "subvolid=0"); - -	return buf; -} -  static struct dentry *mount_subvol(const char *subvol_name, u64 subvol_objectid, -				   int flags, const char *device_name, -				   char *data) +				   const char *device_name, struct vfsmount *mnt)  {  	struct dentry *root; -	struct vfsmount *mnt = NULL; -	char *newargs;  	int ret; -	newargs = setup_root_args(data); -	if (!newargs) { -		root = ERR_PTR(-ENOMEM); -		goto out; -	} - -	mnt = vfs_kern_mount(&btrfs_fs_type, flags, device_name, newargs); -	if (PTR_ERR_OR_ZERO(mnt) == -EBUSY) { -		if (flags & SB_RDONLY) { -			mnt = vfs_kern_mount(&btrfs_fs_type, flags & ~SB_RDONLY, -					     device_name, newargs); -		} else { -			mnt = vfs_kern_mount(&btrfs_fs_type, flags | SB_RDONLY, -					     device_name, newargs); -			if (IS_ERR(mnt)) { -				root = ERR_CAST(mnt); -				mnt = NULL; -				goto out; -			} - -			down_write(&mnt->mnt_sb->s_umount); -			ret = btrfs_remount(mnt->mnt_sb, &flags, NULL); -			up_write(&mnt->mnt_sb->s_umount); -			if (ret < 0) { -				root = ERR_PTR(ret); -				goto out; -			} -		} -	} -	if (IS_ERR(mnt)) { -		root = ERR_CAST(mnt); -		mnt = NULL; -		goto out; -	} -  	if (!subvol_name) {  		if (!subvol_objectid) {  			ret = get_default_subvol_objectid(btrfs_sb(mnt->mnt_sb), @@ -1500,7 +1448,6 @@ static struct dentry *mount_subvol(const char *subvol_name, u64 subvol_objectid,  out:  	mntput(mnt); -	kfree(newargs);  	kfree(subvol_name);  	return root;  } @@ -1558,11 +1505,11 @@ static int setup_security_options(struct btrfs_fs_info *fs_info,  /*   * Find a superblock for the given device / mount point.   
* - * Note:  This is based on get_sb_bdev from fs/super.c with a few additions - *	  for multiple device setup.  Make sure to keep it in sync. + * Note: This is based on mount_bdev from fs/super.c with a few additions + *       for multiple device setup.  Make sure to keep it in sync.   */ -static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, -		const char *device_name, void *data) +static struct dentry *btrfs_mount_root(struct file_system_type *fs_type, +		int flags, const char *device_name, void *data)  {  	struct block_device *bdev = NULL;  	struct super_block *s; @@ -1570,27 +1517,17 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,  	struct btrfs_fs_info *fs_info = NULL;  	struct security_mnt_opts new_sec_opts;  	fmode_t mode = FMODE_READ; -	char *subvol_name = NULL; -	u64 subvol_objectid = 0;  	int error = 0;  	if (!(flags & SB_RDONLY))  		mode |= FMODE_WRITE;  	error = btrfs_parse_early_options(data, mode, fs_type, -					  &subvol_name, &subvol_objectid,  					  &fs_devices);  	if (error) { -		kfree(subvol_name);  		return ERR_PTR(error);  	} -	if (subvol_name || subvol_objectid != BTRFS_FS_TREE_OBJECTID) { -		/* mount_subvol() will free subvol_name. */ -		return mount_subvol(subvol_name, subvol_objectid, flags, -				    device_name, data); -	} -  	security_init_mnt_opts(&new_sec_opts);  	if (data) {  		error = parse_security_options(data, &new_sec_opts); @@ -1674,6 +1611,84 @@ error_sec_opts:  	return ERR_PTR(error);  } +/* + * Mount function which is called by VFS layer. + * + * In order to allow mounting a subvolume directly, btrfs uses mount_subtree() + * which needs vfsmount* of device's root (/).  This means device's root has to + * be mounted internally in any case. + * + * Operation flow: + *   1. Parse subvol id related options for later use in mount_subvol(). + * + *   2. Mount device's root (/) by calling vfs_kern_mount(). 
+ * + *      NOTE: vfs_kern_mount() is used by VFS to call btrfs_mount() in the + *      first place. In order to avoid calling btrfs_mount() again, we use + *      different file_system_type which is not registered to VFS by + *      register_filesystem() (btrfs_root_fs_type). As a result, + *      btrfs_mount_root() is called. The return value will be used by + *      mount_subtree() in mount_subvol(). + * + *   3. Call mount_subvol() to get the dentry of subvolume. Since there is + *      "btrfs subvolume set-default", mount_subvol() is called always. + */ +static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, +		const char *device_name, void *data) +{ +	struct vfsmount *mnt_root; +	struct dentry *root; +	fmode_t mode = FMODE_READ; +	char *subvol_name = NULL; +	u64 subvol_objectid = 0; +	int error = 0; + +	if (!(flags & SB_RDONLY)) +		mode |= FMODE_WRITE; + +	error = btrfs_parse_subvol_options(data, mode, +					  &subvol_name, &subvol_objectid); +	if (error) { +		kfree(subvol_name); +		return ERR_PTR(error); +	} + +	/* mount device's root (/) */ +	mnt_root = vfs_kern_mount(&btrfs_root_fs_type, flags, device_name, data); +	if (PTR_ERR_OR_ZERO(mnt_root) == -EBUSY) { +		if (flags & SB_RDONLY) { +			mnt_root = vfs_kern_mount(&btrfs_root_fs_type, +				flags & ~SB_RDONLY, device_name, data); +		} else { +			mnt_root = vfs_kern_mount(&btrfs_root_fs_type, +				flags | SB_RDONLY, device_name, data); +			if (IS_ERR(mnt_root)) { +				root = ERR_CAST(mnt_root); +				goto out; +			} + +			down_write(&mnt_root->mnt_sb->s_umount); +			error = btrfs_remount(mnt_root->mnt_sb, &flags, NULL); +			up_write(&mnt_root->mnt_sb->s_umount); +			if (error < 0) { +				root = ERR_PTR(error); +				mntput(mnt_root); +				goto out; +			} +		} +	} +	if (IS_ERR(mnt_root)) { +		root = ERR_CAST(mnt_root); +		goto out; +	} + +	/* mount_subvol() will free subvol_name and mnt_root */ +	root = mount_subvol(subvol_name, subvol_objectid, device_name, mnt_root); + +out: +	return 
root; +} +  static void btrfs_resize_thread_pool(struct btrfs_fs_info *fs_info,  				     int new_pool_size, int old_pool_size)  { @@ -1820,7 +1835,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)  			goto restore;  		} -		if (!btrfs_check_rw_degradable(fs_info)) { +		if (!btrfs_check_rw_degradable(fs_info, NULL)) {  			btrfs_warn(fs_info,  				"too many missing devices, writeable remount is not allowed");  			ret = -EACCES; @@ -1972,8 +1987,10 @@ static int btrfs_calc_avail_data_space(struct btrfs_fs_info *fs_info,  	rcu_read_lock();  	list_for_each_entry_rcu(device, &fs_devices->devices, dev_list) { -		if (!device->in_fs_metadata || !device->bdev || -		    device->is_tgtdev_for_dev_replace) +		if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, +						&device->dev_state) || +		    !device->bdev || +		    test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state))  			continue;  		if (i >= nr_devices) @@ -2174,6 +2191,15 @@ static struct file_system_type btrfs_fs_type = {  	.kill_sb	= btrfs_kill_super,  	.fs_flags	= FS_REQUIRES_DEV | FS_BINARY_MOUNTDATA,  }; + +static struct file_system_type btrfs_root_fs_type = { +	.owner		= THIS_MODULE, +	.name		= "btrfs", +	.mount		= btrfs_mount_root, +	.kill_sb	= btrfs_kill_super, +	.fs_flags	= FS_REQUIRES_DEV | FS_BINARY_MOUNTDATA, +}; +  MODULE_ALIAS_FS("btrfs");  static int btrfs_control_open(struct inode *inode, struct file *file) @@ -2207,11 +2233,11 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd,  	switch (cmd) {  	case BTRFS_IOC_SCAN_DEV:  		ret = btrfs_scan_one_device(vol->name, FMODE_READ, -					    &btrfs_fs_type, &fs_devices); +					    &btrfs_root_fs_type, &fs_devices);  		break;  	case BTRFS_IOC_DEVICES_READY:  		ret = btrfs_scan_one_device(vol->name, FMODE_READ, -					    &btrfs_fs_type, &fs_devices); +					    &btrfs_root_fs_type, &fs_devices);  		if (ret)  			break;  		ret = !(fs_devices->num_devices == fs_devices->total_devices); @@ -2269,7 +2295,7 @@ static int 
btrfs_show_devname(struct seq_file *m, struct dentry *root)  	while (cur_devices) {  		head = &cur_devices->devices;  		list_for_each_entry(dev, head, dev_list) { -			if (dev->missing) +			if (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state))  				continue;  			if (!dev->name)  				continue; @@ -2324,7 +2350,7 @@ static struct miscdevice btrfs_misc = {  MODULE_ALIAS_MISCDEV(BTRFS_MINOR);  MODULE_ALIAS("devname:btrfs-control"); -static int btrfs_interface_init(void) +static int __init btrfs_interface_init(void)  {  	return misc_register(&btrfs_misc);  } @@ -2334,7 +2360,7 @@ static void btrfs_interface_exit(void)  	misc_deregister(&btrfs_misc);  } -static void btrfs_print_mod_info(void) +static void __init btrfs_print_mod_info(void)  {  	pr_info("Btrfs loaded, crc32c=%s"  #ifdef CONFIG_BTRFS_DEBUG  | 
