summary | refs | log | tree | commit | diff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r-- fs/btrfs/tree-log.c 48
-rw-r--r-- fs/exfat/dir.c 12
-rw-r--r-- fs/exfat/fatent.c 10
-rw-r--r-- fs/exfat/file.c 5
-rw-r--r-- fs/exfat/namei.c 5
-rw-r--r-- fs/exfat/super.c 32
-rw-r--r-- fs/f2fs/checkpoint.c 8
-rw-r--r-- fs/f2fs/compress.c 120
-rw-r--r-- fs/f2fs/data.c 183
-rw-r--r-- fs/f2fs/debug.c 21
-rw-r--r-- fs/f2fs/dir.c 4
-rw-r--r-- fs/f2fs/extent_cache.c 10
-rw-r--r-- fs/f2fs/f2fs.h 151
-rw-r--r-- fs/f2fs/file.c 107
-rw-r--r-- fs/f2fs/gc.c 54
-rw-r--r-- fs/f2fs/gc.h 5
-rw-r--r-- fs/f2fs/inline.c 20
-rw-r--r-- fs/f2fs/inode.c 84
-rw-r--r-- fs/f2fs/namei.c 12
-rw-r--r-- fs/f2fs/node.c 261
-rw-r--r-- fs/f2fs/node.h 77
-rw-r--r-- fs/f2fs/recovery.c 116
-rw-r--r-- fs/f2fs/segment.c 62
-rw-r--r-- fs/f2fs/segment.h 59
-rw-r--r-- fs/f2fs/super.c 2111
-rw-r--r-- fs/f2fs/sysfs.c 48
-rw-r--r-- fs/fat/fatent.c 2
-rw-r--r-- fs/fat/misc.c 6
-rw-r--r-- fs/nfs/blocklayout/blocklayout.c 4
-rw-r--r-- fs/nfs/blocklayout/dev.c 5
-rw-r--r-- fs/nfs/blocklayout/extent_tree.c 104
-rw-r--r-- fs/nfs/client.c 47
-rw-r--r-- fs/nfs/delegation.c 114
-rw-r--r-- fs/nfs/delegation.h 3
-rw-r--r-- fs/nfs/dir.c 4
-rw-r--r-- fs/nfs/export.c 11
-rw-r--r-- fs/nfs/flexfilelayout/flexfilelayout.c 26
-rw-r--r-- fs/nfs/flexfilelayout/flexfilelayoutdev.c 6
-rw-r--r-- fs/nfs/fs_context.c 42
-rw-r--r-- fs/nfs/inode.c 69
-rw-r--r-- fs/nfs/internal.h 12
-rw-r--r-- fs/nfs/localio.c 7
-rw-r--r-- fs/nfs/mount_clnt.c 68
-rw-r--r-- fs/nfs/nfs4_fs.h 5
-rw-r--r-- fs/nfs/nfs4client.c 185
-rw-r--r-- fs/nfs/nfs4file.c 25
-rw-r--r-- fs/nfs/nfs4getroot.c 14
-rw-r--r-- fs/nfs/nfs4proc.c 139
-rw-r--r-- fs/nfs/nfs4trace.c 2
-rw-r--r-- fs/nfs/nfs4trace.h 168
-rw-r--r-- fs/nfs/nfs4xdr.c 24
-rw-r--r-- fs/nfs/nfstrace.h 11
-rw-r--r-- fs/nfs/pnfs.c 39
-rw-r--r-- fs/nfs/pnfs_nfs.c 14
-rw-r--r-- fs/nfs/write.c 8
-rw-r--r-- fs/nfs_common/nfslocalio.c 28
-rw-r--r-- fs/ocfs2/aops.c 1
-rw-r--r-- fs/ocfs2/dir.c 8
-rw-r--r-- fs/ocfs2/dlm/dlmrecovery.c 2
-rw-r--r-- fs/ocfs2/inode.c 70
-rw-r--r-- fs/ocfs2/move_extents.c 19
-rw-r--r-- fs/ocfs2/namei.c 11
-rw-r--r-- fs/ocfs2/stack_user.c 15
-rw-r--r-- fs/proc/vmcore.c 29
-rw-r--r-- fs/smb/client/Makefile 2
-rw-r--r-- fs/smb/client/cifs_debug.c 24
-rw-r--r-- fs/smb/client/cifsfs.c 8
-rw-r--r-- fs/smb/client/cifsglob.h 23
-rw-r--r-- fs/smb/client/cifsproto.h 15
-rw-r--r-- fs/smb/client/cifssmb.c 4
-rw-r--r-- fs/smb/client/cifstransport.c 566
-rw-r--r-- fs/smb/client/connect.c 35
-rw-r--r-- fs/smb/client/fs_context.c 19
-rw-r--r-- fs/smb/client/fs_context.h 18
-rw-r--r-- fs/smb/client/link.c 13
-rw-r--r-- fs/smb/client/reparse.c 2
-rw-r--r-- fs/smb/client/smb1ops.c 19
-rw-r--r-- fs/smb/client/smb2inode.c 5
-rw-r--r-- fs/smb/client/smb2ops.c 63
-rw-r--r-- fs/smb/client/smb2transport.c 4
-rw-r--r-- fs/smb/client/smbdirect.c 465
-rw-r--r-- fs/smb/client/smbdirect.h 92
-rw-r--r-- fs/smb/client/transport.c 602
-rw-r--r-- fs/smb/common/smbdirect/smbdirect_socket.h 118
-rw-r--r-- fs/smb/server/connection.h 1
-rw-r--r-- fs/smb/server/smb_common.c 2
-rw-r--r-- fs/smb/server/transport_rdma.c 97
-rw-r--r-- fs/smb/server/transport_tcp.c 17
-rw-r--r-- fs/squashfs/block.c 47
-rw-r--r-- fs/squashfs/file.c 7
90 files changed, 4109 insertions, 3131 deletions
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 9f05d454b9df..2186e87fb61b 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -321,8 +321,7 @@ struct walk_control {
/*
* Ignore any items from the inode currently being processed. Needs
- * to be set every time we find a BTRFS_INODE_ITEM_KEY and we are in
- * the LOG_WALK_REPLAY_INODES stage.
+ * to be set every time we find a BTRFS_INODE_ITEM_KEY.
*/
bool ignore_cur_inode;
@@ -2465,23 +2464,30 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
nritems = btrfs_header_nritems(eb);
for (i = 0; i < nritems; i++) {
- btrfs_item_key_to_cpu(eb, &key, i);
+ struct btrfs_inode_item *inode_item;
- /* inode keys are done during the first stage */
- if (key.type == BTRFS_INODE_ITEM_KEY &&
- wc->stage == LOG_WALK_REPLAY_INODES) {
- struct btrfs_inode_item *inode_item;
- u32 mode;
+ btrfs_item_key_to_cpu(eb, &key, i);
- inode_item = btrfs_item_ptr(eb, i,
- struct btrfs_inode_item);
+ if (key.type == BTRFS_INODE_ITEM_KEY) {
+ inode_item = btrfs_item_ptr(eb, i, struct btrfs_inode_item);
/*
- * If we have a tmpfile (O_TMPFILE) that got fsync'ed
- * and never got linked before the fsync, skip it, as
- * replaying it is pointless since it would be deleted
- * later. We skip logging tmpfiles, but it's always
- * possible we are replaying a log created with a kernel
- * that used to log tmpfiles.
+ * An inode with no links is either:
+ *
+ * 1) A tmpfile (O_TMPFILE) that got fsync'ed and never
+ * got linked before the fsync, skip it, as replaying
+ * it is pointless since it would be deleted later.
+ * We skip logging tmpfiles, but it's always possible
+ * we are replaying a log created with a kernel that
+ * used to log tmpfiles;
+ *
+ * 2) A non-tmpfile which got its last link deleted
+ * while holding an open fd on it and later got
+ * fsynced through that fd. We always log the
+ * parent inodes when inode->last_unlink_trans is
+ * set to the current transaction, so ignore all the
+ * inode items for this inode. We will delete the
+ * inode when processing the parent directory with
+ * replay_dir_deletes().
*/
if (btrfs_inode_nlink(eb, inode_item) == 0) {
wc->ignore_cur_inode = true;
@@ -2489,8 +2495,14 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
} else {
wc->ignore_cur_inode = false;
}
- ret = replay_xattr_deletes(wc->trans, root, log,
- path, key.objectid);
+ }
+
+ /* Inode keys are done during the first stage. */
+ if (key.type == BTRFS_INODE_ITEM_KEY &&
+ wc->stage == LOG_WALK_REPLAY_INODES) {
+ u32 mode;
+
+ ret = replay_xattr_deletes(wc->trans, root, log, path, key.objectid);
if (ret)
break;
mode = btrfs_inode_mode(eb, inode_item);
diff --git a/fs/exfat/dir.c b/fs/exfat/dir.c
index 3103b932b674..ee060e26f51d 100644
--- a/fs/exfat/dir.c
+++ b/fs/exfat/dir.c
@@ -996,6 +996,7 @@ int exfat_find_dir_entry(struct super_block *sb, struct exfat_inode_info *ei,
struct exfat_hint_femp candi_empty;
struct exfat_sb_info *sbi = EXFAT_SB(sb);
int num_entries = exfat_calc_num_entries(p_uniname);
+ unsigned int clu_count = 0;
if (num_entries < 0)
return num_entries;
@@ -1133,6 +1134,10 @@ rewind:
} else {
if (exfat_get_next_cluster(sb, &clu.dir))
return -EIO;
+
+ /* stop searching if the cluster chain includes a loop */
+ if (unlikely(++clu_count > EXFAT_DATA_CLUSTER_COUNT(sbi)))
+ goto not_found;
}
}
@@ -1195,6 +1200,7 @@ int exfat_count_dir_entries(struct super_block *sb, struct exfat_chain *p_dir)
int i, count = 0;
int dentries_per_clu;
unsigned int entry_type;
+ unsigned int clu_count = 0;
struct exfat_chain clu;
struct exfat_dentry *ep;
struct exfat_sb_info *sbi = EXFAT_SB(sb);
@@ -1227,6 +1233,12 @@ int exfat_count_dir_entries(struct super_block *sb, struct exfat_chain *p_dir)
} else {
if (exfat_get_next_cluster(sb, &(clu.dir)))
return -EIO;
+
+ if (unlikely(++clu_count > sbi->used_clusters)) {
+ exfat_fs_error(sb, "FAT or bitmap is corrupted");
+ return -EIO;
+ }
+
}
}
diff --git a/fs/exfat/fatent.c b/fs/exfat/fatent.c
index 23065f948ae7..232cc7f8ab92 100644
--- a/fs/exfat/fatent.c
+++ b/fs/exfat/fatent.c
@@ -490,5 +490,15 @@ int exfat_count_num_clusters(struct super_block *sb,
}
*ret_count = count;
+
+ /*
+ * since exfat_count_used_clusters() is not called, sbi->used_clusters
+ * cannot be used here.
+ */
+ if (unlikely(i == sbi->num_clusters && clu != EXFAT_EOF_CLUSTER)) {
+ exfat_fs_error(sb, "The cluster chain has a loop");
+ return -EIO;
+ }
+
return 0;
}
diff --git a/fs/exfat/file.c b/fs/exfat/file.c
index 6b82497572b4..538d2b6ac2ec 100644
--- a/fs/exfat/file.c
+++ b/fs/exfat/file.c
@@ -622,9 +622,8 @@ static ssize_t exfat_file_write_iter(struct kiocb *iocb, struct iov_iter *iter)
if (pos > valid_size)
pos = valid_size;
- if (iocb_is_dsync(iocb) && iocb->ki_pos > pos) {
- ssize_t err = vfs_fsync_range(file, pos, iocb->ki_pos - 1,
- iocb->ki_flags & IOCB_SYNC);
+ if (iocb->ki_pos > pos) {
+ ssize_t err = generic_write_sync(iocb, iocb->ki_pos - pos);
if (err < 0)
return err;
}
diff --git a/fs/exfat/namei.c b/fs/exfat/namei.c
index fede0283d6e2..f5f1c4e8a29f 100644
--- a/fs/exfat/namei.c
+++ b/fs/exfat/namei.c
@@ -890,6 +890,7 @@ static int exfat_check_dir_empty(struct super_block *sb,
{
int i, dentries_per_clu;
unsigned int type;
+ unsigned int clu_count = 0;
struct exfat_chain clu;
struct exfat_dentry *ep;
struct exfat_sb_info *sbi = EXFAT_SB(sb);
@@ -926,6 +927,10 @@ static int exfat_check_dir_empty(struct super_block *sb,
} else {
if (exfat_get_next_cluster(sb, &(clu.dir)))
return -EIO;
+
+ /* break if the cluster chain includes a loop */
+ if (unlikely(++clu_count > EXFAT_DATA_CLUSTER_COUNT(sbi)))
+ break;
}
}
diff --git a/fs/exfat/super.c b/fs/exfat/super.c
index ea5c1334a214..8926e63f5bb7 100644
--- a/fs/exfat/super.c
+++ b/fs/exfat/super.c
@@ -341,13 +341,12 @@ static void exfat_hash_init(struct super_block *sb)
INIT_HLIST_HEAD(&sbi->inode_hashtable[i]);
}
-static int exfat_read_root(struct inode *inode)
+static int exfat_read_root(struct inode *inode, struct exfat_chain *root_clu)
{
struct super_block *sb = inode->i_sb;
struct exfat_sb_info *sbi = EXFAT_SB(sb);
struct exfat_inode_info *ei = EXFAT_I(inode);
- struct exfat_chain cdir;
- int num_subdirs, num_clu = 0;
+ int num_subdirs;
exfat_chain_set(&ei->dir, sbi->root_dir, 0, ALLOC_FAT_CHAIN);
ei->entry = -1;
@@ -360,12 +359,9 @@ static int exfat_read_root(struct inode *inode)
ei->hint_stat.clu = sbi->root_dir;
ei->hint_femp.eidx = EXFAT_HINT_NONE;
- exfat_chain_set(&cdir, sbi->root_dir, 0, ALLOC_FAT_CHAIN);
- if (exfat_count_num_clusters(sb, &cdir, &num_clu))
- return -EIO;
- i_size_write(inode, num_clu << sbi->cluster_size_bits);
+ i_size_write(inode, EXFAT_CLU_TO_B(root_clu->size, sbi));
- num_subdirs = exfat_count_dir_entries(sb, &cdir);
+ num_subdirs = exfat_count_dir_entries(sb, root_clu);
if (num_subdirs < 0)
return -EIO;
set_nlink(inode, num_subdirs + EXFAT_MIN_SUBDIR);
@@ -578,7 +574,8 @@ static int exfat_verify_boot_region(struct super_block *sb)
}
/* mount the file system volume */
-static int __exfat_fill_super(struct super_block *sb)
+static int __exfat_fill_super(struct super_block *sb,
+ struct exfat_chain *root_clu)
{
int ret;
struct exfat_sb_info *sbi = EXFAT_SB(sb);
@@ -595,6 +592,18 @@ static int __exfat_fill_super(struct super_block *sb)
goto free_bh;
}
+ /*
+ * Call exfat_count_num_clusters() before searching for up-case and
+ * bitmap directory entries to avoid an infinite loop if they are
+ * missing and the cluster chain includes a loop.
+ */
+ exfat_chain_set(root_clu, sbi->root_dir, 0, ALLOC_FAT_CHAIN);
+ ret = exfat_count_num_clusters(sb, root_clu, &root_clu->size);
+ if (ret) {
+ exfat_err(sb, "failed to count the number of clusters in root");
+ goto free_bh;
+ }
+
ret = exfat_create_upcase_table(sb);
if (ret) {
exfat_err(sb, "failed to load upcase table");
@@ -627,6 +636,7 @@ static int exfat_fill_super(struct super_block *sb, struct fs_context *fc)
struct exfat_sb_info *sbi = sb->s_fs_info;
struct exfat_mount_options *opts = &sbi->options;
struct inode *root_inode;
+ struct exfat_chain root_clu;
int err;
if (opts->allow_utime == (unsigned short)-1)
@@ -645,7 +655,7 @@ static int exfat_fill_super(struct super_block *sb, struct fs_context *fc)
sb->s_time_min = EXFAT_MIN_TIMESTAMP_SECS;
sb->s_time_max = EXFAT_MAX_TIMESTAMP_SECS;
- err = __exfat_fill_super(sb);
+ err = __exfat_fill_super(sb, &root_clu);
if (err) {
exfat_err(sb, "failed to recognize exfat type");
goto check_nls_io;
@@ -680,7 +690,7 @@ static int exfat_fill_super(struct super_block *sb, struct fs_context *fc)
root_inode->i_ino = EXFAT_ROOT_INO;
inode_set_iversion(root_inode, 1);
- err = exfat_read_root(root_inode);
+ err = exfat_read_root(root_inode, &root_clu);
if (err) {
exfat_err(sb, "failed to initialize root inode");
goto put_inode;
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index f149ec28aefd..db3831f7f2f5 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -82,7 +82,7 @@ repeat:
if (folio_test_uptodate(folio))
goto out;
- fio.page = &folio->page;
+ fio.folio = folio;
err = f2fs_submit_page_bio(&fio);
if (err) {
@@ -309,7 +309,7 @@ int f2fs_ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages,
continue;
}
- fio.page = &folio->page;
+ fio.folio = folio;
err = f2fs_submit_page_bio(&fio);
f2fs_folio_put(folio, err ? true : false);
@@ -485,7 +485,7 @@ static bool f2fs_dirty_meta_folio(struct address_space *mapping,
folio_mark_uptodate(folio);
if (filemap_dirty_folio(mapping, folio)) {
inc_page_count(F2FS_M_SB(mapping), F2FS_DIRTY_META);
- set_page_private_reference(&folio->page);
+ folio_set_f2fs_reference(folio);
return true;
}
return false;
@@ -1045,7 +1045,7 @@ void f2fs_update_dirty_folio(struct inode *inode, struct folio *folio)
inode_inc_dirty_pages(inode);
spin_unlock(&sbi->inode_lock[type]);
- set_page_private_reference(&folio->page);
+ folio_set_f2fs_reference(folio);
}
void f2fs_remove_dirty_inode(struct inode *inode)
diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c
index b3c1df93a163..5c1f47e45dab 100644
--- a/fs/f2fs/compress.c
+++ b/fs/f2fs/compress.c
@@ -23,20 +23,18 @@
static struct kmem_cache *cic_entry_slab;
static struct kmem_cache *dic_entry_slab;
-static void *page_array_alloc(struct inode *inode, int nr)
+static void *page_array_alloc(struct f2fs_sb_info *sbi, int nr)
{
- struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
unsigned int size = sizeof(struct page *) * nr;
if (likely(size <= sbi->page_array_slab_size))
return f2fs_kmem_cache_alloc(sbi->page_array_slab,
- GFP_F2FS_ZERO, false, F2FS_I_SB(inode));
+ GFP_F2FS_ZERO, false, sbi);
return f2fs_kzalloc(sbi, size, GFP_NOFS);
}
-static void page_array_free(struct inode *inode, void *pages, int nr)
+static void page_array_free(struct f2fs_sb_info *sbi, void *pages, int nr)
{
- struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
unsigned int size = sizeof(struct page *) * nr;
if (!pages)
@@ -73,17 +71,15 @@ static pgoff_t start_idx_of_cluster(struct compress_ctx *cc)
return cc->cluster_idx << cc->log_cluster_size;
}
-bool f2fs_is_compressed_page(struct page *page)
+bool f2fs_is_compressed_page(struct folio *folio)
{
- if (!PagePrivate(page))
- return false;
- if (!page_private(page))
+ if (!folio->private)
return false;
- if (page_private_nonpointer(page))
+ if (folio_test_f2fs_nonpointer(folio))
return false;
- f2fs_bug_on(F2FS_P_SB(page),
- *((u32 *)page_private(page)) != F2FS_COMPRESSED_PAGE_MAGIC);
+ f2fs_bug_on(F2FS_F_SB(folio),
+ *((u32 *)folio->private) != F2FS_COMPRESSED_PAGE_MAGIC);
return true;
}
@@ -149,13 +145,13 @@ int f2fs_init_compress_ctx(struct compress_ctx *cc)
if (cc->rpages)
return 0;
- cc->rpages = page_array_alloc(cc->inode, cc->cluster_size);
+ cc->rpages = page_array_alloc(F2FS_I_SB(cc->inode), cc->cluster_size);
return cc->rpages ? 0 : -ENOMEM;
}
void f2fs_destroy_compress_ctx(struct compress_ctx *cc, bool reuse)
{
- page_array_free(cc->inode, cc->rpages, cc->cluster_size);
+ page_array_free(F2FS_I_SB(cc->inode), cc->rpages, cc->cluster_size);
cc->rpages = NULL;
cc->nr_rpages = 0;
cc->nr_cpages = 0;
@@ -216,13 +212,13 @@ static int lzo_decompress_pages(struct decompress_io_ctx *dic)
ret = lzo1x_decompress_safe(dic->cbuf->cdata, dic->clen,
dic->rbuf, &dic->rlen);
if (ret != LZO_E_OK) {
- f2fs_err_ratelimited(F2FS_I_SB(dic->inode),
+ f2fs_err_ratelimited(dic->sbi,
"lzo decompress failed, ret:%d", ret);
return -EIO;
}
if (dic->rlen != PAGE_SIZE << dic->log_cluster_size) {
- f2fs_err_ratelimited(F2FS_I_SB(dic->inode),
+ f2fs_err_ratelimited(dic->sbi,
"lzo invalid rlen:%zu, expected:%lu",
dic->rlen, PAGE_SIZE << dic->log_cluster_size);
return -EIO;
@@ -296,13 +292,13 @@ static int lz4_decompress_pages(struct decompress_io_ctx *dic)
ret = LZ4_decompress_safe(dic->cbuf->cdata, dic->rbuf,
dic->clen, dic->rlen);
if (ret < 0) {
- f2fs_err_ratelimited(F2FS_I_SB(dic->inode),
+ f2fs_err_ratelimited(dic->sbi,
"lz4 decompress failed, ret:%d", ret);
return -EIO;
}
if (ret != PAGE_SIZE << dic->log_cluster_size) {
- f2fs_err_ratelimited(F2FS_I_SB(dic->inode),
+ f2fs_err_ratelimited(dic->sbi,
"lz4 invalid ret:%d, expected:%lu",
ret, PAGE_SIZE << dic->log_cluster_size);
return -EIO;
@@ -424,13 +420,13 @@ static int zstd_init_decompress_ctx(struct decompress_io_ctx *dic)
workspace_size = zstd_dstream_workspace_bound(max_window_size);
- workspace = f2fs_vmalloc(F2FS_I_SB(dic->inode), workspace_size);
+ workspace = f2fs_vmalloc(dic->sbi, workspace_size);
if (!workspace)
return -ENOMEM;
stream = zstd_init_dstream(max_window_size, workspace, workspace_size);
if (!stream) {
- f2fs_err_ratelimited(F2FS_I_SB(dic->inode),
+ f2fs_err_ratelimited(dic->sbi,
"%s zstd_init_dstream failed", __func__);
vfree(workspace);
return -EIO;
@@ -466,14 +462,14 @@ static int zstd_decompress_pages(struct decompress_io_ctx *dic)
ret = zstd_decompress_stream(stream, &outbuf, &inbuf);
if (zstd_is_error(ret)) {
- f2fs_err_ratelimited(F2FS_I_SB(dic->inode),
+ f2fs_err_ratelimited(dic->sbi,
"%s zstd_decompress_stream failed, ret: %d",
__func__, zstd_get_error_code(ret));
return -EIO;
}
if (dic->rlen != outbuf.pos) {
- f2fs_err_ratelimited(F2FS_I_SB(dic->inode),
+ f2fs_err_ratelimited(dic->sbi,
"%s ZSTD invalid rlen:%zu, expected:%lu",
__func__, dic->rlen,
PAGE_SIZE << dic->log_cluster_size);
@@ -622,6 +618,7 @@ static void *f2fs_vmap(struct page **pages, unsigned int count)
static int f2fs_compress_pages(struct compress_ctx *cc)
{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(cc->inode);
struct f2fs_inode_info *fi = F2FS_I(cc->inode);
const struct f2fs_compress_ops *cops =
f2fs_cops[fi->i_compress_algorithm];
@@ -642,7 +639,7 @@ static int f2fs_compress_pages(struct compress_ctx *cc)
cc->nr_cpages = DIV_ROUND_UP(max_len, PAGE_SIZE);
cc->valid_nr_cpages = cc->nr_cpages;
- cc->cpages = page_array_alloc(cc->inode, cc->nr_cpages);
+ cc->cpages = page_array_alloc(sbi, cc->nr_cpages);
if (!cc->cpages) {
ret = -ENOMEM;
goto destroy_compress_ctx;
@@ -716,7 +713,7 @@ out_free_cpages:
if (cc->cpages[i])
f2fs_compress_free_page(cc->cpages[i]);
}
- page_array_free(cc->inode, cc->cpages, cc->nr_cpages);
+ page_array_free(sbi, cc->cpages, cc->nr_cpages);
cc->cpages = NULL;
destroy_compress_ctx:
if (cops->destroy_compress_ctx)
@@ -734,7 +731,7 @@ static void f2fs_release_decomp_mem(struct decompress_io_ctx *dic,
void f2fs_decompress_cluster(struct decompress_io_ctx *dic, bool in_task)
{
- struct f2fs_sb_info *sbi = F2FS_I_SB(dic->inode);
+ struct f2fs_sb_info *sbi = dic->sbi;
struct f2fs_inode_info *fi = F2FS_I(dic->inode);
const struct f2fs_compress_ops *cops =
f2fs_cops[fi->i_compress_algorithm];
@@ -796,25 +793,27 @@ out_end_io:
f2fs_decompress_end_io(dic, ret, in_task);
}
+static void f2fs_cache_compressed_page(struct f2fs_sb_info *sbi,
+ struct folio *folio, nid_t ino, block_t blkaddr);
+
/*
* This is called when a page of a compressed cluster has been read from disk
* (or failed to be read from disk). It checks whether this page was the last
* page being waited on in the cluster, and if so, it decompresses the cluster
* (or in the case of a failure, cleans up without actually decompressing).
*/
-void f2fs_end_read_compressed_page(struct page *page, bool failed,
+void f2fs_end_read_compressed_page(struct folio *folio, bool failed,
block_t blkaddr, bool in_task)
{
- struct decompress_io_ctx *dic =
- (struct decompress_io_ctx *)page_private(page);
- struct f2fs_sb_info *sbi = F2FS_I_SB(dic->inode);
+ struct decompress_io_ctx *dic = folio->private;
+ struct f2fs_sb_info *sbi = dic->sbi;
dec_page_count(sbi, F2FS_RD_DATA);
if (failed)
WRITE_ONCE(dic->failed, true);
else if (blkaddr && in_task)
- f2fs_cache_compressed_page(sbi, page,
+ f2fs_cache_compressed_page(sbi, folio,
dic->inode->i_ino, blkaddr);
if (atomic_dec_and_test(&dic->remaining_pages))
@@ -1340,7 +1339,7 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc,
cic->magic = F2FS_COMPRESSED_PAGE_MAGIC;
cic->inode = inode;
atomic_set(&cic->pending_pages, cc->valid_nr_cpages);
- cic->rpages = page_array_alloc(cc->inode, cc->cluster_size);
+ cic->rpages = page_array_alloc(sbi, cc->cluster_size);
if (!cic->rpages)
goto out_put_cic;
@@ -1420,7 +1419,7 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc,
(*submitted)++;
unlock_continue:
inode_dec_dirty_pages(cc->inode);
- unlock_page(fio.page);
+ folio_unlock(fio.folio);
}
if (fio.compr_blocks)
@@ -1442,13 +1441,13 @@ unlock_continue:
spin_unlock(&fi->i_size_lock);
f2fs_put_rpages(cc);
- page_array_free(cc->inode, cc->cpages, cc->nr_cpages);
+ page_array_free(sbi, cc->cpages, cc->nr_cpages);
cc->cpages = NULL;
f2fs_destroy_compress_ctx(cc, false);
return 0;
out_destroy_crypt:
- page_array_free(cc->inode, cic->rpages, cc->cluster_size);
+ page_array_free(sbi, cic->rpages, cc->cluster_size);
for (--i; i >= 0; i--) {
if (!cc->cpages[i])
@@ -1469,18 +1468,18 @@ out_free:
f2fs_compress_free_page(cc->cpages[i]);
cc->cpages[i] = NULL;
}
- page_array_free(cc->inode, cc->cpages, cc->nr_cpages);
+ page_array_free(sbi, cc->cpages, cc->nr_cpages);
cc->cpages = NULL;
return -EAGAIN;
}
-void f2fs_compress_write_end_io(struct bio *bio, struct page *page)
+void f2fs_compress_write_end_io(struct bio *bio, struct folio *folio)
{
+ struct page *page = &folio->page;
struct f2fs_sb_info *sbi = bio->bi_private;
- struct compress_io_ctx *cic =
- (struct compress_io_ctx *)page_private(page);
- enum count_type type = WB_DATA_TYPE(page,
- f2fs_is_compressed_page(page));
+ struct compress_io_ctx *cic = folio->private;
+ enum count_type type = WB_DATA_TYPE(folio,
+ f2fs_is_compressed_page(folio));
int i;
if (unlikely(bio->bi_status != BLK_STS_OK))
@@ -1499,7 +1498,7 @@ void f2fs_compress_write_end_io(struct bio *bio, struct page *page)
end_page_writeback(cic->rpages[i]);
}
- page_array_free(cic->inode, cic->rpages, cic->nr_rpages);
+ page_array_free(sbi, cic->rpages, cic->nr_rpages);
kmem_cache_free(cic_entry_slab, cic);
}
@@ -1633,14 +1632,13 @@ static inline bool allow_memalloc_for_decomp(struct f2fs_sb_info *sbi,
static int f2fs_prepare_decomp_mem(struct decompress_io_ctx *dic,
bool pre_alloc)
{
- const struct f2fs_compress_ops *cops =
- f2fs_cops[F2FS_I(dic->inode)->i_compress_algorithm];
+ const struct f2fs_compress_ops *cops = f2fs_cops[dic->compress_algorithm];
int i;
- if (!allow_memalloc_for_decomp(F2FS_I_SB(dic->inode), pre_alloc))
+ if (!allow_memalloc_for_decomp(dic->sbi, pre_alloc))
return 0;
- dic->tpages = page_array_alloc(dic->inode, dic->cluster_size);
+ dic->tpages = page_array_alloc(dic->sbi, dic->cluster_size);
if (!dic->tpages)
return -ENOMEM;
@@ -1670,10 +1668,9 @@ static int f2fs_prepare_decomp_mem(struct decompress_io_ctx *dic,
static void f2fs_release_decomp_mem(struct decompress_io_ctx *dic,
bool bypass_destroy_callback, bool pre_alloc)
{
- const struct f2fs_compress_ops *cops =
- f2fs_cops[F2FS_I(dic->inode)->i_compress_algorithm];
+ const struct f2fs_compress_ops *cops = f2fs_cops[dic->compress_algorithm];
- if (!allow_memalloc_for_decomp(F2FS_I_SB(dic->inode), pre_alloc))
+ if (!allow_memalloc_for_decomp(dic->sbi, pre_alloc))
return;
if (!bypass_destroy_callback && cops->destroy_decompress_ctx)
@@ -1700,7 +1697,7 @@ struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc)
if (!dic)
return ERR_PTR(-ENOMEM);
- dic->rpages = page_array_alloc(cc->inode, cc->cluster_size);
+ dic->rpages = page_array_alloc(sbi, cc->cluster_size);
if (!dic->rpages) {
kmem_cache_free(dic_entry_slab, dic);
return ERR_PTR(-ENOMEM);
@@ -1708,6 +1705,8 @@ struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc)
dic->magic = F2FS_COMPRESSED_PAGE_MAGIC;
dic->inode = cc->inode;
+ dic->sbi = sbi;
+ dic->compress_algorithm = F2FS_I(cc->inode)->i_compress_algorithm;
atomic_set(&dic->remaining_pages, cc->nr_cpages);
dic->cluster_idx = cc->cluster_idx;
dic->cluster_size = cc->cluster_size;
@@ -1721,7 +1720,7 @@ struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc)
dic->rpages[i] = cc->rpages[i];
dic->nr_rpages = cc->cluster_size;
- dic->cpages = page_array_alloc(dic->inode, dic->nr_cpages);
+ dic->cpages = page_array_alloc(sbi, dic->nr_cpages);
if (!dic->cpages) {
ret = -ENOMEM;
goto out_free;
@@ -1751,6 +1750,8 @@ static void f2fs_free_dic(struct decompress_io_ctx *dic,
bool bypass_destroy_callback)
{
int i;
+ /* use sbi in dic to avoid UAF (use-after-free) of dic->inode */
+ struct f2fs_sb_info *sbi = dic->sbi;
f2fs_release_decomp_mem(dic, bypass_destroy_callback, true);
@@ -1762,7 +1763,7 @@ static void f2fs_free_dic(struct decompress_io_ctx *dic,
continue;
f2fs_compress_free_page(dic->tpages[i]);
}
- page_array_free(dic->inode, dic->tpages, dic->cluster_size);
+ page_array_free(sbi, dic->tpages, dic->cluster_size);
}
if (dic->cpages) {
@@ -1771,10 +1772,10 @@ static void f2fs_free_dic(struct decompress_io_ctx *dic,
continue;
f2fs_compress_free_page(dic->cpages[i]);
}
- page_array_free(dic->inode, dic->cpages, dic->nr_cpages);
+ page_array_free(sbi, dic->cpages, dic->nr_cpages);
}
- page_array_free(dic->inode, dic->rpages, dic->nr_rpages);
+ page_array_free(sbi, dic->rpages, dic->nr_rpages);
kmem_cache_free(dic_entry_slab, dic);
}
@@ -1793,8 +1794,7 @@ static void f2fs_put_dic(struct decompress_io_ctx *dic, bool in_task)
f2fs_free_dic(dic, false);
} else {
INIT_WORK(&dic->free_work, f2fs_late_free_dic);
- queue_work(F2FS_I_SB(dic->inode)->post_read_wq,
- &dic->free_work);
+ queue_work(dic->sbi->post_read_wq, &dic->free_work);
}
}
}
@@ -1921,8 +1921,8 @@ void f2fs_invalidate_compress_pages_range(struct f2fs_sb_info *sbi,
invalidate_mapping_pages(COMPRESS_MAPPING(sbi), blkaddr, blkaddr + len - 1);
}
-void f2fs_cache_compressed_page(struct f2fs_sb_info *sbi, struct page *page,
- nid_t ino, block_t blkaddr)
+static void f2fs_cache_compressed_page(struct f2fs_sb_info *sbi,
+ struct folio *folio, nid_t ino, block_t blkaddr)
{
struct folio *cfolio;
int ret;
@@ -1953,9 +1953,9 @@ void f2fs_cache_compressed_page(struct f2fs_sb_info *sbi, struct page *page,
return;
}
- set_page_private_data(&cfolio->page, ino);
+ folio_set_f2fs_data(cfolio, ino);
- memcpy(folio_address(cfolio), page_address(page), PAGE_SIZE);
+ memcpy(folio_address(cfolio), folio_address(folio), PAGE_SIZE);
folio_mark_uptodate(cfolio);
f2fs_folio_put(cfolio, true);
}
@@ -2012,7 +2012,7 @@ void f2fs_invalidate_compress_pages(struct f2fs_sb_info *sbi, nid_t ino)
continue;
}
- if (ino != get_page_private_data(&folio->page)) {
+ if (ino != folio_get_f2fs_data(folio)) {
folio_unlock(folio);
continue;
}
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 711ad80b38d0..7961e0ddfca3 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -47,14 +47,14 @@ void f2fs_destroy_bioset(void)
bioset_exit(&f2fs_bioset);
}
-bool f2fs_is_cp_guaranteed(struct page *page)
+bool f2fs_is_cp_guaranteed(const struct folio *folio)
{
- struct address_space *mapping = page_folio(page)->mapping;
+ struct address_space *mapping = folio->mapping;
struct inode *inode;
struct f2fs_sb_info *sbi;
- if (fscrypt_is_bounce_page(page))
- return page_private_gcing(fscrypt_pagecache_page(page));
+ if (fscrypt_is_bounce_folio(folio))
+ return folio_test_f2fs_gcing(fscrypt_pagecache_folio(folio));
inode = mapping->host;
sbi = F2FS_I_SB(inode);
@@ -65,7 +65,7 @@ bool f2fs_is_cp_guaranteed(struct page *page)
return true;
if ((S_ISREG(inode->i_mode) && IS_NOQUOTA(inode)) ||
- page_private_gcing(page))
+ folio_test_f2fs_gcing(folio))
return true;
return false;
}
@@ -142,9 +142,9 @@ static void f2fs_finish_read_bio(struct bio *bio, bool in_task)
bio_for_each_folio_all(fi, bio) {
struct folio *folio = fi.folio;
- if (f2fs_is_compressed_page(&folio->page)) {
+ if (f2fs_is_compressed_page(folio)) {
if (ctx && !ctx->decompression_attempted)
- f2fs_end_read_compressed_page(&folio->page, true, 0,
+ f2fs_end_read_compressed_page(folio, true, 0,
in_task);
f2fs_put_folio_dic(folio, in_task);
continue;
@@ -181,14 +181,13 @@ static void f2fs_verify_bio(struct work_struct *work)
* as those were handled separately by f2fs_end_read_compressed_page().
*/
if (may_have_compressed_pages) {
- struct bio_vec *bv;
- struct bvec_iter_all iter_all;
+ struct folio_iter fi;
- bio_for_each_segment_all(bv, bio, iter_all) {
- struct page *page = bv->bv_page;
+ bio_for_each_folio_all(fi, bio) {
+ struct folio *folio = fi.folio;
- if (!f2fs_is_compressed_page(page) &&
- !fsverity_verify_page(page)) {
+ if (!f2fs_is_compressed_page(folio) &&
+ !fsverity_verify_page(&folio->page)) {
bio->bi_status = BLK_STS_IOERR;
break;
}
@@ -233,16 +232,15 @@ static void f2fs_verify_and_finish_bio(struct bio *bio, bool in_task)
static void f2fs_handle_step_decompress(struct bio_post_read_ctx *ctx,
bool in_task)
{
- struct bio_vec *bv;
- struct bvec_iter_all iter_all;
+ struct folio_iter fi;
bool all_compressed = true;
block_t blkaddr = ctx->fs_blkaddr;
- bio_for_each_segment_all(bv, ctx->bio, iter_all) {
- struct page *page = bv->bv_page;
+ bio_for_each_folio_all(fi, ctx->bio) {
+ struct folio *folio = fi.folio;
- if (f2fs_is_compressed_page(page))
- f2fs_end_read_compressed_page(page, false, blkaddr,
+ if (f2fs_is_compressed_page(folio))
+ f2fs_end_read_compressed_page(folio, false, blkaddr,
in_task);
else
all_compressed = false;
@@ -280,9 +278,9 @@ static void f2fs_post_read_work(struct work_struct *work)
static void f2fs_read_end_io(struct bio *bio)
{
- struct f2fs_sb_info *sbi = F2FS_P_SB(bio_first_page_all(bio));
+ struct f2fs_sb_info *sbi = F2FS_F_SB(bio_first_folio_all(bio));
struct bio_post_read_ctx *ctx;
- bool intask = in_task();
+ bool intask = in_task() && !irqs_disabled();
iostat_update_and_unbind_ctx(bio);
ctx = bio->bi_private;
@@ -339,13 +337,13 @@ static void f2fs_write_end_io(struct bio *bio)
}
#ifdef CONFIG_F2FS_FS_COMPRESSION
- if (f2fs_is_compressed_page(&folio->page)) {
- f2fs_compress_write_end_io(bio, &folio->page);
+ if (f2fs_is_compressed_page(folio)) {
+ f2fs_compress_write_end_io(bio, folio);
continue;
}
#endif
- type = WB_DATA_TYPE(&folio->page, false);
+ type = WB_DATA_TYPE(folio, false);
if (unlikely(bio->bi_status != BLK_STS_OK)) {
mapping_set_error(folio->mapping, -EIO);
@@ -355,12 +353,12 @@ static void f2fs_write_end_io(struct bio *bio)
}
f2fs_bug_on(sbi, is_node_folio(folio) &&
- folio->index != nid_of_node(&folio->page));
+ folio->index != nid_of_node(folio));
dec_page_count(sbi, type);
if (f2fs_in_warm_node_list(sbi, folio))
f2fs_del_fsync_node_entry(sbi, folio);
- clear_page_private_gcing(&folio->page);
+ folio_clear_f2fs_gcing(folio);
folio_end_writeback(folio);
}
if (!get_pages(sbi, F2FS_WB_CP_DATA) &&
@@ -419,7 +417,6 @@ int f2fs_target_device_index(struct f2fs_sb_info *sbi, block_t blkaddr)
static blk_opf_t f2fs_io_flags(struct f2fs_io_info *fio)
{
unsigned int temp_mask = GENMASK(NR_TEMP_TYPE - 1, 0);
- struct folio *fio_folio = page_folio(fio->page);
unsigned int fua_flag, meta_flag, io_flag;
blk_opf_t op_flags = 0;
@@ -447,7 +444,7 @@ static blk_opf_t f2fs_io_flags(struct f2fs_io_info *fio)
op_flags |= REQ_FUA;
if (fio->type == DATA &&
- F2FS_I(fio_folio->mapping->host)->ioprio_hint == F2FS_IOPRIO_WRITE)
+ F2FS_I(fio->folio->mapping->host)->ioprio_hint == F2FS_IOPRIO_WRITE)
op_flags |= REQ_PRIO;
return op_flags;
@@ -546,14 +543,14 @@ static void __submit_merged_bio(struct f2fs_bio_info *io)
}
static bool __has_merged_page(struct bio *bio, struct inode *inode,
- struct page *page, nid_t ino)
+ struct folio *folio, nid_t ino)
{
struct folio_iter fi;
if (!bio)
return false;
- if (!inode && !page && !ino)
+ if (!inode && !folio && !ino)
return true;
bio_for_each_folio_all(fi, bio) {
@@ -564,7 +561,7 @@ static bool __has_merged_page(struct bio *bio, struct inode *inode,
if (IS_ERR(target))
continue;
}
- if (f2fs_is_compressed_page(&target->page)) {
+ if (f2fs_is_compressed_page(target)) {
target = f2fs_compress_control_folio(target);
if (IS_ERR(target))
continue;
@@ -572,9 +569,9 @@ static bool __has_merged_page(struct bio *bio, struct inode *inode,
if (inode && inode == target->mapping->host)
return true;
- if (page && page == &target->page)
+ if (folio && folio == target)
return true;
- if (ino && ino == ino_of_node(&target->page))
+ if (ino && ino == ino_of_node(target))
return true;
}
@@ -641,7 +638,7 @@ unlock_out:
}
static void __submit_merged_write_cond(struct f2fs_sb_info *sbi,
- struct inode *inode, struct page *page,
+ struct inode *inode, struct folio *folio,
nid_t ino, enum page_type type, bool force)
{
enum temp_type temp;
@@ -653,7 +650,7 @@ static void __submit_merged_write_cond(struct f2fs_sb_info *sbi,
struct f2fs_bio_info *io = sbi->write_io[btype] + temp;
f2fs_down_read(&io->io_rwsem);
- ret = __has_merged_page(io->bio, inode, page, ino);
+ ret = __has_merged_page(io->bio, inode, folio, ino);
f2fs_up_read(&io->io_rwsem);
}
if (ret)
@@ -671,10 +668,10 @@ void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type)
}
void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi,
- struct inode *inode, struct page *page,
+ struct inode *inode, struct folio *folio,
nid_t ino, enum page_type type)
{
- __submit_merged_write_cond(sbi, inode, page, ino, type, false);
+ __submit_merged_write_cond(sbi, inode, folio, ino, type, false);
}
void f2fs_flush_merged_writes(struct f2fs_sb_info *sbi)
@@ -691,7 +688,7 @@ void f2fs_flush_merged_writes(struct f2fs_sb_info *sbi)
int f2fs_submit_page_bio(struct f2fs_io_info *fio)
{
struct bio *bio;
- struct folio *fio_folio = page_folio(fio->page);
+ struct folio *fio_folio = fio->folio;
struct folio *data_folio = fio->encrypted_page ?
page_folio(fio->encrypted_page) : fio_folio;
@@ -713,7 +710,7 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio)
wbc_account_cgroup_owner(fio->io_wbc, fio_folio, PAGE_SIZE);
inc_page_count(fio->sbi, is_read_io(fio->op) ?
- __read_io_type(data_folio) : WB_DATA_TYPE(fio->page, false));
+ __read_io_type(data_folio) : WB_DATA_TYPE(fio->folio, false));
if (is_read_io(bio_op(bio)))
f2fs_submit_read_bio(fio->sbi, bio, fio->type);
@@ -779,7 +776,7 @@ static void del_bio_entry(struct bio_entry *be)
static int add_ipu_page(struct f2fs_io_info *fio, struct bio **bio,
struct page *page)
{
- struct folio *fio_folio = page_folio(fio->page);
+ struct folio *fio_folio = fio->folio;
struct f2fs_sb_info *sbi = fio->sbi;
enum temp_type temp;
bool found = false;
@@ -848,7 +845,7 @@ void f2fs_submit_merged_ipu_write(struct f2fs_sb_info *sbi,
found = (target == be->bio);
else
found = __has_merged_page(be->bio, NULL,
- &folio->page, 0);
+ folio, 0);
if (found)
break;
}
@@ -865,7 +862,7 @@ void f2fs_submit_merged_ipu_write(struct f2fs_sb_info *sbi,
found = (target == be->bio);
else
found = __has_merged_page(be->bio, NULL,
- &folio->page, 0);
+ folio, 0);
if (found) {
target = be->bio;
del_bio_entry(be);
@@ -886,15 +883,15 @@ void f2fs_submit_merged_ipu_write(struct f2fs_sb_info *sbi,
int f2fs_merge_page_bio(struct f2fs_io_info *fio)
{
struct bio *bio = *fio->bio;
- struct page *page = fio->encrypted_page ?
- fio->encrypted_page : fio->page;
- struct folio *folio = page_folio(fio->page);
+ struct folio *data_folio = fio->encrypted_page ?
+ page_folio(fio->encrypted_page) : fio->folio;
+ struct folio *folio = fio->folio;
if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr,
__is_meta_io(fio) ? META_GENERIC : DATA_GENERIC))
return -EFSCORRUPTED;
- trace_f2fs_submit_folio_bio(page_folio(page), fio);
+ trace_f2fs_submit_folio_bio(data_folio, fio);
if (bio && !page_is_mergeable(fio->sbi, bio, *fio->last_block,
fio->new_blkaddr))
@@ -905,16 +902,16 @@ alloc_new:
f2fs_set_bio_crypt_ctx(bio, folio->mapping->host,
folio->index, fio, GFP_NOIO);
- add_bio_entry(fio->sbi, bio, page, fio->temp);
+ add_bio_entry(fio->sbi, bio, &data_folio->page, fio->temp);
} else {
- if (add_ipu_page(fio, &bio, page))
+ if (add_ipu_page(fio, &bio, &data_folio->page))
goto alloc_new;
}
if (fio->io_wbc)
wbc_account_cgroup_owner(fio->io_wbc, folio, folio_size(folio));
- inc_page_count(fio->sbi, WB_DATA_TYPE(page, false));
+ inc_page_count(fio->sbi, WB_DATA_TYPE(data_folio, false));
*fio->last_block = fio->new_blkaddr;
*fio->bio = bio;
@@ -949,7 +946,7 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
struct f2fs_sb_info *sbi = fio->sbi;
enum page_type btype = PAGE_TYPE_OF_BIO(fio->type);
struct f2fs_bio_info *io = sbi->write_io[btype] + fio->temp;
- struct page *bio_page;
+ struct folio *bio_folio;
enum count_type type;
f2fs_bug_on(sbi, is_read_io(fio->op));
@@ -980,44 +977,44 @@ next:
verify_fio_blkaddr(fio);
if (fio->encrypted_page)
- bio_page = fio->encrypted_page;
+ bio_folio = page_folio(fio->encrypted_page);
else if (fio->compressed_page)
- bio_page = fio->compressed_page;
+ bio_folio = page_folio(fio->compressed_page);
else
- bio_page = fio->page;
+ bio_folio = fio->folio;
/* set submitted = true as a return value */
fio->submitted = 1;
- type = WB_DATA_TYPE(bio_page, fio->compressed_page);
+ type = WB_DATA_TYPE(bio_folio, fio->compressed_page);
inc_page_count(sbi, type);
if (io->bio &&
(!io_is_mergeable(sbi, io->bio, io, fio, io->last_block_in_bio,
fio->new_blkaddr) ||
!f2fs_crypt_mergeable_bio(io->bio, fio_inode(fio),
- page_folio(bio_page)->index, fio)))
+ bio_folio->index, fio)))
__submit_merged_bio(io);
alloc_new:
if (io->bio == NULL) {
io->bio = __bio_alloc(fio, BIO_MAX_VECS);
f2fs_set_bio_crypt_ctx(io->bio, fio_inode(fio),
- page_folio(bio_page)->index, fio, GFP_NOIO);
+ bio_folio->index, fio, GFP_NOIO);
io->fio = *fio;
}
- if (bio_add_page(io->bio, bio_page, PAGE_SIZE, 0) < PAGE_SIZE) {
+ if (!bio_add_folio(io->bio, bio_folio, folio_size(bio_folio), 0)) {
__submit_merged_bio(io);
goto alloc_new;
}
if (fio->io_wbc)
- wbc_account_cgroup_owner(fio->io_wbc, page_folio(fio->page),
- PAGE_SIZE);
+ wbc_account_cgroup_owner(fio->io_wbc, fio->folio,
+ folio_size(fio->folio));
io->last_block_in_bio = fio->new_blkaddr;
- trace_f2fs_submit_folio_write(page_folio(fio->page), fio);
+ trace_f2fs_submit_folio_write(fio->folio, fio);
#ifdef CONFIG_BLK_DEV_ZONED
if (f2fs_sb_has_blkzoned(sbi) && btype < META &&
is_end_zone_blkaddr(sbi, fio->new_blkaddr)) {
@@ -1553,10 +1550,14 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, int flag)
unsigned int start_pgofs;
int bidx = 0;
bool is_hole;
+ bool lfs_dio_write;
if (!maxblocks)
return 0;
+ lfs_dio_write = (flag == F2FS_GET_BLOCK_DIO && f2fs_lfs_mode(sbi) &&
+ map->m_may_create);
+
if (!map->m_may_create && f2fs_map_blocks_cached(inode, map, flag))
goto out;
@@ -1572,8 +1573,11 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, int flag)
end = pgofs + maxblocks;
next_dnode:
- if (map->m_may_create)
+ if (map->m_may_create) {
+ if (f2fs_lfs_mode(sbi))
+ f2fs_balance_fs(sbi, true);
f2fs_map_lock(sbi, flag);
+ }
/* When reading holes, we need its node page */
set_new_dnode(&dn, inode, NULL, NULL, 0);
@@ -1589,7 +1593,7 @@ next_dnode:
start_pgofs = pgofs;
prealloc = 0;
last_ofs_in_node = ofs_in_node = dn.ofs_in_node;
- end_offset = ADDRS_PER_PAGE(&dn.node_folio->page, inode);
+ end_offset = ADDRS_PER_PAGE(dn.node_folio, inode);
next_block:
blkaddr = f2fs_data_blkaddr(&dn);
@@ -1603,7 +1607,7 @@ next_block:
/* use out-place-update for direct IO under LFS mode */
if (map->m_may_create && (is_hole ||
(flag == F2FS_GET_BLOCK_DIO && f2fs_lfs_mode(sbi) &&
- !f2fs_is_pinned_file(inode)))) {
+ !f2fs_is_pinned_file(inode) && map->m_last_pblk != blkaddr))) {
if (unlikely(f2fs_cp_error(sbi))) {
err = -EIO;
goto sync_out;
@@ -1687,10 +1691,15 @@ next_block:
if (map->m_multidev_dio)
map->m_bdev = FDEV(bidx).bdev;
+
+ if (lfs_dio_write)
+ map->m_last_pblk = NULL_ADDR;
} else if (map_is_mergeable(sbi, map, blkaddr, flag, bidx, ofs)) {
ofs++;
map->m_len++;
} else {
+ if (lfs_dio_write && !f2fs_is_pinned_file(inode))
+ map->m_last_pblk = blkaddr;
goto sync_out;
}
@@ -1715,14 +1724,6 @@ skip:
dn.ofs_in_node = end_offset;
}
- if (flag == F2FS_GET_BLOCK_DIO && f2fs_lfs_mode(sbi) &&
- map->m_may_create) {
- /* the next block to be allocated may not be contiguous. */
- if (GET_SEGOFF_FROM_SEG0(sbi, blkaddr) % BLKS_PER_SEC(sbi) ==
- CAP_BLKS_PER_SEC(sbi) - 1)
- goto sync_out;
- }
-
if (pgofs >= end)
goto sync_out;
else if (dn.ofs_in_node < end_offset)
@@ -2303,7 +2304,7 @@ submit_and_realloc:
}
if (!bio) {
- bio = f2fs_grab_read_bio(inode, blkaddr, nr_pages,
+ bio = f2fs_grab_read_bio(inode, blkaddr, nr_pages - i,
f2fs_ra_op_flags(rac),
folio->index, for_write);
if (IS_ERR(bio)) {
@@ -2376,6 +2377,14 @@ static int f2fs_mpage_readpages(struct inode *inode,
unsigned max_nr_pages = nr_pages;
int ret = 0;
+#ifdef CONFIG_F2FS_FS_COMPRESSION
+ if (f2fs_compressed_file(inode)) {
+ index = rac ? readahead_index(rac) : folio->index;
+ max_nr_pages = round_up(index + nr_pages, cc.cluster_size) -
+ round_down(index, cc.cluster_size);
+ }
+#endif
+
map.m_pblk = 0;
map.m_lblk = 0;
map.m_len = 0;
@@ -2642,7 +2651,7 @@ static inline bool need_inplace_update(struct f2fs_io_info *fio)
int f2fs_do_write_data_page(struct f2fs_io_info *fio)
{
- struct folio *folio = page_folio(fio->page);
+ struct folio *folio = fio->folio;
struct inode *inode = folio->mapping->host;
struct dnode_of_data dn;
struct node_info ni;
@@ -2652,7 +2661,7 @@ int f2fs_do_write_data_page(struct f2fs_io_info *fio)
/* Use COW inode to make dnode_of_data for atomic write */
atomic_commit = f2fs_is_atomic_file(inode) &&
- page_private_atomic(folio_page(folio, 0));
+ folio_test_f2fs_atomic(folio);
if (atomic_commit)
set_new_dnode(&dn, F2FS_I(inode)->cow_inode, NULL, NULL, 0);
else
@@ -2683,7 +2692,7 @@ int f2fs_do_write_data_page(struct f2fs_io_info *fio)
/* This page is already truncated */
if (fio->old_blkaddr == NULL_ADDR) {
folio_clear_uptodate(folio);
- clear_page_private_gcing(folio_page(folio, 0));
+ folio_clear_f2fs_gcing(folio);
goto out_writepage;
}
got_it:
@@ -2753,7 +2762,7 @@ got_it:
trace_f2fs_do_write_data_page(folio, OPU);
set_inode_flag(inode, FI_APPEND_WRITE);
if (atomic_commit)
- clear_page_private_atomic(folio_page(folio, 0));
+ folio_clear_f2fs_atomic(folio);
out_writepage:
f2fs_put_dnode(&dn);
out:
@@ -2771,7 +2780,6 @@ int f2fs_write_single_data_page(struct folio *folio, int *submitted,
bool allow_balance)
{
struct inode *inode = folio->mapping->host;
- struct page *page = folio_page(folio, 0);
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
loff_t i_size = i_size_read(inode);
const pgoff_t end_index = ((unsigned long long)i_size)
@@ -2788,7 +2796,7 @@ int f2fs_write_single_data_page(struct folio *folio, int *submitted,
.op = REQ_OP_WRITE,
.op_flags = wbc_to_write_flags(wbc),
.old_blkaddr = NULL_ADDR,
- .page = page,
+ .folio = folio,
.encrypted_page = NULL,
.submitted = 0,
.compr_blocks = compr_blocks,
@@ -2890,7 +2898,7 @@ out:
inode_dec_dirty_pages(inode);
if (err) {
folio_clear_uptodate(folio);
- clear_page_private_gcing(page);
+ folio_clear_f2fs_gcing(folio);
}
folio_unlock(folio);
if (!S_ISDIR(inode->i_mode) && !IS_NOQUOTA(inode) &&
@@ -3376,7 +3384,7 @@ restart:
f2fs_do_read_inline_data(folio, ifolio);
set_inode_flag(inode, FI_DATA_EXIST);
if (inode->i_nlink)
- set_page_private_inline(&ifolio->page);
+ folio_set_f2fs_inline(ifolio);
goto out;
}
err = f2fs_convert_inline_folio(&dn, folio);
@@ -3698,7 +3706,7 @@ static int f2fs_write_end(const struct kiocb *iocb,
folio_mark_dirty(folio);
if (f2fs_is_atomic_file(inode))
- set_page_private_atomic(folio_page(folio, 0));
+ folio_set_f2fs_atomic(folio);
if (pos + copied > i_size_read(inode) &&
!f2fs_verity_in_progress(inode)) {
@@ -3733,7 +3741,7 @@ void f2fs_invalidate_folio(struct folio *folio, size_t offset, size_t length)
f2fs_remove_dirty_inode(inode);
}
}
- clear_page_private_all(&folio->page);
+ folio_detach_private(folio);
}
bool f2fs_release_folio(struct folio *folio, gfp_t wait)
@@ -3742,7 +3750,7 @@ bool f2fs_release_folio(struct folio *folio, gfp_t wait)
if (folio_test_dirty(folio))
return false;
- clear_page_private_all(&folio->page);
+ folio_detach_private(folio);
return true;
}
@@ -4160,7 +4168,7 @@ static int f2fs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
unsigned int flags, struct iomap *iomap,
struct iomap *srcmap)
{
- struct f2fs_map_blocks map = {};
+ struct f2fs_map_blocks map = { NULL, };
pgoff_t next_pgofs = 0;
int err;
@@ -4169,6 +4177,10 @@ static int f2fs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
map.m_next_pgofs = &next_pgofs;
map.m_seg_type = f2fs_rw_hint_to_seg_type(F2FS_I_SB(inode),
inode->i_write_hint);
+ if (flags & IOMAP_WRITE && iomap->private) {
+ map.m_last_pblk = (unsigned long)iomap->private;
+ iomap->private = NULL;
+ }
/*
* If the blocks being overwritten are already allocated,
@@ -4207,6 +4219,9 @@ static int f2fs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
iomap->flags |= IOMAP_F_MERGED;
iomap->bdev = map.m_bdev;
iomap->addr = F2FS_BLK_TO_BYTES(map.m_pblk);
+
+ if (flags & IOMAP_WRITE && map.m_last_pblk)
+ iomap->private = (void *)map.m_last_pblk;
} else {
if (flags & IOMAP_WRITE)
return -ENOTBLK;
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index 16c2dfb4f595..43a83bbd3bc5 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -21,7 +21,7 @@
#include "gc.h"
static LIST_HEAD(f2fs_stat_list);
-static DEFINE_RAW_SPINLOCK(f2fs_stat_lock);
+static DEFINE_SPINLOCK(f2fs_stat_lock);
#ifdef CONFIG_DEBUG_FS
static struct dentry *f2fs_debugfs_root;
#endif
@@ -91,7 +91,7 @@ static void update_multidevice_stats(struct f2fs_sb_info *sbi)
seg_blks = get_seg_entry(sbi, j)->valid_blocks;
/* update segment stats */
- if (IS_CURSEG(sbi, j))
+ if (is_curseg(sbi, j))
dev_stats[i].devstats[0][DEVSTAT_INUSE]++;
else if (seg_blks == BLKS_PER_SEG(sbi))
dev_stats[i].devstats[0][DEVSTAT_FULL]++;
@@ -109,7 +109,7 @@ static void update_multidevice_stats(struct f2fs_sb_info *sbi)
sec_blks = get_sec_entry(sbi, j)->valid_blocks;
/* update section stats */
- if (IS_CURSEC(sbi, GET_SEC_FROM_SEG(sbi, j)))
+ if (is_cursec(sbi, GET_SEC_FROM_SEG(sbi, j)))
dev_stats[i].devstats[1][DEVSTAT_INUSE]++;
else if (sec_blks == BLKS_PER_SEC(sbi))
dev_stats[i].devstats[1][DEVSTAT_FULL]++;
@@ -439,9 +439,8 @@ static int stat_show(struct seq_file *s, void *v)
{
struct f2fs_stat_info *si;
int i = 0, j = 0;
- unsigned long flags;
- raw_spin_lock_irqsave(&f2fs_stat_lock, flags);
+ spin_lock(&f2fs_stat_lock);
list_for_each_entry(si, &f2fs_stat_list, stat_list) {
struct f2fs_sb_info *sbi = si->sbi;
@@ -753,7 +752,7 @@ static int stat_show(struct seq_file *s, void *v)
seq_printf(s, " - paged : %llu KB\n",
si->page_mem >> 10);
}
- raw_spin_unlock_irqrestore(&f2fs_stat_lock, flags);
+ spin_unlock(&f2fs_stat_lock);
return 0;
}
@@ -765,7 +764,6 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi)
struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
struct f2fs_stat_info *si;
struct f2fs_dev_stats *dev_stats;
- unsigned long flags;
int i;
si = f2fs_kzalloc(sbi, sizeof(struct f2fs_stat_info), GFP_KERNEL);
@@ -817,9 +815,9 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi)
atomic_set(&sbi->max_aw_cnt, 0);
- raw_spin_lock_irqsave(&f2fs_stat_lock, flags);
+ spin_lock(&f2fs_stat_lock);
list_add_tail(&si->stat_list, &f2fs_stat_list);
- raw_spin_unlock_irqrestore(&f2fs_stat_lock, flags);
+ spin_unlock(&f2fs_stat_lock);
return 0;
}
@@ -827,11 +825,10 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi)
void f2fs_destroy_stats(struct f2fs_sb_info *sbi)
{
struct f2fs_stat_info *si = F2FS_STAT(sbi);
- unsigned long flags;
- raw_spin_lock_irqsave(&f2fs_stat_lock, flags);
+ spin_lock(&f2fs_stat_lock);
list_del(&si->stat_list);
- raw_spin_unlock_irqrestore(&f2fs_stat_lock, flags);
+ spin_unlock(&f2fs_stat_lock);
kfree(si->dev_stats);
kfree(si);
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index c36b3b22bfff..fffd7749d6d1 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -454,7 +454,7 @@ static void init_dent_inode(struct inode *dir, struct inode *inode,
f2fs_folio_wait_writeback(ifolio, NODE, true, true);
/* copy name info. to this inode folio */
- ri = F2FS_INODE(&ifolio->page);
+ ri = F2FS_INODE(ifolio);
ri->i_namelen = cpu_to_le32(fname->disk_name.len);
memcpy(ri->i_name, fname->disk_name.name, fname->disk_name.len);
if (IS_ENCRYPTED(dir)) {
@@ -897,7 +897,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct folio *folio,
f2fs_clear_page_cache_dirty_tag(folio);
folio_clear_dirty_for_io(folio);
folio_clear_uptodate(folio);
- clear_page_private_all(&folio->page);
+ folio_detach_private(folio);
inode_dec_dirty_pages(dir);
f2fs_remove_dirty_inode(dir);
diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c
index cfe925a3d555..199c1e7a83ef 100644
--- a/fs/f2fs/extent_cache.c
+++ b/fs/f2fs/extent_cache.c
@@ -19,10 +19,10 @@
#include "node.h"
#include <trace/events/f2fs.h>
-bool sanity_check_extent_cache(struct inode *inode, struct page *ipage)
+bool sanity_check_extent_cache(struct inode *inode, struct folio *ifolio)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
- struct f2fs_extent *i_ext = &F2FS_INODE(ipage)->i_ext;
+ struct f2fs_extent *i_ext = &F2FS_INODE(ifolio)->i_ext;
struct extent_info ei;
int devi;
@@ -411,10 +411,10 @@ void f2fs_init_read_extent_tree(struct inode *inode, struct folio *ifolio)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct extent_tree_info *eti = &sbi->extent_tree[EX_READ];
- struct f2fs_extent *i_ext = &F2FS_INODE(&ifolio->page)->i_ext;
+ struct f2fs_extent *i_ext = &F2FS_INODE(ifolio)->i_ext;
struct extent_tree *et;
struct extent_node *en;
- struct extent_info ei;
+ struct extent_info ei = {0};
if (!__may_extent_tree(inode, EX_READ)) {
/* drop largest read extent */
@@ -934,7 +934,7 @@ static void __update_extent_cache(struct dnode_of_data *dn, enum extent_type typ
if (!__may_extent_tree(dn->inode, type))
return;
- ei.fofs = f2fs_start_bidx_of_node(ofs_of_node(&dn->node_folio->page), dn->inode) +
+ ei.fofs = f2fs_start_bidx_of_node(ofs_of_node(dn->node_folio), dn->inode) +
dn->ofs_in_node;
ei.len = 1;
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index c78464792ceb..46be7560548c 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -386,7 +386,7 @@ struct discard_cmd {
struct rb_node rb_node; /* rb node located in rb-tree */
struct discard_info di; /* discard info */
struct list_head list; /* command list */
- struct completion wait; /* compleation */
+ struct completion wait; /* completion */
struct block_device *bdev; /* bdev */
unsigned short ref; /* reference count */
unsigned char state; /* state */
@@ -732,6 +732,7 @@ struct f2fs_map_blocks {
block_t m_lblk;
unsigned int m_len;
unsigned int m_flags;
+ unsigned long m_last_pblk; /* last allocated block, only used for DIO in LFS mode */
pgoff_t *m_next_pgofs; /* point next possible non-hole pgofs */
pgoff_t *m_next_extent; /* point to next possible extent */
int m_seg_type;
@@ -875,6 +876,7 @@ struct f2fs_inode_info {
/* linked in global inode list for cache donation */
struct list_head gdonate_list;
pgoff_t donate_start, donate_end; /* inclusive */
+ atomic_t open_count; /* # of open files */
struct task_struct *atomic_write_task; /* store atomic write task */
struct extent_tree *extent_tree[NR_EXTENT_CACHES];
@@ -1123,8 +1125,8 @@ struct f2fs_sm_info {
* f2fs monitors the number of several block types such as on-writeback,
* dirty dentry blocks, dirty node blocks, and dirty meta blocks.
*/
-#define WB_DATA_TYPE(p, f) \
- (f || f2fs_is_cp_guaranteed(p) ? F2FS_WB_CP_DATA : F2FS_WB_DATA)
+#define WB_DATA_TYPE(folio, f) \
+ (f || f2fs_is_cp_guaranteed(folio) ? F2FS_WB_CP_DATA : F2FS_WB_DATA)
enum count_type {
F2FS_DIRTY_DENTS,
F2FS_DIRTY_DATA,
@@ -1240,7 +1242,10 @@ struct f2fs_io_info {
blk_opf_t op_flags; /* req_flag_bits */
block_t new_blkaddr; /* new block address to be written */
block_t old_blkaddr; /* old block address before Cow */
- struct page *page; /* page to be written */
+ union {
+ struct page *page; /* page to be written */
+ struct folio *folio;
+ };
struct page *encrypted_page; /* encrypted page */
struct page *compressed_page; /* compressed page */
struct list_head list; /* serialize IOs */
@@ -1286,7 +1291,7 @@ struct f2fs_bio_info {
struct f2fs_dev_info {
struct file *bdev_file;
struct block_device *bdev;
- char path[MAX_PATH_LEN];
+ char path[MAX_PATH_LEN + 1];
unsigned int total_segments;
block_t start_blk;
block_t end_blk;
@@ -1427,7 +1432,7 @@ enum {
enum {
MEMORY_MODE_NORMAL, /* memory mode for normal devices */
- MEMORY_MODE_LOW, /* memory mode for low memry devices */
+ MEMORY_MODE_LOW, /* memory mode for low memory devices */
};
enum errors_option {
@@ -1491,7 +1496,7 @@ enum compress_flag {
#define COMPRESS_DATA_RESERVED_SIZE 4
struct compress_data {
__le32 clen; /* compressed data size */
- __le32 chksum; /* compressed data chksum */
+ __le32 chksum; /* compressed data checksum */
__le32 reserved[COMPRESS_DATA_RESERVED_SIZE]; /* reserved */
u8 cdata[]; /* compressed data */
};
@@ -1536,6 +1541,7 @@ struct compress_io_ctx {
struct decompress_io_ctx {
u32 magic; /* magic number to indicate page is compressed */
struct inode *inode; /* inode the context belong to */
+ struct f2fs_sb_info *sbi; /* f2fs_sb_info pointer */
pgoff_t cluster_idx; /* cluster index number */
unsigned int cluster_size; /* page count in cluster */
unsigned int log_cluster_size; /* log of cluster size */
@@ -1576,6 +1582,7 @@ struct decompress_io_ctx {
bool failed; /* IO error occurred before decompression? */
bool need_verity; /* need fs-verity verification after decompression? */
+ unsigned char compress_algorithm; /* backup algorithm type */
void *private; /* payload buffer for specified decompression algorithm */
void *private2; /* extra payload buffer */
struct work_struct verity_work; /* work to verify the decompressed pages */
@@ -1724,6 +1731,9 @@ struct f2fs_sb_info {
/* for skip statistic */
unsigned long long skipped_gc_rwsem; /* FG_GC only */
+ /* free sections reserved for pinned file */
+ unsigned int reserved_pin_section;
+
/* threshold for gc trials on pinned files */
unsigned short gc_pin_file_threshold;
struct f2fs_rwsem pin_sem;
@@ -2013,16 +2023,11 @@ static inline struct f2fs_sb_info *F2FS_M_SB(struct address_space *mapping)
return F2FS_I_SB(mapping->host);
}
-static inline struct f2fs_sb_info *F2FS_F_SB(struct folio *folio)
+static inline struct f2fs_sb_info *F2FS_F_SB(const struct folio *folio)
{
return F2FS_M_SB(folio->mapping);
}
-static inline struct f2fs_sb_info *F2FS_P_SB(struct page *page)
-{
- return F2FS_F_SB(page_folio(page));
-}
-
static inline struct f2fs_super_block *F2FS_RAW_SUPER(struct f2fs_sb_info *sbi)
{
return (struct f2fs_super_block *)(sbi->raw_super);
@@ -2043,14 +2048,14 @@ static inline struct f2fs_checkpoint *F2FS_CKPT(struct f2fs_sb_info *sbi)
return (struct f2fs_checkpoint *)(sbi->ckpt);
}
-static inline struct f2fs_node *F2FS_NODE(const struct page *page)
+static inline struct f2fs_node *F2FS_NODE(const struct folio *folio)
{
- return (struct f2fs_node *)page_address(page);
+ return (struct f2fs_node *)folio_address(folio);
}
-static inline struct f2fs_inode *F2FS_INODE(struct page *page)
+static inline struct f2fs_inode *F2FS_INODE(const struct folio *folio)
{
- return &((struct f2fs_node *)page_address(page))->i;
+ return &((struct f2fs_node *)folio_address(folio))->i;
}
static inline struct f2fs_nm_info *NM_I(struct f2fs_sb_info *sbi)
@@ -2453,6 +2458,13 @@ release_quota:
}
#define PAGE_PRIVATE_GET_FUNC(name, flagname) \
+static inline bool folio_test_f2fs_##name(const struct folio *folio) \
+{ \
+ unsigned long priv = (unsigned long)folio->private; \
+ unsigned long v = (1UL << PAGE_PRIVATE_NOT_POINTER) | \
+ (1UL << PAGE_PRIVATE_##flagname); \
+ return (priv & v) == v; \
+} \
static inline bool page_private_##name(struct page *page) \
{ \
return PagePrivate(page) && \
@@ -2461,6 +2473,17 @@ static inline bool page_private_##name(struct page *page) \
}
#define PAGE_PRIVATE_SET_FUNC(name, flagname) \
+static inline void folio_set_f2fs_##name(struct folio *folio) \
+{ \
+ unsigned long v = (1UL << PAGE_PRIVATE_NOT_POINTER) | \
+ (1UL << PAGE_PRIVATE_##flagname); \
+ if (!folio->private) \
+ folio_attach_private(folio, (void *)v); \
+ else { \
+ v |= (unsigned long)folio->private; \
+ folio->private = (void *)v; \
+ } \
+} \
static inline void set_page_private_##name(struct page *page) \
{ \
if (!PagePrivate(page)) \
@@ -2470,6 +2493,16 @@ static inline void set_page_private_##name(struct page *page) \
}
#define PAGE_PRIVATE_CLEAR_FUNC(name, flagname) \
+static inline void folio_clear_f2fs_##name(struct folio *folio) \
+{ \
+ unsigned long v = (unsigned long)folio->private; \
+ \
+ v &= ~(1UL << PAGE_PRIVATE_##flagname); \
+ if (v == (1UL << PAGE_PRIVATE_NOT_POINTER)) \
+ folio_detach_private(folio); \
+ else \
+ folio->private = (void *)v; \
+} \
static inline void clear_page_private_##name(struct page *page) \
{ \
clear_bit(PAGE_PRIVATE_##flagname, &page_private(page)); \
@@ -2492,39 +2525,23 @@ PAGE_PRIVATE_CLEAR_FUNC(inline, INLINE_INODE);
PAGE_PRIVATE_CLEAR_FUNC(gcing, ONGOING_MIGRATION);
PAGE_PRIVATE_CLEAR_FUNC(atomic, ATOMIC_WRITE);
-static inline unsigned long get_page_private_data(struct page *page)
+static inline unsigned long folio_get_f2fs_data(struct folio *folio)
{
- unsigned long data = page_private(page);
+ unsigned long data = (unsigned long)folio->private;
if (!test_bit(PAGE_PRIVATE_NOT_POINTER, &data))
return 0;
return data >> PAGE_PRIVATE_MAX;
}
-static inline void set_page_private_data(struct page *page, unsigned long data)
+static inline void folio_set_f2fs_data(struct folio *folio, unsigned long data)
{
- if (!PagePrivate(page))
- attach_page_private(page, (void *)0);
- set_bit(PAGE_PRIVATE_NOT_POINTER, &page_private(page));
- page_private(page) |= data << PAGE_PRIVATE_MAX;
-}
-
-static inline void clear_page_private_data(struct page *page)
-{
- page_private(page) &= GENMASK(PAGE_PRIVATE_MAX - 1, 0);
- if (page_private(page) == BIT(PAGE_PRIVATE_NOT_POINTER))
- detach_page_private(page);
-}
+ data = (1UL << PAGE_PRIVATE_NOT_POINTER) | (data << PAGE_PRIVATE_MAX);
-static inline void clear_page_private_all(struct page *page)
-{
- clear_page_private_data(page);
- clear_page_private_reference(page);
- clear_page_private_gcing(page);
- clear_page_private_inline(page);
- clear_page_private_atomic(page);
-
- f2fs_bug_on(F2FS_P_SB(page), page_private(page));
+ if (!folio_test_private(folio))
+ folio_attach_private(folio, (void *)data);
+ else
+ folio->private = (void *)((unsigned long)folio->private | data);
}
static inline void dec_valid_block_count(struct f2fs_sb_info *sbi,
@@ -3011,9 +3028,9 @@ static inline void f2fs_radix_tree_insert(struct radix_tree_root *root,
#define RAW_IS_INODE(p) ((p)->footer.nid == (p)->footer.ino)
-static inline bool IS_INODE(struct page *page)
+static inline bool IS_INODE(const struct folio *folio)
{
- struct f2fs_node *p = F2FS_NODE(page);
+ struct f2fs_node *p = F2FS_NODE(folio);
return RAW_IS_INODE(p);
}
@@ -3031,20 +3048,20 @@ static inline __le32 *blkaddr_in_node(struct f2fs_node *node)
static inline int f2fs_has_extra_attr(struct inode *inode);
static inline unsigned int get_dnode_base(struct inode *inode,
- struct page *node_page)
+ struct folio *node_folio)
{
- if (!IS_INODE(node_page))
+ if (!IS_INODE(node_folio))
return 0;
return inode ? get_extra_isize(inode) :
- offset_in_addr(&F2FS_NODE(node_page)->i);
+ offset_in_addr(&F2FS_NODE(node_folio)->i);
}
static inline __le32 *get_dnode_addr(struct inode *inode,
struct folio *node_folio)
{
- return blkaddr_in_node(F2FS_NODE(&node_folio->page)) +
- get_dnode_base(inode, &node_folio->page);
+ return blkaddr_in_node(F2FS_NODE(node_folio)) +
+ get_dnode_base(inode, node_folio);
}
static inline block_t data_blkaddr(struct inode *inode,
@@ -3366,9 +3383,10 @@ static inline unsigned int addrs_per_page(struct inode *inode,
return addrs;
}
-static inline void *inline_xattr_addr(struct inode *inode, struct folio *folio)
+static inline
+void *inline_xattr_addr(struct inode *inode, const struct folio *folio)
{
- struct f2fs_inode *ri = F2FS_INODE(&folio->page);
+ struct f2fs_inode *ri = F2FS_INODE(folio);
return (void *)&(ri->i_addr[DEF_ADDRS_PER_INODE -
get_inline_xattr_addrs(inode)]);
@@ -3628,13 +3646,14 @@ int f2fs_pin_file_control(struct inode *inode, bool inc);
*/
void f2fs_set_inode_flags(struct inode *inode);
bool f2fs_inode_chksum_verify(struct f2fs_sb_info *sbi, struct folio *folio);
-void f2fs_inode_chksum_set(struct f2fs_sb_info *sbi, struct page *page);
+void f2fs_inode_chksum_set(struct f2fs_sb_info *sbi, struct folio *folio);
struct inode *f2fs_iget(struct super_block *sb, unsigned long ino);
struct inode *f2fs_iget_retry(struct super_block *sb, unsigned long ino);
int f2fs_try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink);
void f2fs_update_inode(struct inode *inode, struct folio *node_folio);
void f2fs_update_inode_page(struct inode *inode);
int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc);
+void f2fs_remove_donate_inode(struct inode *inode);
void f2fs_evict_inode(struct inode *inode);
void f2fs_handle_failed_inode(struct inode *inode);
@@ -3784,8 +3803,8 @@ void f2fs_alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid);
void f2fs_alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid);
int f2fs_try_to_free_nids(struct f2fs_sb_info *sbi, int nr_shrink);
int f2fs_recover_inline_xattr(struct inode *inode, struct folio *folio);
-int f2fs_recover_xattr_data(struct inode *inode, struct page *page);
-int f2fs_recover_inode_page(struct f2fs_sb_info *sbi, struct page *page);
+int f2fs_recover_xattr_data(struct inode *inode, struct folio *folio);
+int f2fs_recover_inode_page(struct f2fs_sb_info *sbi, struct folio *folio);
int f2fs_restore_node_summary(struct f2fs_sb_info *sbi,
unsigned int segno, struct f2fs_summary_block *sum);
int f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc);
@@ -3852,7 +3871,7 @@ void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn,
bool recover_newaddr);
enum temp_type f2fs_get_segment_temp(struct f2fs_sb_info *sbi,
enum log_type seg_type);
-int f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
+int f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct folio *folio,
block_t old_blkaddr, block_t *new_blkaddr,
struct f2fs_summary *sum, int type,
struct f2fs_io_info *fio);
@@ -3886,7 +3905,7 @@ unsigned long long f2fs_get_section_mtime(struct f2fs_sb_info *sbi,
static inline struct inode *fio_inode(struct f2fs_io_info *fio)
{
- return page_folio(fio->page)->mapping->host;
+ return fio->folio->mapping->host;
}
#define DEF_FRAGMENT_SIZE 4
@@ -3953,7 +3972,7 @@ void f2fs_init_ckpt_req_control(struct f2fs_sb_info *sbi);
*/
int __init f2fs_init_bioset(void);
void f2fs_destroy_bioset(void);
-bool f2fs_is_cp_guaranteed(struct page *page);
+bool f2fs_is_cp_guaranteed(const struct folio *folio);
int f2fs_init_bio_entry_cache(void);
void f2fs_destroy_bio_entry_cache(void);
void f2fs_submit_read_bio(struct f2fs_sb_info *sbi, struct bio *bio,
@@ -3961,7 +3980,7 @@ void f2fs_submit_read_bio(struct f2fs_sb_info *sbi, struct bio *bio,
int f2fs_init_write_merge_io(struct f2fs_sb_info *sbi);
void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type);
void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi,
- struct inode *inode, struct page *page,
+ struct inode *inode, struct folio *folio,
nid_t ino, enum page_type type);
void f2fs_submit_merged_ipu_write(struct f2fs_sb_info *sbi,
struct bio **bio, struct folio *folio);
@@ -4303,7 +4322,7 @@ extern struct kmem_cache *f2fs_inode_entry_slab;
* inline.c
*/
bool f2fs_may_inline_data(struct inode *inode);
-bool f2fs_sanity_check_inline_data(struct inode *inode, struct page *ipage);
+bool f2fs_sanity_check_inline_data(struct inode *inode, struct folio *ifolio);
bool f2fs_may_inline_dentry(struct inode *inode);
void f2fs_do_read_inline_data(struct folio *folio, struct folio *ifolio);
void f2fs_truncate_inline_inode(struct inode *inode, struct folio *ifolio,
@@ -4345,7 +4364,7 @@ void f2fs_leave_shrinker(struct f2fs_sb_info *sbi);
/*
* extent_cache.c
*/
-bool sanity_check_extent_cache(struct inode *inode, struct page *ipage);
+bool sanity_check_extent_cache(struct inode *inode, struct folio *ifolio);
void f2fs_init_extent_tree(struct inode *inode);
void f2fs_drop_extent_tree(struct inode *inode);
void f2fs_destroy_extent_node(struct inode *inode);
@@ -4435,20 +4454,20 @@ enum cluster_check_type {
CLUSTER_COMPR_BLKS, /* return # of compressed blocks in a cluster */
CLUSTER_RAW_BLKS /* return # of raw blocks in a cluster */
};
-bool f2fs_is_compressed_page(struct page *page);
+bool f2fs_is_compressed_page(struct folio *folio);
struct folio *f2fs_compress_control_folio(struct folio *folio);
int f2fs_prepare_compress_overwrite(struct inode *inode,
struct page **pagep, pgoff_t index, void **fsdata);
bool f2fs_compress_write_end(struct inode *inode, void *fsdata,
pgoff_t index, unsigned copied);
int f2fs_truncate_partial_cluster(struct inode *inode, u64 from, bool lock);
-void f2fs_compress_write_end_io(struct bio *bio, struct page *page);
+void f2fs_compress_write_end_io(struct bio *bio, struct folio *folio);
bool f2fs_is_compress_backend_ready(struct inode *inode);
bool f2fs_is_compress_level_valid(int alg, int lvl);
int __init f2fs_init_compress_mempool(void);
void f2fs_destroy_compress_mempool(void);
void f2fs_decompress_cluster(struct decompress_io_ctx *dic, bool in_task);
-void f2fs_end_read_compressed_page(struct page *page, bool failed,
+void f2fs_end_read_compressed_page(struct folio *folio, bool failed,
block_t blkaddr, bool in_task);
bool f2fs_cluster_is_empty(struct compress_ctx *cc);
bool f2fs_cluster_can_merge_page(struct compress_ctx *cc, pgoff_t index);
@@ -4486,8 +4505,6 @@ void f2fs_destroy_compress_cache(void);
struct address_space *COMPRESS_MAPPING(struct f2fs_sb_info *sbi);
void f2fs_invalidate_compress_pages_range(struct f2fs_sb_info *sbi,
block_t blkaddr, unsigned int len);
-void f2fs_cache_compressed_page(struct f2fs_sb_info *sbi, struct page *page,
- nid_t ino, block_t blkaddr);
bool f2fs_load_compressed_folio(struct f2fs_sb_info *sbi, struct folio *folio,
block_t blkaddr);
void f2fs_invalidate_compress_pages(struct f2fs_sb_info *sbi, nid_t ino);
@@ -4504,7 +4521,7 @@ void f2fs_invalidate_compress_pages(struct f2fs_sb_info *sbi, nid_t ino);
sbi->compr_saved_block += diff; \
} while (0)
#else
-static inline bool f2fs_is_compressed_page(struct page *page) { return false; }
+static inline bool f2fs_is_compressed_page(struct folio *folio) { return false; }
static inline bool f2fs_is_compress_backend_ready(struct inode *inode)
{
if (!f2fs_compressed_file(inode))
@@ -4522,7 +4539,7 @@ static inline int __init f2fs_init_compress_mempool(void) { return 0; }
static inline void f2fs_destroy_compress_mempool(void) { }
static inline void f2fs_decompress_cluster(struct decompress_io_ctx *dic,
bool in_task) { }
-static inline void f2fs_end_read_compressed_page(struct page *page,
+static inline void f2fs_end_read_compressed_page(struct folio *folio,
bool failed, block_t blkaddr, bool in_task)
{
WARN_ON_ONCE(1);
@@ -4542,8 +4559,6 @@ static inline int __init f2fs_init_compress_cache(void) { return 0; }
static inline void f2fs_destroy_compress_cache(void) { }
static inline void f2fs_invalidate_compress_pages_range(struct f2fs_sb_info *sbi,
block_t blkaddr, unsigned int len) { }
-static inline void f2fs_cache_compressed_page(struct f2fs_sb_info *sbi,
- struct page *page, nid_t ino, block_t blkaddr) { }
static inline bool f2fs_load_compressed_folio(struct f2fs_sb_info *sbi,
struct folio *folio, block_t blkaddr) { return false; }
static inline void f2fs_invalidate_compress_pages(struct f2fs_sb_info *sbi,
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index c677230699fd..42faaed6a02d 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -489,7 +489,7 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence)
}
}
- end_offset = ADDRS_PER_PAGE(&dn.node_folio->page, inode);
+ end_offset = ADDRS_PER_PAGE(dn.node_folio, inode);
/* find data/hole in dnode block */
for (; dn.ofs_in_node < end_offset;
@@ -629,7 +629,10 @@ static int f2fs_file_open(struct inode *inode, struct file *filp)
if (err)
return err;
- return finish_preallocate_blocks(inode);
+ err = finish_preallocate_blocks(inode);
+ if (!err)
+ atomic_inc(&F2FS_I(inode)->open_count);
+ return err;
}
void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count)
@@ -708,7 +711,7 @@ next:
* once we invalidate valid blkaddr in range [ofs, ofs + count],
* we will invalidate all blkaddr in the whole range.
*/
- fofs = f2fs_start_bidx_of_node(ofs_of_node(&dn->node_folio->page),
+ fofs = f2fs_start_bidx_of_node(ofs_of_node(dn->node_folio),
dn->inode) + ofs;
f2fs_update_read_extent_cache_range(dn, fofs, 0, len);
f2fs_update_age_extent_cache_range(dn, fofs, len);
@@ -815,12 +818,12 @@ int f2fs_do_truncate_blocks(struct inode *inode, u64 from, bool lock)
goto out;
}
- count = ADDRS_PER_PAGE(&dn.node_folio->page, inode);
+ count = ADDRS_PER_PAGE(dn.node_folio, inode);
count -= dn.ofs_in_node;
f2fs_bug_on(sbi, count < 0);
- if (dn.ofs_in_node || IS_INODE(&dn.node_folio->page)) {
+ if (dn.ofs_in_node || IS_INODE(dn.node_folio)) {
f2fs_truncate_data_blocks_range(&dn, count);
free_from += count;
}
@@ -1043,11 +1046,24 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
{
struct inode *inode = d_inode(dentry);
struct f2fs_inode_info *fi = F2FS_I(inode);
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
int err;
- if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
+ if (unlikely(f2fs_cp_error(sbi)))
return -EIO;
+ err = setattr_prepare(idmap, dentry, attr);
+ if (err)
+ return err;
+
+ err = fscrypt_prepare_setattr(dentry, attr);
+ if (err)
+ return err;
+
+ err = fsverity_prepare_setattr(dentry, attr);
+ if (err)
+ return err;
+
if (unlikely(IS_IMMUTABLE(inode)))
return -EPERM;
@@ -1064,20 +1080,19 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
!IS_ALIGNED(attr->ia_size,
F2FS_BLK_TO_BYTES(fi->i_cluster_size)))
return -EINVAL;
+ /*
+ * To prevent scattered pin block generation, we don't allow
+ * smaller/equal size unaligned truncation for pinned file.
+ * We only support overwrite IO to pinned file, so don't
+ * care about larger size truncation.
+ */
+ if (f2fs_is_pinned_file(inode) &&
+ attr->ia_size <= i_size_read(inode) &&
+ !IS_ALIGNED(attr->ia_size,
+ F2FS_BLK_TO_BYTES(CAP_BLKS_PER_SEC(sbi))))
+ return -EINVAL;
}
- err = setattr_prepare(idmap, dentry, attr);
- if (err)
- return err;
-
- err = fscrypt_prepare_setattr(dentry, attr);
- if (err)
- return err;
-
- err = fsverity_prepare_setattr(dentry, attr);
- if (err)
- return err;
-
if (is_quota_modification(idmap, inode, attr)) {
err = f2fs_dquot_initialize(inode);
if (err)
@@ -1085,12 +1100,11 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
}
if (i_uid_needs_update(idmap, attr, inode) ||
i_gid_needs_update(idmap, attr, inode)) {
- f2fs_lock_op(F2FS_I_SB(inode));
+ f2fs_lock_op(sbi);
err = dquot_transfer(idmap, inode, attr);
if (err) {
- set_sbi_flag(F2FS_I_SB(inode),
- SBI_QUOTA_NEED_REPAIR);
- f2fs_unlock_op(F2FS_I_SB(inode));
+ set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
+ f2fs_unlock_op(sbi);
return err;
}
/*
@@ -1100,7 +1114,7 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
i_uid_update(idmap, attr, inode);
i_gid_update(idmap, attr, inode);
f2fs_mark_inode_dirty_sync(inode, true);
- f2fs_unlock_op(F2FS_I_SB(inode));
+ f2fs_unlock_op(sbi);
}
if (attr->ia_valid & ATTR_SIZE) {
@@ -1163,7 +1177,7 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
f2fs_mark_inode_dirty_sync(inode, true);
/* inode change will produce dirty node pages flushed by checkpoint */
- f2fs_balance_fs(F2FS_I_SB(inode), true);
+ f2fs_balance_fs(sbi, true);
return err;
}
@@ -1223,7 +1237,7 @@ int f2fs_truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end)
return err;
}
- end_offset = ADDRS_PER_PAGE(&dn.node_folio->page, inode);
+ end_offset = ADDRS_PER_PAGE(dn.node_folio, inode);
count = min(end_offset - dn.ofs_in_node, pg_end - pg_start);
f2fs_bug_on(F2FS_I_SB(inode), count == 0 || count > end_offset);
@@ -1322,7 +1336,7 @@ next_dnode:
goto next;
}
- done = min((pgoff_t)ADDRS_PER_PAGE(&dn.node_folio->page, inode) -
+ done = min((pgoff_t)ADDRS_PER_PAGE(dn.node_folio, inode) -
dn.ofs_in_node, len);
for (i = 0; i < done; i++, blkaddr++, do_replace++, dn.ofs_in_node++) {
*blkaddr = f2fs_data_blkaddr(&dn);
@@ -1411,7 +1425,7 @@ static int __clone_blkaddrs(struct inode *src_inode, struct inode *dst_inode,
}
ilen = min((pgoff_t)
- ADDRS_PER_PAGE(&dn.node_folio->page, dst_inode) -
+ ADDRS_PER_PAGE(dn.node_folio, dst_inode) -
dn.ofs_in_node, len - i);
do {
dn.data_blkaddr = f2fs_data_blkaddr(&dn);
@@ -1453,7 +1467,7 @@ static int __clone_blkaddrs(struct inode *src_inode, struct inode *dst_inode,
memcpy_folio(fdst, 0, fsrc, 0, PAGE_SIZE);
folio_mark_dirty(fdst);
- set_page_private_gcing(&fdst->page);
+ folio_set_f2fs_gcing(fdst);
f2fs_folio_put(fdst, true);
f2fs_folio_put(fsrc, true);
@@ -1707,7 +1721,7 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
goto out;
}
- end_offset = ADDRS_PER_PAGE(&dn.node_folio->page, inode);
+ end_offset = ADDRS_PER_PAGE(dn.node_folio, inode);
end = min(pg_end, end_offset - dn.ofs_in_node + index);
ret = f2fs_do_zero_range(&dn, index, end);
@@ -1888,9 +1902,8 @@ next_alloc:
}
}
- if (has_not_enough_free_secs(sbi, 0, f2fs_sb_has_blkzoned(sbi) ?
- ZONED_PIN_SEC_REQUIRED_COUNT :
- GET_SEC_FROM_SEG(sbi, overprovision_segments(sbi)))) {
+ if (has_not_enough_free_secs(sbi, 0,
+ sbi->reserved_pin_section)) {
f2fs_down_write(&sbi->gc_lock);
stat_inc_gc_call_count(sbi, FOREGROUND);
err = f2fs_gc(sbi, &gc_control);
@@ -2028,6 +2041,9 @@ out:
static int f2fs_release_file(struct inode *inode, struct file *filp)
{
+ if (atomic_dec_and_test(&F2FS_I(inode)->open_count))
+ f2fs_remove_donate_inode(inode);
+
/*
* f2fs_release_file is called at every close calls. So we should
* not drop any inmemory pages by close called by other process.
@@ -2978,7 +2994,7 @@ do_map:
f2fs_folio_wait_writeback(folio, DATA, true, true);
folio_mark_dirty(folio);
- set_page_private_gcing(&folio->page);
+ folio_set_f2fs_gcing(folio);
f2fs_folio_put(folio, true);
idx++;
@@ -3876,7 +3892,7 @@ static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg)
break;
}
- end_offset = ADDRS_PER_PAGE(&dn.node_folio->page, inode);
+ end_offset = ADDRS_PER_PAGE(dn.node_folio, inode);
count = min(end_offset - dn.ofs_in_node, last_idx - page_idx);
count = round_up(count, fi->i_cluster_size);
@@ -4054,7 +4070,7 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg)
break;
}
- end_offset = ADDRS_PER_PAGE(&dn.node_folio->page, inode);
+ end_offset = ADDRS_PER_PAGE(dn.node_folio, inode);
count = min(end_offset - dn.ofs_in_node, last_idx - page_idx);
count = round_up(count, fi->i_cluster_size);
@@ -4218,7 +4234,7 @@ static int f2fs_sec_trim_file(struct file *filp, unsigned long arg)
goto out;
}
- end_offset = ADDRS_PER_PAGE(&dn.node_folio->page, inode);
+ end_offset = ADDRS_PER_PAGE(dn.node_folio, inode);
count = min(end_offset - dn.ofs_in_node, pg_end - index);
for (i = 0; i < count; i++, index++, dn.ofs_in_node++) {
struct block_device *cur_bdev;
@@ -4415,7 +4431,7 @@ static int redirty_blocks(struct inode *inode, pgoff_t page_idx, int len)
f2fs_folio_wait_writeback(folio, DATA, true, true);
folio_mark_dirty(folio);
- set_page_private_gcing(&folio->page);
+ folio_set_f2fs_gcing(folio);
redirty_idx = folio_next_index(folio);
folio_unlock(folio);
folio_put_refs(folio, 2);
@@ -4825,6 +4841,7 @@ static ssize_t f2fs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
struct inode *inode = file_inode(iocb->ki_filp);
const loff_t pos = iocb->ki_pos;
ssize_t ret;
+ bool dio;
if (!f2fs_is_compress_backend_ready(inode))
return -EOPNOTSUPP;
@@ -4833,12 +4850,15 @@ static ssize_t f2fs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
f2fs_trace_rw_file_path(iocb->ki_filp, iocb->ki_pos,
iov_iter_count(to), READ);
+ dio = f2fs_should_use_dio(inode, iocb, to);
+
/* In LFS mode, if there is inflight dio, wait for its completion */
if (f2fs_lfs_mode(F2FS_I_SB(inode)) &&
- get_pages(F2FS_I_SB(inode), F2FS_DIO_WRITE))
+ get_pages(F2FS_I_SB(inode), F2FS_DIO_WRITE) &&
+ (!f2fs_is_pinned_file(inode) || !dio))
inode_dio_wait(inode);
- if (f2fs_should_use_dio(inode, iocb, to)) {
+ if (dio) {
ret = f2fs_dio_read_iter(iocb, to);
} else {
ret = filemap_read(iocb, to, 0);
@@ -4846,8 +4866,7 @@ static ssize_t f2fs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
f2fs_update_iostat(F2FS_I_SB(inode), inode,
APP_BUFFERED_READ_IO, ret);
}
- if (trace_f2fs_dataread_end_enabled())
- trace_f2fs_dataread_end(inode, pos, ret);
+ trace_f2fs_dataread_end(inode, pos, ret);
return ret;
}
@@ -4870,8 +4889,7 @@ static ssize_t f2fs_file_splice_read(struct file *in, loff_t *ppos,
f2fs_update_iostat(F2FS_I_SB(inode), inode,
APP_BUFFERED_READ_IO, ret);
- if (trace_f2fs_dataread_end_enabled())
- trace_f2fs_dataread_end(inode, pos, ret);
+ trace_f2fs_dataread_end(inode, pos, ret);
return ret;
}
@@ -5216,8 +5234,7 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
f2fs_dio_write_iter(iocb, from, &may_need_sync) :
f2fs_buffered_write_iter(iocb, from);
- if (trace_f2fs_datawrite_end_enabled())
- trace_f2fs_datawrite_end(inode, orig_pos, ret);
+ trace_f2fs_datawrite_end(inode, orig_pos, ret);
}
/* Don't leave any preallocated blocks around past i_size. */
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 3cb5242f4ddf..098e9f71421e 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -141,10 +141,10 @@ do_gc:
FOREGROUND : BACKGROUND);
sync_mode = (F2FS_OPTION(sbi).bggc_mode == BGGC_MODE_SYNC) ||
- gc_control.one_time;
+ (gc_control.one_time && gc_th->boost_gc_greedy);
/* foreground GC was been triggered via f2fs_balance_fs() */
- if (foreground)
+ if (foreground && !f2fs_sb_has_blkzoned(sbi))
sync_mode = false;
gc_control.init_gc_type = sync_mode ? FG_GC : BG_GC;
@@ -197,6 +197,8 @@ int f2fs_start_gc_thread(struct f2fs_sb_info *sbi)
gc_th->urgent_sleep_time = DEF_GC_THREAD_URGENT_SLEEP_TIME;
gc_th->valid_thresh_ratio = DEF_GC_THREAD_VALID_THRESH_RATIO;
+ gc_th->boost_gc_multiple = BOOST_GC_MULTIPLE;
+ gc_th->boost_gc_greedy = GC_GREEDY;
if (f2fs_sb_has_blkzoned(sbi)) {
gc_th->min_sleep_time = DEF_GC_THREAD_MIN_SLEEP_TIME_ZONED;
@@ -278,12 +280,7 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type,
{
struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
- if (p->alloc_mode == SSR) {
- p->gc_mode = GC_GREEDY;
- p->dirty_bitmap = dirty_i->dirty_segmap[type];
- p->max_search = dirty_i->nr_dirty[type];
- p->ofs_unit = 1;
- } else if (p->alloc_mode == AT_SSR) {
+ if (p->alloc_mode == SSR || p->alloc_mode == AT_SSR) {
p->gc_mode = GC_GREEDY;
p->dirty_bitmap = dirty_i->dirty_segmap[type];
p->max_search = dirty_i->nr_dirty[type];
@@ -389,14 +386,15 @@ static unsigned int get_cb_cost(struct f2fs_sb_info *sbi, unsigned int segno)
}
static inline unsigned int get_gc_cost(struct f2fs_sb_info *sbi,
- unsigned int segno, struct victim_sel_policy *p)
+ unsigned int segno, struct victim_sel_policy *p,
+ unsigned int valid_thresh_ratio)
{
if (p->alloc_mode == SSR)
return get_seg_entry(sbi, segno)->ckpt_valid_blocks;
- if (p->one_time_gc && (get_valid_blocks(sbi, segno, true) >=
- CAP_BLKS_PER_SEC(sbi) * sbi->gc_thread->valid_thresh_ratio /
- 100))
+ if (p->one_time_gc && (valid_thresh_ratio < 100) &&
+ (get_valid_blocks(sbi, segno, true) >=
+ CAP_BLKS_PER_SEC(sbi) * valid_thresh_ratio / 100))
return UINT_MAX;
/* alloc_mode == LFS */
@@ -777,6 +775,7 @@ int f2fs_get_victim(struct f2fs_sb_info *sbi, unsigned int *result,
unsigned int secno, last_victim;
unsigned int last_segment;
unsigned int nsearched;
+ unsigned int valid_thresh_ratio = 100;
bool is_atgc;
int ret = 0;
@@ -786,7 +785,11 @@ int f2fs_get_victim(struct f2fs_sb_info *sbi, unsigned int *result,
p.alloc_mode = alloc_mode;
p.age = age;
p.age_threshold = sbi->am.age_threshold;
- p.one_time_gc = one_time;
+ if (one_time) {
+ p.one_time_gc = one_time;
+ if (has_enough_free_secs(sbi, 0, NR_PERSISTENT_LOG))
+ valid_thresh_ratio = sbi->gc_thread->valid_thresh_ratio;
+ }
retry:
select_policy(sbi, gc_type, type, &p);
@@ -912,7 +915,7 @@ retry:
goto next;
}
- cost = get_gc_cost(sbi, segno, &p);
+ cost = get_gc_cost(sbi, segno, &p, valid_thresh_ratio);
if (p.min_cost > cost) {
p.min_segno = segno;
@@ -1162,8 +1165,8 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
return false;
}
- if (IS_INODE(&node_folio->page)) {
- base = offset_in_addr(F2FS_INODE(&node_folio->page));
+ if (IS_INODE(node_folio)) {
+ base = offset_in_addr(F2FS_INODE(node_folio));
max_addrs = DEF_ADDRS_PER_INODE;
} else {
base = 0;
@@ -1177,7 +1180,7 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
return false;
}
- *nofs = ofs_of_node(&node_folio->page);
+ *nofs = ofs_of_node(node_folio);
source_blkaddr = data_blkaddr(NULL, node_folio, ofs_in_node);
f2fs_folio_put(node_folio, true);
@@ -1249,7 +1252,7 @@ static int ra_data_block(struct inode *inode, pgoff_t index)
}
got_it:
/* read folio */
- fio.page = &folio->page;
+ fio.folio = folio;
fio.new_blkaddr = fio.old_blkaddr = dn.data_blkaddr;
/*
@@ -1353,7 +1356,7 @@ static int move_data_block(struct inode *inode, block_t bidx,
goto put_out;
/* read page */
- fio.page = &folio->page;
+ fio.folio = folio;
fio.new_blkaddr = fio.old_blkaddr = dn.data_blkaddr;
if (lfs_mode)
@@ -1473,7 +1476,7 @@ static int move_data_page(struct inode *inode, block_t bidx, int gc_type,
goto out;
}
folio_mark_dirty(folio);
- set_page_private_gcing(&folio->page);
+ folio_set_f2fs_gcing(folio);
} else {
struct f2fs_io_info fio = {
.sbi = F2FS_I_SB(inode),
@@ -1483,7 +1486,7 @@ static int move_data_page(struct inode *inode, block_t bidx, int gc_type,
.op = REQ_OP_WRITE,
.op_flags = REQ_SYNC,
.old_blkaddr = NULL_ADDR,
- .page = &folio->page,
+ .folio = folio,
.encrypted_page = NULL,
.need_lock = LOCK_REQ,
.io_type = FS_GC_DATA_IO,
@@ -1499,11 +1502,11 @@ retry:
f2fs_remove_dirty_inode(inode);
}
- set_page_private_gcing(&folio->page);
+ folio_set_f2fs_gcing(folio);
err = f2fs_do_write_data_page(&fio);
if (err) {
- clear_page_private_gcing(&folio->page);
+ folio_clear_f2fs_gcing(folio);
if (err == -ENOMEM) {
memalloc_retry_wait(GFP_NOFS);
goto retry;
@@ -1749,7 +1752,7 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi,
!has_enough_free_blocks(sbi,
sbi->gc_thread->boost_zoned_gc_percent))
window_granularity *=
- BOOST_GC_MULTIPLE;
+ sbi->gc_thread->boost_gc_multiple;
end_segno = start_segno + window_granularity;
}
@@ -1891,6 +1894,7 @@ gc_more:
/* Let's run FG_GC, if we don't have enough space. */
if (has_not_enough_free_secs(sbi, 0, 0)) {
gc_type = FG_GC;
+ gc_control->one_time = false;
/*
* For example, if there are many prefree_segments below given
@@ -2064,7 +2068,7 @@ int f2fs_gc_range(struct f2fs_sb_info *sbi,
.iroot = RADIX_TREE_INIT(gc_list.iroot, GFP_NOFS),
};
- if (IS_CURSEC(sbi, GET_SEC_FROM_SEG(sbi, segno)))
+ if (is_cursec(sbi, GET_SEC_FROM_SEG(sbi, segno)))
continue;
do_garbage_collect(sbi, segno, &gc_list, FG_GC, true, false);
diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h
index 5c1eaf55e127..24e8b1c27acc 100644
--- a/fs/f2fs/gc.h
+++ b/fs/f2fs/gc.h
@@ -68,6 +68,8 @@ struct f2fs_gc_kthread {
unsigned int no_zoned_gc_percent;
unsigned int boost_zoned_gc_percent;
unsigned int valid_thresh_ratio;
+ unsigned int boost_gc_multiple;
+ unsigned int boost_gc_greedy;
};
struct gc_inode_list {
@@ -194,6 +196,7 @@ static inline bool has_enough_invalid_blocks(struct f2fs_sb_info *sbi)
static inline bool need_to_boost_gc(struct f2fs_sb_info *sbi)
{
if (f2fs_sb_has_blkzoned(sbi))
- return !has_enough_free_blocks(sbi, LIMIT_BOOST_ZONED_GC);
+ return !has_enough_free_blocks(sbi,
+ sbi->gc_thread->boost_zoned_gc_percent);
return has_enough_invalid_blocks(sbi);
}
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index 901c630685ce..58ac831ef704 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -33,9 +33,9 @@ bool f2fs_may_inline_data(struct inode *inode)
return !f2fs_post_read_required(inode);
}
-static bool inode_has_blocks(struct inode *inode, struct page *ipage)
+static bool inode_has_blocks(struct inode *inode, struct folio *ifolio)
{
- struct f2fs_inode *ri = F2FS_INODE(ipage);
+ struct f2fs_inode *ri = F2FS_INODE(ifolio);
int i;
if (F2FS_HAS_BLOCKS(inode))
@@ -48,12 +48,12 @@ static bool inode_has_blocks(struct inode *inode, struct page *ipage)
return false;
}
-bool f2fs_sanity_check_inline_data(struct inode *inode, struct page *ipage)
+bool f2fs_sanity_check_inline_data(struct inode *inode, struct folio *ifolio)
{
if (!f2fs_has_inline_data(inode))
return false;
- if (inode_has_blocks(inode, ipage))
+ if (inode_has_blocks(inode, ifolio))
return false;
if (!support_inline_data(inode))
@@ -150,7 +150,7 @@ int f2fs_convert_inline_folio(struct dnode_of_data *dn, struct folio *folio)
.type = DATA,
.op = REQ_OP_WRITE,
.op_flags = REQ_SYNC | REQ_PRIO,
- .page = &folio->page,
+ .folio = folio,
.encrypted_page = NULL,
.io_type = FS_DATA_IO,
};
@@ -206,7 +206,7 @@ int f2fs_convert_inline_folio(struct dnode_of_data *dn, struct folio *folio)
/* clear inline data and flag after data writeback */
f2fs_truncate_inline_inode(dn->inode, dn->inode_folio, 0);
- clear_page_private_inline(&dn->inode_folio->page);
+ folio_clear_f2fs_inline(dn->inode_folio);
clear_out:
stat_dec_inline_inode(dn->inode);
clear_inode_flag(dn->inode, FI_INLINE_DATA);
@@ -286,7 +286,7 @@ int f2fs_write_inline_data(struct inode *inode, struct folio *folio)
set_inode_flag(inode, FI_APPEND_WRITE);
set_inode_flag(inode, FI_DATA_EXIST);
- clear_page_private_inline(&ifolio->page);
+ folio_clear_f2fs_inline(ifolio);
f2fs_folio_put(ifolio, 1);
return 0;
}
@@ -305,8 +305,8 @@ int f2fs_recover_inline_data(struct inode *inode, struct folio *nfolio)
* x o -> remove data blocks, and then recover inline_data
* x x -> recover data blocks
*/
- if (IS_INODE(&nfolio->page))
- ri = F2FS_INODE(&nfolio->page);
+ if (IS_INODE(nfolio))
+ ri = F2FS_INODE(nfolio);
if (f2fs_has_inline_data(inode) &&
ri && (ri->i_inline & F2FS_INLINE_DATA)) {
@@ -825,7 +825,7 @@ int f2fs_inline_data_fiemap(struct inode *inode,
byteaddr = (__u64)ni.blk_addr << inode->i_sb->s_blocksize_bits;
byteaddr += (char *)inline_data_addr(inode, ifolio) -
- (char *)F2FS_INODE(&ifolio->page);
+ (char *)F2FS_INODE(ifolio);
err = fiemap_fill_next_extent(fieinfo, start, byteaddr, ilen, flags);
trace_f2fs_fiemap(inode, start, byteaddr, ilen, flags, err);
out:
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index 083d52a42bfb..8c4eafe9ffac 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -108,7 +108,7 @@ static void __recover_inline_status(struct inode *inode, struct folio *ifolio)
f2fs_folio_wait_writeback(ifolio, NODE, true, true);
set_inode_flag(inode, FI_DATA_EXIST);
- set_raw_inline(inode, F2FS_INODE(&ifolio->page));
+ set_raw_inline(inode, F2FS_INODE(ifolio));
folio_mark_dirty(ifolio);
return;
}
@@ -116,14 +116,15 @@ static void __recover_inline_status(struct inode *inode, struct folio *ifolio)
return;
}
-static bool f2fs_enable_inode_chksum(struct f2fs_sb_info *sbi, struct page *page)
+static
+bool f2fs_enable_inode_chksum(struct f2fs_sb_info *sbi, struct folio *folio)
{
- struct f2fs_inode *ri = &F2FS_NODE(page)->i;
+ struct f2fs_inode *ri = &F2FS_NODE(folio)->i;
if (!f2fs_sb_has_inode_chksum(sbi))
return false;
- if (!IS_INODE(page) || !(ri->i_inline & F2FS_EXTRA_ATTR))
+ if (!IS_INODE(folio) || !(ri->i_inline & F2FS_EXTRA_ATTR))
return false;
if (!F2FS_FITS_IN_INODE(ri, le16_to_cpu(ri->i_extra_isize),
@@ -133,9 +134,9 @@ static bool f2fs_enable_inode_chksum(struct f2fs_sb_info *sbi, struct page *page
return true;
}
-static __u32 f2fs_inode_chksum(struct f2fs_sb_info *sbi, struct page *page)
+static __u32 f2fs_inode_chksum(struct f2fs_sb_info *sbi, struct folio *folio)
{
- struct f2fs_node *node = F2FS_NODE(page);
+ struct f2fs_node *node = F2FS_NODE(folio);
struct f2fs_inode *ri = &node->i;
__le32 ino = node->footer.ino;
__le32 gen = ri->i_generation;
@@ -164,34 +165,34 @@ bool f2fs_inode_chksum_verify(struct f2fs_sb_info *sbi, struct folio *folio)
return true;
#ifdef CONFIG_F2FS_CHECK_FS
- if (!f2fs_enable_inode_chksum(sbi, &folio->page))
+ if (!f2fs_enable_inode_chksum(sbi, folio))
#else
- if (!f2fs_enable_inode_chksum(sbi, &folio->page) ||
+ if (!f2fs_enable_inode_chksum(sbi, folio) ||
folio_test_dirty(folio) ||
folio_test_writeback(folio))
#endif
return true;
- ri = &F2FS_NODE(&folio->page)->i;
+ ri = &F2FS_NODE(folio)->i;
provided = le32_to_cpu(ri->i_inode_checksum);
- calculated = f2fs_inode_chksum(sbi, &folio->page);
+ calculated = f2fs_inode_chksum(sbi, folio);
if (provided != calculated)
f2fs_warn(sbi, "checksum invalid, nid = %lu, ino_of_node = %x, %x vs. %x",
- folio->index, ino_of_node(&folio->page),
+ folio->index, ino_of_node(folio),
provided, calculated);
return provided == calculated;
}
-void f2fs_inode_chksum_set(struct f2fs_sb_info *sbi, struct page *page)
+void f2fs_inode_chksum_set(struct f2fs_sb_info *sbi, struct folio *folio)
{
- struct f2fs_inode *ri = &F2FS_NODE(page)->i;
+ struct f2fs_inode *ri = &F2FS_NODE(folio)->i;
- if (!f2fs_enable_inode_chksum(sbi, page))
+ if (!f2fs_enable_inode_chksum(sbi, folio))
return;
- ri->i_inode_checksum = cpu_to_le32(f2fs_inode_chksum(sbi, page));
+ ri->i_inode_checksum = cpu_to_le32(f2fs_inode_chksum(sbi, folio));
}
static bool sanity_check_compress_inode(struct inode *inode,
@@ -266,28 +267,28 @@ err_level:
return false;
}
-static bool sanity_check_inode(struct inode *inode, struct page *node_page)
+static bool sanity_check_inode(struct inode *inode, struct folio *node_folio)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct f2fs_inode_info *fi = F2FS_I(inode);
- struct f2fs_inode *ri = F2FS_INODE(node_page);
+ struct f2fs_inode *ri = F2FS_INODE(node_folio);
unsigned long long iblocks;
- iblocks = le64_to_cpu(F2FS_INODE(node_page)->i_blocks);
+ iblocks = le64_to_cpu(F2FS_INODE(node_folio)->i_blocks);
if (!iblocks) {
f2fs_warn(sbi, "%s: corrupted inode i_blocks i_ino=%lx iblocks=%llu, run fsck to fix.",
__func__, inode->i_ino, iblocks);
return false;
}
- if (ino_of_node(node_page) != nid_of_node(node_page)) {
+ if (ino_of_node(node_folio) != nid_of_node(node_folio)) {
f2fs_warn(sbi, "%s: corrupted inode footer i_ino=%lx, ino,nid: [%u, %u] run fsck to fix.",
__func__, inode->i_ino,
- ino_of_node(node_page), nid_of_node(node_page));
+ ino_of_node(node_folio), nid_of_node(node_folio));
return false;
}
- if (ino_of_node(node_page) == fi->i_xattr_nid) {
+ if (ino_of_node(node_folio) == fi->i_xattr_nid) {
f2fs_warn(sbi, "%s: corrupted inode i_ino=%lx, xnid=%x, run fsck to fix.",
__func__, inode->i_ino, fi->i_xattr_nid);
return false;
@@ -354,7 +355,7 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page)
}
}
- if (f2fs_sanity_check_inline_data(inode, node_page)) {
+ if (f2fs_sanity_check_inline_data(inode, node_folio)) {
f2fs_warn(sbi, "%s: inode (ino=%lx, mode=%u) should not have inline_data, run fsck to fix",
__func__, inode->i_ino, inode->i_mode);
return false;
@@ -419,7 +420,7 @@ static int do_read_inode(struct inode *inode)
if (IS_ERR(node_folio))
return PTR_ERR(node_folio);
- ri = F2FS_INODE(&node_folio->page);
+ ri = F2FS_INODE(node_folio);
inode->i_mode = le16_to_cpu(ri->i_mode);
i_uid_write(inode, le32_to_cpu(ri->i_uid));
@@ -469,7 +470,7 @@ static int do_read_inode(struct inode *inode)
fi->i_inline_xattr_size = 0;
}
- if (!sanity_check_inode(inode, &node_folio->page)) {
+ if (!sanity_check_inode(inode, node_folio)) {
f2fs_folio_put(node_folio, true);
set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_handle_error(sbi, ERROR_CORRUPTED_INODE);
@@ -481,9 +482,9 @@ static int do_read_inode(struct inode *inode)
__recover_inline_status(inode, node_folio);
/* try to recover cold bit for non-dir inode */
- if (!S_ISDIR(inode->i_mode) && !is_cold_node(&node_folio->page)) {
+ if (!S_ISDIR(inode->i_mode) && !is_cold_node(node_folio)) {
f2fs_folio_wait_writeback(node_folio, NODE, true, true);
- set_cold_node(&node_folio->page, false);
+ set_cold_node(node_folio, false);
folio_mark_dirty(node_folio);
}
@@ -531,7 +532,7 @@ static int do_read_inode(struct inode *inode)
init_idisk_time(inode);
- if (!sanity_check_extent_cache(inode, &node_folio->page)) {
+ if (!sanity_check_extent_cache(inode, node_folio)) {
f2fs_folio_put(node_folio, true);
f2fs_handle_error(sbi, ERROR_CORRUPTED_INODE);
return -EFSCORRUPTED;
@@ -669,7 +670,7 @@ void f2fs_update_inode(struct inode *inode, struct folio *node_folio)
f2fs_inode_synced(inode);
- ri = F2FS_INODE(&node_folio->page);
+ ri = F2FS_INODE(node_folio);
ri->i_mode = cpu_to_le16(inode->i_mode);
ri->i_advise = fi->i_advise;
@@ -748,11 +749,11 @@ void f2fs_update_inode(struct inode *inode, struct folio *node_folio)
/* deleted inode */
if (inode->i_nlink == 0)
- clear_page_private_inline(&node_folio->page);
+ folio_clear_f2fs_inline(node_folio);
init_idisk_time(inode);
#ifdef CONFIG_F2FS_CHECK_FS
- f2fs_inode_chksum_set(F2FS_I_SB(inode), &node_folio->page);
+ f2fs_inode_chksum_set(F2FS_I_SB(inode), node_folio);
#endif
}
@@ -820,7 +821,7 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)
return 0;
}
-static void f2fs_remove_donate_inode(struct inode *inode)
+void f2fs_remove_donate_inode(struct inode *inode)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
@@ -933,6 +934,19 @@ retry:
f2fs_update_inode_page(inode);
if (dquot_initialize_needed(inode))
set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
+
+ /*
+ * If both f2fs_truncate() and f2fs_update_inode_page() failed
+ * due to fuzzed corrupted inode, call f2fs_inode_synced() to
+ * avoid triggering later f2fs_bug_on().
+ */
+ if (is_inode_flag_set(inode, FI_DIRTY_INODE)) {
+ f2fs_warn(sbi,
+ "f2fs_evict_inode: inode is dirty, ino:%lu",
+ inode->i_ino);
+ f2fs_inode_synced(inode);
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ }
}
if (freeze_protected)
sb_end_intwrite(inode->i_sb);
@@ -949,8 +963,12 @@ no_delete:
if (likely(!f2fs_cp_error(sbi) &&
!is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
f2fs_bug_on(sbi, is_inode_flag_set(inode, FI_DIRTY_INODE));
- else
- f2fs_inode_synced(inode);
+
+ /*
+ * anyway, it needs to remove the inode from sbi->inode_list[DIRTY_META]
+ * list to avoid UAF in f2fs_sync_inode_meta() during checkpoint.
+ */
+ f2fs_inode_synced(inode);
/* for the case f2fs_new_inode() was failed, .i_ino is zero, skip it */
if (inode->i_ino)
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index 07e333ee21b7..b882771e4699 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -1298,19 +1298,19 @@ static const char *f2fs_encrypted_get_link(struct dentry *dentry,
struct inode *inode,
struct delayed_call *done)
{
- struct page *page;
+ struct folio *folio;
const char *target;
if (!dentry)
return ERR_PTR(-ECHILD);
- page = read_mapping_page(inode->i_mapping, 0, NULL);
- if (IS_ERR(page))
- return ERR_CAST(page);
+ folio = read_mapping_folio(inode->i_mapping, 0, NULL);
+ if (IS_ERR(folio))
+ return ERR_CAST(folio);
- target = fscrypt_get_symlink(inode, page_address(page),
+ target = fscrypt_get_symlink(inode, folio_address(folio),
inode->i_sb->s_blocksize, done);
- put_page(page);
+ folio_put(folio);
return target;
}
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index bfe104db284e..27743b93e186 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -135,7 +135,7 @@ static struct folio *get_current_nat_folio(struct f2fs_sb_info *sbi, nid_t nid)
return f2fs_get_meta_folio_retry(sbi, current_nat_addr(sbi, nid));
}
-static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
+static struct folio *get_next_nat_folio(struct f2fs_sb_info *sbi, nid_t nid)
{
struct folio *src_folio;
struct folio *dst_folio;
@@ -149,7 +149,7 @@ static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
/* get current nat block page with lock */
src_folio = get_current_nat_folio(sbi, nid);
if (IS_ERR(src_folio))
- return &src_folio->page;
+ return src_folio;
dst_folio = f2fs_grab_meta_folio(sbi, dst_off);
f2fs_bug_on(sbi, folio_test_dirty(src_folio));
@@ -161,7 +161,7 @@ static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
set_to_next_nat(nm_i, nid);
- return &dst_folio->page;
+ return dst_folio;
}
static struct nat_entry *__alloc_nat_entry(struct f2fs_sb_info *sbi,
@@ -185,7 +185,7 @@ static void __free_nat_entry(struct nat_entry *e)
/* must be locked by nat_tree_lock */
static struct nat_entry *__init_nat_entry(struct f2fs_nm_info *nm_i,
- struct nat_entry *ne, struct f2fs_nat_entry *raw_ne, bool no_fail)
+ struct nat_entry *ne, struct f2fs_nat_entry *raw_ne, bool no_fail, bool init_dirty)
{
if (no_fail)
f2fs_radix_tree_insert(&nm_i->nat_root, nat_get_nid(ne), ne);
@@ -195,6 +195,12 @@ static struct nat_entry *__init_nat_entry(struct f2fs_nm_info *nm_i,
if (raw_ne)
node_info_from_raw_nat(&ne->ni, raw_ne);
+ if (init_dirty) {
+ INIT_LIST_HEAD(&ne->list);
+ nm_i->nat_cnt[TOTAL_NAT]++;
+ return ne;
+ }
+
spin_lock(&nm_i->nat_list_lock);
list_add_tail(&ne->list, &nm_i->nat_entries);
spin_unlock(&nm_i->nat_list_lock);
@@ -204,14 +210,17 @@ static struct nat_entry *__init_nat_entry(struct f2fs_nm_info *nm_i,
return ne;
}
-static struct nat_entry *__lookup_nat_cache(struct f2fs_nm_info *nm_i, nid_t n)
+static struct nat_entry *__lookup_nat_cache(struct f2fs_nm_info *nm_i, nid_t n, bool for_dirty)
{
struct nat_entry *ne;
ne = radix_tree_lookup(&nm_i->nat_root, n);
- /* for recent accessed nat entry, move it to tail of lru list */
- if (ne && !get_nat_flag(ne, IS_DIRTY)) {
+ /*
+ * for recent accessed nat entry which will not be dirtied soon
+ * later, move it to tail of lru list.
+ */
+ if (ne && !get_nat_flag(ne, IS_DIRTY) && !for_dirty) {
spin_lock(&nm_i->nat_list_lock);
if (!list_empty(&ne->list))
list_move_tail(&ne->list, &nm_i->nat_entries);
@@ -256,7 +265,7 @@ static struct nat_entry_set *__grab_nat_entry_set(struct f2fs_nm_info *nm_i,
}
static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i,
- struct nat_entry *ne)
+ struct nat_entry *ne, bool init_dirty)
{
struct nat_entry_set *head;
bool new_ne = nat_get_blkaddr(ne) == NEW_ADDR;
@@ -279,7 +288,8 @@ static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i,
goto refresh_list;
nm_i->nat_cnt[DIRTY_NAT]++;
- nm_i->nat_cnt[RECLAIMABLE_NAT]--;
+ if (!init_dirty)
+ nm_i->nat_cnt[RECLAIMABLE_NAT]--;
set_nat_flag(ne, IS_DIRTY, true);
refresh_list:
spin_lock(&nm_i->nat_list_lock);
@@ -312,8 +322,7 @@ static unsigned int __gang_lookup_nat_set(struct f2fs_nm_info *nm_i,
bool f2fs_in_warm_node_list(struct f2fs_sb_info *sbi, struct folio *folio)
{
- return is_node_folio(folio) && IS_DNODE(&folio->page) &&
- is_cold_node(&folio->page);
+ return is_node_folio(folio) && IS_DNODE(folio) && is_cold_node(folio);
}
void f2fs_init_fsync_node_info(struct f2fs_sb_info *sbi)
@@ -384,7 +393,7 @@ int f2fs_need_dentry_mark(struct f2fs_sb_info *sbi, nid_t nid)
bool need = false;
f2fs_down_read(&nm_i->nat_tree_lock);
- e = __lookup_nat_cache(nm_i, nid);
+ e = __lookup_nat_cache(nm_i, nid, false);
if (e) {
if (!get_nat_flag(e, IS_CHECKPOINTED) &&
!get_nat_flag(e, HAS_FSYNCED_INODE))
@@ -401,7 +410,7 @@ bool f2fs_is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid)
bool is_cp = true;
f2fs_down_read(&nm_i->nat_tree_lock);
- e = __lookup_nat_cache(nm_i, nid);
+ e = __lookup_nat_cache(nm_i, nid, false);
if (e && !get_nat_flag(e, IS_CHECKPOINTED))
is_cp = false;
f2fs_up_read(&nm_i->nat_tree_lock);
@@ -415,7 +424,7 @@ bool f2fs_need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino)
bool need_update = true;
f2fs_down_read(&nm_i->nat_tree_lock);
- e = __lookup_nat_cache(nm_i, ino);
+ e = __lookup_nat_cache(nm_i, ino, false);
if (e && get_nat_flag(e, HAS_LAST_FSYNC) &&
(get_nat_flag(e, IS_CHECKPOINTED) ||
get_nat_flag(e, HAS_FSYNCED_INODE)))
@@ -440,9 +449,9 @@ static void cache_nat_entry(struct f2fs_sb_info *sbi, nid_t nid,
return;
f2fs_down_write(&nm_i->nat_tree_lock);
- e = __lookup_nat_cache(nm_i, nid);
+ e = __lookup_nat_cache(nm_i, nid, false);
if (!e)
- e = __init_nat_entry(nm_i, new, ne, false);
+ e = __init_nat_entry(nm_i, new, ne, false, false);
else
f2fs_bug_on(sbi, nat_get_ino(e) != le32_to_cpu(ne->ino) ||
nat_get_blkaddr(e) !=
@@ -459,11 +468,13 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct nat_entry *e;
struct nat_entry *new = __alloc_nat_entry(sbi, ni->nid, true);
+ bool init_dirty = false;
f2fs_down_write(&nm_i->nat_tree_lock);
- e = __lookup_nat_cache(nm_i, ni->nid);
+ e = __lookup_nat_cache(nm_i, ni->nid, true);
if (!e) {
- e = __init_nat_entry(nm_i, new, NULL, true);
+ init_dirty = true;
+ e = __init_nat_entry(nm_i, new, NULL, true, true);
copy_node_info(&e->ni, ni);
f2fs_bug_on(sbi, ni->blk_addr == NEW_ADDR);
} else if (new_blkaddr == NEW_ADDR) {
@@ -499,11 +510,11 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
nat_set_blkaddr(e, new_blkaddr);
if (!__is_valid_data_blkaddr(new_blkaddr))
set_nat_flag(e, IS_CHECKPOINTED, false);
- __set_nat_cache_dirty(nm_i, e);
+ __set_nat_cache_dirty(nm_i, e, init_dirty);
/* update fsync_mark if its inode nat entry is still alive */
if (ni->nid != ni->ino)
- e = __lookup_nat_cache(nm_i, ni->ino);
+ e = __lookup_nat_cache(nm_i, ni->ino, false);
if (e) {
if (fsync_done && ni->nid == ni->ino)
set_nat_flag(e, HAS_FSYNCED_INODE, true);
@@ -555,20 +566,24 @@ int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid,
struct f2fs_nat_entry ne;
struct nat_entry *e;
pgoff_t index;
- block_t blkaddr;
int i;
+ bool need_cache = true;
ni->flag = 0;
ni->nid = nid;
retry:
/* Check nat cache */
f2fs_down_read(&nm_i->nat_tree_lock);
- e = __lookup_nat_cache(nm_i, nid);
+ e = __lookup_nat_cache(nm_i, nid, false);
if (e) {
ni->ino = nat_get_ino(e);
ni->blk_addr = nat_get_blkaddr(e);
ni->version = nat_get_version(e);
f2fs_up_read(&nm_i->nat_tree_lock);
+ if (IS_ENABLED(CONFIG_F2FS_CHECK_FS)) {
+ need_cache = false;
+ goto sanity_check;
+ }
return 0;
}
@@ -594,7 +609,7 @@ retry:
up_read(&curseg->journal_rwsem);
if (i >= 0) {
f2fs_up_read(&nm_i->nat_tree_lock);
- goto cache;
+ goto sanity_check;
}
/* Fill node_info from nat page */
@@ -609,14 +624,23 @@ retry:
ne = nat_blk->entries[nid - start_nid];
node_info_from_raw_nat(ni, &ne);
f2fs_folio_put(folio, true);
-cache:
- blkaddr = le32_to_cpu(ne.block_addr);
- if (__is_valid_data_blkaddr(blkaddr) &&
- !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE))
- return -EFAULT;
+sanity_check:
+ if (__is_valid_data_blkaddr(ni->blk_addr) &&
+ !f2fs_is_valid_blkaddr(sbi, ni->blk_addr,
+ DATA_GENERIC_ENHANCE)) {
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ f2fs_err_ratelimited(sbi,
+ "f2fs_get_node_info of %pS: inconsistent nat entry, "
+ "ino:%u, nid:%u, blkaddr:%u, ver:%u, flag:%u",
+ __builtin_return_address(0),
+ ni->ino, ni->nid, ni->blk_addr, ni->version, ni->flag);
+ f2fs_handle_error(sbi, ERROR_INCONSISTENT_NAT);
+ return -EFSCORRUPTED;
+ }
/* cache nat entry */
- cache_nat_entry(sbi, nid, &ne);
+ if (need_cache)
+ cache_nat_entry(sbi, nid, &ne);
return 0;
}
@@ -636,7 +660,7 @@ static void f2fs_ra_node_pages(struct folio *parent, int start, int n)
end = start + n;
end = min(end, (int)NIDS_PER_BLOCK);
for (i = start; i < end; i++) {
- nid = get_nid(&parent->page, i, false);
+ nid = get_nid(parent, i, false);
f2fs_ra_node_page(sbi, nid);
}
@@ -795,7 +819,7 @@ int f2fs_get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode)
parent = nfolio[0];
if (level != 0)
- nids[1] = get_nid(&parent->page, offset[0], true);
+ nids[1] = get_nid(parent, offset[0], true);
dn->inode_folio = nfolio[0];
dn->inode_folio_locked = true;
@@ -803,6 +827,16 @@ int f2fs_get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode)
for (i = 1; i <= level; i++) {
bool done = false;
+ if (nids[i] && nids[i] == dn->inode->i_ino) {
+ err = -EFSCORRUPTED;
+ f2fs_err_ratelimited(sbi,
+ "inode mapping table is corrupted, run fsck to fix it, "
+ "ino:%lu, nid:%u, level:%d, offset:%d",
+ dn->inode->i_ino, nids[i], level, offset[level]);
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ goto release_pages;
+ }
+
if (!nids[i] && mode == ALLOC_NODE) {
/* alloc new node */
if (!f2fs_alloc_nid(sbi, &(nids[i]))) {
@@ -846,7 +880,7 @@ int f2fs_get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode)
}
if (i < level) {
parent = nfolio[i];
- nids[i + 1] = get_nid(&parent->page, offset[i], false);
+ nids[i + 1] = get_nid(parent, offset[i], false);
}
}
dn->nid = nids[level];
@@ -961,9 +995,9 @@ static int truncate_dnode(struct dnode_of_data *dn)
else if (IS_ERR(folio))
return PTR_ERR(folio);
- if (IS_INODE(&folio->page) || ino_of_node(&folio->page) != dn->inode->i_ino) {
+ if (IS_INODE(folio) || ino_of_node(folio) != dn->inode->i_ino) {
f2fs_err(sbi, "incorrect node reference, ino: %lu, nid: %u, ino_of_node: %u",
- dn->inode->i_ino, dn->nid, ino_of_node(&folio->page));
+ dn->inode->i_ino, dn->nid, ino_of_node(folio));
set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_handle_error(sbi, ERROR_INVALID_NODE_REFERENCE);
f2fs_folio_put(folio, true);
@@ -1007,7 +1041,7 @@ static int truncate_nodes(struct dnode_of_data *dn, unsigned int nofs,
f2fs_ra_node_pages(folio, ofs, NIDS_PER_BLOCK);
- rn = F2FS_NODE(&folio->page);
+ rn = F2FS_NODE(folio);
if (depth < 3) {
for (i = ofs; i < NIDS_PER_BLOCK; i++, freed++) {
child_nid = le32_to_cpu(rn->in.nid[i]);
@@ -1070,7 +1104,7 @@ static int truncate_partial_nodes(struct dnode_of_data *dn,
int i;
int idx = depth - 2;
- nid[0] = get_nid(&dn->inode_folio->page, offset[0], true);
+ nid[0] = get_nid(dn->inode_folio, offset[0], true);
if (!nid[0])
return 0;
@@ -1083,14 +1117,14 @@ static int truncate_partial_nodes(struct dnode_of_data *dn,
idx = i - 1;
goto fail;
}
- nid[i + 1] = get_nid(&folios[i]->page, offset[i + 1], false);
+ nid[i + 1] = get_nid(folios[i], offset[i + 1], false);
}
f2fs_ra_node_pages(folios[idx], offset[idx + 1], NIDS_PER_BLOCK);
/* free direct nodes linked to a partial indirect node */
for (i = offset[idx + 1]; i < NIDS_PER_BLOCK; i++) {
- child_nid = get_nid(&folios[idx]->page, i, false);
+ child_nid = get_nid(folios[idx], i, false);
if (!child_nid)
continue;
dn->nid = child_nid;
@@ -1159,7 +1193,7 @@ int f2fs_truncate_inode_blocks(struct inode *inode, pgoff_t from)
set_new_dnode(&dn, inode, folio, NULL, 0);
folio_unlock(folio);
- ri = F2FS_INODE(&folio->page);
+ ri = F2FS_INODE(folio);
switch (level) {
case 0:
case 1:
@@ -1188,7 +1222,7 @@ int f2fs_truncate_inode_blocks(struct inode *inode, pgoff_t from)
skip_partial:
while (cont) {
- dn.nid = get_nid(&folio->page, offset[0], true);
+ dn.nid = get_nid(folio, offset[0], true);
switch (offset[0]) {
case NODE_DIR1_BLOCK:
case NODE_DIR2_BLOCK:
@@ -1220,7 +1254,7 @@ skip_partial:
}
if (err < 0)
goto fail;
- if (offset[1] == 0 && get_nid(&folio->page, offset[0], true)) {
+ if (offset[1] == 0 && get_nid(folio, offset[0], true)) {
folio_lock(folio);
BUG_ON(!is_node_folio(folio));
set_nid(folio, offset[0], 0, true);
@@ -1367,8 +1401,8 @@ struct folio *f2fs_new_node_folio(struct dnode_of_data *dn, unsigned int ofs)
set_node_addr(sbi, &new_ni, NEW_ADDR, false);
f2fs_folio_wait_writeback(folio, NODE, true, true);
- fill_node_footer(&folio->page, dn->nid, dn->inode->i_ino, ofs, true);
- set_cold_node(&folio->page, S_ISDIR(dn->inode->i_mode));
+ fill_node_footer(folio, dn->nid, dn->inode->i_ino, ofs, true);
+ set_cold_node(folio, S_ISDIR(dn->inode->i_mode));
if (!folio_test_uptodate(folio))
folio_mark_uptodate(folio);
if (folio_mark_dirty(folio))
@@ -1400,7 +1434,7 @@ static int read_node_folio(struct folio *folio, blk_opf_t op_flags)
.type = NODE,
.op = REQ_OP_READ,
.op_flags = op_flags,
- .page = &folio->page,
+ .folio = folio,
.encrypted_page = NULL,
};
int err;
@@ -1462,17 +1496,15 @@ static int sanity_check_node_footer(struct f2fs_sb_info *sbi,
struct folio *folio, pgoff_t nid,
enum node_type ntype)
{
- struct page *page = &folio->page;
-
- if (unlikely(nid != nid_of_node(page) ||
- (ntype == NODE_TYPE_INODE && !IS_INODE(page)) ||
+ if (unlikely(nid != nid_of_node(folio) ||
+ (ntype == NODE_TYPE_INODE && !IS_INODE(folio)) ||
(ntype == NODE_TYPE_XATTR &&
- !f2fs_has_xattr_block(ofs_of_node(page))) ||
+ !f2fs_has_xattr_block(ofs_of_node(folio))) ||
time_to_inject(sbi, FAULT_INCONSISTENT_FOOTER))) {
f2fs_warn(sbi, "inconsistent node block, node_type:%d, nid:%lu, "
"node_footer[nid:%u,ino:%u,ofs:%u,cpver:%llu,blkaddr:%u]",
- ntype, nid, nid_of_node(page), ino_of_node(page),
- ofs_of_node(page), cpver_of_node(page),
+ ntype, nid, nid_of_node(folio), ino_of_node(folio),
+ ofs_of_node(folio), cpver_of_node(folio),
next_blkaddr_of_node(folio));
set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_handle_error(sbi, ERROR_INCONSISTENT_FOOTER);
@@ -1553,7 +1585,7 @@ struct folio *f2fs_get_xnode_folio(struct f2fs_sb_info *sbi, pgoff_t xnid)
static struct folio *f2fs_get_node_folio_ra(struct folio *parent, int start)
{
struct f2fs_sb_info *sbi = F2FS_F_SB(parent);
- nid_t nid = get_nid(&parent->page, start, false);
+ nid_t nid = get_nid(parent, start, false);
return __get_node_folio(sbi, nid, parent, start, NODE_TYPE_REGULAR);
}
@@ -1618,9 +1650,9 @@ static struct folio *last_fsync_dnode(struct f2fs_sb_info *sbi, nid_t ino)
return ERR_PTR(-EIO);
}
- if (!IS_DNODE(&folio->page) || !is_cold_node(&folio->page))
+ if (!IS_DNODE(folio) || !is_cold_node(folio))
continue;
- if (ino_of_node(&folio->page) != ino)
+ if (ino_of_node(folio) != ino)
continue;
folio_lock(folio);
@@ -1630,7 +1662,7 @@ continue_unlock:
folio_unlock(folio);
continue;
}
- if (ino_of_node(&folio->page) != ino)
+ if (ino_of_node(folio) != ino)
goto continue_unlock;
if (!folio_test_dirty(folio)) {
@@ -1660,11 +1692,11 @@ static bool __write_node_folio(struct folio *folio, bool atomic, bool *submitted
struct node_info ni;
struct f2fs_io_info fio = {
.sbi = sbi,
- .ino = ino_of_node(&folio->page),
+ .ino = ino_of_node(folio),
.type = NODE,
.op = REQ_OP_WRITE,
.op_flags = wbc_to_write_flags(wbc),
- .page = &folio->page,
+ .folio = folio,
.encrypted_page = NULL,
.submitted = 0,
.io_type = io_type,
@@ -1689,11 +1721,11 @@ static bool __write_node_folio(struct folio *folio, bool atomic, bool *submitted
if (!is_sbi_flag_set(sbi, SBI_CP_DISABLED) &&
wbc->sync_mode == WB_SYNC_NONE &&
- IS_DNODE(&folio->page) && is_cold_node(&folio->page))
+ IS_DNODE(folio) && is_cold_node(folio))
goto redirty_out;
/* get old block addr of this node page */
- nid = nid_of_node(&folio->page);
+ nid = nid_of_node(folio);
f2fs_bug_on(sbi, folio->index != nid);
if (f2fs_get_node_info(sbi, nid, &ni, !do_balance))
@@ -1731,7 +1763,7 @@ static bool __write_node_folio(struct folio *folio, bool atomic, bool *submitted
fio.old_blkaddr = ni.blk_addr;
f2fs_do_write_node_page(nid, &fio);
- set_node_addr(sbi, &ni, fio.new_blkaddr, is_fsync_dnode(&folio->page));
+ set_node_addr(sbi, &ni, fio.new_blkaddr, is_fsync_dnode(folio));
dec_page_count(sbi, F2FS_DIRTY_NODES);
f2fs_up_read(&sbi->node_write);
@@ -1827,9 +1859,9 @@ retry:
goto out;
}
- if (!IS_DNODE(&folio->page) || !is_cold_node(&folio->page))
+ if (!IS_DNODE(folio) || !is_cold_node(folio))
continue;
- if (ino_of_node(&folio->page) != ino)
+ if (ino_of_node(folio) != ino)
continue;
folio_lock(folio);
@@ -1839,7 +1871,7 @@ continue_unlock:
folio_unlock(folio);
continue;
}
- if (ino_of_node(&folio->page) != ino)
+ if (ino_of_node(folio) != ino)
goto continue_unlock;
if (!folio_test_dirty(folio) && folio != last_folio) {
@@ -1849,17 +1881,17 @@ continue_unlock:
f2fs_folio_wait_writeback(folio, NODE, true, true);
- set_fsync_mark(&folio->page, 0);
- set_dentry_mark(&folio->page, 0);
+ set_fsync_mark(folio, 0);
+ set_dentry_mark(folio, 0);
if (!atomic || folio == last_folio) {
- set_fsync_mark(&folio->page, 1);
+ set_fsync_mark(folio, 1);
percpu_counter_inc(&sbi->rf_node_block_count);
- if (IS_INODE(&folio->page)) {
+ if (IS_INODE(folio)) {
if (is_inode_flag_set(inode,
FI_DIRTY_INODE))
f2fs_update_inode(inode, folio);
- set_dentry_mark(&folio->page,
+ set_dentry_mark(folio,
f2fs_need_dentry_mark(sbi, ino));
}
/* may be written by other thread */
@@ -1935,7 +1967,7 @@ static bool flush_dirty_inode(struct folio *folio)
{
struct f2fs_sb_info *sbi = F2FS_F_SB(folio);
struct inode *inode;
- nid_t ino = ino_of_node(&folio->page);
+ nid_t ino = ino_of_node(folio);
inode = find_inode_nowait(sbi->sb, ino, f2fs_match_ino, NULL);
if (!inode)
@@ -1964,7 +1996,7 @@ void f2fs_flush_inline_data(struct f2fs_sb_info *sbi)
for (i = 0; i < nr_folios; i++) {
struct folio *folio = fbatch.folios[i];
- if (!IS_INODE(&folio->page))
+ if (!IS_INODE(folio))
continue;
folio_lock(folio);
@@ -1975,10 +2007,10 @@ void f2fs_flush_inline_data(struct f2fs_sb_info *sbi)
goto unlock;
/* flush inline_data, if it's async context. */
- if (page_private_inline(&folio->page)) {
- clear_page_private_inline(&folio->page);
+ if (folio_test_f2fs_inline(folio)) {
+ folio_clear_f2fs_inline(folio);
folio_unlock(folio);
- flush_inline_data(sbi, ino_of_node(&folio->page));
+ flush_inline_data(sbi, ino_of_node(folio));
continue;
}
unlock:
@@ -2027,13 +2059,13 @@ next_step:
* 1. dentry dnodes
* 2. file dnodes
*/
- if (step == 0 && IS_DNODE(&folio->page))
+ if (step == 0 && IS_DNODE(folio))
continue;
- if (step == 1 && (!IS_DNODE(&folio->page) ||
- is_cold_node(&folio->page)))
+ if (step == 1 && (!IS_DNODE(folio) ||
+ is_cold_node(folio)))
continue;
- if (step == 2 && (!IS_DNODE(&folio->page) ||
- !is_cold_node(&folio->page)))
+ if (step == 2 && (!IS_DNODE(folio) ||
+ !is_cold_node(folio)))
continue;
lock_node:
if (wbc->sync_mode == WB_SYNC_ALL)
@@ -2057,15 +2089,15 @@ continue_unlock:
goto write_node;
/* flush inline_data */
- if (page_private_inline(&folio->page)) {
- clear_page_private_inline(&folio->page);
+ if (folio_test_f2fs_inline(folio)) {
+ folio_clear_f2fs_inline(folio);
folio_unlock(folio);
- flush_inline_data(sbi, ino_of_node(&folio->page));
+ flush_inline_data(sbi, ino_of_node(folio));
goto lock_node;
}
/* flush dirty inode */
- if (IS_INODE(&folio->page) && flush_dirty_inode(folio))
+ if (IS_INODE(folio) && flush_dirty_inode(folio))
goto lock_node;
write_node:
f2fs_folio_wait_writeback(folio, NODE, true, true);
@@ -2073,8 +2105,8 @@ write_node:
if (!folio_clear_dirty_for_io(folio))
goto continue_unlock;
- set_fsync_mark(&folio->page, 0);
- set_dentry_mark(&folio->page, 0);
+ set_fsync_mark(folio, 0);
+ set_dentry_mark(folio, 0);
if (!__write_node_folio(folio, false, &submitted,
wbc, do_balance, io_type, NULL)) {
@@ -2201,12 +2233,12 @@ static bool f2fs_dirty_node_folio(struct address_space *mapping,
if (!folio_test_uptodate(folio))
folio_mark_uptodate(folio);
#ifdef CONFIG_F2FS_CHECK_FS
- if (IS_INODE(&folio->page))
- f2fs_inode_chksum_set(F2FS_M_SB(mapping), &folio->page);
+ if (IS_INODE(folio))
+ f2fs_inode_chksum_set(F2FS_M_SB(mapping), folio);
#endif
if (filemap_dirty_folio(mapping, folio)) {
inc_page_count(F2FS_M_SB(mapping), F2FS_DIRTY_NODES);
- set_page_private_reference(&folio->page);
+ folio_set_f2fs_reference(folio);
return true;
}
return false;
@@ -2351,7 +2383,7 @@ static bool add_free_nid(struct f2fs_sb_info *sbi,
* - __remove_nid_from_list(PREALLOC_NID)
* - __insert_nid_to_list(FREE_NID)
*/
- ne = __lookup_nat_cache(nm_i, nid);
+ ne = __lookup_nat_cache(nm_i, nid, false);
if (ne && (!get_nat_flag(ne, IS_CHECKPOINTED) ||
nat_get_blkaddr(ne) != NULL_ADDR))
goto err_out;
@@ -2714,7 +2746,7 @@ int f2fs_recover_inline_xattr(struct inode *inode, struct folio *folio)
if (IS_ERR(ifolio))
return PTR_ERR(ifolio);
- ri = F2FS_INODE(&folio->page);
+ ri = F2FS_INODE(folio);
if (ri->i_inline & F2FS_INLINE_XATTR) {
if (!f2fs_has_inline_xattr(inode)) {
set_inode_flag(inode, FI_INLINE_XATTR);
@@ -2740,7 +2772,7 @@ update_inode:
return 0;
}
-int f2fs_recover_xattr_data(struct inode *inode, struct page *page)
+int f2fs_recover_xattr_data(struct inode *inode, struct folio *folio)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
nid_t prev_xnid = F2FS_I(inode)->i_xattr_nid;
@@ -2778,8 +2810,8 @@ recover_xnid:
f2fs_update_inode_page(inode);
/* 3: update and set xattr node page dirty */
- if (page) {
- memcpy(F2FS_NODE(&xfolio->page), F2FS_NODE(page),
+ if (folio) {
+ memcpy(F2FS_NODE(xfolio), F2FS_NODE(folio),
VALID_XATTR_BLOCK_SIZE);
folio_mark_dirty(xfolio);
}
@@ -2788,10 +2820,10 @@ recover_xnid:
return 0;
}
-int f2fs_recover_inode_page(struct f2fs_sb_info *sbi, struct page *page)
+int f2fs_recover_inode_page(struct f2fs_sb_info *sbi, struct folio *folio)
{
struct f2fs_inode *src, *dst;
- nid_t ino = ino_of_node(page);
+ nid_t ino = ino_of_node(folio);
struct node_info old_ni, new_ni;
struct folio *ifolio;
int err;
@@ -2814,11 +2846,11 @@ retry:
if (!folio_test_uptodate(ifolio))
folio_mark_uptodate(ifolio);
- fill_node_footer(&ifolio->page, ino, ino, 0, true);
- set_cold_node(&ifolio->page, false);
+ fill_node_footer(ifolio, ino, ino, 0, true);
+ set_cold_node(ifolio, false);
- src = F2FS_INODE(page);
- dst = F2FS_INODE(&ifolio->page);
+ src = F2FS_INODE(folio);
+ dst = F2FS_INODE(ifolio);
memcpy(dst, src, offsetof(struct f2fs_inode, i_ext));
dst->i_size = 0;
@@ -2884,7 +2916,7 @@ int f2fs_restore_node_summary(struct f2fs_sb_info *sbi,
if (IS_ERR(folio))
return PTR_ERR(folio);
- rn = F2FS_NODE(&folio->page);
+ rn = F2FS_NODE(folio);
sum_entry->nid = rn->footer.nid;
sum_entry->version = 0;
sum_entry->ofs_in_node = 0;
@@ -2904,6 +2936,7 @@ static void remove_nats_in_journal(struct f2fs_sb_info *sbi)
struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
struct f2fs_journal *journal = curseg->journal;
int i;
+ bool init_dirty;
down_write(&curseg->journal_rwsem);
for (i = 0; i < nats_in_cursum(journal); i++) {
@@ -2914,12 +2947,15 @@ static void remove_nats_in_journal(struct f2fs_sb_info *sbi)
if (f2fs_check_nid_range(sbi, nid))
continue;
+ init_dirty = false;
+
raw_ne = nat_in_journal(journal, i);
- ne = __lookup_nat_cache(nm_i, nid);
+ ne = __lookup_nat_cache(nm_i, nid, true);
if (!ne) {
+ init_dirty = true;
ne = __alloc_nat_entry(sbi, nid, true);
- __init_nat_entry(nm_i, ne, &raw_ne, true);
+ __init_nat_entry(nm_i, ne, &raw_ne, true, true);
}
/*
@@ -2934,7 +2970,7 @@ static void remove_nats_in_journal(struct f2fs_sb_info *sbi)
spin_unlock(&nm_i->nid_list_lock);
}
- __set_nat_cache_dirty(nm_i, ne);
+ __set_nat_cache_dirty(nm_i, ne, init_dirty);
}
update_nats_in_cursum(journal, -i);
up_write(&curseg->journal_rwsem);
@@ -2959,11 +2995,10 @@ add_out:
}
static void __update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid,
- struct page *page)
+ const struct f2fs_nat_block *nat_blk)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
unsigned int nat_index = start_nid / NAT_ENTRY_PER_BLOCK;
- struct f2fs_nat_block *nat_blk = page_address(page);
int valid = 0;
int i = 0;
@@ -3000,7 +3035,7 @@ static int __flush_nat_entry_set(struct f2fs_sb_info *sbi,
bool to_journal = true;
struct f2fs_nat_block *nat_blk;
struct nat_entry *ne, *cur;
- struct page *page = NULL;
+ struct folio *folio = NULL;
/*
* there are two steps to flush nat entries:
@@ -3014,11 +3049,11 @@ static int __flush_nat_entry_set(struct f2fs_sb_info *sbi,
if (to_journal) {
down_write(&curseg->journal_rwsem);
} else {
- page = get_next_nat_page(sbi, start_nid);
- if (IS_ERR(page))
- return PTR_ERR(page);
+ folio = get_next_nat_folio(sbi, start_nid);
+ if (IS_ERR(folio))
+ return PTR_ERR(folio);
- nat_blk = page_address(page);
+ nat_blk = folio_address(folio);
f2fs_bug_on(sbi, !nat_blk);
}
@@ -3054,8 +3089,8 @@ static int __flush_nat_entry_set(struct f2fs_sb_info *sbi,
if (to_journal) {
up_write(&curseg->journal_rwsem);
} else {
- __update_nat_bits(sbi, start_nid, page);
- f2fs_put_page(page, 1);
+ __update_nat_bits(sbi, start_nid, nat_blk);
+ f2fs_folio_put(folio, true);
}
/* Allow dirty nats by node block allocation in write_begin */
@@ -3395,10 +3430,10 @@ void f2fs_destroy_node_manager(struct f2fs_sb_info *sbi)
}
kvfree(nm_i->free_nid_count);
- kvfree(nm_i->nat_bitmap);
+ kfree(nm_i->nat_bitmap);
kvfree(nm_i->nat_bits);
#ifdef CONFIG_F2FS_CHECK_FS
- kvfree(nm_i->nat_bitmap_mir);
+ kfree(nm_i->nat_bitmap_mir);
#endif
sbi->nm_info = NULL;
kfree(nm_i);
diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
index 1446c433b3ec..030390543b54 100644
--- a/fs/f2fs/node.h
+++ b/fs/f2fs/node.h
@@ -31,7 +31,7 @@
/* control total # of nats */
#define DEF_NAT_CACHE_THRESHOLD 100000
-/* control total # of node writes used for roll-fowrad recovery */
+/* control total # of node writes used for roll-forward recovery */
#define DEF_RF_NODE_BLOCKS 0
/* vector size for gang look-up from nat cache that consists of radix tree */
@@ -243,41 +243,41 @@ static inline void set_to_next_nat(struct f2fs_nm_info *nm_i, nid_t start_nid)
#endif
}
-static inline nid_t ino_of_node(struct page *node_page)
+static inline nid_t ino_of_node(const struct folio *node_folio)
{
- struct f2fs_node *rn = F2FS_NODE(node_page);
+ struct f2fs_node *rn = F2FS_NODE(node_folio);
return le32_to_cpu(rn->footer.ino);
}
-static inline nid_t nid_of_node(struct page *node_page)
+static inline nid_t nid_of_node(const struct folio *node_folio)
{
- struct f2fs_node *rn = F2FS_NODE(node_page);
+ struct f2fs_node *rn = F2FS_NODE(node_folio);
return le32_to_cpu(rn->footer.nid);
}
-static inline unsigned int ofs_of_node(const struct page *node_page)
+static inline unsigned int ofs_of_node(const struct folio *node_folio)
{
- struct f2fs_node *rn = F2FS_NODE(node_page);
+ struct f2fs_node *rn = F2FS_NODE(node_folio);
unsigned flag = le32_to_cpu(rn->footer.flag);
return flag >> OFFSET_BIT_SHIFT;
}
-static inline __u64 cpver_of_node(struct page *node_page)
+static inline __u64 cpver_of_node(const struct folio *node_folio)
{
- struct f2fs_node *rn = F2FS_NODE(node_page);
+ struct f2fs_node *rn = F2FS_NODE(node_folio);
return le64_to_cpu(rn->footer.cp_ver);
}
-static inline block_t next_blkaddr_of_node(struct folio *node_folio)
+static inline block_t next_blkaddr_of_node(const struct folio *node_folio)
{
- struct f2fs_node *rn = F2FS_NODE(&node_folio->page);
+ struct f2fs_node *rn = F2FS_NODE(node_folio);
return le32_to_cpu(rn->footer.next_blkaddr);
}
-static inline void fill_node_footer(struct page *page, nid_t nid,
+static inline void fill_node_footer(const struct folio *folio, nid_t nid,
nid_t ino, unsigned int ofs, bool reset)
{
- struct f2fs_node *rn = F2FS_NODE(page);
+ struct f2fs_node *rn = F2FS_NODE(folio);
unsigned int old_flag = 0;
if (reset)
@@ -293,17 +293,18 @@ static inline void fill_node_footer(struct page *page, nid_t nid,
(old_flag & OFFSET_BIT_MASK));
}
-static inline void copy_node_footer(struct page *dst, struct page *src)
+static inline void copy_node_footer(const struct folio *dst,
+ const struct folio *src)
{
struct f2fs_node *src_rn = F2FS_NODE(src);
struct f2fs_node *dst_rn = F2FS_NODE(dst);
memcpy(&dst_rn->footer, &src_rn->footer, sizeof(struct node_footer));
}
-static inline void fill_node_footer_blkaddr(struct page *page, block_t blkaddr)
+static inline void fill_node_footer_blkaddr(struct folio *folio, block_t blkaddr)
{
- struct f2fs_checkpoint *ckpt = F2FS_CKPT(F2FS_P_SB(page));
- struct f2fs_node *rn = F2FS_NODE(page);
+ struct f2fs_checkpoint *ckpt = F2FS_CKPT(F2FS_F_SB(folio));
+ struct f2fs_node *rn = F2FS_NODE(folio);
__u64 cp_ver = cur_cp_version(ckpt);
if (__is_set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG))
@@ -313,19 +314,19 @@ static inline void fill_node_footer_blkaddr(struct page *page, block_t blkaddr)
rn->footer.next_blkaddr = cpu_to_le32(blkaddr);
}
-static inline bool is_recoverable_dnode(struct page *page)
+static inline bool is_recoverable_dnode(const struct folio *folio)
{
- struct f2fs_checkpoint *ckpt = F2FS_CKPT(F2FS_P_SB(page));
+ struct f2fs_checkpoint *ckpt = F2FS_CKPT(F2FS_F_SB(folio));
__u64 cp_ver = cur_cp_version(ckpt);
/* Don't care crc part, if fsck.f2fs sets it. */
if (__is_set_ckpt_flags(ckpt, CP_NOCRC_RECOVERY_FLAG))
- return (cp_ver << 32) == (cpver_of_node(page) << 32);
+ return (cp_ver << 32) == (cpver_of_node(folio) << 32);
if (__is_set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG))
cp_ver |= (cur_cp_crc(ckpt) << 32);
- return cp_ver == cpver_of_node(page);
+ return cp_ver == cpver_of_node(folio);
}
/*
@@ -349,9 +350,9 @@ static inline bool is_recoverable_dnode(struct page *page)
* `- indirect node ((6 + 2N) + (N - 1)(N + 1))
* `- direct node
*/
-static inline bool IS_DNODE(const struct page *node_page)
+static inline bool IS_DNODE(const struct folio *node_folio)
{
- unsigned int ofs = ofs_of_node(node_page);
+ unsigned int ofs = ofs_of_node(node_folio);
if (f2fs_has_xattr_block(ofs))
return true;
@@ -369,7 +370,7 @@ static inline bool IS_DNODE(const struct page *node_page)
static inline int set_nid(struct folio *folio, int off, nid_t nid, bool i)
{
- struct f2fs_node *rn = F2FS_NODE(&folio->page);
+ struct f2fs_node *rn = F2FS_NODE(folio);
f2fs_folio_wait_writeback(folio, NODE, true, true);
@@ -380,9 +381,9 @@ static inline int set_nid(struct folio *folio, int off, nid_t nid, bool i)
return folio_mark_dirty(folio);
}
-static inline nid_t get_nid(struct page *p, int off, bool i)
+static inline nid_t get_nid(const struct folio *folio, int off, bool i)
{
- struct f2fs_node *rn = F2FS_NODE(p);
+ struct f2fs_node *rn = F2FS_NODE(folio);
if (i)
return le32_to_cpu(rn->i.i_nid[off - NODE_DIR1_BLOCK]);
@@ -396,19 +397,19 @@ static inline nid_t get_nid(struct page *p, int off, bool i)
* - Mark cold data pages in page cache
*/
-static inline int is_node(const struct page *page, int type)
+static inline int is_node(const struct folio *folio, int type)
{
- struct f2fs_node *rn = F2FS_NODE(page);
+ struct f2fs_node *rn = F2FS_NODE(folio);
return le32_to_cpu(rn->footer.flag) & BIT(type);
}
-#define is_cold_node(page) is_node(page, COLD_BIT_SHIFT)
-#define is_fsync_dnode(page) is_node(page, FSYNC_BIT_SHIFT)
-#define is_dent_dnode(page) is_node(page, DENT_BIT_SHIFT)
+#define is_cold_node(folio) is_node(folio, COLD_BIT_SHIFT)
+#define is_fsync_dnode(folio) is_node(folio, FSYNC_BIT_SHIFT)
+#define is_dent_dnode(folio) is_node(folio, DENT_BIT_SHIFT)
-static inline void set_cold_node(struct page *page, bool is_dir)
+static inline void set_cold_node(const struct folio *folio, bool is_dir)
{
- struct f2fs_node *rn = F2FS_NODE(page);
+ struct f2fs_node *rn = F2FS_NODE(folio);
unsigned int flag = le32_to_cpu(rn->footer.flag);
if (is_dir)
@@ -418,9 +419,9 @@ static inline void set_cold_node(struct page *page, bool is_dir)
rn->footer.flag = cpu_to_le32(flag);
}
-static inline void set_mark(struct page *page, int mark, int type)
+static inline void set_mark(struct folio *folio, int mark, int type)
{
- struct f2fs_node *rn = F2FS_NODE(page);
+ struct f2fs_node *rn = F2FS_NODE(folio);
unsigned int flag = le32_to_cpu(rn->footer.flag);
if (mark)
flag |= BIT(type);
@@ -429,8 +430,8 @@ static inline void set_mark(struct page *page, int mark, int type)
rn->footer.flag = cpu_to_le32(flag);
#ifdef CONFIG_F2FS_CHECK_FS
- f2fs_inode_chksum_set(F2FS_P_SB(page), page);
+ f2fs_inode_chksum_set(F2FS_F_SB(folio), folio);
#endif
}
-#define set_dentry_mark(page, mark) set_mark(page, mark, DENT_BIT_SHIFT)
-#define set_fsync_mark(page, mark) set_mark(page, mark, FSYNC_BIT_SHIFT)
+#define set_dentry_mark(folio, mark) set_mark(folio, mark, DENT_BIT_SHIFT)
+#define set_fsync_mark(folio, mark) set_mark(folio, mark, FSYNC_BIT_SHIFT)
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index 51ebed4e1521..4cb3a91801b4 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -157,10 +157,10 @@ static int init_recovered_filename(const struct inode *dir,
return 0;
}
-static int recover_dentry(struct inode *inode, struct page *ipage,
+static int recover_dentry(struct inode *inode, struct folio *ifolio,
struct list_head *dir_list)
{
- struct f2fs_inode *raw_inode = F2FS_INODE(ipage);
+ struct f2fs_inode *raw_inode = F2FS_INODE(ifolio);
nid_t pino = le32_to_cpu(raw_inode->i_pino);
struct f2fs_dir_entry *de;
struct f2fs_filename fname;
@@ -233,14 +233,14 @@ out:
else
name = raw_inode->i_name;
f2fs_notice(F2FS_I_SB(inode), "%s: ino = %x, name = %s, dir = %lx, err = %d",
- __func__, ino_of_node(ipage), name,
+ __func__, ino_of_node(ifolio), name,
IS_ERR(dir) ? 0 : dir->i_ino, err);
return err;
}
-static int recover_quota_data(struct inode *inode, struct page *page)
+static int recover_quota_data(struct inode *inode, struct folio *folio)
{
- struct f2fs_inode *raw = F2FS_INODE(page);
+ struct f2fs_inode *raw = F2FS_INODE(folio);
struct iattr attr;
uid_t i_uid = le32_to_cpu(raw->i_uid);
gid_t i_gid = le32_to_cpu(raw->i_gid);
@@ -277,16 +277,16 @@ static void recover_inline_flags(struct inode *inode, struct f2fs_inode *ri)
clear_inode_flag(inode, FI_DATA_EXIST);
}
-static int recover_inode(struct inode *inode, struct page *page)
+static int recover_inode(struct inode *inode, struct folio *folio)
{
- struct f2fs_inode *raw = F2FS_INODE(page);
+ struct f2fs_inode *raw = F2FS_INODE(folio);
struct f2fs_inode_info *fi = F2FS_I(inode);
char *name;
int err;
inode->i_mode = le16_to_cpu(raw->i_mode);
- err = recover_quota_data(inode, page);
+ err = recover_quota_data(inode, folio);
if (err)
return err;
@@ -333,10 +333,10 @@ static int recover_inode(struct inode *inode, struct page *page)
if (file_enc_name(inode))
name = "<encrypted>";
else
- name = F2FS_INODE(page)->i_name;
+ name = F2FS_INODE(folio)->i_name;
f2fs_notice(F2FS_I_SB(inode), "recover_inode: ino = %x, name = %s, inline = %x",
- ino_of_node(page), name, raw->i_inline);
+ ino_of_node(folio), name, raw->i_inline);
return 0;
}
@@ -375,7 +375,7 @@ static int sanity_check_node_chain(struct f2fs_sb_info *sbi, block_t blkaddr,
if (IS_ERR(folio))
return PTR_ERR(folio);
- if (!is_recoverable_dnode(&folio->page)) {
+ if (!is_recoverable_dnode(folio)) {
f2fs_folio_put(folio, true);
*is_detecting = false;
return 0;
@@ -424,22 +424,22 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head,
break;
}
- if (!is_recoverable_dnode(&folio->page)) {
+ if (!is_recoverable_dnode(folio)) {
f2fs_folio_put(folio, true);
break;
}
- if (!is_fsync_dnode(&folio->page))
+ if (!is_fsync_dnode(folio))
goto next;
- entry = get_fsync_inode(head, ino_of_node(&folio->page));
+ entry = get_fsync_inode(head, ino_of_node(folio));
if (!entry) {
bool quota_inode = false;
if (!check_only &&
- IS_INODE(&folio->page) &&
- is_dent_dnode(&folio->page)) {
- err = f2fs_recover_inode_page(sbi, &folio->page);
+ IS_INODE(folio) &&
+ is_dent_dnode(folio)) {
+ err = f2fs_recover_inode_page(sbi, folio);
if (err) {
f2fs_folio_put(folio, true);
break;
@@ -451,7 +451,7 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head,
* CP | dnode(F) | inode(DF)
* For this case, we should not give up now.
*/
- entry = add_fsync_inode(sbi, head, ino_of_node(&folio->page),
+ entry = add_fsync_inode(sbi, head, ino_of_node(folio),
quota_inode);
if (IS_ERR(entry)) {
err = PTR_ERR(entry);
@@ -463,7 +463,7 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head,
}
entry->blkaddr = blkaddr;
- if (IS_INODE(&folio->page) && is_dent_dnode(&folio->page))
+ if (IS_INODE(folio) && is_dent_dnode(folio))
entry->last_dentry = blkaddr;
next:
/* check next segment */
@@ -527,7 +527,7 @@ got_it:
nid = le32_to_cpu(sum.nid);
ofs_in_node = le16_to_cpu(sum.ofs_in_node);
- max_addrs = ADDRS_PER_PAGE(&dn->node_folio->page, dn->inode);
+ max_addrs = ADDRS_PER_PAGE(dn->node_folio, dn->inode);
if (ofs_in_node >= max_addrs) {
f2fs_err(sbi, "Inconsistent ofs_in_node:%u in summary, ino:%lu, nid:%u, max:%u",
ofs_in_node, dn->inode->i_ino, nid, max_addrs);
@@ -552,8 +552,8 @@ got_it:
if (IS_ERR(node_folio))
return PTR_ERR(node_folio);
- offset = ofs_of_node(&node_folio->page);
- ino = ino_of_node(&node_folio->page);
+ offset = ofs_of_node(node_folio);
+ ino = ino_of_node(node_folio);
f2fs_folio_put(node_folio, true);
if (ino != dn->inode->i_ino) {
@@ -624,16 +624,16 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
{
struct dnode_of_data dn;
struct node_info ni;
- unsigned int start, end;
+ unsigned int start = 0, end = 0, index;
int err = 0, recovered = 0;
/* step 1: recover xattr */
- if (IS_INODE(&folio->page)) {
+ if (IS_INODE(folio)) {
err = f2fs_recover_inline_xattr(inode, folio);
if (err)
goto out;
- } else if (f2fs_has_xattr_block(ofs_of_node(&folio->page))) {
- err = f2fs_recover_xattr_data(inode, &folio->page);
+ } else if (f2fs_has_xattr_block(ofs_of_node(folio))) {
+ err = f2fs_recover_xattr_data(inode, folio);
if (!err)
recovered++;
goto out;
@@ -648,8 +648,8 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
}
/* step 3: recover data indices */
- start = f2fs_start_bidx_of_node(ofs_of_node(&folio->page), inode);
- end = start + ADDRS_PER_PAGE(&folio->page, inode);
+ start = f2fs_start_bidx_of_node(ofs_of_node(folio), inode);
+ end = start + ADDRS_PER_PAGE(folio, inode);
set_new_dnode(&dn, inode, NULL, NULL, 0);
retry_dn:
@@ -668,18 +668,18 @@ retry_dn:
if (err)
goto err;
- f2fs_bug_on(sbi, ni.ino != ino_of_node(&folio->page));
+ f2fs_bug_on(sbi, ni.ino != ino_of_node(folio));
- if (ofs_of_node(&dn.node_folio->page) != ofs_of_node(&folio->page)) {
+ if (ofs_of_node(dn.node_folio) != ofs_of_node(folio)) {
f2fs_warn(sbi, "Inconsistent ofs_of_node, ino:%lu, ofs:%u, %u",
- inode->i_ino, ofs_of_node(&dn.node_folio->page),
- ofs_of_node(&folio->page));
+ inode->i_ino, ofs_of_node(dn.node_folio),
+ ofs_of_node(folio));
err = -EFSCORRUPTED;
f2fs_handle_error(sbi, ERROR_INCONSISTENT_FOOTER);
goto err;
}
- for (; start < end; start++, dn.ofs_in_node++) {
+ for (index = start; index < end; index++, dn.ofs_in_node++) {
block_t src, dest;
src = f2fs_data_blkaddr(&dn);
@@ -708,9 +708,9 @@ retry_dn:
}
if (!file_keep_isize(inode) &&
- (i_size_read(inode) <= ((loff_t)start << PAGE_SHIFT)))
+ (i_size_read(inode) <= ((loff_t)index << PAGE_SHIFT)))
f2fs_i_size_write(inode,
- (loff_t)(start + 1) << PAGE_SHIFT);
+ (loff_t)(index + 1) << PAGE_SHIFT);
/*
* dest is reserved block, invalidate src block
@@ -758,16 +758,18 @@ retry_prev:
}
}
- copy_node_footer(&dn.node_folio->page, &folio->page);
- fill_node_footer(&dn.node_folio->page, dn.nid, ni.ino,
- ofs_of_node(&folio->page), false);
+ copy_node_footer(dn.node_folio, folio);
+ fill_node_footer(dn.node_folio, dn.nid, ni.ino,
+ ofs_of_node(folio), false);
folio_mark_dirty(dn.node_folio);
err:
f2fs_put_dnode(&dn);
out:
- f2fs_notice(sbi, "recover_data: ino = %lx (i_size: %s) recovered = %d, err = %d",
- inode->i_ino, file_keep_isize(inode) ? "keep" : "recover",
- recovered, err);
+ f2fs_notice(sbi, "recover_data: ino = %lx, nid = %x (i_size: %s), "
+ "range (%u, %u), recovered = %d, err = %d",
+ inode->i_ino, nid_of_node(folio),
+ file_keep_isize(inode) ? "keep" : "recover",
+ start, end, recovered, err);
return err;
}
@@ -778,6 +780,14 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list,
int err = 0;
block_t blkaddr;
unsigned int ra_blocks = RECOVERY_MAX_RA_BLOCKS;
+ unsigned int recoverable_dnode = 0;
+ unsigned int fsynced_dnode = 0;
+ unsigned int total_dnode = 0;
+ unsigned int recovered_inode = 0;
+ unsigned int recovered_dentry = 0;
+ unsigned int recovered_dnode = 0;
+
+ f2fs_notice(sbi, "do_recover_data: start to recover dnode");
/* get node pages in the current segment */
curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
@@ -796,38 +806,43 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list,
break;
}
- if (!is_recoverable_dnode(&folio->page)) {
+ if (!is_recoverable_dnode(folio)) {
f2fs_folio_put(folio, true);
break;
}
+ recoverable_dnode++;
- entry = get_fsync_inode(inode_list, ino_of_node(&folio->page));
+ entry = get_fsync_inode(inode_list, ino_of_node(folio));
if (!entry)
goto next;
+ fsynced_dnode++;
/*
* inode(x) | CP | inode(x) | dnode(F)
* In this case, we can lose the latest inode(x).
* So, call recover_inode for the inode update.
*/
- if (IS_INODE(&folio->page)) {
- err = recover_inode(entry->inode, &folio->page);
+ if (IS_INODE(folio)) {
+ err = recover_inode(entry->inode, folio);
if (err) {
f2fs_folio_put(folio, true);
break;
}
+ recovered_inode++;
}
if (entry->last_dentry == blkaddr) {
- err = recover_dentry(entry->inode, &folio->page, dir_list);
+ err = recover_dentry(entry->inode, folio, dir_list);
if (err) {
f2fs_folio_put(folio, true);
break;
}
+ recovered_dentry++;
}
err = do_recover_data(sbi, entry->inode, folio);
if (err) {
f2fs_folio_put(folio, true);
break;
}
+ recovered_dnode++;
if (entry->blkaddr == blkaddr)
list_move_tail(&entry->list, tmp_inode_list);
@@ -840,9 +855,15 @@ next:
f2fs_folio_put(folio, true);
f2fs_ra_meta_pages_cond(sbi, blkaddr, ra_blocks);
+ total_dnode++;
}
if (!err)
err = f2fs_allocate_new_segments(sbi);
+
+ f2fs_notice(sbi, "do_recover_data: dnode: (recoverable: %u, fsynced: %u, "
+ "total: %u), recovered: (inode: %u, dentry: %u, dnode: %u), err: %d",
+ recoverable_dnode, fsynced_dnode, total_dnode, recovered_inode,
+ recovered_dentry, recovered_dnode, err);
return err;
}
@@ -855,6 +876,9 @@ int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
unsigned long s_flags = sbi->sb->s_flags;
bool need_writecp = false;
+ f2fs_notice(sbi, "f2fs_recover_fsync_data: recovery fsync data, "
+ "check_only: %d", check_only);
+
if (is_sbi_flag_set(sbi, SBI_IS_WRITABLE))
f2fs_info(sbi, "recover fsync data on readonly fs");
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index ae1223ef648f..cc82d42ef14c 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -334,7 +334,7 @@ static int __f2fs_commit_atomic_write(struct inode *inode)
goto next;
}
- blen = min((pgoff_t)ADDRS_PER_PAGE(&dn.node_folio->page, cow_inode),
+ blen = min((pgoff_t)ADDRS_PER_PAGE(dn.node_folio, cow_inode),
len);
index = off;
for (i = 0; i < blen; i++, dn.ofs_in_node++, index++) {
@@ -455,7 +455,8 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
} else {
struct f2fs_gc_control gc_control = {
.victim_segno = NULL_SEGNO,
- .init_gc_type = BG_GC,
+ .init_gc_type = f2fs_sb_has_blkzoned(sbi) ?
+ FG_GC : BG_GC,
.no_bg_gc = true,
.should_migrate_blocks = false,
.err_gc_skipped = false,
@@ -772,7 +773,7 @@ static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
/* need not be added */
- if (IS_CURSEG(sbi, segno))
+ if (is_curseg(sbi, segno))
return;
if (!test_and_set_bit(segno, dirty_i->dirty_segmap[dirty_type]))
@@ -799,7 +800,7 @@ static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
!valid_blocks) ||
valid_blocks == CAP_BLKS_PER_SEC(sbi));
- if (!IS_CURSEC(sbi, secno))
+ if (!is_cursec(sbi, secno))
set_bit(secno, dirty_i->dirty_secmap);
}
}
@@ -838,7 +839,7 @@ static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
return;
}
- if (!IS_CURSEC(sbi, secno))
+ if (!is_cursec(sbi, secno))
set_bit(secno, dirty_i->dirty_secmap);
}
}
@@ -855,7 +856,7 @@ static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
unsigned short valid_blocks, ckpt_valid_blocks;
unsigned int usable_blocks;
- if (segno == NULL_SEGNO || IS_CURSEG(sbi, segno))
+ if (segno == NULL_SEGNO || is_curseg(sbi, segno))
return;
usable_blocks = f2fs_usable_blks_in_seg(sbi, segno);
@@ -888,7 +889,7 @@ void f2fs_dirty_to_prefree(struct f2fs_sb_info *sbi)
for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
if (get_valid_blocks(sbi, segno, false))
continue;
- if (IS_CURSEG(sbi, segno))
+ if (is_curseg(sbi, segno))
continue;
__locate_dirty_segment(sbi, segno, PRE);
__remove_dirty_segment(sbi, segno, DIRTY);
@@ -2107,7 +2108,7 @@ static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc,
if (!force) {
if (!f2fs_realtime_discard_enable(sbi) ||
(!se->valid_blocks &&
- !IS_CURSEG(sbi, cpc->trim_start)) ||
+ !is_curseg(sbi, cpc->trim_start)) ||
SM_I(sbi)->dcc_info->nr_discards >=
SM_I(sbi)->dcc_info->max_discards)
return false;
@@ -2235,7 +2236,7 @@ void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi,
next:
secno = GET_SEC_FROM_SEG(sbi, start);
start_segno = GET_SEG_FROM_SEC(sbi, secno);
- if (!IS_CURSEC(sbi, secno) &&
+ if (!is_cursec(sbi, secno) &&
!get_valid_blocks(sbi, start, true))
f2fs_issue_discard(sbi, START_BLOCK(sbi, start_segno),
BLKS_PER_SEC(sbi));
@@ -3619,7 +3620,7 @@ static int __get_segment_type_4(struct f2fs_io_info *fio)
else
return CURSEG_COLD_DATA;
} else {
- if (IS_DNODE(fio->page) && is_cold_node(fio->page))
+ if (IS_DNODE(fio->folio) && is_cold_node(fio->folio))
return CURSEG_WARM_NODE;
else
return CURSEG_COLD_NODE;
@@ -3665,8 +3666,7 @@ static int __get_segment_type_6(struct f2fs_io_info *fio)
if (file_is_cold(inode) || f2fs_need_compress_data(inode))
return CURSEG_COLD_DATA;
- type = __get_age_segment_type(inode,
- page_folio(fio->page)->index);
+ type = __get_age_segment_type(inode, fio->folio->index);
if (type != NO_CHECK_TYPE)
return type;
@@ -3677,8 +3677,8 @@ static int __get_segment_type_6(struct f2fs_io_info *fio)
return f2fs_rw_hint_to_seg_type(F2FS_I_SB(inode),
inode->i_write_hint);
} else {
- if (IS_DNODE(fio->page))
- return is_cold_node(fio->page) ? CURSEG_WARM_NODE :
+ if (IS_DNODE(fio->folio))
+ return is_cold_node(fio->folio) ? CURSEG_WARM_NODE :
CURSEG_HOT_NODE;
return CURSEG_COLD_NODE;
}
@@ -3746,7 +3746,7 @@ static void f2fs_randomize_chunk(struct f2fs_sb_info *sbi,
get_random_u32_inclusive(1, sbi->max_fragment_hole);
}
-int f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
+int f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct folio *folio,
block_t old_blkaddr, block_t *new_blkaddr,
struct f2fs_summary *sum, int type,
struct f2fs_io_info *fio)
@@ -3850,10 +3850,10 @@ skip_new_segment:
up_write(&sit_i->sentry_lock);
- if (page && IS_NODESEG(curseg->seg_type)) {
- fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg));
+ if (folio && IS_NODESEG(curseg->seg_type)) {
+ fill_node_footer_blkaddr(folio, NEXT_FREE_BLKADDR(sbi, curseg));
- f2fs_inode_chksum_set(sbi, page);
+ f2fs_inode_chksum_set(sbi, folio);
}
if (fio) {
@@ -3931,7 +3931,7 @@ static int log_type_to_seg_type(enum log_type type)
static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio)
{
- struct folio *folio = page_folio(fio->page);
+ struct folio *folio = fio->folio;
enum log_type type = __get_segment_type(fio);
int seg_type = log_type_to_seg_type(type);
bool keep_order = (f2fs_lfs_mode(fio->sbi) &&
@@ -3940,15 +3940,21 @@ static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio)
if (keep_order)
f2fs_down_read(&fio->sbi->io_order_lock);
- if (f2fs_allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr,
+ if (f2fs_allocate_data_block(fio->sbi, folio, fio->old_blkaddr,
&fio->new_blkaddr, sum, type, fio)) {
if (fscrypt_inode_uses_fs_layer_crypto(folio->mapping->host))
fscrypt_finalize_bounce_page(&fio->encrypted_page);
folio_end_writeback(folio);
if (f2fs_in_warm_node_list(fio->sbi, folio))
f2fs_del_fsync_node_entry(fio->sbi, folio);
+ f2fs_bug_on(fio->sbi, !is_set_ckpt_flags(fio->sbi,
+ CP_ERROR_FLAG));
goto out;
}
+
+ f2fs_bug_on(fio->sbi, !f2fs_is_valid_blkaddr_raw(fio->sbi,
+ fio->new_blkaddr, DATA_GENERIC_ENHANCE));
+
if (GET_SEGNO(fio->sbi, fio->old_blkaddr) != NULL_SEGNO)
f2fs_invalidate_internal_cache(fio->sbi, fio->old_blkaddr, 1);
@@ -3972,7 +3978,7 @@ void f2fs_do_write_meta_page(struct f2fs_sb_info *sbi, struct folio *folio,
.op_flags = REQ_SYNC | REQ_META | REQ_PRIO,
.old_blkaddr = folio->index,
.new_blkaddr = folio->index,
- .page = folio_page(folio, 0),
+ .folio = folio,
.encrypted_page = NULL,
.in_list = 0,
};
@@ -4100,14 +4106,14 @@ void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
if (!recover_curseg) {
/* for recovery flow */
- if (se->valid_blocks == 0 && !IS_CURSEG(sbi, segno)) {
+ if (se->valid_blocks == 0 && !is_curseg(sbi, segno)) {
if (old_blkaddr == NULL_ADDR)
type = CURSEG_COLD_DATA;
else
type = CURSEG_WARM_DATA;
}
} else {
- if (IS_CURSEG(sbi, segno)) {
+ if (is_curseg(sbi, segno)) {
/* se->type is volatile as SSR allocation */
type = __f2fs_get_curseg(sbi, segno);
f2fs_bug_on(sbi, type == NO_CHECK_TYPE);
@@ -4191,7 +4197,7 @@ void f2fs_folio_wait_writeback(struct folio *folio, enum page_type type,
struct f2fs_sb_info *sbi = F2FS_F_SB(folio);
/* submit cached LFS IO */
- f2fs_submit_merged_write_cond(sbi, NULL, &folio->page, 0, type);
+ f2fs_submit_merged_write_cond(sbi, NULL, folio, 0, type);
/* submit cached IPU IO */
f2fs_submit_merged_ipu_write(sbi, NULL, folio);
if (ordered) {
@@ -5143,7 +5149,7 @@ static void init_dirty_segmap(struct f2fs_sb_info *sbi)
if (!valid_blocks || valid_blocks == CAP_BLKS_PER_SEC(sbi))
continue;
- if (IS_CURSEC(sbi, secno))
+ if (is_cursec(sbi, secno))
continue;
set_bit(secno, dirty_i->dirty_secmap);
}
@@ -5279,7 +5285,7 @@ static int check_zone_write_pointer(struct f2fs_sb_info *sbi,
* Get # of valid block of the zone.
*/
valid_block_cnt = get_valid_blocks(sbi, zone_segno, true);
- if (IS_CURSEC(sbi, GET_SEC_FROM_SEG(sbi, zone_segno))) {
+ if (is_cursec(sbi, GET_SEC_FROM_SEG(sbi, zone_segno))) {
f2fs_notice(sbi, "Open zones: valid block[0x%x,0x%x] cond[%s]",
zone_segno, valid_block_cnt,
blk_zone_cond_str(zone->cond));
@@ -5806,9 +5812,9 @@ static void destroy_sit_info(struct f2fs_sb_info *sbi)
kvfree(sit_i->dirty_sentries_bitmap);
SM_I(sbi)->sit_info = NULL;
- kvfree(sit_i->sit_bitmap);
+ kfree(sit_i->sit_bitmap);
#ifdef CONFIG_F2FS_CHECK_FS
- kvfree(sit_i->sit_bitmap_mir);
+ kfree(sit_i->sit_bitmap_mir);
kvfree(sit_i->invalid_segmap);
#endif
kfree(sit_i);
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index db619fd2f51a..5e2ee5c686b1 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -34,34 +34,6 @@ static inline void sanity_check_seg_type(struct f2fs_sb_info *sbi,
f2fs_bug_on(sbi, seg_type >= NR_PERSISTENT_LOG);
}
-#define IS_CURSEG(sbi, seg) \
- (((seg) == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno) || \
- ((seg) == CURSEG_I(sbi, CURSEG_WARM_DATA)->segno) || \
- ((seg) == CURSEG_I(sbi, CURSEG_COLD_DATA)->segno) || \
- ((seg) == CURSEG_I(sbi, CURSEG_HOT_NODE)->segno) || \
- ((seg) == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno) || \
- ((seg) == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno) || \
- ((seg) == CURSEG_I(sbi, CURSEG_COLD_DATA_PINNED)->segno) || \
- ((seg) == CURSEG_I(sbi, CURSEG_ALL_DATA_ATGC)->segno))
-
-#define IS_CURSEC(sbi, secno) \
- (((secno) == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno / \
- SEGS_PER_SEC(sbi)) || \
- ((secno) == CURSEG_I(sbi, CURSEG_WARM_DATA)->segno / \
- SEGS_PER_SEC(sbi)) || \
- ((secno) == CURSEG_I(sbi, CURSEG_COLD_DATA)->segno / \
- SEGS_PER_SEC(sbi)) || \
- ((secno) == CURSEG_I(sbi, CURSEG_HOT_NODE)->segno / \
- SEGS_PER_SEC(sbi)) || \
- ((secno) == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno / \
- SEGS_PER_SEC(sbi)) || \
- ((secno) == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno / \
- SEGS_PER_SEC(sbi)) || \
- ((secno) == CURSEG_I(sbi, CURSEG_COLD_DATA_PINNED)->segno / \
- SEGS_PER_SEC(sbi)) || \
- ((secno) == CURSEG_I(sbi, CURSEG_ALL_DATA_ATGC)->segno / \
- SEGS_PER_SEC(sbi)))
-
#define MAIN_BLKADDR(sbi) \
(SM_I(sbi) ? SM_I(sbi)->main_blkaddr : \
le32_to_cpu(F2FS_RAW_SUPER(sbi)->main_blkaddr))
@@ -318,6 +290,28 @@ static inline struct curseg_info *CURSEG_I(struct f2fs_sb_info *sbi, int type)
return (struct curseg_info *)(SM_I(sbi)->curseg_array + type);
}
+static inline bool is_curseg(struct f2fs_sb_info *sbi, unsigned int segno)
+{
+ int i;
+
+ for (i = CURSEG_HOT_DATA; i < NO_CHECK_TYPE; i++) {
+ if (segno == CURSEG_I(sbi, i)->segno)
+ return true;
+ }
+ return false;
+}
+
+static inline bool is_cursec(struct f2fs_sb_info *sbi, unsigned int secno)
+{
+ int i;
+
+ for (i = CURSEG_HOT_DATA; i < NO_CHECK_TYPE; i++) {
+ if (secno == GET_SEC_FROM_SEG(sbi, CURSEG_I(sbi, i)->segno))
+ return true;
+ }
+ return false;
+}
+
static inline struct seg_entry *get_seg_entry(struct f2fs_sb_info *sbi,
unsigned int segno)
{
@@ -509,7 +503,7 @@ static inline void __set_test_and_free(struct f2fs_sb_info *sbi,
free_i->free_segments++;
- if (!inmem && IS_CURSEC(sbi, secno))
+ if (!inmem && is_cursec(sbi, secno))
goto unlock_out;
/* check large section */
@@ -674,8 +668,7 @@ static inline void __get_secs_required(struct f2fs_sb_info *sbi,
unsigned int dent_blocks = total_dent_blocks % CAP_BLKS_PER_SEC(sbi);
unsigned int data_blocks = 0;
- if (f2fs_lfs_mode(sbi) &&
- unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
+ if (f2fs_lfs_mode(sbi)) {
total_data_blocks = get_pages(sbi, F2FS_DIRTY_DATA);
data_secs = total_data_blocks / CAP_BLKS_PER_SEC(sbi);
data_blocks = total_data_blocks % CAP_BLKS_PER_SEC(sbi);
@@ -684,7 +677,7 @@ static inline void __get_secs_required(struct f2fs_sb_info *sbi,
if (lower_p)
*lower_p = node_secs + dent_secs + data_secs;
if (upper_p)
- *upper_p = node_secs + dent_secs +
+ *upper_p = node_secs + dent_secs + data_secs +
(node_blocks ? 1 : 0) + (dent_blocks ? 1 : 0) +
(data_blocks ? 1 : 0);
if (curseg_p)
@@ -986,7 +979,7 @@ static inline block_t sum_blk_addr(struct f2fs_sb_info *sbi, int base, int type)
static inline bool sec_usage_check(struct f2fs_sb_info *sbi, unsigned int secno)
{
- if (IS_CURSEC(sbi, secno) || (sbi->cur_victim_sec == secno))
+ if (is_cursec(sbi, secno) || (sbi->cur_victim_sec == secno))
return true;
return false;
}
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index bbf1dad6843f..e16c4e2830c2 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -27,6 +27,8 @@
#include <linux/part_stat.h>
#include <linux/zstd.h>
#include <linux/lz4.h>
+#include <linux/ctype.h>
+#include <linux/fs_parser.h>
#include "f2fs.h"
#include "node.h"
@@ -125,29 +127,20 @@ enum {
Opt_disable_roll_forward,
Opt_norecovery,
Opt_discard,
- Opt_nodiscard,
Opt_noheap,
Opt_heap,
Opt_user_xattr,
- Opt_nouser_xattr,
Opt_acl,
- Opt_noacl,
Opt_active_logs,
Opt_disable_ext_identify,
Opt_inline_xattr,
- Opt_noinline_xattr,
Opt_inline_xattr_size,
Opt_inline_data,
Opt_inline_dentry,
- Opt_noinline_dentry,
Opt_flush_merge,
- Opt_noflush_merge,
Opt_barrier,
- Opt_nobarrier,
Opt_fastboot,
Opt_extent_cache,
- Opt_noextent_cache,
- Opt_noinline_data,
Opt_data_flush,
Opt_reserve_root,
Opt_resgid,
@@ -156,21 +149,13 @@ enum {
Opt_fault_injection,
Opt_fault_type,
Opt_lazytime,
- Opt_nolazytime,
Opt_quota,
- Opt_noquota,
Opt_usrquota,
Opt_grpquota,
Opt_prjquota,
Opt_usrjquota,
Opt_grpjquota,
Opt_prjjquota,
- Opt_offusrjquota,
- Opt_offgrpjquota,
- Opt_offprjjquota,
- Opt_jqfmt_vfsold,
- Opt_jqfmt_vfsv0,
- Opt_jqfmt_vfsv1,
Opt_alloc,
Opt_fsync,
Opt_test_dummy_encryption,
@@ -180,107 +165,209 @@ enum {
Opt_checkpoint_disable_cap_perc,
Opt_checkpoint_enable,
Opt_checkpoint_merge,
- Opt_nocheckpoint_merge,
Opt_compress_algorithm,
Opt_compress_log_size,
- Opt_compress_extension,
Opt_nocompress_extension,
+ Opt_compress_extension,
Opt_compress_chksum,
Opt_compress_mode,
Opt_compress_cache,
Opt_atgc,
Opt_gc_merge,
- Opt_nogc_merge,
Opt_discard_unit,
Opt_memory_mode,
Opt_age_extent_cache,
Opt_errors,
Opt_nat_bits,
+ Opt_jqfmt,
+ Opt_checkpoint,
Opt_err,
};
-static match_table_t f2fs_tokens = {
- {Opt_gc_background, "background_gc=%s"},
- {Opt_disable_roll_forward, "disable_roll_forward"},
- {Opt_norecovery, "norecovery"},
- {Opt_discard, "discard"},
- {Opt_nodiscard, "nodiscard"},
- {Opt_noheap, "no_heap"},
- {Opt_heap, "heap"},
- {Opt_user_xattr, "user_xattr"},
- {Opt_nouser_xattr, "nouser_xattr"},
- {Opt_acl, "acl"},
- {Opt_noacl, "noacl"},
- {Opt_active_logs, "active_logs=%u"},
- {Opt_disable_ext_identify, "disable_ext_identify"},
- {Opt_inline_xattr, "inline_xattr"},
- {Opt_noinline_xattr, "noinline_xattr"},
- {Opt_inline_xattr_size, "inline_xattr_size=%u"},
- {Opt_inline_data, "inline_data"},
- {Opt_inline_dentry, "inline_dentry"},
- {Opt_noinline_dentry, "noinline_dentry"},
- {Opt_flush_merge, "flush_merge"},
- {Opt_noflush_merge, "noflush_merge"},
- {Opt_barrier, "barrier"},
- {Opt_nobarrier, "nobarrier"},
- {Opt_fastboot, "fastboot"},
- {Opt_extent_cache, "extent_cache"},
- {Opt_noextent_cache, "noextent_cache"},
- {Opt_noinline_data, "noinline_data"},
- {Opt_data_flush, "data_flush"},
- {Opt_reserve_root, "reserve_root=%u"},
- {Opt_resgid, "resgid=%u"},
- {Opt_resuid, "resuid=%u"},
- {Opt_mode, "mode=%s"},
- {Opt_fault_injection, "fault_injection=%u"},
- {Opt_fault_type, "fault_type=%u"},
- {Opt_lazytime, "lazytime"},
- {Opt_nolazytime, "nolazytime"},
- {Opt_quota, "quota"},
- {Opt_noquota, "noquota"},
- {Opt_usrquota, "usrquota"},
- {Opt_grpquota, "grpquota"},
- {Opt_prjquota, "prjquota"},
- {Opt_usrjquota, "usrjquota=%s"},
- {Opt_grpjquota, "grpjquota=%s"},
- {Opt_prjjquota, "prjjquota=%s"},
- {Opt_offusrjquota, "usrjquota="},
- {Opt_offgrpjquota, "grpjquota="},
- {Opt_offprjjquota, "prjjquota="},
- {Opt_jqfmt_vfsold, "jqfmt=vfsold"},
- {Opt_jqfmt_vfsv0, "jqfmt=vfsv0"},
- {Opt_jqfmt_vfsv1, "jqfmt=vfsv1"},
- {Opt_alloc, "alloc_mode=%s"},
- {Opt_fsync, "fsync_mode=%s"},
- {Opt_test_dummy_encryption, "test_dummy_encryption=%s"},
- {Opt_test_dummy_encryption, "test_dummy_encryption"},
- {Opt_inlinecrypt, "inlinecrypt"},
- {Opt_checkpoint_disable, "checkpoint=disable"},
- {Opt_checkpoint_disable_cap, "checkpoint=disable:%u"},
- {Opt_checkpoint_disable_cap_perc, "checkpoint=disable:%u%%"},
- {Opt_checkpoint_enable, "checkpoint=enable"},
- {Opt_checkpoint_merge, "checkpoint_merge"},
- {Opt_nocheckpoint_merge, "nocheckpoint_merge"},
- {Opt_compress_algorithm, "compress_algorithm=%s"},
- {Opt_compress_log_size, "compress_log_size=%u"},
- {Opt_compress_extension, "compress_extension=%s"},
- {Opt_nocompress_extension, "nocompress_extension=%s"},
- {Opt_compress_chksum, "compress_chksum"},
- {Opt_compress_mode, "compress_mode=%s"},
- {Opt_compress_cache, "compress_cache"},
- {Opt_atgc, "atgc"},
- {Opt_gc_merge, "gc_merge"},
- {Opt_nogc_merge, "nogc_merge"},
- {Opt_discard_unit, "discard_unit=%s"},
- {Opt_memory_mode, "memory=%s"},
- {Opt_age_extent_cache, "age_extent_cache"},
- {Opt_errors, "errors=%s"},
- {Opt_nat_bits, "nat_bits"},
+static const struct constant_table f2fs_param_background_gc[] = {
+ {"on", BGGC_MODE_ON},
+ {"off", BGGC_MODE_OFF},
+ {"sync", BGGC_MODE_SYNC},
+ {}
+};
+
+static const struct constant_table f2fs_param_mode[] = {
+ {"adaptive", FS_MODE_ADAPTIVE},
+ {"lfs", FS_MODE_LFS},
+ {"fragment:segment", FS_MODE_FRAGMENT_SEG},
+ {"fragment:block", FS_MODE_FRAGMENT_BLK},
+ {}
+};
+
+static const struct constant_table f2fs_param_jqfmt[] = {
+ {"vfsold", QFMT_VFS_OLD},
+ {"vfsv0", QFMT_VFS_V0},
+ {"vfsv1", QFMT_VFS_V1},
+ {}
+};
+
+static const struct constant_table f2fs_param_alloc_mode[] = {
+ {"default", ALLOC_MODE_DEFAULT},
+ {"reuse", ALLOC_MODE_REUSE},
+ {}
+};
+static const struct constant_table f2fs_param_fsync_mode[] = {
+ {"posix", FSYNC_MODE_POSIX},
+ {"strict", FSYNC_MODE_STRICT},
+ {"nobarrier", FSYNC_MODE_NOBARRIER},
+ {}
+};
+
+static const struct constant_table f2fs_param_compress_mode[] = {
+ {"fs", COMPR_MODE_FS},
+ {"user", COMPR_MODE_USER},
+ {}
+};
+
+static const struct constant_table f2fs_param_discard_unit[] = {
+ {"block", DISCARD_UNIT_BLOCK},
+ {"segment", DISCARD_UNIT_SEGMENT},
+ {"section", DISCARD_UNIT_SECTION},
+ {}
+};
+
+static const struct constant_table f2fs_param_memory_mode[] = {
+ {"normal", MEMORY_MODE_NORMAL},
+ {"low", MEMORY_MODE_LOW},
+ {}
+};
+
+static const struct constant_table f2fs_param_errors[] = {
+ {"remount-ro", MOUNT_ERRORS_READONLY},
+ {"continue", MOUNT_ERRORS_CONTINUE},
+ {"panic", MOUNT_ERRORS_PANIC},
+ {}
+};
+
+static const struct fs_parameter_spec f2fs_param_specs[] = {
+ fsparam_enum("background_gc", Opt_gc_background, f2fs_param_background_gc),
+ fsparam_flag("disable_roll_forward", Opt_disable_roll_forward),
+ fsparam_flag("norecovery", Opt_norecovery),
+ fsparam_flag_no("discard", Opt_discard),
+ fsparam_flag("no_heap", Opt_noheap),
+ fsparam_flag("heap", Opt_heap),
+ fsparam_flag_no("user_xattr", Opt_user_xattr),
+ fsparam_flag_no("acl", Opt_acl),
+ fsparam_s32("active_logs", Opt_active_logs),
+ fsparam_flag("disable_ext_identify", Opt_disable_ext_identify),
+ fsparam_flag_no("inline_xattr", Opt_inline_xattr),
+ fsparam_s32("inline_xattr_size", Opt_inline_xattr_size),
+ fsparam_flag_no("inline_data", Opt_inline_data),
+ fsparam_flag_no("inline_dentry", Opt_inline_dentry),
+ fsparam_flag_no("flush_merge", Opt_flush_merge),
+ fsparam_flag_no("barrier", Opt_barrier),
+ fsparam_flag("fastboot", Opt_fastboot),
+ fsparam_flag_no("extent_cache", Opt_extent_cache),
+ fsparam_flag("data_flush", Opt_data_flush),
+ fsparam_u32("reserve_root", Opt_reserve_root),
+ fsparam_gid("resgid", Opt_resgid),
+ fsparam_uid("resuid", Opt_resuid),
+ fsparam_enum("mode", Opt_mode, f2fs_param_mode),
+ fsparam_s32("fault_injection", Opt_fault_injection),
+ fsparam_u32("fault_type", Opt_fault_type),
+ fsparam_flag_no("lazytime", Opt_lazytime),
+ fsparam_flag_no("quota", Opt_quota),
+ fsparam_flag("usrquota", Opt_usrquota),
+ fsparam_flag("grpquota", Opt_grpquota),
+ fsparam_flag("prjquota", Opt_prjquota),
+ fsparam_string_empty("usrjquota", Opt_usrjquota),
+ fsparam_string_empty("grpjquota", Opt_grpjquota),
+ fsparam_string_empty("prjjquota", Opt_prjjquota),
+ fsparam_flag("nat_bits", Opt_nat_bits),
+ fsparam_enum("jqfmt", Opt_jqfmt, f2fs_param_jqfmt),
+ fsparam_enum("alloc_mode", Opt_alloc, f2fs_param_alloc_mode),
+ fsparam_enum("fsync_mode", Opt_fsync, f2fs_param_fsync_mode),
+ fsparam_string("test_dummy_encryption", Opt_test_dummy_encryption),
+ fsparam_flag("test_dummy_encryption", Opt_test_dummy_encryption),
+ fsparam_flag("inlinecrypt", Opt_inlinecrypt),
+ fsparam_string("checkpoint", Opt_checkpoint),
+ fsparam_flag_no("checkpoint_merge", Opt_checkpoint_merge),
+ fsparam_string("compress_algorithm", Opt_compress_algorithm),
+ fsparam_u32("compress_log_size", Opt_compress_log_size),
+ fsparam_string("compress_extension", Opt_compress_extension),
+ fsparam_string("nocompress_extension", Opt_nocompress_extension),
+ fsparam_flag("compress_chksum", Opt_compress_chksum),
+ fsparam_enum("compress_mode", Opt_compress_mode, f2fs_param_compress_mode),
+ fsparam_flag("compress_cache", Opt_compress_cache),
+ fsparam_flag("atgc", Opt_atgc),
+ fsparam_flag_no("gc_merge", Opt_gc_merge),
+ fsparam_enum("discard_unit", Opt_discard_unit, f2fs_param_discard_unit),
+ fsparam_enum("memory", Opt_memory_mode, f2fs_param_memory_mode),
+ fsparam_flag("age_extent_cache", Opt_age_extent_cache),
+ fsparam_enum("errors", Opt_errors, f2fs_param_errors),
+ {}
+};
+
+/* Resort to a match_table for this interestingly formatted option */
+static match_table_t f2fs_checkpoint_tokens = {
+ {Opt_checkpoint_disable, "disable"},
+ {Opt_checkpoint_disable_cap, "disable:%u"},
+ {Opt_checkpoint_disable_cap_perc, "disable:%u%%"},
+ {Opt_checkpoint_enable, "enable"},
{Opt_err, NULL},
};
+#define F2FS_SPEC_background_gc (1 << 0)
+#define F2FS_SPEC_inline_xattr_size (1 << 1)
+#define F2FS_SPEC_active_logs (1 << 2)
+#define F2FS_SPEC_reserve_root (1 << 3)
+#define F2FS_SPEC_resgid (1 << 4)
+#define F2FS_SPEC_resuid (1 << 5)
+#define F2FS_SPEC_mode (1 << 6)
+#define F2FS_SPEC_fault_injection (1 << 7)
+#define F2FS_SPEC_fault_type (1 << 8)
+#define F2FS_SPEC_jqfmt (1 << 9)
+#define F2FS_SPEC_alloc_mode (1 << 10)
+#define F2FS_SPEC_fsync_mode (1 << 11)
+#define F2FS_SPEC_checkpoint_disable_cap (1 << 12)
+#define F2FS_SPEC_checkpoint_disable_cap_perc (1 << 13)
+#define F2FS_SPEC_compress_level (1 << 14)
+#define F2FS_SPEC_compress_algorithm (1 << 15)
+#define F2FS_SPEC_compress_log_size (1 << 16)
+#define F2FS_SPEC_compress_extension (1 << 17)
+#define F2FS_SPEC_nocompress_extension (1 << 18)
+#define F2FS_SPEC_compress_chksum (1 << 19)
+#define F2FS_SPEC_compress_mode (1 << 20)
+#define F2FS_SPEC_discard_unit (1 << 21)
+#define F2FS_SPEC_memory_mode (1 << 22)
+#define F2FS_SPEC_errors (1 << 23)
+
+struct f2fs_fs_context {
+ struct f2fs_mount_info info;
+ unsigned int opt_mask; /* Bits changed */
+ unsigned int spec_mask;
+ unsigned short qname_mask;
+};
+
+#define F2FS_CTX_INFO(ctx) ((ctx)->info)
+
+static inline void ctx_set_opt(struct f2fs_fs_context *ctx,
+ unsigned int flag)
+{
+ ctx->info.opt |= flag;
+ ctx->opt_mask |= flag;
+}
+
+static inline void ctx_clear_opt(struct f2fs_fs_context *ctx,
+ unsigned int flag)
+{
+ ctx->info.opt &= ~flag;
+ ctx->opt_mask |= flag;
+}
+
+static inline bool ctx_test_opt(struct f2fs_fs_context *ctx,
+ unsigned int flag)
+{
+ return ctx->info.opt & flag;
+}
+
void f2fs_printk(struct f2fs_sb_info *sbi, bool limit_rate,
- const char *fmt, ...)
+ const char *fmt, ...)
{
struct va_format vaf;
va_list args;
@@ -292,11 +379,19 @@ void f2fs_printk(struct f2fs_sb_info *sbi, bool limit_rate,
vaf.fmt = printk_skip_level(fmt);
vaf.va = &args;
if (limit_rate)
- printk_ratelimited("%c%cF2FS-fs (%s): %pV\n",
- KERN_SOH_ASCII, level, sbi->sb->s_id, &vaf);
+ if (sbi)
+ printk_ratelimited("%c%cF2FS-fs (%s): %pV\n",
+ KERN_SOH_ASCII, level, sbi->sb->s_id, &vaf);
+ else
+ printk_ratelimited("%c%cF2FS-fs: %pV\n",
+ KERN_SOH_ASCII, level, &vaf);
else
- printk("%c%cF2FS-fs (%s): %pV\n",
- KERN_SOH_ASCII, level, sbi->sb->s_id, &vaf);
+ if (sbi)
+ printk("%c%cF2FS-fs (%s): %pV\n",
+ KERN_SOH_ASCII, level, sbi->sb->s_id, &vaf);
+ else
+ printk("%c%cF2FS-fs: %pV\n",
+ KERN_SOH_ASCII, level, &vaf);
va_end(args);
}
@@ -390,159 +485,90 @@ static void init_once(void *foo)
#ifdef CONFIG_QUOTA
static const char * const quotatypes[] = INITQFNAMES;
#define QTYPE2NAME(t) (quotatypes[t])
-static int f2fs_set_qf_name(struct f2fs_sb_info *sbi, int qtype,
- substring_t *args)
+/*
+ * Note the name of the specified quota file.
+ *
+ * Stages param->string as the journaled quota file name for @qtype in the
+ * parse context (fc->fs_private) and sets bit @qtype in ctx->qname_mask.
+ * The name must be non-empty and must not contain '/'. Giving the same name
+ * again for a type that already has one is accepted without any change; a
+ * different name is rejected.
+ *
+ * Returns 0 on success, -EINVAL for a missing, invalid or conflicting name,
+ * and -ENOMEM if the copy of the name cannot be allocated.
+ */
+static int f2fs_note_qf_name(struct fs_context *fc, int qtype,
+ struct fs_parameter *param)
{
- struct super_block *sb = sbi->sb;
+ struct f2fs_fs_context *ctx = fc->fs_private;
char *qname;
- int ret = -EINVAL;
- if (sb_any_quota_loaded(sb) && !F2FS_OPTION(sbi).s_qf_names[qtype]) {
- f2fs_err(sbi, "Cannot change journaled quota options when quota turned on");
+ if (param->size < 1) {
+ f2fs_err(NULL, "Missing quota name");
return -EINVAL;
}
- if (f2fs_sb_has_quota_ino(sbi)) {
- f2fs_info(sbi, "QUOTA feature is enabled, so ignore qf_name");
+ if (strchr(param->string, '/')) {
+ f2fs_err(NULL, "quotafile must be on filesystem root");
+ return -EINVAL;
+ }
+ if (ctx->info.s_qf_names[qtype]) { /* a name was already staged for this type */
+ if (strcmp(ctx->info.s_qf_names[qtype], param->string) != 0) {
+ f2fs_err(NULL, "Quota file already specified");
+ return -EINVAL;
+ }
return 0;
}
- qname = match_strdup(args);
+ qname = kmemdup_nul(param->string, param->size, GFP_KERNEL); /* NUL-terminated copy of the name */
if (!qname) {
- f2fs_err(sbi, "Not enough memory for storing quotafile name");
+ f2fs_err(NULL, "Not enough memory for storing quotafile name");
return -ENOMEM;
}
- if (F2FS_OPTION(sbi).s_qf_names[qtype]) {
- if (strcmp(F2FS_OPTION(sbi).s_qf_names[qtype], qname) == 0)
- ret = 0;
- else
- f2fs_err(sbi, "%s quota file already specified",
- QTYPE2NAME(qtype));
- goto errout;
- }
- if (strchr(qname, '/')) {
- f2fs_err(sbi, "quotafile must be on filesystem root");
- goto errout;
- }
- F2FS_OPTION(sbi).s_qf_names[qtype] = qname;
- set_opt(sbi, QUOTA);
+ F2FS_CTX_INFO(ctx).s_qf_names[qtype] = qname;
+ ctx->qname_mask |= 1 << qtype; /* mark this quota type's name as touched */
return 0;
-errout:
- kfree(qname);
- return ret;
}
-static int f2fs_clear_qf_name(struct f2fs_sb_info *sbi, int qtype)
+/*
+ * Clear the name of the specified quota file.
+ *
+ * Frees any name staged for @qtype in the parse context (fc->fs_private),
+ * resets the slot to NULL and sets bit @qtype in ctx->qname_mask so the
+ * removal is recorded as a change. Always returns 0.
+ */
+static int f2fs_unnote_qf_name(struct fs_context *fc, int qtype)
{
- struct super_block *sb = sbi->sb;
+ struct f2fs_fs_context *ctx = fc->fs_private;
- if (sb_any_quota_loaded(sb) && F2FS_OPTION(sbi).s_qf_names[qtype]) {
- f2fs_err(sbi, "Cannot change journaled quota options when quota turned on");
- return -EINVAL;
- }
- kfree(F2FS_OPTION(sbi).s_qf_names[qtype]);
- F2FS_OPTION(sbi).s_qf_names[qtype] = NULL;
+ kfree(ctx->info.s_qf_names[qtype]);
+ ctx->info.s_qf_names[qtype] = NULL;
+ ctx->qname_mask |= 1 << qtype;
return 0;
}
-static int f2fs_check_quota_options(struct f2fs_sb_info *sbi)
+static void f2fs_unnote_qf_name_all(struct fs_context *fc) /* clear the staged name of every quota type */
{
- /*
- * We do the test below only for project quotas. 'usrquota' and
- * 'grpquota' mount options are allowed even without quota feature
- * to support legacy quotas in quota files.
- */
- if (test_opt(sbi, PRJQUOTA) && !f2fs_sb_has_project_quota(sbi)) {
- f2fs_err(sbi, "Project quota feature not enabled. Cannot enable project quota enforcement.");
- return -1;
- }
- if (F2FS_OPTION(sbi).s_qf_names[USRQUOTA] ||
- F2FS_OPTION(sbi).s_qf_names[GRPQUOTA] ||
- F2FS_OPTION(sbi).s_qf_names[PRJQUOTA]) {
- if (test_opt(sbi, USRQUOTA) &&
- F2FS_OPTION(sbi).s_qf_names[USRQUOTA])
- clear_opt(sbi, USRQUOTA);
-
- if (test_opt(sbi, GRPQUOTA) &&
- F2FS_OPTION(sbi).s_qf_names[GRPQUOTA])
- clear_opt(sbi, GRPQUOTA);
-
- if (test_opt(sbi, PRJQUOTA) &&
- F2FS_OPTION(sbi).s_qf_names[PRJQUOTA])
- clear_opt(sbi, PRJQUOTA);
-
- if (test_opt(sbi, GRPQUOTA) || test_opt(sbi, USRQUOTA) ||
- test_opt(sbi, PRJQUOTA)) {
- f2fs_err(sbi, "old and new quota format mixing");
- return -1;
- }
-
- if (!F2FS_OPTION(sbi).s_jquota_fmt) {
- f2fs_err(sbi, "journaled quota format not specified");
- return -1;
- }
- }
+ int i;
- if (f2fs_sb_has_quota_ino(sbi) && F2FS_OPTION(sbi).s_jquota_fmt) {
- f2fs_info(sbi, "QUOTA feature is enabled, so ignore jquota_fmt");
- F2FS_OPTION(sbi).s_jquota_fmt = 0;
- }
- return 0;
+ for (i = 0; i < MAXQUOTAS; i++) /* one call per quota type index below MAXQUOTAS */
+ f2fs_unnote_qf_name(fc, i);
}
#endif
-static int f2fs_set_test_dummy_encryption(struct f2fs_sb_info *sbi,
- const char *opt,
- const substring_t *arg,
- bool is_remount)
+static int f2fs_parse_test_dummy_encryption(const struct fs_parameter *param,
+ struct f2fs_fs_context *ctx) /* parses @param into ctx->info.dummy_enc_policy; returns 0, or -EINVAL for any failure */
{
- struct fs_parameter param = {
- .type = fs_value_is_string,
- .string = arg->from ? arg->from : "",
- };
- struct fscrypt_dummy_policy *policy =
- &F2FS_OPTION(sbi).dummy_enc_policy;
int err;
if (!IS_ENABLED(CONFIG_FS_ENCRYPTION)) {
- f2fs_warn(sbi, "test_dummy_encryption option not supported");
+ f2fs_warn(NULL, "test_dummy_encryption option not supported");
return -EINVAL;
}
-
- if (!f2fs_sb_has_encrypt(sbi)) {
- f2fs_err(sbi, "Encrypt feature is off");
- return -EINVAL;
- }
-
- /*
- * This mount option is just for testing, and it's not worthwhile to
- * implement the extra complexity (e.g. RCU protection) that would be
- * needed to allow it to be set or changed during remount. We do allow
- * it to be specified during remount, but only if there is no change.
- */
- if (is_remount && !fscrypt_is_dummy_policy_set(policy)) {
- f2fs_warn(sbi, "Can't set test_dummy_encryption on remount");
- return -EINVAL;
- }
-
- err = fscrypt_parse_test_dummy_encryption(&param, policy);
+ err = fscrypt_parse_test_dummy_encryption(param,
+ &ctx->info.dummy_enc_policy);
if (err) {
- if (err == -EEXIST)
- f2fs_warn(sbi,
- "Can't change test_dummy_encryption on remount");
- else if (err == -EINVAL)
- f2fs_warn(sbi, "Value of option \"%s\" is unrecognized",
- opt);
+ if (err == -EINVAL)
+ f2fs_warn(NULL, "Value of option \"%s\" is unrecognized",
+ param->key);
+ else if (err == -EEXIST) /* reported as conflicting test_dummy_encryption options */
+ f2fs_warn(NULL, "Conflicting test_dummy_encryption options");
else
- f2fs_warn(sbi, "Error processing option \"%s\" [%d]",
- opt, err);
+ f2fs_warn(NULL, "Error processing option \"%s\" [%d]",
+ param->key, err);
return -EINVAL;
}
- f2fs_warn(sbi, "Test dummy encryption mode enabled");
return 0;
}
#ifdef CONFIG_F2FS_FS_COMPRESSION
-static bool is_compress_extension_exist(struct f2fs_sb_info *sbi,
+static bool is_compress_extension_exist(struct f2fs_mount_info *info,
const char *new_ext, bool is_ext)
{
unsigned char (*ext)[F2FS_EXTENSION_LEN];
@@ -550,11 +576,11 @@ static bool is_compress_extension_exist(struct f2fs_sb_info *sbi,
int i;
if (is_ext) {
- ext = F2FS_OPTION(sbi).extensions;
- ext_cnt = F2FS_OPTION(sbi).compress_ext_cnt;
+ ext = info->extensions;
+ ext_cnt = info->compress_ext_cnt;
} else {
- ext = F2FS_OPTION(sbi).noextensions;
- ext_cnt = F2FS_OPTION(sbi).nocompress_ext_cnt;
+ ext = info->noextensions;
+ ext_cnt = info->nocompress_ext_cnt;
}
for (i = 0; i < ext_cnt; i++) {
@@ -572,28 +598,28 @@ static bool is_compress_extension_exist(struct f2fs_sb_info *sbi,
* extension will be treated as special cases and will not be compressed.
* 3. Don't allow the non-compress extension specifies all files.
*/
-static int f2fs_test_compress_extension(struct f2fs_sb_info *sbi)
+static int f2fs_test_compress_extension(unsigned char (*noext)[F2FS_EXTENSION_LEN],
+ int noext_cnt,
+ unsigned char (*ext)[F2FS_EXTENSION_LEN],
+ int ext_cnt)
{
- unsigned char (*ext)[F2FS_EXTENSION_LEN];
- unsigned char (*noext)[F2FS_EXTENSION_LEN];
- int ext_cnt, noext_cnt, index = 0, no_index = 0;
-
- ext = F2FS_OPTION(sbi).extensions;
- ext_cnt = F2FS_OPTION(sbi).compress_ext_cnt;
- noext = F2FS_OPTION(sbi).noextensions;
- noext_cnt = F2FS_OPTION(sbi).nocompress_ext_cnt;
+ int index = 0, no_index = 0;
if (!noext_cnt)
return 0;
for (no_index = 0; no_index < noext_cnt; no_index++) {
+ if (strlen(noext[no_index]) == 0)
+ continue;
if (!strcasecmp("*", noext[no_index])) {
- f2fs_info(sbi, "Don't allow the nocompress extension specifies all files");
+ f2fs_info(NULL, "Don't allow the nocompress extension specifies all files");
return -EINVAL;
}
for (index = 0; index < ext_cnt; index++) {
+ if (strlen(ext[index]) == 0)
+ continue;
if (!strcasecmp(ext[index], noext[no_index])) {
- f2fs_info(sbi, "Don't allow the same extension %s appear in both compress and nocompress extension",
+ f2fs_info(NULL, "Don't allow the same extension %s appear in both compress and nocompress extension",
ext[index]);
return -EINVAL;
}
@@ -603,58 +629,62 @@ static int f2fs_test_compress_extension(struct f2fs_sb_info *sbi)
}
#ifdef CONFIG_F2FS_FS_LZ4
-static int f2fs_set_lz4hc_level(struct f2fs_sb_info *sbi, const char *str)
+/*
+ * Stage the compression level given with an lz4 compress_algorithm value.
+ *
+ * @ctx: parse context that receives the level
+ * @str: option value; its first three characters are the algorithm name
+ *       (the caller has already matched "lz4")
+ *
+ * A bare three-character value stages level 0. With CONFIG_F2FS_FS_LZ4HC a
+ * ":<level>" suffix is parsed as an unsigned decimal and validated with
+ * f2fs_is_compress_level_valid(); without it any suffix is rejected.
+ * Whenever a level is staged, F2FS_SPEC_compress_level is set in
+ * ctx->spec_mask.
+ *
+ * Returns 0 on success and -EINVAL otherwise.
+ */
+static int f2fs_set_lz4hc_level(struct f2fs_fs_context *ctx, const char *str)
{
#ifdef CONFIG_F2FS_FS_LZ4HC
unsigned int level;
if (strlen(str) == 3) {
- F2FS_OPTION(sbi).compress_level = 0;
+ F2FS_CTX_INFO(ctx).compress_level = 0;
+ ctx->spec_mask |= F2FS_SPEC_compress_level;
return 0;
}
str += 3;
if (str[0] != ':') {
- f2fs_info(sbi, "wrong format, e.g. <alg_name>:<compr_level>");
+ f2fs_info(NULL, "wrong format, e.g. <alg_name>:<compr_level>");
return -EINVAL;
}
if (kstrtouint(str + 1, 10, &level))
return -EINVAL;
if (!f2fs_is_compress_level_valid(COMPRESS_LZ4, level)) {
- f2fs_info(sbi, "invalid lz4hc compress level: %d", level);
+ f2fs_info(NULL, "invalid lz4hc compress level: %u", level); /* level is unsigned int, so %u */
return -EINVAL;
}
- F2FS_OPTION(sbi).compress_level = level;
+ F2FS_CTX_INFO(ctx).compress_level = level;
+ ctx->spec_mask |= F2FS_SPEC_compress_level;
return 0;
#else
if (strlen(str) == 3) {
- F2FS_OPTION(sbi).compress_level = 0;
+ F2FS_CTX_INFO(ctx).compress_level = 0;
+ ctx->spec_mask |= F2FS_SPEC_compress_level;
return 0;
}
- f2fs_info(sbi, "kernel doesn't support lz4hc compression");
+ f2fs_info(NULL, "kernel doesn't support lz4hc compression");
return -EINVAL;
#endif
}
#endif
#ifdef CONFIG_F2FS_FS_ZSTD
-static int f2fs_set_zstd_level(struct f2fs_sb_info *sbi, const char *str)
+static int f2fs_set_zstd_level(struct f2fs_fs_context *ctx, const char *str)
{
int level;
int len = 4;
if (strlen(str) == len) {
- F2FS_OPTION(sbi).compress_level = F2FS_ZSTD_DEFAULT_CLEVEL;
+ F2FS_CTX_INFO(ctx).compress_level = F2FS_ZSTD_DEFAULT_CLEVEL;
+ ctx->spec_mask |= F2FS_SPEC_compress_level;
return 0;
}
str += len;
if (str[0] != ':') {
- f2fs_info(sbi, "wrong format, e.g. <alg_name>:<compr_level>");
+ f2fs_info(NULL, "wrong format, e.g. <alg_name>:<compr_level>");
return -EINVAL;
}
if (kstrtoint(str + 1, 10, &level))
@@ -662,685 +692,750 @@ static int f2fs_set_zstd_level(struct f2fs_sb_info *sbi, const char *str)
/* f2fs does not support negative compress level now */
if (level < 0) {
- f2fs_info(sbi, "do not support negative compress level: %d", level);
+ f2fs_info(NULL, "do not support negative compress level: %d", level);
return -ERANGE;
}
if (!f2fs_is_compress_level_valid(COMPRESS_ZSTD, level)) {
- f2fs_info(sbi, "invalid zstd compress level: %d", level);
+ f2fs_info(NULL, "invalid zstd compress level: %d", level);
return -EINVAL;
}
- F2FS_OPTION(sbi).compress_level = level;
+ F2FS_CTX_INFO(ctx).compress_level = level;
+ ctx->spec_mask |= F2FS_SPEC_compress_level;
return 0;
}
#endif
#endif
-static int parse_options(struct f2fs_sb_info *sbi, char *options, bool is_remount)
+static int f2fs_parse_param(struct fs_context *fc, struct fs_parameter *param)
{
- substring_t args[MAX_OPT_ARGS];
+ struct f2fs_fs_context *ctx = fc->fs_private;
#ifdef CONFIG_F2FS_FS_COMPRESSION
unsigned char (*ext)[F2FS_EXTENSION_LEN];
unsigned char (*noext)[F2FS_EXTENSION_LEN];
int ext_cnt, noext_cnt;
+ char *name;
#endif
- char *p, *name;
- int arg = 0;
- kuid_t uid;
- kgid_t gid;
- int ret;
-
- if (!options)
- return 0;
-
- while ((p = strsep(&options, ",")) != NULL) {
- int token;
+ substring_t args[MAX_OPT_ARGS];
+ struct fs_parse_result result;
+ int token, ret, arg;
- if (!*p)
- continue;
- /*
- * Initialize args struct so we know whether arg was
- * found; some options take optional arguments.
- */
- args[0].to = args[0].from = NULL;
- token = match_token(p, f2fs_tokens, args);
+ token = fs_parse(fc, f2fs_param_specs, param, &result);
+ if (token < 0)
+ return token;
- switch (token) {
- case Opt_gc_background:
- name = match_strdup(&args[0]);
-
- if (!name)
- return -ENOMEM;
- if (!strcmp(name, "on")) {
- F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_ON;
- } else if (!strcmp(name, "off")) {
- if (f2fs_sb_has_blkzoned(sbi)) {
- f2fs_warn(sbi, "zoned devices need bggc");
- kfree(name);
- return -EINVAL;
- }
- F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_OFF;
- } else if (!strcmp(name, "sync")) {
- F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_SYNC;
- } else {
- kfree(name);
- return -EINVAL;
- }
- kfree(name);
- break;
- case Opt_disable_roll_forward:
- set_opt(sbi, DISABLE_ROLL_FORWARD);
- break;
- case Opt_norecovery:
- /* requires ro mount, checked in f2fs_default_check */
- set_opt(sbi, NORECOVERY);
- break;
- case Opt_discard:
- if (!f2fs_hw_support_discard(sbi)) {
- f2fs_warn(sbi, "device does not support discard");
- break;
- }
- set_opt(sbi, DISCARD);
- break;
- case Opt_nodiscard:
- if (f2fs_hw_should_discard(sbi)) {
- f2fs_warn(sbi, "discard is required for zoned block devices");
- return -EINVAL;
- }
- clear_opt(sbi, DISCARD);
- break;
- case Opt_noheap:
- case Opt_heap:
- f2fs_warn(sbi, "heap/no_heap options were deprecated");
- break;
+ switch (token) {
+ case Opt_gc_background:
+ F2FS_CTX_INFO(ctx).bggc_mode = result.uint_32;
+ ctx->spec_mask |= F2FS_SPEC_background_gc;
+ break;
+ case Opt_disable_roll_forward:
+ ctx_set_opt(ctx, F2FS_MOUNT_DISABLE_ROLL_FORWARD);
+ break;
+ case Opt_norecovery:
+ /* requires ro mount, checked in f2fs_validate_options */
+ ctx_set_opt(ctx, F2FS_MOUNT_NORECOVERY);
+ break;
+ case Opt_discard:
+ if (result.negated)
+ ctx_clear_opt(ctx, F2FS_MOUNT_DISCARD);
+ else
+ ctx_set_opt(ctx, F2FS_MOUNT_DISCARD);
+ break;
+ case Opt_noheap:
+ case Opt_heap:
+ f2fs_warn(NULL, "heap/no_heap options were deprecated");
+ break;
#ifdef CONFIG_F2FS_FS_XATTR
- case Opt_user_xattr:
- set_opt(sbi, XATTR_USER);
- break;
- case Opt_nouser_xattr:
- clear_opt(sbi, XATTR_USER);
- break;
- case Opt_inline_xattr:
- set_opt(sbi, INLINE_XATTR);
- break;
- case Opt_noinline_xattr:
- clear_opt(sbi, INLINE_XATTR);
- break;
- case Opt_inline_xattr_size:
- if (args->from && match_int(args, &arg))
- return -EINVAL;
- set_opt(sbi, INLINE_XATTR_SIZE);
- F2FS_OPTION(sbi).inline_xattr_size = arg;
- break;
+ case Opt_user_xattr:
+ if (result.negated)
+ ctx_clear_opt(ctx, F2FS_MOUNT_XATTR_USER);
+ else
+ ctx_set_opt(ctx, F2FS_MOUNT_XATTR_USER);
+ break;
+ case Opt_inline_xattr:
+ if (result.negated)
+ ctx_clear_opt(ctx, F2FS_MOUNT_INLINE_XATTR);
+ else
+ ctx_set_opt(ctx, F2FS_MOUNT_INLINE_XATTR);
+ break;
+ case Opt_inline_xattr_size:
+ if (result.int_32 < MIN_INLINE_XATTR_SIZE ||
+ result.int_32 > MAX_INLINE_XATTR_SIZE) {
+ f2fs_err(NULL, "inline xattr size is out of range: %u ~ %u",
+ (u32)MIN_INLINE_XATTR_SIZE, (u32)MAX_INLINE_XATTR_SIZE);
+ return -EINVAL;
+ }
+ ctx_set_opt(ctx, F2FS_MOUNT_INLINE_XATTR_SIZE);
+ F2FS_CTX_INFO(ctx).inline_xattr_size = result.int_32;
+ ctx->spec_mask |= F2FS_SPEC_inline_xattr_size;
+ break;
#else
- case Opt_user_xattr:
- case Opt_nouser_xattr:
- case Opt_inline_xattr:
- case Opt_noinline_xattr:
- case Opt_inline_xattr_size:
- f2fs_info(sbi, "xattr options not supported");
- break;
+ case Opt_user_xattr:
+ case Opt_inline_xattr:
+ case Opt_inline_xattr_size:
+ f2fs_info(NULL, "%s options not supported", param->key);
+ break;
#endif
#ifdef CONFIG_F2FS_FS_POSIX_ACL
- case Opt_acl:
- set_opt(sbi, POSIX_ACL);
- break;
- case Opt_noacl:
- clear_opt(sbi, POSIX_ACL);
- break;
+ case Opt_acl:
+ if (result.negated)
+ ctx_clear_opt(ctx, F2FS_MOUNT_POSIX_ACL);
+ else
+ ctx_set_opt(ctx, F2FS_MOUNT_POSIX_ACL);
+ break;
#else
- case Opt_acl:
- case Opt_noacl:
- f2fs_info(sbi, "acl options not supported");
- break;
+ case Opt_acl:
+ f2fs_info(NULL, "%s options not supported", param->key);
+ break;
#endif
- case Opt_active_logs:
- if (args->from && match_int(args, &arg))
- return -EINVAL;
- if (arg != 2 && arg != 4 &&
- arg != NR_CURSEG_PERSIST_TYPE)
- return -EINVAL;
- F2FS_OPTION(sbi).active_logs = arg;
- break;
- case Opt_disable_ext_identify:
- set_opt(sbi, DISABLE_EXT_IDENTIFY);
- break;
- case Opt_inline_data:
- set_opt(sbi, INLINE_DATA);
- break;
- case Opt_inline_dentry:
- set_opt(sbi, INLINE_DENTRY);
- break;
- case Opt_noinline_dentry:
- clear_opt(sbi, INLINE_DENTRY);
- break;
- case Opt_flush_merge:
- set_opt(sbi, FLUSH_MERGE);
- break;
- case Opt_noflush_merge:
- clear_opt(sbi, FLUSH_MERGE);
- break;
- case Opt_nobarrier:
- set_opt(sbi, NOBARRIER);
- break;
- case Opt_barrier:
- clear_opt(sbi, NOBARRIER);
- break;
- case Opt_fastboot:
- set_opt(sbi, FASTBOOT);
- break;
- case Opt_extent_cache:
- set_opt(sbi, READ_EXTENT_CACHE);
- break;
- case Opt_noextent_cache:
- if (f2fs_sb_has_device_alias(sbi)) {
- f2fs_err(sbi, "device aliasing requires extent cache");
- return -EINVAL;
- }
- clear_opt(sbi, READ_EXTENT_CACHE);
- break;
- case Opt_noinline_data:
- clear_opt(sbi, INLINE_DATA);
- break;
- case Opt_data_flush:
- set_opt(sbi, DATA_FLUSH);
- break;
- case Opt_reserve_root:
- if (args->from && match_int(args, &arg))
- return -EINVAL;
- if (test_opt(sbi, RESERVE_ROOT)) {
- f2fs_info(sbi, "Preserve previous reserve_root=%u",
- F2FS_OPTION(sbi).root_reserved_blocks);
- } else {
- F2FS_OPTION(sbi).root_reserved_blocks = arg;
- set_opt(sbi, RESERVE_ROOT);
- }
- break;
- case Opt_resuid:
- if (args->from && match_int(args, &arg))
- return -EINVAL;
- uid = make_kuid(current_user_ns(), arg);
- if (!uid_valid(uid)) {
- f2fs_err(sbi, "Invalid uid value %d", arg);
- return -EINVAL;
- }
- F2FS_OPTION(sbi).s_resuid = uid;
- break;
- case Opt_resgid:
- if (args->from && match_int(args, &arg))
- return -EINVAL;
- gid = make_kgid(current_user_ns(), arg);
- if (!gid_valid(gid)) {
- f2fs_err(sbi, "Invalid gid value %d", arg);
- return -EINVAL;
- }
- F2FS_OPTION(sbi).s_resgid = gid;
- break;
- case Opt_mode:
- name = match_strdup(&args[0]);
-
- if (!name)
- return -ENOMEM;
- if (!strcmp(name, "adaptive")) {
- F2FS_OPTION(sbi).fs_mode = FS_MODE_ADAPTIVE;
- } else if (!strcmp(name, "lfs")) {
- F2FS_OPTION(sbi).fs_mode = FS_MODE_LFS;
- } else if (!strcmp(name, "fragment:segment")) {
- F2FS_OPTION(sbi).fs_mode = FS_MODE_FRAGMENT_SEG;
- } else if (!strcmp(name, "fragment:block")) {
- F2FS_OPTION(sbi).fs_mode = FS_MODE_FRAGMENT_BLK;
- } else {
- kfree(name);
- return -EINVAL;
- }
- kfree(name);
- break;
+ case Opt_active_logs:
+ if (result.int_32 != 2 && result.int_32 != 4 &&
+ result.int_32 != NR_CURSEG_PERSIST_TYPE)
+ return -EINVAL;
+ ctx->spec_mask |= F2FS_SPEC_active_logs;
+ F2FS_CTX_INFO(ctx).active_logs = result.int_32;
+ break;
+ case Opt_disable_ext_identify:
+ ctx_set_opt(ctx, F2FS_MOUNT_DISABLE_EXT_IDENTIFY);
+ break;
+ case Opt_inline_data:
+ if (result.negated)
+ ctx_clear_opt(ctx, F2FS_MOUNT_INLINE_DATA);
+ else
+ ctx_set_opt(ctx, F2FS_MOUNT_INLINE_DATA);
+ break;
+ case Opt_inline_dentry:
+ if (result.negated)
+ ctx_clear_opt(ctx, F2FS_MOUNT_INLINE_DENTRY);
+ else
+ ctx_set_opt(ctx, F2FS_MOUNT_INLINE_DENTRY);
+ break;
+ case Opt_flush_merge:
+ if (result.negated)
+ ctx_clear_opt(ctx, F2FS_MOUNT_FLUSH_MERGE);
+ else
+ ctx_set_opt(ctx, F2FS_MOUNT_FLUSH_MERGE);
+ break;
+ case Opt_barrier:
+ if (result.negated)
+ ctx_set_opt(ctx, F2FS_MOUNT_NOBARRIER);
+ else
+ ctx_clear_opt(ctx, F2FS_MOUNT_NOBARRIER);
+ break;
+ case Opt_fastboot:
+ ctx_set_opt(ctx, F2FS_MOUNT_FASTBOOT);
+ break;
+ case Opt_extent_cache:
+ if (result.negated)
+ ctx_clear_opt(ctx, F2FS_MOUNT_READ_EXTENT_CACHE);
+ else
+ ctx_set_opt(ctx, F2FS_MOUNT_READ_EXTENT_CACHE);
+ break;
+ case Opt_data_flush:
+ ctx_set_opt(ctx, F2FS_MOUNT_DATA_FLUSH);
+ break;
+ case Opt_reserve_root:
+ ctx_set_opt(ctx, F2FS_MOUNT_RESERVE_ROOT);
+ F2FS_CTX_INFO(ctx).root_reserved_blocks = result.uint_32;
+ ctx->spec_mask |= F2FS_SPEC_reserve_root;
+ break;
+ case Opt_resuid:
+ F2FS_CTX_INFO(ctx).s_resuid = result.uid;
+ ctx->spec_mask |= F2FS_SPEC_resuid;
+ break;
+ case Opt_resgid:
+ F2FS_CTX_INFO(ctx).s_resgid = result.gid;
+ ctx->spec_mask |= F2FS_SPEC_resgid;
+ break;
+ case Opt_mode:
+ F2FS_CTX_INFO(ctx).fs_mode = result.uint_32;
+ ctx->spec_mask |= F2FS_SPEC_mode;
+ break;
#ifdef CONFIG_F2FS_FAULT_INJECTION
- case Opt_fault_injection:
- if (args->from && match_int(args, &arg))
- return -EINVAL;
- if (f2fs_build_fault_attr(sbi, arg, 0, FAULT_RATE))
- return -EINVAL;
- set_opt(sbi, FAULT_INJECTION);
- break;
+ case Opt_fault_injection:
+ F2FS_CTX_INFO(ctx).fault_info.inject_rate = result.int_32;
+ ctx->spec_mask |= F2FS_SPEC_fault_injection;
+ ctx_set_opt(ctx, F2FS_MOUNT_FAULT_INJECTION);
+ break;
- case Opt_fault_type:
- if (args->from && match_int(args, &arg))
- return -EINVAL;
- if (f2fs_build_fault_attr(sbi, 0, arg, FAULT_TYPE))
- return -EINVAL;
- set_opt(sbi, FAULT_INJECTION);
- break;
+ case Opt_fault_type:
+ if (result.uint_32 > BIT(FAULT_MAX))
+ return -EINVAL;
+ F2FS_CTX_INFO(ctx).fault_info.inject_type = result.uint_32;
+ ctx->spec_mask |= F2FS_SPEC_fault_type;
+ ctx_set_opt(ctx, F2FS_MOUNT_FAULT_INJECTION);
+ break;
#else
- case Opt_fault_injection:
- case Opt_fault_type:
- f2fs_info(sbi, "fault injection options not supported");
- break;
+ case Opt_fault_injection:
+ case Opt_fault_type:
+ f2fs_info(NULL, "%s options not supported", param->key);
+ break;
#endif
- case Opt_lazytime:
- set_opt(sbi, LAZYTIME);
- break;
- case Opt_nolazytime:
- clear_opt(sbi, LAZYTIME);
- break;
+ case Opt_lazytime:
+ if (result.negated)
+ ctx_clear_opt(ctx, F2FS_MOUNT_LAZYTIME);
+ else
+ ctx_set_opt(ctx, F2FS_MOUNT_LAZYTIME);
+ break;
#ifdef CONFIG_QUOTA
- case Opt_quota:
- case Opt_usrquota:
- set_opt(sbi, USRQUOTA);
- break;
- case Opt_grpquota:
- set_opt(sbi, GRPQUOTA);
- break;
- case Opt_prjquota:
- set_opt(sbi, PRJQUOTA);
- break;
- case Opt_usrjquota:
- ret = f2fs_set_qf_name(sbi, USRQUOTA, &args[0]);
- if (ret)
- return ret;
- break;
- case Opt_grpjquota:
- ret = f2fs_set_qf_name(sbi, GRPQUOTA, &args[0]);
- if (ret)
- return ret;
- break;
- case Opt_prjjquota:
- ret = f2fs_set_qf_name(sbi, PRJQUOTA, &args[0]);
- if (ret)
- return ret;
- break;
- case Opt_offusrjquota:
- ret = f2fs_clear_qf_name(sbi, USRQUOTA);
- if (ret)
- return ret;
- break;
- case Opt_offgrpjquota:
- ret = f2fs_clear_qf_name(sbi, GRPQUOTA);
- if (ret)
- return ret;
- break;
- case Opt_offprjjquota:
- ret = f2fs_clear_qf_name(sbi, PRJQUOTA);
- if (ret)
- return ret;
- break;
- case Opt_jqfmt_vfsold:
- F2FS_OPTION(sbi).s_jquota_fmt = QFMT_VFS_OLD;
- break;
- case Opt_jqfmt_vfsv0:
- F2FS_OPTION(sbi).s_jquota_fmt = QFMT_VFS_V0;
- break;
- case Opt_jqfmt_vfsv1:
- F2FS_OPTION(sbi).s_jquota_fmt = QFMT_VFS_V1;
- break;
- case Opt_noquota:
- clear_opt(sbi, QUOTA);
- clear_opt(sbi, USRQUOTA);
- clear_opt(sbi, GRPQUOTA);
- clear_opt(sbi, PRJQUOTA);
- break;
+ case Opt_quota:
+ if (result.negated) {
+ ctx_clear_opt(ctx, F2FS_MOUNT_QUOTA);
+ ctx_clear_opt(ctx, F2FS_MOUNT_USRQUOTA);
+ ctx_clear_opt(ctx, F2FS_MOUNT_GRPQUOTA);
+ ctx_clear_opt(ctx, F2FS_MOUNT_PRJQUOTA);
+ } else
+ ctx_set_opt(ctx, F2FS_MOUNT_USRQUOTA);
+ break;
+ case Opt_usrquota:
+ ctx_set_opt(ctx, F2FS_MOUNT_USRQUOTA);
+ break;
+ case Opt_grpquota:
+ ctx_set_opt(ctx, F2FS_MOUNT_GRPQUOTA);
+ break;
+ case Opt_prjquota:
+ ctx_set_opt(ctx, F2FS_MOUNT_PRJQUOTA);
+ break;
+ case Opt_usrjquota:
+ if (!*param->string)
+ ret = f2fs_unnote_qf_name(fc, USRQUOTA);
+ else
+ ret = f2fs_note_qf_name(fc, USRQUOTA, param);
+ if (ret)
+ return ret;
+ break;
+ case Opt_grpjquota:
+ if (!*param->string)
+ ret = f2fs_unnote_qf_name(fc, GRPQUOTA);
+ else
+ ret = f2fs_note_qf_name(fc, GRPQUOTA, param);
+ if (ret)
+ return ret;
+ break;
+ case Opt_prjjquota:
+ if (!*param->string)
+ ret = f2fs_unnote_qf_name(fc, PRJQUOTA);
+ else
+ ret = f2fs_note_qf_name(fc, PRJQUOTA, param);
+ if (ret)
+ return ret;
+ break;
+ case Opt_jqfmt:
+ F2FS_CTX_INFO(ctx).s_jquota_fmt = result.int_32;
+ ctx->spec_mask |= F2FS_SPEC_jqfmt;
+ break;
#else
- case Opt_quota:
- case Opt_usrquota:
- case Opt_grpquota:
- case Opt_prjquota:
- case Opt_usrjquota:
- case Opt_grpjquota:
- case Opt_prjjquota:
- case Opt_offusrjquota:
- case Opt_offgrpjquota:
- case Opt_offprjjquota:
- case Opt_jqfmt_vfsold:
- case Opt_jqfmt_vfsv0:
- case Opt_jqfmt_vfsv1:
- case Opt_noquota:
- f2fs_info(sbi, "quota operations not supported");
- break;
+ case Opt_quota:
+ case Opt_usrquota:
+ case Opt_grpquota:
+ case Opt_prjquota:
+ case Opt_usrjquota:
+ case Opt_grpjquota:
+ case Opt_prjjquota:
+ f2fs_info(NULL, "quota operations not supported");
+ break;
#endif
- case Opt_alloc:
- name = match_strdup(&args[0]);
- if (!name)
- return -ENOMEM;
-
- if (!strcmp(name, "default")) {
- F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_DEFAULT;
- } else if (!strcmp(name, "reuse")) {
- F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_REUSE;
- } else {
- kfree(name);
- return -EINVAL;
- }
- kfree(name);
- break;
- case Opt_fsync:
- name = match_strdup(&args[0]);
- if (!name)
- return -ENOMEM;
- if (!strcmp(name, "posix")) {
- F2FS_OPTION(sbi).fsync_mode = FSYNC_MODE_POSIX;
- } else if (!strcmp(name, "strict")) {
- F2FS_OPTION(sbi).fsync_mode = FSYNC_MODE_STRICT;
- } else if (!strcmp(name, "nobarrier")) {
- F2FS_OPTION(sbi).fsync_mode =
- FSYNC_MODE_NOBARRIER;
- } else {
- kfree(name);
- return -EINVAL;
- }
- kfree(name);
- break;
- case Opt_test_dummy_encryption:
- ret = f2fs_set_test_dummy_encryption(sbi, p, &args[0],
- is_remount);
- if (ret)
- return ret;
- break;
- case Opt_inlinecrypt:
+ case Opt_alloc:
+ F2FS_CTX_INFO(ctx).alloc_mode = result.uint_32;
+ ctx->spec_mask |= F2FS_SPEC_alloc_mode;
+ break;
+ case Opt_fsync:
+ F2FS_CTX_INFO(ctx).fsync_mode = result.uint_32;
+ ctx->spec_mask |= F2FS_SPEC_fsync_mode;
+ break;
+ case Opt_test_dummy_encryption:
+ ret = f2fs_parse_test_dummy_encryption(param, ctx);
+ if (ret)
+ return ret;
+ break;
+ case Opt_inlinecrypt:
#ifdef CONFIG_FS_ENCRYPTION_INLINE_CRYPT
- set_opt(sbi, INLINECRYPT);
+ ctx_set_opt(ctx, F2FS_MOUNT_INLINECRYPT);
#else
- f2fs_info(sbi, "inline encryption not supported");
+ f2fs_info(NULL, "inline encryption not supported");
#endif
- break;
+ break;
+ case Opt_checkpoint:
+ /*
+ * Initialize args struct so we know whether arg was
+ * found; some options take optional arguments.
+ */
+ args[0].from = args[0].to = NULL;
+ arg = 0;
+
+ /* revert to match_table for checkpoint= options */
+ token = match_token(param->string, f2fs_checkpoint_tokens, args);
+ switch (token) {
case Opt_checkpoint_disable_cap_perc:
if (args->from && match_int(args, &arg))
return -EINVAL;
if (arg < 0 || arg > 100)
return -EINVAL;
- F2FS_OPTION(sbi).unusable_cap_perc = arg;
- set_opt(sbi, DISABLE_CHECKPOINT);
+ F2FS_CTX_INFO(ctx).unusable_cap_perc = arg;
+ ctx->spec_mask |= F2FS_SPEC_checkpoint_disable_cap_perc;
+ ctx_set_opt(ctx, F2FS_MOUNT_DISABLE_CHECKPOINT);
break;
case Opt_checkpoint_disable_cap:
if (args->from && match_int(args, &arg))
return -EINVAL;
- F2FS_OPTION(sbi).unusable_cap = arg;
- set_opt(sbi, DISABLE_CHECKPOINT);
+ F2FS_CTX_INFO(ctx).unusable_cap = arg;
+ ctx->spec_mask |= F2FS_SPEC_checkpoint_disable_cap;
+ ctx_set_opt(ctx, F2FS_MOUNT_DISABLE_CHECKPOINT);
break;
case Opt_checkpoint_disable:
- set_opt(sbi, DISABLE_CHECKPOINT);
+ ctx_set_opt(ctx, F2FS_MOUNT_DISABLE_CHECKPOINT);
break;
case Opt_checkpoint_enable:
- clear_opt(sbi, DISABLE_CHECKPOINT);
- break;
- case Opt_checkpoint_merge:
- set_opt(sbi, MERGE_CHECKPOINT);
- break;
- case Opt_nocheckpoint_merge:
- clear_opt(sbi, MERGE_CHECKPOINT);
+ ctx_clear_opt(ctx, F2FS_MOUNT_DISABLE_CHECKPOINT);
break;
+ default:
+ return -EINVAL;
+ }
+ break;
+ case Opt_checkpoint_merge:
+ if (result.negated)
+ ctx_clear_opt(ctx, F2FS_MOUNT_MERGE_CHECKPOINT);
+ else
+ ctx_set_opt(ctx, F2FS_MOUNT_MERGE_CHECKPOINT);
+ break;
#ifdef CONFIG_F2FS_FS_COMPRESSION
- case Opt_compress_algorithm:
- if (!f2fs_sb_has_compression(sbi)) {
- f2fs_info(sbi, "Image doesn't support compression");
- break;
- }
- name = match_strdup(&args[0]);
- if (!name)
- return -ENOMEM;
- if (!strcmp(name, "lzo")) {
+ case Opt_compress_algorithm:
+ name = param->string;
+ if (!strcmp(name, "lzo")) {
#ifdef CONFIG_F2FS_FS_LZO
- F2FS_OPTION(sbi).compress_level = 0;
- F2FS_OPTION(sbi).compress_algorithm =
- COMPRESS_LZO;
+ F2FS_CTX_INFO(ctx).compress_level = 0;
+ F2FS_CTX_INFO(ctx).compress_algorithm = COMPRESS_LZO;
+ ctx->spec_mask |= F2FS_SPEC_compress_level;
+ ctx->spec_mask |= F2FS_SPEC_compress_algorithm;
#else
- f2fs_info(sbi, "kernel doesn't support lzo compression");
+ f2fs_info(NULL, "kernel doesn't support lzo compression");
#endif
- } else if (!strncmp(name, "lz4", 3)) {
+ } else if (!strncmp(name, "lz4", 3)) {
#ifdef CONFIG_F2FS_FS_LZ4
- ret = f2fs_set_lz4hc_level(sbi, name);
- if (ret) {
- kfree(name);
- return -EINVAL;
- }
- F2FS_OPTION(sbi).compress_algorithm =
- COMPRESS_LZ4;
+ ret = f2fs_set_lz4hc_level(ctx, name);
+ if (ret)
+ return -EINVAL;
+ F2FS_CTX_INFO(ctx).compress_algorithm = COMPRESS_LZ4;
+ ctx->spec_mask |= F2FS_SPEC_compress_algorithm;
#else
- f2fs_info(sbi, "kernel doesn't support lz4 compression");
+ f2fs_info(NULL, "kernel doesn't support lz4 compression");
#endif
- } else if (!strncmp(name, "zstd", 4)) {
+ } else if (!strncmp(name, "zstd", 4)) {
#ifdef CONFIG_F2FS_FS_ZSTD
- ret = f2fs_set_zstd_level(sbi, name);
- if (ret) {
- kfree(name);
- return -EINVAL;
- }
- F2FS_OPTION(sbi).compress_algorithm =
- COMPRESS_ZSTD;
+ ret = f2fs_set_zstd_level(ctx, name);
+ if (ret)
+ return -EINVAL;
+ F2FS_CTX_INFO(ctx).compress_algorithm = COMPRESS_ZSTD;
+ ctx->spec_mask |= F2FS_SPEC_compress_algorithm;
#else
- f2fs_info(sbi, "kernel doesn't support zstd compression");
+ f2fs_info(NULL, "kernel doesn't support zstd compression");
#endif
- } else if (!strcmp(name, "lzo-rle")) {
+ } else if (!strcmp(name, "lzo-rle")) {
#ifdef CONFIG_F2FS_FS_LZORLE
- F2FS_OPTION(sbi).compress_level = 0;
- F2FS_OPTION(sbi).compress_algorithm =
- COMPRESS_LZORLE;
+ F2FS_CTX_INFO(ctx).compress_level = 0;
+ F2FS_CTX_INFO(ctx).compress_algorithm = COMPRESS_LZORLE;
+ ctx->spec_mask |= F2FS_SPEC_compress_level;
+ ctx->spec_mask |= F2FS_SPEC_compress_algorithm;
#else
- f2fs_info(sbi, "kernel doesn't support lzorle compression");
+ f2fs_info(NULL, "kernel doesn't support lzorle compression");
#endif
- } else {
- kfree(name);
- return -EINVAL;
- }
- kfree(name);
+ } else
+ return -EINVAL;
+ break;
+ case Opt_compress_log_size:
+ if (result.uint_32 < MIN_COMPRESS_LOG_SIZE ||
+ result.uint_32 > MAX_COMPRESS_LOG_SIZE) {
+ f2fs_err(NULL,
+ "Compress cluster log size is out of range");
+ return -EINVAL;
+ }
+ F2FS_CTX_INFO(ctx).compress_log_size = result.uint_32;
+ ctx->spec_mask |= F2FS_SPEC_compress_log_size;
+ break;
+ case Opt_compress_extension:
+ name = param->string;
+ ext = F2FS_CTX_INFO(ctx).extensions;
+ ext_cnt = F2FS_CTX_INFO(ctx).compress_ext_cnt;
+
+ if (strlen(name) >= F2FS_EXTENSION_LEN ||
+ ext_cnt >= COMPRESS_EXT_NUM) {
+ f2fs_err(NULL, "invalid extension length/number");
+ return -EINVAL;
+ }
+
+ if (is_compress_extension_exist(&ctx->info, name, true))
break;
- case Opt_compress_log_size:
- if (!f2fs_sb_has_compression(sbi)) {
- f2fs_info(sbi, "Image doesn't support compression");
- break;
- }
- if (args->from && match_int(args, &arg))
- return -EINVAL;
- if (arg < MIN_COMPRESS_LOG_SIZE ||
- arg > MAX_COMPRESS_LOG_SIZE) {
- f2fs_err(sbi,
- "Compress cluster log size is out of range");
- return -EINVAL;
- }
- F2FS_OPTION(sbi).compress_log_size = arg;
+
+ ret = strscpy(ext[ext_cnt], name, F2FS_EXTENSION_LEN);
+ if (ret < 0)
+ return ret;
+ F2FS_CTX_INFO(ctx).compress_ext_cnt++;
+ ctx->spec_mask |= F2FS_SPEC_compress_extension;
+ break;
+ case Opt_nocompress_extension:
+ name = param->string;
+ noext = F2FS_CTX_INFO(ctx).noextensions;
+ noext_cnt = F2FS_CTX_INFO(ctx).nocompress_ext_cnt;
+
+ if (strlen(name) >= F2FS_EXTENSION_LEN ||
+ noext_cnt >= COMPRESS_EXT_NUM) {
+ f2fs_err(NULL, "invalid extension length/number");
+ return -EINVAL;
+ }
+
+ if (is_compress_extension_exist(&ctx->info, name, false))
break;
- case Opt_compress_extension:
- if (!f2fs_sb_has_compression(sbi)) {
- f2fs_info(sbi, "Image doesn't support compression");
- break;
- }
- name = match_strdup(&args[0]);
- if (!name)
- return -ENOMEM;
- ext = F2FS_OPTION(sbi).extensions;
- ext_cnt = F2FS_OPTION(sbi).compress_ext_cnt;
+ ret = strscpy(noext[noext_cnt], name, F2FS_EXTENSION_LEN);
+ if (ret < 0)
+ return ret;
+ F2FS_CTX_INFO(ctx).nocompress_ext_cnt++;
+ ctx->spec_mask |= F2FS_SPEC_nocompress_extension;
+ break;
+ case Opt_compress_chksum:
+ F2FS_CTX_INFO(ctx).compress_chksum = true;
+ ctx->spec_mask |= F2FS_SPEC_compress_chksum;
+ break;
+ case Opt_compress_mode:
+ F2FS_CTX_INFO(ctx).compress_mode = result.uint_32;
+ ctx->spec_mask |= F2FS_SPEC_compress_mode;
+ break;
+ case Opt_compress_cache:
+ ctx_set_opt(ctx, F2FS_MOUNT_COMPRESS_CACHE);
+ break;
+#else
+ case Opt_compress_algorithm:
+ case Opt_compress_log_size:
+ case Opt_compress_extension:
+ case Opt_nocompress_extension:
+ case Opt_compress_chksum:
+ case Opt_compress_mode:
+ case Opt_compress_cache:
+ f2fs_info(NULL, "compression options not supported");
+ break;
+#endif
+ case Opt_atgc:
+ ctx_set_opt(ctx, F2FS_MOUNT_ATGC);
+ break;
+ case Opt_gc_merge:
+ if (result.negated)
+ ctx_clear_opt(ctx, F2FS_MOUNT_GC_MERGE);
+ else
+ ctx_set_opt(ctx, F2FS_MOUNT_GC_MERGE);
+ break;
+ case Opt_discard_unit:
+ F2FS_CTX_INFO(ctx).discard_unit = result.uint_32;
+ ctx->spec_mask |= F2FS_SPEC_discard_unit;
+ break;
+ case Opt_memory_mode:
+ F2FS_CTX_INFO(ctx).memory_mode = result.uint_32;
+ ctx->spec_mask |= F2FS_SPEC_memory_mode;
+ break;
+ case Opt_age_extent_cache:
+ ctx_set_opt(ctx, F2FS_MOUNT_AGE_EXTENT_CACHE);
+ break;
+ case Opt_errors:
+ F2FS_CTX_INFO(ctx).errors = result.uint_32;
+ ctx->spec_mask |= F2FS_SPEC_errors;
+ break;
+ case Opt_nat_bits:
+ ctx_set_opt(ctx, F2FS_MOUNT_NAT_BITS);
+ break;
+ }
+ return 0;
+}
- if (strlen(name) >= F2FS_EXTENSION_LEN ||
- ext_cnt >= COMPRESS_EXT_NUM) {
- f2fs_err(sbi,
- "invalid extension length/number");
- kfree(name);
- return -EINVAL;
- }
+/*
+ * Check quota settings consistency.
+ *
+ * Compares the quota options collected in the fs_context (@fc) with what is
+ * already stored in the sbi of @sb. Only the context is modified here:
+ * qname_mask bits are cleared for names that need no update, a new name is
+ * dropped when the QUOTA feature makes it irrelevant, and the
+ * usrquota/grpquota/prjquota flags are cleared for every type that has a
+ * journaled quota file name in effect.
+ *
+ * Without CONFIG_QUOTA the only check is that an image carrying the quota or
+ * project-quota feature is not mounted read-write.
+ *
+ * Returns 0 when the settings are consistent, -EINVAL otherwise.
+ */
+static int f2fs_check_quota_consistency(struct fs_context *fc,
+					struct super_block *sb)
+{
+	struct f2fs_sb_info *sbi = F2FS_SB(sb);
+#ifdef CONFIG_QUOTA
+	struct f2fs_fs_context *ctx = fc->fs_private;
+	bool quota_feature = f2fs_sb_has_quota_ino(sbi);
+	bool quota_turnon = sb_any_quota_loaded(sb);
+	char *old_qname, *new_qname;
+	bool usr_qf_name, grp_qf_name, prj_qf_name, usrquota, grpquota, prjquota;
+	int i;
- if (is_compress_extension_exist(sbi, name, true)) {
- kfree(name);
- break;
- }
+	/*
+	 * We do the test below only for project quotas. 'usrquota' and
+	 * 'grpquota' mount options are allowed even without quota feature
+	 * to support legacy quotas in quota files.
+	 */
+	if (ctx_test_opt(ctx, F2FS_MOUNT_PRJQUOTA) &&
+			!f2fs_sb_has_project_quota(sbi)) {
+		f2fs_err(sbi, "Project quota feature not enabled. Cannot enable project quota enforcement.");
+		return -EINVAL;
+	}
- ret = strscpy(ext[ext_cnt], name);
- if (ret < 0) {
- kfree(name);
- return ret;
- }
- F2FS_OPTION(sbi).compress_ext_cnt++;
- kfree(name);
- break;
- case Opt_nocompress_extension:
- if (!f2fs_sb_has_compression(sbi)) {
- f2fs_info(sbi, "Image doesn't support compression");
- break;
- }
- name = match_strdup(&args[0]);
- if (!name)
- return -ENOMEM;
+	if (ctx->qname_mask) {
+		for (i = 0; i < MAXQUOTAS; i++) {
+			if (!(ctx->qname_mask & (1 << i)))
+				continue;
- noext = F2FS_OPTION(sbi).noextensions;
- noext_cnt = F2FS_OPTION(sbi).nocompress_ext_cnt;
+			old_qname = F2FS_OPTION(sbi).s_qf_names[i];
+			new_qname = F2FS_CTX_INFO(ctx).s_qf_names[i];
+			if (quota_turnon &&
+				!!old_qname != !!new_qname)
+				goto err_jquota_change;
- if (strlen(name) >= F2FS_EXTENSION_LEN ||
- noext_cnt >= COMPRESS_EXT_NUM) {
- f2fs_err(sbi,
- "invalid extension length/number");
- kfree(name);
- return -EINVAL;
+			if (old_qname) {
+				/*
+				 * new_qname can be NULL while the mask bit is
+				 * set (the !!-comparison above only rejects
+				 * that when quota is loaded), and strcmp()
+				 * must never see NULL. Leave the mask bit set
+				 * so f2fs_apply_quota_options() stores the
+				 * NULL name into sbi.
+				 */
+				if (!new_qname) {
+					f2fs_info(sbi, "remove qf_name %s",
+								old_qname);
+					continue;
+				}
+				if (strcmp(old_qname, new_qname) == 0) {
+					ctx->qname_mask &= ~(1 << i);
+					continue;
+				}
+				goto err_jquota_specified;
 			}
- if (is_compress_extension_exist(sbi, name, false)) {
- kfree(name);
- break;
+			if (quota_feature) {
+				f2fs_info(sbi, "QUOTA feature is enabled, so ignore qf_name");
+				ctx->qname_mask &= ~(1 << i);
+				kfree(F2FS_CTX_INFO(ctx).s_qf_names[i]);
+				F2FS_CTX_INFO(ctx).s_qf_names[i] = NULL;
 			}
+		}
+	}
+
+	/* Make sure we don't mix old and new quota format */
+	usr_qf_name = F2FS_OPTION(sbi).s_qf_names[USRQUOTA] ||
+			F2FS_CTX_INFO(ctx).s_qf_names[USRQUOTA];
+	grp_qf_name = F2FS_OPTION(sbi).s_qf_names[GRPQUOTA] ||
+			F2FS_CTX_INFO(ctx).s_qf_names[GRPQUOTA];
+	prj_qf_name = F2FS_OPTION(sbi).s_qf_names[PRJQUOTA] ||
+			F2FS_CTX_INFO(ctx).s_qf_names[PRJQUOTA];
+	usrquota = test_opt(sbi, USRQUOTA) ||
+			ctx_test_opt(ctx, F2FS_MOUNT_USRQUOTA);
+	grpquota = test_opt(sbi, GRPQUOTA) ||
+			ctx_test_opt(ctx, F2FS_MOUNT_GRPQUOTA);
+	prjquota = test_opt(sbi, PRJQUOTA) ||
+			ctx_test_opt(ctx, F2FS_MOUNT_PRJQUOTA);
+
+	/* A journaled quota file name overrides the plain xxxquota flag. */
+	if (usr_qf_name) {
+		ctx_clear_opt(ctx, F2FS_MOUNT_USRQUOTA);
+		usrquota = false;
+	}
+	if (grp_qf_name) {
+		ctx_clear_opt(ctx, F2FS_MOUNT_GRPQUOTA);
+		grpquota = false;
+	}
+	if (prj_qf_name) {
+		ctx_clear_opt(ctx, F2FS_MOUNT_PRJQUOTA);
+		prjquota = false;
+	}
+	if (usr_qf_name || grp_qf_name || prj_qf_name) {
+		if (grpquota || usrquota || prjquota) {
+			f2fs_err(sbi, "old and new quota format mixing");
+			return -EINVAL;
+		}
+		if (!(ctx->spec_mask & F2FS_SPEC_jqfmt ||
+				F2FS_OPTION(sbi).s_jquota_fmt)) {
+			f2fs_err(sbi, "journaled quota format not specified");
+			return -EINVAL;
+		}
+	}
+	return 0;
+
+err_jquota_change:
+	f2fs_err(sbi, "Cannot change journaled quota options when quota turned on");
+	return -EINVAL;
+err_jquota_specified:
+	f2fs_err(sbi, "%s quota file already specified",
+		 QTYPE2NAME(i));
+	return -EINVAL;
- ret = strscpy(noext[noext_cnt], name);
- if (ret < 0) {
- kfree(name);
- return ret;
- }
- F2FS_OPTION(sbi).nocompress_ext_cnt++;
- kfree(name);
- break;
- case Opt_compress_chksum:
- if (!f2fs_sb_has_compression(sbi)) {
- f2fs_info(sbi, "Image doesn't support compression");
- break;
- }
- F2FS_OPTION(sbi).compress_chksum = true;
- break;
- case Opt_compress_mode:
- if (!f2fs_sb_has_compression(sbi)) {
- f2fs_info(sbi, "Image doesn't support compression");
- break;
- }
- name = match_strdup(&args[0]);
- if (!name)
- return -ENOMEM;
- if (!strcmp(name, "fs")) {
- F2FS_OPTION(sbi).compress_mode = COMPR_MODE_FS;
- } else if (!strcmp(name, "user")) {
- F2FS_OPTION(sbi).compress_mode = COMPR_MODE_USER;
- } else {
- kfree(name);
- return -EINVAL;
- }
- kfree(name);
- break;
- case Opt_compress_cache:
- if (!f2fs_sb_has_compression(sbi)) {
- f2fs_info(sbi, "Image doesn't support compression");
- break;
- }
- set_opt(sbi, COMPRESS_CACHE);
- break;
 #else
- case Opt_compress_algorithm:
- case Opt_compress_log_size:
- case Opt_compress_extension:
- case Opt_nocompress_extension:
- case Opt_compress_chksum:
- case Opt_compress_mode:
- case Opt_compress_cache:
- f2fs_info(sbi, "compression options not supported");
- break;
+	/* Read-only mounts never touch quota data, so they are always fine. */
+	if (f2fs_readonly(sbi->sb))
+		return 0;
+	if (f2fs_sb_has_quota_ino(sbi)) {
+		f2fs_info(sbi, "Filesystem with quota feature cannot be mounted RDWR without CONFIG_QUOTA");
+		return -EINVAL;
+	}
+	if (f2fs_sb_has_project_quota(sbi)) {
+		f2fs_err(sbi, "Filesystem with project quota feature cannot be mounted RDWR without CONFIG_QUOTA");
+		return -EINVAL;
+	}
+
+	return 0;
 #endif
- case Opt_atgc:
- set_opt(sbi, ATGC);
- break;
- case Opt_gc_merge:
- set_opt(sbi, GC_MERGE);
- break;
- case Opt_nogc_merge:
- clear_opt(sbi, GC_MERGE);
- break;
- case Opt_discard_unit:
- name = match_strdup(&args[0]);
- if (!name)
- return -ENOMEM;
- if (!strcmp(name, "block")) {
- F2FS_OPTION(sbi).discard_unit =
- DISCARD_UNIT_BLOCK;
- } else if (!strcmp(name, "segment")) {
- F2FS_OPTION(sbi).discard_unit =
- DISCARD_UNIT_SEGMENT;
- } else if (!strcmp(name, "section")) {
- F2FS_OPTION(sbi).discard_unit =
- DISCARD_UNIT_SECTION;
- } else {
- kfree(name);
- return -EINVAL;
- }
- kfree(name);
- break;
- case Opt_memory_mode:
- name = match_strdup(&args[0]);
- if (!name)
- return -ENOMEM;
- if (!strcmp(name, "normal")) {
- F2FS_OPTION(sbi).memory_mode =
- MEMORY_MODE_NORMAL;
- } else if (!strcmp(name, "low")) {
- F2FS_OPTION(sbi).memory_mode =
- MEMORY_MODE_LOW;
- } else {
- kfree(name);
- return -EINVAL;
+}
+
+/*
+ * Validate the test_dummy_encryption policy carried by @fc against @sb.
+ *
+ * Returns 0 when no dummy policy was requested or the request is acceptable,
+ * -EINVAL when the image lacks the encrypt feature or when a reconfigure
+ * (remount) would set or change the policy.
+ */
+static int f2fs_check_test_dummy_encryption(struct fs_context *fc,
+					    struct super_block *sb)
+{
+	struct f2fs_sb_info *sbi = F2FS_SB(sb);
+	struct f2fs_fs_context *ctx = fc->fs_private;
+
+	/* Nothing to validate unless a dummy policy is set in the context. */
+	if (!fscrypt_is_dummy_policy_set(&F2FS_CTX_INFO(ctx).dummy_enc_policy))
+		return 0;
+
+	if (!f2fs_sb_has_encrypt(sbi)) {
+		f2fs_err(sbi, "Encrypt feature is off");
+		return -EINVAL;
+	}
+
+	/* Outside of a reconfigure there is nothing further to compare. */
+	if (fc->purpose != FS_CONTEXT_FOR_RECONFIGURE)
+		return 0;
+
+	/*
+	 * This mount option is just for testing, and it's not worthwhile to
+	 * implement the extra complexity (e.g. RCU protection) that would be
+	 * needed to allow it to be set or changed during remount. We do allow
+	 * it to be specified during remount, but only if there is no change.
+	 */
+	if (!fscrypt_dummy_policies_equal(&F2FS_OPTION(sbi).dummy_enc_policy,
+				&F2FS_CTX_INFO(ctx).dummy_enc_policy)) {
+		f2fs_warn(sbi, "Can't set or change test_dummy_encryption on remount");
+		return -EINVAL;
+	}
+	return 0;
+}
+
+/*
+ * Return true if @mask has any of the compression-related F2FS_SPEC_ bits
+ * (algorithm, log size, extension, nocompress extension, chksum, mode) set.
+ */
+static inline bool test_compression_spec(unsigned int mask)
+{
+	if (mask & F2FS_SPEC_compress_algorithm)
+		return true;
+	if (mask & F2FS_SPEC_compress_log_size)
+		return true;
+	if (mask & F2FS_SPEC_compress_extension)
+		return true;
+	if (mask & F2FS_SPEC_nocompress_extension)
+		return true;
+	if (mask & F2FS_SPEC_compress_chksum)
+		return true;
+	if (mask & F2FS_SPEC_compress_mode)
+		return true;
+	return false;
+}
+
+/*
+ * Drop every compression-related F2FS_SPEC_ bit from @ctx so that
+ * f2fs_apply_compression() copies none of the parsed compression settings.
+ *
+ * F2FS_SPEC_compress_level is part of the set: the option parser raises it
+ * together with F2FS_SPEC_compress_algorithm, and f2fs_apply_compression()
+ * tests it on its own, so leaving it behind would still overwrite the
+ * compress_level stored in sbi.
+ */
+static inline void clear_compression_spec(struct f2fs_fs_context *ctx)
+{
+	ctx->spec_mask &= ~(F2FS_SPEC_compress_level
+				| F2FS_SPEC_compress_algorithm
+				| F2FS_SPEC_compress_log_size
+				| F2FS_SPEC_compress_extension
+				| F2FS_SPEC_nocompress_extension
+				| F2FS_SPEC_compress_chksum
+				| F2FS_SPEC_compress_mode);
+}
+
+/*
+ * Validate the compression options carried by @fc against @sb.
+ *
+ * If the image lacks the compression feature, every compression option in
+ * the context is dropped (with an informational message if any was given)
+ * and 0 is returned. Otherwise extensions that sbi already lists are blanked
+ * in the context, the combined list sizes are checked against
+ * COMPRESS_EXT_NUM, and the extension / nocompress-extension lists are
+ * checked against each other (new vs. new, new vs. old, old vs. new).
+ *
+ * Returns 0 on success, -EINVAL on a size overflow or a conflict. Compiles to
+ * "return 0" without CONFIG_F2FS_FS_COMPRESSION.
+ */
+static int f2fs_check_compression(struct fs_context *fc,
+				  struct super_block *sb)
+{
+#ifdef CONFIG_F2FS_FS_COMPRESSION
+	struct f2fs_fs_context *ctx = fc->fs_private;
+	struct f2fs_sb_info *sbi = F2FS_SB(sb);
+	int i, cnt;
+
+	if (!f2fs_sb_has_compression(sbi)) {
+		if (test_compression_spec(ctx->spec_mask) ||
+			ctx_test_opt(ctx, F2FS_MOUNT_COMPRESS_CACHE))
+			f2fs_info(sbi, "Image doesn't support compression");
+		clear_compression_spec(ctx);
+		/*
+		 * f2fs_apply_options() ORs the context's opt bits into sbi
+		 * regardless of opt_mask, so the bit itself has to go as
+		 * well, not just its mask entry. This is the same two-step
+		 * drop f2fs_check_opt_consistency() uses for DISCARD and
+		 * RESERVE_ROOT.
+		 */
+		ctx_clear_opt(ctx, F2FS_MOUNT_COMPRESS_CACHE);
+		ctx->opt_mask &= ~F2FS_MOUNT_COMPRESS_CACHE;
+		return 0;
+	}
+	if (ctx->spec_mask & F2FS_SPEC_compress_extension) {
+		/* cnt = number of new extensions sbi does not know yet */
+		cnt = F2FS_CTX_INFO(ctx).compress_ext_cnt;
+		for (i = 0; i < F2FS_CTX_INFO(ctx).compress_ext_cnt; i++) {
+			if (is_compress_extension_exist(&F2FS_OPTION(sbi),
+					F2FS_CTX_INFO(ctx).extensions[i], true)) {
+				/* blank the duplicate; the apply step skips empty slots */
+				F2FS_CTX_INFO(ctx).extensions[i][0] = '\0';
+				cnt--;
 			}
- kfree(name);
- break;
- case Opt_age_extent_cache:
- set_opt(sbi, AGE_EXTENT_CACHE);
- break;
- case Opt_errors:
- name = match_strdup(&args[0]);
- if (!name)
- return -ENOMEM;
- if (!strcmp(name, "remount-ro")) {
- F2FS_OPTION(sbi).errors =
- MOUNT_ERRORS_READONLY;
- } else if (!strcmp(name, "continue")) {
- F2FS_OPTION(sbi).errors =
- MOUNT_ERRORS_CONTINUE;
- } else if (!strcmp(name, "panic")) {
- F2FS_OPTION(sbi).errors =
- MOUNT_ERRORS_PANIC;
- } else {
- kfree(name);
- return -EINVAL;
+		}
+		if (F2FS_OPTION(sbi).compress_ext_cnt + cnt > COMPRESS_EXT_NUM) {
+			f2fs_err(sbi, "invalid extension length/number");
+			return -EINVAL;
+		}
+	}
+	if (ctx->spec_mask & F2FS_SPEC_nocompress_extension) {
+		/* same de-duplication for the nocompress list */
+		cnt = F2FS_CTX_INFO(ctx).nocompress_ext_cnt;
+		for (i = 0; i < F2FS_CTX_INFO(ctx).nocompress_ext_cnt; i++) {
+			if (is_compress_extension_exist(&F2FS_OPTION(sbi),
+					F2FS_CTX_INFO(ctx).noextensions[i], false)) {
+				F2FS_CTX_INFO(ctx).noextensions[i][0] = '\0';
+				cnt--;
 			}
- kfree(name);
- break;
- case Opt_nat_bits:
- set_opt(sbi, NAT_BITS);
- break;
- default:
- f2fs_err(sbi, "Unrecognized mount option \"%s\" or missing value",
- p);
+		}
+		if (F2FS_OPTION(sbi).nocompress_ext_cnt + cnt > COMPRESS_EXT_NUM) {
+			f2fs_err(sbi, "invalid noextension length/number");
 			return -EINVAL;
 		}
 	}
+
+	if (f2fs_test_compress_extension(F2FS_CTX_INFO(ctx).noextensions,
+				F2FS_CTX_INFO(ctx).nocompress_ext_cnt,
+				F2FS_CTX_INFO(ctx).extensions,
+				F2FS_CTX_INFO(ctx).compress_ext_cnt)) {
+		f2fs_err(sbi, "new noextensions conflicts with new extensions");
+		return -EINVAL;
+	}
+	if (f2fs_test_compress_extension(F2FS_CTX_INFO(ctx).noextensions,
+				F2FS_CTX_INFO(ctx).nocompress_ext_cnt,
+				F2FS_OPTION(sbi).extensions,
+				F2FS_OPTION(sbi).compress_ext_cnt)) {
+		f2fs_err(sbi, "new noextensions conflicts with old extensions");
+		return -EINVAL;
+	}
+	if (f2fs_test_compress_extension(F2FS_OPTION(sbi).noextensions,
+				F2FS_OPTION(sbi).nocompress_ext_cnt,
+				F2FS_CTX_INFO(ctx).extensions,
+				F2FS_CTX_INFO(ctx).compress_ext_cnt)) {
+		f2fs_err(sbi, "new extensions conflicts with old noextensions");
+		return -EINVAL;
+	}
+#endif
 	return 0;
 }
-static int f2fs_default_check(struct f2fs_sb_info *sbi)
+static int f2fs_check_opt_consistency(struct fs_context *fc,
+ struct super_block *sb)
{
-#ifdef CONFIG_QUOTA
- if (f2fs_check_quota_options(sbi))
+ struct f2fs_fs_context *ctx = fc->fs_private;
+ struct f2fs_sb_info *sbi = F2FS_SB(sb);
+ int err;
+
+ if (ctx_test_opt(ctx, F2FS_MOUNT_NORECOVERY) && !f2fs_readonly(sb))
return -EINVAL;
-#else
- if (f2fs_sb_has_quota_ino(sbi) && !f2fs_readonly(sbi->sb)) {
- f2fs_info(sbi, "Filesystem with quota feature cannot be mounted RDWR without CONFIG_QUOTA");
+
+ if (f2fs_hw_should_discard(sbi) &&
+ (ctx->opt_mask & F2FS_MOUNT_DISCARD) &&
+ !ctx_test_opt(ctx, F2FS_MOUNT_DISCARD)) {
+ f2fs_warn(sbi, "discard is required for zoned block devices");
return -EINVAL;
}
- if (f2fs_sb_has_project_quota(sbi) && !f2fs_readonly(sbi->sb)) {
- f2fs_err(sbi, "Filesystem with project quota feature cannot be mounted RDWR without CONFIG_QUOTA");
+
+ if (!f2fs_hw_support_discard(sbi) &&
+ (ctx->opt_mask & F2FS_MOUNT_DISCARD) &&
+ ctx_test_opt(ctx, F2FS_MOUNT_DISCARD)) {
+ f2fs_warn(sbi, "device does not support discard");
+ ctx_clear_opt(ctx, F2FS_MOUNT_DISCARD);
+ ctx->opt_mask &= ~F2FS_MOUNT_DISCARD;
+ }
+
+ if (f2fs_sb_has_device_alias(sbi) &&
+ (ctx->opt_mask & F2FS_MOUNT_READ_EXTENT_CACHE) &&
+ !ctx_test_opt(ctx, F2FS_MOUNT_READ_EXTENT_CACHE)) {
+ f2fs_err(sbi, "device aliasing requires extent cache");
return -EINVAL;
}
-#endif
+
+ if (test_opt(sbi, RESERVE_ROOT) &&
+ (ctx->opt_mask & F2FS_MOUNT_RESERVE_ROOT) &&
+ ctx_test_opt(ctx, F2FS_MOUNT_RESERVE_ROOT)) {
+ f2fs_info(sbi, "Preserve previous reserve_root=%u",
+ F2FS_OPTION(sbi).root_reserved_blocks);
+ ctx_clear_opt(ctx, F2FS_MOUNT_RESERVE_ROOT);
+ ctx->opt_mask &= ~F2FS_MOUNT_RESERVE_ROOT;
+ }
+
+ err = f2fs_check_test_dummy_encryption(fc, sb);
+ if (err)
+ return err;
+
+ err = f2fs_check_compression(fc, sb);
+ if (err)
+ return err;
+
+ err = f2fs_check_quota_consistency(fc, sb);
+ if (err)
+ return err;
if (!IS_ENABLED(CONFIG_UNICODE) && f2fs_sb_has_casefold(sbi)) {
f2fs_err(sbi,
@@ -1354,15 +1449,19 @@ static int f2fs_default_check(struct f2fs_sb_info *sbi)
* devices, but mandatory for host-managed zoned block devices.
*/
if (f2fs_sb_has_blkzoned(sbi)) {
+ if (F2FS_CTX_INFO(ctx).bggc_mode == BGGC_MODE_OFF) {
+ f2fs_warn(sbi, "zoned devices need bggc");
+ return -EINVAL;
+ }
#ifdef CONFIG_BLK_DEV_ZONED
- if (F2FS_OPTION(sbi).discard_unit !=
- DISCARD_UNIT_SECTION) {
+ if ((ctx->spec_mask & F2FS_SPEC_discard_unit) &&
+ F2FS_CTX_INFO(ctx).discard_unit != DISCARD_UNIT_SECTION) {
f2fs_info(sbi, "Zoned block device doesn't need small discard, set discard_unit=section by default");
- F2FS_OPTION(sbi).discard_unit =
- DISCARD_UNIT_SECTION;
+ F2FS_CTX_INFO(ctx).discard_unit = DISCARD_UNIT_SECTION;
}
- if (F2FS_OPTION(sbi).fs_mode != FS_MODE_LFS) {
+ if ((ctx->spec_mask & F2FS_SPEC_mode) &&
+ F2FS_CTX_INFO(ctx).fs_mode != FS_MODE_LFS) {
f2fs_info(sbi, "Only lfs mode is allowed with zoned block device feature");
return -EINVAL;
}
@@ -1372,43 +1471,25 @@ static int f2fs_default_check(struct f2fs_sb_info *sbi)
#endif
}
-#ifdef CONFIG_F2FS_FS_COMPRESSION
- if (f2fs_test_compress_extension(sbi)) {
- f2fs_err(sbi, "invalid compress or nocompress extension");
- return -EINVAL;
- }
-#endif
-
- if (test_opt(sbi, INLINE_XATTR_SIZE)) {
- int min_size, max_size;
-
+ if (ctx_test_opt(ctx, F2FS_MOUNT_INLINE_XATTR_SIZE)) {
if (!f2fs_sb_has_extra_attr(sbi) ||
!f2fs_sb_has_flexible_inline_xattr(sbi)) {
f2fs_err(sbi, "extra_attr or flexible_inline_xattr feature is off");
return -EINVAL;
}
- if (!test_opt(sbi, INLINE_XATTR)) {
+ if (!ctx_test_opt(ctx, F2FS_MOUNT_INLINE_XATTR) && !test_opt(sbi, INLINE_XATTR)) {
f2fs_err(sbi, "inline_xattr_size option should be set with inline_xattr option");
return -EINVAL;
}
-
- min_size = MIN_INLINE_XATTR_SIZE;
- max_size = MAX_INLINE_XATTR_SIZE;
-
- if (F2FS_OPTION(sbi).inline_xattr_size < min_size ||
- F2FS_OPTION(sbi).inline_xattr_size > max_size) {
- f2fs_err(sbi, "inline xattr size is out of range: %d ~ %d",
- min_size, max_size);
- return -EINVAL;
- }
}
- if (test_opt(sbi, ATGC) && f2fs_lfs_mode(sbi)) {
+ if (ctx_test_opt(ctx, F2FS_MOUNT_ATGC) &&
+ F2FS_CTX_INFO(ctx).fs_mode == FS_MODE_LFS) {
f2fs_err(sbi, "LFS is not compatible with ATGC");
return -EINVAL;
}
- if (f2fs_is_readonly(sbi) && test_opt(sbi, FLUSH_MERGE)) {
+ if (f2fs_is_readonly(sbi) && ctx_test_opt(ctx, F2FS_MOUNT_FLUSH_MERGE)) {
f2fs_err(sbi, "FLUSH_MERGE not compatible with readonly mode");
return -EINVAL;
}
@@ -1417,12 +1498,190 @@ static int f2fs_default_check(struct f2fs_sb_info *sbi)
f2fs_err(sbi, "Allow to mount readonly mode only");
return -EROFS;
}
+ return 0;
+}
+
+/*
+ * Copy the journaled quota settings from the fs_context (@fc) into the sbi
+ * of @sb. Called after f2fs_check_quota_consistency() accepted them.
+ *
+ * For every quota type whose bit is set in qname_mask, the name in the
+ * context (possibly NULL) replaces the one in sbi; a non-NULL name is
+ * duplicated and also turns on the QUOTA mount option. The journaled quota
+ * format is copied when F2FS_SPEC_jqfmt is set.
+ *
+ * When the image has the QUOTA feature none of this applies; a jquota_fmt
+ * request is then reported and ignored.
+ */
+static void f2fs_apply_quota_options(struct fs_context *fc,
+				     struct super_block *sb)
+{
+#ifdef CONFIG_QUOTA
+	struct f2fs_fs_context *ctx = fc->fs_private;
+	struct f2fs_sb_info *sbi = F2FS_SB(sb);
+	char *qname;
+	int i;
+
+	if (f2fs_sb_has_quota_ino(sbi)) {
+		/*
+		 * This message used to sit after the early return, where it
+		 * could never be reached.
+		 */
+		if (ctx->spec_mask & F2FS_SPEC_jqfmt)
+			f2fs_info(sbi, "QUOTA feature is enabled, so ignore jquota_fmt");
+		return;
+	}
+
+	for (i = 0; i < MAXQUOTAS; i++) {
+		if (!(ctx->qname_mask & (1 << i)))
+			continue;
+
+		qname = F2FS_CTX_INFO(ctx).s_qf_names[i];
+		if (qname) {
+			/* sbi keeps its own copy; the context's string stays with the context */
+			qname = kstrdup(qname, GFP_KERNEL | __GFP_NOFAIL);
+			set_opt(sbi, QUOTA);
+		}
+		/*
+		 * sbi's names are kfree()d on the mount failure path, i.e. sbi
+		 * owns them; release the one being replaced instead of
+		 * dropping the pointer. kfree(NULL) is a no-op.
+		 */
+		kfree(F2FS_OPTION(sbi).s_qf_names[i]);
+		F2FS_OPTION(sbi).s_qf_names[i] = qname;
+	}
+
+	if (ctx->spec_mask & F2FS_SPEC_jqfmt)
+		F2FS_OPTION(sbi).s_jquota_fmt = F2FS_CTX_INFO(ctx).s_jquota_fmt;
+#endif
+}
+
+/*
+ * Move the dummy encryption policy from the fs_context (@fc) into the sbi of
+ * @sb, unless the context has none or sbi already has one.
+ */
+static void f2fs_apply_test_dummy_encryption(struct fs_context *fc,
+					     struct super_block *sb)
+{
+	struct f2fs_sb_info *sbi = F2FS_SB(sb);
+	struct f2fs_fs_context *ctx = fc->fs_private;
+
+	/* option not given: nothing to install */
+	if (!fscrypt_is_dummy_policy_set(&F2FS_CTX_INFO(ctx).dummy_enc_policy))
+		return;
+
+	/* if already set, it was already verified to be the same */
+	if (fscrypt_is_dummy_policy_set(&F2FS_OPTION(sbi).dummy_enc_policy))
+		return;
+
+	/* swap rather than copy: sbi gets the policy, ctx gets sbi's unset one */
+	swap(F2FS_OPTION(sbi).dummy_enc_policy, F2FS_CTX_INFO(ctx).dummy_enc_policy);
+	f2fs_warn(sbi, "Test dummy encryption mode enabled");
+}
- if (test_opt(sbi, NORECOVERY) && !f2fs_readonly(sbi->sb)) {
- f2fs_err(sbi, "norecovery requires readonly mount");
+#ifdef CONFIG_F2FS_FS_COMPRESSION
+/*
+ * Append every non-empty entry of @src (@src_cnt slots) to @dst, starting at
+ * slot @dst_cnt. Returns the new number of used slots in @dst. The caller
+ * guarantees that @dst has room for the result.
+ */
+static int f2fs_append_ext_list(unsigned char (*dst)[F2FS_EXTENSION_LEN],
+				int dst_cnt,
+				unsigned char (*src)[F2FS_EXTENSION_LEN],
+				int src_cnt)
+{
+	int idx;
+
+	for (idx = 0; idx < src_cnt; idx++) {
+		/* empty slots are entries blanked during the consistency check */
+		if (src[idx][0] == '\0')
+			continue;
+		strscpy(dst[dst_cnt], src[idx]);
+		dst_cnt++;
+	}
+	return dst_cnt;
+}
+#endif
+
+/*
+ * Copy the compression settings the user specified from the fs_context (@fc)
+ * into the sbi of @sb. Scalar settings are copied when their F2FS_SPEC_ bit
+ * is set; extension and nocompress-extension lists are appended to the lists
+ * already present in sbi. Does nothing without CONFIG_F2FS_FS_COMPRESSION.
+ */
+static void f2fs_apply_compression(struct fs_context *fc,
+				   struct super_block *sb)
+{
+#ifdef CONFIG_F2FS_FS_COMPRESSION
+	struct f2fs_sb_info *sbi = F2FS_SB(sb);
+	struct f2fs_fs_context *ctx = fc->fs_private;
+
+	if (ctx->spec_mask & F2FS_SPEC_compress_level) {
+		F2FS_OPTION(sbi).compress_level =
+				F2FS_CTX_INFO(ctx).compress_level;
+	}
+	if (ctx->spec_mask & F2FS_SPEC_compress_algorithm) {
+		F2FS_OPTION(sbi).compress_algorithm =
+				F2FS_CTX_INFO(ctx).compress_algorithm;
+	}
+	if (ctx->spec_mask & F2FS_SPEC_compress_log_size) {
+		F2FS_OPTION(sbi).compress_log_size =
+				F2FS_CTX_INFO(ctx).compress_log_size;
+	}
+	if (ctx->spec_mask & F2FS_SPEC_compress_chksum) {
+		F2FS_OPTION(sbi).compress_chksum =
+				F2FS_CTX_INFO(ctx).compress_chksum;
+	}
+	if (ctx->spec_mask & F2FS_SPEC_compress_mode) {
+		F2FS_OPTION(sbi).compress_mode =
+				F2FS_CTX_INFO(ctx).compress_mode;
+	}
+
+	if (ctx->spec_mask & F2FS_SPEC_compress_extension) {
+		F2FS_OPTION(sbi).compress_ext_cnt =
+			f2fs_append_ext_list(F2FS_OPTION(sbi).extensions,
+					F2FS_OPTION(sbi).compress_ext_cnt,
+					F2FS_CTX_INFO(ctx).extensions,
+					F2FS_CTX_INFO(ctx).compress_ext_cnt);
+	}
+	if (ctx->spec_mask & F2FS_SPEC_nocompress_extension) {
+		F2FS_OPTION(sbi).nocompress_ext_cnt =
+			f2fs_append_ext_list(F2FS_OPTION(sbi).noextensions,
+					F2FS_OPTION(sbi).nocompress_ext_cnt,
+					F2FS_CTX_INFO(ctx).noextensions,
+					F2FS_CTX_INFO(ctx).nocompress_ext_cnt);
+	}
+#endif
+}
+
+static void f2fs_apply_options(struct fs_context *fc, struct super_block *sb)
+{
+ struct f2fs_fs_context *ctx = fc->fs_private;
+ struct f2fs_sb_info *sbi = F2FS_SB(sb);
+
+ F2FS_OPTION(sbi).opt &= ~ctx->opt_mask;
+ F2FS_OPTION(sbi).opt |= F2FS_CTX_INFO(ctx).opt;
+
+ if (ctx->spec_mask & F2FS_SPEC_background_gc)
+ F2FS_OPTION(sbi).bggc_mode = F2FS_CTX_INFO(ctx).bggc_mode;
+ if (ctx->spec_mask & F2FS_SPEC_inline_xattr_size)
+ F2FS_OPTION(sbi).inline_xattr_size =
+ F2FS_CTX_INFO(ctx).inline_xattr_size;
+ if (ctx->spec_mask & F2FS_SPEC_active_logs)
+ F2FS_OPTION(sbi).active_logs = F2FS_CTX_INFO(ctx).active_logs;
+ if (ctx->spec_mask & F2FS_SPEC_reserve_root)
+ F2FS_OPTION(sbi).root_reserved_blocks =
+ F2FS_CTX_INFO(ctx).root_reserved_blocks;
+ if (ctx->spec_mask & F2FS_SPEC_resgid)
+ F2FS_OPTION(sbi).s_resgid = F2FS_CTX_INFO(ctx).s_resgid;
+ if (ctx->spec_mask & F2FS_SPEC_resuid)
+ F2FS_OPTION(sbi).s_resuid = F2FS_CTX_INFO(ctx).s_resuid;
+ if (ctx->spec_mask & F2FS_SPEC_mode)
+ F2FS_OPTION(sbi).fs_mode = F2FS_CTX_INFO(ctx).fs_mode;
+#ifdef CONFIG_F2FS_FAULT_INJECTION
+ if (ctx->spec_mask & F2FS_SPEC_fault_injection)
+ (void)f2fs_build_fault_attr(sbi,
+ F2FS_CTX_INFO(ctx).fault_info.inject_rate, 0, FAULT_RATE);
+ if (ctx->spec_mask & F2FS_SPEC_fault_type)
+ (void)f2fs_build_fault_attr(sbi, 0,
+ F2FS_CTX_INFO(ctx).fault_info.inject_type, FAULT_TYPE);
+#endif
+ if (ctx->spec_mask & F2FS_SPEC_alloc_mode)
+ F2FS_OPTION(sbi).alloc_mode = F2FS_CTX_INFO(ctx).alloc_mode;
+ if (ctx->spec_mask & F2FS_SPEC_fsync_mode)
+ F2FS_OPTION(sbi).fsync_mode = F2FS_CTX_INFO(ctx).fsync_mode;
+ if (ctx->spec_mask & F2FS_SPEC_checkpoint_disable_cap)
+ F2FS_OPTION(sbi).unusable_cap = F2FS_CTX_INFO(ctx).unusable_cap;
+ if (ctx->spec_mask & F2FS_SPEC_checkpoint_disable_cap_perc)
+ F2FS_OPTION(sbi).unusable_cap_perc =
+ F2FS_CTX_INFO(ctx).unusable_cap_perc;
+ if (ctx->spec_mask & F2FS_SPEC_discard_unit)
+ F2FS_OPTION(sbi).discard_unit = F2FS_CTX_INFO(ctx).discard_unit;
+ if (ctx->spec_mask & F2FS_SPEC_memory_mode)
+ F2FS_OPTION(sbi).memory_mode = F2FS_CTX_INFO(ctx).memory_mode;
+ if (ctx->spec_mask & F2FS_SPEC_errors)
+ F2FS_OPTION(sbi).errors = F2FS_CTX_INFO(ctx).errors;
+
+ f2fs_apply_compression(fc, sb);
+ f2fs_apply_test_dummy_encryption(fc, sb);
+ f2fs_apply_quota_options(fc, sb);
+}
+
+/*
+ * Sanity-check the options now stored in @sbi.
+ *
+ * The device-alias / extent-cache check always runs. The zoned open-zone
+ * limit and the LFS-vs-IPU check run only when @remount is true.
+ *
+ * Returns 0 if the options are usable, -EINVAL otherwise.
+ */
+static int f2fs_sanity_check_options(struct f2fs_sb_info *sbi, bool remount)
+{
+	bool alias_without_cache = f2fs_sb_has_device_alias(sbi) &&
+					!test_opt(sbi, READ_EXTENT_CACHE);
+
+	if (alias_without_cache) {
+		f2fs_err(sbi, "device aliasing requires extent cache");
 		return -EINVAL;
 	}
+	if (remount) {
+#ifdef CONFIG_BLK_DEV_ZONED
+		if (f2fs_sb_has_blkzoned(sbi) &&
+			sbi->max_open_zones < F2FS_OPTION(sbi).active_logs) {
+			f2fs_err(sbi,
+				"zoned: max open zones %u is too small, need at least %u open zones",
+				sbi->max_open_zones, F2FS_OPTION(sbi).active_logs);
+			return -EINVAL;
+		}
+#endif
+		if (f2fs_lfs_mode(sbi) && !IS_F2FS_IPU_DISABLE(sbi)) {
+			f2fs_warn(sbi, "LFS is not compatible with IPU");
+			return -EINVAL;
+		}
+	}
 	return 0;
 }
@@ -1442,6 +1701,7 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb)
/* Initialize f2fs-specific inode info */
atomic_set(&fi->dirty_pages, 0);
atomic_set(&fi->i_compr_blocks, 0);
+ atomic_set(&fi->open_count, 0);
init_f2fs_rwsem(&fi->i_sem);
spin_lock_init(&fi->i_size_lock);
INIT_LIST_HEAD(&fi->dirty_list);
@@ -1718,7 +1978,7 @@ static void f2fs_put_super(struct super_block *sb)
destroy_percpu_info(sbi);
f2fs_destroy_iostat(sbi);
for (i = 0; i < NR_PAGE_TYPE; i++)
- kvfree(sbi->write_io[i]);
+ kfree(sbi->write_io[i]);
#if IS_ENABLED(CONFIG_UNICODE)
utf8_unload(sb->s_encoding);
#endif
@@ -2329,11 +2589,12 @@ static void f2fs_enable_checkpoint(struct f2fs_sb_info *sbi)
f2fs_flush_ckpt_thread(sbi);
}
-static int f2fs_remount(struct super_block *sb, int *flags, char *data)
+static int __f2fs_remount(struct fs_context *fc, struct super_block *sb)
{
struct f2fs_sb_info *sbi = F2FS_SB(sb);
struct f2fs_mount_info org_mount_opt;
unsigned long old_sb_flags;
+ unsigned int flags = fc->sb_flags;
int err;
bool need_restart_gc = false, need_stop_gc = false;
bool need_restart_flush = false, need_stop_flush = false;
@@ -2379,7 +2640,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
#endif
/* recover superblocks we couldn't write due to previous RO mount */
- if (!(*flags & SB_RDONLY) && is_sbi_flag_set(sbi, SBI_NEED_SB_WRITE)) {
+ if (!(flags & SB_RDONLY) && is_sbi_flag_set(sbi, SBI_NEED_SB_WRITE)) {
err = f2fs_commit_super(sbi, false);
f2fs_info(sbi, "Try to recover all the superblocks, ret: %d",
err);
@@ -2389,23 +2650,13 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
default_options(sbi, true);
- /* parse mount options */
- err = parse_options(sbi, data, true);
+ err = f2fs_check_opt_consistency(fc, sb);
if (err)
goto restore_opts;
-#ifdef CONFIG_BLK_DEV_ZONED
- if (f2fs_sb_has_blkzoned(sbi) &&
- sbi->max_open_zones < F2FS_OPTION(sbi).active_logs) {
- f2fs_err(sbi,
- "zoned: max open zones %u is too small, need at least %u open zones",
- sbi->max_open_zones, F2FS_OPTION(sbi).active_logs);
- err = -EINVAL;
- goto restore_opts;
- }
-#endif
+ f2fs_apply_options(fc, sb);
- err = f2fs_default_check(sbi);
+ err = f2fs_sanity_check_options(sbi, true);
if (err)
goto restore_opts;
@@ -2416,20 +2667,20 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
* Previous and new state of filesystem is RO,
* so skip checking GC and FLUSH_MERGE conditions.
*/
- if (f2fs_readonly(sb) && (*flags & SB_RDONLY))
+ if (f2fs_readonly(sb) && (flags & SB_RDONLY))
goto skip;
- if (f2fs_dev_is_readonly(sbi) && !(*flags & SB_RDONLY)) {
+ if (f2fs_dev_is_readonly(sbi) && !(flags & SB_RDONLY)) {
err = -EROFS;
goto restore_opts;
}
#ifdef CONFIG_QUOTA
- if (!f2fs_readonly(sb) && (*flags & SB_RDONLY)) {
+ if (!f2fs_readonly(sb) && (flags & SB_RDONLY)) {
err = dquot_suspend(sb, -1);
if (err < 0)
goto restore_opts;
- } else if (f2fs_readonly(sb) && !(*flags & SB_RDONLY)) {
+ } else if (f2fs_readonly(sb) && !(flags & SB_RDONLY)) {
/* dquot_resume needs RW */
sb->s_flags &= ~SB_RDONLY;
if (sb_any_quota_suspended(sb)) {
@@ -2441,12 +2692,6 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
}
}
#endif
- if (f2fs_lfs_mode(sbi) && !IS_F2FS_IPU_DISABLE(sbi)) {
- err = -EINVAL;
- f2fs_warn(sbi, "LFS is not compatible with IPU");
- goto restore_opts;
- }
-
/* disallow enable atgc dynamically */
if (no_atgc == !!test_opt(sbi, ATGC)) {
err = -EINVAL;
@@ -2485,7 +2730,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
goto restore_opts;
}
- if ((*flags & SB_RDONLY) && test_opt(sbi, DISABLE_CHECKPOINT)) {
+ if ((flags & SB_RDONLY) && test_opt(sbi, DISABLE_CHECKPOINT)) {
err = -EINVAL;
f2fs_warn(sbi, "disabling checkpoint not compatible with read-only");
goto restore_opts;
@@ -2496,7 +2741,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
* or if background_gc = off is passed in mount
* option. Also sync the filesystem.
*/
- if ((*flags & SB_RDONLY) ||
+ if ((flags & SB_RDONLY) ||
(F2FS_OPTION(sbi).bggc_mode == BGGC_MODE_OFF &&
!test_opt(sbi, GC_MERGE))) {
if (sbi->gc_thread) {
@@ -2510,7 +2755,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
need_stop_gc = true;
}
- if (*flags & SB_RDONLY) {
+ if (flags & SB_RDONLY) {
sync_inodes_sb(sb);
set_sbi_flag(sbi, SBI_IS_DIRTY);
@@ -2523,7 +2768,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
* We stop issue flush thread if FS is mounted as RO
* or if flush_merge is not passed in mount option.
*/
- if ((*flags & SB_RDONLY) || !test_opt(sbi, FLUSH_MERGE)) {
+ if ((flags & SB_RDONLY) || !test_opt(sbi, FLUSH_MERGE)) {
clear_opt(sbi, FLUSH_MERGE);
f2fs_destroy_flush_cmd_control(sbi, false);
need_restart_flush = true;
@@ -2565,11 +2810,11 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
* triggered while remount and we need to take care of it before
* returning from remount.
*/
- if ((*flags & SB_RDONLY) || test_opt(sbi, DISABLE_CHECKPOINT) ||
+ if ((flags & SB_RDONLY) || test_opt(sbi, DISABLE_CHECKPOINT) ||
!test_opt(sbi, MERGE_CHECKPOINT)) {
f2fs_stop_ckpt_thread(sbi);
} else {
- /* Flush if the prevous checkpoint, if exists. */
+ /* Flush if the previous checkpoint, if exists. */
f2fs_flush_ckpt_thread(sbi);
err = f2fs_start_ckpt_thread(sbi);
@@ -2592,7 +2837,7 @@ skip:
(test_opt(sbi, POSIX_ACL) ? SB_POSIXACL : 0);
limit_reserve_root(sbi);
- *flags = (*flags & ~SB_LAZYTIME) | (sb->s_flags & SB_LAZYTIME);
+ fc->sb_flags = (flags & ~SB_LAZYTIME) | (sb->s_flags & SB_LAZYTIME);
sbi->umount_lock_holder = NULL;
return 0;
@@ -3263,7 +3508,6 @@ static const struct super_operations f2fs_sops = {
.freeze_fs = f2fs_freeze,
.unfreeze_fs = f2fs_unfreeze,
.statfs = f2fs_statfs,
- .remount_fs = f2fs_remount,
.shutdown = f2fs_shutdown,
};
@@ -3451,6 +3695,7 @@ static int __f2fs_commit_super(struct f2fs_sb_info *sbi, struct folio *folio,
f2fs_bug_on(sbi, 1);
ret = submit_bio_wait(bio);
+ bio_put(bio);
folio_end_writeback(folio);
return ret;
@@ -4522,14 +4767,14 @@ static void f2fs_tuning_parameters(struct f2fs_sb_info *sbi)
sbi->readdir_ra = true;
}
-static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
+static int f2fs_fill_super(struct super_block *sb, struct fs_context *fc)
{
+ struct f2fs_fs_context *ctx = fc->fs_private;
struct f2fs_sb_info *sbi;
struct f2fs_super_block *raw_super;
struct inode *root;
int err;
bool skip_recovery = false, need_fsck = false;
- char *options = NULL;
int recovery, i, valid_super_block;
struct curseg_info *seg_i;
int retry_cnt = 1;
@@ -4592,18 +4837,14 @@ try_onemore:
sizeof(raw_super->uuid));
default_options(sbi, false);
- /* parse mount options */
- options = kstrdup((const char *)data, GFP_KERNEL);
- if (data && !options) {
- err = -ENOMEM;
- goto free_sb_buf;
- }
- err = parse_options(sbi, options, false);
+ err = f2fs_check_opt_consistency(fc, sb);
if (err)
- goto free_options;
+ goto free_sb_buf;
+
+ f2fs_apply_options(fc, sb);
- err = f2fs_default_check(sbi);
+ err = f2fs_sanity_check_options(sbi, false);
if (err)
goto free_options;
@@ -4770,6 +5011,10 @@ try_onemore:
/* get segno of first zoned block device */
sbi->first_seq_zone_segno = get_first_seq_zone_segno(sbi);
+ sbi->reserved_pin_section = f2fs_sb_has_blkzoned(sbi) ?
+ ZONED_PIN_SEC_REQUIRED_COUNT :
+ GET_SEC_FROM_SEG(sbi, overprovision_segments(sbi));
+
/* Read accumulated write IO statistics if exists */
seg_i = CURSEG_I(sbi, CURSEG_HOT_NODE);
if (__exist_node_summaries(sbi))
@@ -4930,7 +5175,6 @@ reset_checkpoint:
if (err)
goto sync_free_meta;
}
- kvfree(options);
/* recover broken superblock */
if (recovery) {
@@ -5013,7 +5257,7 @@ free_iostat:
f2fs_destroy_iostat(sbi);
free_bio_info:
for (i = 0; i < NR_PAGE_TYPE; i++)
- kvfree(sbi->write_io[i]);
+ kfree(sbi->write_io[i]);
#if IS_ENABLED(CONFIG_UNICODE)
utf8_unload(sb->s_encoding);
@@ -5024,8 +5268,8 @@ free_options:
for (i = 0; i < MAXQUOTAS; i++)
kfree(F2FS_OPTION(sbi).s_qf_names[i]);
#endif
- fscrypt_free_dummy_policy(&F2FS_OPTION(sbi).dummy_enc_policy);
- kvfree(options);
+ /* no need to free dummy_enc_policy, we just keep it in ctx when failed */
+ swap(F2FS_CTX_INFO(ctx).dummy_enc_policy, F2FS_OPTION(sbi).dummy_enc_policy);
free_sb_buf:
kfree(raw_super);
free_sbi:
@@ -5041,12 +5285,39 @@ free_sbi:
return err;
}
-static struct dentry *f2fs_mount(struct file_system_type *fs_type, int flags,
- const char *dev_name, void *data)
+static int f2fs_get_tree(struct fs_context *fc)
{
- return mount_bdev(fs_type, flags, dev_name, data, f2fs_fill_super);
+ return get_tree_bdev(fc, f2fs_fill_super);
}
+static int f2fs_reconfigure(struct fs_context *fc)
+{
+ struct super_block *sb = fc->root->d_sb;
+
+ return __f2fs_remount(fc, sb);
+}
+
+static void f2fs_fc_free(struct fs_context *fc)
+{
+ struct f2fs_fs_context *ctx = fc->fs_private;
+
+ if (!ctx)
+ return;
+
+#ifdef CONFIG_QUOTA
+ f2fs_unnote_qf_name_all(fc);
+#endif
+ fscrypt_free_dummy_policy(&F2FS_CTX_INFO(ctx).dummy_enc_policy);
+ kfree(ctx);
+}
+
+static const struct fs_context_operations f2fs_context_ops = {
+ .parse_param = f2fs_parse_param,
+ .get_tree = f2fs_get_tree,
+ .reconfigure = f2fs_reconfigure,
+ .free = f2fs_fc_free,
+};
+
static void kill_f2fs_super(struct super_block *sb)
{
struct f2fs_sb_info *sbi = F2FS_SB(sb);
@@ -5088,10 +5359,24 @@ static void kill_f2fs_super(struct super_block *sb)
}
}
+static int f2fs_init_fs_context(struct fs_context *fc)
+{
+ struct f2fs_fs_context *ctx;
+
+ ctx = kzalloc(sizeof(struct f2fs_fs_context), GFP_KERNEL);
+ if (!ctx)
+ return -ENOMEM;
+
+ fc->fs_private = ctx;
+ fc->ops = &f2fs_context_ops;
+
+ return 0;
+}
+
static struct file_system_type f2fs_fs_type = {
.owner = THIS_MODULE,
.name = "f2fs",
- .mount = f2fs_mount,
+ .init_fs_context = f2fs_init_fs_context,
.kill_sb = kill_f2fs_super,
.fs_flags = FS_REQUIRES_DEV | FS_ALLOW_IDMAP,
};
diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
index 75134d69a0bd..f736052dea50 100644
--- a/fs/f2fs/sysfs.c
+++ b/fs/f2fs/sysfs.c
@@ -628,6 +628,27 @@ out:
return count;
}
+ if (!strcmp(a->attr.name, "gc_no_zoned_gc_percent")) {
+ if (t > 100)
+ return -EINVAL;
+ *ui = (unsigned int)t;
+ return count;
+ }
+
+ if (!strcmp(a->attr.name, "gc_boost_zoned_gc_percent")) {
+ if (t > 100)
+ return -EINVAL;
+ *ui = (unsigned int)t;
+ return count;
+ }
+
+ if (!strcmp(a->attr.name, "gc_valid_thresh_ratio")) {
+ if (t > 100)
+ return -EINVAL;
+ *ui = (unsigned int)t;
+ return count;
+ }
+
#ifdef CONFIG_F2FS_IOSTAT
if (!strcmp(a->attr.name, "iostat_enable")) {
sbi->iostat_enable = !!t;
@@ -824,6 +845,27 @@ out:
return count;
}
+ if (!strcmp(a->attr.name, "reserved_pin_section")) {
+ if (t > GET_SEC_FROM_SEG(sbi, overprovision_segments(sbi)))
+ return -EINVAL;
+ *ui = (unsigned int)t;
+ return count;
+ }
+
+ if (!strcmp(a->attr.name, "gc_boost_gc_multiple")) {
+ if (t < 1 || t > SEGS_PER_SEC(sbi))
+ return -EINVAL;
+ sbi->gc_thread->boost_gc_multiple = (unsigned int)t;
+ return count;
+ }
+
+ if (!strcmp(a->attr.name, "gc_boost_gc_greedy")) {
+ if (t > GC_GREEDY)
+ return -EINVAL;
+ sbi->gc_thread->boost_gc_greedy = (unsigned int)t;
+ return count;
+ }
+
*ui = (unsigned int)t;
return count;
@@ -1050,6 +1092,8 @@ GC_THREAD_RW_ATTR(gc_no_gc_sleep_time, no_gc_sleep_time);
GC_THREAD_RW_ATTR(gc_no_zoned_gc_percent, no_zoned_gc_percent);
GC_THREAD_RW_ATTR(gc_boost_zoned_gc_percent, boost_zoned_gc_percent);
GC_THREAD_RW_ATTR(gc_valid_thresh_ratio, valid_thresh_ratio);
+GC_THREAD_RW_ATTR(gc_boost_gc_multiple, boost_gc_multiple);
+GC_THREAD_RW_ATTR(gc_boost_gc_greedy, boost_gc_greedy);
/* SM_INFO ATTR */
SM_INFO_RW_ATTR(reclaim_segments, rec_prefree_segments);
@@ -1130,6 +1174,7 @@ F2FS_SBI_GENERAL_RO_ATTR(unusable_blocks_per_sec);
F2FS_SBI_GENERAL_RW_ATTR(blkzone_alloc_policy);
#endif
F2FS_SBI_GENERAL_RW_ATTR(carve_out);
+F2FS_SBI_GENERAL_RW_ATTR(reserved_pin_section);
/* STAT_INFO ATTR */
#ifdef CONFIG_F2FS_STAT_FS
@@ -1220,6 +1265,8 @@ static struct attribute *f2fs_attrs[] = {
ATTR_LIST(gc_no_zoned_gc_percent),
ATTR_LIST(gc_boost_zoned_gc_percent),
ATTR_LIST(gc_valid_thresh_ratio),
+ ATTR_LIST(gc_boost_gc_multiple),
+ ATTR_LIST(gc_boost_gc_greedy),
ATTR_LIST(gc_idle),
ATTR_LIST(gc_urgent),
ATTR_LIST(reclaim_segments),
@@ -1323,6 +1370,7 @@ static struct attribute *f2fs_attrs[] = {
ATTR_LIST(last_age_weight),
ATTR_LIST(max_read_extent_count),
ATTR_LIST(carve_out),
+ ATTR_LIST(reserved_pin_section),
NULL,
};
ATTRIBUTE_GROUPS(f2fs);
diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c
index 1db348f8f887..a7061c2ad8e4 100644
--- a/fs/fat/fatent.c
+++ b/fs/fat/fatent.c
@@ -356,7 +356,7 @@ int fat_ent_read(struct inode *inode, struct fat_entry *fatent, int entry)
if (!fat_valid_entry(sbi, entry)) {
fatent_brelse(fatent);
- fat_fs_error(sb, "invalid access to FAT (entry 0x%08x)", entry);
+ fat_fs_error_ratelimit(sb, "invalid access to FAT (entry 0x%08x)", entry);
return -EIO;
}
diff --git a/fs/fat/misc.c b/fs/fat/misc.c
index c7a2d27120ba..950da09f0961 100644
--- a/fs/fat/misc.c
+++ b/fs/fat/misc.c
@@ -158,9 +158,9 @@ int fat_chain_add(struct inode *inode, int new_dclus, int nr_cluster)
mark_inode_dirty(inode);
}
if (new_fclus != (inode->i_blocks >> (sbi->cluster_bits - 9))) {
- fat_fs_error(sb, "clusters badly computed (%d != %llu)",
- new_fclus,
- (llu)(inode->i_blocks >> (sbi->cluster_bits - 9)));
+ fat_fs_error_ratelimit(
+ sb, "clusters badly computed (%d != %llu)", new_fclus,
+ (llu)(inode->i_blocks >> (sbi->cluster_bits - 9)));
fat_cache_inval_inode(inode);
}
inode->i_blocks += nr_cluster << (sbi->cluster_bits - 9);
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 47189476b553..5d6edafbed20 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -149,8 +149,8 @@ do_add_page_to_bio(struct bio *bio, int npg, enum req_op op, sector_t isect,
/* limit length to what the device mapping allows */
end = disk_addr + *len;
- if (end >= map->start + map->len)
- *len = map->start + map->len - disk_addr;
+ if (end >= map->disk_offset + map->len)
+ *len = map->disk_offset + map->len - disk_addr;
retry:
if (!bio) {
diff --git a/fs/nfs/blocklayout/dev.c b/fs/nfs/blocklayout/dev.c
index cab8809f0e0f..44306ac22353 100644
--- a/fs/nfs/blocklayout/dev.c
+++ b/fs/nfs/blocklayout/dev.c
@@ -257,10 +257,11 @@ static bool bl_map_stripe(struct pnfs_block_dev *dev, u64 offset,
struct pnfs_block_dev *child;
u64 chunk;
u32 chunk_idx;
+ u64 disk_chunk;
u64 disk_offset;
chunk = div_u64(offset, dev->chunk_size);
- div_u64_rem(chunk, dev->nr_children, &chunk_idx);
+ disk_chunk = div_u64_rem(chunk, dev->nr_children, &chunk_idx);
if (chunk_idx >= dev->nr_children) {
dprintk("%s: invalid chunk idx %d (%lld/%lld)\n",
@@ -273,7 +274,7 @@ static bool bl_map_stripe(struct pnfs_block_dev *dev, u64 offset,
offset = chunk * dev->chunk_size;
/* disk offset of the stripe */
- disk_offset = div_u64(offset, dev->nr_children);
+ disk_offset = disk_chunk * dev->chunk_size;
child = &dev->children[chunk_idx];
child->map(child, disk_offset, map);
diff --git a/fs/nfs/blocklayout/extent_tree.c b/fs/nfs/blocklayout/extent_tree.c
index 8f7cff7a4293..315949a7e92d 100644
--- a/fs/nfs/blocklayout/extent_tree.c
+++ b/fs/nfs/blocklayout/extent_tree.c
@@ -6,6 +6,7 @@
#include <linux/vmalloc.h>
#include "blocklayout.h"
+#include "../nfs4trace.h"
#define NFSDBG_FACILITY NFSDBG_PNFS_LD
@@ -520,10 +521,71 @@ static __be32 *encode_scsi_range(struct pnfs_block_extent *be, __be32 *p)
return xdr_encode_hyper(p, be->be_length << SECTOR_SHIFT);
}
-static int ext_tree_encode_commit(struct pnfs_block_layout *bl, __be32 *p,
+/**
+ * ext_tree_try_encode_commit - try to encode all extents into the buffer
+ * @bl: pointer to the layout
+ * @p: pointer to the output buffer
+ * @buffer_size: size of the output buffer
+ * @count: output pointer to the number of encoded extents
+ * @lastbyte: output pointer to the last written byte
+ *
+ * Return values:
+ * %0: Success, all required extents encoded, outputs are valid
+ * %-ENOSPC: Buffer too small, nothing encoded, outputs are invalid
+ */
+static int
+ext_tree_try_encode_commit(struct pnfs_block_layout *bl, __be32 *p,
size_t buffer_size, size_t *count, __u64 *lastbyte)
{
struct pnfs_block_extent *be;
+
+ spin_lock(&bl->bl_ext_lock);
+ for (be = ext_tree_first(&bl->bl_ext_rw); be; be = ext_tree_next(be)) {
+ if (be->be_state != PNFS_BLOCK_INVALID_DATA ||
+ be->be_tag != EXTENT_WRITTEN)
+ continue;
+
+ (*count)++;
+ if (ext_tree_layoutupdate_size(bl, *count) > buffer_size) {
+ spin_unlock(&bl->bl_ext_lock);
+ return -ENOSPC;
+ }
+ }
+ for (be = ext_tree_first(&bl->bl_ext_rw); be; be = ext_tree_next(be)) {
+ if (be->be_state != PNFS_BLOCK_INVALID_DATA ||
+ be->be_tag != EXTENT_WRITTEN)
+ continue;
+
+ if (bl->bl_scsi_layout)
+ p = encode_scsi_range(be, p);
+ else
+ p = encode_block_extent(be, p);
+ be->be_tag = EXTENT_COMMITTING;
+ }
+ *lastbyte = (bl->bl_lwb != 0) ? bl->bl_lwb - 1 : U64_MAX;
+ bl->bl_lwb = 0;
+ spin_unlock(&bl->bl_ext_lock);
+
+ return 0;
+}
+
+/**
+ * ext_tree_encode_commit - encode as much as possible extents into the buffer
+ * @bl: pointer to the layout
+ * @p: pointer to the output buffer
+ * @buffer_size: size of the output buffer
+ * @count: output pointer to the number of encoded extents
+ * @lastbyte: output pointer to the last written byte
+ *
+ * Return values:
+ * %0: Success, all required extents encoded, outputs are valid
+ * %-ENOSPC: Buffer too small, some extents are encoded, outputs are valid
+ */
+static int
+ext_tree_encode_commit(struct pnfs_block_layout *bl, __be32 *p,
+ size_t buffer_size, size_t *count, __u64 *lastbyte)
+{
+ struct pnfs_block_extent *be, *be_prev;
int ret = 0;
spin_lock(&bl->bl_ext_lock);
@@ -534,9 +596,9 @@ static int ext_tree_encode_commit(struct pnfs_block_layout *bl, __be32 *p,
(*count)++;
if (ext_tree_layoutupdate_size(bl, *count) > buffer_size) {
- /* keep counting.. */
+ (*count)--;
ret = -ENOSPC;
- continue;
+ break;
}
if (bl->bl_scsi_layout)
@@ -544,14 +606,30 @@ static int ext_tree_encode_commit(struct pnfs_block_layout *bl, __be32 *p,
else
p = encode_block_extent(be, p);
be->be_tag = EXTENT_COMMITTING;
+ be_prev = be;
+ }
+ if (!ret) {
+ *lastbyte = (bl->bl_lwb != 0) ? bl->bl_lwb - 1 : U64_MAX;
+ bl->bl_lwb = 0;
+ } else {
+ *lastbyte = be_prev->be_f_offset + be_prev->be_length;
+ *lastbyte <<= SECTOR_SHIFT;
+ *lastbyte -= 1;
}
- *lastbyte = bl->bl_lwb - 1;
- bl->bl_lwb = 0;
spin_unlock(&bl->bl_ext_lock);
return ret;
}
+/**
+ * ext_tree_prepare_commit - encode extents that need to be committed
+ * @arg: layout commit data
+ *
+ * Return values:
+ * %0: Success, all required extents are encoded
+ * %-ENOSPC: Some extents are encoded, but not all, due to RPC size limit
+ * %-ENOMEM: Out of memory, extents not encoded
+ */
int
ext_tree_prepare_commit(struct nfs4_layoutcommit_args *arg)
{
@@ -560,20 +638,18 @@ ext_tree_prepare_commit(struct nfs4_layoutcommit_args *arg)
__be32 *start_p;
int ret;
- dprintk("%s enter\n", __func__);
-
arg->layoutupdate_page = alloc_page(GFP_NOFS);
if (!arg->layoutupdate_page)
return -ENOMEM;
start_p = page_address(arg->layoutupdate_page);
arg->layoutupdate_pages = &arg->layoutupdate_page;
-retry:
- ret = ext_tree_encode_commit(bl, start_p + 1, buffer_size, &count, &arg->lastbytewritten);
+ ret = ext_tree_try_encode_commit(bl, start_p + 1, buffer_size,
+ &count, &arg->lastbytewritten);
if (unlikely(ret)) {
ext_tree_free_commitdata(arg, buffer_size);
- buffer_size = ext_tree_layoutupdate_size(bl, count);
+ buffer_size = NFS_SERVER(arg->inode)->wsize;
count = 0;
arg->layoutupdate_pages =
@@ -588,7 +664,8 @@ retry:
return -ENOMEM;
}
- goto retry;
+ ret = ext_tree_encode_commit(bl, start_p + 1, buffer_size,
+ &count, &arg->lastbytewritten);
}
*start_p = cpu_to_be32(count);
@@ -607,8 +684,9 @@ retry:
}
}
- dprintk("%s found %zu ranges\n", __func__, count);
- return 0;
+ trace_bl_ext_tree_prepare_commit(ret, count,
+ arg->lastbytewritten, !!ret);
+ return ret;
}
void
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index cf35ad3f818a..8fb4a950dd55 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -682,6 +682,44 @@ struct nfs_client *nfs_init_client(struct nfs_client *clp,
}
EXPORT_SYMBOL_GPL(nfs_init_client);
+static void nfs4_server_set_init_caps(struct nfs_server *server)
+{
+#if IS_ENABLED(CONFIG_NFS_V4)
+ /* Set the basic capabilities */
+ server->caps = server->nfs_client->cl_mvops->init_caps;
+ if (server->flags & NFS_MOUNT_NORDIRPLUS)
+ server->caps &= ~NFS_CAP_READDIRPLUS;
+ if (server->nfs_client->cl_proto == XPRT_TRANSPORT_RDMA)
+ server->caps &= ~NFS_CAP_READ_PLUS;
+
+ /*
+ * Don't use NFS uid/gid mapping if we're using AUTH_SYS or lower
+ * authentication.
+ */
+ if (nfs4_disable_idmapping &&
+ server->client->cl_auth->au_flavor == RPC_AUTH_UNIX)
+ server->caps |= NFS_CAP_UIDGID_NOMAP;
+#endif
+}
+
+void nfs_server_set_init_caps(struct nfs_server *server)
+{
+ switch (server->nfs_client->rpc_ops->version) {
+ case 2:
+ server->caps = NFS_CAP_HARDLINKS | NFS_CAP_SYMLINKS;
+ break;
+ case 3:
+ server->caps = NFS_CAP_HARDLINKS | NFS_CAP_SYMLINKS;
+ if (!(server->flags & NFS_MOUNT_NORDIRPLUS))
+ server->caps |= NFS_CAP_READDIRPLUS;
+ break;
+ default:
+ nfs4_server_set_init_caps(server);
+ break;
+ }
+}
+EXPORT_SYMBOL_GPL(nfs_server_set_init_caps);
+
/*
* Create a version 2 or 3 client
*/
@@ -726,7 +764,6 @@ static int nfs_init_server(struct nfs_server *server,
/* Initialise the client representation from the mount data */
server->flags = ctx->flags;
server->options = ctx->options;
- server->caps |= NFS_CAP_HARDLINKS | NFS_CAP_SYMLINKS;
switch (clp->rpc_ops->version) {
case 2:
@@ -762,6 +799,8 @@ static int nfs_init_server(struct nfs_server *server,
if (error < 0)
goto error;
+ nfs_server_set_init_caps(server);
+
/* Preserve the values of mount_server-related mount options */
if (ctx->mount_server.addrlen) {
memcpy(&server->mountd_address, &ctx->mount_server.address,
@@ -814,7 +853,6 @@ static void nfs_server_set_fsinfo(struct nfs_server *server,
server->wsize = max_rpc_payload;
if (server->wsize > NFS_MAX_FILE_IO_SIZE)
server->wsize = NFS_MAX_FILE_IO_SIZE;
- server->wpages = (server->wsize + PAGE_SIZE - 1) >> PAGE_SHIFT;
server->wtmult = nfs_block_bits(fsinfo->wtmult, NULL);
@@ -831,7 +869,6 @@ static void nfs_server_set_fsinfo(struct nfs_server *server,
server->maxfilesize = fsinfo->maxfilesize;
- server->time_delta = fsinfo->time_delta;
server->change_attr_type = fsinfo->change_attr_type;
server->clone_blksize = fsinfo->clone_blksize;
@@ -936,7 +973,6 @@ void nfs_server_copy_userdata(struct nfs_server *target, struct nfs_server *sour
target->acregmax = source->acregmax;
target->acdirmin = source->acdirmin;
target->acdirmax = source->acdirmax;
- target->caps = source->caps;
target->options = source->options;
target->auth_info = source->auth_info;
target->port = source->port;
@@ -1007,6 +1043,7 @@ struct nfs_server *nfs_alloc_server(void)
INIT_LIST_HEAD(&server->ss_src_copies);
atomic_set(&server->active, 0);
+ atomic_long_set(&server->nr_active_delegations, 0);
server->io_stats = nfs_alloc_iostats();
if (!server->io_stats) {
@@ -1170,6 +1207,8 @@ struct nfs_server *nfs_clone_server(struct nfs_server *source,
if (error < 0)
goto out_free_server;
+ nfs_server_set_init_caps(server);
+
/* probe the filesystem info for this server filesystem */
error = nfs_probe_server(server, fh);
if (error < 0)
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 10ef46e29b25..9d3a5f29f17f 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -27,8 +27,15 @@
#define NFS_DEFAULT_DELEGATION_WATERMARK (5000U)
-static atomic_long_t nfs_active_delegations;
static unsigned nfs_delegation_watermark = NFS_DEFAULT_DELEGATION_WATERMARK;
+module_param_named(delegation_watermark, nfs_delegation_watermark, uint, 0644);
+
+static struct hlist_head *nfs_delegation_hash(struct nfs_server *server,
+ const struct nfs_fh *fhandle)
+{
+ return server->delegation_hash_table +
+ (nfs_fhandle_hash(fhandle) & server->delegation_hash_mask);
+}
static void __nfs_free_delegation(struct nfs_delegation *delegation)
{
@@ -37,11 +44,12 @@ static void __nfs_free_delegation(struct nfs_delegation *delegation)
kfree_rcu(delegation, rcu);
}
-static void nfs_mark_delegation_revoked(struct nfs_delegation *delegation)
+static void nfs_mark_delegation_revoked(struct nfs_server *server,
+ struct nfs_delegation *delegation)
{
if (!test_and_set_bit(NFS_DELEGATION_REVOKED, &delegation->flags)) {
delegation->stateid.type = NFS4_INVALID_STATEID_TYPE;
- atomic_long_dec(&nfs_active_delegations);
+ atomic_long_dec(&server->nr_active_delegations);
if (!test_bit(NFS_DELEGATION_RETURNING, &delegation->flags))
nfs_clear_verifier_delegated(delegation->inode);
}
@@ -59,9 +67,10 @@ static void nfs_put_delegation(struct nfs_delegation *delegation)
__nfs_free_delegation(delegation);
}
-static void nfs_free_delegation(struct nfs_delegation *delegation)
+static void nfs_free_delegation(struct nfs_server *server,
+ struct nfs_delegation *delegation)
{
- nfs_mark_delegation_revoked(delegation);
+ nfs_mark_delegation_revoked(server, delegation);
nfs_put_delegation(delegation);
}
@@ -237,34 +246,34 @@ void nfs_inode_reclaim_delegation(struct inode *inode, const struct cred *cred,
rcu_read_lock();
delegation = rcu_dereference(NFS_I(inode)->delegation);
- if (delegation != NULL) {
- spin_lock(&delegation->lock);
- nfs4_stateid_copy(&delegation->stateid, stateid);
- delegation->type = type;
- delegation->pagemod_limit = pagemod_limit;
- oldcred = delegation->cred;
- delegation->cred = get_cred(cred);
- switch (deleg_type) {
- case NFS4_OPEN_DELEGATE_READ_ATTRS_DELEG:
- case NFS4_OPEN_DELEGATE_WRITE_ATTRS_DELEG:
- set_bit(NFS_DELEGATION_DELEGTIME, &delegation->flags);
- break;
- default:
- clear_bit(NFS_DELEGATION_DELEGTIME, &delegation->flags);
- }
- clear_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags);
- if (test_and_clear_bit(NFS_DELEGATION_REVOKED,
- &delegation->flags))
- atomic_long_inc(&nfs_active_delegations);
- spin_unlock(&delegation->lock);
- rcu_read_unlock();
- put_cred(oldcred);
- trace_nfs4_reclaim_delegation(inode, type);
- } else {
+ if (!delegation) {
rcu_read_unlock();
nfs_inode_set_delegation(inode, cred, type, stateid,
pagemod_limit, deleg_type);
+ return;
}
+
+ spin_lock(&delegation->lock);
+ nfs4_stateid_copy(&delegation->stateid, stateid);
+ delegation->type = type;
+ delegation->pagemod_limit = pagemod_limit;
+ oldcred = delegation->cred;
+ delegation->cred = get_cred(cred);
+ switch (deleg_type) {
+ case NFS4_OPEN_DELEGATE_READ_ATTRS_DELEG:
+ case NFS4_OPEN_DELEGATE_WRITE_ATTRS_DELEG:
+ set_bit(NFS_DELEGATION_DELEGTIME, &delegation->flags);
+ break;
+ default:
+ clear_bit(NFS_DELEGATION_DELEGTIME, &delegation->flags);
+ }
+ clear_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags);
+ if (test_and_clear_bit(NFS_DELEGATION_REVOKED, &delegation->flags))
+ atomic_long_inc(&NFS_SERVER(inode)->nr_active_delegations);
+ spin_unlock(&delegation->lock);
+ rcu_read_unlock();
+ put_cred(oldcred);
+ trace_nfs4_reclaim_delegation(inode, type);
}
static int nfs_do_return_delegation(struct inode *inode,
@@ -355,6 +364,8 @@ nfs_detach_delegation_locked(struct nfs_inode *nfsi,
rcu_dereference_protected(nfsi->delegation,
lockdep_is_held(&clp->cl_lock));
+ trace_nfs4_detach_delegation(&nfsi->vfs_inode, delegation->type);
+
if (deleg_cur == NULL || delegation != deleg_cur)
return NULL;
@@ -363,6 +374,7 @@ nfs_detach_delegation_locked(struct nfs_inode *nfsi,
spin_unlock(&delegation->lock);
return NULL;
}
+ hlist_del_init_rcu(&delegation->hash);
list_del_rcu(&delegation->super_list);
delegation->inode = NULL;
rcu_assign_pointer(nfsi->delegation, NULL);
@@ -410,7 +422,8 @@ nfs_update_delegation_cred(struct nfs_delegation *delegation,
}
static void
-nfs_update_inplace_delegation(struct nfs_delegation *delegation,
+nfs_update_inplace_delegation(struct nfs_server *server,
+ struct nfs_delegation *delegation,
const struct nfs_delegation *update)
{
if (nfs4_stateid_is_newer(&update->stateid, &delegation->stateid)) {
@@ -423,7 +436,7 @@ nfs_update_inplace_delegation(struct nfs_delegation *delegation,
nfs_update_delegation_cred(delegation, update->cred);
/* smp_mb__before_atomic() is implicit due to xchg() */
clear_bit(NFS_DELEGATION_REVOKED, &delegation->flags);
- atomic_long_inc(&nfs_active_delegations);
+ atomic_long_inc(&server->nr_active_delegations);
}
}
}
@@ -478,7 +491,7 @@ int nfs_inode_set_delegation(struct inode *inode, const struct cred *cred,
if (nfs4_stateid_match_other(&old_delegation->stateid,
&delegation->stateid)) {
spin_lock(&old_delegation->lock);
- nfs_update_inplace_delegation(old_delegation,
+ nfs_update_inplace_delegation(server, old_delegation,
delegation);
spin_unlock(&old_delegation->lock);
goto out;
@@ -524,10 +537,12 @@ add_new:
spin_unlock(&inode->i_lock);
list_add_tail_rcu(&delegation->super_list, &server->delegations);
+ hlist_add_head_rcu(&delegation->hash,
+ nfs_delegation_hash(server, &NFS_I(inode)->fh));
rcu_assign_pointer(nfsi->delegation, delegation);
delegation = NULL;
- atomic_long_inc(&nfs_active_delegations);
+ atomic_long_inc(&server->nr_active_delegations);
trace_nfs4_set_delegation(inode, type);
@@ -541,7 +556,7 @@ out:
__nfs_free_delegation(delegation);
if (freeme != NULL) {
nfs_do_return_delegation(inode, freeme, 0);
- nfs_free_delegation(freeme);
+ nfs_free_delegation(server, freeme);
}
return status;
}
@@ -592,6 +607,8 @@ static bool nfs_delegation_need_return(struct nfs_delegation *delegation)
{
bool ret = false;
+ trace_nfs_delegation_need_return(delegation);
+
if (test_and_clear_bit(NFS_DELEGATION_RETURN, &delegation->flags))
ret = true;
if (test_bit(NFS_DELEGATION_RETURNING, &delegation->flags) ||
@@ -751,7 +768,7 @@ void nfs_inode_evict_delegation(struct inode *inode)
set_bit(NFS_DELEGATION_RETURNING, &delegation->flags);
set_bit(NFS_DELEGATION_INODE_FREEING, &delegation->flags);
nfs_do_return_delegation(inode, delegation, 1);
- nfs_free_delegation(delegation);
+ nfs_free_delegation(NFS_SERVER(inode), delegation);
}
}
@@ -837,7 +854,8 @@ void nfs4_inode_return_delegation_on_close(struct inode *inode)
if (!delegation)
goto out;
if (test_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags) ||
- atomic_long_read(&nfs_active_delegations) >= nfs_delegation_watermark) {
+ atomic_long_read(&NFS_SERVER(inode)->nr_active_delegations) >=
+ nfs_delegation_watermark) {
spin_lock(&delegation->lock);
if (delegation->inode &&
list_empty(&NFS_I(inode)->open_files) &&
@@ -1013,7 +1031,7 @@ static void nfs_revoke_delegation(struct inode *inode,
}
spin_unlock(&delegation->lock);
}
- nfs_mark_delegation_revoked(delegation);
+ nfs_mark_delegation_revoked(NFS_SERVER(inode), delegation);
ret = true;
out:
rcu_read_unlock();
@@ -1045,7 +1063,7 @@ void nfs_delegation_mark_returned(struct inode *inode,
delegation->stateid.seqid = stateid->seqid;
}
- nfs_mark_delegation_revoked(delegation);
+ nfs_mark_delegation_revoked(NFS_SERVER(inode), delegation);
clear_bit(NFS_DELEGATION_RETURNING, &delegation->flags);
spin_unlock(&delegation->lock);
if (nfs_detach_delegation(NFS_I(inode), delegation, NFS_SERVER(inode)))
@@ -1158,11 +1176,12 @@ static struct inode *
nfs_delegation_find_inode_server(struct nfs_server *server,
const struct nfs_fh *fhandle)
{
+ struct hlist_head *head = nfs_delegation_hash(server, fhandle);
struct nfs_delegation *delegation;
struct super_block *freeme = NULL;
struct inode *res = NULL;
- list_for_each_entry_rcu(delegation, &server->delegations, super_list) {
+ hlist_for_each_entry_rcu(delegation, head, hash) {
spin_lock(&delegation->lock);
if (delegation->inode != NULL &&
!test_bit(NFS_DELEGATION_REVOKED, &delegation->flags) &&
@@ -1265,7 +1284,7 @@ restart:
if (delegation != NULL) {
if (nfs_detach_delegation(NFS_I(inode), delegation,
server) != NULL)
- nfs_free_delegation(delegation);
+ nfs_free_delegation(server, delegation);
/* Match nfs_start_delegation_return_locked */
nfs_put_delegation(delegation);
}
@@ -1570,4 +1589,17 @@ out:
return ret;
}
-module_param_named(delegation_watermark, nfs_delegation_watermark, uint, 0644);
+int nfs4_delegation_hash_alloc(struct nfs_server *server)
+{
+ int delegation_buckets, i;
+
+ delegation_buckets = roundup_pow_of_two(nfs_delegation_watermark / 16);
+ server->delegation_hash_mask = delegation_buckets - 1;
+ server->delegation_hash_table = kmalloc_array(delegation_buckets,
+ sizeof(*server->delegation_hash_table), GFP_KERNEL);
+ if (!server->delegation_hash_table)
+ return -ENOMEM;
+ for (i = 0; i < delegation_buckets; i++)
+ INIT_HLIST_HEAD(&server->delegation_hash_table[i]);
+ return 0;
+}
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h
index 8ff5ab9c5c25..08ec2e9c68a4 100644
--- a/fs/nfs/delegation.h
+++ b/fs/nfs/delegation.h
@@ -14,6 +14,7 @@
* NFSv4 delegation
*/
struct nfs_delegation {
+ struct hlist_node hash;
struct list_head super_list;
const struct cred *cred;
struct inode *inode;
@@ -123,4 +124,6 @@ static inline int nfs_have_delegated_mtime(struct inode *inode)
NFS_DELEGATION_FLAG_TIME);
}
+int nfs4_delegation_hash_alloc(struct nfs_server *server);
+
#endif
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index d0e0b435a843..d81217923936 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1828,9 +1828,7 @@ static void block_revalidate(struct dentry *dentry)
static void unblock_revalidate(struct dentry *dentry)
{
- /* store_release ensures wait_var_event() sees the update */
- smp_store_release(&dentry->d_fsdata, NULL);
- wake_up_var(&dentry->d_fsdata);
+ store_release_wake_up(&dentry->d_fsdata, NULL);
}
/*
diff --git a/fs/nfs/export.c b/fs/nfs/export.c
index e9c233b6fd20..a10dd5f9d078 100644
--- a/fs/nfs/export.c
+++ b/fs/nfs/export.c
@@ -66,14 +66,21 @@ nfs_fh_to_dentry(struct super_block *sb, struct fid *fid,
{
struct nfs_fattr *fattr = NULL;
struct nfs_fh *server_fh = nfs_exp_embedfh(fid->raw);
- size_t fh_size = offsetof(struct nfs_fh, data) + server_fh->size;
+ size_t fh_size = offsetof(struct nfs_fh, data);
const struct nfs_rpc_ops *rpc_ops;
struct dentry *dentry;
struct inode *inode;
- int len = EMBED_FH_OFF + XDR_QUADLEN(fh_size);
+ int len = EMBED_FH_OFF;
u32 *p = fid->raw;
int ret;
+ /* Initial check of bounds */
+ if (fh_len < len + XDR_QUADLEN(fh_size) ||
+ fh_len > XDR_QUADLEN(NFS_MAXFHSIZE))
+ return NULL;
+ /* Calculate embedded filehandle size */
+ fh_size += server_fh->size;
+ len += XDR_QUADLEN(fh_size);
/* NULL translates to ESTALE */
if (fh_len < len || fh_type != len)
return NULL;
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index 4bea008dbebd..8dc921d83538 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -762,14 +762,14 @@ ff_layout_choose_ds_for_read(struct pnfs_layout_segment *lseg,
{
struct nfs4_ff_layout_segment *fls = FF_LAYOUT_LSEG(lseg);
struct nfs4_ff_layout_mirror *mirror;
- struct nfs4_pnfs_ds *ds;
+ struct nfs4_pnfs_ds *ds = ERR_PTR(-EAGAIN);
u32 idx;
/* mirrors are initially sorted by efficiency */
for (idx = start_idx; idx < fls->mirror_array_cnt; idx++) {
mirror = FF_LAYOUT_COMP(lseg, idx);
ds = nfs4_ff_layout_prepare_ds(lseg, mirror, false);
- if (!ds)
+ if (IS_ERR(ds))
continue;
if (check_device &&
@@ -777,10 +777,10 @@ ff_layout_choose_ds_for_read(struct pnfs_layout_segment *lseg,
continue;
*best_idx = idx;
- return ds;
+ break;
}
- return NULL;
+ return ds;
}
static struct nfs4_pnfs_ds *
@@ -942,7 +942,7 @@ retry:
for (i = 0; i < pgio->pg_mirror_count; i++) {
mirror = FF_LAYOUT_COMP(pgio->pg_lseg, i);
ds = nfs4_ff_layout_prepare_ds(pgio->pg_lseg, mirror, true);
- if (!ds) {
+ if (IS_ERR(ds)) {
if (!ff_layout_no_fallback_to_mds(pgio->pg_lseg))
goto out_mds;
pnfs_generic_pg_cleanup(pgio);
@@ -1867,6 +1867,7 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr)
u32 idx = hdr->pgio_mirror_idx;
int vers;
struct nfs_fh *fh;
+ bool ds_fatal_error = false;
dprintk("--> %s ino %lu pgbase %u req %zu@%llu\n",
__func__, hdr->inode->i_ino,
@@ -1874,8 +1875,10 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr)
mirror = FF_LAYOUT_COMP(lseg, idx);
ds = nfs4_ff_layout_prepare_ds(lseg, mirror, false);
- if (!ds)
+ if (IS_ERR(ds)) {
+ ds_fatal_error = nfs_error_is_fatal(PTR_ERR(ds));
goto out_failed;
+ }
ds_clnt = nfs4_ff_find_or_create_ds_client(mirror, ds->ds_clp,
hdr->inode);
@@ -1923,7 +1926,7 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr)
return PNFS_ATTEMPTED;
out_failed:
- if (ff_layout_avoid_mds_available_ds(lseg))
+ if (ff_layout_avoid_mds_available_ds(lseg) && !ds_fatal_error)
return PNFS_TRY_AGAIN;
trace_pnfs_mds_fallback_read_pagelist(hdr->inode,
hdr->args.offset, hdr->args.count,
@@ -1945,11 +1948,14 @@ ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync)
int vers;
struct nfs_fh *fh;
u32 idx = hdr->pgio_mirror_idx;
+ bool ds_fatal_error = false;
mirror = FF_LAYOUT_COMP(lseg, idx);
ds = nfs4_ff_layout_prepare_ds(lseg, mirror, true);
- if (!ds)
+ if (IS_ERR(ds)) {
+ ds_fatal_error = nfs_error_is_fatal(PTR_ERR(ds));
goto out_failed;
+ }
ds_clnt = nfs4_ff_find_or_create_ds_client(mirror, ds->ds_clp,
hdr->inode);
@@ -2000,7 +2006,7 @@ ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync)
return PNFS_ATTEMPTED;
out_failed:
- if (ff_layout_avoid_mds_available_ds(lseg))
+ if (ff_layout_avoid_mds_available_ds(lseg) && !ds_fatal_error)
return PNFS_TRY_AGAIN;
trace_pnfs_mds_fallback_write_pagelist(hdr->inode,
hdr->args.offset, hdr->args.count,
@@ -2043,7 +2049,7 @@ static int ff_layout_initiate_commit(struct nfs_commit_data *data, int how)
idx = calc_ds_index_from_commit(lseg, data->ds_commit_index);
mirror = FF_LAYOUT_COMP(lseg, idx);
ds = nfs4_ff_layout_prepare_ds(lseg, mirror, true);
- if (!ds)
+ if (IS_ERR(ds))
goto out_err;
ds_clnt = nfs4_ff_find_or_create_ds_client(mirror, ds->ds_clp,
diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
index 656d5c50bbce..30365ec782bb 100644
--- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c
+++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
@@ -370,11 +370,11 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg,
struct nfs4_ff_layout_mirror *mirror,
bool fail_return)
{
- struct nfs4_pnfs_ds *ds = NULL;
+ struct nfs4_pnfs_ds *ds;
struct inode *ino = lseg->pls_layout->plh_inode;
struct nfs_server *s = NFS_SERVER(ino);
unsigned int max_payload;
- int status;
+ int status = -EAGAIN;
if (!ff_layout_init_mirror_ds(lseg->pls_layout, mirror))
goto noconnect;
@@ -418,7 +418,7 @@ noconnect:
ff_layout_send_layouterror(lseg);
if (fail_return || !ff_layout_has_available_ds(lseg))
pnfs_error_mark_layout_for_return(ino, lseg);
- ds = NULL;
+ ds = ERR_PTR(status);
out:
return ds;
}
diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c
index 13f71ca8c974..9e94d18448ff 100644
--- a/fs/nfs/fs_context.c
+++ b/fs/nfs/fs_context.c
@@ -96,6 +96,8 @@ enum nfs_param {
Opt_wsize,
Opt_write,
Opt_xprtsec,
+ Opt_cert_serial,
+ Opt_privkey_serial,
};
enum {
@@ -221,6 +223,8 @@ static const struct fs_parameter_spec nfs_fs_parameters[] = {
fsparam_enum ("write", Opt_write, nfs_param_enums_write),
fsparam_u32 ("wsize", Opt_wsize),
fsparam_string("xprtsec", Opt_xprtsec),
+ fsparam_s32("cert_serial", Opt_cert_serial),
+ fsparam_s32("privkey_serial", Opt_privkey_serial),
{}
};
@@ -551,6 +555,32 @@ static int nfs_parse_version_string(struct fs_context *fc,
return 0;
}
+#ifdef CONFIG_KEYS
+/*
+ * Validate a key serial number supplied through a mount option.
+ *
+ * Returns 0 when key_lookup() finds the key and neither
+ * KEY_FLAG_REVOKED nor KEY_FLAG_INVALIDATED is set on it,
+ * -EKEYREVOKED when either flag is set, or the error encoded in the
+ * pointer returned by key_lookup() when the lookup fails.
+ */
+static int nfs_tls_key_verify(key_serial_t key_id)
+{
+	struct key *found;
+	int ret;
+
+	found = key_lookup(key_id);
+	if (IS_ERR(found)) {
+		pr_err("key id %08x not found\n", key_id);
+		return PTR_ERR(found);
+	}
+
+	if (!test_bit(KEY_FLAG_REVOKED, &found->flags) &&
+	    !test_bit(KEY_FLAG_INVALIDATED, &found->flags)) {
+		ret = 0;
+	} else {
+		pr_err("key id %08x revoked\n", key_id);
+		ret = -EKEYREVOKED;
+	}
+
+	/* Done inspecting the key; key_put() it on both outcomes. */
+	key_put(found);
+	return ret;
+}
+#else
+/* Built without CONFIG_KEYS: every serial number is rejected. */
+static inline int nfs_tls_key_verify(key_serial_t key_id)
+{
+	return -ENOENT;
+}
+#endif /* CONFIG_KEYS */
+
/*
* Parse a single mount parameter.
*/
@@ -807,6 +837,18 @@ static int nfs_fs_context_parse_param(struct fs_context *fc,
if (ret < 0)
return ret;
break;
+ case Opt_cert_serial:
+ ret = nfs_tls_key_verify(result.int_32);
+ if (ret < 0)
+ return ret;
+ ctx->xprtsec.cert_serial = result.int_32;
+ break;
+ case Opt_privkey_serial:
+ ret = nfs_tls_key_verify(result.int_32);
+ if (ret < 0)
+ return ret;
+ ctx->xprtsec.privkey_serial = result.int_32;
+ break;
case Opt_proto:
if (!param->string)
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index a2fa6bc4d74e..338ef77ae423 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -197,6 +197,7 @@ void nfs_set_cache_invalid(struct inode *inode, unsigned long flags)
if (!(flags & NFS_INO_REVAL_FORCED))
flags &= ~(NFS_INO_INVALID_MODE |
NFS_INO_INVALID_OTHER |
+ NFS_INO_INVALID_BTIME |
NFS_INO_INVALID_XATTR);
flags &= ~(NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_SIZE);
}
@@ -522,6 +523,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
inode_set_atime(inode, 0, 0);
inode_set_mtime(inode, 0, 0);
inode_set_ctime(inode, 0, 0);
+ memset(&nfsi->btime, 0, sizeof(nfsi->btime));
inode_set_iversion_raw(inode, 0);
inode->i_size = 0;
clear_nlink(inode);
@@ -545,6 +547,10 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
inode_set_ctime_to_ts(inode, fattr->ctime);
else if (fattr_supported & NFS_ATTR_FATTR_CTIME)
nfs_set_cache_invalid(inode, NFS_INO_INVALID_CTIME);
+ if (fattr->valid & NFS_ATTR_FATTR_BTIME)
+ nfsi->btime = fattr->btime;
+ else if (fattr_supported & NFS_ATTR_FATTR_BTIME)
+ nfs_set_cache_invalid(inode, NFS_INO_INVALID_BTIME);
if (fattr->valid & NFS_ATTR_FATTR_CHANGE)
inode_set_iversion_raw(inode, fattr->change_attr);
else
@@ -931,6 +937,7 @@ static void nfs_readdirplus_parent_cache_hit(struct dentry *dentry)
static u32 nfs_get_valid_attrmask(struct inode *inode)
{
+ u64 fattr_valid = NFS_SERVER(inode)->fattr_valid;
unsigned long cache_validity = READ_ONCE(NFS_I(inode)->cache_validity);
u32 reply_mask = STATX_INO | STATX_TYPE;
@@ -950,6 +957,9 @@ static u32 nfs_get_valid_attrmask(struct inode *inode)
reply_mask |= STATX_UID | STATX_GID;
if (!(cache_validity & NFS_INO_INVALID_BLOCKS))
reply_mask |= STATX_BLOCKS;
+ if (!(cache_validity & NFS_INO_INVALID_BTIME) &&
+ (fattr_valid & NFS_ATTR_FATTR_BTIME))
+ reply_mask |= STATX_BTIME;
if (!(cache_validity & NFS_INO_INVALID_CHANGE))
reply_mask |= STATX_CHANGE_COOKIE;
return reply_mask;
@@ -960,6 +970,7 @@ int nfs_getattr(struct mnt_idmap *idmap, const struct path *path,
{
struct inode *inode = d_inode(path->dentry);
struct nfs_server *server = NFS_SERVER(inode);
+ u64 fattr_valid = server->fattr_valid;
unsigned long cache_validity;
int err = 0;
bool force_sync = query_flags & AT_STATX_FORCE_SYNC;
@@ -970,9 +981,12 @@ int nfs_getattr(struct mnt_idmap *idmap, const struct path *path,
request_mask &= STATX_TYPE | STATX_MODE | STATX_NLINK | STATX_UID |
STATX_GID | STATX_ATIME | STATX_MTIME | STATX_CTIME |
- STATX_INO | STATX_SIZE | STATX_BLOCKS |
+ STATX_INO | STATX_SIZE | STATX_BLOCKS | STATX_BTIME |
STATX_CHANGE_COOKIE;
+ if (!(fattr_valid & NFS_ATTR_FATTR_BTIME))
+ request_mask &= ~STATX_BTIME;
+
if ((query_flags & AT_STATX_DONT_SYNC) && !force_sync) {
if (readdirplus_enabled)
nfs_readdirplus_parent_cache_hit(path->dentry);
@@ -1004,7 +1018,7 @@ int nfs_getattr(struct mnt_idmap *idmap, const struct path *path,
/* Is the user requesting attributes that might need revalidation? */
if (!(request_mask & (STATX_MODE|STATX_NLINK|STATX_ATIME|STATX_CTIME|
STATX_MTIME|STATX_UID|STATX_GID|
- STATX_SIZE|STATX_BLOCKS|
+ STATX_SIZE|STATX_BLOCKS|STATX_BTIME|
STATX_CHANGE_COOKIE)))
goto out_no_revalidate;
@@ -1028,6 +1042,8 @@ int nfs_getattr(struct mnt_idmap *idmap, const struct path *path,
do_update |= cache_validity & NFS_INO_INVALID_OTHER;
if (request_mask & STATX_BLOCKS)
do_update |= cache_validity & NFS_INO_INVALID_BLOCKS;
+ if (request_mask & STATX_BTIME)
+ do_update |= cache_validity & NFS_INO_INVALID_BTIME;
if (do_update) {
if (readdirplus_enabled)
@@ -1049,6 +1065,7 @@ out_no_revalidate:
stat->attributes |= STATX_ATTR_CHANGE_MONOTONIC;
if (S_ISDIR(inode->i_mode))
stat->blksize = NFS_SERVER(inode)->dtsize;
+ stat->btime = NFS_I(inode)->btime;
out:
trace_nfs_getattr_exit(inode, err);
return err;
@@ -1943,7 +1960,7 @@ static int nfs_inode_finish_partial_attr_update(const struct nfs_fattr *fattr,
NFS_INO_INVALID_ATIME | NFS_INO_INVALID_CTIME |
NFS_INO_INVALID_MTIME | NFS_INO_INVALID_SIZE |
NFS_INO_INVALID_BLOCKS | NFS_INO_INVALID_OTHER |
- NFS_INO_INVALID_NLINK;
+ NFS_INO_INVALID_NLINK | NFS_INO_INVALID_BTIME;
unsigned long cache_validity = NFS_I(inode)->cache_validity;
enum nfs4_change_attr_type ctype = NFS_SERVER(inode)->change_attr_type;
@@ -2209,7 +2226,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
bool attr_changed = false;
bool have_delegation;
- dfprintk(VFS, "NFS: %s(%s/%lu fh_crc=0x%08x ct=%d info=0x%x)\n",
+ dfprintk(VFS, "NFS: %s(%s/%lu fh_crc=0x%08x ct=%d info=0x%llx)\n",
__func__, inode->i_sb->s_id, inode->i_ino,
nfs_display_fhandle_hash(NFS_FH(inode)),
atomic_read(&inode->i_count), fattr->valid);
@@ -2304,7 +2321,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
| NFS_INO_INVALID_BLOCKS
| NFS_INO_INVALID_NLINK
| NFS_INO_INVALID_MODE
- | NFS_INO_INVALID_OTHER;
+ | NFS_INO_INVALID_OTHER
+ | NFS_INO_INVALID_BTIME;
if (S_ISDIR(inode->i_mode))
nfs_force_lookup_revalidate(inode);
attr_changed = true;
@@ -2338,6 +2356,12 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
nfsi->cache_validity |=
save_cache_validity & NFS_INO_INVALID_CTIME;
+ if (fattr->valid & NFS_ATTR_FATTR_BTIME)
+ nfsi->btime = fattr->btime;
+ else if (fattr_supported & NFS_ATTR_FATTR_BTIME)
+ nfsi->cache_validity |=
+ save_cache_validity & NFS_INO_INVALID_BTIME;
+
/* Check if our cached file size is stale */
if (fattr->valid & NFS_ATTR_FATTR_SIZE) {
new_isize = nfs_size_to_loff_t(fattr->size);
@@ -2625,6 +2649,35 @@ static struct pernet_operations nfs_net_ops = {
.size = sizeof(struct nfs_net),
};
+#ifdef CONFIG_KEYS
+/* Result of keyring_alloc() in nfs_init_keyring(); put in nfs_exit_keyring(). */
+static struct key *nfs_keyring;
+
+/*
+ * Allocate the ".nfs" keyring with root uid/gid, the caller's credentials,
+ * and all possessor/user permissions except SETATTR.
+ *
+ * Returns 0 on success or the error encoded in the pointer returned by
+ * keyring_alloc().
+ */
+static int __init nfs_init_keyring(void)
+{
+	struct key *ring;
+
+	ring = keyring_alloc(".nfs", GLOBAL_ROOT_UID, GLOBAL_ROOT_GID,
+			     current_cred(),
+			     (KEY_POS_ALL & ~KEY_POS_SETATTR) |
+			     (KEY_USR_ALL & ~KEY_USR_SETATTR),
+			     KEY_ALLOC_NOT_IN_QUOTA, NULL, NULL);
+	nfs_keyring = ring;
+	return PTR_ERR_OR_ZERO(ring);
+}
+
+/* Drop the reference held in nfs_keyring. */
+static void nfs_exit_keyring(void)
+{
+	key_put(nfs_keyring);
+}
+#else
+/* Built without CONFIG_KEYS: nothing to set up, report success. */
+static inline int nfs_init_keyring(void)
+{
+	return 0;
+}
+
+/* Built without CONFIG_KEYS: nothing to tear down. */
+static inline void nfs_exit_keyring(void)
+{
+}
+#endif /* CONFIG_KEYS */
+
/*
* Initialize NFS
*/
@@ -2632,6 +2685,10 @@ static int __init init_nfs_fs(void)
{
int err;
+ err = nfs_init_keyring();
+ if (err)
+ return err;
+
err = nfs_sysfs_init();
if (err < 0)
goto out10;
@@ -2692,6 +2749,7 @@ out7:
out9:
nfs_sysfs_exit();
out10:
+ nfs_exit_keyring();
return err;
}
@@ -2707,6 +2765,7 @@ static void __exit exit_nfs_fs(void)
nfs_fs_proc_exit();
nfsiod_stop();
nfs_sysfs_exit();
+ nfs_exit_keyring();
}
/* Not quite true; I just maintain it */
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 26551ff09a52..74d712b58423 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -207,7 +207,6 @@ struct nfs_mount_request {
};
extern int nfs_mount(struct nfs_mount_request *info, int timeo, int retrans);
-extern void nfs_umount(const struct nfs_mount_request *info);
/* client.c */
extern const struct rpc_program nfs_program;
@@ -232,7 +231,7 @@ extern struct nfs_client *
nfs4_find_client_sessionid(struct net *, const struct sockaddr *,
struct nfs4_sessionid *, u32);
extern struct nfs_server *nfs_create_server(struct fs_context *);
-extern void nfs4_server_set_init_caps(struct nfs_server *);
+extern void nfs_server_set_init_caps(struct nfs_server *);
extern struct nfs_server *nfs4_create_server(struct fs_context *);
extern struct nfs_server *nfs4_create_referral_server(struct fs_context *);
extern int nfs4_update_server(struct nfs_server *server, const char *hostname,
@@ -671,9 +670,12 @@ nfs_write_match_verf(const struct nfs_writeverf *verf,
static inline gfp_t nfs_io_gfp_mask(void)
{
-	if (current->flags & PF_WQ_WORKER)
-		return GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN;
-	return GFP_KERNEL;
+	const gfp_t mask = current_gfp_context(GFP_KERNEL);
+
+	/*
+	 * Workqueue workers additionally get __GFP_NORETRY | __GFP_NOWARN,
+	 * but only when current_gfp_context() handed GFP_KERNEL back
+	 * unchanged; any other mask is returned exactly as computed.
+	 */
+	if (mask == GFP_KERNEL && (current->flags & PF_WQ_WORKER))
+		return mask | __GFP_NORETRY | __GFP_NOWARN;
+	return mask;
}
/*
diff --git a/fs/nfs/localio.c b/fs/nfs/localio.c
index 510d0a16cfe9..bd5fca285899 100644
--- a/fs/nfs/localio.c
+++ b/fs/nfs/localio.c
@@ -500,14 +500,13 @@ nfs_copy_boot_verifier(struct nfs_write_verifier *verifier, struct inode *inode)
{
struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
u32 *verf = (u32 *)verifier->data;
- int seq = 0;
+ unsigned int seq;
do {
- read_seqbegin_or_lock(&clp->cl_boot_lock, &seq);
+ seq = read_seqbegin(&clp->cl_boot_lock);
verf[0] = (u32)clp->cl_nfssvc_boot.tv_sec;
verf[1] = (u32)clp->cl_nfssvc_boot.tv_nsec;
- } while (need_seqretry(&clp->cl_boot_lock, seq));
- done_seqretry(&clp->cl_boot_lock, seq);
+ } while (read_seqretry(&clp->cl_boot_lock, seq));
}
static void
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index 57c9dd700b58..db8dfb920394 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -223,74 +223,6 @@ out_mnt_err:
goto out;
}
-/**
- * nfs_umount - Notify a server that we have unmounted this export
- * @info: pointer to umount request arguments
- *
- * MOUNTPROC_UMNT is advisory, so we set a short timeout, and always
- * use UDP.
- */
-void nfs_umount(const struct nfs_mount_request *info)
-{
- static const struct rpc_timeout nfs_umnt_timeout = {
- .to_initval = 1 * HZ,
- .to_maxval = 3 * HZ,
- .to_retries = 2,
- };
- struct rpc_create_args args = {
- .net = info->net,
- .protocol = IPPROTO_UDP,
- .address = (struct sockaddr *)info->sap,
- .addrsize = info->salen,
- .timeout = &nfs_umnt_timeout,
- .servername = info->hostname,
- .program = &mnt_program,
- .version = info->version,
- .authflavor = RPC_AUTH_UNIX,
- .flags = RPC_CLNT_CREATE_NOPING,
- .cred = current_cred(),
- };
- struct rpc_message msg = {
- .rpc_argp = info->dirpath,
- };
- struct rpc_clnt *clnt;
- int status;
-
- if (strlen(info->dirpath) > MNTPATHLEN)
- return;
-
- if (info->noresvport)
- args.flags |= RPC_CLNT_CREATE_NONPRIVPORT;
-
- clnt = rpc_create(&args);
- if (IS_ERR(clnt))
- goto out_clnt_err;
-
- dprintk("NFS: sending UMNT request for %s:%s\n",
- (info->hostname ? info->hostname : "server"), info->dirpath);
-
- if (info->version == NFS_MNT3_VERSION)
- msg.rpc_proc = &clnt->cl_procinfo[MOUNTPROC3_UMNT];
- else
- msg.rpc_proc = &clnt->cl_procinfo[MOUNTPROC_UMNT];
-
- status = rpc_call_sync(clnt, &msg, 0);
- rpc_shutdown_client(clnt);
-
- if (unlikely(status < 0))
- goto out_call_err;
-
- return;
-
-out_clnt_err:
- dprintk("NFS: failed to create UMNT RPC client, status=%ld\n",
- PTR_ERR(clnt));
- return;
-
-out_call_err:
- dprintk("NFS: UMNT request failed, status=%d\n", status);
-}
-
/*
* XDR encode/decode functions for MOUNT
*/
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index d3ca91f60fc1..c34c89af9c7d 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -63,7 +63,7 @@ struct nfs4_minor_version_ops {
bool (*match_stateid)(const nfs4_stateid *,
const nfs4_stateid *);
int (*find_root_sec)(struct nfs_server *, struct nfs_fh *,
- struct nfs_fsinfo *);
+ struct nfs_fattr *);
void (*free_lock_state)(struct nfs_server *,
struct nfs4_lock_state *);
int (*test_and_free_expired)(struct nfs_server *,
@@ -296,7 +296,8 @@ extern int nfs4_call_sync(struct rpc_clnt *, struct nfs_server *,
extern void nfs4_init_sequence(struct nfs4_sequence_args *, struct nfs4_sequence_res *, int, int);
extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, const struct cred *, struct nfs4_setclientid_res *);
extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct nfs4_setclientid_res *arg, const struct cred *);
-extern int nfs4_proc_get_rootfh(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *, bool);
+extern int nfs4_proc_get_rootfh(struct nfs_server *, struct nfs_fh *,
+ struct nfs_fattr *, bool);
extern int nfs4_proc_bind_conn_to_session(struct nfs_client *, const struct cred *cred);
extern int nfs4_proc_exchange_id(struct nfs_client *clp, const struct cred *cred);
extern int nfs4_destroy_clientid(struct nfs_client *clp);
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index 162c85a83a14..6fddf43d729c 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -802,6 +802,7 @@ static void nfs4_destroy_server(struct nfs_server *server)
unset_pnfs_layoutdriver(server);
nfs4_purge_state_owners(server, &freeme);
nfs4_free_state_owners(&freeme);
+ kfree(server->delegation_hash_table);
}
/*
@@ -895,55 +896,40 @@ nfs4_find_client_sessionid(struct net *net, const struct sockaddr *addr,
* Set up an NFS4 client
*/
static int nfs4_set_client(struct nfs_server *server,
- const char *hostname,
- const struct sockaddr_storage *addr,
- const size_t addrlen,
- const char *ip_addr,
- int proto, const struct rpc_timeout *timeparms,
- u32 minorversion, unsigned int nconnect,
- unsigned int max_connect,
- struct net *net,
- struct xprtsec_parms *xprtsec)
+ struct nfs_client_initdata *cl_init)
{
- struct nfs_client_initdata cl_init = {
- .hostname = hostname,
- .addr = addr,
- .addrlen = addrlen,
- .ip_addr = ip_addr,
- .nfs_mod = &nfs_v4,
- .proto = proto,
- .minorversion = minorversion,
- .net = net,
- .timeparms = timeparms,
- .cred = server->cred,
- .xprtsec = *xprtsec,
- };
struct nfs_client *clp;
- if (minorversion == 0)
- __set_bit(NFS_CS_REUSEPORT, &cl_init.init_flags);
- else
- cl_init.max_connect = max_connect;
- switch (proto) {
+ cl_init->nfs_mod = &nfs_v4;
+ cl_init->cred = server->cred;
+
+ if (cl_init->minorversion == 0) {
+ __set_bit(NFS_CS_REUSEPORT, &cl_init->init_flags);
+ cl_init->max_connect = 0;
+ }
+
+ switch (cl_init->proto) {
case XPRT_TRANSPORT_RDMA:
case XPRT_TRANSPORT_TCP:
case XPRT_TRANSPORT_TCP_TLS:
- cl_init.nconnect = nconnect;
+ break;
+ default:
+ cl_init->nconnect = 0;
}
if (server->flags & NFS_MOUNT_NORESVPORT)
- __set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags);
+ __set_bit(NFS_CS_NORESVPORT, &cl_init->init_flags);
if (server->options & NFS_OPTION_MIGRATION)
- __set_bit(NFS_CS_MIGRATION, &cl_init.init_flags);
+ __set_bit(NFS_CS_MIGRATION, &cl_init->init_flags);
if (test_bit(NFS_MIG_TSM_POSSIBLE, &server->mig_status))
- __set_bit(NFS_CS_TSM_POSSIBLE, &cl_init.init_flags);
- server->port = rpc_get_port((struct sockaddr *)addr);
+ __set_bit(NFS_CS_TSM_POSSIBLE, &cl_init->init_flags);
+ server->port = rpc_get_port((struct sockaddr *)cl_init->addr);
if (server->flags & NFS_MOUNT_NETUNREACH_FATAL)
- __set_bit(NFS_CS_NETUNREACH_FATAL, &cl_init.init_flags);
+ __set_bit(NFS_CS_NETUNREACH_FATAL, &cl_init->init_flags);
/* Allocate or find a client reference we can use */
- clp = nfs_get_client(&cl_init);
+ clp = nfs_get_client(cl_init);
if (IS_ERR(clp))
return PTR_ERR(clp);
@@ -1088,29 +1074,15 @@ static void nfs4_session_limit_xasize(struct nfs_server *server)
#endif
}
-void nfs4_server_set_init_caps(struct nfs_server *server)
-{
- /* Set the basic capabilities */
- server->caps |= server->nfs_client->cl_mvops->init_caps;
- if (server->flags & NFS_MOUNT_NORDIRPLUS)
- server->caps &= ~NFS_CAP_READDIRPLUS;
- if (server->nfs_client->cl_proto == XPRT_TRANSPORT_RDMA)
- server->caps &= ~NFS_CAP_READ_PLUS;
-
- /*
- * Don't use NFS uid/gid mapping if we're using AUTH_SYS or lower
- * authentication.
- */
- if (nfs4_disable_idmapping &&
- server->client->cl_auth->au_flavor == RPC_AUTH_UNIX)
- server->caps |= NFS_CAP_UIDGID_NOMAP;
-}
-
static int nfs4_server_common_setup(struct nfs_server *server,
struct nfs_fh *mntfh, bool auth_probe)
{
int error;
+ error = nfs4_delegation_hash_alloc(server);
+ if (error)
+ return error;
+
/* data servers support only a subset of NFSv4.1 */
if (is_ds_only_client(server->nfs_client))
return -EPROTONOSUPPORT;
@@ -1118,14 +1090,14 @@ static int nfs4_server_common_setup(struct nfs_server *server,
/* We must ensure the session is initialised first */
error = nfs4_init_session(server->nfs_client);
if (error < 0)
- goto out;
+ return error;
- nfs4_server_set_init_caps(server);
+ nfs_server_set_init_caps(server);
/* Probe the root fh to retrieve its FSID and filehandle */
error = nfs4_get_rootfh(server, mntfh, auth_probe);
if (error < 0)
- goto out;
+ return error;
dprintk("Server FSID: %llx:%llx\n",
(unsigned long long) server->fsid.major,
@@ -1134,7 +1106,7 @@ static int nfs4_server_common_setup(struct nfs_server *server,
error = nfs_probe_server(server, mntfh);
if (error < 0)
- goto out;
+ return error;
nfs4_session_limit_rwsize(server);
nfs4_session_limit_xasize(server);
@@ -1145,8 +1117,7 @@ static int nfs4_server_common_setup(struct nfs_server *server,
nfs_server_insert_lists(server);
server->mount_time = jiffies;
server->destroy = nfs4_destroy_server;
-out:
- return error;
+ return 0;
}
/*
@@ -1156,6 +1127,19 @@ static int nfs4_init_server(struct nfs_server *server, struct fs_context *fc)
{
struct nfs_fs_context *ctx = nfs_fc2context(fc);
struct rpc_timeout timeparms;
+ struct nfs_client_initdata cl_init = {
+ .hostname = ctx->nfs_server.hostname,
+ .addr = &ctx->nfs_server._address,
+ .addrlen = ctx->nfs_server.addrlen,
+ .ip_addr = ctx->client_address,
+ .proto = ctx->nfs_server.protocol,
+ .minorversion = ctx->minorversion,
+ .net = fc->net_ns,
+ .timeparms = &timeparms,
+ .xprtsec = ctx->xprtsec,
+ .nconnect = ctx->nfs_server.nconnect,
+ .max_connect = ctx->nfs_server.max_connect,
+ };
int error;
nfs_init_timeout_values(&timeparms, ctx->nfs_server.protocol,
@@ -1175,18 +1159,7 @@ static int nfs4_init_server(struct nfs_server *server, struct fs_context *fc)
ctx->selected_flavor = RPC_AUTH_UNIX;
/* Get a client record */
- error = nfs4_set_client(server,
- ctx->nfs_server.hostname,
- &ctx->nfs_server._address,
- ctx->nfs_server.addrlen,
- ctx->client_address,
- ctx->nfs_server.protocol,
- &timeparms,
- ctx->minorversion,
- ctx->nfs_server.nconnect,
- ctx->nfs_server.max_connect,
- fc->net_ns,
- &ctx->xprtsec);
+ error = nfs4_set_client(server, &cl_init);
if (error < 0)
return error;
@@ -1246,18 +1219,28 @@ error:
struct nfs_server *nfs4_create_referral_server(struct fs_context *fc)
{
struct nfs_fs_context *ctx = nfs_fc2context(fc);
- struct nfs_client *parent_client;
- struct nfs_server *server, *parent_server;
- int proto, error;
+ struct nfs_server *parent_server = NFS_SB(ctx->clone_data.sb);
+ struct nfs_client *parent_client = parent_server->nfs_client;
+ struct nfs_client_initdata cl_init = {
+ .hostname = ctx->nfs_server.hostname,
+ .addr = &ctx->nfs_server._address,
+ .addrlen = ctx->nfs_server.addrlen,
+ .ip_addr = parent_client->cl_ipaddr,
+ .minorversion = parent_client->cl_mvops->minor_version,
+ .net = parent_client->cl_net,
+ .timeparms = parent_server->client->cl_timeout,
+ .xprtsec = parent_client->cl_xprtsec,
+ .nconnect = parent_client->cl_nconnect,
+ .max_connect = parent_client->cl_max_connect,
+ };
+ struct nfs_server *server;
bool auth_probe;
+ int error;
server = nfs_alloc_server();
if (!server)
return ERR_PTR(-ENOMEM);
- parent_server = NFS_SB(ctx->clone_data.sb);
- parent_client = parent_server->nfs_client;
-
server->cred = get_cred(parent_server->cred);
/* Initialise the client representation from the parent server */
@@ -1266,38 +1249,17 @@ struct nfs_server *nfs4_create_referral_server(struct fs_context *fc)
/* Get a client representation */
#if IS_ENABLED(CONFIG_SUNRPC_XPRT_RDMA)
rpc_set_port(&ctx->nfs_server.address, NFS_RDMA_PORT);
- error = nfs4_set_client(server,
- ctx->nfs_server.hostname,
- &ctx->nfs_server._address,
- ctx->nfs_server.addrlen,
- parent_client->cl_ipaddr,
- XPRT_TRANSPORT_RDMA,
- parent_server->client->cl_timeout,
- parent_client->cl_mvops->minor_version,
- parent_client->cl_nconnect,
- parent_client->cl_max_connect,
- parent_client->cl_net,
- &parent_client->cl_xprtsec);
+ cl_init.proto = XPRT_TRANSPORT_RDMA;
+ error = nfs4_set_client(server, &cl_init);
if (!error)
goto init_server;
#endif /* IS_ENABLED(CONFIG_SUNRPC_XPRT_RDMA) */
- proto = XPRT_TRANSPORT_TCP;
+ cl_init.proto = XPRT_TRANSPORT_TCP;
if (parent_client->cl_xprtsec.policy != RPC_XPRTSEC_NONE)
- proto = XPRT_TRANSPORT_TCP_TLS;
+ cl_init.proto = XPRT_TRANSPORT_TCP_TLS;
rpc_set_port(&ctx->nfs_server.address, NFS_PORT);
- error = nfs4_set_client(server,
- ctx->nfs_server.hostname,
- &ctx->nfs_server._address,
- ctx->nfs_server.addrlen,
- parent_client->cl_ipaddr,
- proto,
- parent_server->client->cl_timeout,
- parent_client->cl_mvops->minor_version,
- parent_client->cl_nconnect,
- parent_client->cl_max_connect,
- parent_client->cl_net,
- &parent_client->cl_xprtsec);
+ error = nfs4_set_client(server, &cl_init);
if (error < 0)
goto error;
@@ -1353,6 +1315,19 @@ int nfs4_update_server(struct nfs_server *server, const char *hostname,
char buf[INET6_ADDRSTRLEN + 1];
struct sockaddr_storage address;
struct sockaddr *localaddr = (struct sockaddr *)&address;
+ struct nfs_client_initdata cl_init = {
+ .hostname = hostname,
+ .addr = sap,
+ .addrlen = salen,
+ .ip_addr = buf,
+ .proto = clp->cl_proto,
+ .minorversion = clp->cl_minorversion,
+ .net = net,
+ .timeparms = clnt->cl_timeout,
+ .xprtsec = clp->cl_xprtsec,
+ .nconnect = clp->cl_nconnect,
+ .max_connect = clp->cl_max_connect,
+ };
int error;
error = rpc_switch_client_transport(clnt, &xargs, clnt->cl_timeout);
@@ -1368,11 +1343,7 @@ int nfs4_update_server(struct nfs_server *server, const char *hostname,
nfs_server_remove_lists(server);
set_bit(NFS_MIG_TSM_POSSIBLE, &server->mig_status);
- error = nfs4_set_client(server, hostname, sap, salen, buf,
- clp->cl_proto, clnt->cl_timeout,
- clp->cl_minorversion,
- clp->cl_nconnect, clp->cl_max_connect,
- net, &clp->cl_xprtsec);
+ error = nfs4_set_client(server, &cl_init);
clear_bit(NFS_MIG_TSM_POSSIBLE, &server->mig_status);
if (error != 0) {
nfs_server_insert_lists(server);
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index 5c749b6117bb..1d6b5f4230c9 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -253,7 +253,6 @@ static loff_t nfs42_remap_file_range(struct file *src_file, loff_t src_off,
struct nfs_server *server = NFS_SERVER(dst_inode);
struct inode *src_inode = file_inode(src_file);
unsigned int bs = server->clone_blksize;
- bool same_inode = false;
int ret;
/* NFS does not support deduplication. */
@@ -275,20 +274,8 @@ static loff_t nfs42_remap_file_range(struct file *src_file, loff_t src_off,
goto out;
}
- if (src_inode == dst_inode)
- same_inode = true;
-
/* XXX: do we lock at all? what if server needs CB_RECALL_LAYOUT? */
- if (same_inode) {
- inode_lock(src_inode);
- } else if (dst_inode < src_inode) {
- inode_lock_nested(dst_inode, I_MUTEX_PARENT);
- inode_lock_nested(src_inode, I_MUTEX_CHILD);
- } else {
- inode_lock_nested(src_inode, I_MUTEX_PARENT);
- inode_lock_nested(dst_inode, I_MUTEX_CHILD);
- }
-
+ lock_two_nondirectories(src_inode, dst_inode);
/* flush all pending writes on both src and dst so that server
* has the latest data */
ret = nfs_sync_inode(src_inode);
@@ -306,15 +293,7 @@ static loff_t nfs42_remap_file_range(struct file *src_file, loff_t src_off,
truncate_inode_pages_range(&dst_inode->i_data, dst_off, dst_off + count - 1);
out_unlock:
- if (same_inode) {
- inode_unlock(src_inode);
- } else if (dst_inode < src_inode) {
- inode_unlock(src_inode);
- inode_unlock(dst_inode);
- } else {
- inode_unlock(dst_inode);
- inode_unlock(src_inode);
- }
+ unlock_two_nondirectories(src_inode, dst_inode);
out:
return ret < 0 ? ret : count;
}
diff --git a/fs/nfs/nfs4getroot.c b/fs/nfs/nfs4getroot.c
index 1a69479a3a59..e67ea345de69 100644
--- a/fs/nfs/nfs4getroot.c
+++ b/fs/nfs/nfs4getroot.c
@@ -12,30 +12,28 @@
int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh, bool auth_probe)
{
-	struct nfs_fsinfo fsinfo;
+	struct nfs_fattr *fattr;
	int ret = -ENOMEM;
-	fsinfo.fattr = nfs_alloc_fattr();
-	if (fsinfo.fattr == NULL)
+	/* Attributes filled in by the root lookup; freed at "out" on every path. */
+	fattr = nfs_alloc_fattr();
+	if (!fattr)
		goto out;
	/* Start by getting the root filehandle from the server */
-	ret = nfs4_proc_get_rootfh(server, mntfh, &fsinfo, auth_probe);
+	ret = nfs4_proc_get_rootfh(server, mntfh, fattr, auth_probe);
	if (ret < 0) {
		dprintk("nfs4_get_rootfh: getroot error = %d\n", -ret);
		goto out;
	}
-	if (!(fsinfo.fattr->valid & NFS_ATTR_FATTR_TYPE)
-			|| !S_ISDIR(fsinfo.fattr->mode)) {
+	/* Reject the result unless a type was returned and it is a directory. */
+	if (!((fattr->valid & NFS_ATTR_FATTR_TYPE) && S_ISDIR(fattr->mode))) {
		printk(KERN_ERR "nfs4_get_rootfh:"
		       " getroot encountered non-directory\n");
		ret = -ENOTDIR;
		goto out;
	}
-	memcpy(&server->fsid, &fsinfo.fattr->fsid, sizeof(server->fsid));
+	/* Record the filesystem id reported for the root. */
+	memcpy(&server->fsid, &fattr->fsid, sizeof(server->fsid));
out:
-	nfs_free_fattr(fsinfo.fattr);
+	nfs_free_fattr(fattr);
	return ret;
}
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 341740fa293d..7d2b67e06cc3 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -222,6 +222,7 @@ const u32 nfs4_fattr_bitmap[3] = {
| FATTR4_WORD1_RAWDEV
| FATTR4_WORD1_SPACE_USED
| FATTR4_WORD1_TIME_ACCESS
+ | FATTR4_WORD1_TIME_CREATE
| FATTR4_WORD1_TIME_METADATA
| FATTR4_WORD1_TIME_MODIFY
| FATTR4_WORD1_MOUNTED_ON_FILEID,
@@ -243,6 +244,7 @@ static const u32 nfs4_pnfs_open_bitmap[3] = {
| FATTR4_WORD1_RAWDEV
| FATTR4_WORD1_SPACE_USED
| FATTR4_WORD1_TIME_ACCESS
+ | FATTR4_WORD1_TIME_CREATE
| FATTR4_WORD1_TIME_METADATA
| FATTR4_WORD1_TIME_MODIFY,
FATTR4_WORD2_MDSTHRESHOLD
@@ -323,6 +325,9 @@ static void nfs4_bitmap_copy_adjust(__u32 *dst, const __u32 *src,
if (!(cache_validity & NFS_INO_INVALID_OTHER))
dst[1] &= ~(FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP);
+ if (!(cache_validity & NFS_INO_INVALID_BTIME))
+ dst[1] &= ~FATTR4_WORD1_TIME_CREATE;
+
if (nfs_have_delegated_mtime(inode)) {
if (!(cache_validity & NFS_INO_INVALID_ATIME))
dst[1] &= ~(FATTR4_WORD1_TIME_ACCESS|FATTR4_WORD1_TIME_ACCESS_SET);
@@ -1307,7 +1312,8 @@ nfs4_update_changeattr_locked(struct inode *inode,
NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL |
NFS_INO_INVALID_SIZE | NFS_INO_INVALID_OTHER |
NFS_INO_INVALID_BLOCKS | NFS_INO_INVALID_NLINK |
- NFS_INO_INVALID_MODE | NFS_INO_INVALID_XATTR;
+ NFS_INO_INVALID_MODE | NFS_INO_INVALID_BTIME |
+ NFS_INO_INVALID_XATTR;
nfsi->attrtimeo = NFS_MINATTRTIMEO(inode);
}
nfsi->attrtimeo_timestamp = jiffies;
@@ -4047,6 +4053,10 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f
server->fattr_valid &= ~NFS_ATTR_FATTR_CTIME;
if (!(res.attr_bitmask[1] & FATTR4_WORD1_TIME_MODIFY))
server->fattr_valid &= ~NFS_ATTR_FATTR_MTIME;
+ if (!(res.attr_bitmask[1] & FATTR4_WORD1_TIME_MODIFY))
+ server->fattr_valid &= ~NFS_ATTR_FATTR_MTIME;
+ if (!(res.attr_bitmask[1] & FATTR4_WORD1_TIME_CREATE))
+ server->fattr_valid &= ~NFS_ATTR_FATTR_BTIME;
memcpy(server->attr_bitmask_nl, res.attr_bitmask,
sizeof(server->attr_bitmask));
server->attr_bitmask_nl[2] &= ~FATTR4_WORD2_SECURITY_LABEL;
@@ -4082,7 +4092,7 @@ int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle)
};
int err;
- nfs4_server_set_init_caps(server);
+ nfs_server_set_init_caps(server);
do {
err = nfs4_handle_exception(server,
_nfs4_server_capabilities(server, fhandle),
@@ -4230,15 +4240,18 @@ out:
}
static int _nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle,
- struct nfs_fsinfo *info)
+ struct nfs_fattr *fattr)
{
- u32 bitmask[3];
+ u32 bitmask[3] = {
+ [0] = FATTR4_WORD0_TYPE | FATTR4_WORD0_CHANGE |
+ FATTR4_WORD0_SIZE | FATTR4_WORD0_FSID,
+ };
struct nfs4_lookup_root_arg args = {
.bitmask = bitmask,
};
struct nfs4_lookup_res res = {
.server = server,
- .fattr = info->fattr,
+ .fattr = fattr,
.fh = fhandle,
};
struct rpc_message msg = {
@@ -4247,27 +4260,20 @@ static int _nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle,
.rpc_resp = &res,
};
- bitmask[0] = nfs4_fattr_bitmap[0];
- bitmask[1] = nfs4_fattr_bitmap[1];
- /*
- * Process the label in the upcoming getfattr
- */
- bitmask[2] = nfs4_fattr_bitmap[2] & ~FATTR4_WORD2_SECURITY_LABEL;
-
- nfs_fattr_init(info->fattr);
+ nfs_fattr_init(fattr);
return nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0);
}
static int nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle,
- struct nfs_fsinfo *info)
+ struct nfs_fattr *fattr)
{
struct nfs4_exception exception = {
.interruptible = true,
};
int err;
do {
- err = _nfs4_lookup_root(server, fhandle, info);
- trace_nfs4_lookup_root(server, fhandle, info->fattr, err);
+ err = _nfs4_lookup_root(server, fhandle, fattr);
+ trace_nfs4_lookup_root(server, fhandle, fattr, err);
switch (err) {
case 0:
case -NFS4ERR_WRONGSEC:
@@ -4280,8 +4286,9 @@ out:
return err;
}
-static int nfs4_lookup_root_sec(struct nfs_server *server, struct nfs_fh *fhandle,
- struct nfs_fsinfo *info, rpc_authflavor_t flavor)
+static int nfs4_lookup_root_sec(struct nfs_server *server,
+ struct nfs_fh *fhandle, struct nfs_fattr *fattr,
+ rpc_authflavor_t flavor)
{
struct rpc_auth_create_args auth_args = {
.pseudoflavor = flavor,
@@ -4291,7 +4298,7 @@ static int nfs4_lookup_root_sec(struct nfs_server *server, struct nfs_fh *fhandl
auth = rpcauth_create(&auth_args, server->client);
if (IS_ERR(auth))
return -EACCES;
- return nfs4_lookup_root(server, fhandle, info);
+ return nfs4_lookup_root(server, fhandle, fattr);
}
/*
@@ -4304,7 +4311,7 @@ static int nfs4_lookup_root_sec(struct nfs_server *server, struct nfs_fh *fhandl
* negative errno value.
*/
static int nfs4_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle,
- struct nfs_fsinfo *info)
+ struct nfs_fattr *fattr)
{
/* Per 3530bis 15.33.5 */
static const rpc_authflavor_t flav_array[] = {
@@ -4320,8 +4327,9 @@ static int nfs4_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle,
if (server->auth_info.flavor_len > 0) {
/* try each flavor specified by user */
for (i = 0; i < server->auth_info.flavor_len; i++) {
- status = nfs4_lookup_root_sec(server, fhandle, info,
- server->auth_info.flavors[i]);
+ status = nfs4_lookup_root_sec(
+ server, fhandle, fattr,
+ server->auth_info.flavors[i]);
if (status == -NFS4ERR_WRONGSEC || status == -EACCES)
continue;
break;
@@ -4329,7 +4337,7 @@ static int nfs4_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle,
} else {
/* no flavors specified by user, try default list */
for (i = 0; i < ARRAY_SIZE(flav_array); i++) {
- status = nfs4_lookup_root_sec(server, fhandle, info,
+ status = nfs4_lookup_root_sec(server, fhandle, fattr,
flav_array[i]);
if (status == -NFS4ERR_WRONGSEC || status == -EACCES)
continue;
@@ -4353,28 +4361,22 @@ static int nfs4_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle,
* nfs4_proc_get_rootfh - get file handle for server's pseudoroot
* @server: initialized nfs_server handle
* @fhandle: we fill in the pseudo-fs root file handle
- * @info: we fill in an FSINFO struct
+ * @fattr: we fill in a bare bones struct fattr
* @auth_probe: probe the auth flavours
*
* Returns zero on success, or a negative errno.
*/
int nfs4_proc_get_rootfh(struct nfs_server *server, struct nfs_fh *fhandle,
- struct nfs_fsinfo *info,
- bool auth_probe)
+ struct nfs_fattr *fattr, bool auth_probe)
{
int status = 0;
if (!auth_probe)
- status = nfs4_lookup_root(server, fhandle, info);
+ status = nfs4_lookup_root(server, fhandle, fattr);
if (auth_probe || status == NFS4ERR_WRONGSEC)
- status = server->nfs_client->cl_mvops->find_root_sec(server,
- fhandle, info);
-
- if (status == 0)
- status = nfs4_server_capabilities(server, fhandle);
- if (status == 0)
- status = nfs4_do_fsinfo(server, fhandle, info);
+ status = server->nfs_client->cl_mvops->find_root_sec(
+ server, fhandle, fattr);
return nfs4_map_errors(status);
}
@@ -5781,6 +5783,8 @@ void nfs4_bitmask_set(__u32 bitmask[], const __u32 src[],
bitmask[1] |= FATTR4_WORD1_TIME_MODIFY;
if (cache_validity & NFS_INO_INVALID_BLOCKS)
bitmask[1] |= FATTR4_WORD1_SPACE_USED;
+ if (cache_validity & NFS_INO_INVALID_BTIME)
+ bitmask[1] |= FATTR4_WORD1_TIME_CREATE;
if (cache_validity & NFS_INO_INVALID_SIZE)
bitmask[0] |= FATTR4_WORD0_SIZE;
@@ -10339,10 +10343,10 @@ nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data, bool sync)
* Use the state managment nfs_client cl_rpcclient, which uses krb5i (if
* possible) as per RFC3530bis and RFC5661 Security Considerations sections
*/
-static int
-_nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle,
- struct nfs_fsinfo *info,
- struct nfs4_secinfo_flavors *flavors, bool use_integrity)
+static int _nfs41_proc_secinfo_no_name(struct nfs_server *server,
+ struct nfs_fh *fhandle,
+ struct nfs4_secinfo_flavors *flavors,
+ bool use_integrity)
{
struct nfs41_secinfo_no_name_args args = {
.style = SECINFO_STYLE_CURRENT_FH,
@@ -10386,9 +10390,9 @@ _nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle,
return status;
}
-static int
-nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle,
- struct nfs_fsinfo *info, struct nfs4_secinfo_flavors *flavors)
+static int nfs41_proc_secinfo_no_name(struct nfs_server *server,
+ struct nfs_fh *fhandle,
+ struct nfs4_secinfo_flavors *flavors)
{
struct nfs4_exception exception = {
.interruptible = true,
@@ -10400,7 +10404,7 @@ nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle,
/* try to use integrity protection with machine cred */
if (_nfs4_is_integrity_protected(server->nfs_client))
- err = _nfs41_proc_secinfo_no_name(server, fhandle, info,
+ err = _nfs41_proc_secinfo_no_name(server, fhandle,
flavors, true);
/*
@@ -10410,7 +10414,7 @@ nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle,
* the current filesystem's rpc_client and the user cred.
*/
if (err == -NFS4ERR_WRONGSEC)
- err = _nfs41_proc_secinfo_no_name(server, fhandle, info,
+ err = _nfs41_proc_secinfo_no_name(server, fhandle,
flavors, false);
switch (err) {
@@ -10426,9 +10430,8 @@ out:
return err;
}
-static int
-nfs41_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle,
- struct nfs_fsinfo *info)
+static int nfs41_find_root_sec(struct nfs_server *server,
+ struct nfs_fh *fhandle, struct nfs_fattr *fattr)
{
int err;
struct page *page;
@@ -10444,14 +10447,14 @@ nfs41_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle,
}
flavors = page_address(page);
- err = nfs41_proc_secinfo_no_name(server, fhandle, info, flavors);
+ err = nfs41_proc_secinfo_no_name(server, fhandle, flavors);
/*
* Fall back on "guess and check" method if
* the server doesn't support SECINFO_NO_NAME
*/
if (err == -NFS4ERR_WRONGSEC || err == -ENOTSUPP) {
- err = nfs4_find_root_sec(server, fhandle, info);
+ err = nfs4_find_root_sec(server, fhandle, fattr);
goto out_freepage;
}
if (err)
@@ -10476,8 +10479,8 @@ nfs41_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle,
flavor = RPC_AUTH_MAXFLAVOR;
if (flavor != RPC_AUTH_MAXFLAVOR) {
- err = nfs4_lookup_root_sec(server, fhandle,
- info, flavor);
+ err = nfs4_lookup_root_sec(server, fhandle, fattr,
+ flavor);
if (!err)
break;
}
@@ -10680,6 +10683,8 @@ nfs41_free_lock_state(struct nfs_server *server, struct nfs4_lock_state *lsp)
static bool nfs41_match_stateid(const nfs4_stateid *s1,
const nfs4_stateid *s2)
{
+ trace_nfs41_match_stateid(s1, s2);
+
if (s1->type != s2->type)
return false;
@@ -10697,6 +10702,8 @@ static bool nfs41_match_stateid(const nfs4_stateid *s1,
static bool nfs4_match_stateid(const nfs4_stateid *s1,
const nfs4_stateid *s2)
{
+ trace_nfs4_match_stateid(s1, s2);
+
return nfs4_stateid_match(s1, s2);
}
@@ -10867,7 +10874,7 @@ const struct nfs4_minor_version_ops *nfs_v4_minor_ops[] = {
static ssize_t nfs4_listxattr(struct dentry *dentry, char *list, size_t size)
{
- ssize_t error, error2, error3, error4;
+ ssize_t error, error2, error3, error4 = 0;
size_t left = size;
error = generic_listxattr(dentry, list, left);
@@ -10895,9 +10902,11 @@ static ssize_t nfs4_listxattr(struct dentry *dentry, char *list, size_t size)
left -= error3;
}
- error4 = security_inode_listsecurity(d_inode(dentry), list, left);
- if (error4 < 0)
- return error4;
+ if (!nfs_server_capable(d_inode(dentry), NFS_CAP_SECURITY_LABEL)) {
+ error4 = security_inode_listsecurity(d_inode(dentry), list, left);
+ if (error4 < 0)
+ return error4;
+ }
error += error2 + error3 + error4;
if (size && error > size)
@@ -10951,6 +10960,26 @@ static const struct inode_operations nfs4_file_inode_operations = {
.listxattr = nfs4_listxattr,
};
+static struct nfs_server *nfs4_clone_server(struct nfs_server *source,
+ struct nfs_fh *fh, struct nfs_fattr *fattr,
+ rpc_authflavor_t flavor)
+{
+ struct nfs_server *server;
+ int error;
+
+ server = nfs_clone_server(source, fh, fattr, flavor);
+ if (IS_ERR(server))
+ return server;
+
+ error = nfs4_delegation_hash_alloc(server);
+ if (error) {
+ nfs_free_server(server);
+ return ERR_PTR(error);
+ }
+
+ return server;
+}
+
const struct nfs_rpc_ops nfs_v4_clientops = {
.version = 4, /* protocol version */
.dentry_ops = &nfs4_dentry_operations,
@@ -11003,7 +11032,7 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
.init_client = nfs4_init_client,
.free_client = nfs4_free_client,
.create_server = nfs4_create_server,
- .clone_server = nfs_clone_server,
+ .clone_server = nfs4_clone_server,
.discover_trunking = nfs4_discover_trunking,
.enable_swap = nfs4_enable_swap,
.disable_swap = nfs4_disable_swap,
diff --git a/fs/nfs/nfs4trace.c b/fs/nfs/nfs4trace.c
index 389941ccc9c9..987c92d6364b 100644
--- a/fs/nfs/nfs4trace.c
+++ b/fs/nfs/nfs4trace.c
@@ -26,11 +26,13 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(pnfs_mds_fallback_read_done);
EXPORT_TRACEPOINT_SYMBOL_GPL(pnfs_mds_fallback_write_done);
EXPORT_TRACEPOINT_SYMBOL_GPL(pnfs_mds_fallback_read_pagelist);
EXPORT_TRACEPOINT_SYMBOL_GPL(pnfs_mds_fallback_write_pagelist);
+EXPORT_TRACEPOINT_SYMBOL_GPL(pnfs_ds_connect);
EXPORT_TRACEPOINT_SYMBOL_GPL(ff_layout_read_error);
EXPORT_TRACEPOINT_SYMBOL_GPL(ff_layout_write_error);
EXPORT_TRACEPOINT_SYMBOL_GPL(ff_layout_commit_error);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bl_ext_tree_prepare_commit);
EXPORT_TRACEPOINT_SYMBOL_GPL(bl_pr_key_reg);
EXPORT_TRACEPOINT_SYMBOL_GPL(bl_pr_key_reg_err);
EXPORT_TRACEPOINT_SYMBOL_GPL(bl_pr_key_unreg);
diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h
index deab4c0e21a0..9776d220cec3 100644
--- a/fs/nfs/nfs4trace.h
+++ b/fs/nfs/nfs4trace.h
@@ -14,6 +14,8 @@
#include <trace/misc/fs.h>
#include <trace/misc/nfs.h>
+#include "delegation.h"
+
#define show_nfs_fattr_flags(valid) \
__print_flags((unsigned long)valid, "|", \
{ NFS_ATTR_FATTR_TYPE, "TYPE" }, \
@@ -30,7 +32,8 @@
{ NFS_ATTR_FATTR_CTIME, "CTIME" }, \
{ NFS_ATTR_FATTR_CHANGE, "CHANGE" }, \
{ NFS_ATTR_FATTR_OWNER_NAME, "OWNER_NAME" }, \
- { NFS_ATTR_FATTR_GROUP_NAME, "GROUP_NAME" })
+ { NFS_ATTR_FATTR_GROUP_NAME, "GROUP_NAME" }, \
+ { NFS_ATTR_FATTR_BTIME, "BTIME" })
DECLARE_EVENT_CLASS(nfs4_clientid_event,
TP_PROTO(
@@ -273,6 +276,32 @@ TRACE_EVENT(nfs4_cb_offload,
show_nfs_stable_how(__entry->cb_how)
)
);
+
+TRACE_EVENT(pnfs_ds_connect,
+ TP_PROTO(
+ char *ds_remotestr,
+ int status
+ ),
+
+ TP_ARGS(ds_remotestr, status),
+
+ TP_STRUCT__entry(
+ __string(ds_ips, ds_remotestr)
+ __field(int, status)
+ ),
+
+ TP_fast_assign(
+ __assign_str(ds_ips);
+ __entry->status = status;
+ ),
+
+ TP_printk(
+ "ds_ips=%s, status=%d",
+ __get_str(ds_ips),
+ __entry->status
+ )
+);
+
#endif /* CONFIG_NFS_V4_1 */
TRACE_EVENT(nfs4_setup_sequence,
@@ -956,6 +985,52 @@ DECLARE_EVENT_CLASS(nfs4_set_delegation_event,
TP_ARGS(inode, fmode))
DEFINE_NFS4_SET_DELEGATION_EVENT(nfs4_set_delegation);
DEFINE_NFS4_SET_DELEGATION_EVENT(nfs4_reclaim_delegation);
+DEFINE_NFS4_SET_DELEGATION_EVENT(nfs4_detach_delegation);
+
+#define show_delegation_flags(flags) \
+ __print_flags(flags, "|", \
+ { BIT(NFS_DELEGATION_NEED_RECLAIM), "NEED_RECLAIM" }, \
+ { BIT(NFS_DELEGATION_RETURN), "RETURN" }, \
+ { BIT(NFS_DELEGATION_RETURN_IF_CLOSED), "RETURN_IF_CLOSED" }, \
+ { BIT(NFS_DELEGATION_REFERENCED), "REFERENCED" }, \
+ { BIT(NFS_DELEGATION_RETURNING), "RETURNING" }, \
+ { BIT(NFS_DELEGATION_REVOKED), "REVOKED" }, \
+ { BIT(NFS_DELEGATION_TEST_EXPIRED), "TEST_EXPIRED" }, \
+ { BIT(NFS_DELEGATION_INODE_FREEING), "INODE_FREEING" }, \
+ { BIT(NFS_DELEGATION_RETURN_DELAYED), "RETURN_DELAYED" })
+
+DECLARE_EVENT_CLASS(nfs4_delegation_event,
+ TP_PROTO(
+ const struct nfs_delegation *delegation
+ ),
+
+ TP_ARGS(delegation),
+
+ TP_STRUCT__entry(
+ __field(u32, fhandle)
+ __field(unsigned int, fmode)
+ __field(unsigned long, flags)
+ ),
+
+ TP_fast_assign(
+ __entry->fhandle = nfs_fhandle_hash(NFS_FH(delegation->inode));
+ __entry->fmode = delegation->type;
+ __entry->flags = delegation->flags;
+ ),
+
+ TP_printk(
+ "fhandle=0x%08x fmode=%s flags=%s",
+ __entry->fhandle, show_fs_fmode_flags(__entry->fmode),
+ show_delegation_flags(__entry->flags)
+ )
+);
+#define DEFINE_NFS4_DELEGATION_EVENT(name) \
+ DEFINE_EVENT(nfs4_delegation_event, name, \
+ TP_PROTO( \
+ const struct nfs_delegation *delegation \
+ ), \
+ TP_ARGS(delegation))
+DEFINE_NFS4_DELEGATION_EVENT(nfs_delegation_need_return);
TRACE_EVENT(nfs4_delegreturn_exit,
TP_PROTO(
@@ -1449,6 +1524,63 @@ DECLARE_EVENT_CLASS(nfs4_inode_stateid_callback_event,
DEFINE_NFS4_INODE_STATEID_CALLBACK_EVENT(nfs4_cb_recall);
DEFINE_NFS4_INODE_STATEID_CALLBACK_EVENT(nfs4_cb_layoutrecall_file);
+#define show_stateid_type(type) \
+ __print_symbolic(type, \
+ { NFS4_INVALID_STATEID_TYPE, "INVALID" }, \
+ { NFS4_SPECIAL_STATEID_TYPE, "SPECIAL" }, \
+ { NFS4_OPEN_STATEID_TYPE, "OPEN" }, \
+ { NFS4_LOCK_STATEID_TYPE, "LOCK" }, \
+ { NFS4_DELEGATION_STATEID_TYPE, "DELEGATION" }, \
+ { NFS4_LAYOUT_STATEID_TYPE, "LAYOUT" }, \
+ { NFS4_PNFS_DS_STATEID_TYPE, "PNFS_DS" }, \
+ { NFS4_REVOKED_STATEID_TYPE, "REVOKED" }, \
+ { NFS4_FREED_STATEID_TYPE, "FREED" })
+
+DECLARE_EVENT_CLASS(nfs4_match_stateid_event,
+ TP_PROTO(
+ const nfs4_stateid *s1,
+ const nfs4_stateid *s2
+ ),
+
+ TP_ARGS(s1, s2),
+
+ TP_STRUCT__entry(
+ __field(int, s1_seq)
+ __field(int, s2_seq)
+ __field(u32, s1_hash)
+ __field(u32, s2_hash)
+ __field(int, s1_type)
+ __field(int, s2_type)
+ ),
+
+ TP_fast_assign(
+ __entry->s1_seq = s1->seqid;
+ __entry->s1_hash = nfs_stateid_hash(s1);
+ __entry->s1_type = s1->type;
+ __entry->s2_seq = s2->seqid;
+ __entry->s2_hash = nfs_stateid_hash(s2);
+ __entry->s2_type = s2->type;
+ ),
+
+ TP_printk(
+ "s1=%s:%x:%u s2=%s:%x:%u",
+ show_stateid_type(__entry->s1_type),
+ __entry->s1_hash, __entry->s1_seq,
+ show_stateid_type(__entry->s2_type),
+ __entry->s2_hash, __entry->s2_seq
+ )
+);
+
+#define DEFINE_NFS4_MATCH_STATEID_EVENT(name) \
+ DEFINE_EVENT(nfs4_match_stateid_event, name, \
+ TP_PROTO( \
+ const nfs4_stateid *s1, \
+ const nfs4_stateid *s2 \
+ ), \
+ TP_ARGS(s1, s2))
+DEFINE_NFS4_MATCH_STATEID_EVENT(nfs41_match_stateid);
+DEFINE_NFS4_MATCH_STATEID_EVENT(nfs4_match_stateid);
+
DECLARE_EVENT_CLASS(nfs4_idmap_event,
TP_PROTO(
const char *name,
@@ -2163,6 +2295,40 @@ TRACE_EVENT(ff_layout_commit_error,
)
);
+TRACE_EVENT(bl_ext_tree_prepare_commit,
+ TP_PROTO(
+ int ret,
+ size_t count,
+ u64 lwb,
+ bool not_all_ranges
+ ),
+
+ TP_ARGS(ret, count, lwb, not_all_ranges),
+
+ TP_STRUCT__entry(
+ __field(int, ret)
+ __field(size_t, count)
+ __field(u64, lwb)
+ __field(bool, not_all_ranges)
+ ),
+
+ TP_fast_assign(
+ __entry->ret = ret;
+ __entry->count = count;
+ __entry->lwb = lwb;
+ __entry->not_all_ranges = not_all_ranges;
+ ),
+
+ TP_printk(
+ "ret=%d, found %zu ranges, lwb=%llu%s",
+ __entry->ret,
+ __entry->count,
+ __entry->lwb,
+ __entry->not_all_ranges ? ", not all ranges encoded" :
+ ""
+ )
+);
+
DECLARE_EVENT_CLASS(pnfs_bl_pr_key_class,
TP_PROTO(
const struct block_device *bdev,
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 318afde38057..49ff98571fa5 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -1623,6 +1623,7 @@ static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg
| FATTR4_WORD1_RAWDEV
| FATTR4_WORD1_SPACE_USED
| FATTR4_WORD1_TIME_ACCESS
+ | FATTR4_WORD1_TIME_CREATE
| FATTR4_WORD1_TIME_METADATA
| FATTR4_WORD1_TIME_MODIFY;
attrs[2] |= FATTR4_WORD2_SECURITY_LABEL;
@@ -4207,6 +4208,24 @@ static int decode_attr_time_access(struct xdr_stream *xdr, uint32_t *bitmap, str
return status;
}
+static int decode_attr_time_create(struct xdr_stream *xdr, uint32_t *bitmap, struct timespec64 *time)
+{
+ int status = 0;
+
+ time->tv_sec = 0;
+ time->tv_nsec = 0;
+ if (unlikely(bitmap[1] & (FATTR4_WORD1_TIME_CREATE - 1U)))
+ return -EIO;
+ if (likely(bitmap[1] & FATTR4_WORD1_TIME_CREATE)) {
+ status = decode_attr_time(xdr, time);
+ if (status == 0)
+ status = NFS_ATTR_FATTR_BTIME;
+ bitmap[1] &= ~FATTR4_WORD1_TIME_CREATE;
+ }
+ dprintk("%s: btime=%lld\n", __func__, time->tv_sec);
+ return status;
+}
+
static int decode_attr_time_metadata(struct xdr_stream *xdr, uint32_t *bitmap, struct timespec64 *time)
{
int status = 0;
@@ -4781,6 +4800,11 @@ static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap,
goto xdr_error;
fattr->valid |= status;
+ status = decode_attr_time_create(xdr, bitmap, &fattr->btime);
+ if (status < 0)
+ goto xdr_error;
+ fattr->valid |= status;
+
status = decode_attr_time_metadata(xdr, bitmap, &fattr->ctime);
if (status < 0)
goto xdr_error;
diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h
index 7a058bd8c566..96b1323318c2 100644
--- a/fs/nfs/nfstrace.h
+++ b/fs/nfs/nfstrace.h
@@ -32,7 +32,8 @@
{ NFS_INO_INVALID_BLOCKS, "INVALID_BLOCKS" }, \
{ NFS_INO_INVALID_XATTR, "INVALID_XATTR" }, \
{ NFS_INO_INVALID_NLINK, "INVALID_NLINK" }, \
- { NFS_INO_INVALID_MODE, "INVALID_MODE" })
+ { NFS_INO_INVALID_MODE, "INVALID_MODE" }, \
+ { NFS_INO_INVALID_BTIME, "INVALID_BTIME" })
#define nfs_show_nfsi_flags(v) \
__print_flags(v, "|", \
@@ -56,6 +57,7 @@ DECLARE_EVENT_CLASS(nfs_inode_event,
__field(u32, fhandle)
__field(u64, fileid)
__field(u64, version)
+ __field(unsigned long, cache_validity)
),
TP_fast_assign(
@@ -64,14 +66,17 @@ DECLARE_EVENT_CLASS(nfs_inode_event,
__entry->fileid = nfsi->fileid;
__entry->fhandle = nfs_fhandle_hash(&nfsi->fh);
__entry->version = inode_peek_iversion_raw(inode);
+ __entry->cache_validity = nfsi->cache_validity;
),
TP_printk(
- "fileid=%02x:%02x:%llu fhandle=0x%08x version=%llu ",
+ "fileid=%02x:%02x:%llu fhandle=0x%08x version=%llu cache_validity=0x%lx (%s)",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long long)__entry->fileid,
__entry->fhandle,
- (unsigned long long)__entry->version
+ (unsigned long long)__entry->version,
+ __entry->cache_validity,
+ nfs_show_cache_validity(__entry->cache_validity)
)
);
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 1a7ec68bde15..a3135b5af7ee 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -306,7 +306,6 @@ void
pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo)
{
struct inode *inode;
- unsigned long i_state;
if (!lo)
return;
@@ -317,12 +316,11 @@ pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo)
if (!list_empty(&lo->plh_segs))
WARN_ONCE(1, "NFS: BUG unfreed layout segments.\n");
pnfs_detach_layout_hdr(lo);
- i_state = inode->i_state;
+ /* Notify pnfs_destroy_layout_final() that we're done */
+ if (inode->i_state & (I_FREEING | I_CLEAR))
+ wake_up_var_locked(lo, &inode->i_lock);
spin_unlock(&inode->i_lock);
pnfs_free_layout_hdr(lo);
- /* Notify pnfs_destroy_layout_final() that we're done */
- if (i_state & (I_FREEING | I_CLEAR))
- wake_up_var(lo);
}
}
@@ -809,23 +807,17 @@ void pnfs_destroy_layout(struct nfs_inode *nfsi)
}
EXPORT_SYMBOL_GPL(pnfs_destroy_layout);
-static bool pnfs_layout_removed(struct nfs_inode *nfsi,
- struct pnfs_layout_hdr *lo)
-{
- bool ret;
-
- spin_lock(&nfsi->vfs_inode.i_lock);
- ret = nfsi->layout != lo;
- spin_unlock(&nfsi->vfs_inode.i_lock);
- return ret;
-}
-
void pnfs_destroy_layout_final(struct nfs_inode *nfsi)
{
struct pnfs_layout_hdr *lo = __pnfs_destroy_layout(nfsi);
+ struct inode *inode = &nfsi->vfs_inode;
- if (lo)
- wait_var_event(lo, pnfs_layout_removed(nfsi, lo));
+ if (lo) {
+ spin_lock(&inode->i_lock);
+ wait_var_event_spinlock(lo, nfsi->layout != lo,
+ &inode->i_lock);
+ spin_unlock(&inode->i_lock);
+ }
}
static bool
@@ -3340,6 +3332,7 @@ pnfs_layoutcommit_inode(struct inode *inode, bool sync)
struct nfs_inode *nfsi = NFS_I(inode);
loff_t end_pos;
int status;
+ bool mark_as_dirty = false;
if (!pnfs_layoutcommit_outstanding(inode))
return 0;
@@ -3391,19 +3384,23 @@ pnfs_layoutcommit_inode(struct inode *inode, bool sync)
if (ld->prepare_layoutcommit) {
status = ld->prepare_layoutcommit(&data->args);
if (status) {
- put_cred(data->cred);
+ if (status != -ENOSPC)
+ put_cred(data->cred);
spin_lock(&inode->i_lock);
set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags);
if (end_pos > nfsi->layout->plh_lwb)
nfsi->layout->plh_lwb = end_pos;
- goto out_unlock;
+ if (status != -ENOSPC)
+ goto out_unlock;
+ spin_unlock(&inode->i_lock);
+ mark_as_dirty = true;
}
}
status = nfs4_proc_layoutcommit(data, sync);
out:
- if (status)
+ if (status || mark_as_dirty)
mark_inode_dirty_sync(inode);
dprintk("<-- %s status %d\n", __func__, status);
return status;
diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c
index b4ccdf78d4dd..7b32afb29782 100644
--- a/fs/nfs/pnfs_nfs.c
+++ b/fs/nfs/pnfs_nfs.c
@@ -17,6 +17,7 @@
#include "internal.h"
#include "pnfs.h"
#include "netns.h"
+#include "nfs4trace.h"
#define NFSDBG_FACILITY NFSDBG_PNFS
@@ -1007,8 +1008,10 @@ int nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds,
err = nfs4_wait_ds_connect(ds);
if (err || ds->ds_clp)
goto out;
- if (nfs4_test_deviceid_unavailable(devid))
- return -ENODEV;
+ if (nfs4_test_deviceid_unavailable(devid)) {
+ err = -ENODEV;
+ goto out;
+ }
} while (test_and_set_bit(NFS4DS_CONNECTING, &ds->ds_state) != 0);
if (ds->ds_clp)
@@ -1038,11 +1041,12 @@ out:
if (!ds->ds_clp || !nfs_client_init_is_complete(ds->ds_clp)) {
WARN_ON_ONCE(ds->ds_clp ||
!nfs4_test_deviceid_unavailable(devid));
- return -EINVAL;
- }
- err = nfs_client_init_status(ds->ds_clp);
+ err = -EINVAL;
+ } else
+ err = nfs_client_init_status(ds->ds_clp);
}
+ trace_pnfs_ds_connect(ds->ds_remotestr, err);
return err;
}
EXPORT_SYMBOL_GPL(nfs4_pnfs_ds_connect);
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index cf1d720b8251..fa5c41d0989a 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -2113,8 +2113,12 @@ int nfs_migrate_folio(struct address_space *mapping, struct folio *dst,
* that we can safely release the inode reference while holding
* the folio lock.
*/
- if (folio_test_private(src))
- return -EBUSY;
+ if (folio_test_private(src)) {
+ if (mode == MIGRATE_SYNC)
+ nfs_wb_folio(src->mapping->host, src);
+ if (folio_test_private(src))
+ return -EBUSY;
+ }
if (folio_test_private_2(src)) { /* [DEPRECATED] */
if (mode == MIGRATE_ASYNC)
diff --git a/fs/nfs_common/nfslocalio.c b/fs/nfs_common/nfslocalio.c
index 05c7c16e37ab..dd715cdb6c04 100644
--- a/fs/nfs_common/nfslocalio.c
+++ b/fs/nfs_common/nfslocalio.c
@@ -177,7 +177,7 @@ static bool nfs_uuid_put(nfs_uuid_t *nfs_uuid)
/* nfs_close_local_fh() is doing the
* close and we must wait. until it unlinks
*/
- wait_var_event_spinlock(nfl,
+ wait_var_event_spinlock(nfs_uuid,
list_first_entry_or_null(
&nfs_uuid->files,
struct nfs_file_localio,
@@ -198,8 +198,7 @@ static bool nfs_uuid_put(nfs_uuid_t *nfs_uuid)
/* Now we can allow racing nfs_close_local_fh() to
* skip the locking.
*/
- RCU_INIT_POINTER(nfl->nfs_uuid, NULL);
- wake_up_var_locked(&nfl->nfs_uuid, &nfs_uuid->lock);
+ store_release_wake_up(&nfl->nfs_uuid, RCU_INITIALIZER(NULL));
}
/* Remove client from nn->local_clients */
@@ -243,15 +242,20 @@ void nfs_localio_invalidate_clients(struct list_head *nn_local_clients,
}
EXPORT_SYMBOL_GPL(nfs_localio_invalidate_clients);
-static void nfs_uuid_add_file(nfs_uuid_t *nfs_uuid, struct nfs_file_localio *nfl)
+static int nfs_uuid_add_file(nfs_uuid_t *nfs_uuid, struct nfs_file_localio *nfl)
{
+ int ret = 0;
+
/* Add nfl to nfs_uuid->files if it isn't already */
spin_lock(&nfs_uuid->lock);
- if (list_empty(&nfl->list)) {
+ if (rcu_access_pointer(nfs_uuid->net) == NULL) {
+ ret = -ENXIO;
+ } else if (list_empty(&nfl->list)) {
rcu_assign_pointer(nfl->nfs_uuid, nfs_uuid);
list_add_tail(&nfl->list, &nfs_uuid->files);
}
spin_unlock(&nfs_uuid->lock);
+ return ret;
}
/*
@@ -285,11 +289,13 @@ struct nfsd_file *nfs_open_local_fh(nfs_uuid_t *uuid,
}
rcu_read_unlock();
/* We have an implied reference to net thanks to nfsd_net_try_get */
- localio = nfs_to->nfsd_open_local_fh(net, uuid->dom, rpc_clnt,
- cred, nfs_fh, pnf, fmode);
+ localio = nfs_to->nfsd_open_local_fh(net, uuid->dom, rpc_clnt, cred,
+ nfs_fh, pnf, fmode);
+ if (!IS_ERR(localio) && nfs_uuid_add_file(uuid, nfl) < 0) {
+ /* Delete the cached file when racing with nfs_uuid_put() */
+ nfs_to_nfsd_file_put_local(pnf);
+ }
nfs_to_nfsd_net_put(net);
- if (!IS_ERR(localio))
- nfs_uuid_add_file(uuid, nfl);
return localio;
}
@@ -314,7 +320,7 @@ void nfs_close_local_fh(struct nfs_file_localio *nfl)
rcu_read_unlock();
return;
}
- if (list_empty(&nfs_uuid->files)) {
+ if (list_empty(&nfl->list)) {
/* nfs_uuid_put() has started closing files, wait for it
* to finished
*/
@@ -338,7 +344,7 @@ void nfs_close_local_fh(struct nfs_file_localio *nfl)
*/
spin_lock(&nfs_uuid->lock);
list_del_init(&nfl->list);
- wake_up_var_locked(&nfl->nfs_uuid, &nfs_uuid->lock);
+ wake_up_var_locked(nfs_uuid, &nfs_uuid->lock);
spin_unlock(&nfs_uuid->lock);
}
EXPORT_SYMBOL_GPL(nfs_close_local_fh);
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 2203438738f6..76c86f1c2b1c 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -1071,6 +1071,7 @@ static int ocfs2_grab_folios_for_write(struct address_space *mapping,
if (IS_ERR(wc->w_folios[i])) {
ret = PTR_ERR(wc->w_folios[i]);
mlog_errno(ret);
+ wc->w_folios[i] = NULL;
goto out;
}
}
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index 7799f4d16ce9..8c9c4825f984 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -798,6 +798,14 @@ static int ocfs2_dx_dir_lookup_rec(struct inode *inode,
}
}
+ if (le16_to_cpu(el->l_next_free_rec) == 0) {
+ ret = ocfs2_error(inode->i_sb,
+ "Inode %lu has empty extent list at depth %u\n",
+ inode->i_ino,
+ le16_to_cpu(el->l_tree_depth));
+ goto out;
+ }
+
found = 0;
for (i = le16_to_cpu(el->l_next_free_rec) - 1; i >= 0; i--) {
rec = &el->l_recs[i];
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index 67fc62a49a76..00f52812dbb0 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -2632,7 +2632,7 @@ again:
dlm_reco_master_ready(dlm),
msecs_to_jiffies(1000));
if (!dlm_reco_master_ready(dlm)) {
- mlog(0, "%s: reco master taking awhile\n",
+ mlog(0, "%s: reco master taking a while\n",
dlm->name);
goto again;
}
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 12e5d1f73325..14bf440ea4df 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -50,8 +50,6 @@ struct ocfs2_find_inode_args
unsigned int fi_sysfile_type;
};
-static struct lock_class_key ocfs2_sysfile_lock_key[NUM_SYSTEM_INODES];
-
static int ocfs2_read_locked_inode(struct inode *inode,
struct ocfs2_find_inode_args *args);
static int ocfs2_init_locked_inode(struct inode *inode, void *opaque);
@@ -250,14 +248,77 @@ bail:
static int ocfs2_init_locked_inode(struct inode *inode, void *opaque)
{
struct ocfs2_find_inode_args *args = opaque;
+#ifdef CONFIG_LOCKDEP
+ static struct lock_class_key ocfs2_sysfile_lock_key[NUM_SYSTEM_INODES];
static struct lock_class_key ocfs2_quota_ip_alloc_sem_key,
ocfs2_file_ip_alloc_sem_key;
+#endif
inode->i_ino = args->fi_ino;
OCFS2_I(inode)->ip_blkno = args->fi_blkno;
- if (args->fi_sysfile_type != 0)
+#ifdef CONFIG_LOCKDEP
+ switch (args->fi_sysfile_type) {
+ case BAD_BLOCK_SYSTEM_INODE:
+ break;
+ case GLOBAL_INODE_ALLOC_SYSTEM_INODE:
+ lockdep_set_class(&inode->i_rwsem,
+ &ocfs2_sysfile_lock_key[GLOBAL_INODE_ALLOC_SYSTEM_INODE]);
+ break;
+ case SLOT_MAP_SYSTEM_INODE:
+ lockdep_set_class(&inode->i_rwsem,
+ &ocfs2_sysfile_lock_key[SLOT_MAP_SYSTEM_INODE]);
+ break;
+ case HEARTBEAT_SYSTEM_INODE:
+ lockdep_set_class(&inode->i_rwsem,
+ &ocfs2_sysfile_lock_key[HEARTBEAT_SYSTEM_INODE]);
+ break;
+ case GLOBAL_BITMAP_SYSTEM_INODE:
+ lockdep_set_class(&inode->i_rwsem,
+ &ocfs2_sysfile_lock_key[GLOBAL_BITMAP_SYSTEM_INODE]);
+ break;
+ case USER_QUOTA_SYSTEM_INODE:
+ lockdep_set_class(&inode->i_rwsem,
+ &ocfs2_sysfile_lock_key[USER_QUOTA_SYSTEM_INODE]);
+ break;
+ case GROUP_QUOTA_SYSTEM_INODE:
+ lockdep_set_class(&inode->i_rwsem,
+ &ocfs2_sysfile_lock_key[GROUP_QUOTA_SYSTEM_INODE]);
+ break;
+ case ORPHAN_DIR_SYSTEM_INODE:
+ lockdep_set_class(&inode->i_rwsem,
+ &ocfs2_sysfile_lock_key[ORPHAN_DIR_SYSTEM_INODE]);
+ break;
+ case EXTENT_ALLOC_SYSTEM_INODE:
lockdep_set_class(&inode->i_rwsem,
- &ocfs2_sysfile_lock_key[args->fi_sysfile_type]);
+ &ocfs2_sysfile_lock_key[EXTENT_ALLOC_SYSTEM_INODE]);
+ break;
+ case INODE_ALLOC_SYSTEM_INODE:
+ lockdep_set_class(&inode->i_rwsem,
+ &ocfs2_sysfile_lock_key[INODE_ALLOC_SYSTEM_INODE]);
+ break;
+ case JOURNAL_SYSTEM_INODE:
+ lockdep_set_class(&inode->i_rwsem,
+ &ocfs2_sysfile_lock_key[JOURNAL_SYSTEM_INODE]);
+ break;
+ case LOCAL_ALLOC_SYSTEM_INODE:
+ lockdep_set_class(&inode->i_rwsem,
+ &ocfs2_sysfile_lock_key[LOCAL_ALLOC_SYSTEM_INODE]);
+ break;
+ case TRUNCATE_LOG_SYSTEM_INODE:
+ lockdep_set_class(&inode->i_rwsem,
+ &ocfs2_sysfile_lock_key[TRUNCATE_LOG_SYSTEM_INODE]);
+ break;
+ case LOCAL_USER_QUOTA_SYSTEM_INODE:
+ lockdep_set_class(&inode->i_rwsem,
+ &ocfs2_sysfile_lock_key[LOCAL_USER_QUOTA_SYSTEM_INODE]);
+ break;
+ case LOCAL_GROUP_QUOTA_SYSTEM_INODE:
+ lockdep_set_class(&inode->i_rwsem,
+ &ocfs2_sysfile_lock_key[LOCAL_GROUP_QUOTA_SYSTEM_INODE]);
+ break;
+ default:
+ WARN_ONCE(1, "Unknown sysfile type %d\n", args->fi_sysfile_type);
+ }
if (args->fi_sysfile_type == USER_QUOTA_SYSTEM_INODE ||
args->fi_sysfile_type == GROUP_QUOTA_SYSTEM_INODE ||
args->fi_sysfile_type == LOCAL_USER_QUOTA_SYSTEM_INODE ||
@@ -267,6 +328,7 @@ static int ocfs2_init_locked_inode(struct inode *inode, void *opaque)
else
lockdep_set_class(&OCFS2_I(inode)->ip_alloc_sem,
&ocfs2_file_ip_alloc_sem_key);
+#endif
return 0;
}
diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c
index 369c7d27befd..cbe2f8ed8897 100644
--- a/fs/ocfs2/move_extents.c
+++ b/fs/ocfs2/move_extents.c
@@ -617,6 +617,8 @@ static int ocfs2_move_extent(struct ocfs2_move_extents_context *context,
*/
credits += OCFS2_INODE_UPDATE_CREDITS + 1;
+ inode_lock(tl_inode);
+
/*
* ocfs2_move_extent() didn't reserve any clusters in lock_allocators()
* logic, while we still need to lock the global_bitmap.
@@ -626,7 +628,7 @@ static int ocfs2_move_extent(struct ocfs2_move_extents_context *context,
if (!gb_inode) {
mlog(ML_ERROR, "unable to get global_bitmap inode\n");
ret = -EIO;
- goto out;
+ goto out_unlock_tl_inode;
}
inode_lock(gb_inode);
@@ -634,16 +636,14 @@ static int ocfs2_move_extent(struct ocfs2_move_extents_context *context,
ret = ocfs2_inode_lock(gb_inode, &gb_bh, 1);
if (ret) {
mlog_errno(ret);
- goto out_unlock_gb_mutex;
+ goto out_unlock_gb_inode;
}
- inode_lock(tl_inode);
-
handle = ocfs2_start_trans(osb, credits);
if (IS_ERR(handle)) {
ret = PTR_ERR(handle);
mlog_errno(ret);
- goto out_unlock_tl_inode;
+ goto out_unlock;
}
new_phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, *new_phys_cpos);
@@ -703,15 +703,14 @@ static int ocfs2_move_extent(struct ocfs2_move_extents_context *context,
out_commit:
ocfs2_commit_trans(osb, handle);
brelse(gd_bh);
-
-out_unlock_tl_inode:
- inode_unlock(tl_inode);
-
+out_unlock:
ocfs2_inode_unlock(gb_inode, 1);
-out_unlock_gb_mutex:
+out_unlock_gb_inode:
inode_unlock(gb_inode);
brelse(gb_bh);
iput(gb_inode);
+out_unlock_tl_inode:
+ inode_unlock(tl_inode);
out:
if (context->meta_ac) {
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 99278c8f0e24..c90b254da75e 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -142,6 +142,8 @@ static struct dentry *ocfs2_lookup(struct inode *dir, struct dentry *dentry,
bail_add:
ret = d_splice_alias(inode, dentry);
+ if (IS_ERR(ret))
+ goto bail_unlock;
if (inode) {
/*
@@ -154,15 +156,16 @@ bail_add:
* NOTE: This dentry already has ->d_op set from
* ocfs2_get_parent() and ocfs2_get_dentry()
*/
- if (!IS_ERR_OR_NULL(ret))
+ if (ret)
dentry = ret;
status = ocfs2_dentry_attach_lock(dentry, inode,
OCFS2_I(dir)->ip_blkno);
if (status) {
mlog_errno(status);
+ if (ret)
+ dput(ret);
ret = ERR_PTR(status);
- goto bail_unlock;
}
} else
ocfs2_dentry_attach_gen(dentry);
@@ -1452,8 +1455,8 @@ static int ocfs2_rename(struct mnt_idmap *idmap,
newfe = (struct ocfs2_dinode *) newfe_bh->b_data;
trace_ocfs2_rename_over_existing(
- (unsigned long long)newfe_blkno, newfe_bh, newfe_bh ?
- (unsigned long long)newfe_bh->b_blocknr : 0ULL);
+ (unsigned long long)newfe_blkno, newfe_bh,
+ (unsigned long long)newfe_bh->b_blocknr);
if (S_ISDIR(new_inode->i_mode) || (new_inode->i_nlink == 1)) {
status = ocfs2_prepare_orphan_dir(osb, &orphan_dir,
diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c
index 77edcd70f72c..0f045e45fa0c 100644
--- a/fs/ocfs2/stack_user.c
+++ b/fs/ocfs2/stack_user.c
@@ -360,7 +360,6 @@ static int ocfs2_control_do_setnode_msg(struct file *file,
struct ocfs2_control_message_setn *msg)
{
long nodenum;
- char *ptr = NULL;
struct ocfs2_control_private *p = file->private_data;
if (ocfs2_control_get_handshake_state(file) !=
@@ -375,8 +374,7 @@ static int ocfs2_control_do_setnode_msg(struct file *file,
return -EINVAL;
msg->space = msg->newline = '\0';
- nodenum = simple_strtol(msg->nodestr, &ptr, 16);
- if (!ptr || *ptr)
+ if (kstrtol(msg->nodestr, 16, &nodenum))
return -EINVAL;
if ((nodenum == LONG_MIN) || (nodenum == LONG_MAX) ||
@@ -391,7 +389,6 @@ static int ocfs2_control_do_setversion_msg(struct file *file,
struct ocfs2_control_message_setv *msg)
{
long major, minor;
- char *ptr = NULL;
struct ocfs2_control_private *p = file->private_data;
struct ocfs2_protocol_version *max =
&ocfs2_user_plugin.sp_max_proto;
@@ -409,11 +406,9 @@ static int ocfs2_control_do_setversion_msg(struct file *file,
return -EINVAL;
msg->space1 = msg->space2 = msg->newline = '\0';
- major = simple_strtol(msg->major, &ptr, 16);
- if (!ptr || *ptr)
+ if (kstrtol(msg->major, 16, &major))
return -EINVAL;
- minor = simple_strtol(msg->minor, &ptr, 16);
- if (!ptr || *ptr)
+ if (kstrtol(msg->minor, 16, &minor))
return -EINVAL;
/*
@@ -441,7 +436,6 @@ static int ocfs2_control_do_down_msg(struct file *file,
struct ocfs2_control_message_down *msg)
{
long nodenum;
- char *p = NULL;
if (ocfs2_control_get_handshake_state(file) !=
OCFS2_CONTROL_HANDSHAKE_VALID)
@@ -456,8 +450,7 @@ static int ocfs2_control_do_down_msg(struct file *file,
return -EINVAL;
msg->space1 = msg->space2 = msg->newline = '\0';
- nodenum = simple_strtol(msg->nodestr, &p, 16);
- if (!p || *p)
+ if (kstrtol(msg->nodestr, 16, &nodenum))
return -EINVAL;
if ((nodenum == LONG_MIN) || (nodenum == LONG_MAX) ||
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index 10d01eb09c43..f188bd900eb2 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -1490,10 +1490,8 @@ int vmcore_add_device_dump(struct vmcoredd_data *data)
return -EINVAL;
dump = vzalloc(sizeof(*dump));
- if (!dump) {
- ret = -ENOMEM;
- goto out_err;
- }
+ if (!dump)
+ return -ENOMEM;
/* Keep size of the buffer page aligned so that it can be mmaped */
data_size = roundup(sizeof(struct vmcoredd_header) + data->size,
@@ -1519,22 +1517,19 @@ int vmcore_add_device_dump(struct vmcoredd_data *data)
dump->size = data_size;
/* Add the dump to driver sysfs list and update the elfcore hdr */
- mutex_lock(&vmcore_mutex);
- if (vmcore_opened)
- pr_warn_once("Unexpected adding of device dump\n");
- if (vmcore_open) {
- ret = -EBUSY;
- goto unlock;
- }
+ scoped_guard(mutex, &vmcore_mutex) {
+ if (vmcore_opened)
+ pr_warn_once("Unexpected adding of device dump\n");
+ if (vmcore_open) {
+ ret = -EBUSY;
+ goto out_err;
+ }
- list_add_tail(&dump->list, &vmcoredd_list);
- vmcoredd_update_size(data_size);
- mutex_unlock(&vmcore_mutex);
+ list_add_tail(&dump->list, &vmcoredd_list);
+ vmcoredd_update_size(data_size);
+ }
return 0;
-unlock:
- mutex_unlock(&vmcore_mutex);
-
out_err:
vfree(buf);
vfree(dump);
diff --git a/fs/smb/client/Makefile b/fs/smb/client/Makefile
index 22023e30915b..4c97b31a25c2 100644
--- a/fs/smb/client/Makefile
+++ b/fs/smb/client/Makefile
@@ -32,6 +32,6 @@ cifs-$(CONFIG_CIFS_SMB_DIRECT) += smbdirect.o
cifs-$(CONFIG_CIFS_ROOT) += cifsroot.o
-cifs-$(CONFIG_CIFS_ALLOW_INSECURE_LEGACY) += smb1ops.o cifssmb.o
+cifs-$(CONFIG_CIFS_ALLOW_INSECURE_LEGACY) += smb1ops.o cifssmb.o cifstransport.o
cifs-$(CONFIG_CIFS_COMPRESSION) += compress.o compress/lz77.o
diff --git a/fs/smb/client/cifs_debug.c b/fs/smb/client/cifs_debug.c
index f1cea365b6f1..beb4f18f05ef 100644
--- a/fs/smb/client/cifs_debug.c
+++ b/fs/smb/client/cifs_debug.c
@@ -60,7 +60,7 @@ void cifs_dump_mids(struct TCP_Server_Info *server)
return;
cifs_dbg(VFS, "Dump pending requests:\n");
- spin_lock(&server->mid_lock);
+ spin_lock(&server->mid_queue_lock);
list_for_each_entry(mid_entry, &server->pending_mid_q, qhead) {
cifs_dbg(VFS, "State: %d Cmd: %d Pid: %d Cbdata: %p Mid %llu\n",
mid_entry->mid_state,
@@ -83,7 +83,7 @@ void cifs_dump_mids(struct TCP_Server_Info *server)
mid_entry->resp_buf, 62);
}
}
- spin_unlock(&server->mid_lock);
+ spin_unlock(&server->mid_queue_lock);
#endif /* CONFIG_CIFS_DEBUG2 */
}
@@ -412,6 +412,7 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
spin_lock(&cifs_tcp_ses_lock);
list_for_each_entry(server, &cifs_tcp_ses_list, tcp_ses_list) {
#ifdef CONFIG_CIFS_SMB_DIRECT
+ struct smbdirect_socket *sc;
struct smbdirect_socket_parameters *sp;
#endif
@@ -436,7 +437,8 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
seq_printf(m, "\nSMBDirect transport not available");
goto skip_rdma;
}
- sp = &server->smbd_conn->socket.parameters;
+ sc = &server->smbd_conn->socket;
+ sp = &sc->parameters;
seq_printf(m, "\nSMBDirect (in hex) protocol version: %x "
"transport status: %x",
@@ -465,15 +467,13 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
seq_printf(m, "\nRead Queue count_reassembly_queue: %x "
"count_enqueue_reassembly_queue: %x "
"count_dequeue_reassembly_queue: %x "
- "fragment_reassembly_remaining: %x "
"reassembly_data_length: %x "
"reassembly_queue_length: %x",
server->smbd_conn->count_reassembly_queue,
server->smbd_conn->count_enqueue_reassembly_queue,
server->smbd_conn->count_dequeue_reassembly_queue,
- server->smbd_conn->fragment_reassembly_remaining,
- server->smbd_conn->reassembly_data_length,
- server->smbd_conn->reassembly_queue_length);
+ sc->recv_io.reassembly.data_length,
+ sc->recv_io.reassembly.queue_length);
seq_printf(m, "\nCurrent Credits send_credits: %x "
"receive_credits: %x receive_credit_target: %x",
atomic_read(&server->smbd_conn->send_credits),
@@ -481,10 +481,8 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
server->smbd_conn->receive_credit_target);
seq_printf(m, "\nPending send_pending: %x ",
atomic_read(&server->smbd_conn->send_pending));
- seq_printf(m, "\nReceive buffers count_receive_queue: %x "
- "count_empty_packet_queue: %x",
- server->smbd_conn->count_receive_queue,
- server->smbd_conn->count_empty_packet_queue);
+ seq_printf(m, "\nReceive buffers count_receive_queue: %x ",
+ server->smbd_conn->count_receive_queue);
seq_printf(m, "\nMR responder_resources: %x "
"max_frmr_depth: %x mr_type: %x",
server->smbd_conn->responder_resources,
@@ -672,7 +670,7 @@ skip_rdma:
seq_printf(m, "\n\tServer ConnectionId: 0x%llx",
chan_server->conn_id);
- spin_lock(&chan_server->mid_lock);
+ spin_lock(&chan_server->mid_queue_lock);
list_for_each_entry(mid_entry, &chan_server->pending_mid_q, qhead) {
seq_printf(m, "\n\t\tState: %d com: %d pid: %d cbdata: %p mid %llu",
mid_entry->mid_state,
@@ -681,7 +679,7 @@ skip_rdma:
mid_entry->callback_data,
mid_entry->mid);
}
- spin_unlock(&chan_server->mid_lock);
+ spin_unlock(&chan_server->mid_queue_lock);
}
spin_unlock(&ses->chan_lock);
seq_puts(m, "\n--\n");
diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c
index 0fdadd668a81..3bd85ab2deb1 100644
--- a/fs/smb/client/cifsfs.c
+++ b/fs/smb/client/cifsfs.c
@@ -77,7 +77,7 @@ unsigned int global_secflags = CIFSSEC_DEF;
unsigned int GlobalCurrentXid; /* protected by GlobalMid_Lock */
unsigned int GlobalTotalActiveXid; /* prot by GlobalMid_Lock */
unsigned int GlobalMaxActiveXid; /* prot by GlobalMid_Lock */
-spinlock_t GlobalMid_Lock; /* protects above & list operations on midQ entries */
+DEFINE_SPINLOCK(GlobalMid_Lock); /* protects above & list operations on midQ entries */
/*
* Global counters, updated atomically
@@ -97,7 +97,7 @@ atomic_t total_buf_alloc_count;
atomic_t total_small_buf_alloc_count;
#endif/* STATS2 */
struct list_head cifs_tcp_ses_list;
-spinlock_t cifs_tcp_ses_lock;
+DEFINE_SPINLOCK(cifs_tcp_ses_lock);
static const struct super_operations cifs_super_ops;
unsigned int CIFSMaxBufSize = CIFS_MAX_MSGSIZE;
module_param(CIFSMaxBufSize, uint, 0444);
@@ -723,7 +723,7 @@ cifs_show_options(struct seq_file *s, struct dentry *root)
else
seq_puts(s, ",nativesocket");
seq_show_option(s, "symlink",
- cifs_symlink_type_str(get_cifs_symlink_type(cifs_sb)));
+ cifs_symlink_type_str(cifs_symlink_type(cifs_sb)));
seq_printf(s, ",rsize=%u", cifs_sb->ctx->rsize);
seq_printf(s, ",wsize=%u", cifs_sb->ctx->wsize);
@@ -1863,8 +1863,6 @@ init_cifs(void)
GlobalCurrentXid = 0;
GlobalTotalActiveXid = 0;
GlobalMaxActiveXid = 0;
- spin_lock_init(&cifs_tcp_ses_lock);
- spin_lock_init(&GlobalMid_Lock);
cifs_lock_secret = get_random_u32();
diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h
index 19dd901fe8ab..e6830ab3a546 100644
--- a/fs/smb/client/cifsglob.h
+++ b/fs/smb/client/cifsglob.h
@@ -732,7 +732,8 @@ struct TCP_Server_Info {
#endif
wait_queue_head_t response_q;
wait_queue_head_t request_q; /* if more than maxmpx to srvr must block*/
- spinlock_t mid_lock; /* protect mid queue and it's entries */
+ spinlock_t mid_queue_lock; /* protect mid queue */
+ spinlock_t mid_counter_lock;
struct list_head pending_mid_q;
bool noblocksnd; /* use blocking sendmsg */
bool noautotune; /* do not autotune send buf sizes */
@@ -770,7 +771,7 @@ struct TCP_Server_Info {
/* SMB_COM_WRITE_RAW or SMB_COM_READ_RAW. */
unsigned int capabilities; /* selective disabling of caps by smb sess */
int timeAdj; /* Adjust for difference in server time zone in sec */
- __u64 CurrentMid; /* multiplex id - rotating counter, protected by GlobalMid_Lock */
+ __u64 current_mid; /* multiplex id - rotating counter, protected by mid_counter_lock */
char cryptkey[CIFS_CRYPTO_KEY_SIZE]; /* used by ntlm, ntlmv2 etc */
/* 16th byte of RFC1001 workstation name is always null */
char workstation_RFC1001_name[RFC1001_NAME_LEN_WITH_NULL];
@@ -1729,9 +1730,10 @@ struct mid_q_entry {
unsigned int resp_buf_size;
int mid_state; /* wish this were enum but can not pass to wait_event */
int mid_rc; /* rc for MID_RC */
- unsigned int mid_flags;
__le16 command; /* smb command code */
unsigned int optype; /* operation type */
+ bool wait_cancelled:1; /* Cancelled while waiting for response */
+ bool deleted_from_q:1; /* Whether Mid has been dequeued from pending_mid_q */
bool large_buf:1; /* if valid response, is pointer to large buf */
bool multiRsp:1; /* multiple trans2 responses for one request */
bool multiEnd:1; /* both received */
@@ -1893,10 +1895,6 @@ static inline bool is_replayable_error(int error)
#define MID_RESPONSE_READY 0x40 /* ready for other process handle the rsp */
#define MID_RC 0x80 /* mid_rc contains custom rc */
-/* Flags */
-#define MID_WAIT_CANCELLED 1 /* Cancelled while waiting for response */
-#define MID_DELETED 2 /* Mid has been dequeued/deleted */
-
/* Types of response buffer returned from SendReceive2 */
#define CIFS_NO_BUFFER 0 /* Response buffer not returned */
#define CIFS_SMALL_BUFFER 1
@@ -2007,9 +2005,9 @@ require use of the stronger protocol */
* GlobalCurrentXid
* GlobalTotalActiveXid
* TCP_Server_Info->srv_lock (anything in struct not protected by another lock and can change)
- * TCP_Server_Info->mid_lock TCP_Server_Info->pending_mid_q cifs_get_tcp_session
- * ->CurrentMid
- * (any changes in mid_q_entry fields)
+ * TCP_Server_Info->mid_queue_lock TCP_Server_Info->pending_mid_q cifs_get_tcp_session
+ * mid_q_entry->deleted_from_q
+ * TCP_Server_Info->mid_counter_lock TCP_Server_Info->current_mid cifs_get_tcp_session
* TCP_Server_Info->req_lock TCP_Server_Info->in_flight cifs_get_tcp_session
* ->credits
* ->echo_credits
@@ -2377,4 +2375,9 @@ static inline bool cifs_netbios_name(const char *name, size_t namelen)
return ret;
}
+#define CIFS_REPARSE_SUPPORT(tcon) \
+ ((tcon)->posix_extensions || \
+ (le32_to_cpu((tcon)->fsAttrInfo.Attributes) & \
+ FILE_SUPPORTS_REPARSE_POINTS))
+
#endif /* _CIFS_GLOB_H */
diff --git a/fs/smb/client/cifsproto.h b/fs/smb/client/cifsproto.h
index 40ec0634377f..c34c533b2efa 100644
--- a/fs/smb/client/cifsproto.h
+++ b/fs/smb/client/cifsproto.h
@@ -116,16 +116,31 @@ extern int SendReceive(const unsigned int /* xid */ , struct cifs_ses *,
int * /* bytes returned */ , const int);
extern int SendReceiveNoRsp(const unsigned int xid, struct cifs_ses *ses,
char *in_buf, int flags);
+int cifs_sync_mid_result(struct mid_q_entry *mid, struct TCP_Server_Info *server);
extern struct mid_q_entry *cifs_setup_request(struct cifs_ses *,
struct TCP_Server_Info *,
struct smb_rqst *);
extern struct mid_q_entry *cifs_setup_async_request(struct TCP_Server_Info *,
struct smb_rqst *);
+int __smb_send_rqst(struct TCP_Server_Info *server, int num_rqst,
+ struct smb_rqst *rqst);
extern int cifs_check_receive(struct mid_q_entry *mid,
struct TCP_Server_Info *server, bool log_error);
+int wait_for_free_request(struct TCP_Server_Info *server, const int flags,
+ unsigned int *instance);
extern int cifs_wait_mtu_credits(struct TCP_Server_Info *server,
size_t size, size_t *num,
struct cifs_credits *credits);
+
+static inline int
+send_cancel(struct TCP_Server_Info *server, struct smb_rqst *rqst,
+ struct mid_q_entry *mid)
+{
+ return server->ops->send_cancel ?
+ server->ops->send_cancel(server, rqst, mid) : 0;
+}
+
+int wait_for_response(struct TCP_Server_Info *server, struct mid_q_entry *midQ);
extern int SendReceive2(const unsigned int /* xid */ , struct cifs_ses *,
struct kvec *, int /* nvec to send */,
int * /* type of buf returned */, const int flags,
diff --git a/fs/smb/client/cifssmb.c b/fs/smb/client/cifssmb.c
index 6c890db06593..d20766f664c4 100644
--- a/fs/smb/client/cifssmb.c
+++ b/fs/smb/client/cifssmb.c
@@ -2751,7 +2751,7 @@ int cifs_query_reparse_point(const unsigned int xid,
if (cap_unix(tcon->ses))
return -EOPNOTSUPP;
- if (!(le32_to_cpu(tcon->fsAttrInfo.Attributes) & FILE_SUPPORTS_REPARSE_POINTS))
+ if (!CIFS_REPARSE_SUPPORT(tcon))
return -EOPNOTSUPP;
oparms = (struct cifs_open_parms) {
@@ -2879,7 +2879,7 @@ struct inode *cifs_create_reparse_inode(struct cifs_open_info_data *data,
* attempt to create reparse point. This will prevent creating unusable
* empty object on the server.
*/
- if (!(le32_to_cpu(tcon->fsAttrInfo.Attributes) & FILE_SUPPORTS_REPARSE_POINTS))
+ if (!CIFS_REPARSE_SUPPORT(tcon))
return ERR_PTR(-EOPNOTSUPP);
#ifndef CONFIG_CIFS_XATTR
diff --git a/fs/smb/client/cifstransport.c b/fs/smb/client/cifstransport.c
new file mode 100644
index 000000000000..352dafb888dd
--- /dev/null
+++ b/fs/smb/client/cifstransport.c
@@ -0,0 +1,566 @@
+// SPDX-License-Identifier: LGPL-2.1
+/*
+ *
+ * Copyright (C) International Business Machines Corp., 2002,2008
+ * Author(s): Steve French (sfrench@us.ibm.com)
+ * Jeremy Allison (jra@samba.org) 2006.
+ *
+ */
+
+#include <linux/fs.h>
+#include <linux/list.h>
+#include <linux/gfp.h>
+#include <linux/wait.h>
+#include <linux/net.h>
+#include <linux/delay.h>
+#include <linux/freezer.h>
+#include <linux/tcp.h>
+#include <linux/bvec.h>
+#include <linux/highmem.h>
+#include <linux/uaccess.h>
+#include <linux/processor.h>
+#include <linux/mempool.h>
+#include <linux/sched/signal.h>
+#include <linux/task_io_accounting_ops.h>
+#include "cifspdu.h"
+#include "cifsglob.h"
+#include "cifsproto.h"
+#include "cifs_debug.h"
+#include "smb2proto.h"
+#include "smbdirect.h"
+#include "compress.h"
+
+/* Max number of iovectors we can use off the stack when sending requests. */
+#define CIFS_MAX_IOV_SIZE 8
+
+static struct mid_q_entry *
+alloc_mid(const struct smb_hdr *smb_buffer, struct TCP_Server_Info *server)
+{
+ struct mid_q_entry *temp;
+
+ if (server == NULL) {
+ cifs_dbg(VFS, "%s: null TCP session\n", __func__);
+ return NULL;
+ }
+
+ temp = mempool_alloc(cifs_mid_poolp, GFP_NOFS);
+ memset(temp, 0, sizeof(struct mid_q_entry));
+ kref_init(&temp->refcount);
+ temp->mid = get_mid(smb_buffer);
+ temp->pid = current->pid;
+ temp->command = cpu_to_le16(smb_buffer->Command);
+ cifs_dbg(FYI, "For smb_command %d\n", smb_buffer->Command);
+ /* easier to use jiffies */
+ /* when mid allocated can be before when sent */
+ temp->when_alloc = jiffies;
+ temp->server = server;
+
+ /*
+ * The default is for the mid to be synchronous, so the
+ * default callback just wakes up the current task.
+ */
+ get_task_struct(current);
+ temp->creator = current;
+ temp->callback = cifs_wake_up_task;
+ temp->callback_data = current;
+
+ atomic_inc(&mid_count);
+ temp->mid_state = MID_REQUEST_ALLOCATED;
+ return temp;
+}
+
+int
+smb_send(struct TCP_Server_Info *server, struct smb_hdr *smb_buffer,
+ unsigned int smb_buf_length)
+{
+ struct kvec iov[2];
+ struct smb_rqst rqst = { .rq_iov = iov,
+ .rq_nvec = 2 };
+
+ iov[0].iov_base = smb_buffer;
+ iov[0].iov_len = 4;
+ iov[1].iov_base = (char *)smb_buffer + 4;
+ iov[1].iov_len = smb_buf_length;
+
+ return __smb_send_rqst(server, 1, &rqst);
+}
+
+static int allocate_mid(struct cifs_ses *ses, struct smb_hdr *in_buf,
+ struct mid_q_entry **ppmidQ)
+{
+ spin_lock(&ses->ses_lock);
+ if (ses->ses_status == SES_NEW) {
+ if ((in_buf->Command != SMB_COM_SESSION_SETUP_ANDX) &&
+ (in_buf->Command != SMB_COM_NEGOTIATE)) {
+ spin_unlock(&ses->ses_lock);
+ return -EAGAIN;
+ }
+ /* else ok - we are setting up session */
+ }
+
+ if (ses->ses_status == SES_EXITING) {
+ /* check if SMB session is bad because we are shutting it down */
+ if (in_buf->Command != SMB_COM_LOGOFF_ANDX) {
+ spin_unlock(&ses->ses_lock);
+ return -EAGAIN;
+ }
+ /* else ok - we are shutting down session */
+ }
+ spin_unlock(&ses->ses_lock);
+
+ *ppmidQ = alloc_mid(in_buf, ses->server);
+ if (*ppmidQ == NULL)
+ return -ENOMEM;
+ spin_lock(&ses->server->mid_queue_lock);
+ list_add_tail(&(*ppmidQ)->qhead, &ses->server->pending_mid_q);
+ spin_unlock(&ses->server->mid_queue_lock);
+ return 0;
+}
+
+struct mid_q_entry *
+cifs_setup_async_request(struct TCP_Server_Info *server, struct smb_rqst *rqst)
+{
+ int rc;
+ struct smb_hdr *hdr = (struct smb_hdr *)rqst->rq_iov[0].iov_base;
+ struct mid_q_entry *mid;
+
+ if (rqst->rq_iov[0].iov_len != 4 ||
+ rqst->rq_iov[0].iov_base + 4 != rqst->rq_iov[1].iov_base)
+ return ERR_PTR(-EIO);
+
+ /* enable signing if server requires it */
+ if (server->sign)
+ hdr->Flags2 |= SMBFLG2_SECURITY_SIGNATURE;
+
+ mid = alloc_mid(hdr, server);
+ if (mid == NULL)
+ return ERR_PTR(-ENOMEM);
+
+ rc = cifs_sign_rqst(rqst, server, &mid->sequence_number);
+ if (rc) {
+ release_mid(mid);
+ return ERR_PTR(rc);
+ }
+
+ return mid;
+}
+
+/*
+ *
+ * Send an SMB Request. No response info (other than return code)
+ * needs to be parsed.
+ *
+ * flags indicate the type of request buffer and how long to wait
+ * and whether to log NT STATUS code (error) before mapping it to POSIX error
+ *
+ */
+int
+SendReceiveNoRsp(const unsigned int xid, struct cifs_ses *ses,
+ char *in_buf, int flags)
+{
+ int rc;
+ struct kvec iov[1];
+ struct kvec rsp_iov;
+ int resp_buf_type;
+
+ iov[0].iov_base = in_buf;
+ iov[0].iov_len = get_rfc1002_length(in_buf) + 4;
+ flags |= CIFS_NO_RSP_BUF;
+ rc = SendReceive2(xid, ses, iov, 1, &resp_buf_type, flags, &rsp_iov);
+ cifs_dbg(NOISY, "SendRcvNoRsp flags %d rc %d\n", flags, rc);
+
+ return rc;
+}
+
+int
+cifs_check_receive(struct mid_q_entry *mid, struct TCP_Server_Info *server,
+ bool log_error)
+{
+ unsigned int len = get_rfc1002_length(mid->resp_buf) + 4;
+
+ dump_smb(mid->resp_buf, min_t(u32, 92, len));
+
+ /* convert the length into a more usable form */
+ if (server->sign) {
+ struct kvec iov[2];
+ int rc = 0;
+ struct smb_rqst rqst = { .rq_iov = iov,
+ .rq_nvec = 2 };
+
+ iov[0].iov_base = mid->resp_buf;
+ iov[0].iov_len = 4;
+ iov[1].iov_base = (char *)mid->resp_buf + 4;
+ iov[1].iov_len = len - 4;
+ /* FIXME: add code to kill session */
+ rc = cifs_verify_signature(&rqst, server,
+ mid->sequence_number);
+ if (rc)
+ cifs_server_dbg(VFS, "SMB signature verification returned error = %d\n",
+ rc);
+ }
+
+ /* BB special case reconnect tid and uid here? */
+ return map_and_check_smb_error(mid, log_error);
+}
+
+struct mid_q_entry *
+cifs_setup_request(struct cifs_ses *ses, struct TCP_Server_Info *ignored,
+ struct smb_rqst *rqst)
+{
+ int rc;
+ struct smb_hdr *hdr = (struct smb_hdr *)rqst->rq_iov[0].iov_base;
+ struct mid_q_entry *mid;
+
+ if (rqst->rq_iov[0].iov_len != 4 ||
+ rqst->rq_iov[0].iov_base + 4 != rqst->rq_iov[1].iov_base)
+ return ERR_PTR(-EIO);
+
+ rc = allocate_mid(ses, hdr, &mid);
+ if (rc)
+ return ERR_PTR(rc);
+ rc = cifs_sign_rqst(rqst, ses->server, &mid->sequence_number);
+ if (rc) {
+ delete_mid(mid);
+ return ERR_PTR(rc);
+ }
+ return mid;
+}
+
+int
+SendReceive2(const unsigned int xid, struct cifs_ses *ses,
+ struct kvec *iov, int n_vec, int *resp_buf_type /* ret */,
+ const int flags, struct kvec *resp_iov)
+{
+ struct smb_rqst rqst;
+ struct kvec s_iov[CIFS_MAX_IOV_SIZE], *new_iov;
+ int rc;
+
+ if (n_vec + 1 > CIFS_MAX_IOV_SIZE) {
+ new_iov = kmalloc_array(n_vec + 1, sizeof(struct kvec),
+ GFP_KERNEL);
+ if (!new_iov) {
+ /* otherwise cifs_send_recv below sets resp_buf_type */
+ *resp_buf_type = CIFS_NO_BUFFER;
+ return -ENOMEM;
+ }
+ } else
+ new_iov = s_iov;
+
+ /* 1st iov is a RFC1001 length followed by the rest of the packet */
+ memcpy(new_iov + 1, iov, (sizeof(struct kvec) * n_vec));
+
+ new_iov[0].iov_base = new_iov[1].iov_base;
+ new_iov[0].iov_len = 4;
+ new_iov[1].iov_base += 4;
+ new_iov[1].iov_len -= 4;
+
+ memset(&rqst, 0, sizeof(struct smb_rqst));
+ rqst.rq_iov = new_iov;
+ rqst.rq_nvec = n_vec + 1;
+
+ rc = cifs_send_recv(xid, ses, ses->server,
+ &rqst, resp_buf_type, flags, resp_iov);
+ if (n_vec + 1 > CIFS_MAX_IOV_SIZE)
+ kfree(new_iov);
+ return rc;
+}
+
+int
+SendReceive(const unsigned int xid, struct cifs_ses *ses,
+ struct smb_hdr *in_buf, struct smb_hdr *out_buf,
+ int *pbytes_returned, const int flags)
+{
+ int rc = 0;
+ struct mid_q_entry *midQ;
+ unsigned int len = be32_to_cpu(in_buf->smb_buf_length);
+ struct kvec iov = { .iov_base = in_buf, .iov_len = len };
+ struct smb_rqst rqst = { .rq_iov = &iov, .rq_nvec = 1 };
+ struct cifs_credits credits = { .value = 1, .instance = 0 };
+ struct TCP_Server_Info *server;
+
+ if (ses == NULL) {
+ cifs_dbg(VFS, "Null smb session\n");
+ return -EIO;
+ }
+ server = ses->server;
+ if (server == NULL) {
+ cifs_dbg(VFS, "Null tcp session\n");
+ return -EIO;
+ }
+
+ spin_lock(&server->srv_lock);
+ if (server->tcpStatus == CifsExiting) {
+ spin_unlock(&server->srv_lock);
+ return -ENOENT;
+ }
+ spin_unlock(&server->srv_lock);
+
+ /* Ensure that we do not send more than 50 overlapping requests
+ to the same server. We may make this configurable later or
+ use ses->maxReq */
+
+ if (len > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4) {
+ cifs_server_dbg(VFS, "Invalid length, greater than maximum frame, %d\n",
+ len);
+ return -EIO;
+ }
+
+ rc = wait_for_free_request(server, flags, &credits.instance);
+ if (rc)
+ return rc;
+
+ /* make sure that we sign in the same order that we send on this socket
+ and avoid races inside tcp sendmsg code that could cause corruption
+ of smb data */
+
+ cifs_server_lock(server);
+
+ rc = allocate_mid(ses, in_buf, &midQ);
+ if (rc) {
+ cifs_server_unlock(server);
+ /* Update # of requests on wire to server */
+ add_credits(server, &credits, 0);
+ return rc;
+ }
+
+ rc = cifs_sign_smb(in_buf, server, &midQ->sequence_number);
+ if (rc) {
+ cifs_server_unlock(server);
+ goto out;
+ }
+
+ midQ->mid_state = MID_REQUEST_SUBMITTED;
+
+ rc = smb_send(server, in_buf, len);
+ cifs_save_when_sent(midQ);
+
+ if (rc < 0)
+ server->sequence_number -= 2;
+
+ cifs_server_unlock(server);
+
+ if (rc < 0)
+ goto out;
+
+ rc = wait_for_response(server, midQ);
+ if (rc != 0) {
+ send_cancel(server, &rqst, midQ);
+ spin_lock(&server->mid_queue_lock);
+ if (midQ->mid_state == MID_REQUEST_SUBMITTED ||
+ midQ->mid_state == MID_RESPONSE_RECEIVED) {
+ /* no longer considered to be "in-flight" */
+ midQ->callback = release_mid;
+ spin_unlock(&server->mid_queue_lock);
+ add_credits(server, &credits, 0);
+ return rc;
+ }
+ spin_unlock(&server->mid_queue_lock);
+ }
+
+ rc = cifs_sync_mid_result(midQ, server);
+ if (rc != 0) {
+ add_credits(server, &credits, 0);
+ return rc;
+ }
+
+ if (!midQ->resp_buf || !out_buf ||
+ midQ->mid_state != MID_RESPONSE_READY) {
+ rc = -EIO;
+ cifs_server_dbg(VFS, "Bad MID state?\n");
+ goto out;
+ }
+
+ *pbytes_returned = get_rfc1002_length(midQ->resp_buf);
+ memcpy(out_buf, midQ->resp_buf, *pbytes_returned + 4);
+ rc = cifs_check_receive(midQ, server, 0);
+out:
+ delete_mid(midQ);
+ add_credits(server, &credits, 0);
+
+ return rc;
+}
+
+/* We send a LOCKINGX_CANCEL_LOCK to cause the Windows
+ blocking lock to return. */
+
+static int
+send_lock_cancel(const unsigned int xid, struct cifs_tcon *tcon,
+ struct smb_hdr *in_buf,
+ struct smb_hdr *out_buf)
+{
+ int bytes_returned;
+ struct cifs_ses *ses = tcon->ses;
+ LOCK_REQ *pSMB = (LOCK_REQ *)in_buf;
+
+ /* We just modify the current in_buf to change
+ the type of lock from LOCKING_ANDX_SHARED_LOCK
+ or LOCKING_ANDX_EXCLUSIVE_LOCK to
+ LOCKING_ANDX_CANCEL_LOCK. */
+
+ pSMB->LockType = LOCKING_ANDX_CANCEL_LOCK|LOCKING_ANDX_LARGE_FILES;
+ pSMB->Timeout = 0;
+ pSMB->hdr.Mid = get_next_mid(ses->server);
+
+ return SendReceive(xid, ses, in_buf, out_buf,
+ &bytes_returned, 0);
+}
+
+int
+SendReceiveBlockingLock(const unsigned int xid, struct cifs_tcon *tcon,
+ struct smb_hdr *in_buf, struct smb_hdr *out_buf,
+ int *pbytes_returned)
+{
+ int rc = 0;
+ int rstart = 0;
+ struct mid_q_entry *midQ;
+ struct cifs_ses *ses;
+ unsigned int len = be32_to_cpu(in_buf->smb_buf_length);
+ struct kvec iov = { .iov_base = in_buf, .iov_len = len };
+ struct smb_rqst rqst = { .rq_iov = &iov, .rq_nvec = 1 };
+ unsigned int instance;
+ struct TCP_Server_Info *server;
+
+ if (tcon == NULL || tcon->ses == NULL) {
+ cifs_dbg(VFS, "Null smb session\n");
+ return -EIO;
+ }
+ ses = tcon->ses;
+ server = ses->server;
+
+ if (server == NULL) {
+ cifs_dbg(VFS, "Null tcp session\n");
+ return -EIO;
+ }
+
+ spin_lock(&server->srv_lock);
+ if (server->tcpStatus == CifsExiting) {
+ spin_unlock(&server->srv_lock);
+ return -ENOENT;
+ }
+ spin_unlock(&server->srv_lock);
+
+ /* Ensure that we do not send more than 50 overlapping requests
+ to the same server. We may make this configurable later or
+ use ses->maxReq */
+
+ if (len > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4) {
+ cifs_tcon_dbg(VFS, "Invalid length, greater than maximum frame, %d\n",
+ len);
+ return -EIO;
+ }
+
+ rc = wait_for_free_request(server, CIFS_BLOCKING_OP, &instance);
+ if (rc)
+ return rc;
+
+ /* make sure that we sign in the same order that we send on this socket
+ and avoid races inside tcp sendmsg code that could cause corruption
+ of smb data */
+
+ cifs_server_lock(server);
+
+ rc = allocate_mid(ses, in_buf, &midQ);
+ if (rc) {
+ cifs_server_unlock(server);
+ return rc;
+ }
+
+ rc = cifs_sign_smb(in_buf, server, &midQ->sequence_number);
+ if (rc) {
+ delete_mid(midQ);
+ cifs_server_unlock(server);
+ return rc;
+ }
+
+ midQ->mid_state = MID_REQUEST_SUBMITTED;
+ rc = smb_send(server, in_buf, len);
+ cifs_save_when_sent(midQ);
+
+ if (rc < 0)
+ server->sequence_number -= 2;
+
+ cifs_server_unlock(server);
+
+ if (rc < 0) {
+ delete_mid(midQ);
+ return rc;
+ }
+
+ /* Wait for a reply - allow signals to interrupt. */
+ rc = wait_event_interruptible(server->response_q,
+ (!(midQ->mid_state == MID_REQUEST_SUBMITTED ||
+ midQ->mid_state == MID_RESPONSE_RECEIVED)) ||
+ ((server->tcpStatus != CifsGood) &&
+ (server->tcpStatus != CifsNew)));
+
+ /* Were we interrupted by a signal ? */
+ spin_lock(&server->srv_lock);
+ if ((rc == -ERESTARTSYS) &&
+ (midQ->mid_state == MID_REQUEST_SUBMITTED ||
+ midQ->mid_state == MID_RESPONSE_RECEIVED) &&
+ ((server->tcpStatus == CifsGood) ||
+ (server->tcpStatus == CifsNew))) {
+ spin_unlock(&server->srv_lock);
+
+ if (in_buf->Command == SMB_COM_TRANSACTION2) {
+ /* POSIX lock. We send a NT_CANCEL SMB to cause the
+ blocking lock to return. */
+ rc = send_cancel(server, &rqst, midQ);
+ if (rc) {
+ delete_mid(midQ);
+ return rc;
+ }
+ } else {
+ /* Windows lock. We send a LOCKINGX_CANCEL_LOCK
+ to cause the blocking lock to return. */
+
+ rc = send_lock_cancel(xid, tcon, in_buf, out_buf);
+
+ /* If we get -ENOLCK back the lock may have
+ already been removed. Don't exit in this case. */
+ if (rc && rc != -ENOLCK) {
+ delete_mid(midQ);
+ return rc;
+ }
+ }
+
+ rc = wait_for_response(server, midQ);
+ if (rc) {
+ send_cancel(server, &rqst, midQ);
+ spin_lock(&server->mid_queue_lock);
+ if (midQ->mid_state == MID_REQUEST_SUBMITTED ||
+ midQ->mid_state == MID_RESPONSE_RECEIVED) {
+ /* no longer considered to be "in-flight" */
+ midQ->callback = release_mid;
+ spin_unlock(&server->mid_queue_lock);
+ return rc;
+ }
+ spin_unlock(&server->mid_queue_lock);
+ }
+
+ /* We got the response - restart system call. */
+ rstart = 1;
+ spin_lock(&server->srv_lock);
+ }
+ spin_unlock(&server->srv_lock);
+
+ rc = cifs_sync_mid_result(midQ, server);
+ if (rc != 0)
+ return rc;
+
+ /* rcvd frame is ok */
+ if (out_buf == NULL || midQ->mid_state != MID_RESPONSE_READY) {
+ rc = -EIO;
+ cifs_tcon_dbg(VFS, "Bad MID state?\n");
+ goto out;
+ }
+
+ *pbytes_returned = get_rfc1002_length(midQ->resp_buf);
+ memcpy(out_buf, midQ->resp_buf, *pbytes_returned + 4);
+ rc = cifs_check_receive(midQ, server, 0);
+out:
+ delete_mid(midQ);
+ if (rstart && rc == -EACCES)
+ return -ERESTARTSYS;
+ return rc;
+}
diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c
index 5eec8957f2a9..587845a2452d 100644
--- a/fs/smb/client/connect.c
+++ b/fs/smb/client/connect.c
@@ -321,15 +321,15 @@ cifs_abort_connection(struct TCP_Server_Info *server)
/* mark submitted MIDs for retry and issue callback */
INIT_LIST_HEAD(&retry_list);
cifs_dbg(FYI, "%s: moving mids to private list\n", __func__);
- spin_lock(&server->mid_lock);
+ spin_lock(&server->mid_queue_lock);
list_for_each_entry_safe(mid, nmid, &server->pending_mid_q, qhead) {
kref_get(&mid->refcount);
if (mid->mid_state == MID_REQUEST_SUBMITTED)
mid->mid_state = MID_RETRY_NEEDED;
list_move(&mid->qhead, &retry_list);
- mid->mid_flags |= MID_DELETED;
+ mid->deleted_from_q = true;
}
- spin_unlock(&server->mid_lock);
+ spin_unlock(&server->mid_queue_lock);
cifs_server_unlock(server);
cifs_dbg(FYI, "%s: issuing mid callbacks\n", __func__);
@@ -358,7 +358,7 @@ static bool cifs_tcp_ses_needs_reconnect(struct TCP_Server_Info *server, int num
}
cifs_dbg(FYI, "Mark tcp session as need reconnect\n");
- trace_smb3_reconnect(server->CurrentMid, server->conn_id,
+ trace_smb3_reconnect(server->current_mid, server->conn_id,
server->hostname);
server->tcpStatus = CifsNeedReconnect;
@@ -884,13 +884,13 @@ is_smb_response(struct TCP_Server_Info *server, unsigned char type)
* server there should be exactly one pending mid
* corresponding to SMB1/SMB2 Negotiate packet.
*/
- spin_lock(&server->mid_lock);
+ spin_lock(&server->mid_queue_lock);
list_for_each_entry_safe(mid, nmid, &server->pending_mid_q, qhead) {
kref_get(&mid->refcount);
list_move(&mid->qhead, &dispose_list);
- mid->mid_flags |= MID_DELETED;
+ mid->deleted_from_q = true;
}
- spin_unlock(&server->mid_lock);
+ spin_unlock(&server->mid_queue_lock);
/* Now try to reconnect once with NetBIOS session. */
server->with_rfc1001 = true;
@@ -957,7 +957,7 @@ dequeue_mid(struct mid_q_entry *mid, bool malformed)
#ifdef CONFIG_CIFS_STATS2
mid->when_received = jiffies;
#endif
- spin_lock(&mid->server->mid_lock);
+ spin_lock(&mid->server->mid_queue_lock);
if (!malformed)
mid->mid_state = MID_RESPONSE_RECEIVED;
else
@@ -966,13 +966,13 @@ dequeue_mid(struct mid_q_entry *mid, bool malformed)
* Trying to handle/dequeue a mid after the send_recv()
* function has finished processing it is a bug.
*/
- if (mid->mid_flags & MID_DELETED) {
- spin_unlock(&mid->server->mid_lock);
+ if (mid->deleted_from_q == true) {
+ spin_unlock(&mid->server->mid_queue_lock);
pr_warn_once("trying to dequeue a deleted mid\n");
} else {
list_del_init(&mid->qhead);
- mid->mid_flags |= MID_DELETED;
- spin_unlock(&mid->server->mid_lock);
+ mid->deleted_from_q = true;
+ spin_unlock(&mid->server->mid_queue_lock);
}
}
@@ -1101,16 +1101,16 @@ clean_demultiplex_info(struct TCP_Server_Info *server)
struct list_head *tmp, *tmp2;
LIST_HEAD(dispose_list);
- spin_lock(&server->mid_lock);
+ spin_lock(&server->mid_queue_lock);
list_for_each_safe(tmp, tmp2, &server->pending_mid_q) {
mid_entry = list_entry(tmp, struct mid_q_entry, qhead);
cifs_dbg(FYI, "Clearing mid %llu\n", mid_entry->mid);
kref_get(&mid_entry->refcount);
mid_entry->mid_state = MID_SHUTDOWN;
list_move(&mid_entry->qhead, &dispose_list);
- mid_entry->mid_flags |= MID_DELETED;
+ mid_entry->deleted_from_q = true;
}
- spin_unlock(&server->mid_lock);
+ spin_unlock(&server->mid_queue_lock);
/* now walk dispose list and issue callbacks */
list_for_each_safe(tmp, tmp2, &dispose_list) {
@@ -1242,7 +1242,7 @@ smb2_add_credits_from_hdr(char *buffer, struct TCP_Server_Info *server)
spin_unlock(&server->req_lock);
wake_up(&server->request_q);
- trace_smb3_hdr_credits(server->CurrentMid,
+ trace_smb3_hdr_credits(server->current_mid,
server->conn_id, server->hostname, scredits,
le16_to_cpu(shdr->CreditRequest), in_flight);
cifs_server_dbg(FYI, "%s: added %u credits total=%d\n",
@@ -1822,7 +1822,8 @@ cifs_get_tcp_session(struct smb3_fs_context *ctx,
tcp_ses->compression.requested = ctx->compress;
spin_lock_init(&tcp_ses->req_lock);
spin_lock_init(&tcp_ses->srv_lock);
- spin_lock_init(&tcp_ses->mid_lock);
+ spin_lock_init(&tcp_ses->mid_queue_lock);
+ spin_lock_init(&tcp_ses->mid_counter_lock);
INIT_LIST_HEAD(&tcp_ses->tcp_ses_list);
INIT_LIST_HEAD(&tcp_ses->smb_ses_list);
INIT_DELAYED_WORK(&tcp_ses->echo, cifs_echo_request);
diff --git a/fs/smb/client/fs_context.c b/fs/smb/client/fs_context.c
index 3f34bb07997b..072383899e81 100644
--- a/fs/smb/client/fs_context.c
+++ b/fs/smb/client/fs_context.c
@@ -1652,6 +1652,7 @@ static int smb3_fs_context_parse_param(struct fs_context *fc,
pr_warn_once("conflicting posix mount options specified\n");
ctx->linux_ext = 1;
ctx->no_linux_ext = 0;
+ ctx->nonativesocket = 1; /* POSIX mounts use NFS style reparse points */
}
break;
case Opt_nocase:
@@ -1829,24 +1830,6 @@ static int smb3_fs_context_parse_param(struct fs_context *fc,
return -EINVAL;
}
-enum cifs_symlink_type get_cifs_symlink_type(struct cifs_sb_info *cifs_sb)
-{
- if (cifs_sb->ctx->symlink_type == CIFS_SYMLINK_TYPE_DEFAULT) {
- if (cifs_sb->ctx->mfsymlinks)
- return CIFS_SYMLINK_TYPE_MFSYMLINKS;
- else if (cifs_sb->ctx->sfu_emul)
- return CIFS_SYMLINK_TYPE_SFU;
- else if (cifs_sb->ctx->linux_ext && !cifs_sb->ctx->no_linux_ext)
- return CIFS_SYMLINK_TYPE_UNIX;
- else if (cifs_sb->ctx->reparse_type != CIFS_REPARSE_TYPE_NONE)
- return CIFS_SYMLINK_TYPE_NATIVE;
- else
- return CIFS_SYMLINK_TYPE_NONE;
- } else {
- return cifs_sb->ctx->symlink_type;
- }
-}
-
int smb3_init_fs_context(struct fs_context *fc)
{
struct smb3_fs_context *ctx;
diff --git a/fs/smb/client/fs_context.h b/fs/smb/client/fs_context.h
index 9e83302ce4b8..b0fec6b9a23b 100644
--- a/fs/smb/client/fs_context.h
+++ b/fs/smb/client/fs_context.h
@@ -341,7 +341,23 @@ struct smb3_fs_context {
extern const struct fs_parameter_spec smb3_fs_parameters[];
-extern enum cifs_symlink_type get_cifs_symlink_type(struct cifs_sb_info *cifs_sb);
+static inline enum cifs_symlink_type cifs_symlink_type(struct cifs_sb_info *cifs_sb)
+{
+ bool posix = cifs_sb_master_tcon(cifs_sb)->posix_extensions;
+
+ if (cifs_sb->ctx->symlink_type != CIFS_SYMLINK_TYPE_DEFAULT)
+ return cifs_sb->ctx->symlink_type;
+
+ if (cifs_sb->ctx->mfsymlinks)
+ return CIFS_SYMLINK_TYPE_MFSYMLINKS;
+ else if (cifs_sb->ctx->sfu_emul)
+ return CIFS_SYMLINK_TYPE_SFU;
+ else if (cifs_sb->ctx->linux_ext && !cifs_sb->ctx->no_linux_ext)
+ return posix ? CIFS_SYMLINK_TYPE_NATIVE : CIFS_SYMLINK_TYPE_UNIX;
+ else if (cifs_sb->ctx->reparse_type != CIFS_REPARSE_TYPE_NONE)
+ return CIFS_SYMLINK_TYPE_NATIVE;
+ return CIFS_SYMLINK_TYPE_NONE;
+}
extern int smb3_init_fs_context(struct fs_context *fc);
extern void smb3_cleanup_fs_context_contents(struct smb3_fs_context *ctx);
diff --git a/fs/smb/client/link.c b/fs/smb/client/link.c
index 2ecd705e9e8c..fe80e711cd75 100644
--- a/fs/smb/client/link.c
+++ b/fs/smb/client/link.c
@@ -605,14 +605,7 @@ cifs_symlink(struct mnt_idmap *idmap, struct inode *inode,
/* BB what if DFS and this volume is on different share? BB */
rc = -EOPNOTSUPP;
- switch (get_cifs_symlink_type(cifs_sb)) {
- case CIFS_SYMLINK_TYPE_DEFAULT:
- /* should not happen, get_cifs_symlink_type() resolves the default */
- break;
-
- case CIFS_SYMLINK_TYPE_NONE:
- break;
-
+ switch (cifs_symlink_type(cifs_sb)) {
case CIFS_SYMLINK_TYPE_UNIX:
#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
if (pTcon->unix_ext) {
@@ -642,12 +635,14 @@ cifs_symlink(struct mnt_idmap *idmap, struct inode *inode,
case CIFS_SYMLINK_TYPE_NATIVE:
case CIFS_SYMLINK_TYPE_NFS:
case CIFS_SYMLINK_TYPE_WSL:
- if (le32_to_cpu(pTcon->fsAttrInfo.Attributes) & FILE_SUPPORTS_REPARSE_POINTS) {
+ if (CIFS_REPARSE_SUPPORT(pTcon)) {
rc = create_reparse_symlink(xid, inode, direntry, pTcon,
full_path, symname);
goto symlink_exit;
}
break;
+ default:
+ break;
}
if (rc == 0) {
diff --git a/fs/smb/client/reparse.c b/fs/smb/client/reparse.c
index 33c1d970747c..7869cec58f52 100644
--- a/fs/smb/client/reparse.c
+++ b/fs/smb/client/reparse.c
@@ -38,7 +38,7 @@ int create_reparse_symlink(const unsigned int xid, struct inode *inode,
struct dentry *dentry, struct cifs_tcon *tcon,
const char *full_path, const char *symname)
{
- switch (get_cifs_symlink_type(CIFS_SB(inode->i_sb))) {
+ switch (cifs_symlink_type(CIFS_SB(inode->i_sb))) {
case CIFS_SYMLINK_TYPE_NATIVE:
return create_native_symlink(xid, inode, dentry, tcon, full_path, symname);
case CIFS_SYMLINK_TYPE_NFS:
diff --git a/fs/smb/client/smb1ops.c b/fs/smb/client/smb1ops.c
index e364b6515af3..893a1ea8c000 100644
--- a/fs/smb/client/smb1ops.c
+++ b/fs/smb/client/smb1ops.c
@@ -95,17 +95,17 @@ cifs_find_mid(struct TCP_Server_Info *server, char *buffer)
struct smb_hdr *buf = (struct smb_hdr *)buffer;
struct mid_q_entry *mid;
- spin_lock(&server->mid_lock);
+ spin_lock(&server->mid_queue_lock);
list_for_each_entry(mid, &server->pending_mid_q, qhead) {
if (compare_mid(mid->mid, buf) &&
mid->mid_state == MID_REQUEST_SUBMITTED &&
le16_to_cpu(mid->command) == buf->Command) {
kref_get(&mid->refcount);
- spin_unlock(&server->mid_lock);
+ spin_unlock(&server->mid_queue_lock);
return mid;
}
}
- spin_unlock(&server->mid_lock);
+ spin_unlock(&server->mid_queue_lock);
return NULL;
}
@@ -169,10 +169,9 @@ cifs_get_next_mid(struct TCP_Server_Info *server)
__u16 last_mid, cur_mid;
bool collision, reconnect = false;
- spin_lock(&server->mid_lock);
-
+ spin_lock(&server->mid_counter_lock);
/* mid is 16 bit only for CIFS/SMB */
- cur_mid = (__u16)((server->CurrentMid) & 0xffff);
+ cur_mid = (__u16)((server->current_mid) & 0xffff);
/* we do not want to loop forever */
last_mid = cur_mid;
cur_mid++;
@@ -198,6 +197,7 @@ cifs_get_next_mid(struct TCP_Server_Info *server)
cur_mid++;
num_mids = 0;
+ spin_lock(&server->mid_queue_lock);
list_for_each_entry(mid_entry, &server->pending_mid_q, qhead) {
++num_mids;
if (mid_entry->mid == cur_mid &&
@@ -207,6 +207,7 @@ cifs_get_next_mid(struct TCP_Server_Info *server)
break;
}
}
+ spin_unlock(&server->mid_queue_lock);
/*
* if we have more than 32k mids in the list, then something
@@ -223,12 +224,12 @@ cifs_get_next_mid(struct TCP_Server_Info *server)
if (!collision) {
mid = (__u64)cur_mid;
- server->CurrentMid = mid;
+ server->current_mid = mid;
break;
}
cur_mid++;
}
- spin_unlock(&server->mid_lock);
+ spin_unlock(&server->mid_counter_lock);
if (reconnect) {
cifs_signal_cifsd_for_reconnect(server, false);
@@ -1272,7 +1273,7 @@ cifs_make_node(unsigned int xid, struct inode *inode,
*/
return cifs_sfu_make_node(xid, inode, dentry, tcon,
full_path, mode, dev);
- } else if (le32_to_cpu(tcon->fsAttrInfo.Attributes) & FILE_SUPPORTS_REPARSE_POINTS) {
+ } else if (CIFS_REPARSE_SUPPORT(tcon)) {
/*
* mknod via reparse points requires server support for
* storing reparse points, which is available since
diff --git a/fs/smb/client/smb2inode.c b/fs/smb/client/smb2inode.c
index 69d251726c02..2a0316c514e4 100644
--- a/fs/smb/client/smb2inode.c
+++ b/fs/smb/client/smb2inode.c
@@ -1346,9 +1346,8 @@ struct inode *smb2_create_reparse_inode(struct cifs_open_info_data *data,
* attempt to create reparse point. This will prevent creating unusable
* empty object on the server.
*/
- if (!(le32_to_cpu(tcon->fsAttrInfo.Attributes) & FILE_SUPPORTS_REPARSE_POINTS))
- if (!tcon->posix_extensions)
- return ERR_PTR(-EOPNOTSUPP);
+ if (!CIFS_REPARSE_SUPPORT(tcon))
+ return ERR_PTR(-EOPNOTSUPP);
oparms = CIFS_OPARMS(cifs_sb, tcon, full_path,
SYNCHRONIZE | DELETE |
diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c
index 1b4a31894f43..ad8947434b71 100644
--- a/fs/smb/client/smb2ops.c
+++ b/fs/smb/client/smb2ops.c
@@ -91,7 +91,7 @@ smb2_add_credits(struct TCP_Server_Info *server,
if (*val > 65000) {
*val = 65000; /* Don't get near 64K credits, avoid srv bugs */
pr_warn_once("server overflowed SMB3 credits\n");
- trace_smb3_overflow_credits(server->CurrentMid,
+ trace_smb3_overflow_credits(server->current_mid,
server->conn_id, server->hostname, *val,
add, server->in_flight);
}
@@ -136,7 +136,7 @@ smb2_add_credits(struct TCP_Server_Info *server,
wake_up(&server->request_q);
if (reconnect_detected) {
- trace_smb3_reconnect_detected(server->CurrentMid,
+ trace_smb3_reconnect_detected(server->current_mid,
server->conn_id, server->hostname, scredits, add, in_flight);
cifs_dbg(FYI, "trying to put %d credits from the old server instance %d\n",
@@ -144,7 +144,7 @@ smb2_add_credits(struct TCP_Server_Info *server,
}
if (reconnect_with_invalid_credits) {
- trace_smb3_reconnect_with_invalid_credits(server->CurrentMid,
+ trace_smb3_reconnect_with_invalid_credits(server->current_mid,
server->conn_id, server->hostname, scredits, add, in_flight);
cifs_dbg(FYI, "Negotiate operation when server credits is non-zero. Optype: %d, server credits: %d, credits added: %d\n",
optype, scredits, add);
@@ -176,7 +176,7 @@ smb2_add_credits(struct TCP_Server_Info *server,
break;
}
- trace_smb3_add_credits(server->CurrentMid,
+ trace_smb3_add_credits(server->current_mid,
server->conn_id, server->hostname, scredits, add, in_flight);
cifs_dbg(FYI, "%s: added %u credits total=%d\n", __func__, add, scredits);
}
@@ -203,7 +203,7 @@ smb2_set_credits(struct TCP_Server_Info *server, const int val)
in_flight = server->in_flight;
spin_unlock(&server->req_lock);
- trace_smb3_set_credits(server->CurrentMid,
+ trace_smb3_set_credits(server->current_mid,
server->conn_id, server->hostname, scredits, val, in_flight);
cifs_dbg(FYI, "%s: set %u credits\n", __func__, val);
@@ -288,7 +288,7 @@ smb2_wait_mtu_credits(struct TCP_Server_Info *server, size_t size,
in_flight = server->in_flight;
spin_unlock(&server->req_lock);
- trace_smb3_wait_credits(server->CurrentMid,
+ trace_smb3_wait_credits(server->current_mid,
server->conn_id, server->hostname, scredits, -(credits->value), in_flight);
cifs_dbg(FYI, "%s: removed %u credits total=%d\n",
__func__, credits->value, scredits);
@@ -316,7 +316,7 @@ smb2_adjust_credits(struct TCP_Server_Info *server,
server->credits, server->in_flight,
new_val - credits->value,
cifs_trace_rw_credits_no_adjust_up);
- trace_smb3_too_many_credits(server->CurrentMid,
+ trace_smb3_too_many_credits(server->current_mid,
server->conn_id, server->hostname, 0, credits->value - new_val, 0);
cifs_server_dbg(VFS, "R=%x[%x] request has less credits (%d) than required (%d)",
subreq->rreq->debug_id, subreq->subreq.debug_index,
@@ -338,7 +338,7 @@ smb2_adjust_credits(struct TCP_Server_Info *server,
server->credits, server->in_flight,
new_val - credits->value,
cifs_trace_rw_credits_old_session);
- trace_smb3_reconnect_detected(server->CurrentMid,
+ trace_smb3_reconnect_detected(server->current_mid,
server->conn_id, server->hostname, scredits,
credits->value - new_val, in_flight);
cifs_server_dbg(VFS, "R=%x[%x] trying to return %d credits to old session\n",
@@ -358,7 +358,7 @@ smb2_adjust_credits(struct TCP_Server_Info *server,
spin_unlock(&server->req_lock);
wake_up(&server->request_q);
- trace_smb3_adj_credits(server->CurrentMid,
+ trace_smb3_adj_credits(server->current_mid,
server->conn_id, server->hostname, scredits,
credits->value - new_val, in_flight);
cifs_dbg(FYI, "%s: adjust added %u credits total=%d\n",
@@ -374,19 +374,19 @@ smb2_get_next_mid(struct TCP_Server_Info *server)
{
__u64 mid;
/* for SMB2 we need the current value */
- spin_lock(&server->mid_lock);
- mid = server->CurrentMid++;
- spin_unlock(&server->mid_lock);
+ spin_lock(&server->mid_counter_lock);
+ mid = server->current_mid++;
+ spin_unlock(&server->mid_counter_lock);
return mid;
}
static void
smb2_revert_current_mid(struct TCP_Server_Info *server, const unsigned int val)
{
- spin_lock(&server->mid_lock);
- if (server->CurrentMid >= val)
- server->CurrentMid -= val;
- spin_unlock(&server->mid_lock);
+ spin_lock(&server->mid_counter_lock);
+ if (server->current_mid >= val)
+ server->current_mid -= val;
+ spin_unlock(&server->mid_counter_lock);
}
static struct mid_q_entry *
@@ -401,7 +401,7 @@ __smb2_find_mid(struct TCP_Server_Info *server, char *buf, bool dequeue)
return NULL;
}
- spin_lock(&server->mid_lock);
+ spin_lock(&server->mid_queue_lock);
list_for_each_entry(mid, &server->pending_mid_q, qhead) {
if ((mid->mid == wire_mid) &&
(mid->mid_state == MID_REQUEST_SUBMITTED) &&
@@ -409,13 +409,13 @@ __smb2_find_mid(struct TCP_Server_Info *server, char *buf, bool dequeue)
kref_get(&mid->refcount);
if (dequeue) {
list_del_init(&mid->qhead);
- mid->mid_flags |= MID_DELETED;
+ mid->deleted_from_q = true;
}
- spin_unlock(&server->mid_lock);
+ spin_unlock(&server->mid_queue_lock);
return mid;
}
}
- spin_unlock(&server->mid_lock);
+ spin_unlock(&server->mid_queue_lock);
return NULL;
}
@@ -460,9 +460,9 @@ smb2_negotiate(const unsigned int xid,
{
int rc;
- spin_lock(&server->mid_lock);
- server->CurrentMid = 0;
- spin_unlock(&server->mid_lock);
+ spin_lock(&server->mid_counter_lock);
+ server->current_mid = 0;
+ spin_unlock(&server->mid_counter_lock);
rc = SMB2_negotiate(xid, ses, server);
return rc;
}
@@ -2498,7 +2498,7 @@ smb2_is_status_pending(char *buf, struct TCP_Server_Info *server)
spin_unlock(&server->req_lock);
wake_up(&server->request_q);
- trace_smb3_pend_credits(server->CurrentMid,
+ trace_smb3_pend_credits(server->current_mid,
server->conn_id, server->hostname, scredits,
le16_to_cpu(shdr->CreditRequest), in_flight);
cifs_dbg(FYI, "%s: status pending add %u credits total=%d\n",
@@ -4809,18 +4809,18 @@ static void smb2_decrypt_offload(struct work_struct *work)
} else {
spin_lock(&dw->server->srv_lock);
if (dw->server->tcpStatus == CifsNeedReconnect) {
- spin_lock(&dw->server->mid_lock);
+ spin_lock(&dw->server->mid_queue_lock);
mid->mid_state = MID_RETRY_NEEDED;
- spin_unlock(&dw->server->mid_lock);
+ spin_unlock(&dw->server->mid_queue_lock);
spin_unlock(&dw->server->srv_lock);
mid->callback(mid);
} else {
- spin_lock(&dw->server->mid_lock);
+ spin_lock(&dw->server->mid_queue_lock);
mid->mid_state = MID_REQUEST_SUBMITTED;
- mid->mid_flags &= ~(MID_DELETED);
+ mid->deleted_from_q = false;
list_add_tail(&mid->qhead,
&dw->server->pending_mid_q);
- spin_unlock(&dw->server->mid_lock);
+ spin_unlock(&dw->server->mid_queue_lock);
spin_unlock(&dw->server->srv_lock);
}
}
@@ -5260,10 +5260,9 @@ static int smb2_make_node(unsigned int xid, struct inode *inode,
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) {
rc = cifs_sfu_make_node(xid, inode, dentry, tcon,
full_path, mode, dev);
- } else if ((le32_to_cpu(tcon->fsAttrInfo.Attributes) & FILE_SUPPORTS_REPARSE_POINTS)
- || (tcon->posix_extensions)) {
+ } else if (CIFS_REPARSE_SUPPORT(tcon)) {
rc = mknod_reparse(xid, inode, dentry, tcon,
- full_path, mode, dev);
+ full_path, mode, dev);
}
return rc;
}
diff --git a/fs/smb/client/smb2transport.c b/fs/smb/client/smb2transport.c
index 475b36c27f65..ff9ef7fcd010 100644
--- a/fs/smb/client/smb2transport.c
+++ b/fs/smb/client/smb2transport.c
@@ -840,9 +840,9 @@ smb2_get_mid_entry(struct cifs_ses *ses, struct TCP_Server_Info *server,
*mid = smb2_mid_entry_alloc(shdr, server);
if (*mid == NULL)
return -ENOMEM;
- spin_lock(&server->mid_lock);
+ spin_lock(&server->mid_queue_lock);
list_add_tail(&(*mid)->qhead, &server->pending_mid_q);
- spin_unlock(&server->mid_lock);
+ spin_unlock(&server->mid_queue_lock);
return 0;
}
diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c
index 754e94a0e07f..c628e91c328b 100644
--- a/fs/smb/client/smbdirect.c
+++ b/fs/smb/client/smbdirect.c
@@ -13,27 +13,23 @@
#include "cifsproto.h"
#include "smb2proto.h"
-static struct smbd_response *get_empty_queue_buffer(
- struct smbd_connection *info);
-static struct smbd_response *get_receive_buffer(
+static struct smbdirect_recv_io *get_receive_buffer(
struct smbd_connection *info);
static void put_receive_buffer(
struct smbd_connection *info,
- struct smbd_response *response);
+ struct smbdirect_recv_io *response);
static int allocate_receive_buffers(struct smbd_connection *info, int num_buf);
static void destroy_receive_buffers(struct smbd_connection *info);
-static void put_empty_packet(
- struct smbd_connection *info, struct smbd_response *response);
static void enqueue_reassembly(
struct smbd_connection *info,
- struct smbd_response *response, int data_length);
-static struct smbd_response *_get_first_reassembly(
+ struct smbdirect_recv_io *response, int data_length);
+static struct smbdirect_recv_io *_get_first_reassembly(
struct smbd_connection *info);
static int smbd_post_recv(
struct smbd_connection *info,
- struct smbd_response *response);
+ struct smbdirect_recv_io *response);
static int smbd_post_send_empty(struct smbd_connection *info);
@@ -182,9 +178,10 @@ static int smbd_conn_upcall(
{
struct smbd_connection *info = id->context;
struct smbdirect_socket *sc = &info->socket;
+ const char *event_name = rdma_event_msg(event->event);
- log_rdma_event(INFO, "event=%d status=%d\n",
- event->event, event->status);
+ log_rdma_event(INFO, "event=%s status=%d\n",
+ event_name, event->status);
switch (event->event) {
case RDMA_CM_EVENT_ADDR_RESOLVED:
@@ -194,45 +191,50 @@ static int smbd_conn_upcall(
break;
case RDMA_CM_EVENT_ADDR_ERROR:
+ log_rdma_event(ERR, "connecting failed event=%s\n", event_name);
info->ri_rc = -EHOSTUNREACH;
complete(&info->ri_done);
break;
case RDMA_CM_EVENT_ROUTE_ERROR:
+ log_rdma_event(ERR, "connecting failed event=%s\n", event_name);
info->ri_rc = -ENETUNREACH;
complete(&info->ri_done);
break;
case RDMA_CM_EVENT_ESTABLISHED:
- log_rdma_event(INFO, "connected event=%d\n", event->event);
+ log_rdma_event(INFO, "connected event=%s\n", event_name);
sc->status = SMBDIRECT_SOCKET_CONNECTED;
- wake_up_interruptible(&info->conn_wait);
+ wake_up_interruptible(&info->status_wait);
break;
case RDMA_CM_EVENT_CONNECT_ERROR:
case RDMA_CM_EVENT_UNREACHABLE:
case RDMA_CM_EVENT_REJECTED:
- log_rdma_event(INFO, "connecting failed event=%d\n", event->event);
+ log_rdma_event(ERR, "connecting failed event=%s\n", event_name);
sc->status = SMBDIRECT_SOCKET_DISCONNECTED;
- wake_up_interruptible(&info->conn_wait);
+ wake_up_interruptible(&info->status_wait);
break;
case RDMA_CM_EVENT_DEVICE_REMOVAL:
case RDMA_CM_EVENT_DISCONNECTED:
/* This happens when we fail the negotiation */
if (sc->status == SMBDIRECT_SOCKET_NEGOTIATE_FAILED) {
+ log_rdma_event(ERR, "event=%s during negotiation\n", event_name);
sc->status = SMBDIRECT_SOCKET_DISCONNECTED;
- wake_up(&info->conn_wait);
+ wake_up(&info->status_wait);
break;
}
sc->status = SMBDIRECT_SOCKET_DISCONNECTED;
- wake_up_interruptible(&info->disconn_wait);
- wake_up_interruptible(&info->wait_reassembly_queue);
+ wake_up_interruptible(&info->status_wait);
+ wake_up_interruptible(&sc->recv_io.reassembly.wait_queue);
wake_up_interruptible_all(&info->wait_send_queue);
break;
default:
+ log_rdma_event(ERR, "unexpected event=%s status=%d\n",
+ event_name, event->status);
break;
}
@@ -259,12 +261,12 @@ smbd_qp_async_error_upcall(struct ib_event *event, void *context)
}
}
-static inline void *smbd_request_payload(struct smbd_request *request)
+static inline void *smbdirect_send_io_payload(struct smbdirect_send_io *request)
{
return (void *)request->packet;
}
-static inline void *smbd_response_payload(struct smbd_response *response)
+static inline void *smbdirect_recv_io_payload(struct smbdirect_recv_io *response)
{
return (void *)response->packet;
}
@@ -273,32 +275,35 @@ static inline void *smbd_response_payload(struct smbd_response *response)
static void send_done(struct ib_cq *cq, struct ib_wc *wc)
{
int i;
- struct smbd_request *request =
- container_of(wc->wr_cqe, struct smbd_request, cqe);
- struct smbd_connection *info = request->info;
- struct smbdirect_socket *sc = &info->socket;
+ struct smbdirect_send_io *request =
+ container_of(wc->wr_cqe, struct smbdirect_send_io, cqe);
+ struct smbdirect_socket *sc = request->socket;
+ struct smbd_connection *info =
+ container_of(sc, struct smbd_connection, socket);
- log_rdma_send(INFO, "smbd_request 0x%p completed wc->status=%d\n",
+ log_rdma_send(INFO, "smbdirect_send_io 0x%p completed wc->status=%d\n",
request, wc->status);
- if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_SEND) {
- log_rdma_send(ERR, "wc->status=%d wc->opcode=%d\n",
- wc->status, wc->opcode);
- smbd_disconnect_rdma_connection(request->info);
- }
-
for (i = 0; i < request->num_sge; i++)
ib_dma_unmap_single(sc->ib.dev,
request->sge[i].addr,
request->sge[i].length,
DMA_TO_DEVICE);
- if (atomic_dec_and_test(&request->info->send_pending))
- wake_up(&request->info->wait_send_pending);
+ if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_SEND) {
+ log_rdma_send(ERR, "wc->status=%d wc->opcode=%d\n",
+ wc->status, wc->opcode);
+ mempool_free(request, sc->send_io.mem.pool);
+ smbd_disconnect_rdma_connection(info);
+ return;
+ }
- wake_up(&request->info->wait_post_send);
+ if (atomic_dec_and_test(&info->send_pending))
+ wake_up(&info->wait_send_pending);
+
+ wake_up(&info->wait_post_send);
- mempool_free(request, request->info->request_mempool);
+ mempool_free(request, sc->send_io.mem.pool);
}
static void dump_smbdirect_negotiate_resp(struct smbdirect_negotiate_resp *resp)
@@ -317,12 +322,13 @@ static void dump_smbdirect_negotiate_resp(struct smbdirect_negotiate_resp *resp)
* return value: true if negotiation is a success, false if failed
*/
static bool process_negotiation_response(
- struct smbd_response *response, int packet_length)
+ struct smbdirect_recv_io *response, int packet_length)
{
- struct smbd_connection *info = response->info;
- struct smbdirect_socket *sc = &info->socket;
+ struct smbdirect_socket *sc = response->socket;
+ struct smbd_connection *info =
+ container_of(sc, struct smbd_connection, socket);
struct smbdirect_socket_parameters *sp = &sc->parameters;
- struct smbdirect_negotiate_resp *packet = smbd_response_payload(response);
+ struct smbdirect_negotiate_resp *packet = smbdirect_recv_io_payload(response);
if (packet_length < sizeof(struct smbdirect_negotiate_resp)) {
log_rdma_event(ERR,
@@ -385,15 +391,15 @@ static bool process_negotiation_response(
info->max_frmr_depth * PAGE_SIZE);
info->max_frmr_depth = sp->max_read_write_size / PAGE_SIZE;
+ sc->recv_io.expected = SMBDIRECT_EXPECT_DATA_TRANSFER;
return true;
}
static void smbd_post_send_credits(struct work_struct *work)
{
int ret = 0;
- int use_receive_queue = 1;
int rc;
- struct smbd_response *response;
+ struct smbdirect_recv_io *response;
struct smbd_connection *info =
container_of(work, struct smbd_connection,
post_send_credits_work);
@@ -407,20 +413,10 @@ static void smbd_post_send_credits(struct work_struct *work)
if (info->receive_credit_target >
atomic_read(&info->receive_credits)) {
while (true) {
- if (use_receive_queue)
- response = get_receive_buffer(info);
- else
- response = get_empty_queue_buffer(info);
- if (!response) {
- /* now switch to empty packet queue */
- if (use_receive_queue) {
- use_receive_queue = 0;
- continue;
- } else
- break;
- }
+ response = get_receive_buffer(info);
+ if (!response)
+ break;
- response->type = SMBD_TRANSFER_DATA;
response->first_segment = false;
rc = smbd_post_recv(info, response);
if (rc) {
@@ -454,19 +450,20 @@ static void smbd_post_send_credits(struct work_struct *work)
static void recv_done(struct ib_cq *cq, struct ib_wc *wc)
{
struct smbdirect_data_transfer *data_transfer;
- struct smbd_response *response =
- container_of(wc->wr_cqe, struct smbd_response, cqe);
- struct smbd_connection *info = response->info;
+ struct smbdirect_recv_io *response =
+ container_of(wc->wr_cqe, struct smbdirect_recv_io, cqe);
+ struct smbdirect_socket *sc = response->socket;
+ struct smbd_connection *info =
+ container_of(sc, struct smbd_connection, socket);
int data_length = 0;
log_rdma_recv(INFO, "response=0x%p type=%d wc status=%d wc opcode %d byte_len=%d pkey_index=%u\n",
- response, response->type, wc->status, wc->opcode,
+ response, sc->recv_io.expected, wc->status, wc->opcode,
wc->byte_len, wc->pkey_index);
if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_RECV) {
log_rdma_recv(INFO, "wc->status=%d opcode=%d\n",
wc->status, wc->opcode);
- smbd_disconnect_rdma_connection(info);
goto error;
}
@@ -476,43 +473,31 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc)
response->sge.length,
DMA_FROM_DEVICE);
- switch (response->type) {
+ switch (sc->recv_io.expected) {
/* SMBD negotiation response */
- case SMBD_NEGOTIATE_RESP:
- dump_smbdirect_negotiate_resp(smbd_response_payload(response));
- info->full_packet_received = true;
+ case SMBDIRECT_EXPECT_NEGOTIATE_REP:
+ dump_smbdirect_negotiate_resp(smbdirect_recv_io_payload(response));
+ sc->recv_io.reassembly.full_packet_received = true;
info->negotiate_done =
process_negotiation_response(response, wc->byte_len);
+ put_receive_buffer(info, response);
complete(&info->negotiate_completion);
- break;
+ return;
/* SMBD data transfer packet */
- case SMBD_TRANSFER_DATA:
- data_transfer = smbd_response_payload(response);
+ case SMBDIRECT_EXPECT_DATA_TRANSFER:
+ data_transfer = smbdirect_recv_io_payload(response);
data_length = le32_to_cpu(data_transfer->data_length);
- /*
- * If this is a packet with data playload place the data in
- * reassembly queue and wake up the reading thread
- */
if (data_length) {
- if (info->full_packet_received)
+ if (sc->recv_io.reassembly.full_packet_received)
response->first_segment = true;
if (le32_to_cpu(data_transfer->remaining_data_length))
- info->full_packet_received = false;
+ sc->recv_io.reassembly.full_packet_received = false;
else
- info->full_packet_received = true;
-
- enqueue_reassembly(
- info,
- response,
- data_length);
- } else
- put_empty_packet(info, response);
-
- if (data_length)
- wake_up_interruptible(&info->wait_reassembly_queue);
+ sc->recv_io.reassembly.full_packet_received = true;
+ }
atomic_dec(&info->receive_credits);
info->receive_credit_target =
@@ -540,15 +525,31 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc)
info->keep_alive_requested = KEEP_ALIVE_PENDING;
}
+ /*
+ * If this is a packet with data payload place the data in
+ * reassembly queue and wake up the reading thread
+ */
+ if (data_length) {
+ enqueue_reassembly(info, response, data_length);
+ wake_up_interruptible(&sc->recv_io.reassembly.wait_queue);
+ } else
+ put_receive_buffer(info, response);
+
return;
- default:
- log_rdma_recv(ERR,
- "unexpected response type=%d\n", response->type);
+ case SMBDIRECT_EXPECT_NEGOTIATE_REQ:
+ /* Only server... */
+ break;
}
+ /*
+ * This is an internal error!
+ */
+ log_rdma_recv(ERR, "unexpected response type=%d\n", sc->recv_io.expected);
+ WARN_ON_ONCE(sc->recv_io.expected != SMBDIRECT_EXPECT_DATA_TRANSFER);
error:
put_receive_buffer(info, response);
+ smbd_disconnect_rdma_connection(info);
}
static struct rdma_cm_id *smbd_create_id(
@@ -694,16 +695,16 @@ static int smbd_post_send_negotiate_req(struct smbd_connection *info)
struct smbdirect_socket_parameters *sp = &sc->parameters;
struct ib_send_wr send_wr;
int rc = -ENOMEM;
- struct smbd_request *request;
+ struct smbdirect_send_io *request;
struct smbdirect_negotiate_req *packet;
- request = mempool_alloc(info->request_mempool, GFP_KERNEL);
+ request = mempool_alloc(sc->send_io.mem.pool, GFP_KERNEL);
if (!request)
return rc;
- request->info = info;
+ request->socket = sc;
- packet = smbd_request_payload(request);
+ packet = smbdirect_send_io_payload(request);
packet->min_version = cpu_to_le16(SMBDIRECT_V1);
packet->max_version = cpu_to_le16(SMBDIRECT_V1);
packet->reserved = 0;
@@ -756,7 +757,7 @@ static int smbd_post_send_negotiate_req(struct smbd_connection *info)
smbd_disconnect_rdma_connection(info);
dma_mapping_failed:
- mempool_free(request, info->request_mempool);
+ mempool_free(request, sc->send_io.mem.pool);
return rc;
}
@@ -800,7 +801,7 @@ static int manage_keep_alive_before_sending(struct smbd_connection *info)
/* Post the send request */
static int smbd_post_send(struct smbd_connection *info,
- struct smbd_request *request)
+ struct smbdirect_send_io *request)
{
struct smbdirect_socket *sc = &info->socket;
struct smbdirect_socket_parameters *sp = &sc->parameters;
@@ -849,7 +850,7 @@ static int smbd_post_send_iter(struct smbd_connection *info,
int i, rc;
int header_length;
int data_length;
- struct smbd_request *request;
+ struct smbdirect_send_io *request;
struct smbdirect_data_transfer *packet;
int new_credits = 0;
@@ -888,20 +889,20 @@ wait_send_queue:
goto wait_send_queue;
}
- request = mempool_alloc(info->request_mempool, GFP_KERNEL);
+ request = mempool_alloc(sc->send_io.mem.pool, GFP_KERNEL);
if (!request) {
rc = -ENOMEM;
goto err_alloc;
}
- request->info = info;
+ request->socket = sc;
memset(request->sge, 0, sizeof(request->sge));
/* Fill in the data payload to find out how much data we can add */
if (iter) {
struct smb_extract_to_rdma extract = {
.nr_sge = 1,
- .max_sge = SMBDIRECT_MAX_SEND_SGE,
+ .max_sge = SMBDIRECT_SEND_IO_MAX_SGE,
.sge = request->sge,
.device = sc->ib.dev,
.local_dma_lkey = sc->ib.pd->local_dma_lkey,
@@ -923,7 +924,7 @@ wait_send_queue:
}
/* Fill in the packet header */
- packet = smbd_request_payload(request);
+ packet = smbdirect_send_io_payload(request);
packet->credits_requested = cpu_to_le16(sp->send_credit_target);
new_credits = manage_credits_prior_sending(info);
@@ -982,7 +983,7 @@ err_dma:
request->sge[i].addr,
request->sge[i].length,
DMA_TO_DEVICE);
- mempool_free(request, info->request_mempool);
+ mempool_free(request, sc->send_io.mem.pool);
/* roll back receive credits and credits to be offered */
spin_lock(&info->lock_new_credits_offered);
@@ -1042,7 +1043,7 @@ static int smbd_post_send_full_iter(struct smbd_connection *info,
* The interaction is controlled by send/receive credit system
*/
static int smbd_post_recv(
- struct smbd_connection *info, struct smbd_response *response)
+ struct smbd_connection *info, struct smbdirect_recv_io *response)
{
struct smbdirect_socket *sc = &info->socket;
struct smbdirect_socket_parameters *sp = &sc->parameters;
@@ -1069,6 +1070,7 @@ static int smbd_post_recv(
if (rc) {
ib_dma_unmap_single(sc->ib.dev, response->sge.addr,
response->sge.length, DMA_FROM_DEVICE);
+ response->sge.length = 0;
smbd_disconnect_rdma_connection(info);
log_rdma_recv(ERR, "ib_post_recv failed rc=%d\n", rc);
}
@@ -1079,10 +1081,11 @@ static int smbd_post_recv(
/* Perform SMBD negotiate according to [MS-SMBD] 3.1.5.2 */
static int smbd_negotiate(struct smbd_connection *info)
{
+ struct smbdirect_socket *sc = &info->socket;
int rc;
- struct smbd_response *response = get_receive_buffer(info);
+ struct smbdirect_recv_io *response = get_receive_buffer(info);
- response->type = SMBD_NEGOTIATE_RESP;
+ sc->recv_io.expected = SMBDIRECT_EXPECT_NEGOTIATE_REP;
rc = smbd_post_recv(info, response);
log_rdma_event(INFO, "smbd_post_recv rc=%d iov.addr=0x%llx iov.length=%u iov.lkey=0x%x\n",
rc, response->sge.addr,
@@ -1113,17 +1116,6 @@ static int smbd_negotiate(struct smbd_connection *info)
return rc;
}
-static void put_empty_packet(
- struct smbd_connection *info, struct smbd_response *response)
-{
- spin_lock(&info->empty_packet_queue_lock);
- list_add_tail(&response->list, &info->empty_packet_queue);
- info->count_empty_packet_queue++;
- spin_unlock(&info->empty_packet_queue_lock);
-
- queue_work(info->workqueue, &info->post_send_credits_work);
-}
-
/*
* Implement Connection.FragmentReassemblyBuffer defined in [MS-SMBD] 3.1.1.1
* This is a queue for reassembling upper layer payload and present to upper
@@ -1136,12 +1128,14 @@ static void put_empty_packet(
*/
static void enqueue_reassembly(
struct smbd_connection *info,
- struct smbd_response *response,
+ struct smbdirect_recv_io *response,
int data_length)
{
- spin_lock(&info->reassembly_queue_lock);
- list_add_tail(&response->list, &info->reassembly_queue);
- info->reassembly_queue_length++;
+ struct smbdirect_socket *sc = &info->socket;
+
+ spin_lock(&sc->recv_io.reassembly.lock);
+ list_add_tail(&response->list, &sc->recv_io.reassembly.list);
+ sc->recv_io.reassembly.queue_length++;
/*
* Make sure reassembly_data_length is updated after list and
* reassembly_queue_length are updated. On the dequeue side
@@ -1149,8 +1143,8 @@ static void enqueue_reassembly(
* if reassembly_queue_length and list is up to date
*/
virt_wmb();
- info->reassembly_data_length += data_length;
- spin_unlock(&info->reassembly_queue_lock);
+ sc->recv_io.reassembly.data_length += data_length;
+ spin_unlock(&sc->recv_io.reassembly.lock);
info->count_reassembly_queue++;
info->count_enqueue_reassembly_queue++;
}
@@ -1160,34 +1154,16 @@ static void enqueue_reassembly(
* Caller is responsible for locking
* return value: the first entry if any, NULL if queue is empty
*/
-static struct smbd_response *_get_first_reassembly(struct smbd_connection *info)
-{
- struct smbd_response *ret = NULL;
-
- if (!list_empty(&info->reassembly_queue)) {
- ret = list_first_entry(
- &info->reassembly_queue,
- struct smbd_response, list);
- }
- return ret;
-}
-
-static struct smbd_response *get_empty_queue_buffer(
- struct smbd_connection *info)
+static struct smbdirect_recv_io *_get_first_reassembly(struct smbd_connection *info)
{
- struct smbd_response *ret = NULL;
- unsigned long flags;
+ struct smbdirect_socket *sc = &info->socket;
+ struct smbdirect_recv_io *ret = NULL;
- spin_lock_irqsave(&info->empty_packet_queue_lock, flags);
- if (!list_empty(&info->empty_packet_queue)) {
+ if (!list_empty(&sc->recv_io.reassembly.list)) {
ret = list_first_entry(
- &info->empty_packet_queue,
- struct smbd_response, list);
- list_del(&ret->list);
- info->count_empty_packet_queue--;
+ &sc->recv_io.reassembly.list,
+ struct smbdirect_recv_io, list);
}
- spin_unlock_irqrestore(&info->empty_packet_queue_lock, flags);
-
return ret;
}
@@ -1197,21 +1173,22 @@ static struct smbd_response *get_empty_queue_buffer(
* pre-allocated in advance.
* return value: the receive buffer, NULL if none is available
*/
-static struct smbd_response *get_receive_buffer(struct smbd_connection *info)
+static struct smbdirect_recv_io *get_receive_buffer(struct smbd_connection *info)
{
- struct smbd_response *ret = NULL;
+ struct smbdirect_socket *sc = &info->socket;
+ struct smbdirect_recv_io *ret = NULL;
unsigned long flags;
- spin_lock_irqsave(&info->receive_queue_lock, flags);
- if (!list_empty(&info->receive_queue)) {
+ spin_lock_irqsave(&sc->recv_io.free.lock, flags);
+ if (!list_empty(&sc->recv_io.free.list)) {
ret = list_first_entry(
- &info->receive_queue,
- struct smbd_response, list);
+ &sc->recv_io.free.list,
+ struct smbdirect_recv_io, list);
list_del(&ret->list);
info->count_receive_queue--;
info->count_get_receive_buffer++;
}
- spin_unlock_irqrestore(&info->receive_queue_lock, flags);
+ spin_unlock_irqrestore(&sc->recv_io.free.lock, flags);
return ret;
}
@@ -1223,19 +1200,24 @@ static struct smbd_response *get_receive_buffer(struct smbd_connection *info)
* receive buffer is returned.
*/
static void put_receive_buffer(
- struct smbd_connection *info, struct smbd_response *response)
+ struct smbd_connection *info, struct smbdirect_recv_io *response)
{
struct smbdirect_socket *sc = &info->socket;
unsigned long flags;
- ib_dma_unmap_single(sc->ib.dev, response->sge.addr,
- response->sge.length, DMA_FROM_DEVICE);
+ if (likely(response->sge.length != 0)) {
+ ib_dma_unmap_single(sc->ib.dev,
+ response->sge.addr,
+ response->sge.length,
+ DMA_FROM_DEVICE);
+ response->sge.length = 0;
+ }
- spin_lock_irqsave(&info->receive_queue_lock, flags);
- list_add_tail(&response->list, &info->receive_queue);
+ spin_lock_irqsave(&sc->recv_io.free.lock, flags);
+ list_add_tail(&response->list, &sc->recv_io.free.list);
info->count_receive_queue++;
info->count_put_receive_buffer++;
- spin_unlock_irqrestore(&info->receive_queue_lock, flags);
+ spin_unlock_irqrestore(&sc->recv_io.free.lock, flags);
queue_work(info->workqueue, &info->post_send_credits_work);
}
@@ -1243,58 +1225,54 @@ static void put_receive_buffer(
/* Preallocate all receive buffer on transport establishment */
static int allocate_receive_buffers(struct smbd_connection *info, int num_buf)
{
+ struct smbdirect_socket *sc = &info->socket;
+ struct smbdirect_recv_io *response;
int i;
- struct smbd_response *response;
- INIT_LIST_HEAD(&info->reassembly_queue);
- spin_lock_init(&info->reassembly_queue_lock);
- info->reassembly_data_length = 0;
- info->reassembly_queue_length = 0;
+ INIT_LIST_HEAD(&sc->recv_io.reassembly.list);
+ spin_lock_init(&sc->recv_io.reassembly.lock);
+ sc->recv_io.reassembly.data_length = 0;
+ sc->recv_io.reassembly.queue_length = 0;
- INIT_LIST_HEAD(&info->receive_queue);
- spin_lock_init(&info->receive_queue_lock);
+ INIT_LIST_HEAD(&sc->recv_io.free.list);
+ spin_lock_init(&sc->recv_io.free.lock);
info->count_receive_queue = 0;
- INIT_LIST_HEAD(&info->empty_packet_queue);
- spin_lock_init(&info->empty_packet_queue_lock);
- info->count_empty_packet_queue = 0;
-
init_waitqueue_head(&info->wait_receive_queues);
for (i = 0; i < num_buf; i++) {
- response = mempool_alloc(info->response_mempool, GFP_KERNEL);
+ response = mempool_alloc(sc->recv_io.mem.pool, GFP_KERNEL);
if (!response)
goto allocate_failed;
- response->info = info;
- list_add_tail(&response->list, &info->receive_queue);
+ response->socket = sc;
+ response->sge.length = 0;
+ list_add_tail(&response->list, &sc->recv_io.free.list);
info->count_receive_queue++;
}
return 0;
allocate_failed:
- while (!list_empty(&info->receive_queue)) {
+ while (!list_empty(&sc->recv_io.free.list)) {
response = list_first_entry(
- &info->receive_queue,
- struct smbd_response, list);
+ &sc->recv_io.free.list,
+ struct smbdirect_recv_io, list);
list_del(&response->list);
info->count_receive_queue--;
- mempool_free(response, info->response_mempool);
+ mempool_free(response, sc->recv_io.mem.pool);
}
return -ENOMEM;
}
static void destroy_receive_buffers(struct smbd_connection *info)
{
- struct smbd_response *response;
+ struct smbdirect_socket *sc = &info->socket;
+ struct smbdirect_recv_io *response;
while ((response = get_receive_buffer(info)))
- mempool_free(response, info->response_mempool);
-
- while ((response = get_empty_queue_buffer(info)))
- mempool_free(response, info->response_mempool);
+ mempool_free(response, sc->recv_io.mem.pool);
}
/* Implement idle connection timer [MS-SMBD] 3.1.6.2 */
@@ -1332,7 +1310,7 @@ void smbd_destroy(struct TCP_Server_Info *server)
struct smbd_connection *info = server->smbd_conn;
struct smbdirect_socket *sc;
struct smbdirect_socket_parameters *sp;
- struct smbd_response *response;
+ struct smbdirect_recv_io *response;
unsigned long flags;
if (!info) {
@@ -1347,7 +1325,7 @@ void smbd_destroy(struct TCP_Server_Info *server)
rdma_disconnect(sc->rdma.cm_id);
log_rdma_event(INFO, "wait for transport being disconnected\n");
wait_event_interruptible(
- info->disconn_wait,
+ info->status_wait,
sc->status == SMBDIRECT_SOCKET_DISCONNECTED);
}
@@ -1366,23 +1344,22 @@ void smbd_destroy(struct TCP_Server_Info *server)
/* It's not possible for upper layer to get to reassembly */
log_rdma_event(INFO, "drain the reassembly queue\n");
do {
- spin_lock_irqsave(&info->reassembly_queue_lock, flags);
+ spin_lock_irqsave(&sc->recv_io.reassembly.lock, flags);
response = _get_first_reassembly(info);
if (response) {
list_del(&response->list);
spin_unlock_irqrestore(
- &info->reassembly_queue_lock, flags);
+ &sc->recv_io.reassembly.lock, flags);
put_receive_buffer(info, response);
} else
spin_unlock_irqrestore(
- &info->reassembly_queue_lock, flags);
+ &sc->recv_io.reassembly.lock, flags);
} while (response);
- info->reassembly_data_length = 0;
+ sc->recv_io.reassembly.data_length = 0;
log_rdma_event(INFO, "free receive buffers\n");
wait_event(info->wait_receive_queues,
- info->count_receive_queue + info->count_empty_packet_queue
- == sp->recv_credit_max);
+ info->count_receive_queue == sp->recv_credit_max);
destroy_receive_buffers(info);
/*
@@ -1407,11 +1384,11 @@ void smbd_destroy(struct TCP_Server_Info *server)
rdma_destroy_id(sc->rdma.cm_id);
/* free mempools */
- mempool_destroy(info->request_mempool);
- kmem_cache_destroy(info->request_cache);
+ mempool_destroy(sc->send_io.mem.pool);
+ kmem_cache_destroy(sc->send_io.mem.cache);
- mempool_destroy(info->response_mempool);
- kmem_cache_destroy(info->response_cache);
+ mempool_destroy(sc->recv_io.mem.pool);
+ kmem_cache_destroy(sc->recv_io.mem.cache);
sc->status = SMBDIRECT_SOCKET_DESTROYED;
@@ -1459,12 +1436,14 @@ create_conn:
static void destroy_caches_and_workqueue(struct smbd_connection *info)
{
+ struct smbdirect_socket *sc = &info->socket;
+
destroy_receive_buffers(info);
destroy_workqueue(info->workqueue);
- mempool_destroy(info->response_mempool);
- kmem_cache_destroy(info->response_cache);
- mempool_destroy(info->request_mempool);
- kmem_cache_destroy(info->request_cache);
+ mempool_destroy(sc->recv_io.mem.pool);
+ kmem_cache_destroy(sc->recv_io.mem.cache);
+ mempool_destroy(sc->send_io.mem.pool);
+ kmem_cache_destroy(sc->send_io.mem.cache);
}
#define MAX_NAME_LEN 80
@@ -1478,41 +1457,41 @@ static int allocate_caches_and_workqueue(struct smbd_connection *info)
if (WARN_ON_ONCE(sp->max_recv_size < sizeof(struct smbdirect_data_transfer)))
return -ENOMEM;
- scnprintf(name, MAX_NAME_LEN, "smbd_request_%p", info);
- info->request_cache =
+ scnprintf(name, MAX_NAME_LEN, "smbdirect_send_io_%p", info);
+ sc->send_io.mem.cache =
kmem_cache_create(
name,
- sizeof(struct smbd_request) +
+ sizeof(struct smbdirect_send_io) +
sizeof(struct smbdirect_data_transfer),
0, SLAB_HWCACHE_ALIGN, NULL);
- if (!info->request_cache)
+ if (!sc->send_io.mem.cache)
return -ENOMEM;
- info->request_mempool =
+ sc->send_io.mem.pool =
mempool_create(sp->send_credit_target, mempool_alloc_slab,
- mempool_free_slab, info->request_cache);
- if (!info->request_mempool)
+ mempool_free_slab, sc->send_io.mem.cache);
+ if (!sc->send_io.mem.pool)
goto out1;
- scnprintf(name, MAX_NAME_LEN, "smbd_response_%p", info);
+ scnprintf(name, MAX_NAME_LEN, "smbdirect_recv_io_%p", info);
struct kmem_cache_args response_args = {
- .align = __alignof__(struct smbd_response),
- .useroffset = (offsetof(struct smbd_response, packet) +
+ .align = __alignof__(struct smbdirect_recv_io),
+ .useroffset = (offsetof(struct smbdirect_recv_io, packet) +
sizeof(struct smbdirect_data_transfer)),
.usersize = sp->max_recv_size - sizeof(struct smbdirect_data_transfer),
};
- info->response_cache =
+ sc->recv_io.mem.cache =
kmem_cache_create(name,
- sizeof(struct smbd_response) + sp->max_recv_size,
+ sizeof(struct smbdirect_recv_io) + sp->max_recv_size,
&response_args, SLAB_HWCACHE_ALIGN);
- if (!info->response_cache)
+ if (!sc->recv_io.mem.cache)
goto out2;
- info->response_mempool =
+ sc->recv_io.mem.pool =
mempool_create(sp->recv_credit_max, mempool_alloc_slab,
- mempool_free_slab, info->response_cache);
- if (!info->response_mempool)
+ mempool_free_slab, sc->recv_io.mem.cache);
+ if (!sc->recv_io.mem.pool)
goto out3;
scnprintf(name, MAX_NAME_LEN, "smbd_%p", info);
@@ -1531,13 +1510,13 @@ static int allocate_caches_and_workqueue(struct smbd_connection *info)
out5:
destroy_workqueue(info->workqueue);
out4:
- mempool_destroy(info->response_mempool);
+ mempool_destroy(sc->recv_io.mem.pool);
out3:
- kmem_cache_destroy(info->response_cache);
+ kmem_cache_destroy(sc->recv_io.mem.cache);
out2:
- mempool_destroy(info->request_mempool);
+ mempool_destroy(sc->send_io.mem.pool);
out1:
- kmem_cache_destroy(info->request_cache);
+ kmem_cache_destroy(sc->send_io.mem.cache);
return -ENOMEM;
}
@@ -1593,8 +1572,8 @@ static struct smbd_connection *_smbd_get_connection(
sp->max_recv_size = smbd_max_receive_size;
sp->keepalive_interval_msec = smbd_keep_alive_interval * 1000;
- if (sc->ib.dev->attrs.max_send_sge < SMBDIRECT_MAX_SEND_SGE ||
- sc->ib.dev->attrs.max_recv_sge < SMBDIRECT_MAX_RECV_SGE) {
+ if (sc->ib.dev->attrs.max_send_sge < SMBDIRECT_SEND_IO_MAX_SGE ||
+ sc->ib.dev->attrs.max_recv_sge < SMBDIRECT_RECV_IO_MAX_SGE) {
log_rdma_event(ERR,
"device %.*s max_send_sge/max_recv_sge = %d/%d too small\n",
IB_DEVICE_NAME_MAX,
@@ -1625,8 +1604,8 @@ static struct smbd_connection *_smbd_get_connection(
qp_attr.qp_context = info;
qp_attr.cap.max_send_wr = sp->send_credit_target;
qp_attr.cap.max_recv_wr = sp->recv_credit_max;
- qp_attr.cap.max_send_sge = SMBDIRECT_MAX_SEND_SGE;
- qp_attr.cap.max_recv_sge = SMBDIRECT_MAX_RECV_SGE;
+ qp_attr.cap.max_send_sge = SMBDIRECT_SEND_IO_MAX_SGE;
+ qp_attr.cap.max_recv_sge = SMBDIRECT_RECV_IO_MAX_SGE;
qp_attr.cap.max_inline_data = 0;
qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
qp_attr.qp_type = IB_QPT_RC;
@@ -1671,17 +1650,18 @@ static struct smbd_connection *_smbd_get_connection(
log_rdma_event(INFO, "connecting to IP %pI4 port %d\n",
&addr_in->sin_addr, port);
- init_waitqueue_head(&info->conn_wait);
- init_waitqueue_head(&info->disconn_wait);
- init_waitqueue_head(&info->wait_reassembly_queue);
+ init_waitqueue_head(&info->status_wait);
+ init_waitqueue_head(&sc->recv_io.reassembly.wait_queue);
rc = rdma_connect(sc->rdma.cm_id, &conn_param);
if (rc) {
log_rdma_event(ERR, "rdma_connect() failed with %i\n", rc);
goto rdma_connect_failed;
}
- wait_event_interruptible(
- info->conn_wait, sc->status != SMBDIRECT_SOCKET_CONNECTING);
+ wait_event_interruptible_timeout(
+ info->status_wait,
+ sc->status != SMBDIRECT_SOCKET_CONNECTING,
+ msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT));
if (sc->status != SMBDIRECT_SOCKET_CONNECTED) {
log_rdma_event(ERR, "rdma_connect failed port=%d\n", port);
@@ -1735,9 +1715,8 @@ negotiation_failed:
cancel_delayed_work_sync(&info->idle_timer_work);
destroy_caches_and_workqueue(info);
sc->status = SMBDIRECT_SOCKET_NEGOTIATE_FAILED;
- init_waitqueue_head(&info->conn_wait);
rdma_disconnect(sc->rdma.cm_id);
- wait_event(info->conn_wait,
+ wait_event(info->status_wait,
sc->status == SMBDIRECT_SOCKET_DISCONNECTED);
allocate_cache_failed:
@@ -1794,7 +1773,7 @@ try_again:
int smbd_recv(struct smbd_connection *info, struct msghdr *msg)
{
struct smbdirect_socket *sc = &info->socket;
- struct smbd_response *response;
+ struct smbdirect_recv_io *response;
struct smbdirect_data_transfer *data_transfer;
size_t size = iov_iter_count(&msg->msg_iter);
int to_copy, to_read, data_read, offset;
@@ -1810,9 +1789,9 @@ again:
* the only one reading from the front of the queue. The transport
* may add more entries to the back of the queue at the same time
*/
- log_read(INFO, "size=%zd info->reassembly_data_length=%d\n", size,
- info->reassembly_data_length);
- if (info->reassembly_data_length >= size) {
+ log_read(INFO, "size=%zd sc->recv_io.reassembly.data_length=%d\n", size,
+ sc->recv_io.reassembly.data_length);
+ if (sc->recv_io.reassembly.data_length >= size) {
int queue_length;
int queue_removed = 0;
@@ -1824,13 +1803,13 @@ again:
* updated in SOFTIRQ as more data is received
*/
virt_rmb();
- queue_length = info->reassembly_queue_length;
+ queue_length = sc->recv_io.reassembly.queue_length;
data_read = 0;
to_read = size;
- offset = info->first_entry_offset;
+ offset = sc->recv_io.reassembly.first_entry_offset;
while (data_read < size) {
response = _get_first_reassembly(info);
- data_transfer = smbd_response_payload(response);
+ data_transfer = smbdirect_recv_io_payload(response);
data_length = le32_to_cpu(data_transfer->data_length);
remaining_data_length =
le32_to_cpu(
@@ -1875,10 +1854,10 @@ again:
list_del(&response->list);
else {
spin_lock_irq(
- &info->reassembly_queue_lock);
+ &sc->recv_io.reassembly.lock);
list_del(&response->list);
spin_unlock_irq(
- &info->reassembly_queue_lock);
+ &sc->recv_io.reassembly.lock);
}
queue_removed++;
info->count_reassembly_queue--;
@@ -1897,23 +1876,23 @@ again:
to_read, data_read, offset);
}
- spin_lock_irq(&info->reassembly_queue_lock);
- info->reassembly_data_length -= data_read;
- info->reassembly_queue_length -= queue_removed;
- spin_unlock_irq(&info->reassembly_queue_lock);
+ spin_lock_irq(&sc->recv_io.reassembly.lock);
+ sc->recv_io.reassembly.data_length -= data_read;
+ sc->recv_io.reassembly.queue_length -= queue_removed;
+ spin_unlock_irq(&sc->recv_io.reassembly.lock);
- info->first_entry_offset = offset;
+ sc->recv_io.reassembly.first_entry_offset = offset;
log_read(INFO, "returning to thread data_read=%d reassembly_data_length=%d first_entry_offset=%d\n",
- data_read, info->reassembly_data_length,
- info->first_entry_offset);
+ data_read, sc->recv_io.reassembly.data_length,
+ sc->recv_io.reassembly.first_entry_offset);
read_rfc1002_done:
return data_read;
}
log_read(INFO, "wait_event on more data\n");
rc = wait_event_interruptible(
- info->wait_reassembly_queue,
- info->reassembly_data_length >= size ||
+ sc->recv_io.reassembly.wait_queue,
+ sc->recv_io.reassembly.data_length >= size ||
sc->status != SMBDIRECT_SOCKET_CONNECTED);
/* Don't return any data if interrupted */
if (rc)
diff --git a/fs/smb/client/smbdirect.h b/fs/smb/client/smbdirect.h
index 75b3f491c3ad..e45aa9ddd71d 100644
--- a/fs/smb/client/smbdirect.h
+++ b/fs/smb/client/smbdirect.h
@@ -33,16 +33,6 @@ enum keep_alive_status {
KEEP_ALIVE_SENT,
};
-enum smbd_connection_status {
- SMBD_CREATED,
- SMBD_CONNECTING,
- SMBD_CONNECTED,
- SMBD_NEGOTIATE_FAILED,
- SMBD_DISCONNECTING,
- SMBD_DISCONNECTED,
- SMBD_DESTROYED
-};
-
/*
* The context for the SMBDirect transport
* Everything related to the transport is here. It has several logical parts
@@ -57,8 +47,7 @@ struct smbd_connection {
int ri_rc;
struct completion ri_done;
- wait_queue_head_t conn_wait;
- wait_queue_head_t disconn_wait;
+ wait_queue_head_t status_wait;
struct completion negotiate_completion;
bool negotiate_done;
@@ -75,7 +64,6 @@ struct smbd_connection {
atomic_t send_credits;
atomic_t receive_credits;
int receive_credit_target;
- int fragment_reassembly_remaining;
/* Memory registrations */
/* Maximum number of RDMA read/write outstanding on this connection */
@@ -106,52 +94,16 @@ struct smbd_connection {
wait_queue_head_t wait_post_send;
/* Receive queue */
- struct list_head receive_queue;
int count_receive_queue;
- spinlock_t receive_queue_lock;
-
- struct list_head empty_packet_queue;
- int count_empty_packet_queue;
- spinlock_t empty_packet_queue_lock;
-
wait_queue_head_t wait_receive_queues;
- /* Reassembly queue */
- struct list_head reassembly_queue;
- spinlock_t reassembly_queue_lock;
- wait_queue_head_t wait_reassembly_queue;
-
- /* total data length of reassembly queue */
- int reassembly_data_length;
- int reassembly_queue_length;
- /* the offset to first buffer in reassembly queue */
- int first_entry_offset;
-
bool send_immediate;
wait_queue_head_t wait_send_queue;
- /*
- * Indicate if we have received a full packet on the connection
- * This is used to identify the first SMBD packet of a assembled
- * payload (SMB packet) in reassembly queue so we can return a
- * RFC1002 length to upper layer to indicate the length of the SMB
- * packet received
- */
- bool full_packet_received;
-
struct workqueue_struct *workqueue;
struct delayed_work idle_timer_work;
- /* Memory pool for preallocating buffers */
- /* request pool for RDMA send */
- struct kmem_cache *request_cache;
- mempool_t *request_mempool;
-
- /* response pool for RDMA receive */
- struct kmem_cache *response_cache;
- mempool_t *response_mempool;
-
/* for debug purposes */
unsigned int count_get_receive_buffer;
unsigned int count_put_receive_buffer;
@@ -161,48 +113,6 @@ struct smbd_connection {
unsigned int count_send_empty;
};
-enum smbd_message_type {
- SMBD_NEGOTIATE_RESP,
- SMBD_TRANSFER_DATA,
-};
-
-/* Maximum number of SGEs used by smbdirect.c in any send work request */
-#define SMBDIRECT_MAX_SEND_SGE 6
-
-/* The context for a SMBD request */
-struct smbd_request {
- struct smbd_connection *info;
- struct ib_cqe cqe;
-
- /* the SGE entries for this work request */
- struct ib_sge sge[SMBDIRECT_MAX_SEND_SGE];
- int num_sge;
-
- /* SMBD packet header follows this structure */
- u8 packet[];
-};
-
-/* Maximum number of SGEs used by smbdirect.c in any receive work request */
-#define SMBDIRECT_MAX_RECV_SGE 1
-
-/* The context for a SMBD response */
-struct smbd_response {
- struct smbd_connection *info;
- struct ib_cqe cqe;
- struct ib_sge sge;
-
- enum smbd_message_type type;
-
- /* Link to receive queue or reassembly queue */
- struct list_head list;
-
- /* Indicate if this is the 1st packet of a payload */
- bool first_segment;
-
- /* SMBD packet header and payload follows this structure */
- u8 packet[];
-};
-
/* Create a SMBDirect session */
struct smbd_connection *smbd_get_connection(
struct TCP_Server_Info *server, struct sockaddr *dstaddr);
diff --git a/fs/smb/client/transport.c b/fs/smb/client/transport.c
index 191783f553ce..32d528b4dd83 100644
--- a/fs/smb/client/transport.c
+++ b/fs/smb/client/transport.c
@@ -30,9 +30,6 @@
#include "smbdirect.h"
#include "compress.h"
-/* Max number of iovectors we can use off the stack when sending requests. */
-#define CIFS_MAX_IOV_SIZE 8
-
void
cifs_wake_up_task(struct mid_q_entry *mid)
{
@@ -41,42 +38,6 @@ cifs_wake_up_task(struct mid_q_entry *mid)
wake_up_process(mid->callback_data);
}
-static struct mid_q_entry *
-alloc_mid(const struct smb_hdr *smb_buffer, struct TCP_Server_Info *server)
-{
- struct mid_q_entry *temp;
-
- if (server == NULL) {
- cifs_dbg(VFS, "%s: null TCP session\n", __func__);
- return NULL;
- }
-
- temp = mempool_alloc(cifs_mid_poolp, GFP_NOFS);
- memset(temp, 0, sizeof(struct mid_q_entry));
- kref_init(&temp->refcount);
- temp->mid = get_mid(smb_buffer);
- temp->pid = current->pid;
- temp->command = cpu_to_le16(smb_buffer->Command);
- cifs_dbg(FYI, "For smb_command %d\n", smb_buffer->Command);
- /* easier to use jiffies */
- /* when mid allocated can be before when sent */
- temp->when_alloc = jiffies;
- temp->server = server;
-
- /*
- * The default is for the mid to be synchronous, so the
- * default callback just wakes up the current task.
- */
- get_task_struct(current);
- temp->creator = current;
- temp->callback = cifs_wake_up_task;
- temp->callback_data = current;
-
- atomic_inc(&mid_count);
- temp->mid_state = MID_REQUEST_ALLOCATED;
- return temp;
-}
-
void __release_mid(struct kref *refcount)
{
struct mid_q_entry *midEntry =
@@ -89,7 +50,7 @@ void __release_mid(struct kref *refcount)
#endif
struct TCP_Server_Info *server = midEntry->server;
- if (midEntry->resp_buf && (midEntry->mid_flags & MID_WAIT_CANCELLED) &&
+ if (midEntry->resp_buf && (midEntry->wait_cancelled) &&
(midEntry->mid_state == MID_RESPONSE_RECEIVED ||
midEntry->mid_state == MID_RESPONSE_READY) &&
server->ops->handle_cancelled_mid)
@@ -160,12 +121,12 @@ void __release_mid(struct kref *refcount)
void
delete_mid(struct mid_q_entry *mid)
{
- spin_lock(&mid->server->mid_lock);
- if (!(mid->mid_flags & MID_DELETED)) {
+ spin_lock(&mid->server->mid_queue_lock);
+ if (mid->deleted_from_q == false) {
list_del_init(&mid->qhead);
- mid->mid_flags |= MID_DELETED;
+ mid->deleted_from_q = true;
}
- spin_unlock(&mid->server->mid_lock);
+ spin_unlock(&mid->server->mid_queue_lock);
release_mid(mid);
}
@@ -269,9 +230,8 @@ smb_rqst_len(struct TCP_Server_Info *server, struct smb_rqst *rqst)
return buflen;
}
-static int
-__smb_send_rqst(struct TCP_Server_Info *server, int num_rqst,
- struct smb_rqst *rqst)
+int __smb_send_rqst(struct TCP_Server_Info *server, int num_rqst,
+ struct smb_rqst *rqst)
{
int rc;
struct kvec *iov;
@@ -397,7 +357,7 @@ unmask:
* socket so the server throws away the partial SMB
*/
cifs_signal_cifsd_for_reconnect(server, false);
- trace_smb3_partial_send_reconnect(server->CurrentMid,
+ trace_smb3_partial_send_reconnect(server->current_mid,
server->conn_id, server->hostname);
}
smbd_done:
@@ -456,22 +416,6 @@ smb_send_rqst(struct TCP_Server_Info *server, int num_rqst,
return rc;
}
-int
-smb_send(struct TCP_Server_Info *server, struct smb_hdr *smb_buffer,
- unsigned int smb_buf_length)
-{
- struct kvec iov[2];
- struct smb_rqst rqst = { .rq_iov = iov,
- .rq_nvec = 2 };
-
- iov[0].iov_base = smb_buffer;
- iov[0].iov_len = 4;
- iov[1].iov_base = (char *)smb_buffer + 4;
- iov[1].iov_len = smb_buf_length;
-
- return __smb_send_rqst(server, 1, &rqst);
-}
-
static int
wait_for_free_credits(struct TCP_Server_Info *server, const int num_credits,
const int timeout, const int flags,
@@ -509,7 +453,7 @@ wait_for_free_credits(struct TCP_Server_Info *server, const int num_credits,
in_flight = server->in_flight;
spin_unlock(&server->req_lock);
- trace_smb3_nblk_credits(server->CurrentMid,
+ trace_smb3_nblk_credits(server->current_mid,
server->conn_id, server->hostname, scredits, -1, in_flight);
cifs_dbg(FYI, "%s: remove %u credits total=%d\n",
__func__, 1, scredits);
@@ -542,7 +486,7 @@ wait_for_free_credits(struct TCP_Server_Info *server, const int num_credits,
in_flight = server->in_flight;
spin_unlock(&server->req_lock);
- trace_smb3_credit_timeout(server->CurrentMid,
+ trace_smb3_credit_timeout(server->current_mid,
server->conn_id, server->hostname, scredits,
num_credits, in_flight);
cifs_server_dbg(VFS, "wait timed out after %d ms\n",
@@ -585,7 +529,7 @@ wait_for_free_credits(struct TCP_Server_Info *server, const int num_credits,
spin_unlock(&server->req_lock);
trace_smb3_credit_timeout(
- server->CurrentMid,
+ server->current_mid,
server->conn_id, server->hostname,
scredits, num_credits, in_flight);
cifs_server_dbg(VFS, "wait timed out after %d ms\n",
@@ -615,7 +559,7 @@ wait_for_free_credits(struct TCP_Server_Info *server, const int num_credits,
in_flight = server->in_flight;
spin_unlock(&server->req_lock);
- trace_smb3_waitff_credits(server->CurrentMid,
+ trace_smb3_waitff_credits(server->current_mid,
server->conn_id, server->hostname, scredits,
-(num_credits), in_flight);
cifs_dbg(FYI, "%s: remove %u credits total=%d\n",
@@ -626,9 +570,8 @@ wait_for_free_credits(struct TCP_Server_Info *server, const int num_credits,
return 0;
}
-static int
-wait_for_free_request(struct TCP_Server_Info *server, const int flags,
- unsigned int *instance)
+int wait_for_free_request(struct TCP_Server_Info *server, const int flags,
+ unsigned int *instance)
{
return wait_for_free_credits(server, 1, -1, flags,
instance);
@@ -666,7 +609,7 @@ wait_for_compound_request(struct TCP_Server_Info *server, int num,
*/
if (server->in_flight == 0) {
spin_unlock(&server->req_lock);
- trace_smb3_insufficient_credits(server->CurrentMid,
+ trace_smb3_insufficient_credits(server->current_mid,
server->conn_id, server->hostname, scredits,
num, in_flight);
cifs_dbg(FYI, "%s: %d requests in flight, needed %d total=%d\n",
@@ -690,40 +633,7 @@ cifs_wait_mtu_credits(struct TCP_Server_Info *server, size_t size,
return 0;
}
-static int allocate_mid(struct cifs_ses *ses, struct smb_hdr *in_buf,
- struct mid_q_entry **ppmidQ)
-{
- spin_lock(&ses->ses_lock);
- if (ses->ses_status == SES_NEW) {
- if ((in_buf->Command != SMB_COM_SESSION_SETUP_ANDX) &&
- (in_buf->Command != SMB_COM_NEGOTIATE)) {
- spin_unlock(&ses->ses_lock);
- return -EAGAIN;
- }
- /* else ok - we are setting up session */
- }
-
- if (ses->ses_status == SES_EXITING) {
- /* check if SMB session is bad because we are setting it up */
- if (in_buf->Command != SMB_COM_LOGOFF_ANDX) {
- spin_unlock(&ses->ses_lock);
- return -EAGAIN;
- }
- /* else ok - we are shutting down session */
- }
- spin_unlock(&ses->ses_lock);
-
- *ppmidQ = alloc_mid(in_buf, ses->server);
- if (*ppmidQ == NULL)
- return -ENOMEM;
- spin_lock(&ses->server->mid_lock);
- list_add_tail(&(*ppmidQ)->qhead, &ses->server->pending_mid_q);
- spin_unlock(&ses->server->mid_lock);
- return 0;
-}
-
-static int
-wait_for_response(struct TCP_Server_Info *server, struct mid_q_entry *midQ)
+int wait_for_response(struct TCP_Server_Info *server, struct mid_q_entry *midQ)
{
int error;
@@ -737,34 +647,6 @@ wait_for_response(struct TCP_Server_Info *server, struct mid_q_entry *midQ)
return 0;
}
-struct mid_q_entry *
-cifs_setup_async_request(struct TCP_Server_Info *server, struct smb_rqst *rqst)
-{
- int rc;
- struct smb_hdr *hdr = (struct smb_hdr *)rqst->rq_iov[0].iov_base;
- struct mid_q_entry *mid;
-
- if (rqst->rq_iov[0].iov_len != 4 ||
- rqst->rq_iov[0].iov_base + 4 != rqst->rq_iov[1].iov_base)
- return ERR_PTR(-EIO);
-
- /* enable signing if server requires it */
- if (server->sign)
- hdr->Flags2 |= SMBFLG2_SECURITY_SIGNATURE;
-
- mid = alloc_mid(hdr, server);
- if (mid == NULL)
- return ERR_PTR(-ENOMEM);
-
- rc = cifs_sign_rqst(rqst, server, &mid->sequence_number);
- if (rc) {
- release_mid(mid);
- return ERR_PTR(rc);
- }
-
- return mid;
-}
-
/*
* Send a SMB request and set the callback function in the mid to handle
* the result. Caller is responsible for dealing with timeouts.
@@ -819,9 +701,9 @@ cifs_call_async(struct TCP_Server_Info *server, struct smb_rqst *rqst,
mid->mid_state = MID_REQUEST_SUBMITTED;
/* put it on the pending_mid_q */
- spin_lock(&server->mid_lock);
+ spin_lock(&server->mid_queue_lock);
list_add_tail(&mid->qhead, &server->pending_mid_q);
- spin_unlock(&server->mid_lock);
+ spin_unlock(&server->mid_queue_lock);
/*
* Need to store the time in mid before calling I/O. For call_async,
@@ -845,45 +727,17 @@ cifs_call_async(struct TCP_Server_Info *server, struct smb_rqst *rqst,
return rc;
}
-/*
- *
- * Send an SMB Request. No response info (other than return code)
- * needs to be parsed.
- *
- * flags indicate the type of request buffer and how long to wait
- * and whether to log NT STATUS code (error) before mapping it to POSIX error
- *
- */
-int
-SendReceiveNoRsp(const unsigned int xid, struct cifs_ses *ses,
- char *in_buf, int flags)
-{
- int rc;
- struct kvec iov[1];
- struct kvec rsp_iov;
- int resp_buf_type;
-
- iov[0].iov_base = in_buf;
- iov[0].iov_len = get_rfc1002_length(in_buf) + 4;
- flags |= CIFS_NO_RSP_BUF;
- rc = SendReceive2(xid, ses, iov, 1, &resp_buf_type, flags, &rsp_iov);
- cifs_dbg(NOISY, "SendRcvNoRsp flags %d rc %d\n", flags, rc);
-
- return rc;
-}
-
-static int
-cifs_sync_mid_result(struct mid_q_entry *mid, struct TCP_Server_Info *server)
+int cifs_sync_mid_result(struct mid_q_entry *mid, struct TCP_Server_Info *server)
{
int rc = 0;
cifs_dbg(FYI, "%s: cmd=%d mid=%llu state=%d\n",
__func__, le16_to_cpu(mid->command), mid->mid, mid->mid_state);
- spin_lock(&server->mid_lock);
+ spin_lock(&server->mid_queue_lock);
switch (mid->mid_state) {
case MID_RESPONSE_READY:
- spin_unlock(&server->mid_lock);
+ spin_unlock(&server->mid_queue_lock);
return rc;
case MID_RETRY_NEEDED:
rc = -EAGAIN;
@@ -898,85 +752,23 @@ cifs_sync_mid_result(struct mid_q_entry *mid, struct TCP_Server_Info *server)
rc = mid->mid_rc;
break;
default:
- if (!(mid->mid_flags & MID_DELETED)) {
+ if (mid->deleted_from_q == false) {
list_del_init(&mid->qhead);
- mid->mid_flags |= MID_DELETED;
+ mid->deleted_from_q = true;
}
- spin_unlock(&server->mid_lock);
+ spin_unlock(&server->mid_queue_lock);
cifs_server_dbg(VFS, "%s: invalid mid state mid=%llu state=%d\n",
__func__, mid->mid, mid->mid_state);
rc = -EIO;
goto sync_mid_done;
}
- spin_unlock(&server->mid_lock);
+ spin_unlock(&server->mid_queue_lock);
sync_mid_done:
release_mid(mid);
return rc;
}
-static inline int
-send_cancel(struct TCP_Server_Info *server, struct smb_rqst *rqst,
- struct mid_q_entry *mid)
-{
- return server->ops->send_cancel ?
- server->ops->send_cancel(server, rqst, mid) : 0;
-}
-
-int
-cifs_check_receive(struct mid_q_entry *mid, struct TCP_Server_Info *server,
- bool log_error)
-{
- unsigned int len = get_rfc1002_length(mid->resp_buf) + 4;
-
- dump_smb(mid->resp_buf, min_t(u32, 92, len));
-
- /* convert the length into a more usable form */
- if (server->sign) {
- struct kvec iov[2];
- int rc = 0;
- struct smb_rqst rqst = { .rq_iov = iov,
- .rq_nvec = 2 };
-
- iov[0].iov_base = mid->resp_buf;
- iov[0].iov_len = 4;
- iov[1].iov_base = (char *)mid->resp_buf + 4;
- iov[1].iov_len = len - 4;
- /* FIXME: add code to kill session */
- rc = cifs_verify_signature(&rqst, server,
- mid->sequence_number);
- if (rc)
- cifs_server_dbg(VFS, "SMB signature verification returned error = %d\n",
- rc);
- }
-
- /* BB special case reconnect tid and uid here? */
- return map_and_check_smb_error(mid, log_error);
-}
-
-struct mid_q_entry *
-cifs_setup_request(struct cifs_ses *ses, struct TCP_Server_Info *ignored,
- struct smb_rqst *rqst)
-{
- int rc;
- struct smb_hdr *hdr = (struct smb_hdr *)rqst->rq_iov[0].iov_base;
- struct mid_q_entry *mid;
-
- if (rqst->rq_iov[0].iov_len != 4 ||
- rqst->rq_iov[0].iov_base + 4 != rqst->rq_iov[1].iov_base)
- return ERR_PTR(-EIO);
-
- rc = allocate_mid(ses, hdr, &mid);
- if (rc)
- return ERR_PTR(rc);
- rc = cifs_sign_rqst(rqst, ses->server, &mid->sequence_number);
- if (rc) {
- delete_mid(mid);
- return ERR_PTR(rc);
- }
- return mid;
-}
-
static void
cifs_compound_callback(struct mid_q_entry *mid)
{
@@ -1213,15 +1005,15 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses,
cifs_server_dbg(FYI, "Cancelling wait for mid %llu cmd: %d\n",
midQ[i]->mid, le16_to_cpu(midQ[i]->command));
send_cancel(server, &rqst[i], midQ[i]);
- spin_lock(&server->mid_lock);
- midQ[i]->mid_flags |= MID_WAIT_CANCELLED;
+ spin_lock(&server->mid_queue_lock);
+ midQ[i]->wait_cancelled = true;
if (midQ[i]->mid_state == MID_REQUEST_SUBMITTED ||
midQ[i]->mid_state == MID_RESPONSE_RECEIVED) {
midQ[i]->callback = cifs_cancelled_callback;
cancelled_mid[i] = true;
credits[i].value = 0;
}
- spin_unlock(&server->mid_lock);
+ spin_unlock(&server->mid_queue_lock);
}
}
@@ -1304,344 +1096,6 @@ cifs_send_recv(const unsigned int xid, struct cifs_ses *ses,
rqst, resp_buf_type, resp_iov);
}
-int
-SendReceive2(const unsigned int xid, struct cifs_ses *ses,
- struct kvec *iov, int n_vec, int *resp_buf_type /* ret */,
- const int flags, struct kvec *resp_iov)
-{
- struct smb_rqst rqst;
- struct kvec s_iov[CIFS_MAX_IOV_SIZE], *new_iov;
- int rc;
-
- if (n_vec + 1 > CIFS_MAX_IOV_SIZE) {
- new_iov = kmalloc_array(n_vec + 1, sizeof(struct kvec),
- GFP_KERNEL);
- if (!new_iov) {
- /* otherwise cifs_send_recv below sets resp_buf_type */
- *resp_buf_type = CIFS_NO_BUFFER;
- return -ENOMEM;
- }
- } else
- new_iov = s_iov;
-
- /* 1st iov is a RFC1001 length followed by the rest of the packet */
- memcpy(new_iov + 1, iov, (sizeof(struct kvec) * n_vec));
-
- new_iov[0].iov_base = new_iov[1].iov_base;
- new_iov[0].iov_len = 4;
- new_iov[1].iov_base += 4;
- new_iov[1].iov_len -= 4;
-
- memset(&rqst, 0, sizeof(struct smb_rqst));
- rqst.rq_iov = new_iov;
- rqst.rq_nvec = n_vec + 1;
-
- rc = cifs_send_recv(xid, ses, ses->server,
- &rqst, resp_buf_type, flags, resp_iov);
- if (n_vec + 1 > CIFS_MAX_IOV_SIZE)
- kfree(new_iov);
- return rc;
-}
-
-int
-SendReceive(const unsigned int xid, struct cifs_ses *ses,
- struct smb_hdr *in_buf, struct smb_hdr *out_buf,
- int *pbytes_returned, const int flags)
-{
- int rc = 0;
- struct mid_q_entry *midQ;
- unsigned int len = be32_to_cpu(in_buf->smb_buf_length);
- struct kvec iov = { .iov_base = in_buf, .iov_len = len };
- struct smb_rqst rqst = { .rq_iov = &iov, .rq_nvec = 1 };
- struct cifs_credits credits = { .value = 1, .instance = 0 };
- struct TCP_Server_Info *server;
-
- if (ses == NULL) {
- cifs_dbg(VFS, "Null smb session\n");
- return -EIO;
- }
- server = ses->server;
- if (server == NULL) {
- cifs_dbg(VFS, "Null tcp session\n");
- return -EIO;
- }
-
- spin_lock(&server->srv_lock);
- if (server->tcpStatus == CifsExiting) {
- spin_unlock(&server->srv_lock);
- return -ENOENT;
- }
- spin_unlock(&server->srv_lock);
-
- /* Ensure that we do not send more than 50 overlapping requests
- to the same server. We may make this configurable later or
- use ses->maxReq */
-
- if (len > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4) {
- cifs_server_dbg(VFS, "Invalid length, greater than maximum frame, %d\n",
- len);
- return -EIO;
- }
-
- rc = wait_for_free_request(server, flags, &credits.instance);
- if (rc)
- return rc;
-
- /* make sure that we sign in the same order that we send on this socket
- and avoid races inside tcp sendmsg code that could cause corruption
- of smb data */
-
- cifs_server_lock(server);
-
- rc = allocate_mid(ses, in_buf, &midQ);
- if (rc) {
- cifs_server_unlock(server);
- /* Update # of requests on wire to server */
- add_credits(server, &credits, 0);
- return rc;
- }
-
- rc = cifs_sign_smb(in_buf, server, &midQ->sequence_number);
- if (rc) {
- cifs_server_unlock(server);
- goto out;
- }
-
- midQ->mid_state = MID_REQUEST_SUBMITTED;
-
- rc = smb_send(server, in_buf, len);
- cifs_save_when_sent(midQ);
-
- if (rc < 0)
- server->sequence_number -= 2;
-
- cifs_server_unlock(server);
-
- if (rc < 0)
- goto out;
-
- rc = wait_for_response(server, midQ);
- if (rc != 0) {
- send_cancel(server, &rqst, midQ);
- spin_lock(&server->mid_lock);
- if (midQ->mid_state == MID_REQUEST_SUBMITTED ||
- midQ->mid_state == MID_RESPONSE_RECEIVED) {
- /* no longer considered to be "in-flight" */
- midQ->callback = release_mid;
- spin_unlock(&server->mid_lock);
- add_credits(server, &credits, 0);
- return rc;
- }
- spin_unlock(&server->mid_lock);
- }
-
- rc = cifs_sync_mid_result(midQ, server);
- if (rc != 0) {
- add_credits(server, &credits, 0);
- return rc;
- }
-
- if (!midQ->resp_buf || !out_buf ||
- midQ->mid_state != MID_RESPONSE_READY) {
- rc = -EIO;
- cifs_server_dbg(VFS, "Bad MID state?\n");
- goto out;
- }
-
- *pbytes_returned = get_rfc1002_length(midQ->resp_buf);
- memcpy(out_buf, midQ->resp_buf, *pbytes_returned + 4);
- rc = cifs_check_receive(midQ, server, 0);
-out:
- delete_mid(midQ);
- add_credits(server, &credits, 0);
-
- return rc;
-}
-
-/* We send a LOCKINGX_CANCEL_LOCK to cause the Windows
- blocking lock to return. */
-
-static int
-send_lock_cancel(const unsigned int xid, struct cifs_tcon *tcon,
- struct smb_hdr *in_buf,
- struct smb_hdr *out_buf)
-{
- int bytes_returned;
- struct cifs_ses *ses = tcon->ses;
- LOCK_REQ *pSMB = (LOCK_REQ *)in_buf;
-
- /* We just modify the current in_buf to change
- the type of lock from LOCKING_ANDX_SHARED_LOCK
- or LOCKING_ANDX_EXCLUSIVE_LOCK to
- LOCKING_ANDX_CANCEL_LOCK. */
-
- pSMB->LockType = LOCKING_ANDX_CANCEL_LOCK|LOCKING_ANDX_LARGE_FILES;
- pSMB->Timeout = 0;
- pSMB->hdr.Mid = get_next_mid(ses->server);
-
- return SendReceive(xid, ses, in_buf, out_buf,
- &bytes_returned, 0);
-}
-
-int
-SendReceiveBlockingLock(const unsigned int xid, struct cifs_tcon *tcon,
- struct smb_hdr *in_buf, struct smb_hdr *out_buf,
- int *pbytes_returned)
-{
- int rc = 0;
- int rstart = 0;
- struct mid_q_entry *midQ;
- struct cifs_ses *ses;
- unsigned int len = be32_to_cpu(in_buf->smb_buf_length);
- struct kvec iov = { .iov_base = in_buf, .iov_len = len };
- struct smb_rqst rqst = { .rq_iov = &iov, .rq_nvec = 1 };
- unsigned int instance;
- struct TCP_Server_Info *server;
-
- if (tcon == NULL || tcon->ses == NULL) {
- cifs_dbg(VFS, "Null smb session\n");
- return -EIO;
- }
- ses = tcon->ses;
- server = ses->server;
-
- if (server == NULL) {
- cifs_dbg(VFS, "Null tcp session\n");
- return -EIO;
- }
-
- spin_lock(&server->srv_lock);
- if (server->tcpStatus == CifsExiting) {
- spin_unlock(&server->srv_lock);
- return -ENOENT;
- }
- spin_unlock(&server->srv_lock);
-
- /* Ensure that we do not send more than 50 overlapping requests
- to the same server. We may make this configurable later or
- use ses->maxReq */
-
- if (len > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4) {
- cifs_tcon_dbg(VFS, "Invalid length, greater than maximum frame, %d\n",
- len);
- return -EIO;
- }
-
- rc = wait_for_free_request(server, CIFS_BLOCKING_OP, &instance);
- if (rc)
- return rc;
-
- /* make sure that we sign in the same order that we send on this socket
- and avoid races inside tcp sendmsg code that could cause corruption
- of smb data */
-
- cifs_server_lock(server);
-
- rc = allocate_mid(ses, in_buf, &midQ);
- if (rc) {
- cifs_server_unlock(server);
- return rc;
- }
-
- rc = cifs_sign_smb(in_buf, server, &midQ->sequence_number);
- if (rc) {
- delete_mid(midQ);
- cifs_server_unlock(server);
- return rc;
- }
-
- midQ->mid_state = MID_REQUEST_SUBMITTED;
- rc = smb_send(server, in_buf, len);
- cifs_save_when_sent(midQ);
-
- if (rc < 0)
- server->sequence_number -= 2;
-
- cifs_server_unlock(server);
-
- if (rc < 0) {
- delete_mid(midQ);
- return rc;
- }
-
- /* Wait for a reply - allow signals to interrupt. */
- rc = wait_event_interruptible(server->response_q,
- (!(midQ->mid_state == MID_REQUEST_SUBMITTED ||
- midQ->mid_state == MID_RESPONSE_RECEIVED)) ||
- ((server->tcpStatus != CifsGood) &&
- (server->tcpStatus != CifsNew)));
-
- /* Were we interrupted by a signal ? */
- spin_lock(&server->srv_lock);
- if ((rc == -ERESTARTSYS) &&
- (midQ->mid_state == MID_REQUEST_SUBMITTED ||
- midQ->mid_state == MID_RESPONSE_RECEIVED) &&
- ((server->tcpStatus == CifsGood) ||
- (server->tcpStatus == CifsNew))) {
- spin_unlock(&server->srv_lock);
-
- if (in_buf->Command == SMB_COM_TRANSACTION2) {
- /* POSIX lock. We send a NT_CANCEL SMB to cause the
- blocking lock to return. */
- rc = send_cancel(server, &rqst, midQ);
- if (rc) {
- delete_mid(midQ);
- return rc;
- }
- } else {
- /* Windows lock. We send a LOCKINGX_CANCEL_LOCK
- to cause the blocking lock to return. */
-
- rc = send_lock_cancel(xid, tcon, in_buf, out_buf);
-
- /* If we get -ENOLCK back the lock may have
- already been removed. Don't exit in this case. */
- if (rc && rc != -ENOLCK) {
- delete_mid(midQ);
- return rc;
- }
- }
-
- rc = wait_for_response(server, midQ);
- if (rc) {
- send_cancel(server, &rqst, midQ);
- spin_lock(&server->mid_lock);
- if (midQ->mid_state == MID_REQUEST_SUBMITTED ||
- midQ->mid_state == MID_RESPONSE_RECEIVED) {
- /* no longer considered to be "in-flight" */
- midQ->callback = release_mid;
- spin_unlock(&server->mid_lock);
- return rc;
- }
- spin_unlock(&server->mid_lock);
- }
-
- /* We got the response - restart system call. */
- rstart = 1;
- spin_lock(&server->srv_lock);
- }
- spin_unlock(&server->srv_lock);
-
- rc = cifs_sync_mid_result(midQ, server);
- if (rc != 0)
- return rc;
-
- /* rcvd frame is ok */
- if (out_buf == NULL || midQ->mid_state != MID_RESPONSE_READY) {
- rc = -EIO;
- cifs_tcon_dbg(VFS, "Bad MID state?\n");
- goto out;
- }
-
- *pbytes_returned = get_rfc1002_length(midQ->resp_buf);
- memcpy(out_buf, midQ->resp_buf, *pbytes_returned + 4);
- rc = cifs_check_receive(midQ, server, 0);
-out:
- delete_mid(midQ);
- if (rstart && rc == -EACCES)
- return -ERESTARTSYS;
- return rc;
-}
/*
* Discard any remaining data in the current SMB. To do this, we borrow the
diff --git a/fs/smb/common/smbdirect/smbdirect_socket.h b/fs/smb/common/smbdirect/smbdirect_socket.h
index e5b15cc44a7b..3c4a8d627aa3 100644
--- a/fs/smb/common/smbdirect/smbdirect_socket.h
+++ b/fs/smb/common/smbdirect/smbdirect_socket.h
@@ -38,6 +38,124 @@ struct smbdirect_socket {
} ib;
struct smbdirect_socket_parameters parameters;
+
+ /*
+ * The state for posted send buffers
+ */
+ struct {
+ /*
+ * Memory pools for preallocating
+ * smbdirect_send_io buffers
+ */
+ struct {
+ struct kmem_cache *cache;
+ mempool_t *pool;
+ } mem;
+ } send_io;
+
+ /*
+ * The state for posted receive buffers
+ */
+ struct {
+ /*
+ * The type of PDU we are expecting
+ */
+ enum {
+ SMBDIRECT_EXPECT_NEGOTIATE_REQ = 1,
+ SMBDIRECT_EXPECT_NEGOTIATE_REP = 2,
+ SMBDIRECT_EXPECT_DATA_TRANSFER = 3,
+ } expected;
+
+ /*
+ * Memory pools for preallocating
+ * smbdirect_recv_io buffers
+ */
+ struct {
+ struct kmem_cache *cache;
+ mempool_t *pool;
+ } mem;
+
+ /*
+ * The list of free smbdirect_recv_io
+ * structures
+ */
+ struct {
+ struct list_head list;
+ spinlock_t lock;
+ } free;
+
+ /*
+ * The list of arrived non-empty smbdirect_recv_io
+ * structures
+ *
+ * This represents the reassembly queue.
+ */
+ struct {
+ struct list_head list;
+ spinlock_t lock;
+ wait_queue_head_t wait_queue;
+ /* total data length of reassembly queue */
+ int data_length;
+ int queue_length;
+ /* the offset to first buffer in reassembly queue */
+ int first_entry_offset;
+ /*
+ * Indicates whether we have received a full packet on
+ * the connection. This is used to identify the first SMBD
+ * packet of an assembled payload (SMB packet) in the
+ * reassembly queue so we can return an RFC1002 length to
+ * the upper layer to indicate the length of the SMB packet
+ * received.
+ */
+ bool full_packet_received;
+ } reassembly;
+ } recv_io;
+};
+
+struct smbdirect_send_io {
+ struct smbdirect_socket *socket;
+ struct ib_cqe cqe;
+
+ /*
+ * The SGE entries for this work request
+ *
+ * The first points to the packet header
+ */
+#define SMBDIRECT_SEND_IO_MAX_SGE 6
+ size_t num_sge;
+ struct ib_sge sge[SMBDIRECT_SEND_IO_MAX_SGE];
+
+ /*
+ * Link to the list of sibling smbdirect_send_io
+ * messages.
+ */
+ struct list_head sibling_list;
+ struct ib_send_wr wr;
+
+ /* SMBD packet header follows this structure */
+ u8 packet[];
+};
+
+struct smbdirect_recv_io {
+ struct smbdirect_socket *socket;
+ struct ib_cqe cqe;
+
+ /*
+ * For now we only use a single SGE
+ * as we have just one large buffer
+ * per posted recv.
+ */
+#define SMBDIRECT_RECV_IO_MAX_SGE 1
+ struct ib_sge sge;
+
+ /* Link to free or reassembly list */
+ struct list_head list;
+
+ /* Indicate if this is the 1st packet of a payload */
+ bool first_segment;
+
+ /* SMBD packet header and payload follow this structure */
+ u8 packet[];
};
#endif /* __FS_SMB_COMMON_SMBDIRECT_SMBDIRECT_SOCKET_H__ */
diff --git a/fs/smb/server/connection.h b/fs/smb/server/connection.h
index dd3e0e3f7bf0..31dd1caac1e8 100644
--- a/fs/smb/server/connection.h
+++ b/fs/smb/server/connection.h
@@ -46,6 +46,7 @@ struct ksmbd_conn {
struct mutex srv_mutex;
int status;
unsigned int cli_cap;
+ __be32 inet_addr;
char *request_buf;
struct ksmbd_transport *transport;
struct nls_table *local_nls;
diff --git a/fs/smb/server/smb_common.c b/fs/smb/server/smb_common.c
index 425c756bcfb8..b23203a1c286 100644
--- a/fs/smb/server/smb_common.c
+++ b/fs/smb/server/smb_common.c
@@ -515,7 +515,7 @@ int ksmbd_extract_shortname(struct ksmbd_conn *conn, const char *longname,
p = strrchr(longname, '.');
if (p == longname) { /*name starts with a dot*/
- strscpy(extension, "___", strlen("___"));
+ strscpy(extension, "___", sizeof(extension));
} else {
if (p) {
p++;
diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c
index c6cbe0d56e32..8d366db5f605 100644
--- a/fs/smb/server/transport_rdma.c
+++ b/fs/smb/server/transport_rdma.c
@@ -129,9 +129,6 @@ struct smb_direct_transport {
spinlock_t recvmsg_queue_lock;
struct list_head recvmsg_queue;
- spinlock_t empty_recvmsg_queue_lock;
- struct list_head empty_recvmsg_queue;
-
int send_credit_target;
atomic_t send_credits;
spinlock_t lock_new_recv_credits;
@@ -268,40 +265,19 @@ smb_direct_recvmsg *get_free_recvmsg(struct smb_direct_transport *t)
static void put_recvmsg(struct smb_direct_transport *t,
struct smb_direct_recvmsg *recvmsg)
{
- ib_dma_unmap_single(t->cm_id->device, recvmsg->sge.addr,
- recvmsg->sge.length, DMA_FROM_DEVICE);
+ if (likely(recvmsg->sge.length != 0)) {
+ ib_dma_unmap_single(t->cm_id->device,
+ recvmsg->sge.addr,
+ recvmsg->sge.length,
+ DMA_FROM_DEVICE);
+ recvmsg->sge.length = 0;
+ }
spin_lock(&t->recvmsg_queue_lock);
list_add(&recvmsg->list, &t->recvmsg_queue);
spin_unlock(&t->recvmsg_queue_lock);
}
-static struct
-smb_direct_recvmsg *get_empty_recvmsg(struct smb_direct_transport *t)
-{
- struct smb_direct_recvmsg *recvmsg = NULL;
-
- spin_lock(&t->empty_recvmsg_queue_lock);
- if (!list_empty(&t->empty_recvmsg_queue)) {
- recvmsg = list_first_entry(&t->empty_recvmsg_queue,
- struct smb_direct_recvmsg, list);
- list_del(&recvmsg->list);
- }
- spin_unlock(&t->empty_recvmsg_queue_lock);
- return recvmsg;
-}
-
-static void put_empty_recvmsg(struct smb_direct_transport *t,
- struct smb_direct_recvmsg *recvmsg)
-{
- ib_dma_unmap_single(t->cm_id->device, recvmsg->sge.addr,
- recvmsg->sge.length, DMA_FROM_DEVICE);
-
- spin_lock(&t->empty_recvmsg_queue_lock);
- list_add_tail(&recvmsg->list, &t->empty_recvmsg_queue);
- spin_unlock(&t->empty_recvmsg_queue_lock);
-}
-
static void enqueue_reassembly(struct smb_direct_transport *t,
struct smb_direct_recvmsg *recvmsg,
int data_length)
@@ -386,9 +362,6 @@ static struct smb_direct_transport *alloc_transport(struct rdma_cm_id *cm_id)
spin_lock_init(&t->recvmsg_queue_lock);
INIT_LIST_HEAD(&t->recvmsg_queue);
- spin_lock_init(&t->empty_recvmsg_queue_lock);
- INIT_LIST_HEAD(&t->empty_recvmsg_queue);
-
init_waitqueue_head(&t->wait_send_pending);
atomic_set(&t->send_pending, 0);
@@ -548,13 +521,13 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc)
t = recvmsg->transport;
if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_RECV) {
+ put_recvmsg(t, recvmsg);
if (wc->status != IB_WC_WR_FLUSH_ERR) {
pr_err("Recv error. status='%s (%d)' opcode=%d\n",
ib_wc_status_msg(wc->status), wc->status,
wc->opcode);
smb_direct_disconnect_rdma_connection(t);
}
- put_empty_recvmsg(t, recvmsg);
return;
}
@@ -568,7 +541,8 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc)
switch (recvmsg->type) {
case SMB_DIRECT_MSG_NEGOTIATE_REQ:
if (wc->byte_len < sizeof(struct smb_direct_negotiate_req)) {
- put_empty_recvmsg(t, recvmsg);
+ put_recvmsg(t, recvmsg);
+ smb_direct_disconnect_rdma_connection(t);
return;
}
t->negotiation_requested = true;
@@ -576,7 +550,7 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc)
t->status = SMB_DIRECT_CS_CONNECTED;
enqueue_reassembly(t, recvmsg, 0);
wake_up_interruptible(&t->wait_status);
- break;
+ return;
case SMB_DIRECT_MSG_DATA_TRANSFER: {
struct smb_direct_data_transfer *data_transfer =
(struct smb_direct_data_transfer *)recvmsg->packet;
@@ -585,7 +559,8 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc)
if (wc->byte_len <
offsetof(struct smb_direct_data_transfer, padding)) {
- put_empty_recvmsg(t, recvmsg);
+ put_recvmsg(t, recvmsg);
+ smb_direct_disconnect_rdma_connection(t);
return;
}
@@ -593,7 +568,8 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc)
if (data_length) {
if (wc->byte_len < sizeof(struct smb_direct_data_transfer) +
(u64)data_length) {
- put_empty_recvmsg(t, recvmsg);
+ put_recvmsg(t, recvmsg);
+ smb_direct_disconnect_rdma_connection(t);
return;
}
@@ -605,16 +581,11 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc)
else
t->full_packet_received = true;
- enqueue_reassembly(t, recvmsg, (int)data_length);
- wake_up_interruptible(&t->wait_reassembly_queue);
-
spin_lock(&t->receive_credit_lock);
receive_credits = --(t->recv_credits);
avail_recvmsg_count = t->count_avail_recvmsg;
spin_unlock(&t->receive_credit_lock);
} else {
- put_empty_recvmsg(t, recvmsg);
-
spin_lock(&t->receive_credit_lock);
receive_credits = --(t->recv_credits);
avail_recvmsg_count = ++(t->count_avail_recvmsg);
@@ -636,11 +607,23 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc)
if (is_receive_credit_post_required(receive_credits, avail_recvmsg_count))
mod_delayed_work(smb_direct_wq,
&t->post_recv_credits_work, 0);
- break;
+
+ if (data_length) {
+ enqueue_reassembly(t, recvmsg, (int)data_length);
+ wake_up_interruptible(&t->wait_reassembly_queue);
+ } else
+ put_recvmsg(t, recvmsg);
+
+ return;
}
- default:
- break;
}
+
+ /*
+ * This is an internal error!
+ */
+ WARN_ON_ONCE(recvmsg->type != SMB_DIRECT_MSG_DATA_TRANSFER);
+ put_recvmsg(t, recvmsg);
+ smb_direct_disconnect_rdma_connection(t);
}
static int smb_direct_post_recv(struct smb_direct_transport *t,
@@ -670,6 +653,7 @@ static int smb_direct_post_recv(struct smb_direct_transport *t,
ib_dma_unmap_single(t->cm_id->device,
recvmsg->sge.addr, recvmsg->sge.length,
DMA_FROM_DEVICE);
+ recvmsg->sge.length = 0;
smb_direct_disconnect_rdma_connection(t);
return ret;
}
@@ -811,7 +795,6 @@ static void smb_direct_post_recv_credits(struct work_struct *work)
struct smb_direct_recvmsg *recvmsg;
int receive_credits, credits = 0;
int ret;
- int use_free = 1;
spin_lock(&t->receive_credit_lock);
receive_credits = t->recv_credits;
@@ -819,18 +802,9 @@ static void smb_direct_post_recv_credits(struct work_struct *work)
if (receive_credits < t->recv_credit_target) {
while (true) {
- if (use_free)
- recvmsg = get_free_recvmsg(t);
- else
- recvmsg = get_empty_recvmsg(t);
- if (!recvmsg) {
- if (use_free) {
- use_free = 0;
- continue;
- } else {
- break;
- }
- }
+ recvmsg = get_free_recvmsg(t);
+ if (!recvmsg)
+ break;
recvmsg->type = SMB_DIRECT_MSG_DATA_TRANSFER;
recvmsg->first_segment = false;
@@ -1806,8 +1780,6 @@ static void smb_direct_destroy_pools(struct smb_direct_transport *t)
while ((recvmsg = get_free_recvmsg(t)))
mempool_free(recvmsg, t->recvmsg_mempool);
- while ((recvmsg = get_empty_recvmsg(t)))
- mempool_free(recvmsg, t->recvmsg_mempool);
mempool_destroy(t->recvmsg_mempool);
t->recvmsg_mempool = NULL;
@@ -1863,6 +1835,7 @@ static int smb_direct_create_pools(struct smb_direct_transport *t)
if (!recvmsg)
goto err;
recvmsg->transport = t;
+ recvmsg->sge.length = 0;
list_add(&recvmsg->list, &t->recvmsg_queue);
}
t->count_avail_recvmsg = t->recv_credit_max;
diff --git a/fs/smb/server/transport_tcp.c b/fs/smb/server/transport_tcp.c
index f8c772a7cb43..b1df02e321b0 100644
--- a/fs/smb/server/transport_tcp.c
+++ b/fs/smb/server/transport_tcp.c
@@ -85,6 +85,7 @@ static struct tcp_transport *alloc_transport(struct socket *client_sk)
return NULL;
}
+ conn->inet_addr = inet_sk(client_sk->sk)->inet_daddr;
conn->transport = KSMBD_TRANS(t);
KSMBD_TRANS(t)->conn = conn;
KSMBD_TRANS(t)->ops = &ksmbd_tcp_transport_ops;
@@ -228,6 +229,8 @@ static int ksmbd_kthread_fn(void *p)
{
struct socket *client_sk = NULL;
struct interface *iface = (struct interface *)p;
+ struct inet_sock *csk_inet;
+ struct ksmbd_conn *conn;
int ret;
while (!kthread_should_stop()) {
@@ -246,6 +249,20 @@ static int ksmbd_kthread_fn(void *p)
continue;
}
+ /*
+ * Skip new connections from an IPv4 address that is already connected.
+ */
+ csk_inet = inet_sk(client_sk->sk);
+ down_read(&conn_list_lock);
+ list_for_each_entry(conn, &conn_list, conns_list)
+ if (csk_inet->inet_daddr == conn->inet_addr) {
+ ret = -EAGAIN;
+ break;
+ }
+ up_read(&conn_list_lock);
+ if (ret == -EAGAIN)
+ continue;
+
if (server_conf.max_connections &&
atomic_inc_return(&active_num_conn) >= server_conf.max_connections) {
pr_info_ratelimited("Limit the maximum number of connections(%u)\n",
diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c
index 3061043e915c..b69c294e3ef0 100644
--- a/fs/squashfs/block.c
+++ b/fs/squashfs/block.c
@@ -80,23 +80,22 @@ static int squashfs_bio_read_cached(struct bio *fullbio,
struct address_space *cache_mapping, u64 index, int length,
u64 read_start, u64 read_end, int page_count)
{
- struct page *head_to_cache = NULL, *tail_to_cache = NULL;
+ struct folio *head_to_cache = NULL, *tail_to_cache = NULL;
struct block_device *bdev = fullbio->bi_bdev;
int start_idx = 0, end_idx = 0;
- struct bvec_iter_all iter_all;
+ struct folio_iter fi;
struct bio *bio = NULL;
- struct bio_vec *bv;
int idx = 0;
int err = 0;
#ifdef CONFIG_SQUASHFS_COMP_CACHE_FULL
- struct page **cache_pages = kmalloc_array(page_count,
- sizeof(void *), GFP_KERNEL | __GFP_ZERO);
+ struct folio **cache_folios = kmalloc_array(page_count,
+ sizeof(*cache_folios), GFP_KERNEL | __GFP_ZERO);
#endif
- bio_for_each_segment_all(bv, fullbio, iter_all) {
- struct page *page = bv->bv_page;
+ bio_for_each_folio_all(fi, fullbio) {
+ struct folio *folio = fi.folio;
- if (page->mapping == cache_mapping) {
+ if (folio->mapping == cache_mapping) {
idx++;
continue;
}
@@ -111,13 +110,13 @@ static int squashfs_bio_read_cached(struct bio *fullbio,
* adjacent blocks.
*/
if (idx == 0 && index != read_start)
- head_to_cache = page;
+ head_to_cache = folio;
else if (idx == page_count - 1 && index + length != read_end)
- tail_to_cache = page;
+ tail_to_cache = folio;
#ifdef CONFIG_SQUASHFS_COMP_CACHE_FULL
/* Cache all pages in the BIO for repeated reads */
- else if (cache_pages)
- cache_pages[idx] = page;
+ else if (cache_folios)
+ cache_folios[idx] = folio;
#endif
if (!bio || idx != end_idx) {
@@ -150,45 +149,45 @@ static int squashfs_bio_read_cached(struct bio *fullbio,
return err;
if (head_to_cache) {
- int ret = add_to_page_cache_lru(head_to_cache, cache_mapping,
+ int ret = filemap_add_folio(cache_mapping, head_to_cache,
read_start >> PAGE_SHIFT,
GFP_NOIO);
if (!ret) {
- SetPageUptodate(head_to_cache);
- unlock_page(head_to_cache);
+ folio_mark_uptodate(head_to_cache);
+ folio_unlock(head_to_cache);
}
}
if (tail_to_cache) {
- int ret = add_to_page_cache_lru(tail_to_cache, cache_mapping,
+ int ret = filemap_add_folio(cache_mapping, tail_to_cache,
(read_end >> PAGE_SHIFT) - 1,
GFP_NOIO);
if (!ret) {
- SetPageUptodate(tail_to_cache);
- unlock_page(tail_to_cache);
+ folio_mark_uptodate(tail_to_cache);
+ folio_unlock(tail_to_cache);
}
}
#ifdef CONFIG_SQUASHFS_COMP_CACHE_FULL
- if (!cache_pages)
+ if (!cache_folios)
goto out;
for (idx = 0; idx < page_count; idx++) {
- if (!cache_pages[idx])
+ if (!cache_folios[idx])
continue;
- int ret = add_to_page_cache_lru(cache_pages[idx], cache_mapping,
+ int ret = filemap_add_folio(cache_mapping, cache_folios[idx],
(read_start >> PAGE_SHIFT) + idx,
GFP_NOIO);
if (!ret) {
- SetPageUptodate(cache_pages[idx]);
- unlock_page(cache_pages[idx]);
+ folio_mark_uptodate(cache_folios[idx]);
+ folio_unlock(cache_folios[idx]);
}
}
- kfree(cache_pages);
+ kfree(cache_folios);
out:
#endif
return 0;
diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c
index 5ca2baa16dc2..ce7d661d5ad8 100644
--- a/fs/squashfs/file.c
+++ b/fs/squashfs/file.c
@@ -493,10 +493,9 @@ out:
return res;
}
-static int squashfs_readahead_fragment(struct page **page,
+static int squashfs_readahead_fragment(struct inode *inode, struct page **page,
unsigned int pages, unsigned int expected, loff_t start)
{
- struct inode *inode = page[0]->mapping->host;
struct squashfs_cache_entry *buffer = squashfs_get_fragment(inode->i_sb,
squashfs_i(inode)->fragment_block,
squashfs_i(inode)->fragment_size);
@@ -605,8 +604,8 @@ static void squashfs_readahead(struct readahead_control *ractl)
if (start >> msblk->block_log == file_end &&
squashfs_i(inode)->fragment_block != SQUASHFS_INVALID_BLK) {
- res = squashfs_readahead_fragment(pages, nr_pages,
- expected, start);
+ res = squashfs_readahead_fragment(inode, pages,
+ nr_pages, expected, start);
if (res)
goto skip_pages;
continue;