 Documentation/ABI/testing/sysfs-fs-f2fs | 22
 Documentation/admin-guide/device-mapper/thin-provisioning.rst | 16
 Documentation/admin-guide/kernel-parameters.txt | 38
 Documentation/devicetree/bindings/i2c/apple,i2c.yaml | 5
 Documentation/devicetree/bindings/rtc/amlogic,a4-rtc.yaml | 11
 Documentation/devicetree/bindings/rtc/nxp,lpc1788-rtc.yaml | 7
 Documentation/devicetree/bindings/rtc/nxp,lpc3220-rtc.yaml | 49
 Documentation/devicetree/bindings/rtc/nxp,pcf85063.yaml | 33
 Documentation/devicetree/bindings/rtc/sophgo,cv1800b-rtc.yaml (renamed from Documentation/devicetree/bindings/soc/sophgo/sophgo,cv1800b-rtc.yaml) | 2
 Documentation/devicetree/bindings/rtc/trivial-rtc.yaml | 2
 Documentation/filesystems/f2fs.rst | 6
 MAINTAINERS | 2
 arch/openrisc/include/asm/mmu.h | 2
 arch/openrisc/include/asm/page.h | 8
 arch/openrisc/include/asm/pgtable.h | 4
 arch/openrisc/include/asm/processor.h | 4
 arch/openrisc/include/asm/ptrace.h | 4
 arch/openrisc/include/asm/setup.h | 2
 arch/openrisc/include/asm/thread_info.h | 8
 arch/openrisc/include/uapi/asm/ptrace.h | 2
 drivers/i2c/busses/Kconfig | 1
 drivers/i2c/busses/i2c-qcom-geni.c | 6
 drivers/i2c/busses/i2c-stm32f7.c | 32
 drivers/i2c/busses/i2c-tegra.c | 64
 drivers/i2c/i2c-core-acpi.c | 1
 drivers/i2c/muxes/i2c-mux-mule.c | 3
 drivers/md/dm-flakey.c | 9
 drivers/md/dm-ima.c | 42
 drivers/md/dm-path-selector.c | 8
 drivers/md/dm-path-selector.h | 2
 drivers/md/dm-ps-historical-service-time.c | 9
 drivers/md/dm-ps-io-affinity.c | 5
 drivers/md/dm-ps-queue-length.c | 9
 drivers/md/dm-ps-round-robin.c | 9
 drivers/md/dm-ps-service-time.c | 9
 drivers/md/dm-raid.c | 7
 drivers/md/dm-table.c | 10
 drivers/md/dm-thin.c | 7
 drivers/md/dm-vdo/funnel-workqueue.c | 3
 drivers/md/dm-verity-fec.c | 4
 drivers/md/dm-verity-target.c | 185
 drivers/md/dm-verity.h | 22
 drivers/md/dm-zone.c | 2
 drivers/md/dm-zoned-target.c | 2
 drivers/md/dm.c | 11
 drivers/rtc/Kconfig | 21
 drivers/rtc/Makefile | 2
 drivers/rtc/lib.c | 40
 drivers/rtc/rtc-ds1307.c | 30
 drivers/rtc/rtc-ds1685.c | 4
 drivers/rtc/rtc-hym8563.c | 15
 drivers/rtc/rtc-m41t80.c | 25
 drivers/rtc/rtc-max31335.c | 12
 drivers/rtc/rtc-nct3018y.c | 15
 drivers/rtc/rtc-pcf85063.c | 267
 drivers/rtc/rtc-pcf8563.c | 15
 drivers/rtc/rtc-rv3028.c | 15
 drivers/rtc/rtc-rv3032.c | 21
 drivers/rtc/rtc-s3c.c | 8
 drivers/rtc/rtc-sh.c | 8
 drivers/rtc/sysfs.c | 64
 drivers/rtc/test_rtc_lib.c (renamed from drivers/rtc/lib_test.c) | 0
 fs/f2fs/checkpoint.c | 8
 fs/f2fs/compress.c | 120
 fs/f2fs/data.c | 183
 fs/f2fs/debug.c | 21
 fs/f2fs/dir.c | 4
 fs/f2fs/extent_cache.c | 10
 fs/f2fs/f2fs.h | 151
 fs/f2fs/file.c | 107
 fs/f2fs/gc.c | 54
 fs/f2fs/gc.h | 5
 fs/f2fs/inline.c | 20
 fs/f2fs/inode.c | 84
 fs/f2fs/namei.c | 12
 fs/f2fs/node.c | 261
 fs/f2fs/node.h | 77
 fs/f2fs/recovery.c | 116
 fs/f2fs/segment.c | 62
 fs/f2fs/segment.h | 59
 fs/f2fs/super.c | 2111
 fs/f2fs/sysfs.c | 48
 include/linux/compiler-gcc.h | 2
 include/linux/f2fs_fs.h | 2
 include/linux/fscrypt.h | 10
 include/linux/rtc/ds1685.h | 2
 include/linux/sprintf.h | 2
 kernel/printk/internal.h | 2
 kernel/printk/nbcon.c | 89
 kernel/printk/printk.c | 20
 kernel/sched/psi.c | 6
 lib/vsprintf.c | 70
 mm/slub.c | 5
 security/apparmor/Makefile | 6
 security/apparmor/af_unix.c | 799
 security/apparmor/apparmorfs.c | 39
 security/apparmor/audit.c | 2
 security/apparmor/capability.c | 61
 security/apparmor/domain.c | 203
 security/apparmor/file.c | 92
 security/apparmor/include/af_unix.h | 55
 security/apparmor/include/apparmor.h | 4
 security/apparmor/include/audit.h | 5
 security/apparmor/include/capability.h | 1
 security/apparmor/include/cred.h | 31
 security/apparmor/include/file.h | 11
 security/apparmor/include/ipc.h | 3
 security/apparmor/include/label.h | 51
 security/apparmor/include/lib.h | 46
 security/apparmor/include/match.h | 10
 security/apparmor/include/net.h | 38
 security/apparmor/include/path.h | 1
 security/apparmor/include/perms.h | 8
 security/apparmor/include/policy.h | 59
 security/apparmor/include/sig_names.h | 6
 security/apparmor/include/signal.h | 19
 security/apparmor/ipc.c | 13
 security/apparmor/label.c | 37
 security/apparmor/lib.c | 114
 security/apparmor/lsm.c | 468
 security/apparmor/match.c | 23
 security/apparmor/mount.c | 12
 security/apparmor/net.c | 189
 security/apparmor/policy.c | 93
 security/apparmor/policy_compat.c | 6
 security/apparmor/policy_ns.c | 2
 security/apparmor/policy_unpack.c | 67
 security/apparmor/policy_unpack_test.c | 6
 security/apparmor/procattr.c | 6
 security/apparmor/resource.c | 11
 security/apparmor/task.c | 11
 131 files changed, 5035 insertions(+), 2547 deletions(-)
diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs
index bf03263b9f46..bc0e7fefc39d 100644
--- a/Documentation/ABI/testing/sysfs-fs-f2fs
+++ b/Documentation/ABI/testing/sysfs-fs-f2fs
@@ -861,3 +861,25 @@ Description: This is a read-only entry to show the value of sb.s_encoding_flags,
SB_ENC_STRICT_MODE_FL 0x00000001
SB_ENC_NO_COMPAT_FALLBACK_FL 0x00000002
============================ ==========
+
+What: /sys/fs/f2fs/<disk>/reserved_pin_section
+Date: June 2025
+Contact: "Chao Yu" <chao@kernel.org>
+Description: This threshold controls when garbage collection is triggered
+ while fallocating on a pinned file, so that there are enough free
+ reserved sections before blocks are preallocated for the pinned file.
+ By default, the value is ovp_sections; for zoned UFS in particular,
+ the value is 1.
+
+What: /sys/fs/f2fs/<disk>/gc_boost_gc_multiple
+Date: June 2025
+Contact: "Daeho Jeong" <daehojeong@google.com>
+Description: Set a multiplier for the background GC migration window when F2FS GC is
+ boosted. The valid range is from 1 to the segment count in a section.
+ Default: 5
+
+What: /sys/fs/f2fs/<disk>/gc_boost_gc_greedy
+Date: June 2025
+Contact: "Daeho Jeong" <daehojeong@google.com>
+Description: Control the GC algorithm for boost GC. 0: cost-benefit, 1: greedy
+ Default: 1
diff --git a/Documentation/admin-guide/device-mapper/thin-provisioning.rst b/Documentation/admin-guide/device-mapper/thin-provisioning.rst
index bafebf79da4b..b2fa49a5608a 100644
--- a/Documentation/admin-guide/device-mapper/thin-provisioning.rst
+++ b/Documentation/admin-guide/device-mapper/thin-provisioning.rst
@@ -80,11 +80,11 @@ less sharing than average you'll need a larger-than-average metadata device.
As a guide, we suggest you calculate the number of bytes to use in the
metadata device as 48 * $data_dev_size / $data_block_size but round it up
-to 2MB if the answer is smaller. If you're creating large numbers of
+to 2MiB if the answer is smaller. If you're creating large numbers of
snapshots which are recording large amounts of change, you may find you
need to increase this.
-The largest size supported is 16GB: If the device is larger,
+The largest size supported is 16GiB; if the device is larger,
a warning will be issued and the excess space will not be used.
Reloading a pool table
@@ -107,13 +107,13 @@ Using an existing pool device
$data_block_size gives the smallest unit of disk space that can be
allocated at a time expressed in units of 512-byte sectors.
-$data_block_size must be between 128 (64KB) and 2097152 (1GB) and a
-multiple of 128 (64KB). $data_block_size cannot be changed after the
+$data_block_size must be between 128 (64KiB) and 2097152 (1GiB) and a
+multiple of 128 (64KiB). $data_block_size cannot be changed after the
thin-pool is created. People primarily interested in thin provisioning
-may want to use a value such as 1024 (512KB). People doing lots of
-snapshotting may want a smaller value such as 128 (64KB). If you are
+may want to use a value such as 1024 (512KiB). People doing lots of
+snapshotting may want a smaller value such as 128 (64KiB). If you are
not zeroing newly-allocated data, a larger $data_block_size in the
-region of 256000 (128MB) is suggested.
+region of 262144 (128MiB) is suggested.
$low_water_mark is expressed in blocks of size $data_block_size. If
free space on the data device drops below this level then a dm event
@@ -291,7 +291,7 @@ i) Constructor
error_if_no_space:
Error IOs, instead of queueing, if no space.
- Data block size must be between 64KB (128 sectors) and 1GB
+ Data block size must be between 64KiB (128 sectors) and 1GiB
(2097152 sectors) inclusive.
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index c1a5cd991f2e..747a55abf494 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -1828,6 +1828,27 @@
backtraces on all cpus.
Format: 0 | 1
+ hash_pointers=
+ [KNL,EARLY]
+ By default, when pointers are printed to the console
+ or buffers via the %p format string, that pointer is
+ "hashed", i.e. obscured by hashing the pointer value.
+ This is a security feature that hides actual kernel
+ addresses from unprivileged users, but it also makes
+ debugging the kernel more difficult since unequal
+ pointers can no longer be compared. The choices are:
+ Format: { auto | always | never }
+ Default: auto
+
+ auto - Hash pointers unless slab_debug is enabled.
+ always - Always hash pointers (even if slab_debug is
+ enabled).
+ never - Never hash pointers. This option should only
+ be specified when debugging the kernel. Do
+ not use on production kernels. The boot
+ param "no_hash_pointers" is an alias for
+ this mode.
+
hashdist= [KNL,NUMA] Large hashes allocated during boot
are distributed across NUMA nodes. Defaults on
for 64-bit NUMA, off otherwise.
@@ -4216,18 +4237,7 @@
no_hash_pointers
[KNL,EARLY]
- Force pointers printed to the console or buffers to be
- unhashed. By default, when a pointer is printed via %p
- format string, that pointer is "hashed", i.e. obscured
- by hashing the pointer value. This is a security feature
- that hides actual kernel addresses from unprivileged
- users, but it also makes debugging the kernel more
- difficult since unequal pointers can no longer be
- compared. However, if this command-line option is
- specified, then all normal pointers will have their true
- value printed. This option should only be specified when
- debugging the kernel. Please do not use on production
- kernels.
+ Alias for "hash_pointers=never".
nohibernate [HIBERNATION] Disable hibernation and resume.
@@ -6644,6 +6654,10 @@
Documentation/admin-guide/mm/slab.rst.
(slub_debug legacy name also accepted for now)
+ Using this option implies the "no_hash_pointers"
+ option which can be undone by adding the
+ "hash_pointers=always" option.
+
slab_max_order= [MM]
Determines the maximum allowed order for slabs.
A high setting may cause OOMs due to memory
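For illustration, a hypothetical command line combining the two options
described above (the slab_debug flags are placeholders):

    slab_debug=FZ hash_pointers=always

Here slab_debug alone would imply "no_hash_pointers"; the explicit
hash_pointers=always keeps pointer hashing enabled regardless.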
diff --git a/Documentation/devicetree/bindings/i2c/apple,i2c.yaml b/Documentation/devicetree/bindings/i2c/apple,i2c.yaml
index 077d2a539c83..fed3e1b8c43f 100644
--- a/Documentation/devicetree/bindings/i2c/apple,i2c.yaml
+++ b/Documentation/devicetree/bindings/i2c/apple,i2c.yaml
@@ -22,6 +22,11 @@ properties:
compatible:
items:
- enum:
+ - apple,s5l8960x-i2c
+ - apple,t7000-i2c
+ - apple,s8000-i2c
+ - apple,t8010-i2c
+ - apple,t8015-i2c
- apple,t8103-i2c
- apple,t8112-i2c
- apple,t6000-i2c
diff --git a/Documentation/devicetree/bindings/rtc/amlogic,a4-rtc.yaml b/Documentation/devicetree/bindings/rtc/amlogic,a4-rtc.yaml
index 5d3ac737abcb..e61f22eca85b 100644
--- a/Documentation/devicetree/bindings/rtc/amlogic,a4-rtc.yaml
+++ b/Documentation/devicetree/bindings/rtc/amlogic,a4-rtc.yaml
@@ -16,9 +16,14 @@ allOf:
properties:
compatible:
- enum:
- - amlogic,a4-rtc
- - amlogic,a5-rtc
+ oneOf:
+ - enum:
+ - amlogic,a4-rtc
+ - amlogic,a5-rtc
+ - items:
+ - enum:
+ - amlogic,c3-rtc
+ - const: amlogic,a5-rtc
reg:
maxItems: 1
diff --git a/Documentation/devicetree/bindings/rtc/nxp,lpc1788-rtc.yaml b/Documentation/devicetree/bindings/rtc/nxp,lpc1788-rtc.yaml
index e88b847a1cc5..e896ba59302a 100644
--- a/Documentation/devicetree/bindings/rtc/nxp,lpc1788-rtc.yaml
+++ b/Documentation/devicetree/bindings/rtc/nxp,lpc1788-rtc.yaml
@@ -18,7 +18,12 @@ allOf:
properties:
compatible:
- const: nxp,lpc1788-rtc
+ oneOf:
+ - items:
+ - enum:
+ - nxp,lpc1850-rtc
+ - const: nxp,lpc1788-rtc
+ - const: nxp,lpc1788-rtc
reg:
maxItems: 1
diff --git a/Documentation/devicetree/bindings/rtc/nxp,lpc3220-rtc.yaml b/Documentation/devicetree/bindings/rtc/nxp,lpc3220-rtc.yaml
new file mode 100644
index 000000000000..53353de4cb37
--- /dev/null
+++ b/Documentation/devicetree/bindings/rtc/nxp,lpc3220-rtc.yaml
@@ -0,0 +1,49 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/rtc/nxp,lpc3220-rtc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: NXP LPC32xx SoC Real-time Clock
+
+maintainers:
+ - Frank Li <Frank.Li@nxp.com>
+
+properties:
+ compatible:
+ enum:
+ - nxp,lpc3220-rtc
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ start-year: true
+
+required:
+ - compatible
+ - reg
+
+allOf:
+ - $ref: rtc.yaml#
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/irq.h>
+ #include <dt-bindings/clock/lpc32xx-clock.h>
+
+ rtc@40024000 {
+ compatible = "nxp,lpc3220-rtc";
+ reg = <0x40024000 0x1000>;
+ interrupt-parent = <&sic1>;
+ interrupts = <20 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&clk LPC32XX_CLK_RTC>;
+ };
+
diff --git a/Documentation/devicetree/bindings/rtc/nxp,pcf85063.yaml b/Documentation/devicetree/bindings/rtc/nxp,pcf85063.yaml
index 2f892f8640d1..1e6277e524c2 100644
--- a/Documentation/devicetree/bindings/rtc/nxp,pcf85063.yaml
+++ b/Documentation/devicetree/bindings/rtc/nxp,pcf85063.yaml
@@ -12,6 +12,7 @@ maintainers:
properties:
compatible:
enum:
+ - microcrystal,rv8063
- microcrystal,rv8263
- nxp,pcf85063
- nxp,pcf85063a
@@ -44,13 +45,19 @@ properties:
wakeup-source: true
+ spi-cs-high: true
+
+ spi-3wire: true
+
allOf:
+ - $ref: /schemas/spi/spi-peripheral-props.yaml#
- $ref: rtc.yaml#
- if:
properties:
compatible:
contains:
enum:
+ - microcrystal,rv8063
- microcrystal,rv8263
then:
properties:
@@ -65,12 +72,23 @@ allOf:
properties:
quartz-load-femtofarads:
const: 7000
+ - if:
+ properties:
+ compatible:
+ not:
+ contains:
+ enum:
+ - microcrystal,rv8063
+ then:
+ properties:
+ spi-cs-high: false
+ spi-3wire: false
required:
- compatible
- reg
-additionalProperties: false
+unevaluatedProperties: false
examples:
- |
@@ -90,3 +108,16 @@ examples:
};
};
};
+
+ - |
+ spi {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ rtc@0 {
+ compatible = "microcrystal,rv8063";
+ reg = <0>;
+ spi-cs-high;
+ spi-3wire;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/soc/sophgo/sophgo,cv1800b-rtc.yaml b/Documentation/devicetree/bindings/rtc/sophgo,cv1800b-rtc.yaml
index 5cf186c396c9..c695d2ff9fcc 100644
--- a/Documentation/devicetree/bindings/soc/sophgo/sophgo,cv1800b-rtc.yaml
+++ b/Documentation/devicetree/bindings/rtc/sophgo,cv1800b-rtc.yaml
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
%YAML 1.2
---
-$id: http://devicetree.org/schemas/sophgo/sophgo,cv1800b-rtc.yaml#
+$id: http://devicetree.org/schemas/rtc/sophgo,cv1800b-rtc.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#
title: Real Time Clock of the Sophgo CV1800 SoC
diff --git a/Documentation/devicetree/bindings/rtc/trivial-rtc.yaml b/Documentation/devicetree/bindings/rtc/trivial-rtc.yaml
index 7330a7200831..5e0c7cd25cc6 100644
--- a/Documentation/devicetree/bindings/rtc/trivial-rtc.yaml
+++ b/Documentation/devicetree/bindings/rtc/trivial-rtc.yaml
@@ -63,8 +63,6 @@ properties:
- microcrystal,rv3029
# Real Time Clock
- microcrystal,rv8523
- # NXP LPC32xx SoC Real-time Clock
- - nxp,lpc3220-rtc
# I2C bus SERIAL INTERFACE REAL-TIME CLOCK IC
- ricoh,r2025sd
# I2C bus SERIAL INTERFACE REAL-TIME CLOCK IC
diff --git a/Documentation/filesystems/f2fs.rst b/Documentation/filesystems/f2fs.rst
index 8eeb7ea14f61..e5bb89452aff 100644
--- a/Documentation/filesystems/f2fs.rst
+++ b/Documentation/filesystems/f2fs.rst
@@ -238,9 +238,9 @@ usrjquota=<file> Appoint specified file and type during mount, so that quota
grpjquota=<file> information can be properly updated during recovery flow,
prjjquota=<file> <quota file>: must be in root directory;
jqfmt=<quota type> <quota type>: [vfsold,vfsv0,vfsv1].
-offusrjquota Turn off user journalled quota.
-offgrpjquota Turn off group journalled quota.
-offprjjquota Turn off project journalled quota.
+usrjquota= Turn off user journalled quota.
+grpjquota= Turn off group journalled quota.
+prjjquota= Turn off project journalled quota.
quota Enable plain user disk quota accounting.
noquota Disable all plain disk quota option.
alloc_mode=%s Adjust block allocation policy, which supports "reuse"
diff --git a/MAINTAINERS b/MAINTAINERS
index edcfac45ccdf..1b57dd4fcf01 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6713,7 +6713,7 @@ S: Supported
F: drivers/input/keyboard/dlink-dir685-touchkeys.c
DALLAS/MAXIM DS1685-FAMILY REAL TIME CLOCK
-M: Joshua Kinard <kumba@gentoo.org>
+M: Joshua Kinard <linux@kumba.dev>
S: Maintained
F: drivers/rtc/rtc-ds1685.c
F: include/linux/rtc/ds1685.h
diff --git a/arch/openrisc/include/asm/mmu.h b/arch/openrisc/include/asm/mmu.h
index eb720110f3a2..e7826a681bc4 100644
--- a/arch/openrisc/include/asm/mmu.h
+++ b/arch/openrisc/include/asm/mmu.h
@@ -15,7 +15,7 @@
#ifndef __ASM_OPENRISC_MMU_H
#define __ASM_OPENRISC_MMU_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
typedef unsigned long mm_context_t;
#endif
diff --git a/arch/openrisc/include/asm/page.h b/arch/openrisc/include/asm/page.h
index c589e96035e1..85797f94d1d7 100644
--- a/arch/openrisc/include/asm/page.h
+++ b/arch/openrisc/include/asm/page.h
@@ -25,7 +25,7 @@
*/
#include <asm/setup.h>
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#define clear_page(page) memset((page), 0, PAGE_SIZE)
#define copy_page(to, from) memcpy((to), (from), PAGE_SIZE)
@@ -55,10 +55,10 @@ typedef struct page *pgtable_t;
#define __pgd(x) ((pgd_t) { (x) })
#define __pgprot(x) ((pgprot_t) { (x) })
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#define __va(x) ((void *)((unsigned long)(x) + PAGE_OFFSET))
#define __pa(x) ((unsigned long) (x) - PAGE_OFFSET)
@@ -73,7 +73,7 @@ static inline unsigned long virt_to_pfn(const void *kaddr)
#define virt_addr_valid(kaddr) (pfn_valid(virt_to_pfn(kaddr)))
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#include <asm-generic/memory_model.h>
#include <asm-generic/getorder.h>
diff --git a/arch/openrisc/include/asm/pgtable.h b/arch/openrisc/include/asm/pgtable.h
index 5bd6463bd514..d33702831505 100644
--- a/arch/openrisc/include/asm/pgtable.h
+++ b/arch/openrisc/include/asm/pgtable.h
@@ -23,7 +23,7 @@
#include <asm-generic/pgtable-nopmd.h>
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <asm/mmu.h>
#include <asm/fixmap.h>
@@ -430,5 +430,5 @@ static inline pte_t pte_swp_clear_exclusive(pte_t pte)
typedef pte_t *pte_addr_t;
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* __ASM_OPENRISC_PGTABLE_H */
diff --git a/arch/openrisc/include/asm/processor.h b/arch/openrisc/include/asm/processor.h
index e05d1b59e24e..3ff893a67c13 100644
--- a/arch/openrisc/include/asm/processor.h
+++ b/arch/openrisc/include/asm/processor.h
@@ -39,7 +39,7 @@
*/
#define TASK_UNMAPPED_BASE (TASK_SIZE / 8 * 3)
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
struct task_struct;
@@ -78,5 +78,5 @@ void show_registers(struct pt_regs *regs);
#define cpu_relax() barrier()
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* __ASM_OPENRISC_PROCESSOR_H */
diff --git a/arch/openrisc/include/asm/ptrace.h b/arch/openrisc/include/asm/ptrace.h
index e5a282b67075..28facf2f3e00 100644
--- a/arch/openrisc/include/asm/ptrace.h
+++ b/arch/openrisc/include/asm/ptrace.h
@@ -27,7 +27,7 @@
* they share a cacheline (not done yet, though... future optimization).
*/
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
/*
* This struct describes how the registers are laid out on the kernel stack
* during a syscall or other kernel entry.
@@ -147,7 +147,7 @@ static inline unsigned long regs_get_register(struct pt_regs *regs,
return *(unsigned long *)((unsigned long)regs + offset);
}
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
/*
* Offsets used by 'ptrace' system call interface.
diff --git a/arch/openrisc/include/asm/setup.h b/arch/openrisc/include/asm/setup.h
index 9acbc5deda69..dce9f4d3b378 100644
--- a/arch/openrisc/include/asm/setup.h
+++ b/arch/openrisc/include/asm/setup.h
@@ -8,7 +8,7 @@
#include <linux/init.h>
#include <asm-generic/setup.h>
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
void __init or1k_early_setup(void *fdt);
#endif
diff --git a/arch/openrisc/include/asm/thread_info.h b/arch/openrisc/include/asm/thread_info.h
index 4af3049c34c2..e338fff7efb0 100644
--- a/arch/openrisc/include/asm/thread_info.h
+++ b/arch/openrisc/include/asm/thread_info.h
@@ -17,7 +17,7 @@
#ifdef __KERNEL__
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <asm/types.h>
#include <asm/processor.h>
#endif
@@ -38,7 +38,7 @@
* - if the contents of this structure are changed, the assembly constants
* must also be changed
*/
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
struct thread_info {
struct task_struct *task; /* main task structure */
@@ -58,7 +58,7 @@ struct thread_info {
*
* preempt_count needs to be 1 initially, until the scheduler is functional.
*/
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#define INIT_THREAD_INFO(tsk) \
{ \
.task = &tsk, \
@@ -75,7 +75,7 @@ register struct thread_info *current_thread_info_reg asm("r10");
#define get_thread_info(ti) get_task_struct((ti)->task)
#define put_thread_info(ti) put_task_struct((ti)->task)
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
/*
* thread information flags
diff --git a/arch/openrisc/include/uapi/asm/ptrace.h b/arch/openrisc/include/uapi/asm/ptrace.h
index a77cc9915ca8..1f12a60d5a06 100644
--- a/arch/openrisc/include/uapi/asm/ptrace.h
+++ b/arch/openrisc/include/uapi/asm/ptrace.h
@@ -20,7 +20,7 @@
#ifndef _UAPI__ASM_OPENRISC_PTRACE_H
#define _UAPI__ASM_OPENRISC_PTRACE_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
/*
* This is the layout of the regset returned by the GETREGSET ptrace call
*/
diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig
index c8d115b58e44..070d014fdc5d 100644
--- a/drivers/i2c/busses/Kconfig
+++ b/drivers/i2c/busses/Kconfig
@@ -992,7 +992,6 @@ config I2C_APPLE
tristate "Apple SMBus platform driver"
depends on !I2C_PASEMI
depends on ARCH_APPLE || COMPILE_TEST
- default ARCH_APPLE
help
Say Y here if you want to use the I2C controller present on Apple
Silicon chips such as the M1.
diff --git a/drivers/i2c/busses/i2c-qcom-geni.c b/drivers/i2c/busses/i2c-qcom-geni.c
index 13889f52b6f7..ff2289b52c84 100644
--- a/drivers/i2c/busses/i2c-qcom-geni.c
+++ b/drivers/i2c/busses/i2c-qcom-geni.c
@@ -155,9 +155,9 @@ static const struct geni_i2c_clk_fld geni_i2c_clk_map_19p2mhz[] = {
/* source_clock = 32 MHz */
static const struct geni_i2c_clk_fld geni_i2c_clk_map_32mhz[] = {
- { I2C_MAX_STANDARD_MODE_FREQ, 8, 14, 18, 40 },
- { I2C_MAX_FAST_MODE_FREQ, 4, 3, 11, 20 },
- { I2C_MAX_FAST_MODE_PLUS_FREQ, 2, 3, 6, 15 },
+ { I2C_MAX_STANDARD_MODE_FREQ, 8, 14, 18, 38 },
+ { I2C_MAX_FAST_MODE_FREQ, 4, 3, 9, 19 },
+ { I2C_MAX_FAST_MODE_PLUS_FREQ, 2, 3, 5, 15 },
{}
};
diff --git a/drivers/i2c/busses/i2c-stm32f7.c b/drivers/i2c/busses/i2c-stm32f7.c
index c8b4b404f6c1..e6815f6cae78 100644
--- a/drivers/i2c/busses/i2c-stm32f7.c
+++ b/drivers/i2c/busses/i2c-stm32f7.c
@@ -742,11 +742,14 @@ static void stm32f7_i2c_dma_callback(void *arg)
{
struct stm32f7_i2c_dev *i2c_dev = arg;
struct stm32_i2c_dma *dma = i2c_dev->dma;
+ struct stm32f7_i2c_msg *f7_msg = &i2c_dev->f7_msg;
stm32f7_i2c_disable_dma_req(i2c_dev);
dmaengine_terminate_async(dma->chan_using);
dma_unmap_single(i2c_dev->dev, dma->dma_buf, dma->dma_len,
dma->dma_data_dir);
+ if (!f7_msg->smbus)
+ i2c_put_dma_safe_msg_buf(f7_msg->buf, i2c_dev->msg, true);
complete(&dma->dma_complete);
}
@@ -882,6 +885,7 @@ static void stm32f7_i2c_xfer_msg(struct stm32f7_i2c_dev *i2c_dev,
{
struct stm32f7_i2c_msg *f7_msg = &i2c_dev->f7_msg;
void __iomem *base = i2c_dev->base;
+ u8 *dma_buf;
u32 cr1, cr2;
int ret;
@@ -931,17 +935,23 @@ static void stm32f7_i2c_xfer_msg(struct stm32f7_i2c_dev *i2c_dev,
/* Configure DMA or enable RX/TX interrupt */
i2c_dev->use_dma = false;
- if (i2c_dev->dma && f7_msg->count >= STM32F7_I2C_DMA_LEN_MIN
- && !i2c_dev->atomic) {
- ret = stm32_i2c_prep_dma_xfer(i2c_dev->dev, i2c_dev->dma,
- msg->flags & I2C_M_RD,
- f7_msg->count, f7_msg->buf,
- stm32f7_i2c_dma_callback,
- i2c_dev);
- if (!ret)
- i2c_dev->use_dma = true;
- else
- dev_warn(i2c_dev->dev, "can't use DMA\n");
+ if (i2c_dev->dma && !i2c_dev->atomic) {
+ dma_buf = i2c_get_dma_safe_msg_buf(msg, STM32F7_I2C_DMA_LEN_MIN);
+ if (dma_buf) {
+ f7_msg->buf = dma_buf;
+ ret = stm32_i2c_prep_dma_xfer(i2c_dev->dev, i2c_dev->dma,
+ msg->flags & I2C_M_RD,
+ f7_msg->count, f7_msg->buf,
+ stm32f7_i2c_dma_callback,
+ i2c_dev);
+ if (ret) {
+ dev_warn(i2c_dev->dev, "can't use DMA\n");
+ i2c_put_dma_safe_msg_buf(f7_msg->buf, msg, false);
+ f7_msg->buf = msg->buf;
+ } else {
+ i2c_dev->use_dma = true;
+ }
+ }
}
if (!i2c_dev->use_dma) {
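A minimal sketch of the i2c-core DMA-safe buffer pairing adopted in the hunk
above, assuming an already-populated struct i2c_msg; start_dma_transfer() is a
hypothetical placeholder for the driver's DMA setup:

        /* Returns NULL if msg->len is below the threshold. */
        u8 *buf = i2c_get_dma_safe_msg_buf(msg, STM32F7_I2C_DMA_LEN_MIN);

        if (buf) {
                int ret = start_dma_transfer(buf, msg->len);

                /*
                 * Release the bounce buffer; pass true only on success so
                 * that read data is copied back into msg->buf.
                 */
                i2c_put_dma_safe_msg_buf(buf, msg, ret == 0);
        }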
diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c
index 4f05afab161f..4eb31b913c1a 100644
--- a/drivers/i2c/busses/i2c-tegra.c
+++ b/drivers/i2c/busses/i2c-tegra.c
@@ -134,6 +134,8 @@
#define I2C_MST_FIFO_STATUS_TX GENMASK(23, 16)
#define I2C_MST_FIFO_STATUS_RX GENMASK(7, 0)
+#define I2C_MASTER_RESET_CNTRL 0x0a8
+
/* configuration load timeout in microseconds */
#define I2C_CONFIG_LOAD_TIMEOUT 1000000
@@ -184,6 +186,9 @@ enum msg_end_type {
* @has_mst_fifo: The I2C controller contains the new MST FIFO interface that
* provides additional features and allows for longer messages to
* be transferred in one go.
+ * @has_mst_reset: The I2C controller contains the MASTER_RESET_CNTRL register,
+ * which provides an alternative to a full controller reset when
+ * configured as I2C master.
* @quirks: I2C adapter quirks for limiting write/read transfer size and not
* allowing 0 length transfers.
* @supports_bus_clear: Bus Clear support to recover from bus hang during
@@ -213,6 +218,7 @@ struct tegra_i2c_hw_feature {
bool has_multi_master_mode;
bool has_slcg_override_reg;
bool has_mst_fifo;
+ bool has_mst_reset;
const struct i2c_adapter_quirks *quirks;
bool supports_bus_clear;
bool has_apb_dma;
@@ -605,6 +611,26 @@ static int tegra_i2c_wait_for_config_load(struct tegra_i2c_dev *i2c_dev)
return 0;
}
+static int tegra_i2c_master_reset(struct tegra_i2c_dev *i2c_dev)
+{
+ if (!i2c_dev->hw->has_mst_reset)
+ return -EOPNOTSUPP;
+
+ /*
+ * Writing 1 to I2C_MASTER_RESET_CNTRL will reset all internal state of
+ * Master logic including FIFOs. Clear this bit to 0 for normal operation.
+ * SW needs to wait for 2us after assertion and de-assertion of this soft
+ * reset.
+ */
+ i2c_writel(i2c_dev, 0x1, I2C_MASTER_RESET_CNTRL);
+ fsleep(2);
+
+ i2c_writel(i2c_dev, 0x0, I2C_MASTER_RESET_CNTRL);
+ fsleep(2);
+
+ return 0;
+}
+
static int tegra_i2c_init(struct tegra_i2c_dev *i2c_dev)
{
u32 val, clk_divisor, clk_multiplier, tsu_thd, tlow, thigh, non_hs_mode;
@@ -612,6 +638,16 @@ static int tegra_i2c_init(struct tegra_i2c_dev *i2c_dev)
int err;
/*
+ * Reset the controller before initializing it.
+ * If device_reset() returns -ENOENT, i.e. when no reset line is
+ * available, fall back to the internal software reset if the
+ * controller supports it.
+ */
+ err = device_reset(i2c_dev->dev);
+ if (err == -ENOENT)
+ err = tegra_i2c_master_reset(i2c_dev);
+
+ /*
* The reset shouldn't ever fail in practice. The failure will be a
* sign of a severe problem that needs to be resolved. Still we don't
* want to fail the initialization completely because this may break
@@ -619,7 +655,6 @@ static int tegra_i2c_init(struct tegra_i2c_dev *i2c_dev)
* emit a noisy warning on error, which won't stay unnoticed and
* won't hose machine entirely.
*/
- err = device_reset(i2c_dev->dev);
WARN_ON_ONCE(err);
if (IS_DVC(i2c_dev))
@@ -1266,17 +1301,9 @@ static int tegra_i2c_xfer_msg(struct tegra_i2c_dev *i2c_dev,
if (i2c_dev->dma_mode) {
if (i2c_dev->msg_read) {
- dma_sync_single_for_device(i2c_dev->dma_dev,
- i2c_dev->dma_phys,
- xfer_size, DMA_FROM_DEVICE);
-
err = tegra_i2c_dma_submit(i2c_dev, xfer_size);
if (err)
return err;
- } else {
- dma_sync_single_for_cpu(i2c_dev->dma_dev,
- i2c_dev->dma_phys,
- xfer_size, DMA_TO_DEVICE);
}
}
@@ -1286,11 +1313,6 @@ static int tegra_i2c_xfer_msg(struct tegra_i2c_dev *i2c_dev,
if (i2c_dev->dma_mode) {
memcpy(i2c_dev->dma_buf + I2C_PACKET_HEADER_SIZE,
msg->buf, i2c_dev->msg_len);
-
- dma_sync_single_for_device(i2c_dev->dma_dev,
- i2c_dev->dma_phys,
- xfer_size, DMA_TO_DEVICE);
-
err = tegra_i2c_dma_submit(i2c_dev, xfer_size);
if (err)
return err;
@@ -1331,13 +1353,8 @@ static int tegra_i2c_xfer_msg(struct tegra_i2c_dev *i2c_dev,
return -ETIMEDOUT;
}
- if (i2c_dev->msg_read && i2c_dev->msg_err == I2C_ERR_NONE) {
- dma_sync_single_for_cpu(i2c_dev->dma_dev,
- i2c_dev->dma_phys,
- xfer_size, DMA_FROM_DEVICE);
-
+ if (i2c_dev->msg_read && i2c_dev->msg_err == I2C_ERR_NONE)
memcpy(i2c_dev->msg_buf, i2c_dev->dma_buf, i2c_dev->msg_len);
- }
}
time_left = tegra_i2c_wait_completion(i2c_dev, &i2c_dev->msg_complete,
@@ -1468,6 +1485,7 @@ static const struct tegra_i2c_hw_feature tegra20_i2c_hw = {
.has_multi_master_mode = false,
.has_slcg_override_reg = false,
.has_mst_fifo = false,
+ .has_mst_reset = false,
.quirks = &tegra_i2c_quirks,
.supports_bus_clear = false,
.has_apb_dma = true,
@@ -1492,6 +1510,7 @@ static const struct tegra_i2c_hw_feature tegra30_i2c_hw = {
.has_multi_master_mode = false,
.has_slcg_override_reg = false,
.has_mst_fifo = false,
+ .has_mst_reset = false,
.quirks = &tegra_i2c_quirks,
.supports_bus_clear = false,
.has_apb_dma = true,
@@ -1516,6 +1535,7 @@ static const struct tegra_i2c_hw_feature tegra114_i2c_hw = {
.has_multi_master_mode = false,
.has_slcg_override_reg = false,
.has_mst_fifo = false,
+ .has_mst_reset = false,
.quirks = &tegra_i2c_quirks,
.supports_bus_clear = true,
.has_apb_dma = true,
@@ -1540,6 +1560,7 @@ static const struct tegra_i2c_hw_feature tegra124_i2c_hw = {
.has_multi_master_mode = false,
.has_slcg_override_reg = true,
.has_mst_fifo = false,
+ .has_mst_reset = false,
.quirks = &tegra_i2c_quirks,
.supports_bus_clear = true,
.has_apb_dma = true,
@@ -1564,6 +1585,7 @@ static const struct tegra_i2c_hw_feature tegra210_i2c_hw = {
.has_multi_master_mode = false,
.has_slcg_override_reg = true,
.has_mst_fifo = false,
+ .has_mst_reset = false,
.quirks = &tegra_i2c_quirks,
.supports_bus_clear = true,
.has_apb_dma = true,
@@ -1588,6 +1610,7 @@ static const struct tegra_i2c_hw_feature tegra186_i2c_hw = {
.has_multi_master_mode = false,
.has_slcg_override_reg = true,
.has_mst_fifo = false,
+ .has_mst_reset = false,
.quirks = &tegra_i2c_quirks,
.supports_bus_clear = true,
.has_apb_dma = false,
@@ -1612,6 +1635,7 @@ static const struct tegra_i2c_hw_feature tegra194_i2c_hw = {
.has_multi_master_mode = true,
.has_slcg_override_reg = true,
.has_mst_fifo = true,
+ .has_mst_reset = true,
.quirks = &tegra194_i2c_quirks,
.supports_bus_clear = true,
.has_apb_dma = false,
diff --git a/drivers/i2c/i2c-core-acpi.c b/drivers/i2c/i2c-core-acpi.c
index 3445cc3b476b..ed90858a27b7 100644
--- a/drivers/i2c/i2c-core-acpi.c
+++ b/drivers/i2c/i2c-core-acpi.c
@@ -370,6 +370,7 @@ static const struct acpi_device_id i2c_acpi_force_100khz_device_ids[] = {
* the device works without issues on Windows at what is expected to be
* a 400KHz frequency. The root cause of the issue is not known.
*/
+ { "DLL0945", 0 },
{ "ELAN06FA", 0 },
{}
};
diff --git a/drivers/i2c/muxes/i2c-mux-mule.c b/drivers/i2c/muxes/i2c-mux-mule.c
index 284ff4afeeac..d3b32b794172 100644
--- a/drivers/i2c/muxes/i2c-mux-mule.c
+++ b/drivers/i2c/muxes/i2c-mux-mule.c
@@ -47,7 +47,6 @@ static int mule_i2c_mux_probe(struct platform_device *pdev)
struct mule_i2c_reg_mux *priv;
struct i2c_client *client;
struct i2c_mux_core *muxc;
- struct device_node *dev;
unsigned int readback;
int ndev, ret;
bool old_fw;
@@ -95,7 +94,7 @@ static int mule_i2c_mux_probe(struct platform_device *pdev)
"Failed to register mux remove\n");
/* Create device adapters */
- for_each_child_of_node(mux_dev->of_node, dev) {
+ for_each_child_of_node_scoped(mux_dev->of_node, dev) {
u32 reg;
ret = of_property_read_u32(dev, "reg", &reg);
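A minimal sketch of the scoped iterator used above: the reference held on
"child" is dropped automatically when it goes out of scope, so early exits no
longer need an explicit of_node_put():

        for_each_child_of_node_scoped(mux_dev->of_node, child) {
                u32 reg;

                if (of_property_read_u32(child, "reg", &reg))
                        return -EINVAL; /* no of_node_put() needed */
        }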
diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c
index c711db6f8f5c..cf17fd46e255 100644
--- a/drivers/md/dm-flakey.c
+++ b/drivers/md/dm-flakey.c
@@ -215,16 +215,19 @@ static int parse_features(struct dm_arg_set *as, struct flakey_c *fc,
}
if (test_bit(DROP_WRITES, &fc->flags) &&
- (fc->corrupt_bio_rw == WRITE || fc->random_write_corrupt)) {
+ ((fc->corrupt_bio_byte && fc->corrupt_bio_rw == WRITE) ||
+ fc->random_write_corrupt)) {
ti->error = "drop_writes is incompatible with random_write_corrupt or corrupt_bio_byte with the WRITE flag set";
return -EINVAL;
} else if (test_bit(ERROR_WRITES, &fc->flags) &&
- (fc->corrupt_bio_rw == WRITE || fc->random_write_corrupt)) {
+ ((fc->corrupt_bio_byte && fc->corrupt_bio_rw == WRITE) ||
+ fc->random_write_corrupt)) {
ti->error = "error_writes is incompatible with random_write_corrupt or corrupt_bio_byte with the WRITE flag set";
return -EINVAL;
} else if (test_bit(ERROR_READS, &fc->flags) &&
- (fc->corrupt_bio_rw == READ || fc->random_read_corrupt)) {
+ ((fc->corrupt_bio_byte && fc->corrupt_bio_rw == READ) ||
+ fc->random_read_corrupt)) {
ti->error = "error_reads is incompatible with random_read_corrupt or corrupt_bio_byte with the READ flag set";
return -EINVAL;
}
diff --git a/drivers/md/dm-ima.c b/drivers/md/dm-ima.c
index b90f34259fbb..8b50c908c6f4 100644
--- a/drivers/md/dm-ima.c
+++ b/drivers/md/dm-ima.c
@@ -241,10 +241,11 @@ void dm_ima_measure_on_table_load(struct dm_table *table, unsigned int status_fl
/*
* First retrieve the target metadata.
*/
- scnprintf(target_metadata_buf, DM_IMA_TARGET_METADATA_BUF_LEN,
- "target_index=%d,target_begin=%llu,target_len=%llu,",
- i, ti->begin, ti->len);
- target_metadata_buf_len = strlen(target_metadata_buf);
+ target_metadata_buf_len =
+ scnprintf(target_metadata_buf,
+ DM_IMA_TARGET_METADATA_BUF_LEN,
+ "target_index=%d,target_begin=%llu,target_len=%llu,",
+ i, ti->begin, ti->len);
/*
* Then retrieve the actual target data.
@@ -448,11 +449,9 @@ void dm_ima_measure_on_device_resume(struct mapped_device *md, bool swap)
if (r)
goto error;
- scnprintf(device_table_data, DM_IMA_DEVICE_BUF_LEN,
- "%sname=%s,uuid=%s;device_resume=no_data;",
- DM_IMA_VERSION_STR, dev_name, dev_uuid);
- l = strlen(device_table_data);
-
+ l = scnprintf(device_table_data, DM_IMA_DEVICE_BUF_LEN,
+ "%sname=%s,uuid=%s;device_resume=no_data;",
+ DM_IMA_VERSION_STR, dev_name, dev_uuid);
}
capacity_len = strlen(capacity_str);
@@ -561,10 +560,9 @@ void dm_ima_measure_on_device_remove(struct mapped_device *md, bool remove_all)
if (dm_ima_alloc_and_copy_name_uuid(md, &dev_name, &dev_uuid, noio))
goto error;
- scnprintf(device_table_data, DM_IMA_DEVICE_BUF_LEN,
- "%sname=%s,uuid=%s;device_remove=no_data;",
- DM_IMA_VERSION_STR, dev_name, dev_uuid);
- l = strlen(device_table_data);
+ l = scnprintf(device_table_data, DM_IMA_DEVICE_BUF_LEN,
+ "%sname=%s,uuid=%s;device_remove=no_data;",
+ DM_IMA_VERSION_STR, dev_name, dev_uuid);
}
memcpy(device_table_data + l, remove_all_str, remove_all_len);
@@ -647,10 +645,9 @@ void dm_ima_measure_on_table_clear(struct mapped_device *md, bool new_map)
if (dm_ima_alloc_and_copy_name_uuid(md, &dev_name, &dev_uuid, noio))
goto error2;
- scnprintf(device_table_data, DM_IMA_DEVICE_BUF_LEN,
- "%sname=%s,uuid=%s;table_clear=no_data;",
- DM_IMA_VERSION_STR, dev_name, dev_uuid);
- l = strlen(device_table_data);
+ l = scnprintf(device_table_data, DM_IMA_DEVICE_BUF_LEN,
+ "%sname=%s,uuid=%s;table_clear=no_data;",
+ DM_IMA_VERSION_STR, dev_name, dev_uuid);
}
capacity_len = strlen(capacity_str);
@@ -706,7 +703,7 @@ void dm_ima_measure_on_device_rename(struct mapped_device *md)
char *old_device_data = NULL, *new_device_data = NULL, *combined_device_data = NULL;
char *new_dev_name = NULL, *new_dev_uuid = NULL, *capacity_str = NULL;
bool noio = true;
- int r;
+ int r, len;
if (dm_ima_alloc_and_copy_device_data(md, &new_device_data,
md->ima.active_table.num_targets, noio))
@@ -728,12 +725,11 @@ void dm_ima_measure_on_device_rename(struct mapped_device *md)
md->ima.active_table.device_metadata = new_device_data;
md->ima.active_table.device_metadata_len = strlen(new_device_data);
- scnprintf(combined_device_data, DM_IMA_DEVICE_BUF_LEN * 2,
- "%s%snew_name=%s,new_uuid=%s;%s", DM_IMA_VERSION_STR, old_device_data,
- new_dev_name, new_dev_uuid, capacity_str);
+ len = scnprintf(combined_device_data, DM_IMA_DEVICE_BUF_LEN * 2,
+ "%s%snew_name=%s,new_uuid=%s;%s", DM_IMA_VERSION_STR, old_device_data,
+ new_dev_name, new_dev_uuid, capacity_str);
- dm_ima_measure_data("dm_device_rename", combined_device_data, strlen(combined_device_data),
- noio);
+ dm_ima_measure_data("dm_device_rename", combined_device_data, len, noio);
goto exit;
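A sketch of the scnprintf() property these hunks rely on: the return value is
the number of characters actually written to the buffer (excluding the
terminating NUL), so the follow-up strlen() calls were redundant. name and
uuid stand in for the real arguments:

        char buf[64];
        int len = scnprintf(buf, sizeof(buf), "name=%s,uuid=%s;", name, uuid);

        /* len == strlen(buf), even when the output was truncated to fit */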
diff --git a/drivers/md/dm-path-selector.c b/drivers/md/dm-path-selector.c
index 3e4cb81ce512..d0b883fabfeb 100644
--- a/drivers/md/dm-path-selector.c
+++ b/drivers/md/dm-path-selector.c
@@ -117,16 +117,16 @@ int dm_register_path_selector(struct path_selector_type *pst)
}
EXPORT_SYMBOL_GPL(dm_register_path_selector);
-int dm_unregister_path_selector(struct path_selector_type *pst)
+void dm_unregister_path_selector(struct path_selector_type *pst)
{
struct ps_internal *psi;
down_write(&_ps_lock);
psi = __find_path_selector_type(pst->name);
- if (!psi) {
+ if (WARN_ON(!psi)) {
up_write(&_ps_lock);
- return -EINVAL;
+ return;
}
list_del(&psi->list);
@@ -134,7 +134,5 @@ int dm_unregister_path_selector(struct path_selector_type *pst)
up_write(&_ps_lock);
kfree(psi);
-
- return 0;
}
EXPORT_SYMBOL_GPL(dm_unregister_path_selector);
diff --git a/drivers/md/dm-path-selector.h b/drivers/md/dm-path-selector.h
index 3861b2d8b963..7b2270532e64 100644
--- a/drivers/md/dm-path-selector.h
+++ b/drivers/md/dm-path-selector.h
@@ -96,7 +96,7 @@ struct path_selector_type {
int dm_register_path_selector(struct path_selector_type *type);
/* Unregister a path selector */
-int dm_unregister_path_selector(struct path_selector_type *type);
+void dm_unregister_path_selector(struct path_selector_type *type);
/* Returns a registered path selector type */
struct path_selector_type *dm_get_path_selector(const char *name);
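A minimal sketch, using a hypothetical example_ps selector, of the module
init/exit pattern the hunks below converge on: registration failures now fail
the module load, and unregistration is void:

        static int __init dm_example_init(void)
        {
                int r = dm_register_path_selector(&example_ps);

                if (r < 0)
                        return r; /* propagate, do not just log */
                return 0;
        }

        static void __exit dm_example_exit(void)
        {
                dm_unregister_path_selector(&example_ps); /* cannot fail */
        }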
diff --git a/drivers/md/dm-ps-historical-service-time.c b/drivers/md/dm-ps-historical-service-time.c
index b49e10d76d03..f07e773d9cc0 100644
--- a/drivers/md/dm-ps-historical-service-time.c
+++ b/drivers/md/dm-ps-historical-service-time.c
@@ -541,8 +541,10 @@ static int __init dm_hst_init(void)
{
int r = dm_register_path_selector(&hst_ps);
- if (r < 0)
+ if (r < 0) {
DMERR("register failed %d", r);
+ return r;
+ }
DMINFO("version " HST_VERSION " loaded");
@@ -551,10 +553,7 @@ static int __init dm_hst_init(void)
static void __exit dm_hst_exit(void)
{
- int r = dm_unregister_path_selector(&hst_ps);
-
- if (r < 0)
- DMERR("unregister failed %d", r);
+ dm_unregister_path_selector(&hst_ps);
}
module_init(dm_hst_init);
diff --git a/drivers/md/dm-ps-io-affinity.c b/drivers/md/dm-ps-io-affinity.c
index 716807e511ee..80415a045c68 100644
--- a/drivers/md/dm-ps-io-affinity.c
+++ b/drivers/md/dm-ps-io-affinity.c
@@ -260,10 +260,7 @@ static int __init dm_ioa_init(void)
static void __exit dm_ioa_exit(void)
{
- int ret = dm_unregister_path_selector(&ioa_ps);
-
- if (ret < 0)
- DMERR("unregister failed %d", ret);
+ dm_unregister_path_selector(&ioa_ps);
}
module_init(dm_ioa_init);
diff --git a/drivers/md/dm-ps-queue-length.c b/drivers/md/dm-ps-queue-length.c
index e305f05ad1e5..9c68701ed7a4 100644
--- a/drivers/md/dm-ps-queue-length.c
+++ b/drivers/md/dm-ps-queue-length.c
@@ -260,8 +260,10 @@ static int __init dm_ql_init(void)
{
int r = dm_register_path_selector(&ql_ps);
- if (r < 0)
+ if (r < 0) {
DMERR("register failed %d", r);
+ return r;
+ }
DMINFO("version " QL_VERSION " loaded");
@@ -270,10 +272,7 @@ static int __init dm_ql_init(void)
static void __exit dm_ql_exit(void)
{
- int r = dm_unregister_path_selector(&ql_ps);
-
- if (r < 0)
- DMERR("unregister failed %d", r);
+ dm_unregister_path_selector(&ql_ps);
}
module_init(dm_ql_init);
diff --git a/drivers/md/dm-ps-round-robin.c b/drivers/md/dm-ps-round-robin.c
index d1745b123dc1..0c12f4073461 100644
--- a/drivers/md/dm-ps-round-robin.c
+++ b/drivers/md/dm-ps-round-robin.c
@@ -220,8 +220,10 @@ static int __init dm_rr_init(void)
{
int r = dm_register_path_selector(&rr_ps);
- if (r < 0)
+ if (r < 0) {
DMERR("register failed %d", r);
+ return r;
+ }
DMINFO("version " RR_VERSION " loaded");
@@ -230,10 +232,7 @@ static int __init dm_rr_init(void)
static void __exit dm_rr_exit(void)
{
- int r = dm_unregister_path_selector(&rr_ps);
-
- if (r < 0)
- DMERR("unregister failed %d", r);
+ dm_unregister_path_selector(&rr_ps);
}
module_init(dm_rr_init);
diff --git a/drivers/md/dm-ps-service-time.c b/drivers/md/dm-ps-service-time.c
index 969d31c40272..0543fe7969c4 100644
--- a/drivers/md/dm-ps-service-time.c
+++ b/drivers/md/dm-ps-service-time.c
@@ -341,8 +341,10 @@ static int __init dm_st_init(void)
{
int r = dm_register_path_selector(&st_ps);
- if (r < 0)
+ if (r < 0) {
DMERR("register failed %d", r);
+ return r;
+ }
DMINFO("version " ST_VERSION " loaded");
@@ -351,10 +353,7 @@ static int __init dm_st_init(void)
static void __exit dm_st_exit(void)
{
- int r = dm_unregister_path_selector(&st_ps);
-
- if (r < 0)
- DMERR("unregister failed %d", r);
+ dm_unregister_path_selector(&st_ps);
}
module_init(dm_st_init);
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index e8c0a8c6fb51..15c538ee9537 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -14,7 +14,6 @@
#include "raid5.h"
#include "raid10.h"
#include "md-bitmap.h"
-#include "dm-core.h"
#include <linux/device-mapper.h>
@@ -2532,6 +2531,10 @@ static int analyse_superblocks(struct dm_target *ti, struct raid_set *rs)
struct md_rdev *rdev, *freshest;
struct mddev *mddev = &rs->md;
+ /* Respect resynchronization requested with "sync" argument. */
+ if (test_bit(__CTR_FLAG_SYNC, &rs->ctr_flags))
+ set_bit(MD_ARRAY_FIRST_USE, &mddev->flags);
+
freshest = NULL;
rdev_for_each(rdev, mddev) {
if (test_bit(Journal, &rdev->flags))
@@ -3305,7 +3308,7 @@ size_check:
/* Disable/enable discard support on raid set. */
configure_discard_support(rs);
- rs->md.dm_gendisk = ti->table->md->disk;
+ rs->md.dm_gendisk = dm_disk(dm_table_get_md(ti->table));
mddev_unlock(&rs->md);
return 0;
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index d9d5e6aa5707..ad0a60a07b93 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -899,17 +899,17 @@ static bool dm_table_supports_dax(struct dm_table *t,
return true;
}
-static int device_is_rq_stackable(struct dm_target *ti, struct dm_dev *dev,
- sector_t start, sector_t len, void *data)
+static int device_is_not_rq_stackable(struct dm_target *ti, struct dm_dev *dev,
+ sector_t start, sector_t len, void *data)
{
struct block_device *bdev = dev->bdev;
struct request_queue *q = bdev_get_queue(bdev);
/* request-based cannot stack on partitions! */
if (bdev_is_partition(bdev))
- return false;
+ return true;
- return queue_is_mq(q);
+ return !queue_is_mq(q);
}
static int dm_table_determine_type(struct dm_table *t)
@@ -1005,7 +1005,7 @@ verify_rq_based:
/* Non-request-stackable devices can't be used for request-based dm */
if (!ti->type->iterate_devices ||
- !ti->type->iterate_devices(ti, device_is_rq_stackable, NULL)) {
+ ti->type->iterate_devices(ti, device_is_not_rq_stackable, NULL)) {
DMERR("table load rejected: including non-request-stackable devices");
return -EINVAL;
}
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index 05cf4e3f2bbe..007bb93e5fca 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -4111,8 +4111,8 @@ static void pool_io_hints(struct dm_target *ti, struct queue_limits *limits)
static struct target_type pool_target = {
.name = "thin-pool",
.features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE |
- DM_TARGET_IMMUTABLE,
- .version = {1, 23, 0},
+ DM_TARGET_IMMUTABLE | DM_TARGET_PASSES_CRYPTO,
+ .version = {1, 24, 0},
.module = THIS_MODULE,
.ctr = pool_ctr,
.dtr = pool_dtr,
@@ -4497,7 +4497,8 @@ static void thin_io_hints(struct dm_target *ti, struct queue_limits *limits)
static struct target_type thin_target = {
.name = "thin",
- .version = {1, 23, 0},
+ .features = DM_TARGET_PASSES_CRYPTO,
+ .version = {1, 24, 0},
.module = THIS_MODULE,
.ctr = thin_ctr,
.dtr = thin_dtr,
diff --git a/drivers/md/dm-vdo/funnel-workqueue.c b/drivers/md/dm-vdo/funnel-workqueue.c
index ae11941c90a9..0613c82bbe8e 100644
--- a/drivers/md/dm-vdo/funnel-workqueue.c
+++ b/drivers/md/dm-vdo/funnel-workqueue.c
@@ -252,8 +252,7 @@ static void service_work_queue(struct simple_work_queue *queue)
* This speeds up some performance tests; that "other work" might include other VDO
* threads.
*/
- if (need_resched())
- cond_resched();
+ cond_resched();
}
run_finish_hook(queue);
diff --git a/drivers/md/dm-verity-fec.c b/drivers/md/dm-verity-fec.c
index 631a887b487c..d382a390d39a 100644
--- a/drivers/md/dm-verity-fec.c
+++ b/drivers/md/dm-verity-fec.c
@@ -191,7 +191,7 @@ static int fec_is_erasure(struct dm_verity *v, struct dm_verity_io *io,
u8 *want_digest, u8 *data)
{
if (unlikely(verity_hash(v, io, data, 1 << v->data_dev_block_bits,
- verity_io_real_digest(v, io), true)))
+ verity_io_real_digest(v, io))))
return 0;
return memcmp(verity_io_real_digest(v, io), want_digest,
@@ -392,7 +392,7 @@ static int fec_decode_rsb(struct dm_verity *v, struct dm_verity_io *io,
/* Always re-validate the corrected block against the expected hash */
r = verity_hash(v, io, fio->output, 1 << v->data_dev_block_bits,
- verity_io_real_digest(v, io), true);
+ verity_io_real_digest(v, io));
if (unlikely(r < 0))
return r;
diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c
index 81186bded1ce..66a00a8ccb39 100644
--- a/drivers/md/dm-verity-target.c
+++ b/drivers/md/dm-verity-target.c
@@ -19,7 +19,6 @@
#include "dm-audit.h"
#include <linux/module.h>
#include <linux/reboot.h>
-#include <linux/scatterlist.h>
#include <linux/string.h>
#include <linux/jump_label.h>
#include <linux/security.h>
@@ -61,9 +60,6 @@ module_param_array_named(use_bh_bytes, dm_verity_use_bh_bytes, uint, NULL, 0644)
static DEFINE_STATIC_KEY_FALSE(use_bh_wq_enabled);
-/* Is at least one dm-verity instance using ahash_tfm instead of shash_tfm? */
-static DEFINE_STATIC_KEY_FALSE(ahash_enabled);
-
struct dm_verity_prefetch_work {
struct work_struct work;
struct dm_verity *v;
@@ -118,100 +114,21 @@ static sector_t verity_position_at_level(struct dm_verity *v, sector_t block,
return block >> (level * v->hash_per_block_bits);
}
-static int verity_ahash_update(struct dm_verity *v, struct ahash_request *req,
- const u8 *data, size_t len,
- struct crypto_wait *wait)
-{
- struct scatterlist sg;
-
- if (likely(!is_vmalloc_addr(data))) {
- sg_init_one(&sg, data, len);
- ahash_request_set_crypt(req, &sg, NULL, len);
- return crypto_wait_req(crypto_ahash_update(req), wait);
- }
-
- do {
- int r;
- size_t this_step = min_t(size_t, len, PAGE_SIZE - offset_in_page(data));
-
- flush_kernel_vmap_range((void *)data, this_step);
- sg_init_table(&sg, 1);
- sg_set_page(&sg, vmalloc_to_page(data), this_step, offset_in_page(data));
- ahash_request_set_crypt(req, &sg, NULL, this_step);
- r = crypto_wait_req(crypto_ahash_update(req), wait);
- if (unlikely(r))
- return r;
- data += this_step;
- len -= this_step;
- } while (len);
-
- return 0;
-}
-
-/*
- * Wrapper for crypto_ahash_init, which handles verity salting.
- */
-static int verity_ahash_init(struct dm_verity *v, struct ahash_request *req,
- struct crypto_wait *wait, bool may_sleep)
-{
- int r;
-
- ahash_request_set_tfm(req, v->ahash_tfm);
- ahash_request_set_callback(req,
- may_sleep ? CRYPTO_TFM_REQ_MAY_SLEEP | CRYPTO_TFM_REQ_MAY_BACKLOG : 0,
- crypto_req_done, (void *)wait);
- crypto_init_wait(wait);
-
- r = crypto_wait_req(crypto_ahash_init(req), wait);
-
- if (unlikely(r < 0)) {
- if (r != -ENOMEM)
- DMERR("crypto_ahash_init failed: %d", r);
- return r;
- }
-
- if (likely(v->salt_size && (v->version >= 1)))
- r = verity_ahash_update(v, req, v->salt, v->salt_size, wait);
-
- return r;
-}
-
-static int verity_ahash_final(struct dm_verity *v, struct ahash_request *req,
- u8 *digest, struct crypto_wait *wait)
-{
- int r;
-
- if (unlikely(v->salt_size && (!v->version))) {
- r = verity_ahash_update(v, req, v->salt, v->salt_size, wait);
-
- if (r < 0) {
- DMERR("%s failed updating salt: %d", __func__, r);
- goto out;
- }
- }
-
- ahash_request_set_crypt(req, NULL, digest, 0);
- r = crypto_wait_req(crypto_ahash_final(req), wait);
-out:
- return r;
-}
-
int verity_hash(struct dm_verity *v, struct dm_verity_io *io,
- const u8 *data, size_t len, u8 *digest, bool may_sleep)
+ const u8 *data, size_t len, u8 *digest)
{
+ struct shash_desc *desc = &io->hash_desc;
int r;
- if (static_branch_unlikely(&ahash_enabled) && !v->shash_tfm) {
- struct ahash_request *req = verity_io_hash_req(v, io);
- struct crypto_wait wait;
-
- r = verity_ahash_init(v, req, &wait, may_sleep) ?:
- verity_ahash_update(v, req, data, len, &wait) ?:
- verity_ahash_final(v, req, digest, &wait);
+ desc->tfm = v->shash_tfm;
+ if (unlikely(v->initial_hashstate == NULL)) {
+ /* Version 0: salt at end */
+ r = crypto_shash_init(desc) ?:
+ crypto_shash_update(desc, data, len) ?:
+ crypto_shash_update(desc, v->salt, v->salt_size) ?:
+ crypto_shash_final(desc, digest);
} else {
- struct shash_desc *desc = verity_io_hash_req(v, io);
-
- desc->tfm = v->shash_tfm;
+ /* Version 1: salt at beginning */
r = crypto_shash_import(desc, v->initial_hashstate) ?:
crypto_shash_finup(desc, data, len, digest);
}
@@ -362,7 +279,7 @@ static int verity_verify_level(struct dm_verity *v, struct dm_verity_io *io,
}
r = verity_hash(v, io, data, 1 << v->hash_dev_block_bits,
- verity_io_real_digest(v, io), !io->in_bh);
+ verity_io_real_digest(v, io));
if (unlikely(r < 0))
goto release_ret_r;
@@ -465,7 +382,7 @@ static noinline int verity_recheck(struct dm_verity *v, struct dm_verity_io *io,
goto free_ret;
r = verity_hash(v, io, buffer, 1 << v->data_dev_block_bits,
- verity_io_real_digest(v, io), true);
+ verity_io_real_digest(v, io));
if (unlikely(r))
goto free_ret;
@@ -581,7 +498,7 @@ static int verity_verify_io(struct dm_verity_io *io)
}
r = verity_hash(v, io, data, block_size,
- verity_io_real_digest(v, io), !io->in_bh);
+ verity_io_real_digest(v, io));
if (unlikely(r < 0)) {
kunmap_local(data);
return r;
@@ -1092,12 +1009,7 @@ static void verity_dtr(struct dm_target *ti)
kfree(v->zero_digest);
verity_free_sig(v);
- if (v->ahash_tfm) {
- static_branch_dec(&ahash_enabled);
- crypto_free_ahash(v->ahash_tfm);
- } else {
- crypto_free_shash(v->shash_tfm);
- }
+ crypto_free_shash(v->shash_tfm);
kfree(v->alg_name);
@@ -1157,7 +1069,8 @@ static int verity_alloc_zero_digest(struct dm_verity *v)
if (!v->zero_digest)
return r;
- io = kmalloc(sizeof(*io) + v->hash_reqsize, GFP_KERNEL);
+ io = kmalloc(sizeof(*io) + crypto_shash_descsize(v->shash_tfm),
+ GFP_KERNEL);
if (!io)
return r; /* verity_dtr will free zero_digest */
@@ -1168,7 +1081,7 @@ static int verity_alloc_zero_digest(struct dm_verity *v)
goto out;
r = verity_hash(v, io, zero_data, 1 << v->data_dev_block_bits,
- v->zero_digest, true);
+ v->zero_digest);
out:
kfree(io);
@@ -1324,9 +1237,7 @@ static int verity_parse_opt_args(struct dm_arg_set *as, struct dm_verity *v,
static int verity_setup_hash_alg(struct dm_verity *v, const char *alg_name)
{
struct dm_target *ti = v->ti;
- struct crypto_ahash *ahash;
- struct crypto_shash *shash = NULL;
- const char *driver_name;
+ struct crypto_shash *shash;
v->alg_name = kstrdup(alg_name, GFP_KERNEL);
if (!v->alg_name) {
@@ -1334,50 +1245,14 @@ static int verity_setup_hash_alg(struct dm_verity *v, const char *alg_name)
return -ENOMEM;
}
- /*
- * Allocate the hash transformation object that this dm-verity instance
- * will use. The vast majority of dm-verity users use CPU-based
- * hashing, so when possible use the shash API to minimize the crypto
- * API overhead. If the ahash API resolves to a different driver
- * (likely an off-CPU hardware offload), use ahash instead. Also use
- * ahash if the obsolete dm-verity format with the appended salt is
- * being used, so that quirk only needs to be handled in one place.
- */
- ahash = crypto_alloc_ahash(alg_name, 0,
- v->use_bh_wq ? CRYPTO_ALG_ASYNC : 0);
- if (IS_ERR(ahash)) {
+ shash = crypto_alloc_shash(alg_name, 0, 0);
+ if (IS_ERR(shash)) {
ti->error = "Cannot initialize hash function";
- return PTR_ERR(ahash);
- }
- driver_name = crypto_ahash_driver_name(ahash);
- if (v->version >= 1 /* salt prepended, not appended? */) {
- shash = crypto_alloc_shash(alg_name, 0, 0);
- if (!IS_ERR(shash) &&
- strcmp(crypto_shash_driver_name(shash), driver_name) != 0) {
- /*
- * ahash gave a different driver than shash, so probably
- * this is a case of real hardware offload. Use ahash.
- */
- crypto_free_shash(shash);
- shash = NULL;
- }
- }
- if (!IS_ERR_OR_NULL(shash)) {
- crypto_free_ahash(ahash);
- ahash = NULL;
- v->shash_tfm = shash;
- v->digest_size = crypto_shash_digestsize(shash);
- v->hash_reqsize = sizeof(struct shash_desc) +
- crypto_shash_descsize(shash);
- DMINFO("%s using shash \"%s\"", alg_name, driver_name);
- } else {
- v->ahash_tfm = ahash;
- static_branch_inc(&ahash_enabled);
- v->digest_size = crypto_ahash_digestsize(ahash);
- v->hash_reqsize = sizeof(struct ahash_request) +
- crypto_ahash_reqsize(ahash);
- DMINFO("%s using ahash \"%s\"", alg_name, driver_name);
+ return PTR_ERR(shash);
}
+ v->shash_tfm = shash;
+ v->digest_size = crypto_shash_digestsize(shash);
+ DMINFO("%s using \"%s\"", alg_name, crypto_shash_driver_name(shash));
if ((1 << v->hash_dev_block_bits) < v->digest_size * 2) {
ti->error = "Digest size too big";
return -EINVAL;
@@ -1402,7 +1277,7 @@ static int verity_setup_salt_and_hashstate(struct dm_verity *v, const char *arg)
return -EINVAL;
}
}
- if (v->shash_tfm) {
+ if (v->version) { /* Version 1: salt at beginning */
SHASH_DESC_ON_STACK(desc, v->shash_tfm);
int r;
@@ -1681,7 +1556,8 @@ static int verity_ctr(struct dm_target *ti, unsigned int argc, char **argv)
goto bad;
}
- ti->per_io_data_size = sizeof(struct dm_verity_io) + v->hash_reqsize;
+ ti->per_io_data_size = sizeof(struct dm_verity_io) +
+ crypto_shash_descsize(v->shash_tfm);
r = verity_fec_ctr(v);
if (r)
@@ -1788,10 +1664,7 @@ static int verity_preresume(struct dm_target *ti)
bdev = dm_disk(dm_table_get_md(ti->table))->part0;
root_digest.digest = v->root_digest;
root_digest.digest_len = v->digest_size;
- if (static_branch_unlikely(&ahash_enabled) && !v->shash_tfm)
- root_digest.alg = crypto_ahash_alg_name(v->ahash_tfm);
- else
- root_digest.alg = crypto_shash_alg_name(v->shash_tfm);
+ root_digest.alg = crypto_shash_alg_name(v->shash_tfm);
r = security_bdev_setintegrity(bdev, LSM_INT_DMVERITY_ROOTHASH, &root_digest,
sizeof(root_digest));
@@ -1817,7 +1690,7 @@ static struct target_type verity_target = {
.name = "verity",
/* Note: the LSMs depend on the singleton and immutable features */
.features = DM_TARGET_SINGLETON | DM_TARGET_IMMUTABLE,
- .version = {1, 11, 0},
+ .version = {1, 12, 0},
.module = THIS_MODULE,
.ctr = verity_ctr,
.dtr = verity_dtr,
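A condensed sketch of the two salting conventions verity_hash() now handles
with the shash API alone; desc, salt, and data stand in for the driver's
fields:

        /* Format version 0: digest = H(data || salt). */
        r = crypto_shash_init(desc) ?:
            crypto_shash_update(desc, data, len) ?:
            crypto_shash_update(desc, salt, salt_size) ?:
            crypto_shash_final(desc, digest);

        /*
         * Version 1+: digest = H(salt || data); the salted prefix state is
         * computed once at table load and re-imported per block.
         */
        r = crypto_shash_import(desc, initial_hashstate) ?:
            crypto_shash_finup(desc, data, len, digest);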
diff --git a/drivers/md/dm-verity.h b/drivers/md/dm-verity.h
index 8cbb57862ae1..6d141abd965c 100644
--- a/drivers/md/dm-verity.h
+++ b/drivers/md/dm-verity.h
@@ -39,11 +39,10 @@ struct dm_verity {
struct dm_target *ti;
struct dm_bufio_client *bufio;
char *alg_name;
- struct crypto_ahash *ahash_tfm; /* either this or shash_tfm is set */
- struct crypto_shash *shash_tfm; /* either this or ahash_tfm is set */
+ struct crypto_shash *shash_tfm;
u8 *root_digest; /* digest of the root block */
u8 *salt; /* salt: its size is salt_size */
- u8 *initial_hashstate; /* salted initial state, if shash_tfm is set */
+ u8 *initial_hashstate; /* salted initial state, if version >= 1 */
u8 *zero_digest; /* digest for a zero block */
#ifdef CONFIG_SECURITY
u8 *root_digest_sig; /* signature of the root digest */
@@ -61,7 +60,6 @@ struct dm_verity {
bool hash_failed:1; /* set if hash of any block failed */
bool use_bh_wq:1; /* try to verify in BH wq before normal work-queue */
unsigned int digest_size; /* digest size for the current hash algorithm */
- unsigned int hash_reqsize; /* the size of temporary space for crypto */
enum verity_mode mode; /* mode for handling verification errors */
enum verity_mode error_mode;/* mode for handling I/O errors */
unsigned int corrupted_errs;/* Number of errors for corrupted blocks */
@@ -100,19 +98,13 @@ struct dm_verity_io {
u8 want_digest[HASH_MAX_DIGESTSIZE];
/*
- * This struct is followed by a variable-sized hash request of size
- * v->hash_reqsize, either a struct ahash_request or a struct shash_desc
- * (depending on whether ahash_tfm or shash_tfm is being used). To
- * access it, use verity_io_hash_req().
+ * Temporary space for hashing. This is variable-length and must be at
+ * the end of the struct. struct shash_desc is just the fixed part;
+ * it's followed by a context of size crypto_shash_descsize(shash_tfm).
*/
+ struct shash_desc hash_desc;
};
-static inline void *verity_io_hash_req(struct dm_verity *v,
- struct dm_verity_io *io)
-{
- return io + 1;
-}
-
static inline u8 *verity_io_real_digest(struct dm_verity *v,
struct dm_verity_io *io)
{
@@ -126,7 +118,7 @@ static inline u8 *verity_io_want_digest(struct dm_verity *v,
}
extern int verity_hash(struct dm_verity *v, struct dm_verity_io *io,
- const u8 *data, size_t len, u8 *digest, bool may_sleep);
+ const u8 *data, size_t len, u8 *digest);
extern int verity_hash_for_block(struct dm_verity *v, struct dm_verity_io *io,
sector_t block, u8 *digest, bool *is_zero);
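
The per-I/O descriptor now lives at the tail of struct dm_verity_io rather than behind a pointer. A minimal sketch of the trailing-context layout this relies on (demo_* names are illustrative): struct shash_desc ends in a flexible array, so placing it last lets one allocation, sized at table-load time, hold both the per-I/O bookkeeping and the crypto context.

#include <crypto/hash.h>

struct demo_io {
	/* ... fixed per-I/O fields ... */
	struct shash_desc hash_desc;	/* must remain the last member */
};

static unsigned int demo_per_io_size(struct crypto_shash *tfm)
{
	return sizeof(struct demo_io) + crypto_shash_descsize(tfm);
}
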
diff --git a/drivers/md/dm-zone.c b/drivers/md/dm-zone.c
index 3d31b82e0730..78e17dd4d01b 100644
--- a/drivers/md/dm-zone.c
+++ b/drivers/md/dm-zone.c
@@ -467,8 +467,6 @@ void dm_zone_endio(struct dm_io *io, struct bio *clone)
bdev_offset_from_zone_start(disk->part0,
clone->bi_iter.bi_sector);
}
-
- return;
}
static int dm_zone_need_reset_cb(struct blk_zone *zone, unsigned int idx,
diff --git a/drivers/md/dm-zoned-target.c b/drivers/md/dm-zoned-target.c
index 5da3db06da10..9da329078ea4 100644
--- a/drivers/md/dm-zoned-target.c
+++ b/drivers/md/dm-zoned-target.c
@@ -1062,7 +1062,7 @@ static int dmz_iterate_devices(struct dm_target *ti,
struct dmz_target *dmz = ti->private;
unsigned int zone_nr_sectors = dmz_zone_nr_sectors(dmz->metadata);
sector_t capacity;
- int i, r;
+ int i, r = 0;
for (i = 0; i < dmz->nr_ddevs; i++) {
capacity = dmz->dev[i].capacity & ~(zone_nr_sectors - 1);
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 2d8402778e5c..a44e8c2dccee 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1024,10 +1024,8 @@ static void dm_wq_requeue_work(struct work_struct *work)
*
* 2) io->orig_bio points to new cloned bio which matches the requeued dm_io.
*/
-static void dm_io_complete(struct dm_io *io)
+static inline void dm_io_complete(struct dm_io *io)
{
- bool first_requeue;
-
/*
* Only dm_io that has been split needs two stage requeue, otherwise
* we may run into long bio clone chain during suspend and OOM could
@@ -1036,12 +1034,7 @@ static void dm_io_complete(struct dm_io *io)
* Also flush data dm_io won't be marked as DM_IO_WAS_SPLIT, so they
* also aren't handled via the first stage requeue.
*/
- if (dm_io_flagged(io, DM_IO_WAS_SPLIT))
- first_requeue = true;
- else
- first_requeue = false;
-
- __dm_io_complete(io, first_requeue);
+ __dm_io_complete(io, dm_io_flagged(io, DM_IO_WAS_SPLIT));
}
/*
diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig
index 9aec922613ce..64f6e9756aff 100644
--- a/drivers/rtc/Kconfig
+++ b/drivers/rtc/Kconfig
@@ -483,15 +483,6 @@ config RTC_DRV_PCF8523
This driver can also be built as a module. If so, the module
will be called rtc-pcf8523.
-config RTC_DRV_PCF85063
- tristate "NXP PCF85063"
- select REGMAP_I2C
- help
- If you say yes here you get support for the PCF85063 RTC chip
-
- This driver can also be built as a module. If so, the module
- will be called rtc-pcf85063.
-
config RTC_DRV_PCF85363
tristate "NXP PCF85363"
select REGMAP_I2C
@@ -971,6 +962,18 @@ config RTC_DRV_PCF2127
This driver can also be built as a module. If so, the module
will be called rtc-pcf2127.
+config RTC_DRV_PCF85063
+ tristate "NXP PCF85063"
+ depends on RTC_I2C_AND_SPI
+ select REGMAP_I2C if I2C
+ select REGMAP_SPI if SPI_MASTER
+ help
+ If you say yes here you get support for the PCF85063 and RV8063
+ RTC chips.
+
+ This driver can also be built as a module. If so, the module
+ will be called rtc-pcf85063.
+
config RTC_DRV_RV3029C2
tristate "Micro Crystal RV3029/3049"
depends on RTC_I2C_AND_SPI
diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile
index 4619aa2ac469..789bddfea99d 100644
--- a/drivers/rtc/Makefile
+++ b/drivers/rtc/Makefile
@@ -15,7 +15,7 @@ rtc-core-$(CONFIG_RTC_INTF_DEV) += dev.o
rtc-core-$(CONFIG_RTC_INTF_PROC) += proc.o
rtc-core-$(CONFIG_RTC_INTF_SYSFS) += sysfs.o
-obj-$(CONFIG_RTC_LIB_KUNIT_TEST) += lib_test.o
+obj-$(CONFIG_RTC_LIB_KUNIT_TEST) += test_rtc_lib.o
# Keep the list ordered.
diff --git a/drivers/rtc/lib.c b/drivers/rtc/lib.c
index 13b5b1f20465..f7051592a6e3 100644
--- a/drivers/rtc/lib.c
+++ b/drivers/rtc/lib.c
@@ -51,7 +51,7 @@ EXPORT_SYMBOL(rtc_year_days);
*/
void rtc_time64_to_tm(time64_t time, struct rtc_time *tm)
{
- int days, secs;
+ int secs;
u64 u64tmp;
u32 u32tmp, udays, century, day_of_century, year_of_century, year,
@@ -59,28 +59,26 @@ void rtc_time64_to_tm(time64_t time, struct rtc_time *tm)
bool is_Jan_or_Feb, is_leap_year;
/*
- * Get days and seconds while preserving the sign to
- * handle negative time values (dates before 1970-01-01)
+ * The time represented by `time` is given in seconds since 1970-01-01
+ * (UTC). As the division done below might misbehave for negative
+ * values, we convert it to seconds since 0000-03-01 and then assume it
+ * will be non-negative.
+	 * Below we do 4 * udays + 3, which should fit into a 32-bit unsigned
+	 * variable. So the latest date this algorithm works for is 1073741823
+	 * days after 0000-03-01, which is in the year 2939805.
*/
- days = div_s64_rem(time, 86400, &secs);
+ time += (u64)719468 * 86400;
+
+ udays = div_s64_rem(time, 86400, &secs);
/*
- * We need 0 <= secs < 86400 which isn't given for negative
- * values of time. Fixup accordingly.
+ * day of the week, 0000-03-01 was a Wednesday (in the proleptic
+ * Gregorian calendar)
*/
- if (secs < 0) {
- days -= 1;
- secs += 86400;
- }
-
- /* day of the week, 1970-01-01 was a Thursday */
- tm->tm_wday = (days + 4) % 7;
- /* Ensure tm_wday is always positive */
- if (tm->tm_wday < 0)
- tm->tm_wday += 7;
+ tm->tm_wday = (udays + 3) % 7;
/*
- * The following algorithm is, basically, Proposition 6.3 of Neri
+ * The following algorithm is, basically, Figure 12 of Neri
* and Schneider [1]. In a few words: it works on the computational
* (fictitious) calendar where the year starts in March, month = 2
* (*), and finishes in February, month = 13. This calendar is
@@ -100,15 +98,15 @@ void rtc_time64_to_tm(time64_t time, struct rtc_time *tm)
* (using just arithmetics) it's easy to convert it to the
* corresponding date in the Gregorian calendar.
*
- * [1] "Euclidean Affine Functions and Applications to Calendar
- * Algorithms". https://arxiv.org/abs/2102.06959
+ * [1] Neri C, Schneider L. Euclidean affine functions and their
+ * application to calendar algorithms. Softw Pract Exper.
+ * 2023;53(4):937-970. doi: 10.1002/spe.3172
+ * https://doi.org/10.1002/spe.3172
*
* (*) The numbering of months follows rtc_time more closely and
* thus, is slightly different from [1].
*/
- udays = days + 719468;
-
u32tmp = 4 * udays + 3;
century = u32tmp / 146097;
day_of_century = u32tmp % 146097 / 4;
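
For reference, the conversion the comment describes can be exercised standalone. A userspace sketch with the same constants as the kernel implementation; demo_civil_from_days() is an illustrative name, and its input is days since 0000-03-01:

#include <stdint.h>
#include <stdio.h>

static void demo_civil_from_days(uint32_t udays)
{
	uint32_t u32tmp, century, day_of_century, year_of_century;
	uint32_t day_of_year, year, month, day;
	uint64_t u64tmp;
	int is_Jan_or_Feb;

	u32tmp = 4 * udays + 3;
	century = u32tmp / 146097;
	day_of_century = u32tmp % 146097 / 4;

	u32tmp = 4 * day_of_century + 3;
	u64tmp = 2939745ULL * u32tmp;
	year_of_century = (uint32_t)(u64tmp >> 32);
	day_of_year = (uint32_t)u64tmp / 2939745 / 4;

	year = 100 * century + year_of_century;

	u32tmp = 2141 * day_of_year + 132377;
	month = u32tmp >> 16;		/* 2 = March ... 13 = February */
	day = (uint16_t)u32tmp / 2141;	/* zero-based day of month */

	/* days 306.. of the computational year are January/February */
	is_Jan_or_Feb = day_of_year >= 306;
	year += is_Jan_or_Feb;
	month = is_Jan_or_Feb ? month - 12 : month;

	printf("%04u-%02u-%02u\n", year, month + 1, day + 1);
}

int main(void)
{
	demo_civil_from_days(719468);	/* 1970-01-01, the offset used above */
	return 0;
}
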
diff --git a/drivers/rtc/rtc-ds1307.c b/drivers/rtc/rtc-ds1307.c
index 5efbe69bf5ca..7205c59ff729 100644
--- a/drivers/rtc/rtc-ds1307.c
+++ b/drivers/rtc/rtc-ds1307.c
@@ -279,6 +279,13 @@ static int ds1307_get_time(struct device *dev, struct rtc_time *t)
if (tmp & DS1340_BIT_OSF)
return -EINVAL;
break;
+ case ds_1341:
+ ret = regmap_read(ds1307->regmap, DS1337_REG_STATUS, &tmp);
+ if (ret)
+ return ret;
+ if (tmp & DS1337_BIT_OSF)
+ return -EINVAL;
+ break;
case ds_1388:
ret = regmap_read(ds1307->regmap, DS1388_REG_FLAG, &tmp);
if (ret)
@@ -377,6 +384,10 @@ static int ds1307_set_time(struct device *dev, struct rtc_time *t)
regmap_update_bits(ds1307->regmap, DS1340_REG_FLAG,
DS1340_BIT_OSF, 0);
break;
+ case ds_1341:
+ regmap_update_bits(ds1307->regmap, DS1337_REG_STATUS,
+ DS1337_BIT_OSF, 0);
+ break;
case ds_1388:
regmap_update_bits(ds1307->regmap, DS1388_REG_FLAG,
DS1388_BIT_OSF, 0);
@@ -1456,16 +1467,21 @@ static unsigned long ds3231_clk_sqw_recalc_rate(struct clk_hw *hw,
return ds3231_clk_sqw_rates[rate_sel];
}
-static long ds3231_clk_sqw_round_rate(struct clk_hw *hw, unsigned long rate,
- unsigned long *prate)
+static int ds3231_clk_sqw_determine_rate(struct clk_hw *hw,
+ struct clk_rate_request *req)
{
int i;
for (i = ARRAY_SIZE(ds3231_clk_sqw_rates) - 1; i >= 0; i--) {
- if (ds3231_clk_sqw_rates[i] <= rate)
- return ds3231_clk_sqw_rates[i];
+ if (ds3231_clk_sqw_rates[i] <= req->rate) {
+ req->rate = ds3231_clk_sqw_rates[i];
+
+ return 0;
+ }
}
+ req->rate = ds3231_clk_sqw_rates[ARRAY_SIZE(ds3231_clk_sqw_rates) - 1];
+
return 0;
}
@@ -1525,7 +1541,7 @@ static const struct clk_ops ds3231_clk_sqw_ops = {
.unprepare = ds3231_clk_sqw_unprepare,
.is_prepared = ds3231_clk_sqw_is_prepared,
.recalc_rate = ds3231_clk_sqw_recalc_rate,
- .round_rate = ds3231_clk_sqw_round_rate,
+ .determine_rate = ds3231_clk_sqw_determine_rate,
.set_rate = ds3231_clk_sqw_set_rate,
};
@@ -1813,10 +1829,8 @@ static int ds1307_probe(struct i2c_client *client)
regmap_write(ds1307->regmap, DS1337_REG_CONTROL,
regs[0]);
- /* oscillator fault? clear flag, and warn */
+ /* oscillator fault? warn */
if (regs[1] & DS1337_BIT_OSF) {
- regmap_write(ds1307->regmap, DS1337_REG_STATUS,
- regs[1] & ~DS1337_BIT_OSF);
dev_warn(ds1307->dev, "SET TIME!\n");
}
break;
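
This is the first of several identical .round_rate to .determine_rate conversions in this series: the callback now writes the selected frequency into the clk_rate_request and returns a status code. A generic sketch of the shape, assuming a descending rate table; demo_* names are illustrative:

#include <linux/clk-provider.h>

static const unsigned long demo_rates[] = { 32768, 1024, 32, 1 };

static int demo_determine_rate(struct clk_hw *hw,
			       struct clk_rate_request *req)
{
	int i;

	/* highest supported rate that does not exceed the request */
	for (i = 0; i < ARRAY_SIZE(demo_rates); i++)
		if (demo_rates[i] <= req->rate) {
			req->rate = demo_rates[i];
			return 0;
		}

	/* nothing fits below the request; fall back as the drivers above do */
	req->rate = demo_rates[0];
	return 0;
}
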
diff --git a/drivers/rtc/rtc-ds1685.c b/drivers/rtc/rtc-ds1685.c
index 38e25f63597a..97423f1d0361 100644
--- a/drivers/rtc/rtc-ds1685.c
+++ b/drivers/rtc/rtc-ds1685.c
@@ -3,7 +3,7 @@
* An rtc driver for the Dallas/Maxim DS1685/DS1687 and related real-time
* chips.
*
- * Copyright (C) 2011-2014 Joshua Kinard <kumba@gentoo.org>.
+ * Copyright (C) 2011-2014 Joshua Kinard <linux@kumba.dev>.
* Copyright (C) 2009 Matthias Fuchs <matthias.fuchs@esd-electronics.com>.
*
* References:
@@ -1436,7 +1436,7 @@ EXPORT_SYMBOL_GPL(ds1685_rtc_poweroff);
/* ----------------------------------------------------------------------- */
-MODULE_AUTHOR("Joshua Kinard <kumba@gentoo.org>");
+MODULE_AUTHOR("Joshua Kinard <linux@kumba.dev>");
MODULE_AUTHOR("Matthias Fuchs <matthias.fuchs@esd-electronics.com>");
MODULE_DESCRIPTION("Dallas/Maxim DS1685/DS1687-series RTC driver");
MODULE_LICENSE("GPL");
diff --git a/drivers/rtc/rtc-hym8563.c b/drivers/rtc/rtc-hym8563.c
index 63f11ea3589d..7a170c0f9710 100644
--- a/drivers/rtc/rtc-hym8563.c
+++ b/drivers/rtc/rtc-hym8563.c
@@ -285,14 +285,19 @@ static unsigned long hym8563_clkout_recalc_rate(struct clk_hw *hw,
return clkout_rates[ret];
}
-static long hym8563_clkout_round_rate(struct clk_hw *hw, unsigned long rate,
- unsigned long *prate)
+static int hym8563_clkout_determine_rate(struct clk_hw *hw,
+ struct clk_rate_request *req)
{
int i;
for (i = 0; i < ARRAY_SIZE(clkout_rates); i++)
- if (clkout_rates[i] <= rate)
- return clkout_rates[i];
+ if (clkout_rates[i] <= req->rate) {
+ req->rate = clkout_rates[i];
+
+ return 0;
+ }
+
+ req->rate = clkout_rates[0];
return 0;
}
@@ -363,7 +368,7 @@ static const struct clk_ops hym8563_clkout_ops = {
.unprepare = hym8563_clkout_unprepare,
.is_prepared = hym8563_clkout_is_prepared,
.recalc_rate = hym8563_clkout_recalc_rate,
- .round_rate = hym8563_clkout_round_rate,
+ .determine_rate = hym8563_clkout_determine_rate,
.set_rate = hym8563_clkout_set_rate,
};
diff --git a/drivers/rtc/rtc-m41t80.c b/drivers/rtc/rtc-m41t80.c
index c568639d2151..740cab013f59 100644
--- a/drivers/rtc/rtc-m41t80.c
+++ b/drivers/rtc/rtc-m41t80.c
@@ -72,7 +72,7 @@
static const struct i2c_device_id m41t80_id[] = {
{ "m41t62", M41T80_FEATURE_SQ | M41T80_FEATURE_SQ_ALT },
- { "m41t65", M41T80_FEATURE_HT | M41T80_FEATURE_WD },
+ { "m41t65", M41T80_FEATURE_WD },
{ "m41t80", M41T80_FEATURE_SQ },
{ "m41t81", M41T80_FEATURE_HT | M41T80_FEATURE_SQ},
{ "m41t81s", M41T80_FEATURE_HT | M41T80_FEATURE_BL | M41T80_FEATURE_SQ },
@@ -93,7 +93,7 @@ static const __maybe_unused struct of_device_id m41t80_of_match[] = {
},
{
.compatible = "st,m41t65",
- .data = (void *)(M41T80_FEATURE_HT | M41T80_FEATURE_WD)
+ .data = (void *)(M41T80_FEATURE_WD)
},
{
.compatible = "st,m41t80",
@@ -484,16 +484,17 @@ static unsigned long m41t80_sqw_recalc_rate(struct clk_hw *hw,
return sqw_to_m41t80_data(hw)->freq;
}
-static long m41t80_sqw_round_rate(struct clk_hw *hw, unsigned long rate,
- unsigned long *prate)
+static int m41t80_sqw_determine_rate(struct clk_hw *hw,
+ struct clk_rate_request *req)
{
- if (rate >= M41T80_SQW_MAX_FREQ)
- return M41T80_SQW_MAX_FREQ;
- if (rate >= M41T80_SQW_MAX_FREQ / 4)
- return M41T80_SQW_MAX_FREQ / 4;
- if (!rate)
- return 0;
- return 1 << ilog2(rate);
+ if (req->rate >= M41T80_SQW_MAX_FREQ)
+ req->rate = M41T80_SQW_MAX_FREQ;
+ else if (req->rate >= M41T80_SQW_MAX_FREQ / 4)
+ req->rate = M41T80_SQW_MAX_FREQ / 4;
+ else if (req->rate)
+ req->rate = 1 << ilog2(req->rate);
+
+ return 0;
}
static int m41t80_sqw_set_rate(struct clk_hw *hw, unsigned long rate,
@@ -564,7 +565,7 @@ static const struct clk_ops m41t80_sqw_ops = {
.unprepare = m41t80_sqw_unprepare,
.is_prepared = m41t80_sqw_is_prepared,
.recalc_rate = m41t80_sqw_recalc_rate,
- .round_rate = m41t80_sqw_round_rate,
+ .determine_rate = m41t80_sqw_determine_rate,
.set_rate = m41t80_sqw_set_rate,
};
diff --git a/drivers/rtc/rtc-max31335.c b/drivers/rtc/rtc-max31335.c
index a7bb37aaab9e..dfb5bad3a369 100644
--- a/drivers/rtc/rtc-max31335.c
+++ b/drivers/rtc/rtc-max31335.c
@@ -497,15 +497,17 @@ static unsigned long max31335_clkout_recalc_rate(struct clk_hw *hw,
return max31335_clkout_freq[reg & freq_mask];
}
-static long max31335_clkout_round_rate(struct clk_hw *hw, unsigned long rate,
- unsigned long *prate)
+static int max31335_clkout_determine_rate(struct clk_hw *hw,
+ struct clk_rate_request *req)
{
int index;
- index = find_closest(rate, max31335_clkout_freq,
+ index = find_closest(req->rate, max31335_clkout_freq,
ARRAY_SIZE(max31335_clkout_freq));
- return max31335_clkout_freq[index];
+ req->rate = max31335_clkout_freq[index];
+
+ return 0;
}
static int max31335_clkout_set_rate(struct clk_hw *hw, unsigned long rate,
@@ -554,7 +556,7 @@ static int max31335_clkout_is_enabled(struct clk_hw *hw)
static const struct clk_ops max31335_clkout_ops = {
.recalc_rate = max31335_clkout_recalc_rate,
- .round_rate = max31335_clkout_round_rate,
+ .determine_rate = max31335_clkout_determine_rate,
.set_rate = max31335_clkout_set_rate,
.enable = max31335_clkout_enable,
.disable = max31335_clkout_disable,
diff --git a/drivers/rtc/rtc-nct3018y.c b/drivers/rtc/rtc-nct3018y.c
index 76c5f464b2da..cd4b1db902e9 100644
--- a/drivers/rtc/rtc-nct3018y.c
+++ b/drivers/rtc/rtc-nct3018y.c
@@ -367,14 +367,19 @@ static unsigned long nct3018y_clkout_recalc_rate(struct clk_hw *hw,
return clkout_rates[flags];
}
-static long nct3018y_clkout_round_rate(struct clk_hw *hw, unsigned long rate,
- unsigned long *prate)
+static int nct3018y_clkout_determine_rate(struct clk_hw *hw,
+ struct clk_rate_request *req)
{
int i;
for (i = 0; i < ARRAY_SIZE(clkout_rates); i++)
- if (clkout_rates[i] <= rate)
- return clkout_rates[i];
+ if (clkout_rates[i] <= req->rate) {
+ req->rate = clkout_rates[i];
+
+ return 0;
+ }
+
+ req->rate = clkout_rates[0];
return 0;
}
@@ -446,7 +451,7 @@ static const struct clk_ops nct3018y_clkout_ops = {
.unprepare = nct3018y_clkout_unprepare,
.is_prepared = nct3018y_clkout_is_prepared,
.recalc_rate = nct3018y_clkout_recalc_rate,
- .round_rate = nct3018y_clkout_round_rate,
+ .determine_rate = nct3018y_clkout_determine_rate,
.set_rate = nct3018y_clkout_set_rate,
};
diff --git a/drivers/rtc/rtc-pcf85063.c b/drivers/rtc/rtc-pcf85063.c
index 4fa5c4ecdd5a..f643e0bd7351 100644
--- a/drivers/rtc/rtc-pcf85063.c
+++ b/drivers/rtc/rtc-pcf85063.c
@@ -17,6 +17,7 @@
#include <linux/of.h>
#include <linux/pm_wakeirq.h>
#include <linux/regmap.h>
+#include <linux/spi/spi.h>
/*
* Information for this driver was pulled from the following datasheets.
@@ -29,6 +30,9 @@
*
* https://www.microcrystal.com/fileadmin/Media/Products/RTC/App.Manual/RV-8263-C7_App-Manual.pdf
* RV8263 -- Rev. 1.0 — January 2019
+ *
+ * https://www.microcrystal.com/fileadmin/Media/Products/RTC/App.Manual/RV-8063-C7_App-Manual.pdf
+ * RV8063 -- Rev. 1.1 — October 2018
*/
#define PCF85063_REG_CTRL1 0x00 /* status */
@@ -401,14 +405,19 @@ static unsigned long pcf85063_clkout_recalc_rate(struct clk_hw *hw,
return clkout_rates[buf];
}
-static long pcf85063_clkout_round_rate(struct clk_hw *hw, unsigned long rate,
- unsigned long *prate)
+static int pcf85063_clkout_determine_rate(struct clk_hw *hw,
+ struct clk_rate_request *req)
{
int i;
for (i = 0; i < ARRAY_SIZE(clkout_rates); i++)
- if (clkout_rates[i] <= rate)
- return clkout_rates[i];
+ if (clkout_rates[i] <= req->rate) {
+ req->rate = clkout_rates[i];
+
+ return 0;
+ }
+
+ req->rate = clkout_rates[0];
return 0;
}
@@ -482,7 +491,7 @@ static const struct clk_ops pcf85063_clkout_ops = {
.unprepare = pcf85063_clkout_unprepare,
.is_prepared = pcf85063_clkout_is_prepared,
.recalc_rate = pcf85063_clkout_recalc_rate,
- .round_rate = pcf85063_clkout_round_rate,
+ .determine_rate = pcf85063_clkout_determine_rate,
.set_rate = pcf85063_clkout_set_rate,
};
@@ -524,47 +533,12 @@ static struct clk *pcf85063_clkout_register_clk(struct pcf85063 *pcf85063)
}
#endif
-static const struct pcf85063_config config_pcf85063 = {
- .regmap = {
- .reg_bits = 8,
- .val_bits = 8,
- .max_register = 0x0a,
- },
-};
-
-static const struct pcf85063_config config_pcf85063tp = {
- .regmap = {
- .reg_bits = 8,
- .val_bits = 8,
- .max_register = 0x0a,
- },
-};
-
-static const struct pcf85063_config config_pcf85063a = {
- .regmap = {
- .reg_bits = 8,
- .val_bits = 8,
- .max_register = 0x11,
- },
- .has_alarms = 1,
-};
-
-static const struct pcf85063_config config_rv8263 = {
- .regmap = {
- .reg_bits = 8,
- .val_bits = 8,
- .max_register = 0x11,
- },
- .has_alarms = 1,
- .force_cap_7000 = 1,
-};
-
-static int pcf85063_probe(struct i2c_client *client)
+static int pcf85063_probe(struct device *dev, struct regmap *regmap, int irq,
+ const struct pcf85063_config *config)
{
struct pcf85063 *pcf85063;
unsigned int tmp;
int err;
- const struct pcf85063_config *config;
struct nvmem_config nvmem_cfg = {
.name = "pcf85063_nvram",
.reg_read = pcf85063_nvmem_read,
@@ -573,28 +547,22 @@ static int pcf85063_probe(struct i2c_client *client)
.size = 1,
};
- dev_dbg(&client->dev, "%s\n", __func__);
+ dev_dbg(dev, "%s\n", __func__);
- pcf85063 = devm_kzalloc(&client->dev, sizeof(struct pcf85063),
+ pcf85063 = devm_kzalloc(dev, sizeof(struct pcf85063),
GFP_KERNEL);
if (!pcf85063)
return -ENOMEM;
- config = i2c_get_match_data(client);
- if (!config)
- return -ENODEV;
-
- pcf85063->regmap = devm_regmap_init_i2c(client, &config->regmap);
- if (IS_ERR(pcf85063->regmap))
- return PTR_ERR(pcf85063->regmap);
+ pcf85063->regmap = regmap;
- i2c_set_clientdata(client, pcf85063);
+ dev_set_drvdata(dev, pcf85063);
err = regmap_read(pcf85063->regmap, PCF85063_REG_SC, &tmp);
if (err)
- return dev_err_probe(&client->dev, err, "RTC chip is not present\n");
+ return dev_err_probe(dev, err, "RTC chip is not present\n");
- pcf85063->rtc = devm_rtc_allocate_device(&client->dev);
+ pcf85063->rtc = devm_rtc_allocate_device(dev);
if (IS_ERR(pcf85063->rtc))
return PTR_ERR(pcf85063->rtc);
@@ -605,19 +573,17 @@ static int pcf85063_probe(struct i2c_client *client)
* of the registers after the automatic power-on reset...
*/
if (tmp & PCF85063_REG_SC_OS) {
- dev_warn(&client->dev,
- "POR issue detected, sending a SW reset\n");
+ dev_warn(dev, "POR issue detected, sending a SW reset\n");
err = regmap_write(pcf85063->regmap, PCF85063_REG_CTRL1,
PCF85063_REG_CTRL1_SWR);
if (err < 0)
- dev_warn(&client->dev,
- "SW reset failed, trying to continue\n");
+ dev_warn(dev, "SW reset failed, trying to continue\n");
}
- err = pcf85063_load_capacitance(pcf85063, client->dev.of_node,
+ err = pcf85063_load_capacitance(pcf85063, dev->of_node,
config->force_cap_7000 ? 7000 : 0);
if (err < 0)
- dev_warn(&client->dev, "failed to set xtal load capacitance: %d",
+ dev_warn(dev, "failed to set xtal load capacitance: %d",
err);
pcf85063->rtc->ops = &pcf85063_rtc_ops;
@@ -627,13 +593,13 @@ static int pcf85063_probe(struct i2c_client *client)
clear_bit(RTC_FEATURE_UPDATE_INTERRUPT, pcf85063->rtc->features);
clear_bit(RTC_FEATURE_ALARM, pcf85063->rtc->features);
- if (config->has_alarms && client->irq > 0) {
+ if (config->has_alarms && irq > 0) {
unsigned long irqflags = IRQF_TRIGGER_LOW;
- if (dev_fwnode(&client->dev))
+ if (dev_fwnode(dev))
irqflags = 0;
- err = devm_request_threaded_irq(&client->dev, client->irq,
+ err = devm_request_threaded_irq(dev, irq,
NULL, pcf85063_rtc_handle_irq,
irqflags | IRQF_ONESHOT,
"pcf85063", pcf85063);
@@ -642,8 +608,8 @@ static int pcf85063_probe(struct i2c_client *client)
"unable to request IRQ, alarms disabled\n");
} else {
set_bit(RTC_FEATURE_ALARM, pcf85063->rtc->features);
- device_init_wakeup(&client->dev, true);
- err = dev_pm_set_wake_irq(&client->dev, client->irq);
+ device_init_wakeup(dev, true);
+ err = dev_pm_set_wake_irq(dev, irq);
if (err)
dev_err(&pcf85063->rtc->dev,
"failed to enable irq wake\n");
@@ -661,6 +627,43 @@ static int pcf85063_probe(struct i2c_client *client)
return devm_rtc_register_device(pcf85063->rtc);
}
+#if IS_ENABLED(CONFIG_I2C)
+
+static const struct pcf85063_config config_pcf85063 = {
+ .regmap = {
+ .reg_bits = 8,
+ .val_bits = 8,
+ .max_register = 0x0a,
+ },
+};
+
+static const struct pcf85063_config config_pcf85063tp = {
+ .regmap = {
+ .reg_bits = 8,
+ .val_bits = 8,
+ .max_register = 0x0a,
+ },
+};
+
+static const struct pcf85063_config config_pcf85063a = {
+ .regmap = {
+ .reg_bits = 8,
+ .val_bits = 8,
+ .max_register = 0x11,
+ },
+ .has_alarms = 1,
+};
+
+static const struct pcf85063_config config_rv8263 = {
+ .regmap = {
+ .reg_bits = 8,
+ .val_bits = 8,
+ .max_register = 0x11,
+ },
+ .has_alarms = 1,
+ .force_cap_7000 = 1,
+};
+
static const struct i2c_device_id pcf85063_ids[] = {
{ "pca85073a", .driver_data = (kernel_ulong_t)&config_pcf85063a },
{ "pcf85063", .driver_data = (kernel_ulong_t)&config_pcf85063 },
@@ -683,16 +686,146 @@ static const struct of_device_id pcf85063_of_match[] = {
MODULE_DEVICE_TABLE(of, pcf85063_of_match);
#endif
+static int pcf85063_i2c_probe(struct i2c_client *client)
+{
+ const struct pcf85063_config *config;
+ struct regmap *regmap;
+
+ config = i2c_get_match_data(client);
+ if (!config)
+ return -ENODEV;
+
+ regmap = devm_regmap_init_i2c(client, &config->regmap);
+ if (IS_ERR(regmap))
+ return PTR_ERR(regmap);
+
+ return pcf85063_probe(&client->dev, regmap, client->irq, config);
+}
+
static struct i2c_driver pcf85063_driver = {
.driver = {
.name = "rtc-pcf85063",
.of_match_table = of_match_ptr(pcf85063_of_match),
},
- .probe = pcf85063_probe,
+ .probe = pcf85063_i2c_probe,
.id_table = pcf85063_ids,
};
-module_i2c_driver(pcf85063_driver);
+static int pcf85063_register_driver(void)
+{
+ return i2c_add_driver(&pcf85063_driver);
+}
+
+static void pcf85063_unregister_driver(void)
+{
+ i2c_del_driver(&pcf85063_driver);
+}
+
+#else
+
+static int pcf85063_register_driver(void)
+{
+ return 0;
+}
+
+static void pcf85063_unregister_driver(void)
+{
+}
+
+#endif /* IS_ENABLED(CONFIG_I2C) */
+
+#if IS_ENABLED(CONFIG_SPI_MASTER)
+
+static const struct pcf85063_config config_rv8063 = {
+ .regmap = {
+ .reg_bits = 8,
+ .val_bits = 8,
+ .max_register = 0x11,
+ .read_flag_mask = BIT(7) | BIT(5),
+ .write_flag_mask = BIT(5),
+ },
+ .has_alarms = 1,
+ .force_cap_7000 = 1,
+};
+
+static const struct spi_device_id rv8063_id[] = {
+ { "rv8063" },
+ {}
+};
+MODULE_DEVICE_TABLE(spi, rv8063_id);
+
+static const struct of_device_id rv8063_of_match[] = {
+ { .compatible = "microcrystal,rv8063" },
+ {}
+};
+MODULE_DEVICE_TABLE(of, rv8063_of_match);
+
+static int rv8063_probe(struct spi_device *spi)
+{
+ const struct pcf85063_config *config = &config_rv8063;
+ struct regmap *regmap;
+
+ regmap = devm_regmap_init_spi(spi, &config->regmap);
+ if (IS_ERR(regmap))
+ return PTR_ERR(regmap);
+
+ return pcf85063_probe(&spi->dev, regmap, spi->irq, config);
+}
+
+static struct spi_driver rv8063_driver = {
+ .driver = {
+ .name = "rv8063",
+ .of_match_table = rv8063_of_match,
+ },
+ .probe = rv8063_probe,
+ .id_table = rv8063_id,
+};
+
+static int __init rv8063_register_driver(void)
+{
+ return spi_register_driver(&rv8063_driver);
+}
+
+static void __exit rv8063_unregister_driver(void)
+{
+ spi_unregister_driver(&rv8063_driver);
+}
+
+#else
+
+static int __init rv8063_register_driver(void)
+{
+ return 0;
+}
+
+static void __exit rv8063_unregister_driver(void)
+{
+}
+
+#endif /* IS_ENABLED(CONFIG_SPI_MASTER) */
+
+static int __init pcf85063_init(void)
+{
+ int ret;
+
+ ret = pcf85063_register_driver();
+ if (ret)
+ return ret;
+
+ ret = rv8063_register_driver();
+ if (ret)
+ pcf85063_unregister_driver();
+
+ return ret;
+}
+module_init(pcf85063_init);
+
+static void __exit pcf85063_exit(void)
+{
+ rv8063_unregister_driver();
+ pcf85063_unregister_driver();
+}
+module_exit(pcf85063_exit);
MODULE_AUTHOR("Søren Andersen <san@rosetechnology.dk>");
MODULE_DESCRIPTION("PCF85063 RTC driver");
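
The RV8063 config above is plain PCF85063 register access behind SPI-specific flag masks, which regmap ORs onto the register-address byte. A sketch of what those masks mean on the wire (masks taken from config_rv8063; the register value is an arbitrary example):

#include <linux/bits.h>
#include <linux/types.h>

static inline u8 demo_rv8063_read_cmd(u8 reg)
{
	return reg | BIT(7) | BIT(5);	/* e.g. reg 0x04 -> 0xa4 */
}

static inline u8 demo_rv8063_write_cmd(u8 reg)
{
	return reg | BIT(5);		/* e.g. reg 0x04 -> 0x24 */
}
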
diff --git a/drivers/rtc/rtc-pcf8563.c b/drivers/rtc/rtc-pcf8563.c
index b2611697fa5e..4e61011fb7a9 100644
--- a/drivers/rtc/rtc-pcf8563.c
+++ b/drivers/rtc/rtc-pcf8563.c
@@ -330,14 +330,19 @@ static unsigned long pcf8563_clkout_recalc_rate(struct clk_hw *hw,
return clkout_rates[buf];
}
-static long pcf8563_clkout_round_rate(struct clk_hw *hw, unsigned long rate,
- unsigned long *prate)
+static int pcf8563_clkout_determine_rate(struct clk_hw *hw,
+ struct clk_rate_request *req)
{
int i;
for (i = 0; i < ARRAY_SIZE(clkout_rates); i++)
- if (clkout_rates[i] <= rate)
- return clkout_rates[i];
+ if (clkout_rates[i] <= req->rate) {
+ req->rate = clkout_rates[i];
+
+ return 0;
+ }
+
+ req->rate = clkout_rates[0];
return 0;
}
@@ -413,7 +418,7 @@ static const struct clk_ops pcf8563_clkout_ops = {
.unprepare = pcf8563_clkout_unprepare,
.is_prepared = pcf8563_clkout_is_prepared,
.recalc_rate = pcf8563_clkout_recalc_rate,
- .round_rate = pcf8563_clkout_round_rate,
+ .determine_rate = pcf8563_clkout_determine_rate,
.set_rate = pcf8563_clkout_set_rate,
};
diff --git a/drivers/rtc/rtc-rv3028.c b/drivers/rtc/rtc-rv3028.c
index 868d1b1eb0f4..c2a531f0e125 100644
--- a/drivers/rtc/rtc-rv3028.c
+++ b/drivers/rtc/rtc-rv3028.c
@@ -731,14 +731,19 @@ static unsigned long rv3028_clkout_recalc_rate(struct clk_hw *hw,
return clkout_rates[clkout];
}
-static long rv3028_clkout_round_rate(struct clk_hw *hw, unsigned long rate,
- unsigned long *prate)
+static int rv3028_clkout_determine_rate(struct clk_hw *hw,
+ struct clk_rate_request *req)
{
int i;
for (i = 0; i < ARRAY_SIZE(clkout_rates); i++)
- if (clkout_rates[i] <= rate)
- return clkout_rates[i];
+ if (clkout_rates[i] <= req->rate) {
+ req->rate = clkout_rates[i];
+
+ return 0;
+ }
+
+ req->rate = clkout_rates[0];
return 0;
}
@@ -802,7 +807,7 @@ static const struct clk_ops rv3028_clkout_ops = {
.unprepare = rv3028_clkout_unprepare,
.is_prepared = rv3028_clkout_is_prepared,
.recalc_rate = rv3028_clkout_recalc_rate,
- .round_rate = rv3028_clkout_round_rate,
+ .determine_rate = rv3028_clkout_determine_rate,
.set_rate = rv3028_clkout_set_rate,
};
diff --git a/drivers/rtc/rtc-rv3032.c b/drivers/rtc/rtc-rv3032.c
index 2c6a8918acba..b8376bd1d905 100644
--- a/drivers/rtc/rtc-rv3032.c
+++ b/drivers/rtc/rtc-rv3032.c
@@ -646,19 +646,24 @@ static unsigned long rv3032_clkout_recalc_rate(struct clk_hw *hw,
return clkout_xtal_rates[FIELD_GET(RV3032_CLKOUT2_FD_MSK, clkout)];
}
-static long rv3032_clkout_round_rate(struct clk_hw *hw, unsigned long rate,
- unsigned long *prate)
+static int rv3032_clkout_determine_rate(struct clk_hw *hw,
+ struct clk_rate_request *req)
{
int i, hfd;
- if (rate < RV3032_HFD_STEP)
+ if (req->rate < RV3032_HFD_STEP)
for (i = 0; i < ARRAY_SIZE(clkout_xtal_rates); i++)
- if (clkout_xtal_rates[i] <= rate)
- return clkout_xtal_rates[i];
+ if (clkout_xtal_rates[i] <= req->rate) {
+ req->rate = clkout_xtal_rates[i];
- hfd = DIV_ROUND_CLOSEST(rate, RV3032_HFD_STEP);
+ return 0;
+ }
+
+ hfd = DIV_ROUND_CLOSEST(req->rate, RV3032_HFD_STEP);
- return RV3032_HFD_STEP * clamp(hfd, 0, 8192);
+ req->rate = RV3032_HFD_STEP * clamp(hfd, 0, 8192);
+
+ return 0;
}
static int rv3032_clkout_set_rate(struct clk_hw *hw, unsigned long rate,
@@ -738,7 +743,7 @@ static const struct clk_ops rv3032_clkout_ops = {
.unprepare = rv3032_clkout_unprepare,
.is_prepared = rv3032_clkout_is_prepared,
.recalc_rate = rv3032_clkout_recalc_rate,
- .round_rate = rv3032_clkout_round_rate,
+ .determine_rate = rv3032_clkout_determine_rate,
.set_rate = rv3032_clkout_set_rate,
};
diff --git a/drivers/rtc/rtc-s3c.c b/drivers/rtc/rtc-s3c.c
index 5dd575865adf..79b2a16f15ad 100644
--- a/drivers/rtc/rtc-s3c.c
+++ b/drivers/rtc/rtc-s3c.c
@@ -549,25 +549,25 @@ static void s3c6410_rtc_irq(struct s3c_rtc *info, int mask)
writeb(mask, info->base + S3C2410_INTP);
}
-static struct s3c_rtc_data const s3c2410_rtc_data = {
+static const struct s3c_rtc_data s3c2410_rtc_data = {
.irq_handler = s3c24xx_rtc_irq,
.enable = s3c24xx_rtc_enable,
.disable = s3c24xx_rtc_disable,
};
-static struct s3c_rtc_data const s3c2416_rtc_data = {
+static const struct s3c_rtc_data s3c2416_rtc_data = {
.irq_handler = s3c24xx_rtc_irq,
.enable = s3c24xx_rtc_enable,
.disable = s3c24xx_rtc_disable,
};
-static struct s3c_rtc_data const s3c2443_rtc_data = {
+static const struct s3c_rtc_data s3c2443_rtc_data = {
.irq_handler = s3c24xx_rtc_irq,
.enable = s3c24xx_rtc_enable,
.disable = s3c24xx_rtc_disable,
};
-static struct s3c_rtc_data const s3c6410_rtc_data = {
+static const struct s3c_rtc_data s3c6410_rtc_data = {
.needs_src_clk = true,
.irq_handler = s3c6410_rtc_irq,
.enable = s3c24xx_rtc_enable,
diff --git a/drivers/rtc/rtc-sh.c b/drivers/rtc/rtc-sh.c
index f15ef3aa82a0..619800a00479 100644
--- a/drivers/rtc/rtc-sh.c
+++ b/drivers/rtc/rtc-sh.c
@@ -455,7 +455,7 @@ static void __exit sh_rtc_remove(struct platform_device *pdev)
clk_disable(rtc->clk);
}
-static int __maybe_unused sh_rtc_suspend(struct device *dev)
+static int sh_rtc_suspend(struct device *dev)
{
struct sh_rtc *rtc = dev_get_drvdata(dev);
@@ -465,7 +465,7 @@ static int __maybe_unused sh_rtc_suspend(struct device *dev)
return 0;
}
-static int __maybe_unused sh_rtc_resume(struct device *dev)
+static int sh_rtc_resume(struct device *dev)
{
struct sh_rtc *rtc = dev_get_drvdata(dev);
@@ -475,7 +475,7 @@ static int __maybe_unused sh_rtc_resume(struct device *dev)
return 0;
}
-static SIMPLE_DEV_PM_OPS(sh_rtc_pm_ops, sh_rtc_suspend, sh_rtc_resume);
+static DEFINE_SIMPLE_DEV_PM_OPS(sh_rtc_pm_ops, sh_rtc_suspend, sh_rtc_resume);
static const struct of_device_id sh_rtc_of_match[] = {
{ .compatible = "renesas,sh-rtc", },
@@ -492,7 +492,7 @@ MODULE_DEVICE_TABLE(of, sh_rtc_of_match);
static struct platform_driver sh_rtc_platform_driver __refdata = {
.driver = {
.name = DRV_NAME,
- .pm = &sh_rtc_pm_ops,
+ .pm = pm_sleep_ptr(&sh_rtc_pm_ops),
.of_match_table = sh_rtc_of_match,
},
.remove = __exit_p(sh_rtc_remove),
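
A minimal sketch of the pm_sleep_ptr() pattern adopted here (demo_* names are illustrative): with CONFIG_PM_SLEEP disabled, pm_sleep_ptr() evaluates to NULL and the ops plus callbacks are discarded by the compiler and linker, so the __maybe_unused annotations become unnecessary.

#include <linux/pm.h>
#include <linux/platform_device.h>

static int demo_suspend(struct device *dev) { return 0; }
static int demo_resume(struct device *dev) { return 0; }

static DEFINE_SIMPLE_DEV_PM_OPS(demo_pm_ops, demo_suspend, demo_resume);

static struct platform_driver demo_driver = {
	.driver = {
		.name = "demo",
		.pm = pm_sleep_ptr(&demo_pm_ops),
	},
};
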
diff --git a/drivers/rtc/sysfs.c b/drivers/rtc/sysfs.c
index e3062c4d3f2c..4ab05e105a76 100644
--- a/drivers/rtc/sysfs.c
+++ b/drivers/rtc/sysfs.c
@@ -24,8 +24,8 @@
static ssize_t
name_show(struct device *dev, struct device_attribute *attr, char *buf)
{
- return sprintf(buf, "%s %s\n", dev_driver_string(dev->parent),
- dev_name(dev->parent));
+ return sysfs_emit(buf, "%s %s\n", dev_driver_string(dev->parent),
+ dev_name(dev->parent));
}
static DEVICE_ATTR_RO(name);
@@ -39,7 +39,7 @@ date_show(struct device *dev, struct device_attribute *attr, char *buf)
if (retval)
return retval;
- return sprintf(buf, "%ptRd\n", &tm);
+ return sysfs_emit(buf, "%ptRd\n", &tm);
}
static DEVICE_ATTR_RO(date);
@@ -53,7 +53,7 @@ time_show(struct device *dev, struct device_attribute *attr, char *buf)
if (retval)
return retval;
- return sprintf(buf, "%ptRt\n", &tm);
+ return sysfs_emit(buf, "%ptRt\n", &tm);
}
static DEVICE_ATTR_RO(time);
@@ -64,21 +64,17 @@ since_epoch_show(struct device *dev, struct device_attribute *attr, char *buf)
struct rtc_time tm;
retval = rtc_read_time(to_rtc_device(dev), &tm);
- if (retval == 0) {
- time64_t time;
-
- time = rtc_tm_to_time64(&tm);
- retval = sprintf(buf, "%lld\n", time);
- }
+ if (retval)
+ return retval;
- return retval;
+ return sysfs_emit(buf, "%lld\n", rtc_tm_to_time64(&tm));
}
static DEVICE_ATTR_RO(since_epoch);
static ssize_t
max_user_freq_show(struct device *dev, struct device_attribute *attr, char *buf)
{
- return sprintf(buf, "%d\n", to_rtc_device(dev)->max_user_freq);
+ return sysfs_emit(buf, "%d\n", to_rtc_device(dev)->max_user_freq);
}
static ssize_t
@@ -118,9 +114,9 @@ hctosys_show(struct device *dev, struct device_attribute *attr, char *buf)
if (rtc_hctosys_ret == 0 &&
strcmp(dev_name(&to_rtc_device(dev)->dev),
CONFIG_RTC_HCTOSYS_DEVICE) == 0)
- return sprintf(buf, "1\n");
+ return sysfs_emit(buf, "1\n");
#endif
- return sprintf(buf, "0\n");
+ return sysfs_emit(buf, "0\n");
}
static DEVICE_ATTR_RO(hctosys);
@@ -128,7 +124,6 @@ static ssize_t
wakealarm_show(struct device *dev, struct device_attribute *attr, char *buf)
{
ssize_t retval;
- time64_t alarm;
struct rtc_wkalrm alm;
/* Don't show disabled alarms. For uniformity, RTC alarms are
@@ -140,12 +135,13 @@ wakealarm_show(struct device *dev, struct device_attribute *attr, char *buf)
* alarms after they trigger, to ensure one-shot semantics.
*/
retval = rtc_read_alarm(to_rtc_device(dev), &alm);
- if (retval == 0 && alm.enabled) {
- alarm = rtc_tm_to_time64(&alm.time);
- retval = sprintf(buf, "%lld\n", alarm);
- }
+ if (retval)
+ return retval;
- return retval;
+ if (alm.enabled)
+ return sysfs_emit(buf, "%lld\n", rtc_tm_to_time64(&alm.time));
+
+ return 0;
}
static ssize_t
@@ -222,10 +218,10 @@ offset_show(struct device *dev, struct device_attribute *attr, char *buf)
long offset;
retval = rtc_read_offset(to_rtc_device(dev), &offset);
- if (retval == 0)
- retval = sprintf(buf, "%ld\n", offset);
+ if (retval)
+ return retval;
- return retval;
+ return sysfs_emit(buf, "%ld\n", offset);
}
static ssize_t
@@ -246,8 +242,8 @@ static DEVICE_ATTR_RW(offset);
static ssize_t
range_show(struct device *dev, struct device_attribute *attr, char *buf)
{
- return sprintf(buf, "[%lld,%llu]\n", to_rtc_device(dev)->range_min,
- to_rtc_device(dev)->range_max);
+ return sysfs_emit(buf, "[%lld,%llu]\n", to_rtc_device(dev)->range_min,
+ to_rtc_device(dev)->range_max);
}
static DEVICE_ATTR_RO(range);
@@ -302,11 +298,7 @@ static struct attribute_group rtc_attr_group = {
.is_visible = rtc_attr_is_visible,
.attrs = rtc_attrs,
};
-
-static const struct attribute_group *rtc_attr_groups[] = {
- &rtc_attr_group,
- NULL
-};
+__ATTRIBUTE_GROUPS(rtc_attr);
const struct attribute_group **rtc_get_dev_attribute_groups(void)
{
@@ -318,17 +310,21 @@ int rtc_add_groups(struct rtc_device *rtc, const struct attribute_group **grps)
size_t old_cnt = 0, add_cnt = 0, new_cnt;
const struct attribute_group **groups, **old;
- if (!grps)
+ if (grps) {
+ for (groups = grps; *groups; groups++)
+ add_cnt++;
+ /* No need to modify current groups if nothing new is provided */
+ if (add_cnt == 0)
+ return 0;
+ } else {
return -EINVAL;
+ }
groups = rtc->dev.groups;
if (groups)
for (; *groups; groups++)
old_cnt++;
- for (groups = grps; *groups; groups++)
- add_cnt++;
-
new_cnt = old_cnt + add_cnt + 1;
groups = devm_kcalloc(&rtc->dev, new_cnt, sizeof(*groups), GFP_KERNEL);
if (!groups)
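
The sprintf() to sysfs_emit() conversions above all follow one shape; a minimal sketch (demo_show is an illustrative name). sysfs_emit() checks that buf is the page-aligned buffer sysfs passed in and never writes past PAGE_SIZE, which a bare sprintf() cannot guarantee.

#include <linux/device.h>
#include <linux/sysfs.h>

static ssize_t demo_show(struct device *dev, struct device_attribute *attr,
			 char *buf)
{
	return sysfs_emit(buf, "%d\n", 42);
}
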
diff --git a/drivers/rtc/lib_test.c b/drivers/rtc/test_rtc_lib.c
index 0eebad1fe2a0..0eebad1fe2a0 100644
--- a/drivers/rtc/lib_test.c
+++ b/drivers/rtc/test_rtc_lib.c
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index f149ec28aefd..db3831f7f2f5 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -82,7 +82,7 @@ repeat:
if (folio_test_uptodate(folio))
goto out;
- fio.page = &folio->page;
+ fio.folio = folio;
err = f2fs_submit_page_bio(&fio);
if (err) {
@@ -309,7 +309,7 @@ int f2fs_ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages,
continue;
}
- fio.page = &folio->page;
+ fio.folio = folio;
err = f2fs_submit_page_bio(&fio);
f2fs_folio_put(folio, err ? true : false);
@@ -485,7 +485,7 @@ static bool f2fs_dirty_meta_folio(struct address_space *mapping,
folio_mark_uptodate(folio);
if (filemap_dirty_folio(mapping, folio)) {
inc_page_count(F2FS_M_SB(mapping), F2FS_DIRTY_META);
- set_page_private_reference(&folio->page);
+ folio_set_f2fs_reference(folio);
return true;
}
return false;
@@ -1045,7 +1045,7 @@ void f2fs_update_dirty_folio(struct inode *inode, struct folio *folio)
inode_inc_dirty_pages(inode);
spin_unlock(&sbi->inode_lock[type]);
- set_page_private_reference(&folio->page);
+ folio_set_f2fs_reference(folio);
}
void f2fs_remove_dirty_inode(struct inode *inode)
diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c
index b3c1df93a163..5c1f47e45dab 100644
--- a/fs/f2fs/compress.c
+++ b/fs/f2fs/compress.c
@@ -23,20 +23,18 @@
static struct kmem_cache *cic_entry_slab;
static struct kmem_cache *dic_entry_slab;
-static void *page_array_alloc(struct inode *inode, int nr)
+static void *page_array_alloc(struct f2fs_sb_info *sbi, int nr)
{
- struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
unsigned int size = sizeof(struct page *) * nr;
if (likely(size <= sbi->page_array_slab_size))
return f2fs_kmem_cache_alloc(sbi->page_array_slab,
- GFP_F2FS_ZERO, false, F2FS_I_SB(inode));
+ GFP_F2FS_ZERO, false, sbi);
return f2fs_kzalloc(sbi, size, GFP_NOFS);
}
-static void page_array_free(struct inode *inode, void *pages, int nr)
+static void page_array_free(struct f2fs_sb_info *sbi, void *pages, int nr)
{
- struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
unsigned int size = sizeof(struct page *) * nr;
if (!pages)
@@ -73,17 +71,15 @@ static pgoff_t start_idx_of_cluster(struct compress_ctx *cc)
return cc->cluster_idx << cc->log_cluster_size;
}
-bool f2fs_is_compressed_page(struct page *page)
+bool f2fs_is_compressed_page(struct folio *folio)
{
- if (!PagePrivate(page))
- return false;
- if (!page_private(page))
+ if (!folio->private)
return false;
- if (page_private_nonpointer(page))
+ if (folio_test_f2fs_nonpointer(folio))
return false;
- f2fs_bug_on(F2FS_P_SB(page),
- *((u32 *)page_private(page)) != F2FS_COMPRESSED_PAGE_MAGIC);
+ f2fs_bug_on(F2FS_F_SB(folio),
+ *((u32 *)folio->private) != F2FS_COMPRESSED_PAGE_MAGIC);
return true;
}
@@ -149,13 +145,13 @@ int f2fs_init_compress_ctx(struct compress_ctx *cc)
if (cc->rpages)
return 0;
- cc->rpages = page_array_alloc(cc->inode, cc->cluster_size);
+ cc->rpages = page_array_alloc(F2FS_I_SB(cc->inode), cc->cluster_size);
return cc->rpages ? 0 : -ENOMEM;
}
void f2fs_destroy_compress_ctx(struct compress_ctx *cc, bool reuse)
{
- page_array_free(cc->inode, cc->rpages, cc->cluster_size);
+ page_array_free(F2FS_I_SB(cc->inode), cc->rpages, cc->cluster_size);
cc->rpages = NULL;
cc->nr_rpages = 0;
cc->nr_cpages = 0;
@@ -216,13 +212,13 @@ static int lzo_decompress_pages(struct decompress_io_ctx *dic)
ret = lzo1x_decompress_safe(dic->cbuf->cdata, dic->clen,
dic->rbuf, &dic->rlen);
if (ret != LZO_E_OK) {
- f2fs_err_ratelimited(F2FS_I_SB(dic->inode),
+ f2fs_err_ratelimited(dic->sbi,
"lzo decompress failed, ret:%d", ret);
return -EIO;
}
if (dic->rlen != PAGE_SIZE << dic->log_cluster_size) {
- f2fs_err_ratelimited(F2FS_I_SB(dic->inode),
+ f2fs_err_ratelimited(dic->sbi,
"lzo invalid rlen:%zu, expected:%lu",
dic->rlen, PAGE_SIZE << dic->log_cluster_size);
return -EIO;
@@ -296,13 +292,13 @@ static int lz4_decompress_pages(struct decompress_io_ctx *dic)
ret = LZ4_decompress_safe(dic->cbuf->cdata, dic->rbuf,
dic->clen, dic->rlen);
if (ret < 0) {
- f2fs_err_ratelimited(F2FS_I_SB(dic->inode),
+ f2fs_err_ratelimited(dic->sbi,
"lz4 decompress failed, ret:%d", ret);
return -EIO;
}
if (ret != PAGE_SIZE << dic->log_cluster_size) {
- f2fs_err_ratelimited(F2FS_I_SB(dic->inode),
+ f2fs_err_ratelimited(dic->sbi,
"lz4 invalid ret:%d, expected:%lu",
ret, PAGE_SIZE << dic->log_cluster_size);
return -EIO;
@@ -424,13 +420,13 @@ static int zstd_init_decompress_ctx(struct decompress_io_ctx *dic)
workspace_size = zstd_dstream_workspace_bound(max_window_size);
- workspace = f2fs_vmalloc(F2FS_I_SB(dic->inode), workspace_size);
+ workspace = f2fs_vmalloc(dic->sbi, workspace_size);
if (!workspace)
return -ENOMEM;
stream = zstd_init_dstream(max_window_size, workspace, workspace_size);
if (!stream) {
- f2fs_err_ratelimited(F2FS_I_SB(dic->inode),
+ f2fs_err_ratelimited(dic->sbi,
"%s zstd_init_dstream failed", __func__);
vfree(workspace);
return -EIO;
@@ -466,14 +462,14 @@ static int zstd_decompress_pages(struct decompress_io_ctx *dic)
ret = zstd_decompress_stream(stream, &outbuf, &inbuf);
if (zstd_is_error(ret)) {
- f2fs_err_ratelimited(F2FS_I_SB(dic->inode),
+ f2fs_err_ratelimited(dic->sbi,
"%s zstd_decompress_stream failed, ret: %d",
__func__, zstd_get_error_code(ret));
return -EIO;
}
if (dic->rlen != outbuf.pos) {
- f2fs_err_ratelimited(F2FS_I_SB(dic->inode),
+ f2fs_err_ratelimited(dic->sbi,
"%s ZSTD invalid rlen:%zu, expected:%lu",
__func__, dic->rlen,
PAGE_SIZE << dic->log_cluster_size);
@@ -622,6 +618,7 @@ static void *f2fs_vmap(struct page **pages, unsigned int count)
static int f2fs_compress_pages(struct compress_ctx *cc)
{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(cc->inode);
struct f2fs_inode_info *fi = F2FS_I(cc->inode);
const struct f2fs_compress_ops *cops =
f2fs_cops[fi->i_compress_algorithm];
@@ -642,7 +639,7 @@ static int f2fs_compress_pages(struct compress_ctx *cc)
cc->nr_cpages = DIV_ROUND_UP(max_len, PAGE_SIZE);
cc->valid_nr_cpages = cc->nr_cpages;
- cc->cpages = page_array_alloc(cc->inode, cc->nr_cpages);
+ cc->cpages = page_array_alloc(sbi, cc->nr_cpages);
if (!cc->cpages) {
ret = -ENOMEM;
goto destroy_compress_ctx;
@@ -716,7 +713,7 @@ out_free_cpages:
if (cc->cpages[i])
f2fs_compress_free_page(cc->cpages[i]);
}
- page_array_free(cc->inode, cc->cpages, cc->nr_cpages);
+ page_array_free(sbi, cc->cpages, cc->nr_cpages);
cc->cpages = NULL;
destroy_compress_ctx:
if (cops->destroy_compress_ctx)
@@ -734,7 +731,7 @@ static void f2fs_release_decomp_mem(struct decompress_io_ctx *dic,
void f2fs_decompress_cluster(struct decompress_io_ctx *dic, bool in_task)
{
- struct f2fs_sb_info *sbi = F2FS_I_SB(dic->inode);
+ struct f2fs_sb_info *sbi = dic->sbi;
struct f2fs_inode_info *fi = F2FS_I(dic->inode);
const struct f2fs_compress_ops *cops =
f2fs_cops[fi->i_compress_algorithm];
@@ -796,25 +793,27 @@ out_end_io:
f2fs_decompress_end_io(dic, ret, in_task);
}
+static void f2fs_cache_compressed_page(struct f2fs_sb_info *sbi,
+ struct folio *folio, nid_t ino, block_t blkaddr);
+
/*
* This is called when a page of a compressed cluster has been read from disk
* (or failed to be read from disk). It checks whether this page was the last
* page being waited on in the cluster, and if so, it decompresses the cluster
* (or in the case of a failure, cleans up without actually decompressing).
*/
-void f2fs_end_read_compressed_page(struct page *page, bool failed,
+void f2fs_end_read_compressed_page(struct folio *folio, bool failed,
block_t blkaddr, bool in_task)
{
- struct decompress_io_ctx *dic =
- (struct decompress_io_ctx *)page_private(page);
- struct f2fs_sb_info *sbi = F2FS_I_SB(dic->inode);
+ struct decompress_io_ctx *dic = folio->private;
+ struct f2fs_sb_info *sbi = dic->sbi;
dec_page_count(sbi, F2FS_RD_DATA);
if (failed)
WRITE_ONCE(dic->failed, true);
else if (blkaddr && in_task)
- f2fs_cache_compressed_page(sbi, page,
+ f2fs_cache_compressed_page(sbi, folio,
dic->inode->i_ino, blkaddr);
if (atomic_dec_and_test(&dic->remaining_pages))
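
The remaining_pages counting above is the usual last-reference completion idiom, reduced to its core below (demo_* names are illustrative): every in-flight page holds one count, and whichever end_io path drops the count to zero finishes the cluster.

#include <linux/atomic.h>
#include <linux/compiler.h>
#include <linux/types.h>

struct demo_cluster {
	atomic_t remaining_pages;
	bool failed;
};

static void demo_page_done(struct demo_cluster *c, bool failed)
{
	if (failed)
		WRITE_ONCE(c->failed, true);

	if (!atomic_dec_and_test(&c->remaining_pages))
		return;

	if (READ_ONCE(c->failed)) {
		/* last page: clean up without decompressing */
	} else {
		/* last page: decompress the whole cluster */
	}
}
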
@@ -1340,7 +1339,7 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc,
cic->magic = F2FS_COMPRESSED_PAGE_MAGIC;
cic->inode = inode;
atomic_set(&cic->pending_pages, cc->valid_nr_cpages);
- cic->rpages = page_array_alloc(cc->inode, cc->cluster_size);
+ cic->rpages = page_array_alloc(sbi, cc->cluster_size);
if (!cic->rpages)
goto out_put_cic;
@@ -1420,7 +1419,7 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc,
(*submitted)++;
unlock_continue:
inode_dec_dirty_pages(cc->inode);
- unlock_page(fio.page);
+ folio_unlock(fio.folio);
}
if (fio.compr_blocks)
@@ -1442,13 +1441,13 @@ unlock_continue:
spin_unlock(&fi->i_size_lock);
f2fs_put_rpages(cc);
- page_array_free(cc->inode, cc->cpages, cc->nr_cpages);
+ page_array_free(sbi, cc->cpages, cc->nr_cpages);
cc->cpages = NULL;
f2fs_destroy_compress_ctx(cc, false);
return 0;
out_destroy_crypt:
- page_array_free(cc->inode, cic->rpages, cc->cluster_size);
+ page_array_free(sbi, cic->rpages, cc->cluster_size);
for (--i; i >= 0; i--) {
if (!cc->cpages[i])
@@ -1469,18 +1468,18 @@ out_free:
f2fs_compress_free_page(cc->cpages[i]);
cc->cpages[i] = NULL;
}
- page_array_free(cc->inode, cc->cpages, cc->nr_cpages);
+ page_array_free(sbi, cc->cpages, cc->nr_cpages);
cc->cpages = NULL;
return -EAGAIN;
}
-void f2fs_compress_write_end_io(struct bio *bio, struct page *page)
+void f2fs_compress_write_end_io(struct bio *bio, struct folio *folio)
{
+ struct page *page = &folio->page;
struct f2fs_sb_info *sbi = bio->bi_private;
- struct compress_io_ctx *cic =
- (struct compress_io_ctx *)page_private(page);
- enum count_type type = WB_DATA_TYPE(page,
- f2fs_is_compressed_page(page));
+ struct compress_io_ctx *cic = folio->private;
+ enum count_type type = WB_DATA_TYPE(folio,
+ f2fs_is_compressed_page(folio));
int i;
if (unlikely(bio->bi_status != BLK_STS_OK))
@@ -1499,7 +1498,7 @@ void f2fs_compress_write_end_io(struct bio *bio, struct page *page)
end_page_writeback(cic->rpages[i]);
}
- page_array_free(cic->inode, cic->rpages, cic->nr_rpages);
+ page_array_free(sbi, cic->rpages, cic->nr_rpages);
kmem_cache_free(cic_entry_slab, cic);
}
@@ -1633,14 +1632,13 @@ static inline bool allow_memalloc_for_decomp(struct f2fs_sb_info *sbi,
static int f2fs_prepare_decomp_mem(struct decompress_io_ctx *dic,
bool pre_alloc)
{
- const struct f2fs_compress_ops *cops =
- f2fs_cops[F2FS_I(dic->inode)->i_compress_algorithm];
+ const struct f2fs_compress_ops *cops = f2fs_cops[dic->compress_algorithm];
int i;
- if (!allow_memalloc_for_decomp(F2FS_I_SB(dic->inode), pre_alloc))
+ if (!allow_memalloc_for_decomp(dic->sbi, pre_alloc))
return 0;
- dic->tpages = page_array_alloc(dic->inode, dic->cluster_size);
+ dic->tpages = page_array_alloc(dic->sbi, dic->cluster_size);
if (!dic->tpages)
return -ENOMEM;
@@ -1670,10 +1668,9 @@ static int f2fs_prepare_decomp_mem(struct decompress_io_ctx *dic,
static void f2fs_release_decomp_mem(struct decompress_io_ctx *dic,
bool bypass_destroy_callback, bool pre_alloc)
{
- const struct f2fs_compress_ops *cops =
- f2fs_cops[F2FS_I(dic->inode)->i_compress_algorithm];
+ const struct f2fs_compress_ops *cops = f2fs_cops[dic->compress_algorithm];
- if (!allow_memalloc_for_decomp(F2FS_I_SB(dic->inode), pre_alloc))
+ if (!allow_memalloc_for_decomp(dic->sbi, pre_alloc))
return;
if (!bypass_destroy_callback && cops->destroy_decompress_ctx)
@@ -1700,7 +1697,7 @@ struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc)
if (!dic)
return ERR_PTR(-ENOMEM);
- dic->rpages = page_array_alloc(cc->inode, cc->cluster_size);
+ dic->rpages = page_array_alloc(sbi, cc->cluster_size);
if (!dic->rpages) {
kmem_cache_free(dic_entry_slab, dic);
return ERR_PTR(-ENOMEM);
@@ -1708,6 +1705,8 @@ struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc)
dic->magic = F2FS_COMPRESSED_PAGE_MAGIC;
dic->inode = cc->inode;
+ dic->sbi = sbi;
+ dic->compress_algorithm = F2FS_I(cc->inode)->i_compress_algorithm;
atomic_set(&dic->remaining_pages, cc->nr_cpages);
dic->cluster_idx = cc->cluster_idx;
dic->cluster_size = cc->cluster_size;
@@ -1721,7 +1720,7 @@ struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc)
dic->rpages[i] = cc->rpages[i];
dic->nr_rpages = cc->cluster_size;
- dic->cpages = page_array_alloc(dic->inode, dic->nr_cpages);
+ dic->cpages = page_array_alloc(sbi, dic->nr_cpages);
if (!dic->cpages) {
ret = -ENOMEM;
goto out_free;
@@ -1751,6 +1750,8 @@ static void f2fs_free_dic(struct decompress_io_ctx *dic,
bool bypass_destroy_callback)
{
int i;
+	/* use sbi in dic to avoid use-after-free of dic->inode */
+ struct f2fs_sb_info *sbi = dic->sbi;
f2fs_release_decomp_mem(dic, bypass_destroy_callback, true);
@@ -1762,7 +1763,7 @@ static void f2fs_free_dic(struct decompress_io_ctx *dic,
continue;
f2fs_compress_free_page(dic->tpages[i]);
}
- page_array_free(dic->inode, dic->tpages, dic->cluster_size);
+ page_array_free(sbi, dic->tpages, dic->cluster_size);
}
if (dic->cpages) {
@@ -1771,10 +1772,10 @@ static void f2fs_free_dic(struct decompress_io_ctx *dic,
continue;
f2fs_compress_free_page(dic->cpages[i]);
}
- page_array_free(dic->inode, dic->cpages, dic->nr_cpages);
+ page_array_free(sbi, dic->cpages, dic->nr_cpages);
}
- page_array_free(dic->inode, dic->rpages, dic->nr_rpages);
+ page_array_free(sbi, dic->rpages, dic->nr_rpages);
kmem_cache_free(dic_entry_slab, dic);
}
@@ -1793,8 +1794,7 @@ static void f2fs_put_dic(struct decompress_io_ctx *dic, bool in_task)
f2fs_free_dic(dic, false);
} else {
INIT_WORK(&dic->free_work, f2fs_late_free_dic);
- queue_work(F2FS_I_SB(dic->inode)->post_read_wq,
- &dic->free_work);
+ queue_work(dic->sbi->post_read_wq, &dic->free_work);
}
}
}
@@ -1921,8 +1921,8 @@ void f2fs_invalidate_compress_pages_range(struct f2fs_sb_info *sbi,
invalidate_mapping_pages(COMPRESS_MAPPING(sbi), blkaddr, blkaddr + len - 1);
}
-void f2fs_cache_compressed_page(struct f2fs_sb_info *sbi, struct page *page,
- nid_t ino, block_t blkaddr)
+static void f2fs_cache_compressed_page(struct f2fs_sb_info *sbi,
+ struct folio *folio, nid_t ino, block_t blkaddr)
{
struct folio *cfolio;
int ret;
@@ -1953,9 +1953,9 @@ void f2fs_cache_compressed_page(struct f2fs_sb_info *sbi, struct page *page,
return;
}
- set_page_private_data(&cfolio->page, ino);
+ folio_set_f2fs_data(cfolio, ino);
- memcpy(folio_address(cfolio), page_address(page), PAGE_SIZE);
+ memcpy(folio_address(cfolio), folio_address(folio), PAGE_SIZE);
folio_mark_uptodate(cfolio);
f2fs_folio_put(cfolio, true);
}
@@ -2012,7 +2012,7 @@ void f2fs_invalidate_compress_pages(struct f2fs_sb_info *sbi, nid_t ino)
continue;
}
- if (ino != get_page_private_data(&folio->page)) {
+ if (ino != folio_get_f2fs_data(folio)) {
folio_unlock(folio);
continue;
}
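
Several hunks above thread dic->sbi (and the cached compress_algorithm) through paths that previously re-derived them from dic->inode. The point is lifetime, as the use-after-free comment in f2fs_free_dic() notes: the inode can go away while a decompression context is still queued, so everything needed on the free path is cached at allocation time. A sketch of the idea (demo_* names are illustrative):

struct demo_dic {
	struct inode *inode;		/* may be stale by free time */
	struct f2fs_sb_info *sbi;	/* cached: safe on the free path */
	unsigned char compress_algorithm;	/* cached for the same reason */
};

static void demo_dic_init(struct demo_dic *dic, struct inode *inode)
{
	dic->inode = inode;
	dic->sbi = F2FS_I_SB(inode);
	dic->compress_algorithm = F2FS_I(inode)->i_compress_algorithm;
}
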
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 711ad80b38d0..7961e0ddfca3 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -47,14 +47,14 @@ void f2fs_destroy_bioset(void)
bioset_exit(&f2fs_bioset);
}
-bool f2fs_is_cp_guaranteed(struct page *page)
+bool f2fs_is_cp_guaranteed(const struct folio *folio)
{
- struct address_space *mapping = page_folio(page)->mapping;
+ struct address_space *mapping = folio->mapping;
struct inode *inode;
struct f2fs_sb_info *sbi;
- if (fscrypt_is_bounce_page(page))
- return page_private_gcing(fscrypt_pagecache_page(page));
+ if (fscrypt_is_bounce_folio(folio))
+ return folio_test_f2fs_gcing(fscrypt_pagecache_folio(folio));
inode = mapping->host;
sbi = F2FS_I_SB(inode);
@@ -65,7 +65,7 @@ bool f2fs_is_cp_guaranteed(struct page *page)
return true;
if ((S_ISREG(inode->i_mode) && IS_NOQUOTA(inode)) ||
- page_private_gcing(page))
+ folio_test_f2fs_gcing(folio))
return true;
return false;
}
@@ -142,9 +142,9 @@ static void f2fs_finish_read_bio(struct bio *bio, bool in_task)
bio_for_each_folio_all(fi, bio) {
struct folio *folio = fi.folio;
- if (f2fs_is_compressed_page(&folio->page)) {
+ if (f2fs_is_compressed_page(folio)) {
if (ctx && !ctx->decompression_attempted)
- f2fs_end_read_compressed_page(&folio->page, true, 0,
+ f2fs_end_read_compressed_page(folio, true, 0,
in_task);
f2fs_put_folio_dic(folio, in_task);
continue;
@@ -181,14 +181,13 @@ static void f2fs_verify_bio(struct work_struct *work)
* as those were handled separately by f2fs_end_read_compressed_page().
*/
if (may_have_compressed_pages) {
- struct bio_vec *bv;
- struct bvec_iter_all iter_all;
+ struct folio_iter fi;
- bio_for_each_segment_all(bv, bio, iter_all) {
- struct page *page = bv->bv_page;
+ bio_for_each_folio_all(fi, bio) {
+ struct folio *folio = fi.folio;
- if (!f2fs_is_compressed_page(page) &&
- !fsverity_verify_page(page)) {
+ if (!f2fs_is_compressed_page(folio) &&
+ !fsverity_verify_page(&folio->page)) {
bio->bi_status = BLK_STS_IOERR;
break;
}
@@ -233,16 +232,15 @@ static void f2fs_verify_and_finish_bio(struct bio *bio, bool in_task)
static void f2fs_handle_step_decompress(struct bio_post_read_ctx *ctx,
bool in_task)
{
- struct bio_vec *bv;
- struct bvec_iter_all iter_all;
+ struct folio_iter fi;
bool all_compressed = true;
block_t blkaddr = ctx->fs_blkaddr;
- bio_for_each_segment_all(bv, ctx->bio, iter_all) {
- struct page *page = bv->bv_page;
+ bio_for_each_folio_all(fi, ctx->bio) {
+ struct folio *folio = fi.folio;
- if (f2fs_is_compressed_page(page))
- f2fs_end_read_compressed_page(page, false, blkaddr,
+ if (f2fs_is_compressed_page(folio))
+ f2fs_end_read_compressed_page(folio, false, blkaddr,
in_task);
else
all_compressed = false;
@@ -280,9 +278,9 @@ static void f2fs_post_read_work(struct work_struct *work)
static void f2fs_read_end_io(struct bio *bio)
{
- struct f2fs_sb_info *sbi = F2FS_P_SB(bio_first_page_all(bio));
+ struct f2fs_sb_info *sbi = F2FS_F_SB(bio_first_folio_all(bio));
struct bio_post_read_ctx *ctx;
- bool intask = in_task();
+ bool intask = in_task() && !irqs_disabled();
iostat_update_and_unbind_ctx(bio);
ctx = bio->bi_private;
@@ -339,13 +337,13 @@ static void f2fs_write_end_io(struct bio *bio)
}
#ifdef CONFIG_F2FS_FS_COMPRESSION
- if (f2fs_is_compressed_page(&folio->page)) {
- f2fs_compress_write_end_io(bio, &folio->page);
+ if (f2fs_is_compressed_page(folio)) {
+ f2fs_compress_write_end_io(bio, folio);
continue;
}
#endif
- type = WB_DATA_TYPE(&folio->page, false);
+ type = WB_DATA_TYPE(folio, false);
if (unlikely(bio->bi_status != BLK_STS_OK)) {
mapping_set_error(folio->mapping, -EIO);
@@ -355,12 +353,12 @@ static void f2fs_write_end_io(struct bio *bio)
}
f2fs_bug_on(sbi, is_node_folio(folio) &&
- folio->index != nid_of_node(&folio->page));
+ folio->index != nid_of_node(folio));
dec_page_count(sbi, type);
if (f2fs_in_warm_node_list(sbi, folio))
f2fs_del_fsync_node_entry(sbi, folio);
- clear_page_private_gcing(&folio->page);
+ folio_clear_f2fs_gcing(folio);
folio_end_writeback(folio);
}
if (!get_pages(sbi, F2FS_WB_CP_DATA) &&
@@ -419,7 +417,6 @@ int f2fs_target_device_index(struct f2fs_sb_info *sbi, block_t blkaddr)
static blk_opf_t f2fs_io_flags(struct f2fs_io_info *fio)
{
unsigned int temp_mask = GENMASK(NR_TEMP_TYPE - 1, 0);
- struct folio *fio_folio = page_folio(fio->page);
unsigned int fua_flag, meta_flag, io_flag;
blk_opf_t op_flags = 0;
@@ -447,7 +444,7 @@ static blk_opf_t f2fs_io_flags(struct f2fs_io_info *fio)
op_flags |= REQ_FUA;
if (fio->type == DATA &&
- F2FS_I(fio_folio->mapping->host)->ioprio_hint == F2FS_IOPRIO_WRITE)
+ F2FS_I(fio->folio->mapping->host)->ioprio_hint == F2FS_IOPRIO_WRITE)
op_flags |= REQ_PRIO;
return op_flags;
@@ -546,14 +543,14 @@ static void __submit_merged_bio(struct f2fs_bio_info *io)
}
static bool __has_merged_page(struct bio *bio, struct inode *inode,
- struct page *page, nid_t ino)
+ struct folio *folio, nid_t ino)
{
struct folio_iter fi;
if (!bio)
return false;
- if (!inode && !page && !ino)
+ if (!inode && !folio && !ino)
return true;
bio_for_each_folio_all(fi, bio) {
@@ -564,7 +561,7 @@ static bool __has_merged_page(struct bio *bio, struct inode *inode,
if (IS_ERR(target))
continue;
}
- if (f2fs_is_compressed_page(&target->page)) {
+ if (f2fs_is_compressed_page(target)) {
target = f2fs_compress_control_folio(target);
if (IS_ERR(target))
continue;
@@ -572,9 +569,9 @@ static bool __has_merged_page(struct bio *bio, struct inode *inode,
if (inode && inode == target->mapping->host)
return true;
- if (page && page == &target->page)
+ if (folio && folio == target)
return true;
- if (ino && ino == ino_of_node(&target->page))
+ if (ino && ino == ino_of_node(target))
return true;
}
@@ -641,7 +638,7 @@ unlock_out:
}
static void __submit_merged_write_cond(struct f2fs_sb_info *sbi,
- struct inode *inode, struct page *page,
+ struct inode *inode, struct folio *folio,
nid_t ino, enum page_type type, bool force)
{
enum temp_type temp;
@@ -653,7 +650,7 @@ static void __submit_merged_write_cond(struct f2fs_sb_info *sbi,
struct f2fs_bio_info *io = sbi->write_io[btype] + temp;
f2fs_down_read(&io->io_rwsem);
- ret = __has_merged_page(io->bio, inode, page, ino);
+ ret = __has_merged_page(io->bio, inode, folio, ino);
f2fs_up_read(&io->io_rwsem);
}
if (ret)
@@ -671,10 +668,10 @@ void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type)
}
void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi,
- struct inode *inode, struct page *page,
+ struct inode *inode, struct folio *folio,
nid_t ino, enum page_type type)
{
- __submit_merged_write_cond(sbi, inode, page, ino, type, false);
+ __submit_merged_write_cond(sbi, inode, folio, ino, type, false);
}
void f2fs_flush_merged_writes(struct f2fs_sb_info *sbi)
@@ -691,7 +688,7 @@ void f2fs_flush_merged_writes(struct f2fs_sb_info *sbi)
int f2fs_submit_page_bio(struct f2fs_io_info *fio)
{
struct bio *bio;
- struct folio *fio_folio = page_folio(fio->page);
+ struct folio *fio_folio = fio->folio;
struct folio *data_folio = fio->encrypted_page ?
page_folio(fio->encrypted_page) : fio_folio;
@@ -713,7 +710,7 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio)
wbc_account_cgroup_owner(fio->io_wbc, fio_folio, PAGE_SIZE);
inc_page_count(fio->sbi, is_read_io(fio->op) ?
- __read_io_type(data_folio) : WB_DATA_TYPE(fio->page, false));
+ __read_io_type(data_folio) : WB_DATA_TYPE(fio->folio, false));
if (is_read_io(bio_op(bio)))
f2fs_submit_read_bio(fio->sbi, bio, fio->type);
@@ -779,7 +776,7 @@ static void del_bio_entry(struct bio_entry *be)
static int add_ipu_page(struct f2fs_io_info *fio, struct bio **bio,
struct page *page)
{
- struct folio *fio_folio = page_folio(fio->page);
+ struct folio *fio_folio = fio->folio;
struct f2fs_sb_info *sbi = fio->sbi;
enum temp_type temp;
bool found = false;
@@ -848,7 +845,7 @@ void f2fs_submit_merged_ipu_write(struct f2fs_sb_info *sbi,
found = (target == be->bio);
else
found = __has_merged_page(be->bio, NULL,
- &folio->page, 0);
+ folio, 0);
if (found)
break;
}
@@ -865,7 +862,7 @@ void f2fs_submit_merged_ipu_write(struct f2fs_sb_info *sbi,
found = (target == be->bio);
else
found = __has_merged_page(be->bio, NULL,
- &folio->page, 0);
+ folio, 0);
if (found) {
target = be->bio;
del_bio_entry(be);
@@ -886,15 +883,15 @@ void f2fs_submit_merged_ipu_write(struct f2fs_sb_info *sbi,
int f2fs_merge_page_bio(struct f2fs_io_info *fio)
{
struct bio *bio = *fio->bio;
- struct page *page = fio->encrypted_page ?
- fio->encrypted_page : fio->page;
- struct folio *folio = page_folio(fio->page);
+ struct folio *data_folio = fio->encrypted_page ?
+ page_folio(fio->encrypted_page) : fio->folio;
+ struct folio *folio = fio->folio;
if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr,
__is_meta_io(fio) ? META_GENERIC : DATA_GENERIC))
return -EFSCORRUPTED;
- trace_f2fs_submit_folio_bio(page_folio(page), fio);
+ trace_f2fs_submit_folio_bio(data_folio, fio);
if (bio && !page_is_mergeable(fio->sbi, bio, *fio->last_block,
fio->new_blkaddr))
@@ -905,16 +902,16 @@ alloc_new:
f2fs_set_bio_crypt_ctx(bio, folio->mapping->host,
folio->index, fio, GFP_NOIO);
- add_bio_entry(fio->sbi, bio, page, fio->temp);
+ add_bio_entry(fio->sbi, bio, &data_folio->page, fio->temp);
} else {
- if (add_ipu_page(fio, &bio, page))
+ if (add_ipu_page(fio, &bio, &data_folio->page))
goto alloc_new;
}
if (fio->io_wbc)
wbc_account_cgroup_owner(fio->io_wbc, folio, folio_size(folio));
- inc_page_count(fio->sbi, WB_DATA_TYPE(page, false));
+ inc_page_count(fio->sbi, WB_DATA_TYPE(data_folio, false));
*fio->last_block = fio->new_blkaddr;
*fio->bio = bio;
@@ -949,7 +946,7 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
struct f2fs_sb_info *sbi = fio->sbi;
enum page_type btype = PAGE_TYPE_OF_BIO(fio->type);
struct f2fs_bio_info *io = sbi->write_io[btype] + fio->temp;
- struct page *bio_page;
+ struct folio *bio_folio;
enum count_type type;
f2fs_bug_on(sbi, is_read_io(fio->op));
@@ -980,44 +977,44 @@ next:
verify_fio_blkaddr(fio);
if (fio->encrypted_page)
- bio_page = fio->encrypted_page;
+ bio_folio = page_folio(fio->encrypted_page);
else if (fio->compressed_page)
- bio_page = fio->compressed_page;
+ bio_folio = page_folio(fio->compressed_page);
else
- bio_page = fio->page;
+ bio_folio = fio->folio;
/* set submitted = true as a return value */
fio->submitted = 1;
- type = WB_DATA_TYPE(bio_page, fio->compressed_page);
+ type = WB_DATA_TYPE(bio_folio, fio->compressed_page);
inc_page_count(sbi, type);
if (io->bio &&
(!io_is_mergeable(sbi, io->bio, io, fio, io->last_block_in_bio,
fio->new_blkaddr) ||
!f2fs_crypt_mergeable_bio(io->bio, fio_inode(fio),
- page_folio(bio_page)->index, fio)))
+ bio_folio->index, fio)))
__submit_merged_bio(io);
alloc_new:
if (io->bio == NULL) {
io->bio = __bio_alloc(fio, BIO_MAX_VECS);
f2fs_set_bio_crypt_ctx(io->bio, fio_inode(fio),
- page_folio(bio_page)->index, fio, GFP_NOIO);
+ bio_folio->index, fio, GFP_NOIO);
io->fio = *fio;
}
- if (bio_add_page(io->bio, bio_page, PAGE_SIZE, 0) < PAGE_SIZE) {
+ if (!bio_add_folio(io->bio, bio_folio, folio_size(bio_folio), 0)) {
__submit_merged_bio(io);
goto alloc_new;
}
if (fio->io_wbc)
- wbc_account_cgroup_owner(fio->io_wbc, page_folio(fio->page),
- PAGE_SIZE);
+ wbc_account_cgroup_owner(fio->io_wbc, fio->folio,
+ folio_size(fio->folio));
io->last_block_in_bio = fio->new_blkaddr;
- trace_f2fs_submit_folio_write(page_folio(fio->page), fio);
+ trace_f2fs_submit_folio_write(fio->folio, fio);
#ifdef CONFIG_BLK_DEV_ZONED
if (f2fs_sb_has_blkzoned(sbi) && btype < META &&
is_end_zone_blkaddr(sbi, fio->new_blkaddr)) {
@@ -1553,10 +1550,14 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, int flag)
unsigned int start_pgofs;
int bidx = 0;
bool is_hole;
+ bool lfs_dio_write;
if (!maxblocks)
return 0;
+ lfs_dio_write = (flag == F2FS_GET_BLOCK_DIO && f2fs_lfs_mode(sbi) &&
+ map->m_may_create);
+
if (!map->m_may_create && f2fs_map_blocks_cached(inode, map, flag))
goto out;
@@ -1572,8 +1573,11 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, int flag)
end = pgofs + maxblocks;
next_dnode:
- if (map->m_may_create)
+ if (map->m_may_create) {
+ if (f2fs_lfs_mode(sbi))
+ f2fs_balance_fs(sbi, true);
f2fs_map_lock(sbi, flag);
+ }
/* When reading holes, we need the corresponding node page */
set_new_dnode(&dn, inode, NULL, NULL, 0);
@@ -1589,7 +1593,7 @@ next_dnode:
start_pgofs = pgofs;
prealloc = 0;
last_ofs_in_node = ofs_in_node = dn.ofs_in_node;
- end_offset = ADDRS_PER_PAGE(&dn.node_folio->page, inode);
+ end_offset = ADDRS_PER_PAGE(dn.node_folio, inode);
next_block:
blkaddr = f2fs_data_blkaddr(&dn);
@@ -1603,7 +1607,7 @@ next_block:
/* use out-of-place update for direct IO under LFS mode */
if (map->m_may_create && (is_hole ||
(flag == F2FS_GET_BLOCK_DIO && f2fs_lfs_mode(sbi) &&
- !f2fs_is_pinned_file(inode)))) {
+ !f2fs_is_pinned_file(inode) && map->m_last_pblk != blkaddr))) {
if (unlikely(f2fs_cp_error(sbi))) {
err = -EIO;
goto sync_out;
@@ -1687,10 +1691,15 @@ next_block:
if (map->m_multidev_dio)
map->m_bdev = FDEV(bidx).bdev;
+
+ if (lfs_dio_write)
+ map->m_last_pblk = NULL_ADDR;
} else if (map_is_mergeable(sbi, map, blkaddr, flag, bidx, ofs)) {
ofs++;
map->m_len++;
} else {
+ if (lfs_dio_write && !f2fs_is_pinned_file(inode))
+ map->m_last_pblk = blkaddr;
goto sync_out;
}
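A reconstruction of the m_last_pblk hand-off between successive f2fs_map_blocks() calls of one LFS-mode DIO write, pieced together from the hunks above (illustrative, not authoritative):

	/*
	 * call N:   allocates blocks one by one; the newest allocation is not
	 *           contiguous with the extent built so far
	 *           -> remember it in m_last_pblk and return the extent
	 * call N+1: resumes at that position; blkaddr == m_last_pblk, so the
	 *           already-allocated block is reused rather than allocated twice
	 *           (see the m_last_pblk != blkaddr test above); once allocation
	 *           proceeds again, m_last_pblk is reset to NULL_ADDR
	 */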
@@ -1715,14 +1724,6 @@ skip:
dn.ofs_in_node = end_offset;
}
- if (flag == F2FS_GET_BLOCK_DIO && f2fs_lfs_mode(sbi) &&
- map->m_may_create) {
- /* the next block to be allocated may not be contiguous. */
- if (GET_SEGOFF_FROM_SEG0(sbi, blkaddr) % BLKS_PER_SEC(sbi) ==
- CAP_BLKS_PER_SEC(sbi) - 1)
- goto sync_out;
- }
-
if (pgofs >= end)
goto sync_out;
else if (dn.ofs_in_node < end_offset)
@@ -2303,7 +2304,7 @@ submit_and_realloc:
}
if (!bio) {
- bio = f2fs_grab_read_bio(inode, blkaddr, nr_pages,
+ bio = f2fs_grab_read_bio(inode, blkaddr, nr_pages - i,
f2fs_ra_op_flags(rac),
folio->index, for_write);
if (IS_ERR(bio)) {
@@ -2376,6 +2377,14 @@ static int f2fs_mpage_readpages(struct inode *inode,
unsigned max_nr_pages = nr_pages;
int ret = 0;
+#ifdef CONFIG_F2FS_FS_COMPRESSION
+ if (f2fs_compressed_file(inode)) {
+ index = rac ? readahead_index(rac) : folio->index;
+ max_nr_pages = round_up(index + nr_pages, cc.cluster_size) -
+ round_down(index, cc.cluster_size);
+ }
+#endif
+
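The clamp above widens max_nr_pages so that a partial request still covers whole compression clusters. A worked example with assumed values:

	/* assumed: index == 5, nr_pages == 8, cc.cluster_size == 4 */
	max_nr_pages = round_up(5 + 8, 4) - round_down(5, 4);	/* 16 - 4 = 12 */
	/* the window grows from 8 pages to the 12 pages spanning the
	 * clusters that cover pages 4..15 */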
map.m_pblk = 0;
map.m_lblk = 0;
map.m_len = 0;
@@ -2642,7 +2651,7 @@ static inline bool need_inplace_update(struct f2fs_io_info *fio)
int f2fs_do_write_data_page(struct f2fs_io_info *fio)
{
- struct folio *folio = page_folio(fio->page);
+ struct folio *folio = fio->folio;
struct inode *inode = folio->mapping->host;
struct dnode_of_data dn;
struct node_info ni;
@@ -2652,7 +2661,7 @@ int f2fs_do_write_data_page(struct f2fs_io_info *fio)
/* Use COW inode to make dnode_of_data for atomic write */
atomic_commit = f2fs_is_atomic_file(inode) &&
- page_private_atomic(folio_page(folio, 0));
+ folio_test_f2fs_atomic(folio);
if (atomic_commit)
set_new_dnode(&dn, F2FS_I(inode)->cow_inode, NULL, NULL, 0);
else
@@ -2683,7 +2692,7 @@ int f2fs_do_write_data_page(struct f2fs_io_info *fio)
/* This folio is already truncated */
if (fio->old_blkaddr == NULL_ADDR) {
folio_clear_uptodate(folio);
- clear_page_private_gcing(folio_page(folio, 0));
+ folio_clear_f2fs_gcing(folio);
goto out_writepage;
}
got_it:
@@ -2753,7 +2762,7 @@ got_it:
trace_f2fs_do_write_data_page(folio, OPU);
set_inode_flag(inode, FI_APPEND_WRITE);
if (atomic_commit)
- clear_page_private_atomic(folio_page(folio, 0));
+ folio_clear_f2fs_atomic(folio);
out_writepage:
f2fs_put_dnode(&dn);
out:
@@ -2771,7 +2780,6 @@ int f2fs_write_single_data_page(struct folio *folio, int *submitted,
bool allow_balance)
{
struct inode *inode = folio->mapping->host;
- struct page *page = folio_page(folio, 0);
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
loff_t i_size = i_size_read(inode);
const pgoff_t end_index = ((unsigned long long)i_size)
@@ -2788,7 +2796,7 @@ int f2fs_write_single_data_page(struct folio *folio, int *submitted,
.op = REQ_OP_WRITE,
.op_flags = wbc_to_write_flags(wbc),
.old_blkaddr = NULL_ADDR,
- .page = page,
+ .folio = folio,
.encrypted_page = NULL,
.submitted = 0,
.compr_blocks = compr_blocks,
@@ -2890,7 +2898,7 @@ out:
inode_dec_dirty_pages(inode);
if (err) {
folio_clear_uptodate(folio);
- clear_page_private_gcing(page);
+ folio_clear_f2fs_gcing(folio);
}
folio_unlock(folio);
if (!S_ISDIR(inode->i_mode) && !IS_NOQUOTA(inode) &&
@@ -3376,7 +3384,7 @@ restart:
f2fs_do_read_inline_data(folio, ifolio);
set_inode_flag(inode, FI_DATA_EXIST);
if (inode->i_nlink)
- set_page_private_inline(&ifolio->page);
+ folio_set_f2fs_inline(ifolio);
goto out;
}
err = f2fs_convert_inline_folio(&dn, folio);
@@ -3698,7 +3706,7 @@ static int f2fs_write_end(const struct kiocb *iocb,
folio_mark_dirty(folio);
if (f2fs_is_atomic_file(inode))
- set_page_private_atomic(folio_page(folio, 0));
+ folio_set_f2fs_atomic(folio);
if (pos + copied > i_size_read(inode) &&
!f2fs_verity_in_progress(inode)) {
@@ -3733,7 +3741,7 @@ void f2fs_invalidate_folio(struct folio *folio, size_t offset, size_t length)
f2fs_remove_dirty_inode(inode);
}
}
- clear_page_private_all(&folio->page);
+ folio_detach_private(folio);
}
bool f2fs_release_folio(struct folio *folio, gfp_t wait)
@@ -3742,7 +3750,7 @@ bool f2fs_release_folio(struct folio *folio, gfp_t wait)
if (folio_test_dirty(folio))
return false;
- clear_page_private_all(&folio->page);
+ folio_detach_private(folio);
return true;
}
@@ -4160,7 +4168,7 @@ static int f2fs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
unsigned int flags, struct iomap *iomap,
struct iomap *srcmap)
{
- struct f2fs_map_blocks map = {};
+ struct f2fs_map_blocks map = { NULL, };
pgoff_t next_pgofs = 0;
int err;
@@ -4169,6 +4177,10 @@ static int f2fs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
map.m_next_pgofs = &next_pgofs;
map.m_seg_type = f2fs_rw_hint_to_seg_type(F2FS_I_SB(inode),
inode->i_write_hint);
+ if (flags & IOMAP_WRITE && iomap->private) {
+ map.m_last_pblk = (unsigned long)iomap->private;
+ iomap->private = NULL;
+ }
/*
* If the blocks being overwritten are already allocated,
@@ -4207,6 +4219,9 @@ static int f2fs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
iomap->flags |= IOMAP_F_MERGED;
iomap->bdev = map.m_bdev;
iomap->addr = F2FS_BLK_TO_BYTES(map.m_pblk);
+
+ if (flags & IOMAP_WRITE && map.m_last_pblk)
+ iomap->private = (void *)map.m_last_pblk;
} else {
if (flags & IOMAP_WRITE)
return -ENOTBLK;
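Note how the two f2fs_iomap_begin() hunks above cooperate: the cookie published in iomap->private at the end of one mapping is consumed at the start of the next ->iomap_begin() call for the same direct write, which the iomap core preserves across iterations (as this patch relies on). Keeping the state in the iomap rather than the inode means concurrent DIO writers cannot trample each other. Condensed, the round trip is:

	/* consume the cookie left by the previous iteration */
	if (flags & IOMAP_WRITE && iomap->private) {
		map.m_last_pblk = (unsigned long)iomap->private;
		iomap->private = NULL;
	}
	/* ... f2fs_map_blocks() may set map.m_last_pblk again ... */
	if (flags & IOMAP_WRITE && map.m_last_pblk)
		iomap->private = (void *)map.m_last_pblk;	/* for the next call */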
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index 16c2dfb4f595..43a83bbd3bc5 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -21,7 +21,7 @@
#include "gc.h"
static LIST_HEAD(f2fs_stat_list);
-static DEFINE_RAW_SPINLOCK(f2fs_stat_lock);
+static DEFINE_SPINLOCK(f2fs_stat_lock);
#ifdef CONFIG_DEBUG_FS
static struct dentry *f2fs_debugfs_root;
#endif
@@ -91,7 +91,7 @@ static void update_multidevice_stats(struct f2fs_sb_info *sbi)
seg_blks = get_seg_entry(sbi, j)->valid_blocks;
/* update segment stats */
- if (IS_CURSEG(sbi, j))
+ if (is_curseg(sbi, j))
dev_stats[i].devstats[0][DEVSTAT_INUSE]++;
else if (seg_blks == BLKS_PER_SEG(sbi))
dev_stats[i].devstats[0][DEVSTAT_FULL]++;
@@ -109,7 +109,7 @@ static void update_multidevice_stats(struct f2fs_sb_info *sbi)
sec_blks = get_sec_entry(sbi, j)->valid_blocks;
/* update section stats */
- if (IS_CURSEC(sbi, GET_SEC_FROM_SEG(sbi, j)))
+ if (is_cursec(sbi, GET_SEC_FROM_SEG(sbi, j)))
dev_stats[i].devstats[1][DEVSTAT_INUSE]++;
else if (sec_blks == BLKS_PER_SEC(sbi))
dev_stats[i].devstats[1][DEVSTAT_FULL]++;
@@ -439,9 +439,8 @@ static int stat_show(struct seq_file *s, void *v)
{
struct f2fs_stat_info *si;
int i = 0, j = 0;
- unsigned long flags;
- raw_spin_lock_irqsave(&f2fs_stat_lock, flags);
+ spin_lock(&f2fs_stat_lock);
list_for_each_entry(si, &f2fs_stat_list, stat_list) {
struct f2fs_sb_info *sbi = si->sbi;
@@ -753,7 +752,7 @@ static int stat_show(struct seq_file *s, void *v)
seq_printf(s, " - paged : %llu KB\n",
si->page_mem >> 10);
}
- raw_spin_unlock_irqrestore(&f2fs_stat_lock, flags);
+ spin_unlock(&f2fs_stat_lock);
return 0;
}
@@ -765,7 +764,6 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi)
struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
struct f2fs_stat_info *si;
struct f2fs_dev_stats *dev_stats;
- unsigned long flags;
int i;
si = f2fs_kzalloc(sbi, sizeof(struct f2fs_stat_info), GFP_KERNEL);
@@ -817,9 +815,9 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi)
atomic_set(&sbi->max_aw_cnt, 0);
- raw_spin_lock_irqsave(&f2fs_stat_lock, flags);
+ spin_lock(&f2fs_stat_lock);
list_add_tail(&si->stat_list, &f2fs_stat_list);
- raw_spin_unlock_irqrestore(&f2fs_stat_lock, flags);
+ spin_unlock(&f2fs_stat_lock);
return 0;
}
@@ -827,11 +825,10 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi)
void f2fs_destroy_stats(struct f2fs_sb_info *sbi)
{
struct f2fs_stat_info *si = F2FS_STAT(sbi);
- unsigned long flags;
- raw_spin_lock_irqsave(&f2fs_stat_lock, flags);
+ spin_lock(&f2fs_stat_lock);
list_del(&si->stat_list);
- raw_spin_unlock_irqrestore(&f2fs_stat_lock, flags);
+ spin_unlock(&f2fs_stat_lock);
kfree(si->dev_stats);
kfree(si);
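The lock conversion in this file is uniform: every f2fs_stat_lock critical section runs in process context, so both the raw lock variant and the irqsave/irqrestore pair are unnecessary. The shape of the change, as a sketch:

	/* before: IRQ-safe, usable from any context */
	unsigned long flags;
	raw_spin_lock_irqsave(&f2fs_stat_lock, flags);
	/* ... */
	raw_spin_unlock_irqrestore(&f2fs_stat_lock, flags);

	/* after: process context only, plain lock/unlock suffices */
	spin_lock(&f2fs_stat_lock);
	/* ... */
	spin_unlock(&f2fs_stat_lock);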
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index c36b3b22bfff..fffd7749d6d1 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -454,7 +454,7 @@ static void init_dent_inode(struct inode *dir, struct inode *inode,
f2fs_folio_wait_writeback(ifolio, NODE, true, true);
/* copy name info. to this inode folio */
- ri = F2FS_INODE(&ifolio->page);
+ ri = F2FS_INODE(ifolio);
ri->i_namelen = cpu_to_le32(fname->disk_name.len);
memcpy(ri->i_name, fname->disk_name.name, fname->disk_name.len);
if (IS_ENCRYPTED(dir)) {
@@ -897,7 +897,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct folio *folio,
f2fs_clear_page_cache_dirty_tag(folio);
folio_clear_dirty_for_io(folio);
folio_clear_uptodate(folio);
- clear_page_private_all(&folio->page);
+ folio_detach_private(folio);
inode_dec_dirty_pages(dir);
f2fs_remove_dirty_inode(dir);
diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c
index cfe925a3d555..199c1e7a83ef 100644
--- a/fs/f2fs/extent_cache.c
+++ b/fs/f2fs/extent_cache.c
@@ -19,10 +19,10 @@
#include "node.h"
#include <trace/events/f2fs.h>
-bool sanity_check_extent_cache(struct inode *inode, struct page *ipage)
+bool sanity_check_extent_cache(struct inode *inode, struct folio *ifolio)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
- struct f2fs_extent *i_ext = &F2FS_INODE(ipage)->i_ext;
+ struct f2fs_extent *i_ext = &F2FS_INODE(ifolio)->i_ext;
struct extent_info ei;
int devi;
@@ -411,10 +411,10 @@ void f2fs_init_read_extent_tree(struct inode *inode, struct folio *ifolio)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct extent_tree_info *eti = &sbi->extent_tree[EX_READ];
- struct f2fs_extent *i_ext = &F2FS_INODE(&ifolio->page)->i_ext;
+ struct f2fs_extent *i_ext = &F2FS_INODE(ifolio)->i_ext;
struct extent_tree *et;
struct extent_node *en;
- struct extent_info ei;
+ struct extent_info ei = {0};
if (!__may_extent_tree(inode, EX_READ)) {
/* drop largest read extent */
@@ -934,7 +934,7 @@ static void __update_extent_cache(struct dnode_of_data *dn, enum extent_type typ
if (!__may_extent_tree(dn->inode, type))
return;
- ei.fofs = f2fs_start_bidx_of_node(ofs_of_node(&dn->node_folio->page), dn->inode) +
+ ei.fofs = f2fs_start_bidx_of_node(ofs_of_node(dn->node_folio), dn->inode) +
dn->ofs_in_node;
ei.len = 1;
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index c78464792ceb..46be7560548c 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -386,7 +386,7 @@ struct discard_cmd {
struct rb_node rb_node; /* rb node located in rb-tree */
struct discard_info di; /* discard info */
struct list_head list; /* command list */
- struct completion wait; /* compleation */
+ struct completion wait; /* completion */
struct block_device *bdev; /* bdev */
unsigned short ref; /* reference count */
unsigned char state; /* state */
@@ -732,6 +732,7 @@ struct f2fs_map_blocks {
block_t m_lblk;
unsigned int m_len;
unsigned int m_flags;
+ unsigned long m_last_pblk; /* last allocated block, only used for DIO in LFS mode */
pgoff_t *m_next_pgofs; /* point next possible non-hole pgofs */
pgoff_t *m_next_extent; /* point to next possible extent */
int m_seg_type;
@@ -875,6 +876,7 @@ struct f2fs_inode_info {
/* linked in global inode list for cache donation */
struct list_head gdonate_list;
pgoff_t donate_start, donate_end; /* inclusive */
+ atomic_t open_count; /* # of open files */
struct task_struct *atomic_write_task; /* store atomic write task */
struct extent_tree *extent_tree[NR_EXTENT_CACHES];
@@ -1123,8 +1125,8 @@ struct f2fs_sm_info {
* f2fs monitors the number of several block types such as on-writeback,
* dirty dentry blocks, dirty node blocks, and dirty meta blocks.
*/
-#define WB_DATA_TYPE(p, f) \
- (f || f2fs_is_cp_guaranteed(p) ? F2FS_WB_CP_DATA : F2FS_WB_DATA)
+#define WB_DATA_TYPE(folio, f) \
+ (f || f2fs_is_cp_guaranteed(folio) ? F2FS_WB_CP_DATA : F2FS_WB_DATA)
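With the folio conversion the macro takes the folio directly; the two call sites in this patch show both flavors:

	type = WB_DATA_TYPE(folio, false);	/* plain writeback data */
	type = WB_DATA_TYPE(bio_folio, fio->compressed_page);
						/* compressed pages always
						 * count as F2FS_WB_CP_DATA */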
enum count_type {
F2FS_DIRTY_DENTS,
F2FS_DIRTY_DATA,
@@ -1240,7 +1242,10 @@ struct f2fs_io_info {
blk_opf_t op_flags; /* req_flag_bits */
block_t new_blkaddr; /* new block address to be written */
block_t old_blkaddr; /* old block address before COW */
- struct page *page; /* page to be written */
+ union {
+ struct page *page; /* page to be written */
+ struct folio *folio;
+ };
struct page *encrypted_page; /* encrypted page */
struct page *compressed_page; /* compressed page */
struct list_head list; /* serialize IOs */
@@ -1286,7 +1291,7 @@ struct f2fs_bio_info {
struct f2fs_dev_info {
struct file *bdev_file;
struct block_device *bdev;
- char path[MAX_PATH_LEN];
+ char path[MAX_PATH_LEN + 1];
unsigned int total_segments;
block_t start_blk;
block_t end_blk;
@@ -1427,7 +1432,7 @@ enum {
enum {
MEMORY_MODE_NORMAL, /* memory mode for normal devices */
- MEMORY_MODE_LOW, /* memory mode for low memry devices */
+ MEMORY_MODE_LOW, /* memory mode for low memory devices */
};
enum errors_option {
@@ -1491,7 +1496,7 @@ enum compress_flag {
#define COMPRESS_DATA_RESERVED_SIZE 4
struct compress_data {
__le32 clen; /* compressed data size */
- __le32 chksum; /* compressed data chksum */
+ __le32 chksum; /* compressed data checksum */
__le32 reserved[COMPRESS_DATA_RESERVED_SIZE]; /* reserved */
u8 cdata[]; /* compressed data */
};
@@ -1536,6 +1541,7 @@ struct compress_io_ctx {
struct decompress_io_ctx {
u32 magic; /* magic number to indicate page is compressed */
struct inode *inode; /* inode the context belongs to */
+ struct f2fs_sb_info *sbi; /* f2fs_sb_info pointer */
pgoff_t cluster_idx; /* cluster index number */
unsigned int cluster_size; /* page count in cluster */
unsigned int log_cluster_size; /* log of cluster size */
@@ -1576,6 +1582,7 @@ struct decompress_io_ctx {
bool failed; /* IO error occurred before decompression? */
bool need_verity; /* need fs-verity verification after decompression? */
+ unsigned char compress_algorithm; /* backup algorithm type */
void *private; /* payload buffer for specified decompression algorithm */
void *private2; /* extra payload buffer */
struct work_struct verity_work; /* work to verify the decompressed pages */
@@ -1724,6 +1731,9 @@ struct f2fs_sb_info {
/* for skip statistic */
unsigned long long skipped_gc_rwsem; /* FG_GC only */
+ /* free sections reserved for pinned file */
+ unsigned int reserved_pin_section;
+
/* threshold for gc trials on pinned files */
unsigned short gc_pin_file_threshold;
struct f2fs_rwsem pin_sem;
@@ -2013,16 +2023,11 @@ static inline struct f2fs_sb_info *F2FS_M_SB(struct address_space *mapping)
return F2FS_I_SB(mapping->host);
}
-static inline struct f2fs_sb_info *F2FS_F_SB(struct folio *folio)
+static inline struct f2fs_sb_info *F2FS_F_SB(const struct folio *folio)
{
return F2FS_M_SB(folio->mapping);
}
-static inline struct f2fs_sb_info *F2FS_P_SB(struct page *page)
-{
- return F2FS_F_SB(page_folio(page));
-}
-
static inline struct f2fs_super_block *F2FS_RAW_SUPER(struct f2fs_sb_info *sbi)
{
return (struct f2fs_super_block *)(sbi->raw_super);
@@ -2043,14 +2048,14 @@ static inline struct f2fs_checkpoint *F2FS_CKPT(struct f2fs_sb_info *sbi)
return (struct f2fs_checkpoint *)(sbi->ckpt);
}
-static inline struct f2fs_node *F2FS_NODE(const struct page *page)
+static inline struct f2fs_node *F2FS_NODE(const struct folio *folio)
{
- return (struct f2fs_node *)page_address(page);
+ return (struct f2fs_node *)folio_address(folio);
}
-static inline struct f2fs_inode *F2FS_INODE(struct page *page)
+static inline struct f2fs_inode *F2FS_INODE(const struct folio *folio)
{
- return &((struct f2fs_node *)page_address(page))->i;
+ return &((struct f2fs_node *)folio_address(folio))->i;
}
static inline struct f2fs_nm_info *NM_I(struct f2fs_sb_info *sbi)
@@ -2453,6 +2458,13 @@ release_quota:
}
#define PAGE_PRIVATE_GET_FUNC(name, flagname) \
+static inline bool folio_test_f2fs_##name(const struct folio *folio) \
+{ \
+ unsigned long priv = (unsigned long)folio->private; \
+ unsigned long v = (1UL << PAGE_PRIVATE_NOT_POINTER) | \
+ (1UL << PAGE_PRIVATE_##flagname); \
+ return (priv & v) == v; \
+} \
static inline bool page_private_##name(struct page *page) \
{ \
return PagePrivate(page) && \
@@ -2461,6 +2473,17 @@ static inline bool page_private_##name(struct page *page) \
}
#define PAGE_PRIVATE_SET_FUNC(name, flagname) \
+static inline void folio_set_f2fs_##name(struct folio *folio) \
+{ \
+ unsigned long v = (1UL << PAGE_PRIVATE_NOT_POINTER) | \
+ (1UL << PAGE_PRIVATE_##flagname); \
+ if (!folio->private) \
+ folio_attach_private(folio, (void *)v); \
+ else { \
+ v |= (unsigned long)folio->private; \
+ folio->private = (void *)v; \
+ } \
+} \
static inline void set_page_private_##name(struct page *page) \
{ \
if (!PagePrivate(page)) \
@@ -2470,6 +2493,16 @@ static inline void set_page_private_##name(struct page *page) \
}
#define PAGE_PRIVATE_CLEAR_FUNC(name, flagname) \
+static inline void folio_clear_f2fs_##name(struct folio *folio) \
+{ \
+ unsigned long v = (unsigned long)folio->private; \
+ \
+ v &= ~(1UL << PAGE_PRIVATE_##flagname); \
+ if (v == (1UL << PAGE_PRIVATE_NOT_POINTER)) \
+ folio_detach_private(folio); \
+ else \
+ folio->private = (void *)v; \
+} \
static inline void clear_page_private_##name(struct page *page) \
{ \
clear_bit(PAGE_PRIVATE_##flagname, &page_private(page)); \
@@ -2492,39 +2525,23 @@ PAGE_PRIVATE_CLEAR_FUNC(inline, INLINE_INODE);
PAGE_PRIVATE_CLEAR_FUNC(gcing, ONGOING_MIGRATION);
PAGE_PRIVATE_CLEAR_FUNC(atomic, ATOMIC_WRITE);
-static inline unsigned long get_page_private_data(struct page *page)
+static inline unsigned long folio_get_f2fs_data(struct folio *folio)
{
- unsigned long data = page_private(page);
+ unsigned long data = (unsigned long)folio->private;
if (!test_bit(PAGE_PRIVATE_NOT_POINTER, &data))
return 0;
return data >> PAGE_PRIVATE_MAX;
}
-static inline void set_page_private_data(struct page *page, unsigned long data)
+static inline void folio_set_f2fs_data(struct folio *folio, unsigned long data)
{
- if (!PagePrivate(page))
- attach_page_private(page, (void *)0);
- set_bit(PAGE_PRIVATE_NOT_POINTER, &page_private(page));
- page_private(page) |= data << PAGE_PRIVATE_MAX;
-}
-
-static inline void clear_page_private_data(struct page *page)
-{
- page_private(page) &= GENMASK(PAGE_PRIVATE_MAX - 1, 0);
- if (page_private(page) == BIT(PAGE_PRIVATE_NOT_POINTER))
- detach_page_private(page);
-}
+ data = (1UL << PAGE_PRIVATE_NOT_POINTER) | (data << PAGE_PRIVATE_MAX);
-static inline void clear_page_private_all(struct page *page)
-{
- clear_page_private_data(page);
- clear_page_private_reference(page);
- clear_page_private_gcing(page);
- clear_page_private_inline(page);
- clear_page_private_atomic(page);
-
- f2fs_bug_on(F2FS_P_SB(page), page_private(page));
+ if (!folio_test_private(folio))
+ folio_attach_private(folio, (void *)data);
+ else
+ folio->private = (void *)((unsigned long)folio->private | data);
}
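All of the folio_{test,set,clear}_f2fs_* helpers above and the two data accessors share one encoding of folio->private. A sketch of decoding the word by hand, using only names visible in this header (illustrative, not an API):

	/*
	 * When PAGE_PRIVATE_NOT_POINTER is set, folio->private is not a
	 * pointer but a packed word:
	 *
	 *   [ payload << PAGE_PRIVATE_MAX | flag bits | NOT_POINTER bit ]
	 */
	static void inspect_private(struct folio *folio)
	{
		unsigned long v = (unsigned long)folio->private;

		if (test_bit(PAGE_PRIVATE_NOT_POINTER, &v)) {
			bool gcing = test_bit(PAGE_PRIVATE_ONGOING_MIGRATION, &v);
			unsigned long payload = v >> PAGE_PRIVATE_MAX;
			/* gcing/payload mirror folio_test_f2fs_gcing() and
			 * folio_get_f2fs_data() for this folio */
		}
	}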
static inline void dec_valid_block_count(struct f2fs_sb_info *sbi,
@@ -3011,9 +3028,9 @@ static inline void f2fs_radix_tree_insert(struct radix_tree_root *root,
#define RAW_IS_INODE(p) ((p)->footer.nid == (p)->footer.ino)
-static inline bool IS_INODE(struct page *page)
+static inline bool IS_INODE(const struct folio *folio)
{
- struct f2fs_node *p = F2FS_NODE(page);
+ struct f2fs_node *p = F2FS_NODE(folio);
return RAW_IS_INODE(p);
}
@@ -3031,20 +3048,20 @@ static inline __le32 *blkaddr_in_node(struct f2fs_node *node)
static inline int f2fs_has_extra_attr(struct inode *inode);
static inline unsigned int get_dnode_base(struct inode *inode,
- struct page *node_page)
+ struct folio *node_folio)
{
- if (!IS_INODE(node_page))
+ if (!IS_INODE(node_folio))
return 0;
return inode ? get_extra_isize(inode) :
- offset_in_addr(&F2FS_NODE(node_page)->i);
+ offset_in_addr(&F2FS_NODE(node_folio)->i);
}
static inline __le32 *get_dnode_addr(struct inode *inode,
struct folio *node_folio)
{
- return blkaddr_in_node(F2FS_NODE(&node_folio->page)) +
- get_dnode_base(inode, &node_folio->page);
+ return blkaddr_in_node(F2FS_NODE(node_folio)) +
+ get_dnode_base(inode, node_folio);
}
static inline block_t data_blkaddr(struct inode *inode,
@@ -3366,9 +3383,10 @@ static inline unsigned int addrs_per_page(struct inode *inode,
return addrs;
}
-static inline void *inline_xattr_addr(struct inode *inode, struct folio *folio)
+static inline
+void *inline_xattr_addr(struct inode *inode, const struct folio *folio)
{
- struct f2fs_inode *ri = F2FS_INODE(&folio->page);
+ struct f2fs_inode *ri = F2FS_INODE(folio);
return (void *)&(ri->i_addr[DEF_ADDRS_PER_INODE -
get_inline_xattr_addrs(inode)]);
@@ -3628,13 +3646,14 @@ int f2fs_pin_file_control(struct inode *inode, bool inc);
*/
void f2fs_set_inode_flags(struct inode *inode);
bool f2fs_inode_chksum_verify(struct f2fs_sb_info *sbi, struct folio *folio);
-void f2fs_inode_chksum_set(struct f2fs_sb_info *sbi, struct page *page);
+void f2fs_inode_chksum_set(struct f2fs_sb_info *sbi, struct folio *folio);
struct inode *f2fs_iget(struct super_block *sb, unsigned long ino);
struct inode *f2fs_iget_retry(struct super_block *sb, unsigned long ino);
int f2fs_try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink);
void f2fs_update_inode(struct inode *inode, struct folio *node_folio);
void f2fs_update_inode_page(struct inode *inode);
int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc);
+void f2fs_remove_donate_inode(struct inode *inode);
void f2fs_evict_inode(struct inode *inode);
void f2fs_handle_failed_inode(struct inode *inode);
@@ -3784,8 +3803,8 @@ void f2fs_alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid);
void f2fs_alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid);
int f2fs_try_to_free_nids(struct f2fs_sb_info *sbi, int nr_shrink);
int f2fs_recover_inline_xattr(struct inode *inode, struct folio *folio);
-int f2fs_recover_xattr_data(struct inode *inode, struct page *page);
-int f2fs_recover_inode_page(struct f2fs_sb_info *sbi, struct page *page);
+int f2fs_recover_xattr_data(struct inode *inode, struct folio *folio);
+int f2fs_recover_inode_page(struct f2fs_sb_info *sbi, struct folio *folio);
int f2fs_restore_node_summary(struct f2fs_sb_info *sbi,
unsigned int segno, struct f2fs_summary_block *sum);
int f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc);
@@ -3852,7 +3871,7 @@ void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn,
bool recover_newaddr);
enum temp_type f2fs_get_segment_temp(struct f2fs_sb_info *sbi,
enum log_type seg_type);
-int f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
+int f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct folio *folio,
block_t old_blkaddr, block_t *new_blkaddr,
struct f2fs_summary *sum, int type,
struct f2fs_io_info *fio);
@@ -3886,7 +3905,7 @@ unsigned long long f2fs_get_section_mtime(struct f2fs_sb_info *sbi,
static inline struct inode *fio_inode(struct f2fs_io_info *fio)
{
- return page_folio(fio->page)->mapping->host;
+ return fio->folio->mapping->host;
}
#define DEF_FRAGMENT_SIZE 4
@@ -3953,7 +3972,7 @@ void f2fs_init_ckpt_req_control(struct f2fs_sb_info *sbi);
*/
int __init f2fs_init_bioset(void);
void f2fs_destroy_bioset(void);
-bool f2fs_is_cp_guaranteed(struct page *page);
+bool f2fs_is_cp_guaranteed(const struct folio *folio);
int f2fs_init_bio_entry_cache(void);
void f2fs_destroy_bio_entry_cache(void);
void f2fs_submit_read_bio(struct f2fs_sb_info *sbi, struct bio *bio,
@@ -3961,7 +3980,7 @@ void f2fs_submit_read_bio(struct f2fs_sb_info *sbi, struct bio *bio,
int f2fs_init_write_merge_io(struct f2fs_sb_info *sbi);
void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type);
void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi,
- struct inode *inode, struct page *page,
+ struct inode *inode, struct folio *folio,
nid_t ino, enum page_type type);
void f2fs_submit_merged_ipu_write(struct f2fs_sb_info *sbi,
struct bio **bio, struct folio *folio);
@@ -4303,7 +4322,7 @@ extern struct kmem_cache *f2fs_inode_entry_slab;
* inline.c
*/
bool f2fs_may_inline_data(struct inode *inode);
-bool f2fs_sanity_check_inline_data(struct inode *inode, struct page *ipage);
+bool f2fs_sanity_check_inline_data(struct inode *inode, struct folio *ifolio);
bool f2fs_may_inline_dentry(struct inode *inode);
void f2fs_do_read_inline_data(struct folio *folio, struct folio *ifolio);
void f2fs_truncate_inline_inode(struct inode *inode, struct folio *ifolio,
@@ -4345,7 +4364,7 @@ void f2fs_leave_shrinker(struct f2fs_sb_info *sbi);
/*
* extent_cache.c
*/
-bool sanity_check_extent_cache(struct inode *inode, struct page *ipage);
+bool sanity_check_extent_cache(struct inode *inode, struct folio *ifolio);
void f2fs_init_extent_tree(struct inode *inode);
void f2fs_drop_extent_tree(struct inode *inode);
void f2fs_destroy_extent_node(struct inode *inode);
@@ -4435,20 +4454,20 @@ enum cluster_check_type {
CLUSTER_COMPR_BLKS, /* return # of compressed blocks in a cluster */
CLUSTER_RAW_BLKS /* return # of raw blocks in a cluster */
};
-bool f2fs_is_compressed_page(struct page *page);
+bool f2fs_is_compressed_page(struct folio *folio);
struct folio *f2fs_compress_control_folio(struct folio *folio);
int f2fs_prepare_compress_overwrite(struct inode *inode,
struct page **pagep, pgoff_t index, void **fsdata);
bool f2fs_compress_write_end(struct inode *inode, void *fsdata,
pgoff_t index, unsigned copied);
int f2fs_truncate_partial_cluster(struct inode *inode, u64 from, bool lock);
-void f2fs_compress_write_end_io(struct bio *bio, struct page *page);
+void f2fs_compress_write_end_io(struct bio *bio, struct folio *folio);
bool f2fs_is_compress_backend_ready(struct inode *inode);
bool f2fs_is_compress_level_valid(int alg, int lvl);
int __init f2fs_init_compress_mempool(void);
void f2fs_destroy_compress_mempool(void);
void f2fs_decompress_cluster(struct decompress_io_ctx *dic, bool in_task);
-void f2fs_end_read_compressed_page(struct page *page, bool failed,
+void f2fs_end_read_compressed_page(struct folio *folio, bool failed,
block_t blkaddr, bool in_task);
bool f2fs_cluster_is_empty(struct compress_ctx *cc);
bool f2fs_cluster_can_merge_page(struct compress_ctx *cc, pgoff_t index);
@@ -4486,8 +4505,6 @@ void f2fs_destroy_compress_cache(void);
struct address_space *COMPRESS_MAPPING(struct f2fs_sb_info *sbi);
void f2fs_invalidate_compress_pages_range(struct f2fs_sb_info *sbi,
block_t blkaddr, unsigned int len);
-void f2fs_cache_compressed_page(struct f2fs_sb_info *sbi, struct page *page,
- nid_t ino, block_t blkaddr);
bool f2fs_load_compressed_folio(struct f2fs_sb_info *sbi, struct folio *folio,
block_t blkaddr);
void f2fs_invalidate_compress_pages(struct f2fs_sb_info *sbi, nid_t ino);
@@ -4504,7 +4521,7 @@ void f2fs_invalidate_compress_pages(struct f2fs_sb_info *sbi, nid_t ino);
sbi->compr_saved_block += diff; \
} while (0)
#else
-static inline bool f2fs_is_compressed_page(struct page *page) { return false; }
+static inline bool f2fs_is_compressed_page(struct folio *folio) { return false; }
static inline bool f2fs_is_compress_backend_ready(struct inode *inode)
{
if (!f2fs_compressed_file(inode))
@@ -4522,7 +4539,7 @@ static inline int __init f2fs_init_compress_mempool(void) { return 0; }
static inline void f2fs_destroy_compress_mempool(void) { }
static inline void f2fs_decompress_cluster(struct decompress_io_ctx *dic,
bool in_task) { }
-static inline void f2fs_end_read_compressed_page(struct page *page,
+static inline void f2fs_end_read_compressed_page(struct folio *folio,
bool failed, block_t blkaddr, bool in_task)
{
WARN_ON_ONCE(1);
@@ -4542,8 +4559,6 @@ static inline int __init f2fs_init_compress_cache(void) { return 0; }
static inline void f2fs_destroy_compress_cache(void) { }
static inline void f2fs_invalidate_compress_pages_range(struct f2fs_sb_info *sbi,
block_t blkaddr, unsigned int len) { }
-static inline void f2fs_cache_compressed_page(struct f2fs_sb_info *sbi,
- struct page *page, nid_t ino, block_t blkaddr) { }
static inline bool f2fs_load_compressed_folio(struct f2fs_sb_info *sbi,
struct folio *folio, block_t blkaddr) { return false; }
static inline void f2fs_invalidate_compress_pages(struct f2fs_sb_info *sbi,
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index c677230699fd..42faaed6a02d 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -489,7 +489,7 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence)
}
}
- end_offset = ADDRS_PER_PAGE(&dn.node_folio->page, inode);
+ end_offset = ADDRS_PER_PAGE(dn.node_folio, inode);
/* find data/hole in dnode block */
for (; dn.ofs_in_node < end_offset;
@@ -629,7 +629,10 @@ static int f2fs_file_open(struct inode *inode, struct file *filp)
if (err)
return err;
- return finish_preallocate_blocks(inode);
+ err = finish_preallocate_blocks(inode);
+ if (!err)
+ atomic_inc(&F2FS_I(inode)->open_count);
+ return err;
}
void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count)
@@ -708,7 +711,7 @@ next:
* once we invalidate a valid blkaddr in the range [ofs, ofs + count],
* we will invalidate all blkaddrs in the whole range.
*/
- fofs = f2fs_start_bidx_of_node(ofs_of_node(&dn->node_folio->page),
+ fofs = f2fs_start_bidx_of_node(ofs_of_node(dn->node_folio),
dn->inode) + ofs;
f2fs_update_read_extent_cache_range(dn, fofs, 0, len);
f2fs_update_age_extent_cache_range(dn, fofs, len);
@@ -815,12 +818,12 @@ int f2fs_do_truncate_blocks(struct inode *inode, u64 from, bool lock)
goto out;
}
- count = ADDRS_PER_PAGE(&dn.node_folio->page, inode);
+ count = ADDRS_PER_PAGE(dn.node_folio, inode);
count -= dn.ofs_in_node;
f2fs_bug_on(sbi, count < 0);
- if (dn.ofs_in_node || IS_INODE(&dn.node_folio->page)) {
+ if (dn.ofs_in_node || IS_INODE(dn.node_folio)) {
f2fs_truncate_data_blocks_range(&dn, count);
free_from += count;
}
@@ -1043,11 +1046,24 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
{
struct inode *inode = d_inode(dentry);
struct f2fs_inode_info *fi = F2FS_I(inode);
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
int err;
- if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
+ if (unlikely(f2fs_cp_error(sbi)))
return -EIO;
+ err = setattr_prepare(idmap, dentry, attr);
+ if (err)
+ return err;
+
+ err = fscrypt_prepare_setattr(dentry, attr);
+ if (err)
+ return err;
+
+ err = fsverity_prepare_setattr(dentry, attr);
+ if (err)
+ return err;
+
if (unlikely(IS_IMMUTABLE(inode)))
return -EPERM;
@@ -1064,20 +1080,19 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
!IS_ALIGNED(attr->ia_size,
F2FS_BLK_TO_BYTES(fi->i_cluster_size)))
return -EINVAL;
+ /*
+ * To prevent scattered pin block generation, we don't allow
+ * unaligned truncation to a smaller or equal size for a pinned
+ * file. We only support overwrite IO to pinned files, so we
+ * don't care about larger-size truncation.
+ */
+ if (f2fs_is_pinned_file(inode) &&
+ attr->ia_size <= i_size_read(inode) &&
+ !IS_ALIGNED(attr->ia_size,
+ F2FS_BLK_TO_BYTES(CAP_BLKS_PER_SEC(sbi))))
+ return -EINVAL;
}
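	/*
	 * Worked example for the check above (assumed geometry, for
	 * illustration only): with 4KiB blocks and
	 * CAP_BLKS_PER_SEC(sbi) == 512, the required alignment is
	 * F2FS_BLK_TO_BYTES(512) == 2MiB; shrinking a pinned file to
	 * 6MiB is allowed, shrinking it to 5MiB fails with -EINVAL.
	 */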
- err = setattr_prepare(idmap, dentry, attr);
- if (err)
- return err;
-
- err = fscrypt_prepare_setattr(dentry, attr);
- if (err)
- return err;
-
- err = fsverity_prepare_setattr(dentry, attr);
- if (err)
- return err;
-
if (is_quota_modification(idmap, inode, attr)) {
err = f2fs_dquot_initialize(inode);
if (err)
@@ -1085,12 +1100,11 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
}
if (i_uid_needs_update(idmap, attr, inode) ||
i_gid_needs_update(idmap, attr, inode)) {
- f2fs_lock_op(F2FS_I_SB(inode));
+ f2fs_lock_op(sbi);
err = dquot_transfer(idmap, inode, attr);
if (err) {
- set_sbi_flag(F2FS_I_SB(inode),
- SBI_QUOTA_NEED_REPAIR);
- f2fs_unlock_op(F2FS_I_SB(inode));
+ set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
+ f2fs_unlock_op(sbi);
return err;
}
/*
@@ -1100,7 +1114,7 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
i_uid_update(idmap, attr, inode);
i_gid_update(idmap, attr, inode);
f2fs_mark_inode_dirty_sync(inode, true);
- f2fs_unlock_op(F2FS_I_SB(inode));
+ f2fs_unlock_op(sbi);
}
if (attr->ia_valid & ATTR_SIZE) {
@@ -1163,7 +1177,7 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
f2fs_mark_inode_dirty_sync(inode, true);
/* inode change will produce dirty node pages flushed by checkpoint */
- f2fs_balance_fs(F2FS_I_SB(inode), true);
+ f2fs_balance_fs(sbi, true);
return err;
}
@@ -1223,7 +1237,7 @@ int f2fs_truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end)
return err;
}
- end_offset = ADDRS_PER_PAGE(&dn.node_folio->page, inode);
+ end_offset = ADDRS_PER_PAGE(dn.node_folio, inode);
count = min(end_offset - dn.ofs_in_node, pg_end - pg_start);
f2fs_bug_on(F2FS_I_SB(inode), count == 0 || count > end_offset);
@@ -1322,7 +1336,7 @@ next_dnode:
goto next;
}
- done = min((pgoff_t)ADDRS_PER_PAGE(&dn.node_folio->page, inode) -
+ done = min((pgoff_t)ADDRS_PER_PAGE(dn.node_folio, inode) -
dn.ofs_in_node, len);
for (i = 0; i < done; i++, blkaddr++, do_replace++, dn.ofs_in_node++) {
*blkaddr = f2fs_data_blkaddr(&dn);
@@ -1411,7 +1425,7 @@ static int __clone_blkaddrs(struct inode *src_inode, struct inode *dst_inode,
}
ilen = min((pgoff_t)
- ADDRS_PER_PAGE(&dn.node_folio->page, dst_inode) -
+ ADDRS_PER_PAGE(dn.node_folio, dst_inode) -
dn.ofs_in_node, len - i);
do {
dn.data_blkaddr = f2fs_data_blkaddr(&dn);
@@ -1453,7 +1467,7 @@ static int __clone_blkaddrs(struct inode *src_inode, struct inode *dst_inode,
memcpy_folio(fdst, 0, fsrc, 0, PAGE_SIZE);
folio_mark_dirty(fdst);
- set_page_private_gcing(&fdst->page);
+ folio_set_f2fs_gcing(fdst);
f2fs_folio_put(fdst, true);
f2fs_folio_put(fsrc, true);
@@ -1707,7 +1721,7 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
goto out;
}
- end_offset = ADDRS_PER_PAGE(&dn.node_folio->page, inode);
+ end_offset = ADDRS_PER_PAGE(dn.node_folio, inode);
end = min(pg_end, end_offset - dn.ofs_in_node + index);
ret = f2fs_do_zero_range(&dn, index, end);
@@ -1888,9 +1902,8 @@ next_alloc:
}
}
- if (has_not_enough_free_secs(sbi, 0, f2fs_sb_has_blkzoned(sbi) ?
- ZONED_PIN_SEC_REQUIRED_COUNT :
- GET_SEC_FROM_SEG(sbi, overprovision_segments(sbi)))) {
+ if (has_not_enough_free_secs(sbi, 0,
+ sbi->reserved_pin_section)) {
f2fs_down_write(&sbi->gc_lock);
stat_inc_gc_call_count(sbi, FOREGROUND);
err = f2fs_gc(sbi, &gc_control);
@@ -2028,6 +2041,9 @@ out:
static int f2fs_release_file(struct inode *inode, struct file *filp)
{
+ if (atomic_dec_and_test(&F2FS_I(inode)->open_count))
+ f2fs_remove_donate_inode(inode);
+
/*
* f2fs_release_file is called on every close call. So we should
* not drop any in-memory pages on a close issued by another process.
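For reference, the two halves of the new open_count accounting, condensed from the f2fs_file_open() and f2fs_release_file() hunks (error paths elided):

	/* open: count the opener only after setup succeeded */
	atomic_inc(&F2FS_I(inode)->open_count);

	/* release: the last closer drops the inode from the donation list */
	if (atomic_dec_and_test(&F2FS_I(inode)->open_count))
		f2fs_remove_donate_inode(inode);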
@@ -2978,7 +2994,7 @@ do_map:
f2fs_folio_wait_writeback(folio, DATA, true, true);
folio_mark_dirty(folio);
- set_page_private_gcing(&folio->page);
+ folio_set_f2fs_gcing(folio);
f2fs_folio_put(folio, true);
idx++;
@@ -3876,7 +3892,7 @@ static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg)
break;
}
- end_offset = ADDRS_PER_PAGE(&dn.node_folio->page, inode);
+ end_offset = ADDRS_PER_PAGE(dn.node_folio, inode);
count = min(end_offset - dn.ofs_in_node, last_idx - page_idx);
count = round_up(count, fi->i_cluster_size);
@@ -4054,7 +4070,7 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg)
break;
}
- end_offset = ADDRS_PER_PAGE(&dn.node_folio->page, inode);
+ end_offset = ADDRS_PER_PAGE(dn.node_folio, inode);
count = min(end_offset - dn.ofs_in_node, last_idx - page_idx);
count = round_up(count, fi->i_cluster_size);
@@ -4218,7 +4234,7 @@ static int f2fs_sec_trim_file(struct file *filp, unsigned long arg)
goto out;
}
- end_offset = ADDRS_PER_PAGE(&dn.node_folio->page, inode);
+ end_offset = ADDRS_PER_PAGE(dn.node_folio, inode);
count = min(end_offset - dn.ofs_in_node, pg_end - index);
for (i = 0; i < count; i++, index++, dn.ofs_in_node++) {
struct block_device *cur_bdev;
@@ -4415,7 +4431,7 @@ static int redirty_blocks(struct inode *inode, pgoff_t page_idx, int len)
f2fs_folio_wait_writeback(folio, DATA, true, true);
folio_mark_dirty(folio);
- set_page_private_gcing(&folio->page);
+ folio_set_f2fs_gcing(folio);
redirty_idx = folio_next_index(folio);
folio_unlock(folio);
folio_put_refs(folio, 2);
@@ -4825,6 +4841,7 @@ static ssize_t f2fs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
struct inode *inode = file_inode(iocb->ki_filp);
const loff_t pos = iocb->ki_pos;
ssize_t ret;
+ bool dio;
if (!f2fs_is_compress_backend_ready(inode))
return -EOPNOTSUPP;
@@ -4833,12 +4850,15 @@ static ssize_t f2fs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
f2fs_trace_rw_file_path(iocb->ki_filp, iocb->ki_pos,
iov_iter_count(to), READ);
+ dio = f2fs_should_use_dio(inode, iocb, to);
+
/* In LFS mode, if there is inflight dio, wait for its completion */
if (f2fs_lfs_mode(F2FS_I_SB(inode)) &&
- get_pages(F2FS_I_SB(inode), F2FS_DIO_WRITE))
+ get_pages(F2FS_I_SB(inode), F2FS_DIO_WRITE) &&
+ (!f2fs_is_pinned_file(inode) || !dio))
inode_dio_wait(inode);
- if (f2fs_should_use_dio(inode, iocb, to)) {
+ if (dio) {
ret = f2fs_dio_read_iter(iocb, to);
} else {
ret = filemap_read(iocb, to, 0);
@@ -4846,8 +4866,7 @@ static ssize_t f2fs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
f2fs_update_iostat(F2FS_I_SB(inode), inode,
APP_BUFFERED_READ_IO, ret);
}
- if (trace_f2fs_dataread_end_enabled())
- trace_f2fs_dataread_end(inode, pos, ret);
+ trace_f2fs_dataread_end(inode, pos, ret);
return ret;
}
@@ -4870,8 +4889,7 @@ static ssize_t f2fs_file_splice_read(struct file *in, loff_t *ppos,
f2fs_update_iostat(F2FS_I_SB(inode), inode,
APP_BUFFERED_READ_IO, ret);
- if (trace_f2fs_dataread_end_enabled())
- trace_f2fs_dataread_end(inode, pos, ret);
+ trace_f2fs_dataread_end(inode, pos, ret);
return ret;
}
@@ -5216,8 +5234,7 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
f2fs_dio_write_iter(iocb, from, &may_need_sync) :
f2fs_buffered_write_iter(iocb, from);
- if (trace_f2fs_datawrite_end_enabled())
- trace_f2fs_datawrite_end(inode, orig_pos, ret);
+ trace_f2fs_datawrite_end(inode, orig_pos, ret);
}
/* Don't leave any preallocated blocks around past i_size. */
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 3cb5242f4ddf..098e9f71421e 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -141,10 +141,10 @@ do_gc:
FOREGROUND : BACKGROUND);
sync_mode = (F2FS_OPTION(sbi).bggc_mode == BGGC_MODE_SYNC) ||
- gc_control.one_time;
+ (gc_control.one_time && gc_th->boost_gc_greedy);
/* foreground GC was triggered via f2fs_balance_fs() */
- if (foreground)
+ if (foreground && !f2fs_sb_has_blkzoned(sbi))
sync_mode = false;
gc_control.init_gc_type = sync_mode ? FG_GC : BG_GC;
@@ -197,6 +197,8 @@ int f2fs_start_gc_thread(struct f2fs_sb_info *sbi)
gc_th->urgent_sleep_time = DEF_GC_THREAD_URGENT_SLEEP_TIME;
gc_th->valid_thresh_ratio = DEF_GC_THREAD_VALID_THRESH_RATIO;
+ gc_th->boost_gc_multiple = BOOST_GC_MULTIPLE;
+ gc_th->boost_gc_greedy = GC_GREEDY;
if (f2fs_sb_has_blkzoned(sbi)) {
gc_th->min_sleep_time = DEF_GC_THREAD_MIN_SLEEP_TIME_ZONED;
@@ -278,12 +280,7 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type,
{
struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
- if (p->alloc_mode == SSR) {
- p->gc_mode = GC_GREEDY;
- p->dirty_bitmap = dirty_i->dirty_segmap[type];
- p->max_search = dirty_i->nr_dirty[type];
- p->ofs_unit = 1;
- } else if (p->alloc_mode == AT_SSR) {
+ if (p->alloc_mode == SSR || p->alloc_mode == AT_SSR) {
p->gc_mode = GC_GREEDY;
p->dirty_bitmap = dirty_i->dirty_segmap[type];
p->max_search = dirty_i->nr_dirty[type];
@@ -389,14 +386,15 @@ static unsigned int get_cb_cost(struct f2fs_sb_info *sbi, unsigned int segno)
}
static inline unsigned int get_gc_cost(struct f2fs_sb_info *sbi,
- unsigned int segno, struct victim_sel_policy *p)
+ unsigned int segno, struct victim_sel_policy *p,
+ unsigned int valid_thresh_ratio)
{
if (p->alloc_mode == SSR)
return get_seg_entry(sbi, segno)->ckpt_valid_blocks;
- if (p->one_time_gc && (get_valid_blocks(sbi, segno, true) >=
- CAP_BLKS_PER_SEC(sbi) * sbi->gc_thread->valid_thresh_ratio /
- 100))
+ if (p->one_time_gc && (valid_thresh_ratio < 100) &&
+ (get_valid_blocks(sbi, segno, true) >=
+ CAP_BLKS_PER_SEC(sbi) * valid_thresh_ratio / 100))
return UINT_MAX;
/* alloc_mode == LFS */
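A worked example for the reworked threshold (numbers assumed): with CAP_BLKS_PER_SEC(sbi) == 512 and the tunable ratio at 85, one-time GC prices any section holding at least 435 valid blocks at UINT_MAX, so it is never chosen as a victim. When free sections run short, the caller leaves valid_thresh_ratio at 100 and the new (valid_thresh_ratio < 100) test switches the skip off entirely, letting GC reclaim even nearly-full sections.

	/* assumed: CAP_BLKS_PER_SEC == 512, ratio == 85
	 *   512 * 85 / 100 == 435 valid blocks is the skip threshold */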
@@ -777,6 +775,7 @@ int f2fs_get_victim(struct f2fs_sb_info *sbi, unsigned int *result,
unsigned int secno, last_victim;
unsigned int last_segment;
unsigned int nsearched;
+ unsigned int valid_thresh_ratio = 100;
bool is_atgc;
int ret = 0;
@@ -786,7 +785,11 @@ int f2fs_get_victim(struct f2fs_sb_info *sbi, unsigned int *result,
p.alloc_mode = alloc_mode;
p.age = age;
p.age_threshold = sbi->am.age_threshold;
- p.one_time_gc = one_time;
+ if (one_time) {
+ p.one_time_gc = one_time;
+ if (has_enough_free_secs(sbi, 0, NR_PERSISTENT_LOG))
+ valid_thresh_ratio = sbi->gc_thread->valid_thresh_ratio;
+ }
retry:
select_policy(sbi, gc_type, type, &p);
@@ -912,7 +915,7 @@ retry:
goto next;
}
- cost = get_gc_cost(sbi, segno, &p);
+ cost = get_gc_cost(sbi, segno, &p, valid_thresh_ratio);
if (p.min_cost > cost) {
p.min_segno = segno;
@@ -1162,8 +1165,8 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
return false;
}
- if (IS_INODE(&node_folio->page)) {
- base = offset_in_addr(F2FS_INODE(&node_folio->page));
+ if (IS_INODE(node_folio)) {
+ base = offset_in_addr(F2FS_INODE(node_folio));
max_addrs = DEF_ADDRS_PER_INODE;
} else {
base = 0;
@@ -1177,7 +1180,7 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
return false;
}
- *nofs = ofs_of_node(&node_folio->page);
+ *nofs = ofs_of_node(node_folio);
source_blkaddr = data_blkaddr(NULL, node_folio, ofs_in_node);
f2fs_folio_put(node_folio, true);
@@ -1249,7 +1252,7 @@ static int ra_data_block(struct inode *inode, pgoff_t index)
}
got_it:
/* read folio */
- fio.page = &folio->page;
+ fio.folio = folio;
fio.new_blkaddr = fio.old_blkaddr = dn.data_blkaddr;
/*
@@ -1353,7 +1356,7 @@ static int move_data_block(struct inode *inode, block_t bidx,
goto put_out;
/* read folio */
- fio.page = &folio->page;
+ fio.folio = folio;
fio.new_blkaddr = fio.old_blkaddr = dn.data_blkaddr;
if (lfs_mode)
@@ -1473,7 +1476,7 @@ static int move_data_page(struct inode *inode, block_t bidx, int gc_type,
goto out;
}
folio_mark_dirty(folio);
- set_page_private_gcing(&folio->page);
+ folio_set_f2fs_gcing(folio);
} else {
struct f2fs_io_info fio = {
.sbi = F2FS_I_SB(inode),
@@ -1483,7 +1486,7 @@ static int move_data_page(struct inode *inode, block_t bidx, int gc_type,
.op = REQ_OP_WRITE,
.op_flags = REQ_SYNC,
.old_blkaddr = NULL_ADDR,
- .page = &folio->page,
+ .folio = folio,
.encrypted_page = NULL,
.need_lock = LOCK_REQ,
.io_type = FS_GC_DATA_IO,
@@ -1499,11 +1502,11 @@ retry:
f2fs_remove_dirty_inode(inode);
}
- set_page_private_gcing(&folio->page);
+ folio_set_f2fs_gcing(folio);
err = f2fs_do_write_data_page(&fio);
if (err) {
- clear_page_private_gcing(&folio->page);
+ folio_clear_f2fs_gcing(folio);
if (err == -ENOMEM) {
memalloc_retry_wait(GFP_NOFS);
goto retry;
@@ -1749,7 +1752,7 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi,
!has_enough_free_blocks(sbi,
sbi->gc_thread->boost_zoned_gc_percent))
window_granularity *=
- BOOST_GC_MULTIPLE;
+ sbi->gc_thread->boost_gc_multiple;
end_segno = start_segno + window_granularity;
}
@@ -1891,6 +1894,7 @@ gc_more:
/* Let's run FG_GC, if we don't have enough space. */
if (has_not_enough_free_secs(sbi, 0, 0)) {
gc_type = FG_GC;
+ gc_control->one_time = false;
/*
* For example, if there are many prefree_segments below given
@@ -2064,7 +2068,7 @@ int f2fs_gc_range(struct f2fs_sb_info *sbi,
.iroot = RADIX_TREE_INIT(gc_list.iroot, GFP_NOFS),
};
- if (IS_CURSEC(sbi, GET_SEC_FROM_SEG(sbi, segno)))
+ if (is_cursec(sbi, GET_SEC_FROM_SEG(sbi, segno)))
continue;
do_garbage_collect(sbi, segno, &gc_list, FG_GC, true, false);
diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h
index 5c1eaf55e127..24e8b1c27acc 100644
--- a/fs/f2fs/gc.h
+++ b/fs/f2fs/gc.h
@@ -68,6 +68,8 @@ struct f2fs_gc_kthread {
unsigned int no_zoned_gc_percent;
unsigned int boost_zoned_gc_percent;
unsigned int valid_thresh_ratio;
+ unsigned int boost_gc_multiple;
+ unsigned int boost_gc_greedy;
};
struct gc_inode_list {
@@ -194,6 +196,7 @@ static inline bool has_enough_invalid_blocks(struct f2fs_sb_info *sbi)
static inline bool need_to_boost_gc(struct f2fs_sb_info *sbi)
{
if (f2fs_sb_has_blkzoned(sbi))
- return !has_enough_free_blocks(sbi, LIMIT_BOOST_ZONED_GC);
+ return !has_enough_free_blocks(sbi,
+ sbi->gc_thread->boost_zoned_gc_percent);
return has_enough_invalid_blocks(sbi);
}
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index 901c630685ce..58ac831ef704 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -33,9 +33,9 @@ bool f2fs_may_inline_data(struct inode *inode)
return !f2fs_post_read_required(inode);
}
-static bool inode_has_blocks(struct inode *inode, struct page *ipage)
+static bool inode_has_blocks(struct inode *inode, struct folio *ifolio)
{
- struct f2fs_inode *ri = F2FS_INODE(ipage);
+ struct f2fs_inode *ri = F2FS_INODE(ifolio);
int i;
if (F2FS_HAS_BLOCKS(inode))
@@ -48,12 +48,12 @@ static bool inode_has_blocks(struct inode *inode, struct page *ipage)
return false;
}
-bool f2fs_sanity_check_inline_data(struct inode *inode, struct page *ipage)
+bool f2fs_sanity_check_inline_data(struct inode *inode, struct folio *ifolio)
{
if (!f2fs_has_inline_data(inode))
return false;
- if (inode_has_blocks(inode, ipage))
+ if (inode_has_blocks(inode, ifolio))
return false;
if (!support_inline_data(inode))
@@ -150,7 +150,7 @@ int f2fs_convert_inline_folio(struct dnode_of_data *dn, struct folio *folio)
.type = DATA,
.op = REQ_OP_WRITE,
.op_flags = REQ_SYNC | REQ_PRIO,
- .page = &folio->page,
+ .folio = folio,
.encrypted_page = NULL,
.io_type = FS_DATA_IO,
};
@@ -206,7 +206,7 @@ int f2fs_convert_inline_folio(struct dnode_of_data *dn, struct folio *folio)
/* clear inline data and flag after data writeback */
f2fs_truncate_inline_inode(dn->inode, dn->inode_folio, 0);
- clear_page_private_inline(&dn->inode_folio->page);
+ folio_clear_f2fs_inline(dn->inode_folio);
clear_out:
stat_dec_inline_inode(dn->inode);
clear_inode_flag(dn->inode, FI_INLINE_DATA);
@@ -286,7 +286,7 @@ int f2fs_write_inline_data(struct inode *inode, struct folio *folio)
set_inode_flag(inode, FI_APPEND_WRITE);
set_inode_flag(inode, FI_DATA_EXIST);
- clear_page_private_inline(&ifolio->page);
+ folio_clear_f2fs_inline(ifolio);
f2fs_folio_put(ifolio, 1);
return 0;
}
@@ -305,8 +305,8 @@ int f2fs_recover_inline_data(struct inode *inode, struct folio *nfolio)
* x o -> remove data blocks, and then recover inline_data
* x x -> recover data blocks
*/
- if (IS_INODE(&nfolio->page))
- ri = F2FS_INODE(&nfolio->page);
+ if (IS_INODE(nfolio))
+ ri = F2FS_INODE(nfolio);
if (f2fs_has_inline_data(inode) &&
ri && (ri->i_inline & F2FS_INLINE_DATA)) {
@@ -825,7 +825,7 @@ int f2fs_inline_data_fiemap(struct inode *inode,
byteaddr = (__u64)ni.blk_addr << inode->i_sb->s_blocksize_bits;
byteaddr += (char *)inline_data_addr(inode, ifolio) -
- (char *)F2FS_INODE(&ifolio->page);
+ (char *)F2FS_INODE(ifolio);
err = fiemap_fill_next_extent(fieinfo, start, byteaddr, ilen, flags);
trace_f2fs_fiemap(inode, start, byteaddr, ilen, flags, err);
out:
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index 083d52a42bfb..8c4eafe9ffac 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -108,7 +108,7 @@ static void __recover_inline_status(struct inode *inode, struct folio *ifolio)
f2fs_folio_wait_writeback(ifolio, NODE, true, true);
set_inode_flag(inode, FI_DATA_EXIST);
- set_raw_inline(inode, F2FS_INODE(&ifolio->page));
+ set_raw_inline(inode, F2FS_INODE(ifolio));
folio_mark_dirty(ifolio);
return;
}
@@ -116,14 +116,15 @@ static void __recover_inline_status(struct inode *inode, struct folio *ifolio)
return;
}
-static bool f2fs_enable_inode_chksum(struct f2fs_sb_info *sbi, struct page *page)
+static
+bool f2fs_enable_inode_chksum(struct f2fs_sb_info *sbi, struct folio *folio)
{
- struct f2fs_inode *ri = &F2FS_NODE(page)->i;
+ struct f2fs_inode *ri = &F2FS_NODE(folio)->i;
if (!f2fs_sb_has_inode_chksum(sbi))
return false;
- if (!IS_INODE(page) || !(ri->i_inline & F2FS_EXTRA_ATTR))
+ if (!IS_INODE(folio) || !(ri->i_inline & F2FS_EXTRA_ATTR))
return false;
if (!F2FS_FITS_IN_INODE(ri, le16_to_cpu(ri->i_extra_isize),
@@ -133,9 +134,9 @@ static bool f2fs_enable_inode_chksum(struct f2fs_sb_info *sbi, struct page *page
return true;
}
-static __u32 f2fs_inode_chksum(struct f2fs_sb_info *sbi, struct page *page)
+static __u32 f2fs_inode_chksum(struct f2fs_sb_info *sbi, struct folio *folio)
{
- struct f2fs_node *node = F2FS_NODE(page);
+ struct f2fs_node *node = F2FS_NODE(folio);
struct f2fs_inode *ri = &node->i;
__le32 ino = node->footer.ino;
__le32 gen = ri->i_generation;
@@ -164,34 +165,34 @@ bool f2fs_inode_chksum_verify(struct f2fs_sb_info *sbi, struct folio *folio)
return true;
#ifdef CONFIG_F2FS_CHECK_FS
- if (!f2fs_enable_inode_chksum(sbi, &folio->page))
+ if (!f2fs_enable_inode_chksum(sbi, folio))
#else
- if (!f2fs_enable_inode_chksum(sbi, &folio->page) ||
+ if (!f2fs_enable_inode_chksum(sbi, folio) ||
folio_test_dirty(folio) ||
folio_test_writeback(folio))
#endif
return true;
- ri = &F2FS_NODE(&folio->page)->i;
+ ri = &F2FS_NODE(folio)->i;
provided = le32_to_cpu(ri->i_inode_checksum);
- calculated = f2fs_inode_chksum(sbi, &folio->page);
+ calculated = f2fs_inode_chksum(sbi, folio);
if (provided != calculated)
f2fs_warn(sbi, "checksum invalid, nid = %lu, ino_of_node = %x, %x vs. %x",
- folio->index, ino_of_node(&folio->page),
+ folio->index, ino_of_node(folio),
provided, calculated);
return provided == calculated;
}
-void f2fs_inode_chksum_set(struct f2fs_sb_info *sbi, struct page *page)
+void f2fs_inode_chksum_set(struct f2fs_sb_info *sbi, struct folio *folio)
{
- struct f2fs_inode *ri = &F2FS_NODE(page)->i;
+ struct f2fs_inode *ri = &F2FS_NODE(folio)->i;
- if (!f2fs_enable_inode_chksum(sbi, page))
+ if (!f2fs_enable_inode_chksum(sbi, folio))
return;
- ri->i_inode_checksum = cpu_to_le32(f2fs_inode_chksum(sbi, page));
+ ri->i_inode_checksum = cpu_to_le32(f2fs_inode_chksum(sbi, folio));
}
static bool sanity_check_compress_inode(struct inode *inode,
@@ -266,28 +267,28 @@ err_level:
return false;
}
-static bool sanity_check_inode(struct inode *inode, struct page *node_page)
+static bool sanity_check_inode(struct inode *inode, struct folio *node_folio)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct f2fs_inode_info *fi = F2FS_I(inode);
- struct f2fs_inode *ri = F2FS_INODE(node_page);
+ struct f2fs_inode *ri = F2FS_INODE(node_folio);
unsigned long long iblocks;
- iblocks = le64_to_cpu(F2FS_INODE(node_page)->i_blocks);
+ iblocks = le64_to_cpu(F2FS_INODE(node_folio)->i_blocks);
if (!iblocks) {
f2fs_warn(sbi, "%s: corrupted inode i_blocks i_ino=%lx iblocks=%llu, run fsck to fix.",
__func__, inode->i_ino, iblocks);
return false;
}
- if (ino_of_node(node_page) != nid_of_node(node_page)) {
+ if (ino_of_node(node_folio) != nid_of_node(node_folio)) {
f2fs_warn(sbi, "%s: corrupted inode footer i_ino=%lx, ino,nid: [%u, %u] run fsck to fix.",
__func__, inode->i_ino,
- ino_of_node(node_page), nid_of_node(node_page));
+ ino_of_node(node_folio), nid_of_node(node_folio));
return false;
}
- if (ino_of_node(node_page) == fi->i_xattr_nid) {
+ if (ino_of_node(node_folio) == fi->i_xattr_nid) {
f2fs_warn(sbi, "%s: corrupted inode i_ino=%lx, xnid=%x, run fsck to fix.",
__func__, inode->i_ino, fi->i_xattr_nid);
return false;
@@ -354,7 +355,7 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page)
}
}
- if (f2fs_sanity_check_inline_data(inode, node_page)) {
+ if (f2fs_sanity_check_inline_data(inode, node_folio)) {
f2fs_warn(sbi, "%s: inode (ino=%lx, mode=%u) should not have inline_data, run fsck to fix",
__func__, inode->i_ino, inode->i_mode);
return false;
@@ -419,7 +420,7 @@ static int do_read_inode(struct inode *inode)
if (IS_ERR(node_folio))
return PTR_ERR(node_folio);
- ri = F2FS_INODE(&node_folio->page);
+ ri = F2FS_INODE(node_folio);
inode->i_mode = le16_to_cpu(ri->i_mode);
i_uid_write(inode, le32_to_cpu(ri->i_uid));
@@ -469,7 +470,7 @@ static int do_read_inode(struct inode *inode)
fi->i_inline_xattr_size = 0;
}
- if (!sanity_check_inode(inode, &node_folio->page)) {
+ if (!sanity_check_inode(inode, node_folio)) {
f2fs_folio_put(node_folio, true);
set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_handle_error(sbi, ERROR_CORRUPTED_INODE);
@@ -481,9 +482,9 @@ static int do_read_inode(struct inode *inode)
__recover_inline_status(inode, node_folio);
/* try to recover cold bit for non-dir inode */
- if (!S_ISDIR(inode->i_mode) && !is_cold_node(&node_folio->page)) {
+ if (!S_ISDIR(inode->i_mode) && !is_cold_node(node_folio)) {
f2fs_folio_wait_writeback(node_folio, NODE, true, true);
- set_cold_node(&node_folio->page, false);
+ set_cold_node(node_folio, false);
folio_mark_dirty(node_folio);
}
@@ -531,7 +532,7 @@ static int do_read_inode(struct inode *inode)
init_idisk_time(inode);
- if (!sanity_check_extent_cache(inode, &node_folio->page)) {
+ if (!sanity_check_extent_cache(inode, node_folio)) {
f2fs_folio_put(node_folio, true);
f2fs_handle_error(sbi, ERROR_CORRUPTED_INODE);
return -EFSCORRUPTED;
@@ -669,7 +670,7 @@ void f2fs_update_inode(struct inode *inode, struct folio *node_folio)
f2fs_inode_synced(inode);
- ri = F2FS_INODE(&node_folio->page);
+ ri = F2FS_INODE(node_folio);
ri->i_mode = cpu_to_le16(inode->i_mode);
ri->i_advise = fi->i_advise;
@@ -748,11 +749,11 @@ void f2fs_update_inode(struct inode *inode, struct folio *node_folio)
/* deleted inode */
if (inode->i_nlink == 0)
- clear_page_private_inline(&node_folio->page);
+ folio_clear_f2fs_inline(node_folio);
init_idisk_time(inode);
#ifdef CONFIG_F2FS_CHECK_FS
- f2fs_inode_chksum_set(F2FS_I_SB(inode), &node_folio->page);
+ f2fs_inode_chksum_set(F2FS_I_SB(inode), node_folio);
#endif
}
@@ -820,7 +821,7 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)
return 0;
}
-static void f2fs_remove_donate_inode(struct inode *inode)
+void f2fs_remove_donate_inode(struct inode *inode)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
@@ -933,6 +934,19 @@ retry:
f2fs_update_inode_page(inode);
if (dquot_initialize_needed(inode))
set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
+
+	/*
+	 * If both f2fs_truncate() and f2fs_update_inode_page() failed
+	 * due to a fuzzed, corrupted inode, call f2fs_inode_synced() to
+	 * avoid triggering f2fs_bug_on() later.
+	 */
+ if (is_inode_flag_set(inode, FI_DIRTY_INODE)) {
+ f2fs_warn(sbi,
+ "f2fs_evict_inode: inode is dirty, ino:%lu",
+ inode->i_ino);
+ f2fs_inode_synced(inode);
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ }
}
if (freeze_protected)
sb_end_intwrite(inode->i_sb);
@@ -949,8 +963,12 @@ no_delete:
if (likely(!f2fs_cp_error(sbi) &&
!is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
f2fs_bug_on(sbi, is_inode_flag_set(inode, FI_DIRTY_INODE));
- else
- f2fs_inode_synced(inode);
+
+	/*
+	 * In any case, the inode must be removed from the
+	 * sbi->inode_list[DIRTY_META] list to avoid a UAF in
+	 * f2fs_sync_inode_meta() during checkpoint.
+	 */
+ f2fs_inode_synced(inode);
/* for the case f2fs_new_inode() was failed, .i_ino is zero, skip it */
if (inode->i_ino)
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index 07e333ee21b7..b882771e4699 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -1298,19 +1298,19 @@ static const char *f2fs_encrypted_get_link(struct dentry *dentry,
struct inode *inode,
struct delayed_call *done)
{
- struct page *page;
+ struct folio *folio;
const char *target;
if (!dentry)
return ERR_PTR(-ECHILD);
- page = read_mapping_page(inode->i_mapping, 0, NULL);
- if (IS_ERR(page))
- return ERR_CAST(page);
+ folio = read_mapping_folio(inode->i_mapping, 0, NULL);
+ if (IS_ERR(folio))
+ return ERR_CAST(folio);
- target = fscrypt_get_symlink(inode, page_address(page),
+ target = fscrypt_get_symlink(inode, folio_address(folio),
inode->i_sb->s_blocksize, done);
- put_page(page);
+ folio_put(folio);
return target;
}
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index bfe104db284e..27743b93e186 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -135,7 +135,7 @@ static struct folio *get_current_nat_folio(struct f2fs_sb_info *sbi, nid_t nid)
return f2fs_get_meta_folio_retry(sbi, current_nat_addr(sbi, nid));
}
-static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
+static struct folio *get_next_nat_folio(struct f2fs_sb_info *sbi, nid_t nid)
{
struct folio *src_folio;
struct folio *dst_folio;
@@ -149,7 +149,7 @@ static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
/* get current nat block page with lock */
src_folio = get_current_nat_folio(sbi, nid);
if (IS_ERR(src_folio))
- return &src_folio->page;
+ return src_folio;
dst_folio = f2fs_grab_meta_folio(sbi, dst_off);
f2fs_bug_on(sbi, folio_test_dirty(src_folio));
@@ -161,7 +161,7 @@ static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
set_to_next_nat(nm_i, nid);
- return &dst_folio->page;
+ return dst_folio;
}
static struct nat_entry *__alloc_nat_entry(struct f2fs_sb_info *sbi,
@@ -185,7 +185,7 @@ static void __free_nat_entry(struct nat_entry *e)
/* must be locked by nat_tree_lock */
static struct nat_entry *__init_nat_entry(struct f2fs_nm_info *nm_i,
- struct nat_entry *ne, struct f2fs_nat_entry *raw_ne, bool no_fail)
+ struct nat_entry *ne, struct f2fs_nat_entry *raw_ne, bool no_fail, bool init_dirty)
{
if (no_fail)
f2fs_radix_tree_insert(&nm_i->nat_root, nat_get_nid(ne), ne);
@@ -195,6 +195,12 @@ static struct nat_entry *__init_nat_entry(struct f2fs_nm_info *nm_i,
if (raw_ne)
node_info_from_raw_nat(&ne->ni, raw_ne);
+ if (init_dirty) {
+ INIT_LIST_HEAD(&ne->list);
+ nm_i->nat_cnt[TOTAL_NAT]++;
+ return ne;
+ }
+
spin_lock(&nm_i->nat_list_lock);
list_add_tail(&ne->list, &nm_i->nat_entries);
spin_unlock(&nm_i->nat_list_lock);
@@ -204,14 +210,17 @@ static struct nat_entry *__init_nat_entry(struct f2fs_nm_info *nm_i,
return ne;
}
-static struct nat_entry *__lookup_nat_cache(struct f2fs_nm_info *nm_i, nid_t n)
+static struct nat_entry *__lookup_nat_cache(struct f2fs_nm_info *nm_i, nid_t n, bool for_dirty)
{
struct nat_entry *ne;
ne = radix_tree_lookup(&nm_i->nat_root, n);
- /* for recent accessed nat entry, move it to tail of lru list */
- if (ne && !get_nat_flag(ne, IS_DIRTY)) {
+	/*
+	 * For a recently accessed nat entry that will not be dirtied
+	 * soon, move it to the tail of the lru list.
+	 */
+ if (ne && !get_nat_flag(ne, IS_DIRTY) && !for_dirty) {
spin_lock(&nm_i->nat_list_lock);
if (!list_empty(&ne->list))
list_move_tail(&ne->list, &nm_i->nat_entries);
@@ -256,7 +265,7 @@ static struct nat_entry_set *__grab_nat_entry_set(struct f2fs_nm_info *nm_i,
}
static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i,
- struct nat_entry *ne)
+ struct nat_entry *ne, bool init_dirty)
{
struct nat_entry_set *head;
bool new_ne = nat_get_blkaddr(ne) == NEW_ADDR;
@@ -279,7 +288,8 @@ static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i,
goto refresh_list;
nm_i->nat_cnt[DIRTY_NAT]++;
- nm_i->nat_cnt[RECLAIMABLE_NAT]--;
+ if (!init_dirty)
+ nm_i->nat_cnt[RECLAIMABLE_NAT]--;
set_nat_flag(ne, IS_DIRTY, true);
refresh_list:
spin_lock(&nm_i->nat_list_lock);
@@ -312,8 +322,7 @@ static unsigned int __gang_lookup_nat_set(struct f2fs_nm_info *nm_i,
bool f2fs_in_warm_node_list(struct f2fs_sb_info *sbi, struct folio *folio)
{
- return is_node_folio(folio) && IS_DNODE(&folio->page) &&
- is_cold_node(&folio->page);
+ return is_node_folio(folio) && IS_DNODE(folio) && is_cold_node(folio);
}
void f2fs_init_fsync_node_info(struct f2fs_sb_info *sbi)
@@ -384,7 +393,7 @@ int f2fs_need_dentry_mark(struct f2fs_sb_info *sbi, nid_t nid)
bool need = false;
f2fs_down_read(&nm_i->nat_tree_lock);
- e = __lookup_nat_cache(nm_i, nid);
+ e = __lookup_nat_cache(nm_i, nid, false);
if (e) {
if (!get_nat_flag(e, IS_CHECKPOINTED) &&
!get_nat_flag(e, HAS_FSYNCED_INODE))
@@ -401,7 +410,7 @@ bool f2fs_is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid)
bool is_cp = true;
f2fs_down_read(&nm_i->nat_tree_lock);
- e = __lookup_nat_cache(nm_i, nid);
+ e = __lookup_nat_cache(nm_i, nid, false);
if (e && !get_nat_flag(e, IS_CHECKPOINTED))
is_cp = false;
f2fs_up_read(&nm_i->nat_tree_lock);
@@ -415,7 +424,7 @@ bool f2fs_need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino)
bool need_update = true;
f2fs_down_read(&nm_i->nat_tree_lock);
- e = __lookup_nat_cache(nm_i, ino);
+ e = __lookup_nat_cache(nm_i, ino, false);
if (e && get_nat_flag(e, HAS_LAST_FSYNC) &&
(get_nat_flag(e, IS_CHECKPOINTED) ||
get_nat_flag(e, HAS_FSYNCED_INODE)))
@@ -440,9 +449,9 @@ static void cache_nat_entry(struct f2fs_sb_info *sbi, nid_t nid,
return;
f2fs_down_write(&nm_i->nat_tree_lock);
- e = __lookup_nat_cache(nm_i, nid);
+ e = __lookup_nat_cache(nm_i, nid, false);
if (!e)
- e = __init_nat_entry(nm_i, new, ne, false);
+ e = __init_nat_entry(nm_i, new, ne, false, false);
else
f2fs_bug_on(sbi, nat_get_ino(e) != le32_to_cpu(ne->ino) ||
nat_get_blkaddr(e) !=
@@ -459,11 +468,13 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct nat_entry *e;
struct nat_entry *new = __alloc_nat_entry(sbi, ni->nid, true);
+ bool init_dirty = false;
f2fs_down_write(&nm_i->nat_tree_lock);
- e = __lookup_nat_cache(nm_i, ni->nid);
+ e = __lookup_nat_cache(nm_i, ni->nid, true);
if (!e) {
- e = __init_nat_entry(nm_i, new, NULL, true);
+ init_dirty = true;
+ e = __init_nat_entry(nm_i, new, NULL, true, true);
copy_node_info(&e->ni, ni);
f2fs_bug_on(sbi, ni->blk_addr == NEW_ADDR);
} else if (new_blkaddr == NEW_ADDR) {
@@ -499,11 +510,11 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
nat_set_blkaddr(e, new_blkaddr);
if (!__is_valid_data_blkaddr(new_blkaddr))
set_nat_flag(e, IS_CHECKPOINTED, false);
- __set_nat_cache_dirty(nm_i, e);
+ __set_nat_cache_dirty(nm_i, e, init_dirty);
/* update fsync_mark if its inode nat entry is still alive */
if (ni->nid != ni->ino)
- e = __lookup_nat_cache(nm_i, ni->ino);
+ e = __lookup_nat_cache(nm_i, ni->ino, false);
if (e) {
if (fsync_done && ni->nid == ni->ino)
set_nat_flag(e, HAS_FSYNCED_INODE, true);
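
The NAT-cache hunks above cooperate through the new for_dirty/init_dirty flags: an entry that is about to be dirtied never enters the clean LRU list, so it never needs to be removed from it and RECLAIMABLE_NAT is never incremented for it. A condensed sketch of the resulting pattern, pieced together from the hunks rather than copied from any one function:

	bool init_dirty = false;

	e = __lookup_nat_cache(nm_i, nid, true);	/* for_dirty: skip the LRU refresh */
	if (!e) {
		init_dirty = true;
		/* final 'true' keeps the entry off nm_i->nat_entries */
		e = __init_nat_entry(nm_i, new, NULL, true, true);
	}
	/* ... update the entry ... */
	__set_nat_cache_dirty(nm_i, e, init_dirty);	/* skips RECLAIMABLE_NAT-- */
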
@@ -555,20 +566,24 @@ int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid,
struct f2fs_nat_entry ne;
struct nat_entry *e;
pgoff_t index;
- block_t blkaddr;
int i;
+ bool need_cache = true;
ni->flag = 0;
ni->nid = nid;
retry:
/* Check nat cache */
f2fs_down_read(&nm_i->nat_tree_lock);
- e = __lookup_nat_cache(nm_i, nid);
+ e = __lookup_nat_cache(nm_i, nid, false);
if (e) {
ni->ino = nat_get_ino(e);
ni->blk_addr = nat_get_blkaddr(e);
ni->version = nat_get_version(e);
f2fs_up_read(&nm_i->nat_tree_lock);
+ if (IS_ENABLED(CONFIG_F2FS_CHECK_FS)) {
+ need_cache = false;
+ goto sanity_check;
+ }
return 0;
}
@@ -594,7 +609,7 @@ retry:
up_read(&curseg->journal_rwsem);
if (i >= 0) {
f2fs_up_read(&nm_i->nat_tree_lock);
- goto cache;
+ goto sanity_check;
}
/* Fill node_info from nat page */
@@ -609,14 +624,23 @@ retry:
ne = nat_blk->entries[nid - start_nid];
node_info_from_raw_nat(ni, &ne);
f2fs_folio_put(folio, true);
-cache:
- blkaddr = le32_to_cpu(ne.block_addr);
- if (__is_valid_data_blkaddr(blkaddr) &&
- !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE))
- return -EFAULT;
+sanity_check:
+ if (__is_valid_data_blkaddr(ni->blk_addr) &&
+ !f2fs_is_valid_blkaddr(sbi, ni->blk_addr,
+ DATA_GENERIC_ENHANCE)) {
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ f2fs_err_ratelimited(sbi,
+ "f2fs_get_node_info of %pS: inconsistent nat entry, "
+ "ino:%u, nid:%u, blkaddr:%u, ver:%u, flag:%u",
+ __builtin_return_address(0),
+ ni->ino, ni->nid, ni->blk_addr, ni->version, ni->flag);
+ f2fs_handle_error(sbi, ERROR_INCONSISTENT_NAT);
+ return -EFSCORRUPTED;
+ }
/* cache nat entry */
- cache_nat_entry(sbi, nid, &ne);
+ if (need_cache)
+ cache_nat_entry(sbi, nid, &ne);
return 0;
}
@@ -636,7 +660,7 @@ static void f2fs_ra_node_pages(struct folio *parent, int start, int n)
end = start + n;
end = min(end, (int)NIDS_PER_BLOCK);
for (i = start; i < end; i++) {
- nid = get_nid(&parent->page, i, false);
+ nid = get_nid(parent, i, false);
f2fs_ra_node_page(sbi, nid);
}
@@ -795,7 +819,7 @@ int f2fs_get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode)
parent = nfolio[0];
if (level != 0)
- nids[1] = get_nid(&parent->page, offset[0], true);
+ nids[1] = get_nid(parent, offset[0], true);
dn->inode_folio = nfolio[0];
dn->inode_folio_locked = true;
@@ -803,6 +827,16 @@ int f2fs_get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode)
for (i = 1; i <= level; i++) {
bool done = false;
+ if (nids[i] && nids[i] == dn->inode->i_ino) {
+ err = -EFSCORRUPTED;
+ f2fs_err_ratelimited(sbi,
+ "inode mapping table is corrupted, run fsck to fix it, "
+ "ino:%lu, nid:%u, level:%d, offset:%d",
+ dn->inode->i_ino, nids[i], level, offset[level]);
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ goto release_pages;
+ }
+
if (!nids[i] && mode == ALLOC_NODE) {
/* alloc new node */
if (!f2fs_alloc_nid(sbi, &(nids[i]))) {
@@ -846,7 +880,7 @@ int f2fs_get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode)
}
if (i < level) {
parent = nfolio[i];
- nids[i + 1] = get_nid(&parent->page, offset[i], false);
+ nids[i + 1] = get_nid(parent, offset[i], false);
}
}
dn->nid = nids[level];
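
The added check encodes a structural invariant of the node tree: the inode's own nid appears only at level 0, so a deeper nids[i] equal to dn->inode->i_ino means the on-disk mapping loops back into the inode and can only be corruption. Stated as a one-line invariant (illustrative comment, not kernel code):

	/* healthy tree: nids[i] != inode->i_ino for every level i >= 1 */
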
@@ -961,9 +995,9 @@ static int truncate_dnode(struct dnode_of_data *dn)
else if (IS_ERR(folio))
return PTR_ERR(folio);
- if (IS_INODE(&folio->page) || ino_of_node(&folio->page) != dn->inode->i_ino) {
+ if (IS_INODE(folio) || ino_of_node(folio) != dn->inode->i_ino) {
f2fs_err(sbi, "incorrect node reference, ino: %lu, nid: %u, ino_of_node: %u",
- dn->inode->i_ino, dn->nid, ino_of_node(&folio->page));
+ dn->inode->i_ino, dn->nid, ino_of_node(folio));
set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_handle_error(sbi, ERROR_INVALID_NODE_REFERENCE);
f2fs_folio_put(folio, true);
@@ -1007,7 +1041,7 @@ static int truncate_nodes(struct dnode_of_data *dn, unsigned int nofs,
f2fs_ra_node_pages(folio, ofs, NIDS_PER_BLOCK);
- rn = F2FS_NODE(&folio->page);
+ rn = F2FS_NODE(folio);
if (depth < 3) {
for (i = ofs; i < NIDS_PER_BLOCK; i++, freed++) {
child_nid = le32_to_cpu(rn->in.nid[i]);
@@ -1070,7 +1104,7 @@ static int truncate_partial_nodes(struct dnode_of_data *dn,
int i;
int idx = depth - 2;
- nid[0] = get_nid(&dn->inode_folio->page, offset[0], true);
+ nid[0] = get_nid(dn->inode_folio, offset[0], true);
if (!nid[0])
return 0;
@@ -1083,14 +1117,14 @@ static int truncate_partial_nodes(struct dnode_of_data *dn,
idx = i - 1;
goto fail;
}
- nid[i + 1] = get_nid(&folios[i]->page, offset[i + 1], false);
+ nid[i + 1] = get_nid(folios[i], offset[i + 1], false);
}
f2fs_ra_node_pages(folios[idx], offset[idx + 1], NIDS_PER_BLOCK);
/* free direct nodes linked to a partial indirect node */
for (i = offset[idx + 1]; i < NIDS_PER_BLOCK; i++) {
- child_nid = get_nid(&folios[idx]->page, i, false);
+ child_nid = get_nid(folios[idx], i, false);
if (!child_nid)
continue;
dn->nid = child_nid;
@@ -1159,7 +1193,7 @@ int f2fs_truncate_inode_blocks(struct inode *inode, pgoff_t from)
set_new_dnode(&dn, inode, folio, NULL, 0);
folio_unlock(folio);
- ri = F2FS_INODE(&folio->page);
+ ri = F2FS_INODE(folio);
switch (level) {
case 0:
case 1:
@@ -1188,7 +1222,7 @@ int f2fs_truncate_inode_blocks(struct inode *inode, pgoff_t from)
skip_partial:
while (cont) {
- dn.nid = get_nid(&folio->page, offset[0], true);
+ dn.nid = get_nid(folio, offset[0], true);
switch (offset[0]) {
case NODE_DIR1_BLOCK:
case NODE_DIR2_BLOCK:
@@ -1220,7 +1254,7 @@ skip_partial:
}
if (err < 0)
goto fail;
- if (offset[1] == 0 && get_nid(&folio->page, offset[0], true)) {
+ if (offset[1] == 0 && get_nid(folio, offset[0], true)) {
folio_lock(folio);
BUG_ON(!is_node_folio(folio));
set_nid(folio, offset[0], 0, true);
@@ -1367,8 +1401,8 @@ struct folio *f2fs_new_node_folio(struct dnode_of_data *dn, unsigned int ofs)
set_node_addr(sbi, &new_ni, NEW_ADDR, false);
f2fs_folio_wait_writeback(folio, NODE, true, true);
- fill_node_footer(&folio->page, dn->nid, dn->inode->i_ino, ofs, true);
- set_cold_node(&folio->page, S_ISDIR(dn->inode->i_mode));
+ fill_node_footer(folio, dn->nid, dn->inode->i_ino, ofs, true);
+ set_cold_node(folio, S_ISDIR(dn->inode->i_mode));
if (!folio_test_uptodate(folio))
folio_mark_uptodate(folio);
if (folio_mark_dirty(folio))
@@ -1400,7 +1434,7 @@ static int read_node_folio(struct folio *folio, blk_opf_t op_flags)
.type = NODE,
.op = REQ_OP_READ,
.op_flags = op_flags,
- .page = &folio->page,
+ .folio = folio,
.encrypted_page = NULL,
};
int err;
@@ -1462,17 +1496,15 @@ static int sanity_check_node_footer(struct f2fs_sb_info *sbi,
struct folio *folio, pgoff_t nid,
enum node_type ntype)
{
- struct page *page = &folio->page;
-
- if (unlikely(nid != nid_of_node(page) ||
- (ntype == NODE_TYPE_INODE && !IS_INODE(page)) ||
+ if (unlikely(nid != nid_of_node(folio) ||
+ (ntype == NODE_TYPE_INODE && !IS_INODE(folio)) ||
(ntype == NODE_TYPE_XATTR &&
- !f2fs_has_xattr_block(ofs_of_node(page))) ||
+ !f2fs_has_xattr_block(ofs_of_node(folio))) ||
time_to_inject(sbi, FAULT_INCONSISTENT_FOOTER))) {
f2fs_warn(sbi, "inconsistent node block, node_type:%d, nid:%lu, "
"node_footer[nid:%u,ino:%u,ofs:%u,cpver:%llu,blkaddr:%u]",
- ntype, nid, nid_of_node(page), ino_of_node(page),
- ofs_of_node(page), cpver_of_node(page),
+ ntype, nid, nid_of_node(folio), ino_of_node(folio),
+ ofs_of_node(folio), cpver_of_node(folio),
next_blkaddr_of_node(folio));
set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_handle_error(sbi, ERROR_INCONSISTENT_FOOTER);
@@ -1553,7 +1585,7 @@ struct folio *f2fs_get_xnode_folio(struct f2fs_sb_info *sbi, pgoff_t xnid)
static struct folio *f2fs_get_node_folio_ra(struct folio *parent, int start)
{
struct f2fs_sb_info *sbi = F2FS_F_SB(parent);
- nid_t nid = get_nid(&parent->page, start, false);
+ nid_t nid = get_nid(parent, start, false);
return __get_node_folio(sbi, nid, parent, start, NODE_TYPE_REGULAR);
}
@@ -1618,9 +1650,9 @@ static struct folio *last_fsync_dnode(struct f2fs_sb_info *sbi, nid_t ino)
return ERR_PTR(-EIO);
}
- if (!IS_DNODE(&folio->page) || !is_cold_node(&folio->page))
+ if (!IS_DNODE(folio) || !is_cold_node(folio))
continue;
- if (ino_of_node(&folio->page) != ino)
+ if (ino_of_node(folio) != ino)
continue;
folio_lock(folio);
@@ -1630,7 +1662,7 @@ continue_unlock:
folio_unlock(folio);
continue;
}
- if (ino_of_node(&folio->page) != ino)
+ if (ino_of_node(folio) != ino)
goto continue_unlock;
if (!folio_test_dirty(folio)) {
@@ -1660,11 +1692,11 @@ static bool __write_node_folio(struct folio *folio, bool atomic, bool *submitted
struct node_info ni;
struct f2fs_io_info fio = {
.sbi = sbi,
- .ino = ino_of_node(&folio->page),
+ .ino = ino_of_node(folio),
.type = NODE,
.op = REQ_OP_WRITE,
.op_flags = wbc_to_write_flags(wbc),
- .page = &folio->page,
+ .folio = folio,
.encrypted_page = NULL,
.submitted = 0,
.io_type = io_type,
@@ -1689,11 +1721,11 @@ static bool __write_node_folio(struct folio *folio, bool atomic, bool *submitted
if (!is_sbi_flag_set(sbi, SBI_CP_DISABLED) &&
wbc->sync_mode == WB_SYNC_NONE &&
- IS_DNODE(&folio->page) && is_cold_node(&folio->page))
+ IS_DNODE(folio) && is_cold_node(folio))
goto redirty_out;
/* get old block addr of this node page */
- nid = nid_of_node(&folio->page);
+ nid = nid_of_node(folio);
f2fs_bug_on(sbi, folio->index != nid);
if (f2fs_get_node_info(sbi, nid, &ni, !do_balance))
@@ -1731,7 +1763,7 @@ static bool __write_node_folio(struct folio *folio, bool atomic, bool *submitted
fio.old_blkaddr = ni.blk_addr;
f2fs_do_write_node_page(nid, &fio);
- set_node_addr(sbi, &ni, fio.new_blkaddr, is_fsync_dnode(&folio->page));
+ set_node_addr(sbi, &ni, fio.new_blkaddr, is_fsync_dnode(folio));
dec_page_count(sbi, F2FS_DIRTY_NODES);
f2fs_up_read(&sbi->node_write);
@@ -1827,9 +1859,9 @@ retry:
goto out;
}
- if (!IS_DNODE(&folio->page) || !is_cold_node(&folio->page))
+ if (!IS_DNODE(folio) || !is_cold_node(folio))
continue;
- if (ino_of_node(&folio->page) != ino)
+ if (ino_of_node(folio) != ino)
continue;
folio_lock(folio);
@@ -1839,7 +1871,7 @@ continue_unlock:
folio_unlock(folio);
continue;
}
- if (ino_of_node(&folio->page) != ino)
+ if (ino_of_node(folio) != ino)
goto continue_unlock;
if (!folio_test_dirty(folio) && folio != last_folio) {
@@ -1849,17 +1881,17 @@ continue_unlock:
f2fs_folio_wait_writeback(folio, NODE, true, true);
- set_fsync_mark(&folio->page, 0);
- set_dentry_mark(&folio->page, 0);
+ set_fsync_mark(folio, 0);
+ set_dentry_mark(folio, 0);
if (!atomic || folio == last_folio) {
- set_fsync_mark(&folio->page, 1);
+ set_fsync_mark(folio, 1);
percpu_counter_inc(&sbi->rf_node_block_count);
- if (IS_INODE(&folio->page)) {
+ if (IS_INODE(folio)) {
if (is_inode_flag_set(inode,
FI_DIRTY_INODE))
f2fs_update_inode(inode, folio);
- set_dentry_mark(&folio->page,
+ set_dentry_mark(folio,
f2fs_need_dentry_mark(sbi, ino));
}
/* may be written by other thread */
@@ -1935,7 +1967,7 @@ static bool flush_dirty_inode(struct folio *folio)
{
struct f2fs_sb_info *sbi = F2FS_F_SB(folio);
struct inode *inode;
- nid_t ino = ino_of_node(&folio->page);
+ nid_t ino = ino_of_node(folio);
inode = find_inode_nowait(sbi->sb, ino, f2fs_match_ino, NULL);
if (!inode)
@@ -1964,7 +1996,7 @@ void f2fs_flush_inline_data(struct f2fs_sb_info *sbi)
for (i = 0; i < nr_folios; i++) {
struct folio *folio = fbatch.folios[i];
- if (!IS_INODE(&folio->page))
+ if (!IS_INODE(folio))
continue;
folio_lock(folio);
@@ -1975,10 +2007,10 @@ void f2fs_flush_inline_data(struct f2fs_sb_info *sbi)
goto unlock;
/* flush inline_data, if it's async context. */
- if (page_private_inline(&folio->page)) {
- clear_page_private_inline(&folio->page);
+ if (folio_test_f2fs_inline(folio)) {
+ folio_clear_f2fs_inline(folio);
folio_unlock(folio);
- flush_inline_data(sbi, ino_of_node(&folio->page));
+ flush_inline_data(sbi, ino_of_node(folio));
continue;
}
unlock:
@@ -2027,13 +2059,13 @@ next_step:
* 1. dentry dnodes
* 2. file dnodes
*/
- if (step == 0 && IS_DNODE(&folio->page))
+ if (step == 0 && IS_DNODE(folio))
continue;
- if (step == 1 && (!IS_DNODE(&folio->page) ||
- is_cold_node(&folio->page)))
+ if (step == 1 && (!IS_DNODE(folio) ||
+ is_cold_node(folio)))
continue;
- if (step == 2 && (!IS_DNODE(&folio->page) ||
- !is_cold_node(&folio->page)))
+ if (step == 2 && (!IS_DNODE(folio) ||
+ !is_cold_node(folio)))
continue;
lock_node:
if (wbc->sync_mode == WB_SYNC_ALL)
@@ -2057,15 +2089,15 @@ continue_unlock:
goto write_node;
/* flush inline_data */
- if (page_private_inline(&folio->page)) {
- clear_page_private_inline(&folio->page);
+ if (folio_test_f2fs_inline(folio)) {
+ folio_clear_f2fs_inline(folio);
folio_unlock(folio);
- flush_inline_data(sbi, ino_of_node(&folio->page));
+ flush_inline_data(sbi, ino_of_node(folio));
goto lock_node;
}
/* flush dirty inode */
- if (IS_INODE(&folio->page) && flush_dirty_inode(folio))
+ if (IS_INODE(folio) && flush_dirty_inode(folio))
goto lock_node;
write_node:
f2fs_folio_wait_writeback(folio, NODE, true, true);
@@ -2073,8 +2105,8 @@ write_node:
if (!folio_clear_dirty_for_io(folio))
goto continue_unlock;
- set_fsync_mark(&folio->page, 0);
- set_dentry_mark(&folio->page, 0);
+ set_fsync_mark(folio, 0);
+ set_dentry_mark(folio, 0);
if (!__write_node_folio(folio, false, &submitted,
wbc, do_balance, io_type, NULL)) {
@@ -2201,12 +2233,12 @@ static bool f2fs_dirty_node_folio(struct address_space *mapping,
if (!folio_test_uptodate(folio))
folio_mark_uptodate(folio);
#ifdef CONFIG_F2FS_CHECK_FS
- if (IS_INODE(&folio->page))
- f2fs_inode_chksum_set(F2FS_M_SB(mapping), &folio->page);
+ if (IS_INODE(folio))
+ f2fs_inode_chksum_set(F2FS_M_SB(mapping), folio);
#endif
if (filemap_dirty_folio(mapping, folio)) {
inc_page_count(F2FS_M_SB(mapping), F2FS_DIRTY_NODES);
- set_page_private_reference(&folio->page);
+ folio_set_f2fs_reference(folio);
return true;
}
return false;
@@ -2351,7 +2383,7 @@ static bool add_free_nid(struct f2fs_sb_info *sbi,
* - __remove_nid_from_list(PREALLOC_NID)
* - __insert_nid_to_list(FREE_NID)
*/
- ne = __lookup_nat_cache(nm_i, nid);
+ ne = __lookup_nat_cache(nm_i, nid, false);
if (ne && (!get_nat_flag(ne, IS_CHECKPOINTED) ||
nat_get_blkaddr(ne) != NULL_ADDR))
goto err_out;
@@ -2714,7 +2746,7 @@ int f2fs_recover_inline_xattr(struct inode *inode, struct folio *folio)
if (IS_ERR(ifolio))
return PTR_ERR(ifolio);
- ri = F2FS_INODE(&folio->page);
+ ri = F2FS_INODE(folio);
if (ri->i_inline & F2FS_INLINE_XATTR) {
if (!f2fs_has_inline_xattr(inode)) {
set_inode_flag(inode, FI_INLINE_XATTR);
@@ -2740,7 +2772,7 @@ update_inode:
return 0;
}
-int f2fs_recover_xattr_data(struct inode *inode, struct page *page)
+int f2fs_recover_xattr_data(struct inode *inode, struct folio *folio)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
nid_t prev_xnid = F2FS_I(inode)->i_xattr_nid;
@@ -2778,8 +2810,8 @@ recover_xnid:
f2fs_update_inode_page(inode);
/* 3: update and set xattr node page dirty */
- if (page) {
- memcpy(F2FS_NODE(&xfolio->page), F2FS_NODE(page),
+ if (folio) {
+ memcpy(F2FS_NODE(xfolio), F2FS_NODE(folio),
VALID_XATTR_BLOCK_SIZE);
folio_mark_dirty(xfolio);
}
@@ -2788,10 +2820,10 @@ recover_xnid:
return 0;
}
-int f2fs_recover_inode_page(struct f2fs_sb_info *sbi, struct page *page)
+int f2fs_recover_inode_page(struct f2fs_sb_info *sbi, struct folio *folio)
{
struct f2fs_inode *src, *dst;
- nid_t ino = ino_of_node(page);
+ nid_t ino = ino_of_node(folio);
struct node_info old_ni, new_ni;
struct folio *ifolio;
int err;
@@ -2814,11 +2846,11 @@ retry:
if (!folio_test_uptodate(ifolio))
folio_mark_uptodate(ifolio);
- fill_node_footer(&ifolio->page, ino, ino, 0, true);
- set_cold_node(&ifolio->page, false);
+ fill_node_footer(ifolio, ino, ino, 0, true);
+ set_cold_node(ifolio, false);
- src = F2FS_INODE(page);
- dst = F2FS_INODE(&ifolio->page);
+ src = F2FS_INODE(folio);
+ dst = F2FS_INODE(ifolio);
memcpy(dst, src, offsetof(struct f2fs_inode, i_ext));
dst->i_size = 0;
@@ -2884,7 +2916,7 @@ int f2fs_restore_node_summary(struct f2fs_sb_info *sbi,
if (IS_ERR(folio))
return PTR_ERR(folio);
- rn = F2FS_NODE(&folio->page);
+ rn = F2FS_NODE(folio);
sum_entry->nid = rn->footer.nid;
sum_entry->version = 0;
sum_entry->ofs_in_node = 0;
@@ -2904,6 +2936,7 @@ static void remove_nats_in_journal(struct f2fs_sb_info *sbi)
struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
struct f2fs_journal *journal = curseg->journal;
int i;
+ bool init_dirty;
down_write(&curseg->journal_rwsem);
for (i = 0; i < nats_in_cursum(journal); i++) {
@@ -2914,12 +2947,15 @@ static void remove_nats_in_journal(struct f2fs_sb_info *sbi)
if (f2fs_check_nid_range(sbi, nid))
continue;
+ init_dirty = false;
+
raw_ne = nat_in_journal(journal, i);
- ne = __lookup_nat_cache(nm_i, nid);
+ ne = __lookup_nat_cache(nm_i, nid, true);
if (!ne) {
+ init_dirty = true;
ne = __alloc_nat_entry(sbi, nid, true);
- __init_nat_entry(nm_i, ne, &raw_ne, true);
+ __init_nat_entry(nm_i, ne, &raw_ne, true, true);
}
/*
@@ -2934,7 +2970,7 @@ static void remove_nats_in_journal(struct f2fs_sb_info *sbi)
spin_unlock(&nm_i->nid_list_lock);
}
- __set_nat_cache_dirty(nm_i, ne);
+ __set_nat_cache_dirty(nm_i, ne, init_dirty);
}
update_nats_in_cursum(journal, -i);
up_write(&curseg->journal_rwsem);
@@ -2959,11 +2995,10 @@ add_out:
}
static void __update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid,
- struct page *page)
+ const struct f2fs_nat_block *nat_blk)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
unsigned int nat_index = start_nid / NAT_ENTRY_PER_BLOCK;
- struct f2fs_nat_block *nat_blk = page_address(page);
int valid = 0;
int i = 0;
@@ -3000,7 +3035,7 @@ static int __flush_nat_entry_set(struct f2fs_sb_info *sbi,
bool to_journal = true;
struct f2fs_nat_block *nat_blk;
struct nat_entry *ne, *cur;
- struct page *page = NULL;
+ struct folio *folio = NULL;
/*
* there are two steps to flush nat entries:
@@ -3014,11 +3049,11 @@ static int __flush_nat_entry_set(struct f2fs_sb_info *sbi,
if (to_journal) {
down_write(&curseg->journal_rwsem);
} else {
- page = get_next_nat_page(sbi, start_nid);
- if (IS_ERR(page))
- return PTR_ERR(page);
+ folio = get_next_nat_folio(sbi, start_nid);
+ if (IS_ERR(folio))
+ return PTR_ERR(folio);
- nat_blk = page_address(page);
+ nat_blk = folio_address(folio);
f2fs_bug_on(sbi, !nat_blk);
}
@@ -3054,8 +3089,8 @@ static int __flush_nat_entry_set(struct f2fs_sb_info *sbi,
if (to_journal) {
up_write(&curseg->journal_rwsem);
} else {
- __update_nat_bits(sbi, start_nid, page);
- f2fs_put_page(page, 1);
+ __update_nat_bits(sbi, start_nid, nat_blk);
+ f2fs_folio_put(folio, true);
}
/* Allow dirty nats by node block allocation in write_begin */
@@ -3395,10 +3430,10 @@ void f2fs_destroy_node_manager(struct f2fs_sb_info *sbi)
}
kvfree(nm_i->free_nid_count);
- kvfree(nm_i->nat_bitmap);
+ kfree(nm_i->nat_bitmap);
kvfree(nm_i->nat_bits);
#ifdef CONFIG_F2FS_CHECK_FS
- kvfree(nm_i->nat_bitmap_mir);
+ kfree(nm_i->nat_bitmap_mir);
#endif
sbi->nm_info = NULL;
kfree(nm_i);
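
The kvfree() to kfree() switches here look like a consistency cleanup rather than a bug fix: kvfree() accepts both kmalloc- and vmalloc-backed memory, but pairing each allocation with its exact counterpart documents the backing (this assumes nat_bitmap/nat_bitmap_mir are kmalloc-backed, e.g. via kmemdup()). The general pairing rule, as a sketch:

	void *small = kmalloc(64, GFP_KERNEL);		/* always physically contiguous */
	kfree(small);					/* exact counterpart */

	void *large = kvmalloc(1 << 20, GFP_KERNEL);	/* may fall back to vmalloc */
	kvfree(large);					/* handles either backing */
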
diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
index 1446c433b3ec..030390543b54 100644
--- a/fs/f2fs/node.h
+++ b/fs/f2fs/node.h
@@ -31,7 +31,7 @@
/* control total # of nats */
#define DEF_NAT_CACHE_THRESHOLD 100000
-/* control total # of node writes used for roll-fowrad recovery */
+/* control total # of node writes used for roll-forward recovery */
#define DEF_RF_NODE_BLOCKS 0
/* vector size for gang look-up from nat cache that consists of radix tree */
@@ -243,41 +243,41 @@ static inline void set_to_next_nat(struct f2fs_nm_info *nm_i, nid_t start_nid)
#endif
}
-static inline nid_t ino_of_node(struct page *node_page)
+static inline nid_t ino_of_node(const struct folio *node_folio)
{
- struct f2fs_node *rn = F2FS_NODE(node_page);
+ struct f2fs_node *rn = F2FS_NODE(node_folio);
return le32_to_cpu(rn->footer.ino);
}
-static inline nid_t nid_of_node(struct page *node_page)
+static inline nid_t nid_of_node(const struct folio *node_folio)
{
- struct f2fs_node *rn = F2FS_NODE(node_page);
+ struct f2fs_node *rn = F2FS_NODE(node_folio);
return le32_to_cpu(rn->footer.nid);
}
-static inline unsigned int ofs_of_node(const struct page *node_page)
+static inline unsigned int ofs_of_node(const struct folio *node_folio)
{
- struct f2fs_node *rn = F2FS_NODE(node_page);
+ struct f2fs_node *rn = F2FS_NODE(node_folio);
unsigned flag = le32_to_cpu(rn->footer.flag);
return flag >> OFFSET_BIT_SHIFT;
}
-static inline __u64 cpver_of_node(struct page *node_page)
+static inline __u64 cpver_of_node(const struct folio *node_folio)
{
- struct f2fs_node *rn = F2FS_NODE(node_page);
+ struct f2fs_node *rn = F2FS_NODE(node_folio);
return le64_to_cpu(rn->footer.cp_ver);
}
-static inline block_t next_blkaddr_of_node(struct folio *node_folio)
+static inline block_t next_blkaddr_of_node(const struct folio *node_folio)
{
- struct f2fs_node *rn = F2FS_NODE(&node_folio->page);
+ struct f2fs_node *rn = F2FS_NODE(node_folio);
return le32_to_cpu(rn->footer.next_blkaddr);
}
-static inline void fill_node_footer(struct page *page, nid_t nid,
+static inline void fill_node_footer(const struct folio *folio, nid_t nid,
nid_t ino, unsigned int ofs, bool reset)
{
- struct f2fs_node *rn = F2FS_NODE(page);
+ struct f2fs_node *rn = F2FS_NODE(folio);
unsigned int old_flag = 0;
if (reset)
@@ -293,17 +293,18 @@ static inline void fill_node_footer(struct page *page, nid_t nid,
(old_flag & OFFSET_BIT_MASK));
}
-static inline void copy_node_footer(struct page *dst, struct page *src)
+static inline void copy_node_footer(const struct folio *dst,
+ const struct folio *src)
{
struct f2fs_node *src_rn = F2FS_NODE(src);
struct f2fs_node *dst_rn = F2FS_NODE(dst);
memcpy(&dst_rn->footer, &src_rn->footer, sizeof(struct node_footer));
}
-static inline void fill_node_footer_blkaddr(struct page *page, block_t blkaddr)
+static inline void fill_node_footer_blkaddr(struct folio *folio, block_t blkaddr)
{
- struct f2fs_checkpoint *ckpt = F2FS_CKPT(F2FS_P_SB(page));
- struct f2fs_node *rn = F2FS_NODE(page);
+ struct f2fs_checkpoint *ckpt = F2FS_CKPT(F2FS_F_SB(folio));
+ struct f2fs_node *rn = F2FS_NODE(folio);
__u64 cp_ver = cur_cp_version(ckpt);
if (__is_set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG))
@@ -313,19 +314,19 @@ static inline void fill_node_footer_blkaddr(struct page *page, block_t blkaddr)
rn->footer.next_blkaddr = cpu_to_le32(blkaddr);
}
-static inline bool is_recoverable_dnode(struct page *page)
+static inline bool is_recoverable_dnode(const struct folio *folio)
{
- struct f2fs_checkpoint *ckpt = F2FS_CKPT(F2FS_P_SB(page));
+ struct f2fs_checkpoint *ckpt = F2FS_CKPT(F2FS_F_SB(folio));
__u64 cp_ver = cur_cp_version(ckpt);
/* Don't care crc part, if fsck.f2fs sets it. */
if (__is_set_ckpt_flags(ckpt, CP_NOCRC_RECOVERY_FLAG))
- return (cp_ver << 32) == (cpver_of_node(page) << 32);
+ return (cp_ver << 32) == (cpver_of_node(folio) << 32);
if (__is_set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG))
cp_ver |= (cur_cp_crc(ckpt) << 32);
- return cp_ver == cpver_of_node(page);
+ return cp_ver == cpver_of_node(folio);
}
/*
@@ -349,9 +350,9 @@ static inline bool is_recoverable_dnode(struct page *page)
* `- indirect node ((6 + 2N) + (N - 1)(N + 1))
* `- direct node
*/
-static inline bool IS_DNODE(const struct page *node_page)
+static inline bool IS_DNODE(const struct folio *node_folio)
{
- unsigned int ofs = ofs_of_node(node_page);
+ unsigned int ofs = ofs_of_node(node_folio);
if (f2fs_has_xattr_block(ofs))
return true;
@@ -369,7 +370,7 @@ static inline bool IS_DNODE(const struct page *node_page)
static inline int set_nid(struct folio *folio, int off, nid_t nid, bool i)
{
- struct f2fs_node *rn = F2FS_NODE(&folio->page);
+ struct f2fs_node *rn = F2FS_NODE(folio);
f2fs_folio_wait_writeback(folio, NODE, true, true);
@@ -380,9 +381,9 @@ static inline int set_nid(struct folio *folio, int off, nid_t nid, bool i)
return folio_mark_dirty(folio);
}
-static inline nid_t get_nid(struct page *p, int off, bool i)
+static inline nid_t get_nid(const struct folio *folio, int off, bool i)
{
- struct f2fs_node *rn = F2FS_NODE(p);
+ struct f2fs_node *rn = F2FS_NODE(folio);
if (i)
return le32_to_cpu(rn->i.i_nid[off - NODE_DIR1_BLOCK]);
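
After the const-folio conversion, get_nid() still dispatches on its third argument: true indexes the inode's i_nid[] array (offsets counted from NODE_DIR1_BLOCK), false indexes a direct/indirect node's nid[] array. A brief usage sketch with hypothetical folio variables:

	nid_t first_direct = get_nid(inode_folio, NODE_DIR1_BLOCK, true);
	nid_t first_child  = get_nid(indirect_folio, 0, false);
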
@@ -396,19 +397,19 @@ static inline nid_t get_nid(struct page *p, int off, bool i)
* - Mark cold data pages in page cache
*/
-static inline int is_node(const struct page *page, int type)
+static inline int is_node(const struct folio *folio, int type)
{
- struct f2fs_node *rn = F2FS_NODE(page);
+ struct f2fs_node *rn = F2FS_NODE(folio);
return le32_to_cpu(rn->footer.flag) & BIT(type);
}
-#define is_cold_node(page) is_node(page, COLD_BIT_SHIFT)
-#define is_fsync_dnode(page) is_node(page, FSYNC_BIT_SHIFT)
-#define is_dent_dnode(page) is_node(page, DENT_BIT_SHIFT)
+#define is_cold_node(folio) is_node(folio, COLD_BIT_SHIFT)
+#define is_fsync_dnode(folio) is_node(folio, FSYNC_BIT_SHIFT)
+#define is_dent_dnode(folio) is_node(folio, DENT_BIT_SHIFT)
-static inline void set_cold_node(struct page *page, bool is_dir)
+static inline void set_cold_node(const struct folio *folio, bool is_dir)
{
- struct f2fs_node *rn = F2FS_NODE(page);
+ struct f2fs_node *rn = F2FS_NODE(folio);
unsigned int flag = le32_to_cpu(rn->footer.flag);
if (is_dir)
@@ -418,9 +419,9 @@ static inline void set_cold_node(struct page *page, bool is_dir)
rn->footer.flag = cpu_to_le32(flag);
}
-static inline void set_mark(struct page *page, int mark, int type)
+static inline void set_mark(struct folio *folio, int mark, int type)
{
- struct f2fs_node *rn = F2FS_NODE(page);
+ struct f2fs_node *rn = F2FS_NODE(folio);
unsigned int flag = le32_to_cpu(rn->footer.flag);
if (mark)
flag |= BIT(type);
@@ -429,8 +430,8 @@ static inline void set_mark(struct page *page, int mark, int type)
rn->footer.flag = cpu_to_le32(flag);
#ifdef CONFIG_F2FS_CHECK_FS
- f2fs_inode_chksum_set(F2FS_P_SB(page), page);
+ f2fs_inode_chksum_set(F2FS_F_SB(folio), folio);
#endif
}
-#define set_dentry_mark(page, mark) set_mark(page, mark, DENT_BIT_SHIFT)
-#define set_fsync_mark(page, mark) set_mark(page, mark, FSYNC_BIT_SHIFT)
+#define set_dentry_mark(folio, mark) set_mark(folio, mark, DENT_BIT_SHIFT)
+#define set_fsync_mark(folio, mark) set_mark(folio, mark, FSYNC_BIT_SHIFT)
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index 51ebed4e1521..4cb3a91801b4 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -157,10 +157,10 @@ static int init_recovered_filename(const struct inode *dir,
return 0;
}
-static int recover_dentry(struct inode *inode, struct page *ipage,
+static int recover_dentry(struct inode *inode, struct folio *ifolio,
struct list_head *dir_list)
{
- struct f2fs_inode *raw_inode = F2FS_INODE(ipage);
+ struct f2fs_inode *raw_inode = F2FS_INODE(ifolio);
nid_t pino = le32_to_cpu(raw_inode->i_pino);
struct f2fs_dir_entry *de;
struct f2fs_filename fname;
@@ -233,14 +233,14 @@ out:
else
name = raw_inode->i_name;
f2fs_notice(F2FS_I_SB(inode), "%s: ino = %x, name = %s, dir = %lx, err = %d",
- __func__, ino_of_node(ipage), name,
+ __func__, ino_of_node(ifolio), name,
IS_ERR(dir) ? 0 : dir->i_ino, err);
return err;
}
-static int recover_quota_data(struct inode *inode, struct page *page)
+static int recover_quota_data(struct inode *inode, struct folio *folio)
{
- struct f2fs_inode *raw = F2FS_INODE(page);
+ struct f2fs_inode *raw = F2FS_INODE(folio);
struct iattr attr;
uid_t i_uid = le32_to_cpu(raw->i_uid);
gid_t i_gid = le32_to_cpu(raw->i_gid);
@@ -277,16 +277,16 @@ static void recover_inline_flags(struct inode *inode, struct f2fs_inode *ri)
clear_inode_flag(inode, FI_DATA_EXIST);
}
-static int recover_inode(struct inode *inode, struct page *page)
+static int recover_inode(struct inode *inode, struct folio *folio)
{
- struct f2fs_inode *raw = F2FS_INODE(page);
+ struct f2fs_inode *raw = F2FS_INODE(folio);
struct f2fs_inode_info *fi = F2FS_I(inode);
char *name;
int err;
inode->i_mode = le16_to_cpu(raw->i_mode);
- err = recover_quota_data(inode, page);
+ err = recover_quota_data(inode, folio);
if (err)
return err;
@@ -333,10 +333,10 @@ static int recover_inode(struct inode *inode, struct page *page)
if (file_enc_name(inode))
name = "<encrypted>";
else
- name = F2FS_INODE(page)->i_name;
+ name = F2FS_INODE(folio)->i_name;
f2fs_notice(F2FS_I_SB(inode), "recover_inode: ino = %x, name = %s, inline = %x",
- ino_of_node(page), name, raw->i_inline);
+ ino_of_node(folio), name, raw->i_inline);
return 0;
}
@@ -375,7 +375,7 @@ static int sanity_check_node_chain(struct f2fs_sb_info *sbi, block_t blkaddr,
if (IS_ERR(folio))
return PTR_ERR(folio);
- if (!is_recoverable_dnode(&folio->page)) {
+ if (!is_recoverable_dnode(folio)) {
f2fs_folio_put(folio, true);
*is_detecting = false;
return 0;
@@ -424,22 +424,22 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head,
break;
}
- if (!is_recoverable_dnode(&folio->page)) {
+ if (!is_recoverable_dnode(folio)) {
f2fs_folio_put(folio, true);
break;
}
- if (!is_fsync_dnode(&folio->page))
+ if (!is_fsync_dnode(folio))
goto next;
- entry = get_fsync_inode(head, ino_of_node(&folio->page));
+ entry = get_fsync_inode(head, ino_of_node(folio));
if (!entry) {
bool quota_inode = false;
if (!check_only &&
- IS_INODE(&folio->page) &&
- is_dent_dnode(&folio->page)) {
- err = f2fs_recover_inode_page(sbi, &folio->page);
+ IS_INODE(folio) &&
+ is_dent_dnode(folio)) {
+ err = f2fs_recover_inode_page(sbi, folio);
if (err) {
f2fs_folio_put(folio, true);
break;
@@ -451,7 +451,7 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head,
* CP | dnode(F) | inode(DF)
* For this case, we should not give up now.
*/
- entry = add_fsync_inode(sbi, head, ino_of_node(&folio->page),
+ entry = add_fsync_inode(sbi, head, ino_of_node(folio),
quota_inode);
if (IS_ERR(entry)) {
err = PTR_ERR(entry);
@@ -463,7 +463,7 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head,
}
entry->blkaddr = blkaddr;
- if (IS_INODE(&folio->page) && is_dent_dnode(&folio->page))
+ if (IS_INODE(folio) && is_dent_dnode(folio))
entry->last_dentry = blkaddr;
next:
/* check next segment */
@@ -527,7 +527,7 @@ got_it:
nid = le32_to_cpu(sum.nid);
ofs_in_node = le16_to_cpu(sum.ofs_in_node);
- max_addrs = ADDRS_PER_PAGE(&dn->node_folio->page, dn->inode);
+ max_addrs = ADDRS_PER_PAGE(dn->node_folio, dn->inode);
if (ofs_in_node >= max_addrs) {
f2fs_err(sbi, "Inconsistent ofs_in_node:%u in summary, ino:%lu, nid:%u, max:%u",
ofs_in_node, dn->inode->i_ino, nid, max_addrs);
@@ -552,8 +552,8 @@ got_it:
if (IS_ERR(node_folio))
return PTR_ERR(node_folio);
- offset = ofs_of_node(&node_folio->page);
- ino = ino_of_node(&node_folio->page);
+ offset = ofs_of_node(node_folio);
+ ino = ino_of_node(node_folio);
f2fs_folio_put(node_folio, true);
if (ino != dn->inode->i_ino) {
@@ -624,16 +624,16 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
{
struct dnode_of_data dn;
struct node_info ni;
- unsigned int start, end;
+ unsigned int start = 0, end = 0, index;
int err = 0, recovered = 0;
/* step 1: recover xattr */
- if (IS_INODE(&folio->page)) {
+ if (IS_INODE(folio)) {
err = f2fs_recover_inline_xattr(inode, folio);
if (err)
goto out;
- } else if (f2fs_has_xattr_block(ofs_of_node(&folio->page))) {
- err = f2fs_recover_xattr_data(inode, &folio->page);
+ } else if (f2fs_has_xattr_block(ofs_of_node(folio))) {
+ err = f2fs_recover_xattr_data(inode, folio);
if (!err)
recovered++;
goto out;
@@ -648,8 +648,8 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
}
/* step 3: recover data indices */
- start = f2fs_start_bidx_of_node(ofs_of_node(&folio->page), inode);
- end = start + ADDRS_PER_PAGE(&folio->page, inode);
+ start = f2fs_start_bidx_of_node(ofs_of_node(folio), inode);
+ end = start + ADDRS_PER_PAGE(folio, inode);
set_new_dnode(&dn, inode, NULL, NULL, 0);
retry_dn:
@@ -668,18 +668,18 @@ retry_dn:
if (err)
goto err;
- f2fs_bug_on(sbi, ni.ino != ino_of_node(&folio->page));
+ f2fs_bug_on(sbi, ni.ino != ino_of_node(folio));
- if (ofs_of_node(&dn.node_folio->page) != ofs_of_node(&folio->page)) {
+ if (ofs_of_node(dn.node_folio) != ofs_of_node(folio)) {
f2fs_warn(sbi, "Inconsistent ofs_of_node, ino:%lu, ofs:%u, %u",
- inode->i_ino, ofs_of_node(&dn.node_folio->page),
- ofs_of_node(&folio->page));
+ inode->i_ino, ofs_of_node(dn.node_folio),
+ ofs_of_node(folio));
err = -EFSCORRUPTED;
f2fs_handle_error(sbi, ERROR_INCONSISTENT_FOOTER);
goto err;
}
- for (; start < end; start++, dn.ofs_in_node++) {
+ for (index = start; index < end; index++, dn.ofs_in_node++) {
block_t src, dest;
src = f2fs_data_blkaddr(&dn);
@@ -708,9 +708,9 @@ retry_dn:
}
if (!file_keep_isize(inode) &&
- (i_size_read(inode) <= ((loff_t)start << PAGE_SHIFT)))
+ (i_size_read(inode) <= ((loff_t)index << PAGE_SHIFT)))
f2fs_i_size_write(inode,
- (loff_t)(start + 1) << PAGE_SHIFT);
+ (loff_t)(index + 1) << PAGE_SHIFT);
/*
* dest is reserved block, invalidate src block
@@ -758,16 +758,18 @@ retry_prev:
}
}
- copy_node_footer(&dn.node_folio->page, &folio->page);
- fill_node_footer(&dn.node_folio->page, dn.nid, ni.ino,
- ofs_of_node(&folio->page), false);
+ copy_node_footer(dn.node_folio, folio);
+ fill_node_footer(dn.node_folio, dn.nid, ni.ino,
+ ofs_of_node(folio), false);
folio_mark_dirty(dn.node_folio);
err:
f2fs_put_dnode(&dn);
out:
- f2fs_notice(sbi, "recover_data: ino = %lx (i_size: %s) recovered = %d, err = %d",
- inode->i_ino, file_keep_isize(inode) ? "keep" : "recover",
- recovered, err);
+ f2fs_notice(sbi, "recover_data: ino = %lx, nid = %x (i_size: %s), "
+ "range (%u, %u), recovered = %d, err = %d",
+ inode->i_ino, nid_of_node(folio),
+ file_keep_isize(inode) ? "keep" : "recover",
+ start, end, recovered, err);
return err;
}
@@ -778,6 +780,14 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list,
int err = 0;
block_t blkaddr;
unsigned int ra_blocks = RECOVERY_MAX_RA_BLOCKS;
+ unsigned int recoverable_dnode = 0;
+ unsigned int fsynced_dnode = 0;
+ unsigned int total_dnode = 0;
+ unsigned int recovered_inode = 0;
+ unsigned int recovered_dentry = 0;
+ unsigned int recovered_dnode = 0;
+
+ f2fs_notice(sbi, "do_recover_data: start to recover dnode");
/* get node pages in the current segment */
curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
@@ -796,38 +806,43 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list,
break;
}
- if (!is_recoverable_dnode(&folio->page)) {
+ if (!is_recoverable_dnode(folio)) {
f2fs_folio_put(folio, true);
break;
}
+ recoverable_dnode++;
- entry = get_fsync_inode(inode_list, ino_of_node(&folio->page));
+ entry = get_fsync_inode(inode_list, ino_of_node(folio));
if (!entry)
goto next;
+ fsynced_dnode++;
/*
* inode(x) | CP | inode(x) | dnode(F)
* In this case, we can lose the latest inode(x).
* So, call recover_inode for the inode update.
*/
- if (IS_INODE(&folio->page)) {
- err = recover_inode(entry->inode, &folio->page);
+ if (IS_INODE(folio)) {
+ err = recover_inode(entry->inode, folio);
if (err) {
f2fs_folio_put(folio, true);
break;
}
+ recovered_inode++;
}
if (entry->last_dentry == blkaddr) {
- err = recover_dentry(entry->inode, &folio->page, dir_list);
+ err = recover_dentry(entry->inode, folio, dir_list);
if (err) {
f2fs_folio_put(folio, true);
break;
}
+ recovered_dentry++;
}
err = do_recover_data(sbi, entry->inode, folio);
if (err) {
f2fs_folio_put(folio, true);
break;
}
+ recovered_dnode++;
if (entry->blkaddr == blkaddr)
list_move_tail(&entry->list, tmp_inode_list);
@@ -840,9 +855,15 @@ next:
f2fs_folio_put(folio, true);
f2fs_ra_meta_pages_cond(sbi, blkaddr, ra_blocks);
+ total_dnode++;
}
if (!err)
err = f2fs_allocate_new_segments(sbi);
+
+ f2fs_notice(sbi, "do_recover_data: dnode: (recoverable: %u, fsynced: %u, "
+ "total: %u), recovered: (inode: %u, dentry: %u, dnode: %u), err: %d",
+ recoverable_dnode, fsynced_dnode, total_dnode, recovered_inode,
+ recovered_dentry, recovered_dnode, err);
return err;
}
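
With the counters above, the new summary line would read something like the following (values invented for illustration; by construction fsynced_dnode never exceeds recoverable_dnode):

	do_recover_data: dnode: (recoverable: 20, fsynced: 8, total: 20), recovered: (inode: 3, dentry: 1, dnode: 8), err: 0
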
@@ -855,6 +876,9 @@ int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
unsigned long s_flags = sbi->sb->s_flags;
bool need_writecp = false;
+	f2fs_notice(sbi, "f2fs_recover_fsync_data: start to recover fsync data, "
+		"check_only: %d", check_only);
+
if (is_sbi_flag_set(sbi, SBI_IS_WRITABLE))
f2fs_info(sbi, "recover fsync data on readonly fs");
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index ae1223ef648f..cc82d42ef14c 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -334,7 +334,7 @@ static int __f2fs_commit_atomic_write(struct inode *inode)
goto next;
}
- blen = min((pgoff_t)ADDRS_PER_PAGE(&dn.node_folio->page, cow_inode),
+ blen = min((pgoff_t)ADDRS_PER_PAGE(dn.node_folio, cow_inode),
len);
index = off;
for (i = 0; i < blen; i++, dn.ofs_in_node++, index++) {
@@ -455,7 +455,8 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
} else {
struct f2fs_gc_control gc_control = {
.victim_segno = NULL_SEGNO,
- .init_gc_type = BG_GC,
+ .init_gc_type = f2fs_sb_has_blkzoned(sbi) ?
+ FG_GC : BG_GC,
.no_bg_gc = true,
.should_migrate_blocks = false,
.err_gc_skipped = false,
@@ -772,7 +773,7 @@ static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
/* need not be added */
- if (IS_CURSEG(sbi, segno))
+ if (is_curseg(sbi, segno))
return;
if (!test_and_set_bit(segno, dirty_i->dirty_segmap[dirty_type]))
@@ -799,7 +800,7 @@ static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
!valid_blocks) ||
valid_blocks == CAP_BLKS_PER_SEC(sbi));
- if (!IS_CURSEC(sbi, secno))
+ if (!is_cursec(sbi, secno))
set_bit(secno, dirty_i->dirty_secmap);
}
}
@@ -838,7 +839,7 @@ static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
return;
}
- if (!IS_CURSEC(sbi, secno))
+ if (!is_cursec(sbi, secno))
set_bit(secno, dirty_i->dirty_secmap);
}
}
@@ -855,7 +856,7 @@ static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
unsigned short valid_blocks, ckpt_valid_blocks;
unsigned int usable_blocks;
- if (segno == NULL_SEGNO || IS_CURSEG(sbi, segno))
+ if (segno == NULL_SEGNO || is_curseg(sbi, segno))
return;
usable_blocks = f2fs_usable_blks_in_seg(sbi, segno);
@@ -888,7 +889,7 @@ void f2fs_dirty_to_prefree(struct f2fs_sb_info *sbi)
for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
if (get_valid_blocks(sbi, segno, false))
continue;
- if (IS_CURSEG(sbi, segno))
+ if (is_curseg(sbi, segno))
continue;
__locate_dirty_segment(sbi, segno, PRE);
__remove_dirty_segment(sbi, segno, DIRTY);
@@ -2107,7 +2108,7 @@ static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc,
if (!force) {
if (!f2fs_realtime_discard_enable(sbi) ||
(!se->valid_blocks &&
- !IS_CURSEG(sbi, cpc->trim_start)) ||
+ !is_curseg(sbi, cpc->trim_start)) ||
SM_I(sbi)->dcc_info->nr_discards >=
SM_I(sbi)->dcc_info->max_discards)
return false;
@@ -2235,7 +2236,7 @@ void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi,
next:
secno = GET_SEC_FROM_SEG(sbi, start);
start_segno = GET_SEG_FROM_SEC(sbi, secno);
- if (!IS_CURSEC(sbi, secno) &&
+ if (!is_cursec(sbi, secno) &&
!get_valid_blocks(sbi, start, true))
f2fs_issue_discard(sbi, START_BLOCK(sbi, start_segno),
BLKS_PER_SEC(sbi));
@@ -3619,7 +3620,7 @@ static int __get_segment_type_4(struct f2fs_io_info *fio)
else
return CURSEG_COLD_DATA;
} else {
- if (IS_DNODE(fio->page) && is_cold_node(fio->page))
+ if (IS_DNODE(fio->folio) && is_cold_node(fio->folio))
return CURSEG_WARM_NODE;
else
return CURSEG_COLD_NODE;
@@ -3665,8 +3666,7 @@ static int __get_segment_type_6(struct f2fs_io_info *fio)
if (file_is_cold(inode) || f2fs_need_compress_data(inode))
return CURSEG_COLD_DATA;
- type = __get_age_segment_type(inode,
- page_folio(fio->page)->index);
+ type = __get_age_segment_type(inode, fio->folio->index);
if (type != NO_CHECK_TYPE)
return type;
@@ -3677,8 +3677,8 @@ static int __get_segment_type_6(struct f2fs_io_info *fio)
return f2fs_rw_hint_to_seg_type(F2FS_I_SB(inode),
inode->i_write_hint);
} else {
- if (IS_DNODE(fio->page))
- return is_cold_node(fio->page) ? CURSEG_WARM_NODE :
+ if (IS_DNODE(fio->folio))
+ return is_cold_node(fio->folio) ? CURSEG_WARM_NODE :
CURSEG_HOT_NODE;
return CURSEG_COLD_NODE;
}
@@ -3746,7 +3746,7 @@ static void f2fs_randomize_chunk(struct f2fs_sb_info *sbi,
get_random_u32_inclusive(1, sbi->max_fragment_hole);
}
-int f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
+int f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct folio *folio,
block_t old_blkaddr, block_t *new_blkaddr,
struct f2fs_summary *sum, int type,
struct f2fs_io_info *fio)
@@ -3850,10 +3850,10 @@ skip_new_segment:
up_write(&sit_i->sentry_lock);
- if (page && IS_NODESEG(curseg->seg_type)) {
- fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg));
+ if (folio && IS_NODESEG(curseg->seg_type)) {
+ fill_node_footer_blkaddr(folio, NEXT_FREE_BLKADDR(sbi, curseg));
- f2fs_inode_chksum_set(sbi, page);
+ f2fs_inode_chksum_set(sbi, folio);
}
if (fio) {
@@ -3931,7 +3931,7 @@ static int log_type_to_seg_type(enum log_type type)
static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio)
{
- struct folio *folio = page_folio(fio->page);
+ struct folio *folio = fio->folio;
enum log_type type = __get_segment_type(fio);
int seg_type = log_type_to_seg_type(type);
bool keep_order = (f2fs_lfs_mode(fio->sbi) &&
@@ -3940,15 +3940,21 @@ static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio)
if (keep_order)
f2fs_down_read(&fio->sbi->io_order_lock);
- if (f2fs_allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr,
+ if (f2fs_allocate_data_block(fio->sbi, folio, fio->old_blkaddr,
&fio->new_blkaddr, sum, type, fio)) {
if (fscrypt_inode_uses_fs_layer_crypto(folio->mapping->host))
fscrypt_finalize_bounce_page(&fio->encrypted_page);
folio_end_writeback(folio);
if (f2fs_in_warm_node_list(fio->sbi, folio))
f2fs_del_fsync_node_entry(fio->sbi, folio);
+ f2fs_bug_on(fio->sbi, !is_set_ckpt_flags(fio->sbi,
+ CP_ERROR_FLAG));
goto out;
}
+
+ f2fs_bug_on(fio->sbi, !f2fs_is_valid_blkaddr_raw(fio->sbi,
+ fio->new_blkaddr, DATA_GENERIC_ENHANCE));
+
if (GET_SEGNO(fio->sbi, fio->old_blkaddr) != NULL_SEGNO)
f2fs_invalidate_internal_cache(fio->sbi, fio->old_blkaddr, 1);
@@ -3972,7 +3978,7 @@ void f2fs_do_write_meta_page(struct f2fs_sb_info *sbi, struct folio *folio,
.op_flags = REQ_SYNC | REQ_META | REQ_PRIO,
.old_blkaddr = folio->index,
.new_blkaddr = folio->index,
- .page = folio_page(folio, 0),
+ .folio = folio,
.encrypted_page = NULL,
.in_list = 0,
};
@@ -4100,14 +4106,14 @@ void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
if (!recover_curseg) {
/* for recovery flow */
- if (se->valid_blocks == 0 && !IS_CURSEG(sbi, segno)) {
+ if (se->valid_blocks == 0 && !is_curseg(sbi, segno)) {
if (old_blkaddr == NULL_ADDR)
type = CURSEG_COLD_DATA;
else
type = CURSEG_WARM_DATA;
}
} else {
- if (IS_CURSEG(sbi, segno)) {
+ if (is_curseg(sbi, segno)) {
/* se->type is volatile as SSR allocation */
type = __f2fs_get_curseg(sbi, segno);
f2fs_bug_on(sbi, type == NO_CHECK_TYPE);
@@ -4191,7 +4197,7 @@ void f2fs_folio_wait_writeback(struct folio *folio, enum page_type type,
struct f2fs_sb_info *sbi = F2FS_F_SB(folio);
/* submit cached LFS IO */
- f2fs_submit_merged_write_cond(sbi, NULL, &folio->page, 0, type);
+ f2fs_submit_merged_write_cond(sbi, NULL, folio, 0, type);
/* submit cached IPU IO */
f2fs_submit_merged_ipu_write(sbi, NULL, folio);
if (ordered) {
@@ -5143,7 +5149,7 @@ static void init_dirty_segmap(struct f2fs_sb_info *sbi)
if (!valid_blocks || valid_blocks == CAP_BLKS_PER_SEC(sbi))
continue;
- if (IS_CURSEC(sbi, secno))
+ if (is_cursec(sbi, secno))
continue;
set_bit(secno, dirty_i->dirty_secmap);
}
@@ -5279,7 +5285,7 @@ static int check_zone_write_pointer(struct f2fs_sb_info *sbi,
* Get # of valid block of the zone.
*/
valid_block_cnt = get_valid_blocks(sbi, zone_segno, true);
- if (IS_CURSEC(sbi, GET_SEC_FROM_SEG(sbi, zone_segno))) {
+ if (is_cursec(sbi, GET_SEC_FROM_SEG(sbi, zone_segno))) {
f2fs_notice(sbi, "Open zones: valid block[0x%x,0x%x] cond[%s]",
zone_segno, valid_block_cnt,
blk_zone_cond_str(zone->cond));
@@ -5806,9 +5812,9 @@ static void destroy_sit_info(struct f2fs_sb_info *sbi)
kvfree(sit_i->dirty_sentries_bitmap);
SM_I(sbi)->sit_info = NULL;
- kvfree(sit_i->sit_bitmap);
+ kfree(sit_i->sit_bitmap);
#ifdef CONFIG_F2FS_CHECK_FS
- kvfree(sit_i->sit_bitmap_mir);
+ kfree(sit_i->sit_bitmap_mir);
kvfree(sit_i->invalid_segmap);
#endif
kfree(sit_i);
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index db619fd2f51a..5e2ee5c686b1 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -34,34 +34,6 @@ static inline void sanity_check_seg_type(struct f2fs_sb_info *sbi,
f2fs_bug_on(sbi, seg_type >= NR_PERSISTENT_LOG);
}
-#define IS_CURSEG(sbi, seg) \
- (((seg) == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno) || \
- ((seg) == CURSEG_I(sbi, CURSEG_WARM_DATA)->segno) || \
- ((seg) == CURSEG_I(sbi, CURSEG_COLD_DATA)->segno) || \
- ((seg) == CURSEG_I(sbi, CURSEG_HOT_NODE)->segno) || \
- ((seg) == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno) || \
- ((seg) == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno) || \
- ((seg) == CURSEG_I(sbi, CURSEG_COLD_DATA_PINNED)->segno) || \
- ((seg) == CURSEG_I(sbi, CURSEG_ALL_DATA_ATGC)->segno))
-
-#define IS_CURSEC(sbi, secno) \
- (((secno) == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno / \
- SEGS_PER_SEC(sbi)) || \
- ((secno) == CURSEG_I(sbi, CURSEG_WARM_DATA)->segno / \
- SEGS_PER_SEC(sbi)) || \
- ((secno) == CURSEG_I(sbi, CURSEG_COLD_DATA)->segno / \
- SEGS_PER_SEC(sbi)) || \
- ((secno) == CURSEG_I(sbi, CURSEG_HOT_NODE)->segno / \
- SEGS_PER_SEC(sbi)) || \
- ((secno) == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno / \
- SEGS_PER_SEC(sbi)) || \
- ((secno) == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno / \
- SEGS_PER_SEC(sbi)) || \
- ((secno) == CURSEG_I(sbi, CURSEG_COLD_DATA_PINNED)->segno / \
- SEGS_PER_SEC(sbi)) || \
- ((secno) == CURSEG_I(sbi, CURSEG_ALL_DATA_ATGC)->segno / \
- SEGS_PER_SEC(sbi)))
-
#define MAIN_BLKADDR(sbi) \
(SM_I(sbi) ? SM_I(sbi)->main_blkaddr : \
le32_to_cpu(F2FS_RAW_SUPER(sbi)->main_blkaddr))
@@ -318,6 +290,28 @@ static inline struct curseg_info *CURSEG_I(struct f2fs_sb_info *sbi, int type)
return (struct curseg_info *)(SM_I(sbi)->curseg_array + type);
}
+static inline bool is_curseg(struct f2fs_sb_info *sbi, unsigned int segno)
+{
+ int i;
+
+ for (i = CURSEG_HOT_DATA; i < NO_CHECK_TYPE; i++) {
+ if (segno == CURSEG_I(sbi, i)->segno)
+ return true;
+ }
+ return false;
+}
+
+static inline bool is_cursec(struct f2fs_sb_info *sbi, unsigned int secno)
+{
+ int i;
+
+ for (i = CURSEG_HOT_DATA; i < NO_CHECK_TYPE; i++) {
+ if (secno == GET_SEC_FROM_SEG(sbi, CURSEG_I(sbi, i)->segno))
+ return true;
+ }
+ return false;
+}
+
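
The loop form of is_curseg()/is_cursec() covers exactly the eight logs the deleted macros spelled out, but only because the curseg enum is contiguous and terminated by NO_CHECK_TYPE. A sketch of the ordering assumed here (per the enum in fs/f2fs/f2fs.h):

	/* assumed layout: eight logs, NO_CHECK_TYPE immediately after the last */
	enum {
		CURSEG_HOT_DATA = 0, CURSEG_WARM_DATA, CURSEG_COLD_DATA,
		CURSEG_HOT_NODE, CURSEG_WARM_NODE, CURSEG_COLD_NODE,
		CURSEG_COLD_DATA_PINNED, CURSEG_ALL_DATA_ATGC,
		NO_CHECK_TYPE,
	};

Any log type added before NO_CHECK_TYPE is picked up automatically, which the macros could not do.
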
static inline struct seg_entry *get_seg_entry(struct f2fs_sb_info *sbi,
unsigned int segno)
{
@@ -509,7 +503,7 @@ static inline void __set_test_and_free(struct f2fs_sb_info *sbi,
free_i->free_segments++;
- if (!inmem && IS_CURSEC(sbi, secno))
+ if (!inmem && is_cursec(sbi, secno))
goto unlock_out;
/* check large section */
@@ -674,8 +668,7 @@ static inline void __get_secs_required(struct f2fs_sb_info *sbi,
unsigned int dent_blocks = total_dent_blocks % CAP_BLKS_PER_SEC(sbi);
unsigned int data_blocks = 0;
- if (f2fs_lfs_mode(sbi) &&
- unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
+ if (f2fs_lfs_mode(sbi)) {
total_data_blocks = get_pages(sbi, F2FS_DIRTY_DATA);
data_secs = total_data_blocks / CAP_BLKS_PER_SEC(sbi);
data_blocks = total_data_blocks % CAP_BLKS_PER_SEC(sbi);
@@ -684,7 +677,7 @@ static inline void __get_secs_required(struct f2fs_sb_info *sbi,
if (lower_p)
*lower_p = node_secs + dent_secs + data_secs;
if (upper_p)
- *upper_p = node_secs + dent_secs +
+ *upper_p = node_secs + dent_secs + data_secs +
(node_blocks ? 1 : 0) + (dent_blocks ? 1 : 0) +
(data_blocks ? 1 : 0);
if (curseg_p)
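
Two fixes fold together in __get_secs_required(): LFS mode now always accounts dirty data rather than only while checkpointing is disabled, and the upper bound gains the previously missing data_secs term, without which it undercounted by whole sections. A worked example with assumed numbers:

	/* CAP_BLKS_PER_SEC = 512, 1030 dirty data blocks:
	 *   data_secs   = 1030 / 512 = 2 full sections
	 *   data_blocks = 1030 % 512 = 6, i.e. one partial section
	 * the old upper bound counted only the partial (+1);
	 * the fixed one counts data_secs + 1 = 3 sections for data.
	 */
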
@@ -986,7 +979,7 @@ static inline block_t sum_blk_addr(struct f2fs_sb_info *sbi, int base, int type)
static inline bool sec_usage_check(struct f2fs_sb_info *sbi, unsigned int secno)
{
- if (IS_CURSEC(sbi, secno) || (sbi->cur_victim_sec == secno))
+ if (is_cursec(sbi, secno) || (sbi->cur_victim_sec == secno))
return true;
return false;
}
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index bbf1dad6843f..e16c4e2830c2 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -27,6 +27,8 @@
#include <linux/part_stat.h>
#include <linux/zstd.h>
#include <linux/lz4.h>
+#include <linux/ctype.h>
+#include <linux/fs_parser.h>
#include "f2fs.h"
#include "node.h"
@@ -125,29 +127,20 @@ enum {
Opt_disable_roll_forward,
Opt_norecovery,
Opt_discard,
- Opt_nodiscard,
Opt_noheap,
Opt_heap,
Opt_user_xattr,
- Opt_nouser_xattr,
Opt_acl,
- Opt_noacl,
Opt_active_logs,
Opt_disable_ext_identify,
Opt_inline_xattr,
- Opt_noinline_xattr,
Opt_inline_xattr_size,
Opt_inline_data,
Opt_inline_dentry,
- Opt_noinline_dentry,
Opt_flush_merge,
- Opt_noflush_merge,
Opt_barrier,
- Opt_nobarrier,
Opt_fastboot,
Opt_extent_cache,
- Opt_noextent_cache,
- Opt_noinline_data,
Opt_data_flush,
Opt_reserve_root,
Opt_resgid,
@@ -156,21 +149,13 @@ enum {
Opt_fault_injection,
Opt_fault_type,
Opt_lazytime,
- Opt_nolazytime,
Opt_quota,
- Opt_noquota,
Opt_usrquota,
Opt_grpquota,
Opt_prjquota,
Opt_usrjquota,
Opt_grpjquota,
Opt_prjjquota,
- Opt_offusrjquota,
- Opt_offgrpjquota,
- Opt_offprjjquota,
- Opt_jqfmt_vfsold,
- Opt_jqfmt_vfsv0,
- Opt_jqfmt_vfsv1,
Opt_alloc,
Opt_fsync,
Opt_test_dummy_encryption,
@@ -180,107 +165,209 @@ enum {
Opt_checkpoint_disable_cap_perc,
Opt_checkpoint_enable,
Opt_checkpoint_merge,
- Opt_nocheckpoint_merge,
Opt_compress_algorithm,
Opt_compress_log_size,
- Opt_compress_extension,
Opt_nocompress_extension,
+ Opt_compress_extension,
Opt_compress_chksum,
Opt_compress_mode,
Opt_compress_cache,
Opt_atgc,
Opt_gc_merge,
- Opt_nogc_merge,
Opt_discard_unit,
Opt_memory_mode,
Opt_age_extent_cache,
Opt_errors,
Opt_nat_bits,
+ Opt_jqfmt,
+ Opt_checkpoint,
Opt_err,
};
-static match_table_t f2fs_tokens = {
- {Opt_gc_background, "background_gc=%s"},
- {Opt_disable_roll_forward, "disable_roll_forward"},
- {Opt_norecovery, "norecovery"},
- {Opt_discard, "discard"},
- {Opt_nodiscard, "nodiscard"},
- {Opt_noheap, "no_heap"},
- {Opt_heap, "heap"},
- {Opt_user_xattr, "user_xattr"},
- {Opt_nouser_xattr, "nouser_xattr"},
- {Opt_acl, "acl"},
- {Opt_noacl, "noacl"},
- {Opt_active_logs, "active_logs=%u"},
- {Opt_disable_ext_identify, "disable_ext_identify"},
- {Opt_inline_xattr, "inline_xattr"},
- {Opt_noinline_xattr, "noinline_xattr"},
- {Opt_inline_xattr_size, "inline_xattr_size=%u"},
- {Opt_inline_data, "inline_data"},
- {Opt_inline_dentry, "inline_dentry"},
- {Opt_noinline_dentry, "noinline_dentry"},
- {Opt_flush_merge, "flush_merge"},
- {Opt_noflush_merge, "noflush_merge"},
- {Opt_barrier, "barrier"},
- {Opt_nobarrier, "nobarrier"},
- {Opt_fastboot, "fastboot"},
- {Opt_extent_cache, "extent_cache"},
- {Opt_noextent_cache, "noextent_cache"},
- {Opt_noinline_data, "noinline_data"},
- {Opt_data_flush, "data_flush"},
- {Opt_reserve_root, "reserve_root=%u"},
- {Opt_resgid, "resgid=%u"},
- {Opt_resuid, "resuid=%u"},
- {Opt_mode, "mode=%s"},
- {Opt_fault_injection, "fault_injection=%u"},
- {Opt_fault_type, "fault_type=%u"},
- {Opt_lazytime, "lazytime"},
- {Opt_nolazytime, "nolazytime"},
- {Opt_quota, "quota"},
- {Opt_noquota, "noquota"},
- {Opt_usrquota, "usrquota"},
- {Opt_grpquota, "grpquota"},
- {Opt_prjquota, "prjquota"},
- {Opt_usrjquota, "usrjquota=%s"},
- {Opt_grpjquota, "grpjquota=%s"},
- {Opt_prjjquota, "prjjquota=%s"},
- {Opt_offusrjquota, "usrjquota="},
- {Opt_offgrpjquota, "grpjquota="},
- {Opt_offprjjquota, "prjjquota="},
- {Opt_jqfmt_vfsold, "jqfmt=vfsold"},
- {Opt_jqfmt_vfsv0, "jqfmt=vfsv0"},
- {Opt_jqfmt_vfsv1, "jqfmt=vfsv1"},
- {Opt_alloc, "alloc_mode=%s"},
- {Opt_fsync, "fsync_mode=%s"},
- {Opt_test_dummy_encryption, "test_dummy_encryption=%s"},
- {Opt_test_dummy_encryption, "test_dummy_encryption"},
- {Opt_inlinecrypt, "inlinecrypt"},
- {Opt_checkpoint_disable, "checkpoint=disable"},
- {Opt_checkpoint_disable_cap, "checkpoint=disable:%u"},
- {Opt_checkpoint_disable_cap_perc, "checkpoint=disable:%u%%"},
- {Opt_checkpoint_enable, "checkpoint=enable"},
- {Opt_checkpoint_merge, "checkpoint_merge"},
- {Opt_nocheckpoint_merge, "nocheckpoint_merge"},
- {Opt_compress_algorithm, "compress_algorithm=%s"},
- {Opt_compress_log_size, "compress_log_size=%u"},
- {Opt_compress_extension, "compress_extension=%s"},
- {Opt_nocompress_extension, "nocompress_extension=%s"},
- {Opt_compress_chksum, "compress_chksum"},
- {Opt_compress_mode, "compress_mode=%s"},
- {Opt_compress_cache, "compress_cache"},
- {Opt_atgc, "atgc"},
- {Opt_gc_merge, "gc_merge"},
- {Opt_nogc_merge, "nogc_merge"},
- {Opt_discard_unit, "discard_unit=%s"},
- {Opt_memory_mode, "memory=%s"},
- {Opt_age_extent_cache, "age_extent_cache"},
- {Opt_errors, "errors=%s"},
- {Opt_nat_bits, "nat_bits"},
+static const struct constant_table f2fs_param_background_gc[] = {
+ {"on", BGGC_MODE_ON},
+ {"off", BGGC_MODE_OFF},
+ {"sync", BGGC_MODE_SYNC},
+ {}
+};
+
+static const struct constant_table f2fs_param_mode[] = {
+ {"adaptive", FS_MODE_ADAPTIVE},
+ {"lfs", FS_MODE_LFS},
+ {"fragment:segment", FS_MODE_FRAGMENT_SEG},
+ {"fragment:block", FS_MODE_FRAGMENT_BLK},
+ {}
+};
+
+static const struct constant_table f2fs_param_jqfmt[] = {
+ {"vfsold", QFMT_VFS_OLD},
+ {"vfsv0", QFMT_VFS_V0},
+ {"vfsv1", QFMT_VFS_V1},
+ {}
+};
+
+static const struct constant_table f2fs_param_alloc_mode[] = {
+ {"default", ALLOC_MODE_DEFAULT},
+ {"reuse", ALLOC_MODE_REUSE},
+ {}
+};
+
+static const struct constant_table f2fs_param_fsync_mode[] = {
+ {"posix", FSYNC_MODE_POSIX},
+ {"strict", FSYNC_MODE_STRICT},
+ {"nobarrier", FSYNC_MODE_NOBARRIER},
+ {}
+};
+
+static const struct constant_table f2fs_param_compress_mode[] = {
+ {"fs", COMPR_MODE_FS},
+ {"user", COMPR_MODE_USER},
+ {}
+};
+
+static const struct constant_table f2fs_param_discard_unit[] = {
+ {"block", DISCARD_UNIT_BLOCK},
+ {"segment", DISCARD_UNIT_SEGMENT},
+ {"section", DISCARD_UNIT_SECTION},
+ {}
+};
+
+static const struct constant_table f2fs_param_memory_mode[] = {
+ {"normal", MEMORY_MODE_NORMAL},
+ {"low", MEMORY_MODE_LOW},
+ {}
+};
+
+static const struct constant_table f2fs_param_errors[] = {
+ {"remount-ro", MOUNT_ERRORS_READONLY},
+ {"continue", MOUNT_ERRORS_CONTINUE},
+ {"panic", MOUNT_ERRORS_PANIC},
+ {}
+};
+
+static const struct fs_parameter_spec f2fs_param_specs[] = {
+ fsparam_enum("background_gc", Opt_gc_background, f2fs_param_background_gc),
+ fsparam_flag("disable_roll_forward", Opt_disable_roll_forward),
+ fsparam_flag("norecovery", Opt_norecovery),
+ fsparam_flag_no("discard", Opt_discard),
+ fsparam_flag("no_heap", Opt_noheap),
+ fsparam_flag("heap", Opt_heap),
+ fsparam_flag_no("user_xattr", Opt_user_xattr),
+ fsparam_flag_no("acl", Opt_acl),
+ fsparam_s32("active_logs", Opt_active_logs),
+ fsparam_flag("disable_ext_identify", Opt_disable_ext_identify),
+ fsparam_flag_no("inline_xattr", Opt_inline_xattr),
+ fsparam_s32("inline_xattr_size", Opt_inline_xattr_size),
+ fsparam_flag_no("inline_data", Opt_inline_data),
+ fsparam_flag_no("inline_dentry", Opt_inline_dentry),
+ fsparam_flag_no("flush_merge", Opt_flush_merge),
+ fsparam_flag_no("barrier", Opt_barrier),
+ fsparam_flag("fastboot", Opt_fastboot),
+ fsparam_flag_no("extent_cache", Opt_extent_cache),
+ fsparam_flag("data_flush", Opt_data_flush),
+ fsparam_u32("reserve_root", Opt_reserve_root),
+ fsparam_gid("resgid", Opt_resgid),
+ fsparam_uid("resuid", Opt_resuid),
+ fsparam_enum("mode", Opt_mode, f2fs_param_mode),
+ fsparam_s32("fault_injection", Opt_fault_injection),
+ fsparam_u32("fault_type", Opt_fault_type),
+ fsparam_flag_no("lazytime", Opt_lazytime),
+ fsparam_flag_no("quota", Opt_quota),
+ fsparam_flag("usrquota", Opt_usrquota),
+ fsparam_flag("grpquota", Opt_grpquota),
+ fsparam_flag("prjquota", Opt_prjquota),
+ fsparam_string_empty("usrjquota", Opt_usrjquota),
+ fsparam_string_empty("grpjquota", Opt_grpjquota),
+ fsparam_string_empty("prjjquota", Opt_prjjquota),
+ fsparam_flag("nat_bits", Opt_nat_bits),
+ fsparam_enum("jqfmt", Opt_jqfmt, f2fs_param_jqfmt),
+ fsparam_enum("alloc_mode", Opt_alloc, f2fs_param_alloc_mode),
+ fsparam_enum("fsync_mode", Opt_fsync, f2fs_param_fsync_mode),
+ fsparam_string("test_dummy_encryption", Opt_test_dummy_encryption),
+ fsparam_flag("test_dummy_encryption", Opt_test_dummy_encryption),
+ fsparam_flag("inlinecrypt", Opt_inlinecrypt),
+ fsparam_string("checkpoint", Opt_checkpoint),
+ fsparam_flag_no("checkpoint_merge", Opt_checkpoint_merge),
+ fsparam_string("compress_algorithm", Opt_compress_algorithm),
+ fsparam_u32("compress_log_size", Opt_compress_log_size),
+ fsparam_string("compress_extension", Opt_compress_extension),
+ fsparam_string("nocompress_extension", Opt_nocompress_extension),
+ fsparam_flag("compress_chksum", Opt_compress_chksum),
+ fsparam_enum("compress_mode", Opt_compress_mode, f2fs_param_compress_mode),
+ fsparam_flag("compress_cache", Opt_compress_cache),
+ fsparam_flag("atgc", Opt_atgc),
+ fsparam_flag_no("gc_merge", Opt_gc_merge),
+ fsparam_enum("discard_unit", Opt_discard_unit, f2fs_param_discard_unit),
+ fsparam_enum("memory", Opt_memory_mode, f2fs_param_memory_mode),
+ fsparam_flag("age_extent_cache", Opt_age_extent_cache),
+ fsparam_enum("errors", Opt_errors, f2fs_param_errors),
+ {}
+};
+
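
Note how one fsparam_flag_no() entry replaces each old option/no-option token pair: fs_parse() accepts both "discard" and "nodiscard" for Opt_discard and reports which form it saw in result.negated. The underlying spec macro (paraphrased from include/linux/fs_parser.h, for reference):

	#define fsparam_flag_no(NAME, OPT) \
		__fsparam(NULL, NAME, OPT, fs_param_neg_with_no, NULL)

The handlers in f2fs_parse_param() below then branch on result.negated instead of needing a second Opt_no* case.
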
+/* Resort to a match_table for this interestingly formatted option */
+static match_table_t f2fs_checkpoint_tokens = {
+ {Opt_checkpoint_disable, "disable"},
+ {Opt_checkpoint_disable_cap, "disable:%u"},
+ {Opt_checkpoint_disable_cap_perc, "disable:%u%%"},
+ {Opt_checkpoint_enable, "enable"},
{Opt_err, NULL},
};
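
fs_parse() has no spec type for a value-with-suffix form such as disable:%u%%, so checkpoint= stays a plain string and is re-tokenized by its handler. A sketch of the round trip for one assumed invocation:

	/* -o checkpoint=disable:10%
	 *   -> param->string = "disable:10%"
	 *   -> match_token() = Opt_checkpoint_disable_cap_perc, args[0] = "10"
	 *   -> F2FS_CTX_INFO(ctx).unusable_cap_perc = 10
	 */
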
+#define F2FS_SPEC_background_gc (1 << 0)
+#define F2FS_SPEC_inline_xattr_size (1 << 1)
+#define F2FS_SPEC_active_logs (1 << 2)
+#define F2FS_SPEC_reserve_root (1 << 3)
+#define F2FS_SPEC_resgid (1 << 4)
+#define F2FS_SPEC_resuid (1 << 5)
+#define F2FS_SPEC_mode (1 << 6)
+#define F2FS_SPEC_fault_injection (1 << 7)
+#define F2FS_SPEC_fault_type (1 << 8)
+#define F2FS_SPEC_jqfmt (1 << 9)
+#define F2FS_SPEC_alloc_mode (1 << 10)
+#define F2FS_SPEC_fsync_mode (1 << 11)
+#define F2FS_SPEC_checkpoint_disable_cap (1 << 12)
+#define F2FS_SPEC_checkpoint_disable_cap_perc (1 << 13)
+#define F2FS_SPEC_compress_level (1 << 14)
+#define F2FS_SPEC_compress_algorithm (1 << 15)
+#define F2FS_SPEC_compress_log_size (1 << 16)
+#define F2FS_SPEC_compress_extension (1 << 17)
+#define F2FS_SPEC_nocompress_extension (1 << 18)
+#define F2FS_SPEC_compress_chksum (1 << 19)
+#define F2FS_SPEC_compress_mode (1 << 20)
+#define F2FS_SPEC_discard_unit (1 << 21)
+#define F2FS_SPEC_memory_mode (1 << 22)
+#define F2FS_SPEC_errors (1 << 23)
+
+struct f2fs_fs_context {
+ struct f2fs_mount_info info;
+ unsigned int opt_mask; /* Bits changed */
+ unsigned int spec_mask;
+ unsigned short qname_mask;
+};
+
+#define F2FS_CTX_INFO(ctx) ((ctx)->info)
+
+static inline void ctx_set_opt(struct f2fs_fs_context *ctx,
+ unsigned int flag)
+{
+ ctx->info.opt |= flag;
+ ctx->opt_mask |= flag;
+}
+
+static inline void ctx_clear_opt(struct f2fs_fs_context *ctx,
+ unsigned int flag)
+{
+ ctx->info.opt &= ~flag;
+ ctx->opt_mask |= flag;
+}
+
+static inline bool ctx_test_opt(struct f2fs_fs_context *ctx,
+ unsigned int flag)
+{
+ return ctx->info.opt & flag;
+}
+
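
The context keeps three change masks so that reconfigure can distinguish "explicitly cleared" from "never mentioned": opt_mask records which boolean options the user touched in either direction, spec_mask which valued options were given, and qname_mask which quota file names changed. The consistency checks later in this patch therefore test the mask together with the value, as in this sketch (reject_or_warn() is a placeholder):

	/* act only if "discard" or "nodiscard" actually appeared */
	if ((ctx->opt_mask & F2FS_MOUNT_DISCARD) &&
	    !ctx_test_opt(ctx, F2FS_MOUNT_DISCARD))
		reject_or_warn();	/* user explicitly asked for nodiscard */
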
void f2fs_printk(struct f2fs_sb_info *sbi, bool limit_rate,
- const char *fmt, ...)
+ const char *fmt, ...)
{
struct va_format vaf;
va_list args;
@@ -292,11 +379,19 @@ void f2fs_printk(struct f2fs_sb_info *sbi, bool limit_rate,
vaf.fmt = printk_skip_level(fmt);
vaf.va = &args;
if (limit_rate)
- printk_ratelimited("%c%cF2FS-fs (%s): %pV\n",
- KERN_SOH_ASCII, level, sbi->sb->s_id, &vaf);
+ if (sbi)
+ printk_ratelimited("%c%cF2FS-fs (%s): %pV\n",
+ KERN_SOH_ASCII, level, sbi->sb->s_id, &vaf);
+ else
+ printk_ratelimited("%c%cF2FS-fs: %pV\n",
+ KERN_SOH_ASCII, level, &vaf);
else
- printk("%c%cF2FS-fs (%s): %pV\n",
- KERN_SOH_ASCII, level, sbi->sb->s_id, &vaf);
+ if (sbi)
+ printk("%c%cF2FS-fs (%s): %pV\n",
+ KERN_SOH_ASCII, level, sbi->sb->s_id, &vaf);
+ else
+ printk("%c%cF2FS-fs: %pV\n",
+ KERN_SOH_ASCII, level, &vaf);
va_end(args);
}
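
The NULL-sbi branches matter because fs_context option parsing now runs before any superblock exists; parse-time code logs with a NULL first argument and simply loses the device prefix. For example:

	f2fs_err(NULL, "quotafile must be on filesystem root");
	/* prints: "F2FS-fs: quotafile must be on filesystem root" */
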
@@ -390,159 +485,90 @@ static void init_once(void *foo)
#ifdef CONFIG_QUOTA
static const char * const quotatypes[] = INITQFNAMES;
#define QTYPE2NAME(t) (quotatypes[t])
-static int f2fs_set_qf_name(struct f2fs_sb_info *sbi, int qtype,
- substring_t *args)
+/*
+ * Note the name of the specified quota file.
+ */
+static int f2fs_note_qf_name(struct fs_context *fc, int qtype,
+ struct fs_parameter *param)
{
- struct super_block *sb = sbi->sb;
+ struct f2fs_fs_context *ctx = fc->fs_private;
char *qname;
- int ret = -EINVAL;
- if (sb_any_quota_loaded(sb) && !F2FS_OPTION(sbi).s_qf_names[qtype]) {
- f2fs_err(sbi, "Cannot change journaled quota options when quota turned on");
+ if (param->size < 1) {
+ f2fs_err(NULL, "Missing quota name");
return -EINVAL;
}
- if (f2fs_sb_has_quota_ino(sbi)) {
- f2fs_info(sbi, "QUOTA feature is enabled, so ignore qf_name");
+ if (strchr(param->string, '/')) {
+ f2fs_err(NULL, "quotafile must be on filesystem root");
+ return -EINVAL;
+ }
+ if (ctx->info.s_qf_names[qtype]) {
+ if (strcmp(ctx->info.s_qf_names[qtype], param->string) != 0) {
+ f2fs_err(NULL, "Quota file already specified");
+ return -EINVAL;
+ }
return 0;
}
- qname = match_strdup(args);
+ qname = kmemdup_nul(param->string, param->size, GFP_KERNEL);
if (!qname) {
- f2fs_err(sbi, "Not enough memory for storing quotafile name");
+ f2fs_err(NULL, "Not enough memory for storing quotafile name");
return -ENOMEM;
}
- if (F2FS_OPTION(sbi).s_qf_names[qtype]) {
- if (strcmp(F2FS_OPTION(sbi).s_qf_names[qtype], qname) == 0)
- ret = 0;
- else
- f2fs_err(sbi, "%s quota file already specified",
- QTYPE2NAME(qtype));
- goto errout;
- }
- if (strchr(qname, '/')) {
- f2fs_err(sbi, "quotafile must be on filesystem root");
- goto errout;
- }
- F2FS_OPTION(sbi).s_qf_names[qtype] = qname;
- set_opt(sbi, QUOTA);
+ F2FS_CTX_INFO(ctx).s_qf_names[qtype] = qname;
+ ctx->qname_mask |= 1 << qtype;
return 0;
-errout:
- kfree(qname);
- return ret;
}
-static int f2fs_clear_qf_name(struct f2fs_sb_info *sbi, int qtype)
+/*
+ * Clear the name of the specified quota file.
+ */
+static int f2fs_unnote_qf_name(struct fs_context *fc, int qtype)
{
- struct super_block *sb = sbi->sb;
+ struct f2fs_fs_context *ctx = fc->fs_private;
- if (sb_any_quota_loaded(sb) && F2FS_OPTION(sbi).s_qf_names[qtype]) {
- f2fs_err(sbi, "Cannot change journaled quota options when quota turned on");
- return -EINVAL;
- }
- kfree(F2FS_OPTION(sbi).s_qf_names[qtype]);
- F2FS_OPTION(sbi).s_qf_names[qtype] = NULL;
+ kfree(ctx->info.s_qf_names[qtype]);
+ ctx->info.s_qf_names[qtype] = NULL;
+ ctx->qname_mask |= 1 << qtype;
return 0;
}
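
The dedicated Opt_offusrjquota/Opt_offgrpjquota/Opt_offprjjquota tokens are gone; fsparam_string_empty() lets an empty value through, and f2fs_parse_param() routes it here, so clearing a journalled quota file is still spelled the same way on the command line:

	/* "-o usrjquota=" (empty value) takes this path in f2fs_parse_param():
	 *	if (!*param->string)
	 *		ret = f2fs_unnote_qf_name(fc, USRQUOTA);
	 */
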
-static int f2fs_check_quota_options(struct f2fs_sb_info *sbi)
+static void f2fs_unnote_qf_name_all(struct fs_context *fc)
{
- /*
- * We do the test below only for project quotas. 'usrquota' and
- * 'grpquota' mount options are allowed even without quota feature
- * to support legacy quotas in quota files.
- */
- if (test_opt(sbi, PRJQUOTA) && !f2fs_sb_has_project_quota(sbi)) {
- f2fs_err(sbi, "Project quota feature not enabled. Cannot enable project quota enforcement.");
- return -1;
- }
- if (F2FS_OPTION(sbi).s_qf_names[USRQUOTA] ||
- F2FS_OPTION(sbi).s_qf_names[GRPQUOTA] ||
- F2FS_OPTION(sbi).s_qf_names[PRJQUOTA]) {
- if (test_opt(sbi, USRQUOTA) &&
- F2FS_OPTION(sbi).s_qf_names[USRQUOTA])
- clear_opt(sbi, USRQUOTA);
-
- if (test_opt(sbi, GRPQUOTA) &&
- F2FS_OPTION(sbi).s_qf_names[GRPQUOTA])
- clear_opt(sbi, GRPQUOTA);
-
- if (test_opt(sbi, PRJQUOTA) &&
- F2FS_OPTION(sbi).s_qf_names[PRJQUOTA])
- clear_opt(sbi, PRJQUOTA);
-
- if (test_opt(sbi, GRPQUOTA) || test_opt(sbi, USRQUOTA) ||
- test_opt(sbi, PRJQUOTA)) {
- f2fs_err(sbi, "old and new quota format mixing");
- return -1;
- }
-
- if (!F2FS_OPTION(sbi).s_jquota_fmt) {
- f2fs_err(sbi, "journaled quota format not specified");
- return -1;
- }
- }
+ int i;
- if (f2fs_sb_has_quota_ino(sbi) && F2FS_OPTION(sbi).s_jquota_fmt) {
- f2fs_info(sbi, "QUOTA feature is enabled, so ignore jquota_fmt");
- F2FS_OPTION(sbi).s_jquota_fmt = 0;
- }
- return 0;
+ for (i = 0; i < MAXQUOTAS; i++)
+ f2fs_unnote_qf_name(fc, i);
}
#endif
-static int f2fs_set_test_dummy_encryption(struct f2fs_sb_info *sbi,
- const char *opt,
- const substring_t *arg,
- bool is_remount)
+static int f2fs_parse_test_dummy_encryption(const struct fs_parameter *param,
+ struct f2fs_fs_context *ctx)
{
- struct fs_parameter param = {
- .type = fs_value_is_string,
- .string = arg->from ? arg->from : "",
- };
- struct fscrypt_dummy_policy *policy =
- &F2FS_OPTION(sbi).dummy_enc_policy;
int err;
if (!IS_ENABLED(CONFIG_FS_ENCRYPTION)) {
- f2fs_warn(sbi, "test_dummy_encryption option not supported");
+ f2fs_warn(NULL, "test_dummy_encryption option not supported");
return -EINVAL;
}
-
- if (!f2fs_sb_has_encrypt(sbi)) {
- f2fs_err(sbi, "Encrypt feature is off");
- return -EINVAL;
- }
-
- /*
- * This mount option is just for testing, and it's not worthwhile to
- * implement the extra complexity (e.g. RCU protection) that would be
- * needed to allow it to be set or changed during remount. We do allow
- * it to be specified during remount, but only if there is no change.
- */
- if (is_remount && !fscrypt_is_dummy_policy_set(policy)) {
- f2fs_warn(sbi, "Can't set test_dummy_encryption on remount");
- return -EINVAL;
- }
-
- err = fscrypt_parse_test_dummy_encryption(&param, policy);
+ err = fscrypt_parse_test_dummy_encryption(param,
+ &ctx->info.dummy_enc_policy);
if (err) {
- if (err == -EEXIST)
- f2fs_warn(sbi,
- "Can't change test_dummy_encryption on remount");
- else if (err == -EINVAL)
- f2fs_warn(sbi, "Value of option \"%s\" is unrecognized",
- opt);
+ if (err == -EINVAL)
+ f2fs_warn(NULL, "Value of option \"%s\" is unrecognized",
+ param->key);
+ else if (err == -EEXIST)
+ f2fs_warn(NULL, "Conflicting test_dummy_encryption options");
else
- f2fs_warn(sbi, "Error processing option \"%s\" [%d]",
- opt, err);
+ f2fs_warn(NULL, "Error processing option \"%s\" [%d]",
+ param->key, err);
return -EINVAL;
}
- f2fs_warn(sbi, "Test dummy encryption mode enabled");
return 0;
}
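
The encrypt-feature and no-change-on-remount restrictions that used to live in this function are not dropped; they move to f2fs_check_test_dummy_encryption() further down, because at parse time there is no superblock to compare against. Parsing now only records the policy in the context, and the later check runs against the live sb:

	if (!f2fs_sb_has_encrypt(sbi)) {
		f2fs_err(sbi, "Encrypt feature is off");
		return -EINVAL;
	}
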
#ifdef CONFIG_F2FS_FS_COMPRESSION
-static bool is_compress_extension_exist(struct f2fs_sb_info *sbi,
+static bool is_compress_extension_exist(struct f2fs_mount_info *info,
const char *new_ext, bool is_ext)
{
unsigned char (*ext)[F2FS_EXTENSION_LEN];
@@ -550,11 +576,11 @@ static bool is_compress_extension_exist(struct f2fs_sb_info *sbi,
int i;
if (is_ext) {
- ext = F2FS_OPTION(sbi).extensions;
- ext_cnt = F2FS_OPTION(sbi).compress_ext_cnt;
+ ext = info->extensions;
+ ext_cnt = info->compress_ext_cnt;
} else {
- ext = F2FS_OPTION(sbi).noextensions;
- ext_cnt = F2FS_OPTION(sbi).nocompress_ext_cnt;
+ ext = info->noextensions;
+ ext_cnt = info->nocompress_ext_cnt;
}
for (i = 0; i < ext_cnt; i++) {
@@ -572,28 +598,28 @@ static bool is_compress_extension_exist(struct f2fs_sb_info *sbi,
* extension will be treated as special cases and will not be compressed.
* 3. Don't allow the non-compress extension specifies all files.
*/
-static int f2fs_test_compress_extension(struct f2fs_sb_info *sbi)
+static int f2fs_test_compress_extension(unsigned char (*noext)[F2FS_EXTENSION_LEN],
+ int noext_cnt,
+ unsigned char (*ext)[F2FS_EXTENSION_LEN],
+ int ext_cnt)
{
- unsigned char (*ext)[F2FS_EXTENSION_LEN];
- unsigned char (*noext)[F2FS_EXTENSION_LEN];
- int ext_cnt, noext_cnt, index = 0, no_index = 0;
-
- ext = F2FS_OPTION(sbi).extensions;
- ext_cnt = F2FS_OPTION(sbi).compress_ext_cnt;
- noext = F2FS_OPTION(sbi).noextensions;
- noext_cnt = F2FS_OPTION(sbi).nocompress_ext_cnt;
+ int index = 0, no_index = 0;
if (!noext_cnt)
return 0;
for (no_index = 0; no_index < noext_cnt; no_index++) {
+ if (strlen(noext[no_index]) == 0)
+ continue;
if (!strcasecmp("*", noext[no_index])) {
- f2fs_info(sbi, "Don't allow the nocompress extension specifies all files");
+ f2fs_info(NULL, "Don't allow the nocompress extension specifies all files");
return -EINVAL;
}
for (index = 0; index < ext_cnt; index++) {
+ if (strlen(ext[index]) == 0)
+ continue;
if (!strcasecmp(ext[index], noext[no_index])) {
- f2fs_info(sbi, "Don't allow the same extension %s appear in both compress and nocompress extension",
+ f2fs_info(NULL, "Don't allow the same extension %s appear in both compress and nocompress extension",
ext[index]);
return -EINVAL;
}
@@ -603,58 +629,62 @@ static int f2fs_test_compress_extension(struct f2fs_sb_info *sbi)
}
#ifdef CONFIG_F2FS_FS_LZ4
-static int f2fs_set_lz4hc_level(struct f2fs_sb_info *sbi, const char *str)
+static int f2fs_set_lz4hc_level(struct f2fs_fs_context *ctx, const char *str)
{
#ifdef CONFIG_F2FS_FS_LZ4HC
unsigned int level;
if (strlen(str) == 3) {
- F2FS_OPTION(sbi).compress_level = 0;
+ F2FS_CTX_INFO(ctx).compress_level = 0;
+ ctx->spec_mask |= F2FS_SPEC_compress_level;
return 0;
}
str += 3;
if (str[0] != ':') {
- f2fs_info(sbi, "wrong format, e.g. <alg_name>:<compr_level>");
+ f2fs_info(NULL, "wrong format, e.g. <alg_name>:<compr_level>");
return -EINVAL;
}
if (kstrtouint(str + 1, 10, &level))
return -EINVAL;
if (!f2fs_is_compress_level_valid(COMPRESS_LZ4, level)) {
- f2fs_info(sbi, "invalid lz4hc compress level: %d", level);
+ f2fs_info(NULL, "invalid lz4hc compress level: %d", level);
return -EINVAL;
}
- F2FS_OPTION(sbi).compress_level = level;
+ F2FS_CTX_INFO(ctx).compress_level = level;
+ ctx->spec_mask |= F2FS_SPEC_compress_level;
return 0;
#else
if (strlen(str) == 3) {
- F2FS_OPTION(sbi).compress_level = 0;
+ F2FS_CTX_INFO(ctx).compress_level = 0;
+ ctx->spec_mask |= F2FS_SPEC_compress_level;
return 0;
}
- f2fs_info(sbi, "kernel doesn't support lz4hc compression");
+ f2fs_info(NULL, "kernel doesn't support lz4hc compression");
return -EINVAL;
#endif
}
#endif
#ifdef CONFIG_F2FS_FS_ZSTD
-static int f2fs_set_zstd_level(struct f2fs_sb_info *sbi, const char *str)
+static int f2fs_set_zstd_level(struct f2fs_fs_context *ctx, const char *str)
{
int level;
int len = 4;
if (strlen(str) == len) {
- F2FS_OPTION(sbi).compress_level = F2FS_ZSTD_DEFAULT_CLEVEL;
+ F2FS_CTX_INFO(ctx).compress_level = F2FS_ZSTD_DEFAULT_CLEVEL;
+ ctx->spec_mask |= F2FS_SPEC_compress_level;
return 0;
}
str += len;
if (str[0] != ':') {
- f2fs_info(sbi, "wrong format, e.g. <alg_name>:<compr_level>");
+ f2fs_info(NULL, "wrong format, e.g. <alg_name>:<compr_level>");
return -EINVAL;
}
if (kstrtoint(str + 1, 10, &level))
@@ -662,685 +692,750 @@ static int f2fs_set_zstd_level(struct f2fs_sb_info *sbi, const char *str)
/* f2fs does not support negative compress level now */
if (level < 0) {
- f2fs_info(sbi, "do not support negative compress level: %d", level);
+ f2fs_info(NULL, "do not support negative compress level: %d", level);
return -ERANGE;
}
if (!f2fs_is_compress_level_valid(COMPRESS_ZSTD, level)) {
- f2fs_info(sbi, "invalid zstd compress level: %d", level);
+ f2fs_info(NULL, "invalid zstd compress level: %d", level);
return -EINVAL;
}
- F2FS_OPTION(sbi).compress_level = level;
+ F2FS_CTX_INFO(ctx).compress_level = level;
+ ctx->spec_mask |= F2FS_SPEC_compress_level;
return 0;
}
#endif
#endif
-static int parse_options(struct f2fs_sb_info *sbi, char *options, bool is_remount)
+static int f2fs_parse_param(struct fs_context *fc, struct fs_parameter *param)
{
- substring_t args[MAX_OPT_ARGS];
+ struct f2fs_fs_context *ctx = fc->fs_private;
#ifdef CONFIG_F2FS_FS_COMPRESSION
unsigned char (*ext)[F2FS_EXTENSION_LEN];
unsigned char (*noext)[F2FS_EXTENSION_LEN];
int ext_cnt, noext_cnt;
+ char *name;
#endif
- char *p, *name;
- int arg = 0;
- kuid_t uid;
- kgid_t gid;
- int ret;
-
- if (!options)
- return 0;
-
- while ((p = strsep(&options, ",")) != NULL) {
- int token;
+ substring_t args[MAX_OPT_ARGS];
+ struct fs_parse_result result;
+ int token, ret, arg;
- if (!*p)
- continue;
- /*
- * Initialize args struct so we know whether arg was
- * found; some options take optional arguments.
- */
- args[0].to = args[0].from = NULL;
- token = match_token(p, f2fs_tokens, args);
+ token = fs_parse(fc, f2fs_param_specs, param, &result);
+ if (token < 0)
+ return token;
- switch (token) {
- case Opt_gc_background:
- name = match_strdup(&args[0]);
-
- if (!name)
- return -ENOMEM;
- if (!strcmp(name, "on")) {
- F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_ON;
- } else if (!strcmp(name, "off")) {
- if (f2fs_sb_has_blkzoned(sbi)) {
- f2fs_warn(sbi, "zoned devices need bggc");
- kfree(name);
- return -EINVAL;
- }
- F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_OFF;
- } else if (!strcmp(name, "sync")) {
- F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_SYNC;
- } else {
- kfree(name);
- return -EINVAL;
- }
- kfree(name);
- break;
- case Opt_disable_roll_forward:
- set_opt(sbi, DISABLE_ROLL_FORWARD);
- break;
- case Opt_norecovery:
- /* requires ro mount, checked in f2fs_default_check */
- set_opt(sbi, NORECOVERY);
- break;
- case Opt_discard:
- if (!f2fs_hw_support_discard(sbi)) {
- f2fs_warn(sbi, "device does not support discard");
- break;
- }
- set_opt(sbi, DISCARD);
- break;
- case Opt_nodiscard:
- if (f2fs_hw_should_discard(sbi)) {
- f2fs_warn(sbi, "discard is required for zoned block devices");
- return -EINVAL;
- }
- clear_opt(sbi, DISCARD);
- break;
- case Opt_noheap:
- case Opt_heap:
- f2fs_warn(sbi, "heap/no_heap options were deprecated");
- break;
+ switch (token) {
+ case Opt_gc_background:
+ F2FS_CTX_INFO(ctx).bggc_mode = result.uint_32;
+ ctx->spec_mask |= F2FS_SPEC_background_gc;
+ break;
+ case Opt_disable_roll_forward:
+ ctx_set_opt(ctx, F2FS_MOUNT_DISABLE_ROLL_FORWARD);
+ break;
+ case Opt_norecovery:
+ /* requires ro mount, checked in f2fs_validate_options */
+ ctx_set_opt(ctx, F2FS_MOUNT_NORECOVERY);
+ break;
+ case Opt_discard:
+ if (result.negated)
+ ctx_clear_opt(ctx, F2FS_MOUNT_DISCARD);
+ else
+ ctx_set_opt(ctx, F2FS_MOUNT_DISCARD);
+ break;
+ case Opt_noheap:
+ case Opt_heap:
+ f2fs_warn(NULL, "heap/no_heap options were deprecated");
+ break;
#ifdef CONFIG_F2FS_FS_XATTR
- case Opt_user_xattr:
- set_opt(sbi, XATTR_USER);
- break;
- case Opt_nouser_xattr:
- clear_opt(sbi, XATTR_USER);
- break;
- case Opt_inline_xattr:
- set_opt(sbi, INLINE_XATTR);
- break;
- case Opt_noinline_xattr:
- clear_opt(sbi, INLINE_XATTR);
- break;
- case Opt_inline_xattr_size:
- if (args->from && match_int(args, &arg))
- return -EINVAL;
- set_opt(sbi, INLINE_XATTR_SIZE);
- F2FS_OPTION(sbi).inline_xattr_size = arg;
- break;
+ case Opt_user_xattr:
+ if (result.negated)
+ ctx_clear_opt(ctx, F2FS_MOUNT_XATTR_USER);
+ else
+ ctx_set_opt(ctx, F2FS_MOUNT_XATTR_USER);
+ break;
+ case Opt_inline_xattr:
+ if (result.negated)
+ ctx_clear_opt(ctx, F2FS_MOUNT_INLINE_XATTR);
+ else
+ ctx_set_opt(ctx, F2FS_MOUNT_INLINE_XATTR);
+ break;
+ case Opt_inline_xattr_size:
+ if (result.int_32 < MIN_INLINE_XATTR_SIZE ||
+ result.int_32 > MAX_INLINE_XATTR_SIZE) {
+ f2fs_err(NULL, "inline xattr size is out of range: %u ~ %u",
+ (u32)MIN_INLINE_XATTR_SIZE, (u32)MAX_INLINE_XATTR_SIZE);
+ return -EINVAL;
+ }
+ ctx_set_opt(ctx, F2FS_MOUNT_INLINE_XATTR_SIZE);
+ F2FS_CTX_INFO(ctx).inline_xattr_size = result.int_32;
+ ctx->spec_mask |= F2FS_SPEC_inline_xattr_size;
+ break;
#else
- case Opt_user_xattr:
- case Opt_nouser_xattr:
- case Opt_inline_xattr:
- case Opt_noinline_xattr:
- case Opt_inline_xattr_size:
- f2fs_info(sbi, "xattr options not supported");
- break;
+ case Opt_user_xattr:
+ case Opt_inline_xattr:
+ case Opt_inline_xattr_size:
+ f2fs_info(NULL, "%s options not supported", param->key);
+ break;
#endif
#ifdef CONFIG_F2FS_FS_POSIX_ACL
- case Opt_acl:
- set_opt(sbi, POSIX_ACL);
- break;
- case Opt_noacl:
- clear_opt(sbi, POSIX_ACL);
- break;
+ case Opt_acl:
+ if (result.negated)
+ ctx_clear_opt(ctx, F2FS_MOUNT_POSIX_ACL);
+ else
+ ctx_set_opt(ctx, F2FS_MOUNT_POSIX_ACL);
+ break;
#else
- case Opt_acl:
- case Opt_noacl:
- f2fs_info(sbi, "acl options not supported");
- break;
+ case Opt_acl:
+ f2fs_info(NULL, "%s options not supported", param->key);
+ break;
#endif
- case Opt_active_logs:
- if (args->from && match_int(args, &arg))
- return -EINVAL;
- if (arg != 2 && arg != 4 &&
- arg != NR_CURSEG_PERSIST_TYPE)
- return -EINVAL;
- F2FS_OPTION(sbi).active_logs = arg;
- break;
- case Opt_disable_ext_identify:
- set_opt(sbi, DISABLE_EXT_IDENTIFY);
- break;
- case Opt_inline_data:
- set_opt(sbi, INLINE_DATA);
- break;
- case Opt_inline_dentry:
- set_opt(sbi, INLINE_DENTRY);
- break;
- case Opt_noinline_dentry:
- clear_opt(sbi, INLINE_DENTRY);
- break;
- case Opt_flush_merge:
- set_opt(sbi, FLUSH_MERGE);
- break;
- case Opt_noflush_merge:
- clear_opt(sbi, FLUSH_MERGE);
- break;
- case Opt_nobarrier:
- set_opt(sbi, NOBARRIER);
- break;
- case Opt_barrier:
- clear_opt(sbi, NOBARRIER);
- break;
- case Opt_fastboot:
- set_opt(sbi, FASTBOOT);
- break;
- case Opt_extent_cache:
- set_opt(sbi, READ_EXTENT_CACHE);
- break;
- case Opt_noextent_cache:
- if (f2fs_sb_has_device_alias(sbi)) {
- f2fs_err(sbi, "device aliasing requires extent cache");
- return -EINVAL;
- }
- clear_opt(sbi, READ_EXTENT_CACHE);
- break;
- case Opt_noinline_data:
- clear_opt(sbi, INLINE_DATA);
- break;
- case Opt_data_flush:
- set_opt(sbi, DATA_FLUSH);
- break;
- case Opt_reserve_root:
- if (args->from && match_int(args, &arg))
- return -EINVAL;
- if (test_opt(sbi, RESERVE_ROOT)) {
- f2fs_info(sbi, "Preserve previous reserve_root=%u",
- F2FS_OPTION(sbi).root_reserved_blocks);
- } else {
- F2FS_OPTION(sbi).root_reserved_blocks = arg;
- set_opt(sbi, RESERVE_ROOT);
- }
- break;
- case Opt_resuid:
- if (args->from && match_int(args, &arg))
- return -EINVAL;
- uid = make_kuid(current_user_ns(), arg);
- if (!uid_valid(uid)) {
- f2fs_err(sbi, "Invalid uid value %d", arg);
- return -EINVAL;
- }
- F2FS_OPTION(sbi).s_resuid = uid;
- break;
- case Opt_resgid:
- if (args->from && match_int(args, &arg))
- return -EINVAL;
- gid = make_kgid(current_user_ns(), arg);
- if (!gid_valid(gid)) {
- f2fs_err(sbi, "Invalid gid value %d", arg);
- return -EINVAL;
- }
- F2FS_OPTION(sbi).s_resgid = gid;
- break;
- case Opt_mode:
- name = match_strdup(&args[0]);
-
- if (!name)
- return -ENOMEM;
- if (!strcmp(name, "adaptive")) {
- F2FS_OPTION(sbi).fs_mode = FS_MODE_ADAPTIVE;
- } else if (!strcmp(name, "lfs")) {
- F2FS_OPTION(sbi).fs_mode = FS_MODE_LFS;
- } else if (!strcmp(name, "fragment:segment")) {
- F2FS_OPTION(sbi).fs_mode = FS_MODE_FRAGMENT_SEG;
- } else if (!strcmp(name, "fragment:block")) {
- F2FS_OPTION(sbi).fs_mode = FS_MODE_FRAGMENT_BLK;
- } else {
- kfree(name);
- return -EINVAL;
- }
- kfree(name);
- break;
+ case Opt_active_logs:
+ if (result.int_32 != 2 && result.int_32 != 4 &&
+ result.int_32 != NR_CURSEG_PERSIST_TYPE)
+ return -EINVAL;
+ ctx->spec_mask |= F2FS_SPEC_active_logs;
+ F2FS_CTX_INFO(ctx).active_logs = result.int_32;
+ break;
+ case Opt_disable_ext_identify:
+ ctx_set_opt(ctx, F2FS_MOUNT_DISABLE_EXT_IDENTIFY);
+ break;
+ case Opt_inline_data:
+ if (result.negated)
+ ctx_clear_opt(ctx, F2FS_MOUNT_INLINE_DATA);
+ else
+ ctx_set_opt(ctx, F2FS_MOUNT_INLINE_DATA);
+ break;
+ case Opt_inline_dentry:
+ if (result.negated)
+ ctx_clear_opt(ctx, F2FS_MOUNT_INLINE_DENTRY);
+ else
+ ctx_set_opt(ctx, F2FS_MOUNT_INLINE_DENTRY);
+ break;
+ case Opt_flush_merge:
+ if (result.negated)
+ ctx_clear_opt(ctx, F2FS_MOUNT_FLUSH_MERGE);
+ else
+ ctx_set_opt(ctx, F2FS_MOUNT_FLUSH_MERGE);
+ break;
+ case Opt_barrier:
+ if (result.negated)
+ ctx_set_opt(ctx, F2FS_MOUNT_NOBARRIER);
+ else
+ ctx_clear_opt(ctx, F2FS_MOUNT_NOBARRIER);
+ break;
+ case Opt_fastboot:
+ ctx_set_opt(ctx, F2FS_MOUNT_FASTBOOT);
+ break;
+ case Opt_extent_cache:
+ if (result.negated)
+ ctx_clear_opt(ctx, F2FS_MOUNT_READ_EXTENT_CACHE);
+ else
+ ctx_set_opt(ctx, F2FS_MOUNT_READ_EXTENT_CACHE);
+ break;
+ case Opt_data_flush:
+ ctx_set_opt(ctx, F2FS_MOUNT_DATA_FLUSH);
+ break;
+ case Opt_reserve_root:
+ ctx_set_opt(ctx, F2FS_MOUNT_RESERVE_ROOT);
+ F2FS_CTX_INFO(ctx).root_reserved_blocks = result.uint_32;
+ ctx->spec_mask |= F2FS_SPEC_reserve_root;
+ break;
+ case Opt_resuid:
+ F2FS_CTX_INFO(ctx).s_resuid = result.uid;
+ ctx->spec_mask |= F2FS_SPEC_resuid;
+ break;
+ case Opt_resgid:
+ F2FS_CTX_INFO(ctx).s_resgid = result.gid;
+ ctx->spec_mask |= F2FS_SPEC_resgid;
+ break;
+ case Opt_mode:
+ F2FS_CTX_INFO(ctx).fs_mode = result.uint_32;
+ ctx->spec_mask |= F2FS_SPEC_mode;
+ break;
#ifdef CONFIG_F2FS_FAULT_INJECTION
- case Opt_fault_injection:
- if (args->from && match_int(args, &arg))
- return -EINVAL;
- if (f2fs_build_fault_attr(sbi, arg, 0, FAULT_RATE))
- return -EINVAL;
- set_opt(sbi, FAULT_INJECTION);
- break;
+ case Opt_fault_injection:
+ F2FS_CTX_INFO(ctx).fault_info.inject_rate = result.int_32;
+ ctx->spec_mask |= F2FS_SPEC_fault_injection;
+ ctx_set_opt(ctx, F2FS_MOUNT_FAULT_INJECTION);
+ break;
- case Opt_fault_type:
- if (args->from && match_int(args, &arg))
- return -EINVAL;
- if (f2fs_build_fault_attr(sbi, 0, arg, FAULT_TYPE))
- return -EINVAL;
- set_opt(sbi, FAULT_INJECTION);
- break;
+ case Opt_fault_type:
+ if (result.uint_32 > BIT(FAULT_MAX))
+ return -EINVAL;
+ F2FS_CTX_INFO(ctx).fault_info.inject_type = result.uint_32;
+ ctx->spec_mask |= F2FS_SPEC_fault_type;
+ ctx_set_opt(ctx, F2FS_MOUNT_FAULT_INJECTION);
+ break;
#else
- case Opt_fault_injection:
- case Opt_fault_type:
- f2fs_info(sbi, "fault injection options not supported");
- break;
+ case Opt_fault_injection:
+ case Opt_fault_type:
+ f2fs_info(NULL, "%s options not supported", param->key);
+ break;
#endif
- case Opt_lazytime:
- set_opt(sbi, LAZYTIME);
- break;
- case Opt_nolazytime:
- clear_opt(sbi, LAZYTIME);
- break;
+ case Opt_lazytime:
+ if (result.negated)
+ ctx_clear_opt(ctx, F2FS_MOUNT_LAZYTIME);
+ else
+ ctx_set_opt(ctx, F2FS_MOUNT_LAZYTIME);
+ break;
#ifdef CONFIG_QUOTA
- case Opt_quota:
- case Opt_usrquota:
- set_opt(sbi, USRQUOTA);
- break;
- case Opt_grpquota:
- set_opt(sbi, GRPQUOTA);
- break;
- case Opt_prjquota:
- set_opt(sbi, PRJQUOTA);
- break;
- case Opt_usrjquota:
- ret = f2fs_set_qf_name(sbi, USRQUOTA, &args[0]);
- if (ret)
- return ret;
- break;
- case Opt_grpjquota:
- ret = f2fs_set_qf_name(sbi, GRPQUOTA, &args[0]);
- if (ret)
- return ret;
- break;
- case Opt_prjjquota:
- ret = f2fs_set_qf_name(sbi, PRJQUOTA, &args[0]);
- if (ret)
- return ret;
- break;
- case Opt_offusrjquota:
- ret = f2fs_clear_qf_name(sbi, USRQUOTA);
- if (ret)
- return ret;
- break;
- case Opt_offgrpjquota:
- ret = f2fs_clear_qf_name(sbi, GRPQUOTA);
- if (ret)
- return ret;
- break;
- case Opt_offprjjquota:
- ret = f2fs_clear_qf_name(sbi, PRJQUOTA);
- if (ret)
- return ret;
- break;
- case Opt_jqfmt_vfsold:
- F2FS_OPTION(sbi).s_jquota_fmt = QFMT_VFS_OLD;
- break;
- case Opt_jqfmt_vfsv0:
- F2FS_OPTION(sbi).s_jquota_fmt = QFMT_VFS_V0;
- break;
- case Opt_jqfmt_vfsv1:
- F2FS_OPTION(sbi).s_jquota_fmt = QFMT_VFS_V1;
- break;
- case Opt_noquota:
- clear_opt(sbi, QUOTA);
- clear_opt(sbi, USRQUOTA);
- clear_opt(sbi, GRPQUOTA);
- clear_opt(sbi, PRJQUOTA);
- break;
+ case Opt_quota:
+ if (result.negated) {
+ ctx_clear_opt(ctx, F2FS_MOUNT_QUOTA);
+ ctx_clear_opt(ctx, F2FS_MOUNT_USRQUOTA);
+ ctx_clear_opt(ctx, F2FS_MOUNT_GRPQUOTA);
+ ctx_clear_opt(ctx, F2FS_MOUNT_PRJQUOTA);
+ } else
+ ctx_set_opt(ctx, F2FS_MOUNT_USRQUOTA);
+ break;
+ case Opt_usrquota:
+ ctx_set_opt(ctx, F2FS_MOUNT_USRQUOTA);
+ break;
+ case Opt_grpquota:
+ ctx_set_opt(ctx, F2FS_MOUNT_GRPQUOTA);
+ break;
+ case Opt_prjquota:
+ ctx_set_opt(ctx, F2FS_MOUNT_PRJQUOTA);
+ break;
+ case Opt_usrjquota:
+ if (!*param->string)
+ ret = f2fs_unnote_qf_name(fc, USRQUOTA);
+ else
+ ret = f2fs_note_qf_name(fc, USRQUOTA, param);
+ if (ret)
+ return ret;
+ break;
+ case Opt_grpjquota:
+ if (!*param->string)
+ ret = f2fs_unnote_qf_name(fc, GRPQUOTA);
+ else
+ ret = f2fs_note_qf_name(fc, GRPQUOTA, param);
+ if (ret)
+ return ret;
+ break;
+ case Opt_prjjquota:
+ if (!*param->string)
+ ret = f2fs_unnote_qf_name(fc, PRJQUOTA);
+ else
+ ret = f2fs_note_qf_name(fc, PRJQUOTA, param);
+ if (ret)
+ return ret;
+ break;
+ case Opt_jqfmt:
+ F2FS_CTX_INFO(ctx).s_jquota_fmt = result.int_32;
+ ctx->spec_mask |= F2FS_SPEC_jqfmt;
+ break;
#else
- case Opt_quota:
- case Opt_usrquota:
- case Opt_grpquota:
- case Opt_prjquota:
- case Opt_usrjquota:
- case Opt_grpjquota:
- case Opt_prjjquota:
- case Opt_offusrjquota:
- case Opt_offgrpjquota:
- case Opt_offprjjquota:
- case Opt_jqfmt_vfsold:
- case Opt_jqfmt_vfsv0:
- case Opt_jqfmt_vfsv1:
- case Opt_noquota:
- f2fs_info(sbi, "quota operations not supported");
- break;
+ case Opt_quota:
+ case Opt_usrquota:
+ case Opt_grpquota:
+ case Opt_prjquota:
+ case Opt_usrjquota:
+ case Opt_grpjquota:
+ case Opt_prjjquota:
+ f2fs_info(NULL, "quota operations not supported");
+ break;
#endif
- case Opt_alloc:
- name = match_strdup(&args[0]);
- if (!name)
- return -ENOMEM;
-
- if (!strcmp(name, "default")) {
- F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_DEFAULT;
- } else if (!strcmp(name, "reuse")) {
- F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_REUSE;
- } else {
- kfree(name);
- return -EINVAL;
- }
- kfree(name);
- break;
- case Opt_fsync:
- name = match_strdup(&args[0]);
- if (!name)
- return -ENOMEM;
- if (!strcmp(name, "posix")) {
- F2FS_OPTION(sbi).fsync_mode = FSYNC_MODE_POSIX;
- } else if (!strcmp(name, "strict")) {
- F2FS_OPTION(sbi).fsync_mode = FSYNC_MODE_STRICT;
- } else if (!strcmp(name, "nobarrier")) {
- F2FS_OPTION(sbi).fsync_mode =
- FSYNC_MODE_NOBARRIER;
- } else {
- kfree(name);
- return -EINVAL;
- }
- kfree(name);
- break;
- case Opt_test_dummy_encryption:
- ret = f2fs_set_test_dummy_encryption(sbi, p, &args[0],
- is_remount);
- if (ret)
- return ret;
- break;
- case Opt_inlinecrypt:
+ case Opt_alloc:
+ F2FS_CTX_INFO(ctx).alloc_mode = result.uint_32;
+ ctx->spec_mask |= F2FS_SPEC_alloc_mode;
+ break;
+ case Opt_fsync:
+ F2FS_CTX_INFO(ctx).fsync_mode = result.uint_32;
+ ctx->spec_mask |= F2FS_SPEC_fsync_mode;
+ break;
+ case Opt_test_dummy_encryption:
+ ret = f2fs_parse_test_dummy_encryption(param, ctx);
+ if (ret)
+ return ret;
+ break;
+ case Opt_inlinecrypt:
#ifdef CONFIG_FS_ENCRYPTION_INLINE_CRYPT
- set_opt(sbi, INLINECRYPT);
+ ctx_set_opt(ctx, F2FS_MOUNT_INLINECRYPT);
#else
- f2fs_info(sbi, "inline encryption not supported");
+ f2fs_info(NULL, "inline encryption not supported");
#endif
- break;
+ break;
+ case Opt_checkpoint:
+ /*
+ * Initialize args struct so we know whether arg was
+ * found; some options take optional arguments.
+ */
+ args[0].from = args[0].to = NULL;
+ arg = 0;
+
+ /* revert to match_table for checkpoint= options */
+ token = match_token(param->string, f2fs_checkpoint_tokens, args);
+ switch (token) {
case Opt_checkpoint_disable_cap_perc:
if (args->from && match_int(args, &arg))
return -EINVAL;
if (arg < 0 || arg > 100)
return -EINVAL;
- F2FS_OPTION(sbi).unusable_cap_perc = arg;
- set_opt(sbi, DISABLE_CHECKPOINT);
+ F2FS_CTX_INFO(ctx).unusable_cap_perc = arg;
+ ctx->spec_mask |= F2FS_SPEC_checkpoint_disable_cap_perc;
+ ctx_set_opt(ctx, F2FS_MOUNT_DISABLE_CHECKPOINT);
break;
case Opt_checkpoint_disable_cap:
if (args->from && match_int(args, &arg))
return -EINVAL;
- F2FS_OPTION(sbi).unusable_cap = arg;
- set_opt(sbi, DISABLE_CHECKPOINT);
+ F2FS_CTX_INFO(ctx).unusable_cap = arg;
+ ctx->spec_mask |= F2FS_SPEC_checkpoint_disable_cap;
+ ctx_set_opt(ctx, F2FS_MOUNT_DISABLE_CHECKPOINT);
break;
case Opt_checkpoint_disable:
- set_opt(sbi, DISABLE_CHECKPOINT);
+ ctx_set_opt(ctx, F2FS_MOUNT_DISABLE_CHECKPOINT);
break;
case Opt_checkpoint_enable:
- clear_opt(sbi, DISABLE_CHECKPOINT);
- break;
- case Opt_checkpoint_merge:
- set_opt(sbi, MERGE_CHECKPOINT);
- break;
- case Opt_nocheckpoint_merge:
- clear_opt(sbi, MERGE_CHECKPOINT);
+ ctx_clear_opt(ctx, F2FS_MOUNT_DISABLE_CHECKPOINT);
break;
+ default:
+ return -EINVAL;
+ }
+ break;
+ case Opt_checkpoint_merge:
+ if (result.negated)
+ ctx_clear_opt(ctx, F2FS_MOUNT_MERGE_CHECKPOINT);
+ else
+ ctx_set_opt(ctx, F2FS_MOUNT_MERGE_CHECKPOINT);
+ break;
#ifdef CONFIG_F2FS_FS_COMPRESSION
- case Opt_compress_algorithm:
- if (!f2fs_sb_has_compression(sbi)) {
- f2fs_info(sbi, "Image doesn't support compression");
- break;
- }
- name = match_strdup(&args[0]);
- if (!name)
- return -ENOMEM;
- if (!strcmp(name, "lzo")) {
+ case Opt_compress_algorithm:
+ name = param->string;
+ if (!strcmp(name, "lzo")) {
#ifdef CONFIG_F2FS_FS_LZO
- F2FS_OPTION(sbi).compress_level = 0;
- F2FS_OPTION(sbi).compress_algorithm =
- COMPRESS_LZO;
+ F2FS_CTX_INFO(ctx).compress_level = 0;
+ F2FS_CTX_INFO(ctx).compress_algorithm = COMPRESS_LZO;
+ ctx->spec_mask |= F2FS_SPEC_compress_level;
+ ctx->spec_mask |= F2FS_SPEC_compress_algorithm;
#else
- f2fs_info(sbi, "kernel doesn't support lzo compression");
+ f2fs_info(NULL, "kernel doesn't support lzo compression");
#endif
- } else if (!strncmp(name, "lz4", 3)) {
+ } else if (!strncmp(name, "lz4", 3)) {
#ifdef CONFIG_F2FS_FS_LZ4
- ret = f2fs_set_lz4hc_level(sbi, name);
- if (ret) {
- kfree(name);
- return -EINVAL;
- }
- F2FS_OPTION(sbi).compress_algorithm =
- COMPRESS_LZ4;
+ ret = f2fs_set_lz4hc_level(ctx, name);
+ if (ret)
+ return -EINVAL;
+ F2FS_CTX_INFO(ctx).compress_algorithm = COMPRESS_LZ4;
+ ctx->spec_mask |= F2FS_SPEC_compress_algorithm;
#else
- f2fs_info(sbi, "kernel doesn't support lz4 compression");
+ f2fs_info(NULL, "kernel doesn't support lz4 compression");
#endif
- } else if (!strncmp(name, "zstd", 4)) {
+ } else if (!strncmp(name, "zstd", 4)) {
#ifdef CONFIG_F2FS_FS_ZSTD
- ret = f2fs_set_zstd_level(sbi, name);
- if (ret) {
- kfree(name);
- return -EINVAL;
- }
- F2FS_OPTION(sbi).compress_algorithm =
- COMPRESS_ZSTD;
+ ret = f2fs_set_zstd_level(ctx, name);
+ if (ret)
+ return -EINVAL;
+ F2FS_CTX_INFO(ctx).compress_algorithm = COMPRESS_ZSTD;
+ ctx->spec_mask |= F2FS_SPEC_compress_algorithm;
#else
- f2fs_info(sbi, "kernel doesn't support zstd compression");
+ f2fs_info(NULL, "kernel doesn't support zstd compression");
#endif
- } else if (!strcmp(name, "lzo-rle")) {
+ } else if (!strcmp(name, "lzo-rle")) {
#ifdef CONFIG_F2FS_FS_LZORLE
- F2FS_OPTION(sbi).compress_level = 0;
- F2FS_OPTION(sbi).compress_algorithm =
- COMPRESS_LZORLE;
+ F2FS_CTX_INFO(ctx).compress_level = 0;
+ F2FS_CTX_INFO(ctx).compress_algorithm = COMPRESS_LZORLE;
+ ctx->spec_mask |= F2FS_SPEC_compress_level;
+ ctx->spec_mask |= F2FS_SPEC_compress_algorithm;
#else
- f2fs_info(sbi, "kernel doesn't support lzorle compression");
+ f2fs_info(NULL, "kernel doesn't support lzorle compression");
#endif
- } else {
- kfree(name);
- return -EINVAL;
- }
- kfree(name);
+ } else
+ return -EINVAL;
+ break;
+ case Opt_compress_log_size:
+ if (result.uint_32 < MIN_COMPRESS_LOG_SIZE ||
+ result.uint_32 > MAX_COMPRESS_LOG_SIZE) {
+ f2fs_err(NULL,
+ "Compress cluster log size is out of range");
+ return -EINVAL;
+ }
+ F2FS_CTX_INFO(ctx).compress_log_size = result.uint_32;
+ ctx->spec_mask |= F2FS_SPEC_compress_log_size;
+ break;
+ case Opt_compress_extension:
+ name = param->string;
+ ext = F2FS_CTX_INFO(ctx).extensions;
+ ext_cnt = F2FS_CTX_INFO(ctx).compress_ext_cnt;
+
+ if (strlen(name) >= F2FS_EXTENSION_LEN ||
+ ext_cnt >= COMPRESS_EXT_NUM) {
+ f2fs_err(NULL, "invalid extension length/number");
+ return -EINVAL;
+ }
+
+ if (is_compress_extension_exist(&ctx->info, name, true))
break;
- case Opt_compress_log_size:
- if (!f2fs_sb_has_compression(sbi)) {
- f2fs_info(sbi, "Image doesn't support compression");
- break;
- }
- if (args->from && match_int(args, &arg))
- return -EINVAL;
- if (arg < MIN_COMPRESS_LOG_SIZE ||
- arg > MAX_COMPRESS_LOG_SIZE) {
- f2fs_err(sbi,
- "Compress cluster log size is out of range");
- return -EINVAL;
- }
- F2FS_OPTION(sbi).compress_log_size = arg;
+
+ ret = strscpy(ext[ext_cnt], name, F2FS_EXTENSION_LEN);
+ if (ret < 0)
+ return ret;
+ F2FS_CTX_INFO(ctx).compress_ext_cnt++;
+ ctx->spec_mask |= F2FS_SPEC_compress_extension;
+ break;
+ case Opt_nocompress_extension:
+ name = param->string;
+ noext = F2FS_CTX_INFO(ctx).noextensions;
+ noext_cnt = F2FS_CTX_INFO(ctx).nocompress_ext_cnt;
+
+ if (strlen(name) >= F2FS_EXTENSION_LEN ||
+ noext_cnt >= COMPRESS_EXT_NUM) {
+ f2fs_err(NULL, "invalid extension length/number");
+ return -EINVAL;
+ }
+
+ if (is_compress_extension_exist(&ctx->info, name, false))
break;
- case Opt_compress_extension:
- if (!f2fs_sb_has_compression(sbi)) {
- f2fs_info(sbi, "Image doesn't support compression");
- break;
- }
- name = match_strdup(&args[0]);
- if (!name)
- return -ENOMEM;
- ext = F2FS_OPTION(sbi).extensions;
- ext_cnt = F2FS_OPTION(sbi).compress_ext_cnt;
+ ret = strscpy(noext[noext_cnt], name, F2FS_EXTENSION_LEN);
+ if (ret < 0)
+ return ret;
+ F2FS_CTX_INFO(ctx).nocompress_ext_cnt++;
+ ctx->spec_mask |= F2FS_SPEC_nocompress_extension;
+ break;
+ case Opt_compress_chksum:
+ F2FS_CTX_INFO(ctx).compress_chksum = true;
+ ctx->spec_mask |= F2FS_SPEC_compress_chksum;
+ break;
+ case Opt_compress_mode:
+ F2FS_CTX_INFO(ctx).compress_mode = result.uint_32;
+ ctx->spec_mask |= F2FS_SPEC_compress_mode;
+ break;
+ case Opt_compress_cache:
+ ctx_set_opt(ctx, F2FS_MOUNT_COMPRESS_CACHE);
+ break;
+#else
+ case Opt_compress_algorithm:
+ case Opt_compress_log_size:
+ case Opt_compress_extension:
+ case Opt_nocompress_extension:
+ case Opt_compress_chksum:
+ case Opt_compress_mode:
+ case Opt_compress_cache:
+ f2fs_info(NULL, "compression options not supported");
+ break;
+#endif
+ case Opt_atgc:
+ ctx_set_opt(ctx, F2FS_MOUNT_ATGC);
+ break;
+ case Opt_gc_merge:
+ if (result.negated)
+ ctx_clear_opt(ctx, F2FS_MOUNT_GC_MERGE);
+ else
+ ctx_set_opt(ctx, F2FS_MOUNT_GC_MERGE);
+ break;
+ case Opt_discard_unit:
+ F2FS_CTX_INFO(ctx).discard_unit = result.uint_32;
+ ctx->spec_mask |= F2FS_SPEC_discard_unit;
+ break;
+ case Opt_memory_mode:
+ F2FS_CTX_INFO(ctx).memory_mode = result.uint_32;
+ ctx->spec_mask |= F2FS_SPEC_memory_mode;
+ break;
+ case Opt_age_extent_cache:
+ ctx_set_opt(ctx, F2FS_MOUNT_AGE_EXTENT_CACHE);
+ break;
+ case Opt_errors:
+ F2FS_CTX_INFO(ctx).errors = result.uint_32;
+ ctx->spec_mask |= F2FS_SPEC_errors;
+ break;
+ case Opt_nat_bits:
+ ctx_set_opt(ctx, F2FS_MOUNT_NAT_BITS);
+ break;
+ }
+ return 0;
+}
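
f2fs_parse_param() is deliberately superblock-free: each option is validated in isolation and recorded in the context, and everything that needs the image (compression feature, zoned-device discard, device aliasing, quota state) moves into the f2fs_check_*() helpers below, which run with the sb in hand. Roughly what the VFS hands this function per option (sketch; fc assumed in scope):

	struct fs_parameter param = {
		.key	= "compress_algorithm",
		.type	= fs_value_is_string,
		.string	= "zstd:6",
		.size	= 6,
	};
	int err = f2fs_parse_param(fc, &param);
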
- if (strlen(name) >= F2FS_EXTENSION_LEN ||
- ext_cnt >= COMPRESS_EXT_NUM) {
- f2fs_err(sbi,
- "invalid extension length/number");
- kfree(name);
- return -EINVAL;
- }
+/*
+ * Check quota settings consistency.
+ */
+static int f2fs_check_quota_consistency(struct fs_context *fc,
+ struct super_block *sb)
+{
+ struct f2fs_sb_info *sbi = F2FS_SB(sb);
+#ifdef CONFIG_QUOTA
+ struct f2fs_fs_context *ctx = fc->fs_private;
+ bool quota_feature = f2fs_sb_has_quota_ino(sbi);
+ bool quota_turnon = sb_any_quota_loaded(sb);
+ char *old_qname, *new_qname;
+ bool usr_qf_name, grp_qf_name, prj_qf_name, usrquota, grpquota, prjquota;
+ int i;
- if (is_compress_extension_exist(sbi, name, true)) {
- kfree(name);
- break;
- }
+ /*
+ * We do the test below only for project quotas. 'usrquota' and
+ * 'grpquota' mount options are allowed even without quota feature
+ * to support legacy quotas in quota files.
+ */
+ if (ctx_test_opt(ctx, F2FS_MOUNT_PRJQUOTA) &&
+ !f2fs_sb_has_project_quota(sbi)) {
+ f2fs_err(sbi, "Project quota feature not enabled. Cannot enable project quota enforcement.");
+ return -EINVAL;
+ }
- ret = strscpy(ext[ext_cnt], name);
- if (ret < 0) {
- kfree(name);
- return ret;
- }
- F2FS_OPTION(sbi).compress_ext_cnt++;
- kfree(name);
- break;
- case Opt_nocompress_extension:
- if (!f2fs_sb_has_compression(sbi)) {
- f2fs_info(sbi, "Image doesn't support compression");
- break;
- }
- name = match_strdup(&args[0]);
- if (!name)
- return -ENOMEM;
+ if (ctx->qname_mask) {
+ for (i = 0; i < MAXQUOTAS; i++) {
+ if (!(ctx->qname_mask & (1 << i)))
+ continue;
- noext = F2FS_OPTION(sbi).noextensions;
- noext_cnt = F2FS_OPTION(sbi).nocompress_ext_cnt;
+ old_qname = F2FS_OPTION(sbi).s_qf_names[i];
+ new_qname = F2FS_CTX_INFO(ctx).s_qf_names[i];
+ if (quota_turnon &&
+ !!old_qname != !!new_qname)
+ goto err_jquota_change;
- if (strlen(name) >= F2FS_EXTENSION_LEN ||
- noext_cnt >= COMPRESS_EXT_NUM) {
- f2fs_err(sbi,
- "invalid extension length/number");
- kfree(name);
- return -EINVAL;
+ if (old_qname) {
+ if (strcmp(old_qname, new_qname) == 0) {
+ ctx->qname_mask &= ~(1 << i);
+ continue;
+ }
+ goto err_jquota_specified;
}
- if (is_compress_extension_exist(sbi, name, false)) {
- kfree(name);
- break;
+ if (quota_feature) {
+ f2fs_info(sbi, "QUOTA feature is enabled, so ignore qf_name");
+ ctx->qname_mask &= ~(1 << i);
+ kfree(F2FS_CTX_INFO(ctx).s_qf_names[i]);
+ F2FS_CTX_INFO(ctx).s_qf_names[i] = NULL;
}
+ }
+ }
+
+ /* Make sure we don't mix old and new quota format */
+ usr_qf_name = F2FS_OPTION(sbi).s_qf_names[USRQUOTA] ||
+ F2FS_CTX_INFO(ctx).s_qf_names[USRQUOTA];
+ grp_qf_name = F2FS_OPTION(sbi).s_qf_names[GRPQUOTA] ||
+ F2FS_CTX_INFO(ctx).s_qf_names[GRPQUOTA];
+ prj_qf_name = F2FS_OPTION(sbi).s_qf_names[PRJQUOTA] ||
+ F2FS_CTX_INFO(ctx).s_qf_names[PRJQUOTA];
+ usrquota = test_opt(sbi, USRQUOTA) ||
+ ctx_test_opt(ctx, F2FS_MOUNT_USRQUOTA);
+ grpquota = test_opt(sbi, GRPQUOTA) ||
+ ctx_test_opt(ctx, F2FS_MOUNT_GRPQUOTA);
+ prjquota = test_opt(sbi, PRJQUOTA) ||
+ ctx_test_opt(ctx, F2FS_MOUNT_PRJQUOTA);
+
+ if (usr_qf_name) {
+ ctx_clear_opt(ctx, F2FS_MOUNT_USRQUOTA);
+ usrquota = false;
+ }
+ if (grp_qf_name) {
+ ctx_clear_opt(ctx, F2FS_MOUNT_GRPQUOTA);
+ grpquota = false;
+ }
+ if (prj_qf_name) {
+ ctx_clear_opt(ctx, F2FS_MOUNT_PRJQUOTA);
+ prjquota = false;
+ }
+ if (usr_qf_name || grp_qf_name || prj_qf_name) {
+ if (grpquota || usrquota || prjquota) {
+ f2fs_err(sbi, "old and new quota format mixing");
+ return -EINVAL;
+ }
+ if (!(ctx->spec_mask & F2FS_SPEC_jqfmt ||
+ F2FS_OPTION(sbi).s_jquota_fmt)) {
+ f2fs_err(sbi, "journaled quota format not specified");
+ return -EINVAL;
+ }
+ }
+ return 0;
+
+err_jquota_change:
+ f2fs_err(sbi, "Cannot change journaled quota options when quota turned on");
+ return -EINVAL;
+err_jquota_specified:
+ f2fs_err(sbi, "%s quota file already specified",
+ QTYPE2NAME(i));
+ return -EINVAL;
- ret = strscpy(noext[noext_cnt], name);
- if (ret < 0) {
- kfree(name);
- return ret;
- }
- F2FS_OPTION(sbi).nocompress_ext_cnt++;
- kfree(name);
- break;
- case Opt_compress_chksum:
- if (!f2fs_sb_has_compression(sbi)) {
- f2fs_info(sbi, "Image doesn't support compression");
- break;
- }
- F2FS_OPTION(sbi).compress_chksum = true;
- break;
- case Opt_compress_mode:
- if (!f2fs_sb_has_compression(sbi)) {
- f2fs_info(sbi, "Image doesn't support compression");
- break;
- }
- name = match_strdup(&args[0]);
- if (!name)
- return -ENOMEM;
- if (!strcmp(name, "fs")) {
- F2FS_OPTION(sbi).compress_mode = COMPR_MODE_FS;
- } else if (!strcmp(name, "user")) {
- F2FS_OPTION(sbi).compress_mode = COMPR_MODE_USER;
- } else {
- kfree(name);
- return -EINVAL;
- }
- kfree(name);
- break;
- case Opt_compress_cache:
- if (!f2fs_sb_has_compression(sbi)) {
- f2fs_info(sbi, "Image doesn't support compression");
- break;
- }
- set_opt(sbi, COMPRESS_CACHE);
- break;
#else
- case Opt_compress_algorithm:
- case Opt_compress_log_size:
- case Opt_compress_extension:
- case Opt_nocompress_extension:
- case Opt_compress_chksum:
- case Opt_compress_mode:
- case Opt_compress_cache:
- f2fs_info(sbi, "compression options not supported");
- break;
+ if (f2fs_readonly(sbi->sb))
+ return 0;
+ if (f2fs_sb_has_quota_ino(sbi)) {
+ f2fs_info(sbi, "Filesystem with quota feature cannot be mounted RDWR without CONFIG_QUOTA");
+ return -EINVAL;
+ }
+ if (f2fs_sb_has_project_quota(sbi)) {
+ f2fs_err(sbi, "Filesystem with project quota feature cannot be mounted RDWR without CONFIG_QUOTA");
+ return -EINVAL;
+ }
+
+ return 0;
#endif
- case Opt_atgc:
- set_opt(sbi, ATGC);
- break;
- case Opt_gc_merge:
- set_opt(sbi, GC_MERGE);
- break;
- case Opt_nogc_merge:
- clear_opt(sbi, GC_MERGE);
- break;
- case Opt_discard_unit:
- name = match_strdup(&args[0]);
- if (!name)
- return -ENOMEM;
- if (!strcmp(name, "block")) {
- F2FS_OPTION(sbi).discard_unit =
- DISCARD_UNIT_BLOCK;
- } else if (!strcmp(name, "segment")) {
- F2FS_OPTION(sbi).discard_unit =
- DISCARD_UNIT_SEGMENT;
- } else if (!strcmp(name, "section")) {
- F2FS_OPTION(sbi).discard_unit =
- DISCARD_UNIT_SECTION;
- } else {
- kfree(name);
- return -EINVAL;
- }
- kfree(name);
- break;
- case Opt_memory_mode:
- name = match_strdup(&args[0]);
- if (!name)
- return -ENOMEM;
- if (!strcmp(name, "normal")) {
- F2FS_OPTION(sbi).memory_mode =
- MEMORY_MODE_NORMAL;
- } else if (!strcmp(name, "low")) {
- F2FS_OPTION(sbi).memory_mode =
- MEMORY_MODE_LOW;
- } else {
- kfree(name);
- return -EINVAL;
+}
+
+static int f2fs_check_test_dummy_encryption(struct fs_context *fc,
+ struct super_block *sb)
+{
+ struct f2fs_fs_context *ctx = fc->fs_private;
+ struct f2fs_sb_info *sbi = F2FS_SB(sb);
+
+ if (!fscrypt_is_dummy_policy_set(&F2FS_CTX_INFO(ctx).dummy_enc_policy))
+ return 0;
+
+ if (!f2fs_sb_has_encrypt(sbi)) {
+ f2fs_err(sbi, "Encrypt feature is off");
+ return -EINVAL;
+ }
+
+ /*
+ * This mount option is just for testing, and it's not worthwhile to
+ * implement the extra complexity (e.g. RCU protection) that would be
+ * needed to allow it to be set or changed during remount. We do allow
+ * it to be specified during remount, but only if there is no change.
+ */
+ if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE) {
+ if (fscrypt_dummy_policies_equal(&F2FS_OPTION(sbi).dummy_enc_policy,
+ &F2FS_CTX_INFO(ctx).dummy_enc_policy))
+ return 0;
+ f2fs_warn(sbi, "Can't set or change test_dummy_encryption on remount");
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static inline bool test_compression_spec(unsigned int mask)
+{
+ return mask & (F2FS_SPEC_compress_algorithm
+ | F2FS_SPEC_compress_log_size
+ | F2FS_SPEC_compress_extension
+ | F2FS_SPEC_nocompress_extension
+ | F2FS_SPEC_compress_chksum
+ | F2FS_SPEC_compress_mode);
+}
+
+static inline void clear_compression_spec(struct f2fs_fs_context *ctx)
+{
+ ctx->spec_mask &= ~(F2FS_SPEC_compress_algorithm
+ | F2FS_SPEC_compress_log_size
+ | F2FS_SPEC_compress_extension
+ | F2FS_SPEC_nocompress_extension
+ | F2FS_SPEC_compress_chksum
+ | F2FS_SPEC_compress_mode);
+}
+
+static int f2fs_check_compression(struct fs_context *fc,
+ struct super_block *sb)
+{
+#ifdef CONFIG_F2FS_FS_COMPRESSION
+ struct f2fs_fs_context *ctx = fc->fs_private;
+ struct f2fs_sb_info *sbi = F2FS_SB(sb);
+ int i, cnt;
+
+ if (!f2fs_sb_has_compression(sbi)) {
+ if (test_compression_spec(ctx->spec_mask) ||
+ ctx_test_opt(ctx, F2FS_MOUNT_COMPRESS_CACHE))
+ f2fs_info(sbi, "Image doesn't support compression");
+ clear_compression_spec(ctx);
+ ctx->opt_mask &= ~F2FS_MOUNT_COMPRESS_CACHE;
+ return 0;
+ }
+ if (ctx->spec_mask & F2FS_SPEC_compress_extension) {
+ cnt = F2FS_CTX_INFO(ctx).compress_ext_cnt;
+ for (i = 0; i < F2FS_CTX_INFO(ctx).compress_ext_cnt; i++) {
+ if (is_compress_extension_exist(&F2FS_OPTION(sbi),
+ F2FS_CTX_INFO(ctx).extensions[i], true)) {
+ F2FS_CTX_INFO(ctx).extensions[i][0] = '\0';
+ cnt--;
}
- kfree(name);
- break;
- case Opt_age_extent_cache:
- set_opt(sbi, AGE_EXTENT_CACHE);
- break;
- case Opt_errors:
- name = match_strdup(&args[0]);
- if (!name)
- return -ENOMEM;
- if (!strcmp(name, "remount-ro")) {
- F2FS_OPTION(sbi).errors =
- MOUNT_ERRORS_READONLY;
- } else if (!strcmp(name, "continue")) {
- F2FS_OPTION(sbi).errors =
- MOUNT_ERRORS_CONTINUE;
- } else if (!strcmp(name, "panic")) {
- F2FS_OPTION(sbi).errors =
- MOUNT_ERRORS_PANIC;
- } else {
- kfree(name);
- return -EINVAL;
+ }
+ if (F2FS_OPTION(sbi).compress_ext_cnt + cnt > COMPRESS_EXT_NUM) {
+ f2fs_err(sbi, "invalid extension length/number");
+ return -EINVAL;
+ }
+ }
+ if (ctx->spec_mask & F2FS_SPEC_nocompress_extension) {
+ cnt = F2FS_CTX_INFO(ctx).nocompress_ext_cnt;
+ for (i = 0; i < F2FS_CTX_INFO(ctx).nocompress_ext_cnt; i++) {
+ if (is_compress_extension_exist(&F2FS_OPTION(sbi),
+ F2FS_CTX_INFO(ctx).noextensions[i], false)) {
+ F2FS_CTX_INFO(ctx).noextensions[i][0] = '\0';
+ cnt--;
}
- kfree(name);
- break;
- case Opt_nat_bits:
- set_opt(sbi, NAT_BITS);
- break;
- default:
- f2fs_err(sbi, "Unrecognized mount option \"%s\" or missing value",
- p);
+ }
+ if (F2FS_OPTION(sbi).nocompress_ext_cnt + cnt > COMPRESS_EXT_NUM) {
+ f2fs_err(sbi, "invalid noextension length/number");
return -EINVAL;
}
}
+
+ if (f2fs_test_compress_extension(F2FS_CTX_INFO(ctx).noextensions,
+ F2FS_CTX_INFO(ctx).nocompress_ext_cnt,
+ F2FS_CTX_INFO(ctx).extensions,
+ F2FS_CTX_INFO(ctx).compress_ext_cnt)) {
+ f2fs_err(sbi, "new noextensions conflicts with new extensions");
+ return -EINVAL;
+ }
+ if (f2fs_test_compress_extension(F2FS_CTX_INFO(ctx).noextensions,
+ F2FS_CTX_INFO(ctx).nocompress_ext_cnt,
+ F2FS_OPTION(sbi).extensions,
+ F2FS_OPTION(sbi).compress_ext_cnt)) {
+ f2fs_err(sbi, "new noextensions conflicts with old extensions");
+ return -EINVAL;
+ }
+ if (f2fs_test_compress_extension(F2FS_OPTION(sbi).noextensions,
+ F2FS_OPTION(sbi).nocompress_ext_cnt,
+ F2FS_CTX_INFO(ctx).extensions,
+ F2FS_CTX_INFO(ctx).compress_ext_cnt)) {
+ f2fs_err(sbi, "new extensions conflicts with old noextensions");
+ return -EINVAL;
+ }
+#endif
return 0;
}
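
f2fs_check_compression() blanks out extensions that already exist in the superblock options and keeps a running count, so only genuinely new entries count against COMPRESS_EXT_NUM. The same blank-and-count idea in a standalone sketch (all names invented):

    #include <stdio.h>
    #include <string.h>

    #define EXT_LEN 8
    #define EXT_NUM 4

    /* does ext already exist in the old list? */
    static int ext_exists(char old[][EXT_LEN], int old_cnt, const char *ext)
    {
            for (int i = 0; i < old_cnt; i++)
                    if (!strcmp(old[i], ext))
                            return 1;
            return 0;
    }

    int main(void)
    {
            char old[EXT_NUM][EXT_LEN] = { "txt", "log" };
            char added[EXT_NUM][EXT_LEN] = { "log", "db" };
            int old_cnt = 2, cnt = 2;

            for (int i = 0; i < 2; i++) {
                    if (ext_exists(old, old_cnt, added[i])) {
                            added[i][0] = '\0';     /* blank out the duplicate */
                            cnt--;
                    }
            }
            if (old_cnt + cnt > EXT_NUM)
                    fprintf(stderr, "too many extensions\n");
            else
                    printf("%d new extension(s) survive the merge\n", cnt);
            return 0;
    }
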
-static int f2fs_default_check(struct f2fs_sb_info *sbi)
+static int f2fs_check_opt_consistency(struct fs_context *fc,
+ struct super_block *sb)
{
-#ifdef CONFIG_QUOTA
- if (f2fs_check_quota_options(sbi))
+ struct f2fs_fs_context *ctx = fc->fs_private;
+ struct f2fs_sb_info *sbi = F2FS_SB(sb);
+ int err;
+
+ if (ctx_test_opt(ctx, F2FS_MOUNT_NORECOVERY) && !f2fs_readonly(sb))
return -EINVAL;
-#else
- if (f2fs_sb_has_quota_ino(sbi) && !f2fs_readonly(sbi->sb)) {
- f2fs_info(sbi, "Filesystem with quota feature cannot be mounted RDWR without CONFIG_QUOTA");
+
+ if (f2fs_hw_should_discard(sbi) &&
+ (ctx->opt_mask & F2FS_MOUNT_DISCARD) &&
+ !ctx_test_opt(ctx, F2FS_MOUNT_DISCARD)) {
+ f2fs_warn(sbi, "discard is required for zoned block devices");
return -EINVAL;
}
- if (f2fs_sb_has_project_quota(sbi) && !f2fs_readonly(sbi->sb)) {
- f2fs_err(sbi, "Filesystem with project quota feature cannot be mounted RDWR without CONFIG_QUOTA");
+
+ if (!f2fs_hw_support_discard(sbi) &&
+ (ctx->opt_mask & F2FS_MOUNT_DISCARD) &&
+ ctx_test_opt(ctx, F2FS_MOUNT_DISCARD)) {
+ f2fs_warn(sbi, "device does not support discard");
+ ctx_clear_opt(ctx, F2FS_MOUNT_DISCARD);
+ ctx->opt_mask &= ~F2FS_MOUNT_DISCARD;
+ }
+
+ if (f2fs_sb_has_device_alias(sbi) &&
+ (ctx->opt_mask & F2FS_MOUNT_READ_EXTENT_CACHE) &&
+ !ctx_test_opt(ctx, F2FS_MOUNT_READ_EXTENT_CACHE)) {
+ f2fs_err(sbi, "device aliasing requires extent cache");
return -EINVAL;
}
-#endif
+
+ if (test_opt(sbi, RESERVE_ROOT) &&
+ (ctx->opt_mask & F2FS_MOUNT_RESERVE_ROOT) &&
+ ctx_test_opt(ctx, F2FS_MOUNT_RESERVE_ROOT)) {
+ f2fs_info(sbi, "Preserve previous reserve_root=%u",
+ F2FS_OPTION(sbi).root_reserved_blocks);
+ ctx_clear_opt(ctx, F2FS_MOUNT_RESERVE_ROOT);
+ ctx->opt_mask &= ~F2FS_MOUNT_RESERVE_ROOT;
+ }
+
+ err = f2fs_check_test_dummy_encryption(fc, sb);
+ if (err)
+ return err;
+
+ err = f2fs_check_compression(fc, sb);
+ if (err)
+ return err;
+
+ err = f2fs_check_quota_consistency(fc, sb);
+ if (err)
+ return err;
if (!IS_ENABLED(CONFIG_UNICODE) && f2fs_sb_has_casefold(sbi)) {
f2fs_err(sbi,
@@ -1354,15 +1449,19 @@ static int f2fs_default_check(struct f2fs_sb_info *sbi)
* devices, but mandatory for host-managed zoned block devices.
*/
if (f2fs_sb_has_blkzoned(sbi)) {
+ if (F2FS_CTX_INFO(ctx).bggc_mode == BGGC_MODE_OFF) {
+ f2fs_warn(sbi, "zoned devices need bggc");
+ return -EINVAL;
+ }
#ifdef CONFIG_BLK_DEV_ZONED
- if (F2FS_OPTION(sbi).discard_unit !=
- DISCARD_UNIT_SECTION) {
+ if ((ctx->spec_mask & F2FS_SPEC_discard_unit) &&
+ F2FS_CTX_INFO(ctx).discard_unit != DISCARD_UNIT_SECTION) {
f2fs_info(sbi, "Zoned block device doesn't need small discard, set discard_unit=section by default");
- F2FS_OPTION(sbi).discard_unit =
- DISCARD_UNIT_SECTION;
+ F2FS_CTX_INFO(ctx).discard_unit = DISCARD_UNIT_SECTION;
}
- if (F2FS_OPTION(sbi).fs_mode != FS_MODE_LFS) {
+ if ((ctx->spec_mask & F2FS_SPEC_mode) &&
+ F2FS_CTX_INFO(ctx).fs_mode != FS_MODE_LFS) {
f2fs_info(sbi, "Only lfs mode is allowed with zoned block device feature");
return -EINVAL;
}
@@ -1372,43 +1471,25 @@ static int f2fs_default_check(struct f2fs_sb_info *sbi)
#endif
}
-#ifdef CONFIG_F2FS_FS_COMPRESSION
- if (f2fs_test_compress_extension(sbi)) {
- f2fs_err(sbi, "invalid compress or nocompress extension");
- return -EINVAL;
- }
-#endif
-
- if (test_opt(sbi, INLINE_XATTR_SIZE)) {
- int min_size, max_size;
-
+ if (ctx_test_opt(ctx, F2FS_MOUNT_INLINE_XATTR_SIZE)) {
if (!f2fs_sb_has_extra_attr(sbi) ||
!f2fs_sb_has_flexible_inline_xattr(sbi)) {
f2fs_err(sbi, "extra_attr or flexible_inline_xattr feature is off");
return -EINVAL;
}
- if (!test_opt(sbi, INLINE_XATTR)) {
+ if (!ctx_test_opt(ctx, F2FS_MOUNT_INLINE_XATTR) && !test_opt(sbi, INLINE_XATTR)) {
f2fs_err(sbi, "inline_xattr_size option should be set with inline_xattr option");
return -EINVAL;
}
-
- min_size = MIN_INLINE_XATTR_SIZE;
- max_size = MAX_INLINE_XATTR_SIZE;
-
- if (F2FS_OPTION(sbi).inline_xattr_size < min_size ||
- F2FS_OPTION(sbi).inline_xattr_size > max_size) {
- f2fs_err(sbi, "inline xattr size is out of range: %d ~ %d",
- min_size, max_size);
- return -EINVAL;
- }
}
- if (test_opt(sbi, ATGC) && f2fs_lfs_mode(sbi)) {
+ if (ctx_test_opt(ctx, F2FS_MOUNT_ATGC) &&
+ F2FS_CTX_INFO(ctx).fs_mode == FS_MODE_LFS) {
f2fs_err(sbi, "LFS is not compatible with ATGC");
return -EINVAL;
}
- if (f2fs_is_readonly(sbi) && test_opt(sbi, FLUSH_MERGE)) {
+ if (f2fs_is_readonly(sbi) && ctx_test_opt(ctx, F2FS_MOUNT_FLUSH_MERGE)) {
f2fs_err(sbi, "FLUSH_MERGE not compatible with readonly mode");
return -EINVAL;
}
@@ -1417,12 +1498,190 @@ static int f2fs_default_check(struct f2fs_sb_info *sbi)
f2fs_err(sbi, "Allow to mount readonly mode only");
return -EROFS;
}
+ return 0;
+}
+
+static void f2fs_apply_quota_options(struct fs_context *fc,
+ struct super_block *sb)
+{
+#ifdef CONFIG_QUOTA
+ struct f2fs_fs_context *ctx = fc->fs_private;
+ struct f2fs_sb_info *sbi = F2FS_SB(sb);
+ bool quota_feature = f2fs_sb_has_quota_ino(sbi);
+ char *qname;
+ int i;
+
+ if (quota_feature)
+ return;
+
+ for (i = 0; i < MAXQUOTAS; i++) {
+ if (!(ctx->qname_mask & (1 << i)))
+ continue;
+
+ qname = F2FS_CTX_INFO(ctx).s_qf_names[i];
+ if (qname) {
+ qname = kstrdup(F2FS_CTX_INFO(ctx).s_qf_names[i],
+ GFP_KERNEL | __GFP_NOFAIL);
+ set_opt(sbi, QUOTA);
+ }
+ F2FS_OPTION(sbi).s_qf_names[i] = qname;
+ }
+
+ if (ctx->spec_mask & F2FS_SPEC_jqfmt)
+ F2FS_OPTION(sbi).s_jquota_fmt = F2FS_CTX_INFO(ctx).s_jquota_fmt;
+
+ if (quota_feature && F2FS_OPTION(sbi).s_jquota_fmt) {
+ f2fs_info(sbi, "QUOTA feature is enabled, so ignore jquota_fmt");
+ F2FS_OPTION(sbi).s_jquota_fmt = 0;
+ }
+#endif
+}
+
+static void f2fs_apply_test_dummy_encryption(struct fs_context *fc,
+ struct super_block *sb)
+{
+ struct f2fs_fs_context *ctx = fc->fs_private;
+ struct f2fs_sb_info *sbi = F2FS_SB(sb);
+
+ if (!fscrypt_is_dummy_policy_set(&F2FS_CTX_INFO(ctx).dummy_enc_policy) ||
+ /* if already set, it was already verified to be the same */
+ fscrypt_is_dummy_policy_set(&F2FS_OPTION(sbi).dummy_enc_policy))
+ return;
+ swap(F2FS_OPTION(sbi).dummy_enc_policy, F2FS_CTX_INFO(ctx).dummy_enc_policy);
+ f2fs_warn(sbi, "Test dummy encryption mode enabled");
+}
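
The swap() here transfers ownership of the dummy policy from the ctx to the sbi without copying, and leaves the old (unset) value in the ctx so the context's free path stays the single cleanup point. A userspace sketch of the same hand-off, using a local SWAP macro:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    /* typeof is a GCC/Clang extension, as in the kernel's swap() */
    #define SWAP(a, b) do { typeof(a) _t = (a); (a) = (b); (b) = _t; } while (0)

    struct holder { char *policy; };

    static void holder_free(struct holder *h)
    {
            free(h->policy);          /* single cleanup point */
            h->policy = NULL;
    }

    int main(void)
    {
            struct holder ctx = { strdup("test-policy") };
            struct holder sbi = { NULL };

            SWAP(sbi.policy, ctx.policy);   /* transfer ownership, no copy */
            printf("sbi now owns: %s\n", sbi.policy);

            holder_free(&ctx);   /* now frees NULL: harmless */
            holder_free(&sbi);
            return 0;
    }
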
- if (test_opt(sbi, NORECOVERY) && !f2fs_readonly(sbi->sb)) {
- f2fs_err(sbi, "norecovery requires readonly mount");
+static void f2fs_apply_compression(struct fs_context *fc,
+ struct super_block *sb)
+{
+#ifdef CONFIG_F2FS_FS_COMPRESSION
+ struct f2fs_fs_context *ctx = fc->fs_private;
+ struct f2fs_sb_info *sbi = F2FS_SB(sb);
+ unsigned char (*ctx_ext)[F2FS_EXTENSION_LEN];
+ unsigned char (*sbi_ext)[F2FS_EXTENSION_LEN];
+ int ctx_cnt, sbi_cnt, i;
+
+ if (ctx->spec_mask & F2FS_SPEC_compress_level)
+ F2FS_OPTION(sbi).compress_level =
+ F2FS_CTX_INFO(ctx).compress_level;
+ if (ctx->spec_mask & F2FS_SPEC_compress_algorithm)
+ F2FS_OPTION(sbi).compress_algorithm =
+ F2FS_CTX_INFO(ctx).compress_algorithm;
+ if (ctx->spec_mask & F2FS_SPEC_compress_log_size)
+ F2FS_OPTION(sbi).compress_log_size =
+ F2FS_CTX_INFO(ctx).compress_log_size;
+ if (ctx->spec_mask & F2FS_SPEC_compress_chksum)
+ F2FS_OPTION(sbi).compress_chksum =
+ F2FS_CTX_INFO(ctx).compress_chksum;
+ if (ctx->spec_mask & F2FS_SPEC_compress_mode)
+ F2FS_OPTION(sbi).compress_mode =
+ F2FS_CTX_INFO(ctx).compress_mode;
+ if (ctx->spec_mask & F2FS_SPEC_compress_extension) {
+ ctx_ext = F2FS_CTX_INFO(ctx).extensions;
+ ctx_cnt = F2FS_CTX_INFO(ctx).compress_ext_cnt;
+ sbi_ext = F2FS_OPTION(sbi).extensions;
+ sbi_cnt = F2FS_OPTION(sbi).compress_ext_cnt;
+ for (i = 0; i < ctx_cnt; i++) {
+ if (strlen(ctx_ext[i]) == 0)
+ continue;
+ strscpy(sbi_ext[sbi_cnt], ctx_ext[i]);
+ sbi_cnt++;
+ }
+ F2FS_OPTION(sbi).compress_ext_cnt = sbi_cnt;
+ }
+ if (ctx->spec_mask & F2FS_SPEC_nocompress_extension) {
+ ctx_ext = F2FS_CTX_INFO(ctx).noextensions;
+ ctx_cnt = F2FS_CTX_INFO(ctx).nocompress_ext_cnt;
+ sbi_ext = F2FS_OPTION(sbi).noextensions;
+ sbi_cnt = F2FS_OPTION(sbi).nocompress_ext_cnt;
+ for (i = 0; i < ctx_cnt; i++) {
+ if (strlen(ctx_ext[i]) == 0)
+ continue;
+ strscpy(sbi_ext[sbi_cnt], ctx_ext[i]);
+ sbi_cnt++;
+ }
+ F2FS_OPTION(sbi).nocompress_ext_cnt = sbi_cnt;
+ }
+#endif
+}
+
+static void f2fs_apply_options(struct fs_context *fc, struct super_block *sb)
+{
+ struct f2fs_fs_context *ctx = fc->fs_private;
+ struct f2fs_sb_info *sbi = F2FS_SB(sb);
+
+ F2FS_OPTION(sbi).opt &= ~ctx->opt_mask;
+ F2FS_OPTION(sbi).opt |= F2FS_CTX_INFO(ctx).opt;
+
+ if (ctx->spec_mask & F2FS_SPEC_background_gc)
+ F2FS_OPTION(sbi).bggc_mode = F2FS_CTX_INFO(ctx).bggc_mode;
+ if (ctx->spec_mask & F2FS_SPEC_inline_xattr_size)
+ F2FS_OPTION(sbi).inline_xattr_size =
+ F2FS_CTX_INFO(ctx).inline_xattr_size;
+ if (ctx->spec_mask & F2FS_SPEC_active_logs)
+ F2FS_OPTION(sbi).active_logs = F2FS_CTX_INFO(ctx).active_logs;
+ if (ctx->spec_mask & F2FS_SPEC_reserve_root)
+ F2FS_OPTION(sbi).root_reserved_blocks =
+ F2FS_CTX_INFO(ctx).root_reserved_blocks;
+ if (ctx->spec_mask & F2FS_SPEC_resgid)
+ F2FS_OPTION(sbi).s_resgid = F2FS_CTX_INFO(ctx).s_resgid;
+ if (ctx->spec_mask & F2FS_SPEC_resuid)
+ F2FS_OPTION(sbi).s_resuid = F2FS_CTX_INFO(ctx).s_resuid;
+ if (ctx->spec_mask & F2FS_SPEC_mode)
+ F2FS_OPTION(sbi).fs_mode = F2FS_CTX_INFO(ctx).fs_mode;
+#ifdef CONFIG_F2FS_FAULT_INJECTION
+ if (ctx->spec_mask & F2FS_SPEC_fault_injection)
+ (void)f2fs_build_fault_attr(sbi,
+ F2FS_CTX_INFO(ctx).fault_info.inject_rate, 0, FAULT_RATE);
+ if (ctx->spec_mask & F2FS_SPEC_fault_type)
+ (void)f2fs_build_fault_attr(sbi, 0,
+ F2FS_CTX_INFO(ctx).fault_info.inject_type, FAULT_TYPE);
+#endif
+ if (ctx->spec_mask & F2FS_SPEC_alloc_mode)
+ F2FS_OPTION(sbi).alloc_mode = F2FS_CTX_INFO(ctx).alloc_mode;
+ if (ctx->spec_mask & F2FS_SPEC_fsync_mode)
+ F2FS_OPTION(sbi).fsync_mode = F2FS_CTX_INFO(ctx).fsync_mode;
+ if (ctx->spec_mask & F2FS_SPEC_checkpoint_disable_cap)
+ F2FS_OPTION(sbi).unusable_cap = F2FS_CTX_INFO(ctx).unusable_cap;
+ if (ctx->spec_mask & F2FS_SPEC_checkpoint_disable_cap_perc)
+ F2FS_OPTION(sbi).unusable_cap_perc =
+ F2FS_CTX_INFO(ctx).unusable_cap_perc;
+ if (ctx->spec_mask & F2FS_SPEC_discard_unit)
+ F2FS_OPTION(sbi).discard_unit = F2FS_CTX_INFO(ctx).discard_unit;
+ if (ctx->spec_mask & F2FS_SPEC_memory_mode)
+ F2FS_OPTION(sbi).memory_mode = F2FS_CTX_INFO(ctx).memory_mode;
+ if (ctx->spec_mask & F2FS_SPEC_errors)
+ F2FS_OPTION(sbi).errors = F2FS_CTX_INFO(ctx).errors;
+
+ f2fs_apply_compression(fc, sb);
+ f2fs_apply_test_dummy_encryption(fc, sb);
+ f2fs_apply_quota_options(fc, sb);
+}
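
The first two statements implement "clear every flag the user mentioned, then OR in the values chosen", which is what lets a remount explicitly turn a flag off. A worked bit example:

    #include <stdio.h>

    int main(void)
    {
            unsigned int opt      = 0x05;  /* flags A(0x01) and C(0x04) set */
            unsigned int opt_mask = 0x03;  /* user mentioned A and B */
            unsigned int ctx_opt  = 0x02;  /* ...and wants only B set */

            opt = (opt & ~opt_mask) | ctx_opt;
            printf("result: 0x%02x\n", opt);  /* 0x06: A cleared, B set, C kept */
            return 0;
    }
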
+
+static int f2fs_sanity_check_options(struct f2fs_sb_info *sbi, bool remount)
+{
+ if (f2fs_sb_has_device_alias(sbi) &&
+ !test_opt(sbi, READ_EXTENT_CACHE)) {
+ f2fs_err(sbi, "device aliasing requires extent cache");
return -EINVAL;
}
+ if (!remount)
+ return 0;
+
+#ifdef CONFIG_BLK_DEV_ZONED
+ if (f2fs_sb_has_blkzoned(sbi) &&
+ sbi->max_open_zones < F2FS_OPTION(sbi).active_logs) {
+ f2fs_err(sbi,
+ "zoned: max open zones %u is too small, need at least %u open zones",
+ sbi->max_open_zones, F2FS_OPTION(sbi).active_logs);
+ return -EINVAL;
+ }
+#endif
+ if (f2fs_lfs_mode(sbi) && !IS_F2FS_IPU_DISABLE(sbi)) {
+ f2fs_warn(sbi, "LFS is not compatible with IPU");
+ return -EINVAL;
+ }
return 0;
}
@@ -1442,6 +1701,7 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb)
/* Initialize f2fs-specific inode info */
atomic_set(&fi->dirty_pages, 0);
atomic_set(&fi->i_compr_blocks, 0);
+ atomic_set(&fi->open_count, 0);
init_f2fs_rwsem(&fi->i_sem);
spin_lock_init(&fi->i_size_lock);
INIT_LIST_HEAD(&fi->dirty_list);
@@ -1718,7 +1978,7 @@ static void f2fs_put_super(struct super_block *sb)
destroy_percpu_info(sbi);
f2fs_destroy_iostat(sbi);
for (i = 0; i < NR_PAGE_TYPE; i++)
- kvfree(sbi->write_io[i]);
+ kfree(sbi->write_io[i]);
#if IS_ENABLED(CONFIG_UNICODE)
utf8_unload(sb->s_encoding);
#endif
@@ -2329,11 +2589,12 @@ static void f2fs_enable_checkpoint(struct f2fs_sb_info *sbi)
f2fs_flush_ckpt_thread(sbi);
}
-static int f2fs_remount(struct super_block *sb, int *flags, char *data)
+static int __f2fs_remount(struct fs_context *fc, struct super_block *sb)
{
struct f2fs_sb_info *sbi = F2FS_SB(sb);
struct f2fs_mount_info org_mount_opt;
unsigned long old_sb_flags;
+ unsigned int flags = fc->sb_flags;
int err;
bool need_restart_gc = false, need_stop_gc = false;
bool need_restart_flush = false, need_stop_flush = false;
@@ -2379,7 +2640,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
#endif
/* recover superblocks we couldn't write due to previous RO mount */
- if (!(*flags & SB_RDONLY) && is_sbi_flag_set(sbi, SBI_NEED_SB_WRITE)) {
+ if (!(flags & SB_RDONLY) && is_sbi_flag_set(sbi, SBI_NEED_SB_WRITE)) {
err = f2fs_commit_super(sbi, false);
f2fs_info(sbi, "Try to recover all the superblocks, ret: %d",
err);
@@ -2389,23 +2650,13 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
default_options(sbi, true);
- /* parse mount options */
- err = parse_options(sbi, data, true);
+ err = f2fs_check_opt_consistency(fc, sb);
if (err)
goto restore_opts;
-#ifdef CONFIG_BLK_DEV_ZONED
- if (f2fs_sb_has_blkzoned(sbi) &&
- sbi->max_open_zones < F2FS_OPTION(sbi).active_logs) {
- f2fs_err(sbi,
- "zoned: max open zones %u is too small, need at least %u open zones",
- sbi->max_open_zones, F2FS_OPTION(sbi).active_logs);
- err = -EINVAL;
- goto restore_opts;
- }
-#endif
+ f2fs_apply_options(fc, sb);
- err = f2fs_default_check(sbi);
+ err = f2fs_sanity_check_options(sbi, true);
if (err)
goto restore_opts;
@@ -2416,20 +2667,20 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
* Previous and new state of filesystem is RO,
* so skip checking GC and FLUSH_MERGE conditions.
*/
- if (f2fs_readonly(sb) && (*flags & SB_RDONLY))
+ if (f2fs_readonly(sb) && (flags & SB_RDONLY))
goto skip;
- if (f2fs_dev_is_readonly(sbi) && !(*flags & SB_RDONLY)) {
+ if (f2fs_dev_is_readonly(sbi) && !(flags & SB_RDONLY)) {
err = -EROFS;
goto restore_opts;
}
#ifdef CONFIG_QUOTA
- if (!f2fs_readonly(sb) && (*flags & SB_RDONLY)) {
+ if (!f2fs_readonly(sb) && (flags & SB_RDONLY)) {
err = dquot_suspend(sb, -1);
if (err < 0)
goto restore_opts;
- } else if (f2fs_readonly(sb) && !(*flags & SB_RDONLY)) {
+ } else if (f2fs_readonly(sb) && !(flags & SB_RDONLY)) {
/* dquot_resume needs RW */
sb->s_flags &= ~SB_RDONLY;
if (sb_any_quota_suspended(sb)) {
@@ -2441,12 +2692,6 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
}
}
#endif
- if (f2fs_lfs_mode(sbi) && !IS_F2FS_IPU_DISABLE(sbi)) {
- err = -EINVAL;
- f2fs_warn(sbi, "LFS is not compatible with IPU");
- goto restore_opts;
- }
-
/* disallow enable atgc dynamically */
if (no_atgc == !!test_opt(sbi, ATGC)) {
err = -EINVAL;
@@ -2485,7 +2730,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
goto restore_opts;
}
- if ((*flags & SB_RDONLY) && test_opt(sbi, DISABLE_CHECKPOINT)) {
+ if ((flags & SB_RDONLY) && test_opt(sbi, DISABLE_CHECKPOINT)) {
err = -EINVAL;
f2fs_warn(sbi, "disabling checkpoint not compatible with read-only");
goto restore_opts;
@@ -2496,7 +2741,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
* or if background_gc = off is passed in mount
* option. Also sync the filesystem.
*/
- if ((*flags & SB_RDONLY) ||
+ if ((flags & SB_RDONLY) ||
(F2FS_OPTION(sbi).bggc_mode == BGGC_MODE_OFF &&
!test_opt(sbi, GC_MERGE))) {
if (sbi->gc_thread) {
@@ -2510,7 +2755,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
need_stop_gc = true;
}
- if (*flags & SB_RDONLY) {
+ if (flags & SB_RDONLY) {
sync_inodes_sb(sb);
set_sbi_flag(sbi, SBI_IS_DIRTY);
@@ -2523,7 +2768,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
* We stop issue flush thread if FS is mounted as RO
* or if flush_merge is not passed in mount option.
*/
- if ((*flags & SB_RDONLY) || !test_opt(sbi, FLUSH_MERGE)) {
+ if ((flags & SB_RDONLY) || !test_opt(sbi, FLUSH_MERGE)) {
clear_opt(sbi, FLUSH_MERGE);
f2fs_destroy_flush_cmd_control(sbi, false);
need_restart_flush = true;
@@ -2565,11 +2810,11 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
* triggered while remount and we need to take care of it before
* returning from remount.
*/
- if ((*flags & SB_RDONLY) || test_opt(sbi, DISABLE_CHECKPOINT) ||
+ if ((flags & SB_RDONLY) || test_opt(sbi, DISABLE_CHECKPOINT) ||
!test_opt(sbi, MERGE_CHECKPOINT)) {
f2fs_stop_ckpt_thread(sbi);
} else {
- /* Flush if the prevous checkpoint, if exists. */
+ /* Flush the previous checkpoint, if it exists. */
f2fs_flush_ckpt_thread(sbi);
err = f2fs_start_ckpt_thread(sbi);
@@ -2592,7 +2837,7 @@ skip:
(test_opt(sbi, POSIX_ACL) ? SB_POSIXACL : 0);
limit_reserve_root(sbi);
- *flags = (*flags & ~SB_LAZYTIME) | (sb->s_flags & SB_LAZYTIME);
+ fc->sb_flags = (flags & ~SB_LAZYTIME) | (sb->s_flags & SB_LAZYTIME);
sbi->umount_lock_holder = NULL;
return 0;
@@ -3263,7 +3508,6 @@ static const struct super_operations f2fs_sops = {
.freeze_fs = f2fs_freeze,
.unfreeze_fs = f2fs_unfreeze,
.statfs = f2fs_statfs,
- .remount_fs = f2fs_remount,
.shutdown = f2fs_shutdown,
};
@@ -3451,6 +3695,7 @@ static int __f2fs_commit_super(struct f2fs_sb_info *sbi, struct folio *folio,
f2fs_bug_on(sbi, 1);
ret = submit_bio_wait(bio);
+ bio_put(bio);
folio_end_writeback(folio);
return ret;
@@ -4522,14 +4767,14 @@ static void f2fs_tuning_parameters(struct f2fs_sb_info *sbi)
sbi->readdir_ra = true;
}
-static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
+static int f2fs_fill_super(struct super_block *sb, struct fs_context *fc)
{
+ struct f2fs_fs_context *ctx = fc->fs_private;
struct f2fs_sb_info *sbi;
struct f2fs_super_block *raw_super;
struct inode *root;
int err;
bool skip_recovery = false, need_fsck = false;
- char *options = NULL;
int recovery, i, valid_super_block;
struct curseg_info *seg_i;
int retry_cnt = 1;
@@ -4592,18 +4837,14 @@ try_onemore:
sizeof(raw_super->uuid));
default_options(sbi, false);
- /* parse mount options */
- options = kstrdup((const char *)data, GFP_KERNEL);
- if (data && !options) {
- err = -ENOMEM;
- goto free_sb_buf;
- }
- err = parse_options(sbi, options, false);
+ err = f2fs_check_opt_consistency(fc, sb);
if (err)
- goto free_options;
+ goto free_sb_buf;
+
+ f2fs_apply_options(fc, sb);
- err = f2fs_default_check(sbi);
+ err = f2fs_sanity_check_options(sbi, false);
if (err)
goto free_options;
@@ -4770,6 +5011,10 @@ try_onemore:
/* get segno of first zoned block device */
sbi->first_seq_zone_segno = get_first_seq_zone_segno(sbi);
+ sbi->reserved_pin_section = f2fs_sb_has_blkzoned(sbi) ?
+ ZONED_PIN_SEC_REQUIRED_COUNT :
+ GET_SEC_FROM_SEG(sbi, overprovision_segments(sbi));
+
/* Read accumulated write IO statistics if exists */
seg_i = CURSEG_I(sbi, CURSEG_HOT_NODE);
if (__exist_node_summaries(sbi))
@@ -4930,7 +5175,6 @@ reset_checkpoint:
if (err)
goto sync_free_meta;
}
- kvfree(options);
/* recover broken superblock */
if (recovery) {
@@ -5013,7 +5257,7 @@ free_iostat:
f2fs_destroy_iostat(sbi);
free_bio_info:
for (i = 0; i < NR_PAGE_TYPE; i++)
- kvfree(sbi->write_io[i]);
+ kfree(sbi->write_io[i]);
#if IS_ENABLED(CONFIG_UNICODE)
utf8_unload(sb->s_encoding);
@@ -5024,8 +5268,8 @@ free_options:
for (i = 0; i < MAXQUOTAS; i++)
kfree(F2FS_OPTION(sbi).s_qf_names[i]);
#endif
- fscrypt_free_dummy_policy(&F2FS_OPTION(sbi).dummy_enc_policy);
- kvfree(options);
+ /* no need to free dummy_enc_policy here; on failure it is handed back to the ctx */
+ swap(F2FS_CTX_INFO(ctx).dummy_enc_policy, F2FS_OPTION(sbi).dummy_enc_policy);
free_sb_buf:
kfree(raw_super);
free_sbi:
@@ -5041,12 +5285,39 @@ free_sbi:
return err;
}
-static struct dentry *f2fs_mount(struct file_system_type *fs_type, int flags,
- const char *dev_name, void *data)
+static int f2fs_get_tree(struct fs_context *fc)
{
- return mount_bdev(fs_type, flags, dev_name, data, f2fs_fill_super);
+ return get_tree_bdev(fc, f2fs_fill_super);
}
+static int f2fs_reconfigure(struct fs_context *fc)
+{
+ struct super_block *sb = fc->root->d_sb;
+
+ return __f2fs_remount(fc, sb);
+}
+
+static void f2fs_fc_free(struct fs_context *fc)
+{
+ struct f2fs_fs_context *ctx = fc->fs_private;
+
+ if (!ctx)
+ return;
+
+#ifdef CONFIG_QUOTA
+ f2fs_unnote_qf_name_all(fc);
+#endif
+ fscrypt_free_dummy_policy(&F2FS_CTX_INFO(ctx).dummy_enc_policy);
+ kfree(ctx);
+}
+
+static const struct fs_context_operations f2fs_context_ops = {
+ .parse_param = f2fs_parse_param,
+ .get_tree = f2fs_get_tree,
+ .reconfigure = f2fs_reconfigure,
+ .free = f2fs_fc_free,
+};
+
static void kill_f2fs_super(struct super_block *sb)
{
struct f2fs_sb_info *sbi = F2FS_SB(sb);
@@ -5088,10 +5359,24 @@ static void kill_f2fs_super(struct super_block *sb)
}
}
+static int f2fs_init_fs_context(struct fs_context *fc)
+{
+ struct f2fs_fs_context *ctx;
+
+ ctx = kzalloc(sizeof(struct f2fs_fs_context), GFP_KERNEL);
+ if (!ctx)
+ return -ENOMEM;
+
+ fc->fs_private = ctx;
+ fc->ops = &f2fs_context_ops;
+
+ return 0;
+}
+
static struct file_system_type f2fs_fs_type = {
.owner = THIS_MODULE,
.name = "f2fs",
- .mount = f2fs_mount,
+ .init_fs_context = f2fs_init_fs_context,
.kill_sb = kill_f2fs_super,
.fs_flags = FS_REQUIRES_DEV | FS_ALLOW_IDMAP,
};
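
With .init_fs_context wired up, f2fs mounts go through the fs_context path, which userspace can drive directly with the new mount syscalls. A hedged sketch (device and mountpoint are placeholders; needs root, a kernel >= 5.2, and headers that define the SYS_fs* numbers; raw syscall() is used since older libcs lack wrappers):

    #include <fcntl.h>
    #include <linux/mount.h>
    #include <stdio.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    int main(void)
    {
            long fsfd, mfd;

            fsfd = syscall(SYS_fsopen, "f2fs", FSOPEN_CLOEXEC);
            if (fsfd < 0) { perror("fsopen"); return 1; }

            /* these are the same key/value strings f2fs_parse_param() sees */
            syscall(SYS_fsconfig, fsfd, FSCONFIG_SET_STRING, "source", "/dev/sdX1", 0);
            syscall(SYS_fsconfig, fsfd, FSCONFIG_SET_STRING, "background_gc", "on", 0);
            if (syscall(SYS_fsconfig, fsfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0) < 0) {
                    perror("fsconfig create");
                    return 1;
            }

            mfd = syscall(SYS_fsmount, fsfd, FSMOUNT_CLOEXEC, 0);
            if (mfd < 0) { perror("fsmount"); return 1; }

            if (syscall(SYS_move_mount, mfd, "", AT_FDCWD, "/mnt",
                        MOVE_MOUNT_F_EMPTY_PATH) < 0) {
                    perror("move_mount");
                    return 1;
            }
            return 0;
    }
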
diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
index 75134d69a0bd..f736052dea50 100644
--- a/fs/f2fs/sysfs.c
+++ b/fs/f2fs/sysfs.c
@@ -628,6 +628,27 @@ out:
return count;
}
+ if (!strcmp(a->attr.name, "gc_no_zoned_gc_percent")) {
+ if (t > 100)
+ return -EINVAL;
+ *ui = (unsigned int)t;
+ return count;
+ }
+
+ if (!strcmp(a->attr.name, "gc_boost_zoned_gc_percent")) {
+ if (t > 100)
+ return -EINVAL;
+ *ui = (unsigned int)t;
+ return count;
+ }
+
+ if (!strcmp(a->attr.name, "gc_valid_thresh_ratio")) {
+ if (t > 100)
+ return -EINVAL;
+ *ui = (unsigned int)t;
+ return count;
+ }
+
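
The new knobs appear under the usual per-volume directory documented in sysfs-fs-f2fs, and the store handlers above reject percentages over 100. A small sketch of tuning one of them (the device name is a placeholder, and the attribute only exists with this patch applied):

    #include <stdio.h>

    int main(void)
    {
            /* hypothetical device name; adjust to the mounted f2fs volume */
            const char *path = "/sys/fs/f2fs/sda1/gc_valid_thresh_ratio";
            FILE *f = fopen(path, "w");

            if (!f) { perror("fopen"); return 1; }
            /* values above 100 are rejected with -EINVAL by the store handler */
            fprintf(f, "80\n");
            return fclose(f) ? 1 : 0;
    }
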
#ifdef CONFIG_F2FS_IOSTAT
if (!strcmp(a->attr.name, "iostat_enable")) {
sbi->iostat_enable = !!t;
@@ -824,6 +845,27 @@ out:
return count;
}
+ if (!strcmp(a->attr.name, "reserved_pin_section")) {
+ if (t > GET_SEC_FROM_SEG(sbi, overprovision_segments(sbi)))
+ return -EINVAL;
+ *ui = (unsigned int)t;
+ return count;
+ }
+
+ if (!strcmp(a->attr.name, "gc_boost_gc_multiple")) {
+ if (t < 1 || t > SEGS_PER_SEC(sbi))
+ return -EINVAL;
+ sbi->gc_thread->boost_gc_multiple = (unsigned int)t;
+ return count;
+ }
+
+ if (!strcmp(a->attr.name, "gc_boost_gc_greedy")) {
+ if (t > GC_GREEDY)
+ return -EINVAL;
+ sbi->gc_thread->boost_gc_greedy = (unsigned int)t;
+ return count;
+ }
+
*ui = (unsigned int)t;
return count;
@@ -1050,6 +1092,8 @@ GC_THREAD_RW_ATTR(gc_no_gc_sleep_time, no_gc_sleep_time);
GC_THREAD_RW_ATTR(gc_no_zoned_gc_percent, no_zoned_gc_percent);
GC_THREAD_RW_ATTR(gc_boost_zoned_gc_percent, boost_zoned_gc_percent);
GC_THREAD_RW_ATTR(gc_valid_thresh_ratio, valid_thresh_ratio);
+GC_THREAD_RW_ATTR(gc_boost_gc_multiple, boost_gc_multiple);
+GC_THREAD_RW_ATTR(gc_boost_gc_greedy, boost_gc_greedy);
/* SM_INFO ATTR */
SM_INFO_RW_ATTR(reclaim_segments, rec_prefree_segments);
@@ -1130,6 +1174,7 @@ F2FS_SBI_GENERAL_RO_ATTR(unusable_blocks_per_sec);
F2FS_SBI_GENERAL_RW_ATTR(blkzone_alloc_policy);
#endif
F2FS_SBI_GENERAL_RW_ATTR(carve_out);
+F2FS_SBI_GENERAL_RW_ATTR(reserved_pin_section);
/* STAT_INFO ATTR */
#ifdef CONFIG_F2FS_STAT_FS
@@ -1220,6 +1265,8 @@ static struct attribute *f2fs_attrs[] = {
ATTR_LIST(gc_no_zoned_gc_percent),
ATTR_LIST(gc_boost_zoned_gc_percent),
ATTR_LIST(gc_valid_thresh_ratio),
+ ATTR_LIST(gc_boost_gc_multiple),
+ ATTR_LIST(gc_boost_gc_greedy),
ATTR_LIST(gc_idle),
ATTR_LIST(gc_urgent),
ATTR_LIST(reclaim_segments),
@@ -1323,6 +1370,7 @@ static struct attribute *f2fs_attrs[] = {
ATTR_LIST(last_age_weight),
ATTR_LIST(max_read_extent_count),
ATTR_LIST(carve_out),
+ ATTR_LIST(reserved_pin_section),
NULL,
};
ATTRIBUTE_GROUPS(f2fs);
diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index 32048052c64a..5d07c469b571 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -127,6 +127,8 @@
#define __diag_GCC_8(s)
#endif
+#define __diag_GCC_all(s) __diag(s)
+
#define __diag_ignore_all(option, comment) \
__diag(__diag_GCC_ignore option)
diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
index 5206d63b3386..2f8b8bfc0e73 100644
--- a/include/linux/f2fs_fs.h
+++ b/include/linux/f2fs_fs.h
@@ -268,7 +268,7 @@ struct node_footer {
/* Node IDs in an Indirect Block */
#define NIDS_PER_BLOCK ((F2FS_BLKSIZE - sizeof(struct node_footer)) / sizeof(__le32))
-#define ADDRS_PER_PAGE(page, inode) (addrs_per_page(inode, IS_INODE(page)))
+#define ADDRS_PER_PAGE(folio, inode) (addrs_per_page(inode, IS_INODE(folio)))
#define NODE_DIR1_BLOCK (DEF_ADDRS_PER_INODE + 1)
#define NODE_DIR2_BLOCK (DEF_ADDRS_PER_INODE + 2)
diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h
index 8d0e3ad89b94..10dd161690a2 100644
--- a/include/linux/fscrypt.h
+++ b/include/linux/fscrypt.h
@@ -332,12 +332,13 @@ static inline struct page *fscrypt_pagecache_page(struct page *bounce_page)
return (struct page *)page_private(bounce_page);
}
-static inline bool fscrypt_is_bounce_folio(struct folio *folio)
+static inline bool fscrypt_is_bounce_folio(const struct folio *folio)
{
return folio->mapping == NULL;
}
-static inline struct folio *fscrypt_pagecache_folio(struct folio *bounce_folio)
+static inline
+struct folio *fscrypt_pagecache_folio(const struct folio *bounce_folio)
{
return bounce_folio->private;
}
@@ -517,12 +518,13 @@ static inline struct page *fscrypt_pagecache_page(struct page *bounce_page)
return ERR_PTR(-EINVAL);
}
-static inline bool fscrypt_is_bounce_folio(struct folio *folio)
+static inline bool fscrypt_is_bounce_folio(const struct folio *folio)
{
return false;
}
-static inline struct folio *fscrypt_pagecache_folio(struct folio *bounce_folio)
+static inline
+struct folio *fscrypt_pagecache_folio(const struct folio *bounce_folio)
{
WARN_ON_ONCE(1);
return ERR_PTR(-EINVAL);
diff --git a/include/linux/rtc/ds1685.h b/include/linux/rtc/ds1685.h
index 5a41c3bbcbe3..01da4582db6d 100644
--- a/include/linux/rtc/ds1685.h
+++ b/include/linux/rtc/ds1685.h
@@ -8,7 +8,7 @@
* include larger, battery-backed NV-SRAM, burst-mode access, and an RTC
* write counter.
*
- * Copyright (C) 2011-2014 Joshua Kinard <kumba@gentoo.org>.
+ * Copyright (C) 2011-2014 Joshua Kinard <linux@kumba.dev>.
* Copyright (C) 2009 Matthias Fuchs <matthias.fuchs@esd-electronics.com>.
*
* References:
diff --git a/include/linux/sprintf.h b/include/linux/sprintf.h
index 876130091384..f06f7b785091 100644
--- a/include/linux/sprintf.h
+++ b/include/linux/sprintf.h
@@ -23,7 +23,7 @@ __scanf(2, 0) int vsscanf(const char *, const char *, va_list);
/* These are for specific cases, do not use without real need */
extern bool no_hash_pointers;
-int no_hash_pointers_enable(char *str);
+void hash_pointers_finalize(bool slub_debug);
/* Used for Rust formatting ('%pA') */
char *rust_fmt_argument(char *buf, char *end, const void *ptr);
diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h
index bbed41ad29cf..ef282001f200 100644
--- a/kernel/printk/internal.h
+++ b/kernel/printk/internal.h
@@ -64,6 +64,7 @@ struct dev_printk_info;
extern struct printk_ringbuffer *prb;
extern bool printk_kthreads_running;
+extern bool printk_kthreads_ready;
extern bool debug_non_panic_cpus;
__printf(4, 0)
@@ -179,6 +180,7 @@ static inline void nbcon_kthread_wake(struct console *con)
#define PRINTKRB_RECORD_MAX 0
#define printk_kthreads_running (false)
+#define printk_kthreads_ready (false)
/*
* In !PRINTK builds we still export console_sem
diff --git a/kernel/printk/nbcon.c b/kernel/printk/nbcon.c
index fd12efcc4aed..646801813415 100644
--- a/kernel/printk/nbcon.c
+++ b/kernel/printk/nbcon.c
@@ -214,8 +214,9 @@ static void nbcon_seq_try_update(struct nbcon_context *ctxt, u64 new_seq)
/**
* nbcon_context_try_acquire_direct - Try to acquire directly
- * @ctxt: The context of the caller
- * @cur: The current console state
+ * @ctxt: The context of the caller
+ * @cur: The current console state
+ * @is_reacquire: This acquire is a reacquire
*
* Acquire the console when it is released. Also acquire the console when
* the current owner has a lower priority and the console is in a safe state.
@@ -225,17 +226,17 @@ static void nbcon_seq_try_update(struct nbcon_context *ctxt, u64 new_seq)
*
* Errors:
*
- * -EPERM: A panic is in progress and this is not the panic CPU.
- * Or the current owner or waiter has the same or higher
- * priority. No acquire method can be successful in
- * this case.
+ * -EPERM: A panic is in progress and this is neither the panic
+ * CPU nor is this a reacquire. Or the current owner or
+ * waiter has the same or higher priority. No acquire
+ * method can be successful in these cases.
*
* -EBUSY: The current owner has a lower priority but the console
* in an unsafe state. The caller should try using
* the handover acquire method.
*/
static int nbcon_context_try_acquire_direct(struct nbcon_context *ctxt,
- struct nbcon_state *cur)
+ struct nbcon_state *cur, bool is_reacquire)
{
unsigned int cpu = smp_processor_id();
struct console *con = ctxt->console;
@@ -243,14 +244,20 @@ static int nbcon_context_try_acquire_direct(struct nbcon_context *ctxt,
do {
/*
- * Panic does not imply that the console is owned. However, it
- * is critical that non-panic CPUs during panic are unable to
- * acquire ownership in order to satisfy the assumptions of
- * nbcon_waiter_matches(). In particular, the assumption that
- * lower priorities are ignored during panic.
+ * Panic does not imply that the console is owned. However,
+ * since all non-panic CPUs are stopped during panic(), it
+ * is safer to have them avoid gaining console ownership.
+ *
+ * If this acquire is a reacquire (and an unsafe takeover
+ * has not previously occurred) then it is allowed to attempt
+ * a direct acquire in panic. This gives console drivers an
+ * opportunity to perform any necessary cleanup if they were
+ * interrupted by the panic CPU while printing.
*/
- if (other_cpu_in_panic())
+ if (other_cpu_in_panic() &&
+ (!is_reacquire || cur->unsafe_takeover)) {
return -EPERM;
+ }
if (ctxt->prio <= cur->prio || ctxt->prio <= cur->req_prio)
return -EPERM;
@@ -301,8 +308,9 @@ static bool nbcon_waiter_matches(struct nbcon_state *cur, int expected_prio)
* Event #1 implies this context is EMERGENCY.
* Event #2 implies the new context is PANIC.
* Event #3 occurs when panic() has flushed the console.
- * Events #4 and #5 are not possible due to the other_cpu_in_panic()
- * check in nbcon_context_try_acquire_direct().
+ * Event #4 occurs when a non-panic CPU reacquires.
+ * Event #5 is not possible due to the other_cpu_in_panic() check
+ * in nbcon_context_try_acquire_handover().
*/
return (cur->req_prio == expected_prio);
@@ -431,6 +439,16 @@ static int nbcon_context_try_acquire_handover(struct nbcon_context *ctxt,
WARN_ON_ONCE(ctxt->prio <= cur->prio || ctxt->prio <= cur->req_prio);
WARN_ON_ONCE(!cur->unsafe);
+ /*
+ * Panic does not imply that the console is owned. However, it
+ * is critical that non-panic CPUs during panic are unable to
+ * wait for a handover in order to satisfy the assumptions of
+ * nbcon_waiter_matches(). In particular, the assumption that
+ * lower priorities are ignored during panic.
+ */
+ if (other_cpu_in_panic())
+ return -EPERM;
+
/* Handover is not possible on the same CPU. */
if (cur->cpu == cpu)
return -EBUSY;
@@ -558,7 +576,8 @@ static struct printk_buffers panic_nbcon_pbufs;
/**
* nbcon_context_try_acquire - Try to acquire nbcon console
- * @ctxt: The context of the caller
+ * @ctxt: The context of the caller
+ * @is_reacquire: This acquire is a reacquire
*
* Context: Under @ctxt->con->device_lock() or local_irq_save().
* Return: True if the console was acquired. False otherwise.
@@ -568,7 +587,7 @@ static struct printk_buffers panic_nbcon_pbufs;
* in an unsafe state. Otherwise, on success the caller may assume
* the console is not in an unsafe state.
*/
-static bool nbcon_context_try_acquire(struct nbcon_context *ctxt)
+static bool nbcon_context_try_acquire(struct nbcon_context *ctxt, bool is_reacquire)
{
unsigned int cpu = smp_processor_id();
struct console *con = ctxt->console;
@@ -577,7 +596,7 @@ static bool nbcon_context_try_acquire(struct nbcon_context *ctxt)
nbcon_state_read(con, &cur);
try_again:
- err = nbcon_context_try_acquire_direct(ctxt, &cur);
+ err = nbcon_context_try_acquire_direct(ctxt, &cur, is_reacquire);
if (err != -EBUSY)
goto out;
@@ -913,7 +932,7 @@ void nbcon_reacquire_nobuf(struct nbcon_write_context *wctxt)
{
struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt);
- while (!nbcon_context_try_acquire(ctxt))
+ while (!nbcon_context_try_acquire(ctxt, true))
cpu_relax();
nbcon_write_context_set_buf(wctxt, NULL, 0);
@@ -1101,7 +1120,7 @@ static bool nbcon_emit_one(struct nbcon_write_context *wctxt, bool use_atomic)
cant_migrate();
}
- if (!nbcon_context_try_acquire(ctxt))
+ if (!nbcon_context_try_acquire(ctxt, false))
goto out;
/*
@@ -1486,7 +1505,7 @@ static int __nbcon_atomic_flush_pending_con(struct console *con, u64 stop_seq,
ctxt->prio = nbcon_get_default_prio();
ctxt->allow_unsafe_takeover = allow_unsafe_takeover;
- if (!nbcon_context_try_acquire(ctxt))
+ if (!nbcon_context_try_acquire(ctxt, false))
return -EPERM;
while (nbcon_seq_read(con) < stop_seq) {
@@ -1671,6 +1690,9 @@ bool nbcon_alloc(struct console *con)
{
struct nbcon_state state = { };
+ /* Synchronize the kthread start. */
+ lockdep_assert_console_list_lock_held();
+
/* The write_thread() callback is mandatory. */
if (WARN_ON(!con->write_thread))
return false;
@@ -1701,12 +1723,15 @@ bool nbcon_alloc(struct console *con)
return false;
}
- if (printk_kthreads_running) {
+ if (printk_kthreads_ready && !have_boot_console) {
if (!nbcon_kthread_create(con)) {
kfree(con->pbufs);
con->pbufs = NULL;
return false;
}
+
+ /* Might be the first kthread. */
+ printk_kthreads_running = true;
}
}
@@ -1716,14 +1741,30 @@ bool nbcon_alloc(struct console *con)
/**
* nbcon_free - Free and cleanup the nbcon console specific data
* @con: Console to free/cleanup nbcon data
+ *
+ * Important: @have_nbcon_console must be updated before calling
+ * this function. In particular, it can be set only when there
+ * is still another nbcon console registered.
*/
void nbcon_free(struct console *con)
{
struct nbcon_state state = { };
- if (printk_kthreads_running)
+ /* Synchronize the kthread stop. */
+ lockdep_assert_console_list_lock_held();
+
+ if (printk_kthreads_running) {
nbcon_kthread_stop(con);
+ /*
+ * Might be the last nbcon console.
+ *
+ * Do not rely on printk_kthreads_check_locked(). It is not
+ * called in some code paths, see nbcon_free() callers.
+ */
+ if (!have_nbcon_console)
+ printk_kthreads_running = false;
+ }
+
nbcon_state_set(con, &state);
/* Boot consoles share global printk buffers. */
@@ -1762,7 +1803,7 @@ bool nbcon_device_try_acquire(struct console *con)
ctxt->console = con;
ctxt->prio = NBCON_PRIO_NORMAL;
- if (!nbcon_context_try_acquire(ctxt))
+ if (!nbcon_context_try_acquire(ctxt, false))
return false;
if (!nbcon_context_enter_unsafe(ctxt))
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 1eea80d0648e..0efbcdda9aab 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -3574,7 +3574,7 @@ EXPORT_SYMBOL(console_resume);
static int unregister_console_locked(struct console *console);
/* True when system boot is far enough to create printer threads. */
-static bool printk_kthreads_ready __ro_after_init;
+bool printk_kthreads_ready __ro_after_init;
static struct task_struct *printk_legacy_kthread;
@@ -3713,6 +3713,7 @@ static void printk_kthreads_check_locked(void)
if (!printk_kthreads_ready)
return;
+ /* Start or stop the legacy kthread when needed. */
if (have_legacy_console || have_boot_console) {
if (!printk_legacy_kthread &&
force_legacy_kthread() &&
@@ -4204,14 +4205,6 @@ static int unregister_console_locked(struct console *console)
*/
synchronize_srcu(&console_srcu);
- if (console->flags & CON_NBCON)
- nbcon_free(console);
-
- console_sysfs_notify();
-
- if (console->exit)
- res = console->exit(console);
-
/*
* With this console gone, the global flags tracking registered
* console types may have changed. Update them.
@@ -4232,6 +4225,15 @@ static int unregister_console_locked(struct console *console)
if (!found_nbcon_con)
have_nbcon_console = found_nbcon_con;
+ /* @have_nbcon_console must be updated before calling nbcon_free(). */
+ if (console->flags & CON_NBCON)
+ nbcon_free(console);
+
+ console_sysfs_notify();
+
+ if (console->exit)
+ res = console->exit(console);
+
/* Changed console list, may require printer threads to start/stop. */
printk_kthreads_check_locked();
diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c
index 2024c1d36402..59fdb7ebbf22 100644
--- a/kernel/sched/psi.c
+++ b/kernel/sched/psi.c
@@ -176,7 +176,7 @@ struct psi_group psi_system = {
.pcpu = &system_group_pcpu,
};
-static DEFINE_PER_CPU(seqcount_t, psi_seq);
+static DEFINE_PER_CPU(seqcount_t, psi_seq) = SEQCNT_ZERO(psi_seq);
static inline void psi_write_begin(int cpu)
{
@@ -204,11 +204,7 @@ static void poll_timer_fn(struct timer_list *t);
static void group_init(struct psi_group *group)
{
- int cpu;
-
group->enabled = true;
- for_each_possible_cpu(cpu)
- seqcount_init(per_cpu_ptr(&psi_seq, cpu));
group->avg_last_update = sched_clock();
group->avg_next_update = group->avg_last_update + psi_period;
mutex_init(&group->avgs_lock);
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index 3d85800757aa..eb0cb11d0d12 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -60,6 +60,20 @@
bool no_hash_pointers __ro_after_init;
EXPORT_SYMBOL_GPL(no_hash_pointers);
+/*
+ * Hashed pointers policy selected by "hash_pointers=..." boot param
+ *
+ * `auto` - Hashed pointers enabled unless disabled by slub_debug_enabled=true
+ * `always` - Hashed pointers enabled unconditionally
+ * `never` - Hashed pointers disabled unconditionally
+ */
+enum hash_pointers_policy {
+ HASH_PTR_AUTO = 0,
+ HASH_PTR_ALWAYS,
+ HASH_PTR_NEVER
+};
+static enum hash_pointers_policy hash_pointers_mode __initdata;
+
noinline
static unsigned long long simple_strntoull(const char *startp, char **endp, unsigned int base, size_t max_chars)
{
@@ -1699,10 +1713,9 @@ char *escaped_string(char *buf, char *end, u8 *addr, struct printf_spec spec,
return buf;
}
-#pragma GCC diagnostic push
-#ifndef __clang__
-#pragma GCC diagnostic ignored "-Wsuggest-attribute=format"
-#endif
+__diag_push();
+__diag_ignore(GCC, all, "-Wsuggest-attribute=format",
+ "Not a valid __printf() conversion candidate.");
static char *va_format(char *buf, char *end, struct va_format *va_fmt,
struct printf_spec spec)
{
@@ -1717,7 +1730,7 @@ static char *va_format(char *buf, char *end, struct va_format *va_fmt,
return buf;
}
-#pragma GCC diagnostic pop
+__diag_pop();
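
__diag_push()/__diag_ignore() expand to the compiler pragmas visible on the removed lines; the new __diag_GCC_all() variant applies the directive on every GCC version. Outside the kernel the same suppression looks like this plain-GCC illustration (not the kernel macros):

    #include <stdarg.h>
    #include <stdio.h>

    #pragma GCC diagnostic push
    #pragma GCC diagnostic ignored "-Wsuggest-attribute=format"
    /* takes a va_list, so it is not a valid __printf() candidate */
    static void vlog(const char *fmt, va_list ap)
    {
            vfprintf(stderr, fmt, ap);
    }
    #pragma GCC diagnostic pop

    static void log_msg(const char *fmt, ...)
    {
            va_list ap;

            va_start(ap, fmt);
            vlog(fmt, ap);
            va_end(ap);
    }

    int main(void)
    {
            log_msg("hello %s\n", "world");
            return 0;
    }
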
static noinline_for_stack
char *uuid_string(char *buf, char *end, const u8 *addr,
@@ -2289,12 +2302,23 @@ char *resource_or_range(const char *fmt, char *buf, char *end, void *ptr,
return resource_string(buf, end, ptr, spec, fmt);
}
-int __init no_hash_pointers_enable(char *str)
+void __init hash_pointers_finalize(bool slub_debug)
{
- if (no_hash_pointers)
- return 0;
+ switch (hash_pointers_mode) {
+ case HASH_PTR_ALWAYS:
+ no_hash_pointers = false;
+ break;
+ case HASH_PTR_NEVER:
+ no_hash_pointers = true;
+ break;
+ case HASH_PTR_AUTO:
+ default:
+ no_hash_pointers = slub_debug;
+ break;
+ }
- no_hash_pointers = true;
+ if (!no_hash_pointers)
+ return;
pr_warn("**********************************************************\n");
pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
@@ -2307,11 +2331,39 @@ int __init no_hash_pointers_enable(char *str)
pr_warn("** the kernel, report this immediately to your system **\n");
pr_warn("** administrator! **\n");
pr_warn("** **\n");
+ pr_warn("** Use hash_pointers=always to force this mode off **\n");
+ pr_warn("** **\n");
pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
pr_warn("**********************************************************\n");
+}
+
+static int __init hash_pointers_mode_parse(char *str)
+{
+ if (!str) {
+ pr_warn("Hash pointers mode empty; falling back to auto.\n");
+ hash_pointers_mode = HASH_PTR_AUTO;
+ } else if (strncmp(str, "auto", 4) == 0) {
+ pr_info("Hash pointers mode set to auto.\n");
+ hash_pointers_mode = HASH_PTR_AUTO;
+ } else if (strncmp(str, "never", 5) == 0) {
+ pr_info("Hash pointers mode set to never.\n");
+ hash_pointers_mode = HASH_PTR_NEVER;
+ } else if (strncmp(str, "always", 6) == 0) {
+ pr_info("Hash pointers mode set to always.\n");
+ hash_pointers_mode = HASH_PTR_ALWAYS;
+ } else {
+ pr_warn("Unknown hash_pointers mode '%s' specified; assuming auto.\n", str);
+ hash_pointers_mode = HASH_PTR_AUTO;
+ }
return 0;
}
+early_param("hash_pointers", hash_pointers_mode_parse);
+
+static int __init no_hash_pointers_enable(char *str)
+{
+ return hash_pointers_mode_parse("never");
+}
early_param("no_hash_pointers", no_hash_pointers_enable);
/*
diff --git a/mm/slub.c b/mm/slub.c
index cf7c6032d5fd..30003763d224 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -6312,9 +6312,8 @@ void __init kmem_cache_init(void)
if (debug_guardpage_minorder())
slub_max_order = 0;
- /* Print slub debugging pointers without hashing */
- if (__slub_debug_enabled())
- no_hash_pointers_enable(NULL);
+ /* Inform pointer hashing choice about slub debugging state. */
+ hash_pointers_finalize(__slub_debug_enabled());
kmem_cache_node = &boot_kmem_cache_node;
kmem_cache = &boot_kmem_cache;
diff --git a/security/apparmor/Makefile b/security/apparmor/Makefile
index b9c5879dd599..12fb419714c0 100644
--- a/security/apparmor/Makefile
+++ b/security/apparmor/Makefile
@@ -6,7 +6,7 @@ obj-$(CONFIG_SECURITY_APPARMOR) += apparmor.o
apparmor-y := apparmorfs.o audit.o capability.o task.o ipc.o lib.o match.o \
path.o domain.o policy.o policy_unpack.o procattr.o lsm.o \
resource.o secid.o file.o policy_ns.o label.o mount.o net.o \
- policy_compat.o
+ policy_compat.o af_unix.o
apparmor-$(CONFIG_SECURITY_APPARMOR_HASH) += crypto.o
obj-$(CONFIG_SECURITY_APPARMOR_KUNIT_TEST) += apparmor_policy_unpack_test.o
@@ -28,7 +28,7 @@ clean-files := capability_names.h rlim_names.h net_names.h
# to
# #define AA_SFS_AF_MASK "local inet"
quiet_cmd_make-af = GEN $@
-cmd_make-af = echo "static const char *address_family_names[] = {" > $@ ;\
+cmd_make-af = echo "static const char *const address_family_names[] = {" > $@ ;\
sed $< >>$@ -r -n -e "/AF_MAX/d" -e "/AF_LOCAL/d" -e "/AF_ROUTE/d" -e \
's/^\#define[ \t]+AF_([A-Z0-9_]+)[ \t]+([0-9]+)(.*)/[\2] = "\L\1",/p';\
echo "};" >> $@ ;\
@@ -43,7 +43,7 @@ cmd_make-af = echo "static const char *address_family_names[] = {" > $@ ;\
# to
# [1] = "stream",
quiet_cmd_make-sock = GEN $@
-cmd_make-sock = echo "static const char *sock_type_names[] = {" >> $@ ;\
+cmd_make-sock = echo "static const char *const sock_type_names[] = {" >> $@ ;\
sed $^ >>$@ -r -n \
-e 's/^\tSOCK_([A-Z0-9_]+)[\t]+=[ \t]+([0-9]+)(.*)/[\2] = "\L\1",/p';\
echo "};" >> $@
diff --git a/security/apparmor/af_unix.c b/security/apparmor/af_unix.c
new file mode 100644
index 000000000000..9129766d1e9c
--- /dev/null
+++ b/security/apparmor/af_unix.c
@@ -0,0 +1,799 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * AppArmor security module
+ *
+ * This file contains AppArmor af_unix fine grained mediation
+ *
+ * Copyright 2023 Canonical Ltd.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation, version 2 of the
+ * License.
+ */
+
+#include <linux/fs.h>
+#include <net/tcp_states.h>
+
+#include "include/audit.h"
+#include "include/af_unix.h"
+#include "include/apparmor.h"
+#include "include/file.h"
+#include "include/label.h"
+#include "include/path.h"
+#include "include/policy.h"
+#include "include/cred.h"
+
+
+static inline struct sock *aa_unix_sk(struct unix_sock *u)
+{
+ return &u->sk;
+}
+
+static int unix_fs_perm(const char *op, u32 mask, const struct cred *subj_cred,
+ struct aa_label *label, struct path *path)
+{
+ AA_BUG(!label);
+ AA_BUG(!path);
+
+ if (unconfined(label) || !label_mediates(label, AA_CLASS_FILE))
+ return 0;
+
+ mask &= NET_FS_PERMS;
+ /* if !u->path.dentry socket is being shutdown - implicit delegation
+ * until obj delegation is supported
+ */
+ if (path->dentry) {
+ /* the sunpath may not be valid for this ns so use the path */
+ struct inode *inode = path->dentry->d_inode;
+ vfsuid_t vfsuid = i_uid_into_vfsuid(mnt_idmap(path->mnt), inode);
+ struct path_cond cond = {
+ .uid = vfsuid_into_kuid(vfsuid),
+ .mode = inode->i_mode,
+ };
+
+ return aa_path_perm(op, subj_cred, label, path,
+ PATH_SOCK_COND, mask, &cond);
+ } /* else implicitly delegated */
+
+ return 0;
+}
+
+/* match_addr special constants */
+#define ABSTRACT_ADDR "\x00" /* abstract socket addr */
+#define ANONYMOUS_ADDR "\x01" /* anonymous endpoint, no addr */
+#define DISCONNECTED_ADDR "\x02" /* addr is another namespace */
+#define SHUTDOWN_ADDR "\x03" /* path addr is shutdown and cleared */
+#define FS_ADDR "/" /* path addr in fs */
+
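
The "\x00" marker mirrors the kernel convention that an abstract AF_UNIX address has a sun_path beginning with a NUL byte, while filesystem addresses begin with '/'. A runnable userspace illustration of binding an abstract address:

    #include <stddef.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/socket.h>
    #include <sys/un.h>
    #include <unistd.h>

    int main(void)
    {
            struct sockaddr_un sa = { .sun_family = AF_UNIX };
            static const char name[] = "\0demo-abstract";   /* leading NUL */
            socklen_t len = offsetof(struct sockaddr_un, sun_path) + sizeof(name) - 1;
            int fd = socket(AF_UNIX, SOCK_STREAM, 0);

            if (fd < 0) { perror("socket"); return 1; }
            memcpy(sa.sun_path, name, sizeof(name) - 1);
            if (bind(fd, (struct sockaddr *)&sa, len) < 0) {
                    perror("bind");
                    return 1;
            }
            puts("bound an abstract unix socket");
            close(fd);
            return 0;
    }
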
+static aa_state_t match_addr(struct aa_dfa *dfa, aa_state_t state,
+ struct sockaddr_un *addr, int addrlen)
+{
+ if (addr)
+ /* include leading \0 */
+ state = aa_dfa_match_len(dfa, state, addr->sun_path,
+ unix_addr_len(addrlen));
+ else
+ state = aa_dfa_match_len(dfa, state, ANONYMOUS_ADDR, 1);
+ /* todo: could change to out of band for cleaner separation */
+ state = aa_dfa_null_transition(dfa, state);
+
+ return state;
+}
+
+static aa_state_t match_to_local(struct aa_policydb *policy,
+ aa_state_t state, u32 request,
+ int type, int protocol,
+ struct sockaddr_un *addr, int addrlen,
+ struct aa_perms **p,
+ const char **info)
+{
+ state = aa_match_to_prot(policy, state, request, PF_UNIX, type,
+ protocol, NULL, info);
+ if (state) {
+ state = match_addr(policy->dfa, state, addr, addrlen);
+ if (state) {
+ /* todo: local label matching */
+ state = aa_dfa_null_transition(policy->dfa, state);
+ if (!state)
+ *info = "failed local label match";
+ } else {
+ *info = "failed local address match";
+ }
+ }
+
+ return state;
+}
+
+struct sockaddr_un *aa_sunaddr(const struct unix_sock *u, int *addrlen)
+{
+ struct unix_address *addr;
+
+ /* memory barrier is sufficient; see the note in net/unix/af_unix.c */
+ addr = smp_load_acquire(&u->addr);
+ if (addr) {
+ *addrlen = addr->len;
+ return addr->name;
+ }
+ *addrlen = 0;
+ return NULL;
+}
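
The smp_load_acquire() pairs with the release store that publishes u->addr, so a reader that sees the pointer also sees the addr->len and name written before it. A userspace C11 analog of the same publish/observe pattern (compile with -std=c11):

    #include <stdatomic.h>
    #include <stdio.h>
    #include <string.h>

    struct addr { int len; char name[16]; };

    static struct addr slot;
    static _Atomic(struct addr *) published;

    static void publish(void)
    {
            slot.len = 4;
            memcpy(slot.name, "demo", 5);           /* init fields first... */
            atomic_store_explicit(&published, &slot,
                                  memory_order_release); /* ...then publish */
    }

    static const struct addr *lookup(void)
    {
            /* pairs with the release store above, like smp_load_acquire() */
            return atomic_load_explicit(&published, memory_order_acquire);
    }

    int main(void)
    {
            const struct addr *a;

            publish();
            a = lookup();
            printf("len=%d name=%s\n", a ? a->len : 0, a ? a->name : "");
            return 0;
    }
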
+
+static aa_state_t match_to_sk(struct aa_policydb *policy,
+ aa_state_t state, u32 request,
+ struct unix_sock *u, struct aa_perms **p,
+ const char **info)
+{
+ int addrlen;
+ struct sockaddr_un *addr = aa_sunaddr(u, &addrlen);
+
+ return match_to_local(policy, state, request, u->sk.sk_type,
+ u->sk.sk_protocol, addr, addrlen, p, info);
+}
+
+#define CMD_ADDR 1
+#define CMD_LISTEN 2
+#define CMD_OPT 4
+
+static aa_state_t match_to_cmd(struct aa_policydb *policy, aa_state_t state,
+ u32 request, struct unix_sock *u,
+ char cmd, struct aa_perms **p,
+ const char **info)
+{
+ AA_BUG(!p);
+
+ state = match_to_sk(policy, state, request, u, p, info);
+ if (state && !*p) {
+ state = aa_dfa_match_len(policy->dfa, state, &cmd, 1);
+ if (!state)
+ *info = "failed cmd selection match";
+ }
+
+ return state;
+}
+
+static aa_state_t match_to_peer(struct aa_policydb *policy, aa_state_t state,
+ u32 request, struct unix_sock *u,
+ struct sockaddr_un *peer_addr, int peer_addrlen,
+ struct aa_perms **p, const char **info)
+{
+ AA_BUG(!p);
+
+ state = match_to_cmd(policy, state, request, u, CMD_ADDR, p, info);
+ if (state && !*p) {
+ state = match_addr(policy->dfa, state, peer_addr, peer_addrlen);
+ if (!state)
+ *info = "failed peer address match";
+ }
+
+ return state;
+}
+
+static aa_state_t match_label(struct aa_profile *profile,
+ struct aa_ruleset *rule, aa_state_t state,
+ u32 request, struct aa_profile *peer,
+ struct aa_perms *p,
+ struct apparmor_audit_data *ad)
+{
+ AA_BUG(!profile);
+ AA_BUG(!peer);
+
+ ad->peer = &peer->label;
+
+ if (state && !p) {
+ state = aa_dfa_match(rule->policy->dfa, state,
+ peer->base.hname);
+ if (!state)
+ ad->info = "failed peer label match";
+
+ }
+
+ return aa_do_perms(profile, rule->policy, state, request, p, ad);
+}
+
+
+/* unix sock creation comes before we know if the socket will be an fs
+ * socket
+ * v6 - semantics are handled by mapping in profile load
+ * v7 - semantics require sock create for tasks creating an fs socket.
+ * v8 - same as v7
+ */
+static int profile_create_perm(struct aa_profile *profile, int family,
+ int type, int protocol,
+ struct apparmor_audit_data *ad)
+{
+ struct aa_ruleset *rules = profile->label.rules[0];
+ aa_state_t state;
+
+ AA_BUG(!profile);
+ AA_BUG(profile_unconfined(profile));
+
+ state = RULE_MEDIATES_v9NET(rules);
+ if (state) {
+ state = aa_match_to_prot(rules->policy, state, AA_MAY_CREATE,
+ PF_UNIX, type, protocol, NULL,
+ &ad->info);
+
+ return aa_do_perms(profile, rules->policy, state, AA_MAY_CREATE,
+ NULL, ad);
+ }
+
+ return aa_profile_af_perm(profile, ad, AA_MAY_CREATE, family, type,
+ protocol);
+}
+
+static int profile_sk_perm(struct aa_profile *profile,
+ struct apparmor_audit_data *ad,
+ u32 request, struct sock *sk, struct path *path)
+{
+ struct aa_ruleset *rules = profile->label.rules[0];
+ struct aa_perms *p = NULL;
+ aa_state_t state;
+
+ AA_BUG(!profile);
+ AA_BUG(!sk);
+ AA_BUG(profile_unconfined(profile));
+
+ state = RULE_MEDIATES_v9NET(rules);
+ if (state) {
+ if (is_unix_fs(sk))
+ return unix_fs_perm(ad->op, request, ad->subj_cred,
+ &profile->label,
+ &unix_sk(sk)->path);
+
+ state = match_to_sk(rules->policy, state, request, unix_sk(sk),
+ &p, &ad->info);
+
+ return aa_do_perms(profile, rules->policy, state, request, p,
+ ad);
+ }
+
+ return aa_profile_af_sk_perm(profile, ad, request, sk);
+}
+
+static int profile_bind_perm(struct aa_profile *profile, struct sock *sk,
+ struct apparmor_audit_data *ad)
+{
+ struct aa_ruleset *rules = profile->label.rules[0];
+ struct aa_perms *p = NULL;
+ aa_state_t state;
+
+ AA_BUG(!profile);
+ AA_BUG(!sk);
+ AA_BUG(!ad);
+ AA_BUG(profile_unconfined(profile));
+
+ state = RULE_MEDIATES_v9NET(rules);
+ if (state) {
+ if (is_unix_addr_fs(ad->net.addr, ad->net.addrlen))
+ /* under v7-9 fs hook handles bind */
+ return 0;
+ /* bind for abstract socket */
+ state = match_to_local(rules->policy, state, AA_MAY_BIND,
+ sk->sk_type, sk->sk_protocol,
+ unix_addr(ad->net.addr),
+ ad->net.addrlen,
+ &p, &ad->info);
+
+ return aa_do_perms(profile, rules->policy, state, AA_MAY_BIND,
+ p, ad);
+ }
+
+ return aa_profile_af_sk_perm(profile, ad, AA_MAY_BIND, sk);
+}
+
+static int profile_listen_perm(struct aa_profile *profile, struct sock *sk,
+ int backlog, struct apparmor_audit_data *ad)
+{
+ struct aa_ruleset *rules = profile->label.rules[0];
+ struct aa_perms *p = NULL;
+ aa_state_t state;
+
+ AA_BUG(!profile);
+ AA_BUG(!sk);
+ AA_BUG(!ad);
+ AA_BUG(profile_unconfined(profile));
+
+ state = RULE_MEDIATES_v9NET(rules);
+ if (state) {
+ __be16 b = cpu_to_be16(backlog);
+
+ if (is_unix_fs(sk))
+ return unix_fs_perm(ad->op, AA_MAY_LISTEN,
+ ad->subj_cred, &profile->label,
+ &unix_sk(sk)->path);
+
+ state = match_to_cmd(rules->policy, state, AA_MAY_LISTEN,
+ unix_sk(sk), CMD_LISTEN, &p, &ad->info);
+ if (state && !p) {
+ state = aa_dfa_match_len(rules->policy->dfa, state,
+ (char *) &b, 2);
+ if (!state)
+ ad->info = "failed listen backlog match";
+ }
+ return aa_do_perms(profile, rules->policy, state, AA_MAY_LISTEN,
+ p, ad);
+ }
+
+ return aa_profile_af_sk_perm(profile, ad, AA_MAY_LISTEN, sk);
+}
+
+static int profile_accept_perm(struct aa_profile *profile,
+ struct sock *sk,
+ struct apparmor_audit_data *ad)
+{
+ struct aa_ruleset *rules = profile->label.rules[0];
+ struct aa_perms *p = NULL;
+ aa_state_t state;
+
+ AA_BUG(!profile);
+ AA_BUG(!sk);
+ AA_BUG(!ad);
+ AA_BUG(profile_unconfined(profile));
+
+ state = RULE_MEDIATES_v9NET(rules);
+ if (state) {
+ if (is_unix_fs(sk))
+ return unix_fs_perm(ad->op, AA_MAY_ACCEPT,
+ ad->subj_cred, &profile->label,
+ &unix_sk(sk)->path);
+
+ state = match_to_sk(rules->policy, state, AA_MAY_ACCEPT,
+ unix_sk(sk), &p, &ad->info);
+
+ return aa_do_perms(profile, rules->policy, state, AA_MAY_ACCEPT,
+ p, ad);
+ }
+
+ return aa_profile_af_sk_perm(profile, ad, AA_MAY_ACCEPT, sk);
+}
+
+static int profile_opt_perm(struct aa_profile *profile, u32 request,
+ struct sock *sk, int optname,
+ struct apparmor_audit_data *ad)
+{
+ struct aa_ruleset *rules = profile->label.rules[0];
+ struct aa_perms *p = NULL;
+ aa_state_t state;
+
+ AA_BUG(!profile);
+ AA_BUG(!sk);
+ AA_BUG(!ad);
+ AA_BUG(profile_unconfined(profile));
+
+ state = RULE_MEDIATES_v9NET(rules);
+ if (state) {
+ __be16 b = cpu_to_be16(optname);
+
+ if (is_unix_fs(sk))
+ return unix_fs_perm(ad->op, request,
+ ad->subj_cred, &profile->label,
+ &unix_sk(sk)->path);
+
+ state = match_to_cmd(rules->policy, state, request, unix_sk(sk),
+ CMD_OPT, &p, &ad->info);
+ if (state && !p) {
+ state = aa_dfa_match_len(rules->policy->dfa, state,
+ (char *) &b, 2);
+ if (!state)
+ ad->info = "failed sockopt match";
+ }
+ return aa_do_perms(profile, rules->policy, state, request, p,
+ ad);
+ }
+
+ return aa_profile_af_sk_perm(profile, ad, request, sk);
+}
+
+/* null peer_label is allowed, in which case the peer_sk label is used */
+static int profile_peer_perm(struct aa_profile *profile, u32 request,
+ struct sock *sk, struct path *path,
+ struct sockaddr_un *peer_addr,
+ int peer_addrlen, struct path *peer_path,
+ struct aa_label *peer_label,
+ struct apparmor_audit_data *ad)
+{
+ struct aa_ruleset *rules = profile->label.rules[0];
+ struct aa_perms *p = NULL;
+ aa_state_t state;
+
+ AA_BUG(!profile);
+ AA_BUG(profile_unconfined(profile));
+ AA_BUG(!sk);
+ AA_BUG(!peer_label);
+ AA_BUG(!ad);
+
+ state = RULE_MEDIATES_v9NET(rules);
+ if (state) {
+ struct aa_profile *peerp;
+
+ if (peer_path)
+ return unix_fs_perm(ad->op, request, ad->subj_cred,
+ &profile->label, peer_path);
+ else if (path)
+ return unix_fs_perm(ad->op, request, ad->subj_cred,
+ &profile->label, path);
+ state = match_to_peer(rules->policy, state, request,
+ unix_sk(sk),
+ peer_addr, peer_addrlen, &p, &ad->info);
+
+ return fn_for_each_in_ns(peer_label, peerp,
+ match_label(profile, rules, state, request,
+ peerp, p, ad));
+ }
+
+ return aa_profile_af_sk_perm(profile, ad, request, sk);
+}
+
+/* -------------------------------- */
+
+int aa_unix_create_perm(struct aa_label *label, int family, int type,
+ int protocol)
+{
+ if (!unconfined(label)) {
+ struct aa_profile *profile;
+ DEFINE_AUDIT_NET(ad, OP_CREATE, current_cred(), NULL, family,
+ type, protocol);
+
+ return fn_for_each_confined(label, profile,
+ profile_create_perm(profile, family, type,
+ protocol, &ad));
+ }
+
+ return 0;
+}
+
+static int aa_unix_label_sk_perm(const struct cred *subj_cred,
+ struct aa_label *label,
+ const char *op, u32 request, struct sock *sk,
+ struct path *path)
+{
+ if (!unconfined(label)) {
+ struct aa_profile *profile;
+ DEFINE_AUDIT_SK(ad, op, subj_cred, sk);
+
+ return fn_for_each_confined(label, profile,
+ profile_sk_perm(profile, &ad, request, sk,
+ path));
+ }
+ return 0;
+}
+
+/* revalidation, get/set attr, shutdown */
+int aa_unix_sock_perm(const char *op, u32 request, struct socket *sock)
+{
+ struct aa_label *label;
+ int error;
+
+ label = begin_current_label_crit_section();
+ error = aa_unix_label_sk_perm(current_cred(), label, op,
+ request, sock->sk,
+ is_unix_fs(sock->sk) ? &unix_sk(sock->sk)->path : NULL);
+ end_current_label_crit_section(label);
+
+ return error;
+}
+
+static int valid_addr(struct sockaddr *addr, int addr_len)
+{
+ struct sockaddr_un *sunaddr = unix_addr(addr);
+
+ /* addr_len == offsetof(struct sockaddr_un, sun_path) is autobind */
+ if (addr_len < offsetof(struct sockaddr_un, sun_path) ||
+ addr_len > sizeof(*sunaddr))
+ return -EINVAL;
+ return 0;
+}
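+
+/* e.g. addr_len == offsetof(struct sockaddr_un, sun_path) requests autobind,
+ * while an abstract address "\0name" is passed with a leading NUL byte in
+ * sun_path and addr_len == offsetof(struct sockaddr_un, sun_path) + 1 +
+ * strlen("name").
+ */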
+
+int aa_unix_bind_perm(struct socket *sock, struct sockaddr *addr,
+ int addrlen)
+{
+ struct aa_profile *profile;
+ struct aa_label *label;
+ int error = 0;
+
+ error = valid_addr(addr, addrlen);
+ if (error)
+ return error;
+
+ label = begin_current_label_crit_section();
+ /* fs bind is handled by mknod */
+ if (!unconfined(label)) {
+ DEFINE_AUDIT_SK(ad, OP_BIND, current_cred(), sock->sk);
+
+ ad.net.addr = unix_addr(addr);
+ ad.net.addrlen = addrlen;
+
+ error = fn_for_each_confined(label, profile,
+ profile_bind_perm(profile, sock->sk, &ad));
+ }
+ end_current_label_crit_section(label);
+
+ return error;
+}
+
+/*
+ * unix connections are covered by:
+ * - the unix_stream_connect (stream) and unix_may_send (dgram) hooks
+ * - fs connect, which is handled by open
+ * This is just here to document that this is not needed for af_unix
+ *
+int aa_unix_connect_perm(struct socket *sock, struct sockaddr *address,
+ int addrlen)
+{
+ return 0;
+}
+*/
+
+int aa_unix_listen_perm(struct socket *sock, int backlog)
+{
+ struct aa_profile *profile;
+ struct aa_label *label;
+ int error = 0;
+
+ label = begin_current_label_crit_section();
+ if (!unconfined(label)) {
+ DEFINE_AUDIT_SK(ad, OP_LISTEN, current_cred(), sock->sk);
+
+ error = fn_for_each_confined(label, profile,
+ profile_listen_perm(profile, sock->sk,
+ backlog, &ad));
+ }
+ end_current_label_crit_section(label);
+
+ return error;
+}
+
+/* ability of sock to accept a connection, not peer address binding */
+int aa_unix_accept_perm(struct socket *sock, struct socket *newsock)
+{
+ struct aa_profile *profile;
+ struct aa_label *label;
+ int error = 0;
+
+ label = begin_current_label_crit_section();
+ if (!unconfined(label)) {
+ DEFINE_AUDIT_SK(ad, OP_ACCEPT, current_cred(), sock->sk);
+
+ error = fn_for_each_confined(label, profile,
+ profile_accept_perm(profile, sock->sk, &ad));
+ }
+ end_current_label_crit_section(label);
+
+ return error;
+}
+
+/*
+ * dgram is handled by unix_may_send; the right to send on a stream is
+ * checked at connect. We could do per-msg checks for unix_stream here,
+ * but connect + socket transfer checks are sufficient. This is just here
+ * to document that this is not needed for af_unix
+ *
+ * sendmsg, recvmsg
+int aa_unix_msg_perm(const char *op, u32 request, struct socket *sock,
+ struct msghdr *msg, int size)
+{
+ return 0;
+}
+*/
+
+int aa_unix_opt_perm(const char *op, u32 request, struct socket *sock,
+ int level, int optname)
+{
+ struct aa_profile *profile;
+ struct aa_label *label;
+ int error = 0;
+
+ label = begin_current_label_crit_section();
+ if (!unconfined(label)) {
+ DEFINE_AUDIT_SK(ad, op, current_cred(), sock->sk);
+
+ error = fn_for_each_confined(label, profile,
+ profile_opt_perm(profile, request, sock->sk,
+ optname, &ad));
+ }
+ end_current_label_crit_section(label);
+
+ return error;
+}
+
+static int unix_peer_perm(const struct cred *subj_cred,
+ struct aa_label *label, const char *op, u32 request,
+ struct sock *sk, struct path *path,
+ struct sockaddr_un *peer_addr, int peer_addrlen,
+ struct path *peer_path, struct aa_label *peer_label)
+{
+ struct aa_profile *profile;
+ DEFINE_AUDIT_SK(ad, op, subj_cred, sk);
+
+ ad.net.peer.addr = peer_addr;
+ ad.net.peer.addrlen = peer_addrlen;
+
+ return fn_for_each_confined(label, profile,
+ profile_peer_perm(profile, request, sk, path,
+ peer_addr, peer_addrlen, peer_path,
+ peer_label, &ad));
+}
+
+/**
+ * aa_unix_peer_perm - check perms between a unix sock and its peer
+ *
+ * Requires: lock held on both @sk and @peer_sk
+ * Called by unix_stream_connect (stream) and unix_may_send (dgram).
+ */
+int aa_unix_peer_perm(const struct cred *subj_cred,
+ struct aa_label *label, const char *op, u32 request,
+ struct sock *sk, struct sock *peer_sk,
+ struct aa_label *peer_label)
+{
+ struct unix_sock *peeru = unix_sk(peer_sk);
+ struct unix_sock *u = unix_sk(sk);
+ int plen;
+ struct sockaddr_un *paddr = aa_sunaddr(peeru, &plen);
+
+ AA_BUG(!label);
+ AA_BUG(!sk);
+ AA_BUG(!peer_sk);
+ AA_BUG(!peer_label);
+
+ return unix_peer_perm(subj_cred, label, op, request, sk,
+ is_unix_fs(sk) ? &u->path : NULL,
+ paddr, plen,
+ is_unix_fs(peer_sk) ? &peeru->path : NULL,
+ peer_label);
+}
+
+/* sk_plabel for comparison only */
+static void update_sk_ctx(struct sock *sk, struct aa_label *label,
+ struct aa_label *plabel)
+{
+ struct aa_label *l, *old;
+ struct aa_sk_ctx *ctx = aa_sock(sk);
+ bool update_sk;
+
+ rcu_read_lock();
+ update_sk = (plabel &&
+ (plabel != rcu_access_pointer(ctx->peer_lastupdate) ||
+ !aa_label_is_subset(plabel, rcu_dereference(ctx->peer)))) ||
+ !__aa_subj_label_is_cached(label, rcu_dereference(ctx->label));
+ rcu_read_unlock();
+ if (!update_sk)
+ return;
+
+ spin_lock(&unix_sk(sk)->lock);
+ old = rcu_dereference_protected(ctx->label,
+ lockdep_is_held(&unix_sk(sk)->lock));
+ l = aa_label_merge(old, label, GFP_ATOMIC);
+ if (l) {
+ if (l != old) {
+ rcu_assign_pointer(ctx->label, l);
+ aa_put_label(old);
+ } else
+ aa_put_label(l);
+ }
+ if (plabel && rcu_access_pointer(ctx->peer_lastupdate) != plabel) {
+ old = rcu_dereference_protected(ctx->peer,
+ lockdep_is_held(&unix_sk(sk)->lock));
+
+ if (old == plabel) {
+ rcu_assign_pointer(ctx->peer_lastupdate, plabel);
+ } else if (aa_label_is_subset(plabel, old)) {
+ rcu_assign_pointer(ctx->peer_lastupdate, plabel);
+ rcu_assign_pointer(ctx->peer, aa_get_label(plabel));
+ aa_put_label(old);
+ } /* else race or a subset - don't update */
+ }
+ spin_unlock(&unix_sk(sk)->lock);
+}
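+
+/* The cache is three pointers: ctx->label caches the subject side,
+ * ctx->peer holds a ref to the merged view of the peer, and
+ * ctx->peer_lastupdate records (for pointer comparison only, never
+ * dereferenced) the peer label used in the last successful update.
+ */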
+
+static void update_peer_ctx(struct sock *sk, struct aa_sk_ctx *ctx,
+ struct aa_label *label)
+{
+ struct aa_label *l, *old;
+
+ spin_lock(&unix_sk(sk)->lock);
+ old = rcu_dereference_protected(ctx->peer,
+ lockdep_is_held(&unix_sk(sk)->lock));
+ l = aa_label_merge(old, label, GFP_ATOMIC);
+ if (l) {
+ if (l != old) {
+ rcu_assign_pointer(ctx->peer, l);
+ aa_put_label(old);
+ } else
+ aa_put_label(l);
+ }
+ spin_unlock(&unix_sk(sk)->lock);
+}
+
+/* This fn is only called if something has changed in the security
+ * boundaries, otherwise the info cached on the file is sufficient.
+ */
+int aa_unix_file_perm(const struct cred *subj_cred, struct aa_label *label,
+ const char *op, u32 request, struct file *file)
+{
+ struct socket *sock = (struct socket *) file->private_data;
+ struct sockaddr_un *addr, *peer_addr;
+ int addrlen, peer_addrlen;
+ struct aa_label *plabel = NULL;
+ struct sock *peer_sk = NULL;
+ u32 sk_req = request & ~NET_PEER_MASK;
+ struct path path;
+ bool is_sk_fs;
+ int error = 0;
+
+ AA_BUG(!label);
+ AA_BUG(!sock);
+ AA_BUG(!sock->sk);
+ AA_BUG(sock->sk->sk_family != PF_UNIX);
+
+ /* investigate only using lock via unix_peer_get()
+ * addr only needs the memory barrier, but need to investigate
+ * path
+ */
+ unix_state_lock(sock->sk);
+ peer_sk = unix_peer(sock->sk);
+ if (peer_sk)
+ sock_hold(peer_sk);
+
+ is_sk_fs = is_unix_fs(sock->sk);
+ addr = aa_sunaddr(unix_sk(sock->sk), &addrlen);
+ path = unix_sk(sock->sk)->path;
+ unix_state_unlock(sock->sk);
+
+ if (is_sk_fs && peer_sk)
+ sk_req = request;
+ if (sk_req) {
+ error = aa_unix_label_sk_perm(subj_cred, label, op,
+ sk_req, sock->sk,
+ is_sk_fs ? &path : NULL);
+ }
+ if (!peer_sk)
+ goto out;
+
+ peer_addr = aa_sunaddr(unix_sk(peer_sk), &peer_addrlen);
+
+ struct path peer_path = unix_sk(peer_sk)->path;
+
+ if (!is_sk_fs && is_unix_fs(peer_sk)) {
+ last_error(error,
+ unix_fs_perm(op, request, subj_cred, label,
+ &peer_path));
+ } else if (!is_sk_fs) {
+ struct aa_sk_ctx *pctx = aa_sock(peer_sk);
+
+ rcu_read_lock();
+ plabel = aa_get_label_rcu(&pctx->label);
+ rcu_read_unlock();
+ /* no fs check of aa_unix_peer_perm because conditions above
+ * ensure they will never be done
+ */
+ last_error(error,
+ xcheck(unix_peer_perm(subj_cred, label, op,
+ MAY_READ | MAY_WRITE, sock->sk,
+ is_sk_fs ? &path : NULL,
+ peer_addr, peer_addrlen,
+ is_unix_fs(peer_sk) ?
+ &peer_path : NULL,
+ plabel),
+ unix_peer_perm(file->f_cred, plabel, op,
+ MAY_READ | MAY_WRITE, peer_sk,
+ is_unix_fs(peer_sk) ?
+ &peer_path : NULL,
+ addr, addrlen,
+ is_sk_fs ? &path : NULL,
+ label)));
+ if (!error && !__aa_subj_label_is_cached(plabel, label))
+ update_peer_ctx(peer_sk, pctx, label);
+ }
+ sock_put(peer_sk);
+
+out:
+
+ /* update the sk's label/peer cache to latest successful perm check */
+ if (error == 0)
+ update_sk_ctx(sock->sk, label, plabel);
+ aa_put_label(plabel);
+
+ return error;
+}
+
diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c
index 0aef34b9609b..391a586d0557 100644
--- a/security/apparmor/apparmorfs.c
+++ b/security/apparmor/apparmorfs.c
@@ -43,7 +43,7 @@
* The interface is split into two main components based on their function
* a securityfs component:
* used for static files that are always available, and which allows
- * userspace to specificy the location of the security filesystem.
+ * userspace to specify the location of the security filesystem.
*
* fns and data are prefixed with
* aa_sfs_
@@ -204,7 +204,7 @@ static struct file_system_type aafs_ops = {
/**
* __aafs_setup_d_inode - basic inode setup for apparmorfs
* @dir: parent directory for the dentry
- * @dentry: dentry we are seting the inode up for
+ * @dentry: dentry we are setting the inode up for
* @mode: permissions the file should have
* @data: data to store on inode.i_private, available in open()
* @link: if symlink, symlink target string
@@ -612,8 +612,7 @@ static const struct file_operations aa_fs_ns_revision_fops = {
static void profile_query_cb(struct aa_profile *profile, struct aa_perms *perms,
const char *match_str, size_t match_len)
{
- struct aa_ruleset *rules = list_first_entry(&profile->rules,
- typeof(*rules), list);
+ struct aa_ruleset *rules = profile->label.rules[0];
struct aa_perms tmp = { };
aa_state_t state = DFA_NOMATCH;
@@ -626,11 +625,20 @@ static void profile_query_cb(struct aa_profile *profile, struct aa_perms *perms,
if (state) {
struct path_cond cond = { };
- tmp = *(aa_lookup_fperms(rules->file, state, &cond));
+ tmp = *(aa_lookup_condperms(current_fsuid(),
+ rules->file, state, &cond));
}
} else if (rules->policy->dfa) {
if (!RULE_MEDIATES(rules, *match_str))
return; /* no change to current perms */
+ /* old user space does not correctly detect dbus mediation
+ * support so we may get dbus policy and requests when
+ * the abi doesn't support it. This can cause mediation
+ * regressions, so explicitly test for this situation.
+ */
+ if (*match_str == AA_CLASS_DBUS &&
+ !RULE_MEDIATES_v9NET(rules))
+ return; /* no change to current perms */
state = aa_dfa_match_len(rules->policy->dfa,
rules->policy->start[0],
match_str, match_len);
@@ -997,7 +1005,7 @@ static int aa_sfs_seq_show(struct seq_file *seq, void *v)
switch (fs_file->v_type) {
case AA_SFS_TYPE_BOOLEAN:
- seq_printf(seq, "%s\n", fs_file->v.boolean ? "yes" : "no");
+ seq_printf(seq, "%s\n", str_yes_no(fs_file->v.boolean));
break;
case AA_SFS_TYPE_STRING:
seq_printf(seq, "%s\n", fs_file->v.string);
@@ -1006,7 +1014,7 @@ static int aa_sfs_seq_show(struct seq_file *seq, void *v)
seq_printf(seq, "%#08lx\n", fs_file->v.u64);
break;
default:
- /* Ignore unpritable entry types. */
+ /* Ignore unprintable entry types. */
break;
}
@@ -1152,7 +1160,7 @@ static int seq_ns_stacked_show(struct seq_file *seq, void *v)
struct aa_label *label;
label = begin_current_label_crit_section();
- seq_printf(seq, "%s\n", label->size > 1 ? "yes" : "no");
+ seq_printf(seq, "%s\n", str_yes_no(label->size > 1));
end_current_label_crit_section(label);
return 0;
@@ -1175,7 +1183,7 @@ static int seq_ns_nsstacked_show(struct seq_file *seq, void *v)
}
}
- seq_printf(seq, "%s\n", count > 1 ? "yes" : "no");
+ seq_printf(seq, "%s\n", str_yes_no(count > 1));
end_current_label_crit_section(label);
return 0;
@@ -2244,7 +2252,7 @@ static void *p_next(struct seq_file *f, void *p, loff_t *pos)
/**
* p_stop - stop depth first traversal
* @f: seq_file we are filling
- * @p: the last profile writen
+ * @p: the last profile written
*
* Release all locking done by p_start/p_next on namespace tree
*/
@@ -2332,6 +2340,7 @@ static struct aa_sfs_entry aa_sfs_entry_attach[] = {
static struct aa_sfs_entry aa_sfs_entry_domain[] = {
AA_SFS_FILE_BOOLEAN("change_hat", 1),
AA_SFS_FILE_BOOLEAN("change_hatv", 1),
+ AA_SFS_FILE_BOOLEAN("unconfined_allowed_children", 1),
AA_SFS_FILE_BOOLEAN("change_onexec", 1),
AA_SFS_FILE_BOOLEAN("change_profile", 1),
AA_SFS_FILE_BOOLEAN("stack", 1),
@@ -2340,6 +2349,7 @@ static struct aa_sfs_entry aa_sfs_entry_domain[] = {
AA_SFS_FILE_BOOLEAN("computed_longest_left", 1),
AA_SFS_DIR("attach_conditions", aa_sfs_entry_attach),
AA_SFS_FILE_BOOLEAN("disconnected.path", 1),
+ AA_SFS_FILE_BOOLEAN("kill.signal", 1),
AA_SFS_FILE_STRING("version", "1.2"),
{ }
};
@@ -2364,7 +2374,7 @@ static struct aa_sfs_entry aa_sfs_entry_policy[] = {
AA_SFS_FILE_BOOLEAN("set_load", 1),
/* number of out of band transitions supported */
AA_SFS_FILE_U64("outofband", MAX_OOB_SUPPORTED),
- AA_SFS_FILE_U64("permstable32_version", 1),
+ AA_SFS_FILE_U64("permstable32_version", 3),
AA_SFS_FILE_STRING("permstable32", PERMS32STR),
AA_SFS_FILE_U64("state32", 1),
AA_SFS_DIR("unconfined_restrictions", aa_sfs_entry_unconfined),
@@ -2384,6 +2394,11 @@ static struct aa_sfs_entry aa_sfs_entry_ns[] = {
{ }
};
+static struct aa_sfs_entry aa_sfs_entry_dbus[] = {
+ AA_SFS_FILE_STRING("mask", "acquire send receive"),
+ { }
+};
+
static struct aa_sfs_entry aa_sfs_entry_query_label[] = {
AA_SFS_FILE_STRING("perms", "allow deny audit quiet"),
AA_SFS_FILE_BOOLEAN("data", 1),
@@ -2406,6 +2421,7 @@ static struct aa_sfs_entry aa_sfs_entry_features[] = {
AA_SFS_DIR("domain", aa_sfs_entry_domain),
AA_SFS_DIR("file", aa_sfs_entry_file),
AA_SFS_DIR("network_v8", aa_sfs_entry_network),
+ AA_SFS_DIR("network_v9", aa_sfs_entry_networkv9),
AA_SFS_DIR("mount", aa_sfs_entry_mount),
AA_SFS_DIR("namespaces", aa_sfs_entry_ns),
AA_SFS_FILE_U64("capability", VFS_CAP_FLAGS_MASK),
@@ -2413,6 +2429,7 @@ static struct aa_sfs_entry aa_sfs_entry_features[] = {
AA_SFS_DIR("caps", aa_sfs_entry_caps),
AA_SFS_DIR("ptrace", aa_sfs_entry_ptrace),
AA_SFS_DIR("signal", aa_sfs_entry_signal),
+ AA_SFS_DIR("dbus", aa_sfs_entry_dbus),
AA_SFS_DIR("query", aa_sfs_entry_query),
AA_SFS_DIR("io_uring", aa_sfs_entry_io_uring),
{ }
diff --git a/security/apparmor/audit.c b/security/apparmor/audit.c
index 73087d76f649..ac89602aa2d9 100644
--- a/security/apparmor/audit.c
+++ b/security/apparmor/audit.c
@@ -192,7 +192,7 @@ int aa_audit(int type, struct aa_profile *profile,
aa_audit_msg(type, ad, cb);
if (ad->type == AUDIT_APPARMOR_KILL)
- (void)send_sig_info(SIGKILL, NULL,
+ (void)send_sig_info(profile->signal, NULL,
ad->common.type == LSM_AUDIT_DATA_TASK &&
ad->common.u.tsk ? ad->common.u.tsk : current);
diff --git a/security/apparmor/capability.c b/security/apparmor/capability.c
index 7ca489ee1054..b9ea6bc45c1a 100644
--- a/security/apparmor/capability.c
+++ b/security/apparmor/capability.c
@@ -27,6 +27,7 @@
struct aa_sfs_entry aa_sfs_entry_caps[] = {
AA_SFS_FILE_STRING("mask", AA_SFS_CAPS_MASK),
+ AA_SFS_FILE_BOOLEAN("extended", 1),
{ }
};
@@ -68,8 +69,7 @@ static int audit_caps(struct apparmor_audit_data *ad, struct aa_profile *profile
{
const u64 AUDIT_CACHE_TIMEOUT_NS = 1000*1000*1000; /* 1 second */
- struct aa_ruleset *rules = list_first_entry(&profile->rules,
- typeof(*rules), list);
+ struct aa_ruleset *rules = profile->label.rules[0];
struct audit_cache *ent;
int type = AUDIT_APPARMOR_AUTO;
@@ -121,10 +121,32 @@ static int audit_caps(struct apparmor_audit_data *ad, struct aa_profile *profile
static int profile_capable(struct aa_profile *profile, int cap,
unsigned int opts, struct apparmor_audit_data *ad)
{
- struct aa_ruleset *rules = list_first_entry(&profile->rules,
- typeof(*rules), list);
+ struct aa_ruleset *rules = profile->label.rules[0];
+ aa_state_t state;
int error;
+ state = RULE_MEDIATES(rules, ad->class);
+ if (state) {
+ struct aa_perms perms = { };
+ u32 request;
+
+ /* caps broken into 256 x 32 bit permission chunks */
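+ /* e.g. CAP_NET_ADMIN (12) is chunk 12 >> 5 == 0, request bit 1 << 12 */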
+ state = aa_dfa_next(rules->policy->dfa, state, cap >> 5);
+ request = 1 << (cap & 0x1f);
+ perms = *aa_lookup_perms(rules->policy, state);
+ aa_apply_modes_to_perms(profile, &perms);
+
+ if (opts & CAP_OPT_NOAUDIT) {
+ if (perms.complain & request)
+ ad->info = "optional: no audit";
+ else
+ ad = NULL;
+ }
+ return aa_check_perms(profile, &perms, request, ad,
+ audit_cb);
+ }
+
+ /* fallback to old caps mediation that doesn't support conditionals */
if (cap_raised(rules->caps.allow, cap) &&
!cap_raised(rules->caps.denied, cap))
error = 0;
@@ -168,3 +190,34 @@ int aa_capable(const struct cred *subj_cred, struct aa_label *label,
return error;
}
+
+kernel_cap_t aa_profile_capget(struct aa_profile *profile)
+{
+ struct aa_ruleset *rules = profile->label.rules[0];
+ aa_state_t state;
+
+ state = RULE_MEDIATES(rules, AA_CLASS_CAP);
+ if (state) {
+ kernel_cap_t caps = CAP_EMPTY_SET;
+ int i;
+
+ /* caps broken into up to 256, 32 bit permission chunks */
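+ /* chunk i covers caps [i * 32, i * 32 + 31] */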
+ for (i = 0; i <= (CAP_LAST_CAP >> 5); i++) {
+ struct aa_perms perms = { };
+ aa_state_t tmp;
+
+ tmp = aa_dfa_next(rules->policy->dfa, state, i);
+ perms = *aa_lookup_perms(rules->policy, tmp);
+ aa_apply_modes_to_perms(profile, &perms);
+ caps.val |= ((u64)(perms.allow)) << (i * 32);
+ caps.val |= ((u64)(perms.complain)) << (i * 32);
+ }
+ return caps;
+ }
+
+ /* fallback to old caps */
+ if (COMPLAIN_MODE(profile))
+ return CAP_FULL_SET;
+
+ return rules->caps.allow;
+}
diff --git a/security/apparmor/domain.c b/security/apparmor/domain.c
index 5939bd9a9b9b..267da82afb14 100644
--- a/security/apparmor/domain.c
+++ b/security/apparmor/domain.c
@@ -28,6 +28,12 @@
#include "include/policy.h"
#include "include/policy_ns.h"
+static const char * const CONFLICTING_ATTACH_STR = "conflicting profile attachments";
+static const char * const CONFLICTING_ATTACH_STR_IX =
+ "conflicting profile attachments - ix fallback";
+static const char * const CONFLICTING_ATTACH_STR_UX =
+ "conflicting profile attachments - ux fallback";
+
/**
* may_change_ptraced_domain - check if can change profile on ptraced task
* @to_cred: cred of task changing domain
@@ -87,8 +93,7 @@ static inline aa_state_t match_component(struct aa_profile *profile,
struct aa_profile *tp,
bool stack, aa_state_t state)
{
- struct aa_ruleset *rules = list_first_entry(&profile->rules,
- typeof(*rules), list);
+ struct aa_ruleset *rules = profile->label.rules[0];
const char *ns_name;
if (stack)
@@ -125,8 +130,7 @@ static int label_compound_match(struct aa_profile *profile,
aa_state_t state, bool subns, u32 request,
struct aa_perms *perms)
{
- struct aa_ruleset *rules = list_first_entry(&profile->rules,
- typeof(*rules), list);
+ struct aa_ruleset *rules = profile->label.rules[0];
struct aa_profile *tp;
struct label_it i;
struct path_cond cond = { };
@@ -154,7 +158,8 @@ next:
if (!state)
goto fail;
}
- *perms = *(aa_lookup_fperms(rules->file, state, &cond));
+ *perms = *(aa_lookup_condperms(current_fsuid(), rules->file, state,
+ &cond));
aa_apply_modes_to_perms(profile, perms);
if ((perms->allow & request) != request)
return -EACCES;
@@ -187,8 +192,7 @@ static int label_components_match(struct aa_profile *profile,
aa_state_t start, bool subns, u32 request,
struct aa_perms *perms)
{
- struct aa_ruleset *rules = list_first_entry(&profile->rules,
- typeof(*rules), list);
+ struct aa_ruleset *rules = profile->label.rules[0];
struct aa_profile *tp;
struct label_it i;
struct aa_perms tmp;
@@ -209,7 +213,8 @@ static int label_components_match(struct aa_profile *profile,
return 0;
next:
- tmp = *(aa_lookup_fperms(rules->file, state, &cond));
+ tmp = *(aa_lookup_condperms(current_fsuid(), rules->file, state,
+ &cond));
aa_apply_modes_to_perms(profile, &tmp);
aa_perms_accum(perms, &tmp);
label_for_each_cont(i, label, tp) {
@@ -218,7 +223,8 @@ next:
state = match_component(profile, tp, stack, start);
if (!state)
goto fail;
- tmp = *(aa_lookup_fperms(rules->file, state, &cond));
+ tmp = *(aa_lookup_condperms(current_fsuid(), rules->file, state,
+ &cond));
aa_apply_modes_to_perms(profile, &tmp);
aa_perms_accum(perms, &tmp);
}
@@ -323,7 +329,7 @@ static int aa_xattrs_match(const struct linux_binprm *bprm,
size = vfs_getxattr_alloc(&nop_mnt_idmap, d, attach->xattrs[i],
&value, value_size, GFP_KERNEL);
if (size >= 0) {
- u32 index, perm;
+ struct aa_perms *perms;
/*
* Check the xattr presence before value. This ensure
@@ -335,9 +341,8 @@ static int aa_xattrs_match(const struct linux_binprm *bprm,
/* Check xattr value */
state = aa_dfa_match_len(attach->xmatch->dfa, state,
value, size);
- index = ACCEPT_TABLE(attach->xmatch->dfa)[state];
- perm = attach->xmatch->perms[index].allow;
- if (!(perm & MAY_EXEC)) {
+ perms = aa_lookup_perms(attach->xmatch, state);
+ if (!(perms->allow & MAY_EXEC)) {
ret = -EINVAL;
goto out;
}
@@ -415,15 +420,14 @@ restart:
if (attach->xmatch->dfa) {
unsigned int count;
aa_state_t state;
- u32 index, perm;
+ struct aa_perms *perms;
state = aa_dfa_leftmatch(attach->xmatch->dfa,
attach->xmatch->start[AA_CLASS_XMATCH],
name, &count);
- index = ACCEPT_TABLE(attach->xmatch->dfa)[state];
- perm = attach->xmatch->perms[index].allow;
+ perms = aa_lookup_perms(attach->xmatch, state);
/* any accepting state means a valid match. */
- if (perm & MAY_EXEC) {
+ if (perms->allow & MAY_EXEC) {
int ret = 0;
if (count < candidate_len)
@@ -484,7 +488,7 @@ restart:
if (!candidate || conflict) {
if (conflict)
- *info = "conflicting profile attachments";
+ *info = CONFLICTING_ATTACH_STR;
rcu_read_unlock();
return NULL;
}
@@ -508,15 +512,16 @@ static const char *next_name(int xtype, const char *name)
* @name: returns: name tested to find label (NOT NULL)
*
* Returns: refcounted label, or NULL on failure (MAYBE NULL)
+ * @name will always be set with the last name tried
*/
struct aa_label *x_table_lookup(struct aa_profile *profile, u32 xindex,
const char **name)
{
- struct aa_ruleset *rules = list_first_entry(&profile->rules,
- typeof(*rules), list);
+ struct aa_ruleset *rules = profile->label.rules[0];
struct aa_label *label = NULL;
u32 xtype = xindex & AA_X_TYPE_MASK;
int index = xindex & AA_X_INDEX_MASK;
+ const char *next;
AA_BUG(!name);
@@ -524,25 +529,27 @@ struct aa_label *x_table_lookup(struct aa_profile *profile, u32 xindex,
/* TODO: move lookup parsing to unpack time so this is a straight
* index into the resultant label
*/
- for (*name = rules->file->trans.table[index]; !label && *name;
- *name = next_name(xtype, *name)) {
+ for (next = rules->file->trans.table[index]; next;
+ next = next_name(xtype, next)) {
+ const char *lookup = (*next == '&') ? next + 1 : next;
+ *name = next;
if (xindex & AA_X_CHILD) {
- struct aa_profile *new_profile;
- /* release by caller */
- new_profile = aa_find_child(profile, *name);
- if (new_profile)
- label = &new_profile->label;
+ /* TODO: switch to parse to get stack of child */
+ struct aa_profile *new = aa_find_child(profile, lookup);
+
+ if (new)
+ /* release by caller */
+ return &new->label;
continue;
}
- label = aa_label_parse(&profile->label, *name, GFP_KERNEL,
+ label = aa_label_parse(&profile->label, lookup, GFP_KERNEL,
true, false);
- if (IS_ERR(label))
- label = NULL;
+ if (!IS_ERR_OR_NULL(label))
+ /* release by caller */
+ return label;
}
- /* released by caller */
-
- return label;
+ return NULL;
}
/**
@@ -564,12 +571,12 @@ static struct aa_label *x_to_label(struct aa_profile *profile,
const char **lookupname,
const char **info)
{
- struct aa_ruleset *rules = list_first_entry(&profile->rules,
- typeof(*rules), list);
struct aa_label *new = NULL;
+ struct aa_label *stack = NULL;
struct aa_ns *ns = profile->ns;
u32 xtype = xindex & AA_X_TYPE_MASK;
- const char *stack = NULL;
+ /* Used for info checks during fallback handling */
+ const char *old_info = NULL;
switch (xtype) {
case AA_X_NONE:
@@ -578,13 +585,14 @@ static struct aa_label *x_to_label(struct aa_profile *profile,
break;
case AA_X_TABLE:
/* TODO: fix when perm mapping done at unload */
- stack = rules->file->trans.table[xindex & AA_X_INDEX_MASK];
- if (*stack != '&') {
- /* released by caller */
- new = x_table_lookup(profile, xindex, lookupname);
- stack = NULL;
+ /* released by caller
+ * if NULL for both the stack and direct lookup, fall through
+ * to try the fallback transition
+ */
+ new = x_table_lookup(profile, xindex, lookupname);
+ if (!new || **lookupname != '&')
break;
- }
+ stack = new;
+ new = NULL;
fallthrough; /* to X_NAME */
case AA_X_NAME:
if (xindex & AA_X_CHILD)
@@ -599,17 +607,38 @@ static struct aa_label *x_to_label(struct aa_profile *profile,
break;
}
+ /* fallback transition check */
if (!new) {
if (xindex & AA_X_INHERIT) {
/* (p|c|n)ix - don't change profile but do
* use the newest version
*/
- *info = "ix fallback";
+ if (*info == CONFLICTING_ATTACH_STR) {
+ *info = CONFLICTING_ATTACH_STR_IX;
+ } else {
+ old_info = *info;
+ *info = "ix fallback";
+ }
/* no profile && no error */
new = aa_get_newest_label(&profile->label);
} else if (xindex & AA_X_UNCONFINED) {
new = aa_get_newest_label(ns_unconfined(profile->ns));
- *info = "ux fallback";
+ if (*info == CONFLICTING_ATTACH_STR) {
+ *info = CONFLICTING_ATTACH_STR_UX;
+ } else {
+ old_info = *info;
+ *info = "ux fallback";
+ }
+ }
+ /* We set old_info on the code paths above where overwriting
+ * could have happened, so now check if info was set by
+ * find_attach as well (i.e. whether we actually overwrote)
+ * and warn accordingly.
+ */
+ if (old_info && old_info != CONFLICTING_ATTACH_STR) {
+ pr_warn_ratelimited(
+ "AppArmor: find_attach (from profile %s) audit info \"%s\" dropped\n",
+ profile->base.hname, old_info);
}
}
@@ -617,12 +646,12 @@ static struct aa_label *x_to_label(struct aa_profile *profile,
/* base the stack on post domain transition */
struct aa_label *base = new;
- new = aa_label_parse(base, stack, GFP_KERNEL, true, false);
- if (IS_ERR(new))
- new = NULL;
+ new = aa_label_merge(base, stack, GFP_KERNEL);
+ /* null on error */
aa_put_label(base);
}
+ aa_put_label(stack);
/* released by caller */
return new;
}
@@ -633,8 +662,7 @@ static struct aa_label *profile_transition(const struct cred *subj_cred,
char *buffer, struct path_cond *cond,
bool *secure_exec)
{
- struct aa_ruleset *rules = list_first_entry(&profile->rules,
- typeof(*rules), list);
+ struct aa_ruleset *rules = profile->label.rules[0];
struct aa_label *new = NULL;
struct aa_profile *new_profile = NULL;
const char *info = NULL, *name = NULL, *target = NULL;
@@ -652,7 +680,7 @@ static struct aa_label *profile_transition(const struct cred *subj_cred,
if (error) {
if (profile_unconfined(profile) ||
(profile->label.flags & FLAG_IX_ON_NAME_ERROR)) {
- AA_DEBUG("name lookup ix on error");
+ AA_DEBUG(DEBUG_DOMAIN, "name lookup ix on error");
error = 0;
new = aa_get_newest_label(&profile->label);
}
@@ -663,11 +691,27 @@ static struct aa_label *profile_transition(const struct cred *subj_cred,
if (profile_unconfined(profile)) {
new = find_attach(bprm, profile->ns,
&profile->ns->base.profiles, name, &info);
+ /* info set -> something unusual that we should report
+ * Currently this is only conflicting attachments, but other
+ * infos added in the future should also be logged by default
+ * and only excluded on a case-by-case basis
+ */
+ if (info) {
+ /* Because perms is never used again after this audit
+ * we don't need to care about clobbering it
+ */
+ perms.audit |= MAY_EXEC;
+ perms.allow |= MAY_EXEC;
+ /* Don't cause error if auditing fails */
+ (void) aa_audit_file(subj_cred, profile, &perms,
+ OP_EXEC, MAY_EXEC, name, target, new, cond->uid,
+ info, error);
+ }
if (new) {
- AA_DEBUG("unconfined attached to new label");
+ AA_DEBUG(DEBUG_DOMAIN, "unconfined attached to new label");
return new;
}
- AA_DEBUG("unconfined exec no attachment");
+ AA_DEBUG(DEBUG_DOMAIN, "unconfined exec no attachment");
return aa_get_newest_label(&profile->label);
}
@@ -678,9 +722,21 @@ static struct aa_label *profile_transition(const struct cred *subj_cred,
new = x_to_label(profile, bprm, name, perms.xindex, &target,
&info);
if (new && new->proxy == profile->label.proxy && info) {
+ /* Force audit on conflicting attachment fallback
+ * Because perms is never used again after this audit
+ * we don't need to care about clobbering it
+ */
+ if (info == CONFLICTING_ATTACH_STR_IX ||
+ info == CONFLICTING_ATTACH_STR_UX)
+ perms.audit |= MAY_EXEC;
/* hack ix fallback - improve how this is detected */
goto audit;
} else if (!new) {
+ if (info) {
+ pr_warn_ratelimited(
+ "AppArmor: %s (from profile %s) audit info \"%s\" dropped on missing transition\n",
+ __func__, profile->base.hname, info);
+ }
info = "profile transition not found";
/* remove MAY_EXEC to audit as failure or complaint */
perms.allow &= ~MAY_EXEC;
@@ -739,8 +795,7 @@ static int profile_onexec(const struct cred *subj_cred,
char *buffer, struct path_cond *cond,
bool *secure_exec)
{
- struct aa_ruleset *rules = list_first_entry(&profile->rules,
- typeof(*rules), list);
+ struct aa_ruleset *rules = profile->label.rules[0];
aa_state_t state = rules->file->start[AA_CLASS_FILE];
struct aa_perms perms = {};
const char *xname = NULL, *info = "change_profile onexec";
@@ -755,7 +810,7 @@ static int profile_onexec(const struct cred *subj_cred,
/* change_profile on exec already granted */
/*
* NOTE: Domain transitions from unconfined are allowed
- * even when no_new_privs is set because this aways results
+ * even when no_new_privs is set because this always results
* in a further reduction of permissions.
*/
return 0;
@@ -766,7 +821,7 @@ static int profile_onexec(const struct cred *subj_cred,
if (error) {
if (profile_unconfined(profile) ||
(profile->label.flags & FLAG_IX_ON_NAME_ERROR)) {
- AA_DEBUG("name lookup ix on error");
+ AA_DEBUG(DEBUG_DOMAIN, "name lookup ix on error");
error = 0;
}
xname = bprm->filename;
@@ -926,7 +981,7 @@ int apparmor_bprm_creds_for_exec(struct linux_binprm *bprm)
*
* NOTE: Domain transitions from unconfined and to stacked
* subsets are allowed even when no_new_privs is set because this
- * aways results in a further reduction of permissions.
+ * always results in a further reduction of permissions.
*/
if ((bprm->unsafe & LSM_UNSAFE_NO_NEW_PRIVS) &&
!unconfined(label) &&
@@ -1188,10 +1243,24 @@ int aa_change_hat(const char *hats[], int count, u64 token, int flags)
if (task_no_new_privs(current) && !unconfined(label) && !ctx->nnp)
ctx->nnp = aa_get_label(label);
+ /* return -EPERM when unconfined doesn't have children to avoid
+ * changing the traditional error code for unconfined.
+ */
if (unconfined(label)) {
- info = "unconfined can not change_hat";
- error = -EPERM;
- goto fail;
+ struct label_it i;
+ bool empty = true;
+
+ rcu_read_lock();
+ label_for_each_in_ns(i, labels_ns(label), label, profile) {
+ empty &= list_empty(&profile->base.profiles);
+ }
+ rcu_read_unlock();
+
+ if (empty) {
+ info = "unconfined can not change_hat";
+ error = -EPERM;
+ goto fail;
+ }
}
if (count) {
@@ -1216,7 +1285,8 @@ int aa_change_hat(const char *hats[], int count, u64 token, int flags)
if (task_no_new_privs(current) && !unconfined(label) &&
!aa_label_is_unconfined_subset(new, ctx->nnp)) {
/* not an apparmor denial per se, so don't log it */
- AA_DEBUG("no_new_privs - change_hat denied");
+ AA_DEBUG(DEBUG_DOMAIN,
+ "no_new_privs - change_hat denied");
error = -EPERM;
goto out;
}
@@ -1237,7 +1307,8 @@ int aa_change_hat(const char *hats[], int count, u64 token, int flags)
if (task_no_new_privs(current) && !unconfined(label) &&
!aa_label_is_unconfined_subset(previous, ctx->nnp)) {
/* not an apparmor denial per se, so don't log it */
- AA_DEBUG("no_new_privs - change_hat denied");
+ AA_DEBUG(DEBUG_DOMAIN,
+ "no_new_privs - change_hat denied");
error = -EPERM;
goto out;
}
@@ -1282,8 +1353,7 @@ static int change_profile_perms_wrapper(const char *op, const char *name,
struct aa_label *target, bool stack,
u32 request, struct aa_perms *perms)
{
- struct aa_ruleset *rules = list_first_entry(&profile->rules,
- typeof(*rules), list);
+ struct aa_ruleset *rules = profile->label.rules[0];
const char *info = NULL;
int error = 0;
@@ -1343,7 +1413,7 @@ int aa_change_profile(const char *fqname, int flags)
if (!fqname || !*fqname) {
aa_put_label(label);
- AA_DEBUG("no profile name");
+ AA_DEBUG(DEBUG_DOMAIN, "no profile name");
return -EINVAL;
}
@@ -1462,7 +1532,8 @@ check:
if (task_no_new_privs(current) && !unconfined(label) &&
!aa_label_is_unconfined_subset(new, ctx->nnp)) {
/* not an apparmor denial per se, so don't log it */
- AA_DEBUG("no_new_privs - change_hat denied");
+ AA_DEBUG(DEBUG_DOMAIN,
+ "no_new_privs - change_hat denied");
error = -EPERM;
goto out;
}
diff --git a/security/apparmor/file.c b/security/apparmor/file.c
index f494217112c9..c75820402878 100644
--- a/security/apparmor/file.c
+++ b/security/apparmor/file.c
@@ -14,6 +14,7 @@
#include <linux/fs.h>
#include <linux/mount.h>
+#include "include/af_unix.h"
#include "include/apparmor.h"
#include "include/audit.h"
#include "include/cred.h"
@@ -168,8 +169,9 @@ static int path_name(const char *op, const struct cred *subj_cred,
struct aa_perms default_perms = {};
/**
- * aa_lookup_fperms - convert dfa compressed perms to internal perms
- * @file_rules: the aa_policydb to lookup perms for (NOT NULL)
+ * aa_lookup_condperms - convert dfa compressed perms to internal perms
+ * @subj_uid: uid to use for subject owner test
+ * @rules: the aa_policydb to lookup perms for (NOT NULL)
* @state: state in dfa
* @cond: conditions to consider (NOT NULL)
*
@@ -177,18 +179,21 @@ struct aa_perms default_perms = {};
*
* Returns: a pointer to a file permission set
*/
-struct aa_perms *aa_lookup_fperms(struct aa_policydb *file_rules,
- aa_state_t state, struct path_cond *cond)
+struct aa_perms *aa_lookup_condperms(kuid_t subj_uid, struct aa_policydb *rules,
+ aa_state_t state, struct path_cond *cond)
{
- unsigned int index = ACCEPT_TABLE(file_rules->dfa)[state];
+ unsigned int index = ACCEPT_TABLE(rules->dfa)[state];
- if (!(file_rules->perms))
+ if (!(rules->perms))
return &default_perms;
- if (uid_eq(current_fsuid(), cond->uid))
- return &(file_rules->perms[index]);
+ if ((ACCEPT_TABLE2(rules->dfa)[state] & ACCEPT_FLAG_OWNER)) {
+ if (uid_eq(subj_uid, cond->uid))
+ return &(rules->perms[index]);
+ return &(rules->perms[index + 1]);
+ }
- return &(file_rules->perms[index + 1]);
+ return &(rules->perms[index]);
}
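
/* When ACCEPT_FLAG_OWNER is set in the accept2 entry, the accept index
 * selects a pair of perm sets: perms[index] applies when the subject uid
 * matches the object owner, perms[index + 1] applies to everyone else.
 */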
/**
@@ -207,21 +212,22 @@ aa_state_t aa_str_perms(struct aa_policydb *file_rules, aa_state_t start,
{
aa_state_t state;
state = aa_dfa_match(file_rules->dfa, start, name);
- *perms = *(aa_lookup_fperms(file_rules, state, cond));
+ *perms = *(aa_lookup_condperms(current_fsuid(), file_rules, state,
+ cond));
return state;
}
-static int __aa_path_perm(const char *op, const struct cred *subj_cred,
- struct aa_profile *profile, const char *name,
- u32 request, struct path_cond *cond, int flags,
- struct aa_perms *perms)
+int __aa_path_perm(const char *op, const struct cred *subj_cred,
+ struct aa_profile *profile, const char *name,
+ u32 request, struct path_cond *cond, int flags,
+ struct aa_perms *perms)
{
- struct aa_ruleset *rules = list_first_entry(&profile->rules,
- typeof(*rules), list);
+ struct aa_ruleset *rules = profile->label.rules[0];
int e = 0;
- if (profile_unconfined(profile))
+ if (profile_unconfined(profile) ||
+ ((flags & PATH_SOCK_COND) && !RULE_MEDIATES_v9NET(rules)))
return 0;
aa_str_perms(rules->file, rules->file->start[AA_CLASS_FILE],
name, cond, perms);
@@ -316,8 +322,7 @@ static int profile_path_link(const struct cred *subj_cred,
const struct path *target, char *buffer2,
struct path_cond *cond)
{
- struct aa_ruleset *rules = list_first_entry(&profile->rules,
- typeof(*rules), list);
+ struct aa_ruleset *rules = profile->label.rules[0];
const char *lname, *tname = NULL;
struct aa_perms lperms = {}, perms;
const char *info = NULL;
@@ -423,9 +428,11 @@ int aa_path_link(const struct cred *subj_cred,
{
struct path link = { .mnt = new_dir->mnt, .dentry = new_dentry };
struct path target = { .mnt = new_dir->mnt, .dentry = old_dentry };
+ struct inode *inode = d_backing_inode(old_dentry);
+ vfsuid_t vfsuid = i_uid_into_vfsuid(mnt_idmap(target.mnt), inode);
struct path_cond cond = {
- d_backing_inode(old_dentry)->i_uid,
- d_backing_inode(old_dentry)->i_mode
+ .uid = vfsuid_into_kuid(vfsuid),
+ .mode = inode->i_mode,
};
char *buffer = NULL, *buffer2 = NULL;
struct aa_profile *profile;
@@ -534,22 +541,19 @@ static int __file_sock_perm(const char *op, const struct cred *subj_cred,
struct aa_label *flabel, struct file *file,
u32 request, u32 denied)
{
- struct socket *sock = (struct socket *) file->private_data;
int error;
- AA_BUG(!sock);
-
/* revalidation due to label out of date. No revocation at this time */
if (!denied && aa_label_is_subset(flabel, label))
return 0;
/* TODO: improve to skip profiles cached in flabel */
- error = aa_sock_file_perm(subj_cred, label, op, request, sock);
+ error = aa_sock_file_perm(subj_cred, label, op, request, file);
if (denied) {
/* TODO: improve to skip profiles checked above */
/* check every profile in file label to is cached */
last_error(error, aa_sock_file_perm(subj_cred, flabel, op,
- request, sock));
+ request, file));
}
if (!error)
update_file_ctx(file_ctx(file), label, request);
@@ -557,6 +561,35 @@ static int __file_sock_perm(const char *op, const struct cred *subj_cred,
return error;
}
+/* for now separate fn to indicate semantics of the check */
+static bool __file_is_delegated(struct aa_label *obj_label)
+{
+ return unconfined(obj_label);
+}
+
+static bool __unix_needs_revalidation(struct file *file, struct aa_label *label,
+ u32 request)
+{
+ struct socket *sock = (struct socket *) file->private_data;
+
+ lockdep_assert_in_rcu_read_lock();
+
+ if (!S_ISSOCK(file_inode(file)->i_mode))
+ return false;
+ if (request & NET_PEER_MASK)
+ return false;
+ if (sock->sk->sk_family == PF_UNIX) {
+ struct aa_sk_ctx *ctx = aa_sock(sock->sk);
+
+ if (rcu_access_pointer(ctx->peer) !=
+ rcu_access_pointer(ctx->peer_lastupdate))
+ return true;
+ return !__aa_subj_label_is_cached(rcu_dereference(ctx->label),
+ label);
+ }
+ return false;
+}
+
/**
* aa_file_perm - do permission revalidation check & audit for @file
* @op: operation being checked
@@ -594,15 +627,16 @@ int aa_file_perm(const char *op, const struct cred *subj_cred,
* delegation from unconfined tasks
*/
denied = request & ~fctx->allow;
- if (unconfined(label) || unconfined(flabel) ||
- (!denied && aa_label_is_subset(flabel, label))) {
+ if (unconfined(label) || __file_is_delegated(flabel) ||
+ __unix_needs_revalidation(file, label, request) ||
+ (!denied && __aa_subj_label_is_cached(label, flabel))) {
rcu_read_unlock();
goto done;
}
+ /* slow path - revalidate access */
flabel = aa_get_newest_label(flabel);
rcu_read_unlock();
- /* TODO: label cross check */
if (path_mediated_fs(file->f_path.dentry))
error = __file_path_perm(op, subj_cred, label, flabel, file,
diff --git a/security/apparmor/include/af_unix.h b/security/apparmor/include/af_unix.h
new file mode 100644
index 000000000000..4a62e600d82b
--- /dev/null
+++ b/security/apparmor/include/af_unix.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * AppArmor security module
+ *
+ * This file contains AppArmor af_unix fine grained mediation
+ *
+ * Copyright 2023 Canonical Ltd.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation, version 2 of the
+ * License.
+ */
+#ifndef __AA_AF_UNIX_H
+#define __AA_AF_UNIX_H
+
+#include <net/af_unix.h>
+
+#include "label.h"
+
+#define unix_addr(A) ((struct sockaddr_un *)(A))
+#define unix_addr_len(L) ((L) - sizeof(sa_family_t))
+#define unix_peer(sk) (unix_sk(sk)->peer)
+#define is_unix_addr_abstract_name(B) ((B)[0] == 0)
+#define is_unix_addr_anon(A, L) ((A) && unix_addr_len(L) <= 0)
+#define is_unix_addr_fs(A, L) (!is_unix_addr_anon(A, L) && \
+ !is_unix_addr_abstract_name(unix_addr(A)->sun_path))
+
+#define is_unix_anonymous(U) (!unix_sk(U)->addr)
+#define is_unix_fs(U) (!is_unix_anonymous(U) && \
+ unix_sk(U)->addr->name->sun_path[0])
+#define is_unix_connected(S) ((S)->state == SS_CONNECTED)
+
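+/* e.g. an unbound socketpair() sock is anonymous (no addr), a sock bound
+ * to "/run/foo.sock" is fs-based (sun_path[0] != 0), and one bound to the
+ * abstract name "\0foo" has sun_path[0] == 0.
+ */
+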
+struct sockaddr_un *aa_sunaddr(const struct unix_sock *u, int *addrlen);
+int aa_unix_peer_perm(const struct cred *subj_cred,
+ struct aa_label *label, const char *op, u32 request,
+ struct sock *sk, struct sock *peer_sk,
+ struct aa_label *peer_label);
+int aa_unix_sock_perm(const char *op, u32 request, struct socket *sock);
+int aa_unix_create_perm(struct aa_label *label, int family, int type,
+ int protocol);
+int aa_unix_bind_perm(struct socket *sock, struct sockaddr *address,
+ int addrlen);
+int aa_unix_connect_perm(struct socket *sock, struct sockaddr *address,
+ int addrlen);
+int aa_unix_listen_perm(struct socket *sock, int backlog);
+int aa_unix_accept_perm(struct socket *sock, struct socket *newsock);
+int aa_unix_msg_perm(const char *op, u32 request, struct socket *sock,
+ struct msghdr *msg, int size);
+int aa_unix_opt_perm(const char *op, u32 request, struct socket *sock, int level,
+ int optname);
+int aa_unix_file_perm(const struct cred *subj_cred, struct aa_label *label,
+ const char *op, u32 request, struct file *file);
+
+#endif /* __AA_AF_UNIX_H */
diff --git a/security/apparmor/include/apparmor.h b/security/apparmor/include/apparmor.h
index f83934913b0f..cc6e3df1bc62 100644
--- a/security/apparmor/include/apparmor.h
+++ b/security/apparmor/include/apparmor.h
@@ -28,6 +28,7 @@
#define AA_CLASS_SIGNAL 10
#define AA_CLASS_XMATCH 11
#define AA_CLASS_NET 14
+#define AA_CLASS_NETV9 15
#define AA_CLASS_LABEL 16
#define AA_CLASS_POSIX_MQUEUE 17
#define AA_CLASS_MODULE 19
@@ -38,12 +39,13 @@
#define AA_CLASS_X 31
#define AA_CLASS_DBUS 32
+/* NOTE: if AA_CLASS_LAST > 63 need to update label->mediates */
#define AA_CLASS_LAST AA_CLASS_DBUS
/* Control parameters settable through module/boot flags */
extern enum audit_mode aa_g_audit;
extern bool aa_g_audit_header;
-extern bool aa_g_debug;
+extern int aa_g_debug;
extern bool aa_g_hash_policy;
extern bool aa_g_export_binary;
extern int aa_g_rawdata_compression_level;
diff --git a/security/apparmor/include/audit.h b/security/apparmor/include/audit.h
index e27229349abb..1a71a94ea19c 100644
--- a/security/apparmor/include/audit.h
+++ b/security/apparmor/include/audit.h
@@ -138,9 +138,12 @@ struct apparmor_audit_data {
};
struct {
int type, protocol;
- struct sock *peer_sk;
void *addr;
int addrlen;
+ struct {
+ void *addr;
+ int addrlen;
+ } peer;
} net;
};
};
diff --git a/security/apparmor/include/capability.h b/security/apparmor/include/capability.h
index d6dcc604ec0c..1ddcec2d1160 100644
--- a/security/apparmor/include/capability.h
+++ b/security/apparmor/include/capability.h
@@ -36,6 +36,7 @@ struct aa_caps {
extern struct aa_sfs_entry aa_sfs_entry_caps[];
+kernel_cap_t aa_profile_capget(struct aa_profile *profile);
int aa_capable(const struct cred *subj_cred, struct aa_label *label,
int cap, unsigned int opts);
diff --git a/security/apparmor/include/cred.h b/security/apparmor/include/cred.h
index 7265d2f81dd5..b028e4c13b6f 100644
--- a/security/apparmor/include/cred.h
+++ b/security/apparmor/include/cred.h
@@ -114,10 +114,22 @@ static inline struct aa_label *aa_get_current_label(void)
return aa_get_label(l);
}
-#define __end_current_label_crit_section(X) end_current_label_crit_section(X)
+/**
+ * __end_current_label_crit_section - end crit section begun with __begin_...
+ * @label: label obtained from __begin_current_label_crit_section
+ * @needput: whether @label needs to be put, as set by
+ *           __begin_current_label_crit_section
+ */
+static inline void __end_current_label_crit_section(struct aa_label *label,
+ bool needput)
+{
+ if (unlikely(needput))
+ aa_put_label(label);
+}
/**
- * end_label_crit_section - put a reference found with begin_current_label..
+ * end_current_label_crit_section - put a reference found with begin_current_label..
* @label: label reference to put
*
* Should only be used with a reference obtained with
@@ -132,6 +144,7 @@ static inline void end_current_label_crit_section(struct aa_label *label)
/**
* __begin_current_label_crit_section - current's confining label
+ * @needput: store whether the label needs to be put when ending crit section
*
* Returns: up to date confining label or the ns unconfined label (NOT NULL)
*
@@ -142,13 +155,16 @@ static inline void end_current_label_crit_section(struct aa_label *label)
* critical section between __begin_current_label_crit_section() ..
* __end_current_label_crit_section()
*/
-static inline struct aa_label *__begin_current_label_crit_section(void)
+static inline struct aa_label *__begin_current_label_crit_section(bool *needput)
{
struct aa_label *label = aa_current_raw_label();
- if (label_is_stale(label))
- label = aa_get_newest_label(label);
+ if (label_is_stale(label)) {
+ *needput = true;
+ return aa_get_newest_label(label);
+ }
+ *needput = false;
return label;
}
@@ -184,10 +200,11 @@ static inline struct aa_ns *aa_get_current_ns(void)
{
struct aa_label *label;
struct aa_ns *ns;
+ bool needput;
- label = __begin_current_label_crit_section();
+ label = __begin_current_label_crit_section(&needput);
ns = aa_get_ns(labels_ns(label));
- __end_current_label_crit_section(label);
+ __end_current_label_crit_section(label, needput);
return ns;
}
diff --git a/security/apparmor/include/file.h b/security/apparmor/include/file.h
index 6e8f2aa66cd6..ef60f99bc5ae 100644
--- a/security/apparmor/include/file.h
+++ b/security/apparmor/include/file.h
@@ -77,12 +77,17 @@ int aa_audit_file(const struct cred *cred,
const char *target, struct aa_label *tlabel, kuid_t ouid,
const char *info, int error);
-struct aa_perms *aa_lookup_fperms(struct aa_policydb *file_rules,
- aa_state_t state, struct path_cond *cond);
+struct aa_perms *aa_lookup_condperms(kuid_t subj_uid,
+ struct aa_policydb *file_rules,
+ aa_state_t state, struct path_cond *cond);
aa_state_t aa_str_perms(struct aa_policydb *file_rules, aa_state_t start,
const char *name, struct path_cond *cond,
struct aa_perms *perms);
+int __aa_path_perm(const char *op, const struct cred *subj_cred,
+ struct aa_profile *profile, const char *name,
+ u32 request, struct path_cond *cond, int flags,
+ struct aa_perms *perms);
int aa_path_perm(const char *op, const struct cred *subj_cred,
struct aa_label *label, const struct path *path,
int flags, u32 request, struct path_cond *cond);
@@ -99,7 +104,7 @@ void aa_inherit_files(const struct cred *cred, struct files_struct *files);
/**
- * aa_map_file_perms - map file flags to AppArmor permissions
+ * aa_map_file_to_perms - map file flags to AppArmor permissions
* @file: open file to map flags to AppArmor permissions
*
* Returns: apparmor permission set for the file
diff --git a/security/apparmor/include/ipc.h b/security/apparmor/include/ipc.h
index 74d17052f76b..323dd071afe9 100644
--- a/security/apparmor/include/ipc.h
+++ b/security/apparmor/include/ipc.h
@@ -13,6 +13,9 @@
#include <linux/sched.h>
+#define SIGUNKNOWN 0
+#define MAXMAPPED_SIG 35
+
int aa_may_signal(const struct cred *subj_cred, struct aa_label *sender,
const struct cred *target_cred, struct aa_label *target,
int sig);
diff --git a/security/apparmor/include/label.h b/security/apparmor/include/label.h
index 93290ae300bb..c0812dbc1b5b 100644
--- a/security/apparmor/include/label.h
+++ b/security/apparmor/include/label.h
@@ -19,6 +19,7 @@
#include "lib.h"
struct aa_ns;
+struct aa_ruleset;
#define LOCAL_VEC_ENTRIES 8
#define DEFINE_VEC(T, V) \
@@ -109,7 +110,7 @@ struct label_it {
int i, j;
};
-/* struct aa_label - lazy labeling struct
+/* struct aa_label - lazy labeling struct, base info of a label
* @count: ref count of active users
* @node: rbtree position
* @rcu: rcu callback struct
@@ -118,7 +119,10 @@ struct label_it {
* @flags: stale and other flags - values may change under label set lock
* @secid: secid that references this label
* @size: number of entries in @ent[]
- * @ent: set of profiles for label, actual size determined by @size
+ * @mediates: bitmask for label_mediates
+ * @profile: label vec when embedded in a profile (FLAG_PROFILE is set)
+ * @rules: variable length rules of a profile (FLAG_PROFILE is set)
+ * @vec: vector of profiles comprising the compound label
*/
struct aa_label {
struct kref count;
@@ -129,7 +133,18 @@ struct aa_label {
long flags;
u32 secid;
int size;
- struct aa_profile *vec[];
+ u64 mediates;
+ union {
+ struct {
+ /* only used if the label is a profile; size of
+ * rules[] is determined by the profile,
+ * profile[1] is poison or NULL as a guard
+ */
+ struct aa_profile *profile[2];
+ DECLARE_FLEX_ARRAY(struct aa_ruleset *, rules);
+ };
+ DECLARE_FLEX_ARRAY(struct aa_profile *, vec);
+ };
};
#define last_error(E, FN) \
@@ -231,20 +246,17 @@ int aa_label_next_confined(struct aa_label *l, int i);
#define fn_for_each_not_in_set(L1, L2, P, FN) \
fn_for_each2_XXX((L1), (L2), P, FN, _not_in_set)
-#define LABEL_MEDIATES(L, C) \
-({ \
- struct aa_profile *profile; \
- struct label_it i; \
- int ret = 0; \
- label_for_each(i, (L), profile) { \
- if (RULE_MEDIATES(&profile->rules, (C))) { \
- ret = 1; \
- break; \
- } \
- } \
- ret; \
-})
+static inline bool label_mediates(struct aa_label *L, unsigned char C)
+{
+ return (L)->mediates & (((u64) 1) << (C));
+}
+static inline bool label_mediates_safe(struct aa_label *L, unsigned char C)
+{
+ if (C > AA_CLASS_LAST)
+ return false;
+ return label_mediates(L, C);
+}
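+
+/* e.g. label_mediates(label, AA_CLASS_NETV9) tests bit 15 of ->mediates;
+ * label_mediates_safe() additionally rejects classes above AA_CLASS_LAST.
+ */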
void aa_labelset_destroy(struct aa_labelset *ls);
void aa_labelset_init(struct aa_labelset *ls);
@@ -417,6 +429,13 @@ static inline void aa_put_label(struct aa_label *l)
kref_put(&l->count, aa_label_kref);
}
+/* wrapper fn to indicate semantics of the check */
+static inline bool __aa_subj_label_is_cached(struct aa_label *subj_label,
+ struct aa_label *obj_label)
+{
+ return aa_label_is_subset(obj_label, subj_label);
+}
+
struct aa_proxy *aa_alloc_proxy(struct aa_label *l, gfp_t gfp);
void aa_proxy_kref(struct kref *kref);
diff --git a/security/apparmor/include/lib.h b/security/apparmor/include/lib.h
index f11a0db7f51d..444197075fd6 100644
--- a/security/apparmor/include/lib.h
+++ b/security/apparmor/include/lib.h
@@ -19,22 +19,34 @@
extern struct aa_dfa *stacksplitdfa;
/*
- * DEBUG remains global (no per profile flag) since it is mostly used in sysctl
- * which is not related to profile accesses.
- */
-
-#define DEBUG_ON (aa_g_debug)
-/*
* split individual debug cases out in preparation for finer grained
* debug controls in the future.
*/
-#define AA_DEBUG_LABEL DEBUG_ON
#define dbg_printk(__fmt, __args...) pr_debug(__fmt, ##__args)
-#define AA_DEBUG(fmt, args...) \
+
+#define DEBUG_NONE 0
+#define DEBUG_LABEL_ABS_ROOT 1
+#define DEBUG_LABEL 2
+#define DEBUG_DOMAIN 4
+#define DEBUG_POLICY 8
+#define DEBUG_INTERFACE 0x10
+
+#define DEBUG_ALL 0x1f /* update if new DEBUG_X added */
+#define DEBUG_PARSE_ERROR (-1)
+
+#define DEBUG_ON (aa_g_debug != DEBUG_NONE)
+#define DEBUG_ABS_ROOT (aa_g_debug & DEBUG_LABEL_ABS_ROOT)
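+
+/* e.g. aa_g_debug = DEBUG_DOMAIN | DEBUG_POLICY (0xc) enables only the
+ * domain and policy AA_DEBUG() output classes below.
+ */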
+
+#define AA_DEBUG(opt, fmt, args...) \
do { \
- if (DEBUG_ON) \
- pr_debug_ratelimited("AppArmor: " fmt, ##args); \
+ if (aa_g_debug & opt) \
+ pr_warn_ratelimited("%s: " fmt, __func__, ##args); \
} while (0)
+#define AA_DEBUG_LABEL(LAB, X, fmt, args...) \
+do { \
+ if ((LAB)->flags & FLAG_DEBUG1) \
+ AA_DEBUG(X, fmt, args); \
+} while (0)
#define AA_WARN(X) WARN((X), "APPARMOR WARN %s: %s\n", __func__, #X)
@@ -48,9 +60,16 @@ extern struct aa_dfa *stacksplitdfa;
#define AA_BUG_FMT(X, fmt, args...) \
WARN((X), "AppArmor WARN %s: (" #X "): " fmt, __func__, ##args)
#else
-#define AA_BUG_FMT(X, fmt, args...) no_printk(fmt, ##args)
+#define AA_BUG_FMT(X, fmt, args...) \
+ do { \
+ BUILD_BUG_ON_INVALID(X); \
+ no_printk(fmt, ##args); \
+ } while (0)
#endif
+int aa_parse_debug_params(const char *str);
+int aa_print_debug_params(char *buffer);
+
#define AA_ERROR(fmt, args...) \
pr_err_ratelimited("AppArmor: " fmt, ##args)
@@ -106,6 +125,7 @@ struct aa_str_table {
};
void aa_free_str_table(struct aa_str_table *table);
+bool aa_resize_str_table(struct aa_str_table *t, int newsize, gfp_t gfp);
struct counted_str {
struct kref count;
@@ -151,7 +171,7 @@ struct aa_policy {
/**
* basename - find the last component of an hname
- * @name: hname to find the base profile name component of (NOT NULL)
+ * @hname: hname to find the base profile name component of (NOT NULL)
*
* Returns: the tail (base profile name) name component of an hname
*/
@@ -281,7 +301,7 @@ __do_cleanup: \
} \
__done: \
if (!__new_) \
- AA_DEBUG("label build failed\n"); \
+ AA_DEBUG(DEBUG_LABEL, "label build failed\n"); \
(__new_); \
})
diff --git a/security/apparmor/include/match.h b/security/apparmor/include/match.h
index 536ce3abd598..1fbe82f5021b 100644
--- a/security/apparmor/include/match.h
+++ b/security/apparmor/include/match.h
@@ -17,7 +17,7 @@
#define DFA_START 1
-/**
+/*
* The format used for transition tables is based on the GNU flex table
* file format (--tables-file option; see Table File Format in the flex
* info pages and the flex sources for documentation). The magic number
@@ -137,17 +137,15 @@ aa_state_t aa_dfa_matchn_until(struct aa_dfa *dfa, aa_state_t start,
void aa_dfa_free_kref(struct kref *kref);
-#define WB_HISTORY_SIZE 24
+/* This needs to be a power of 2 */
+#define WB_HISTORY_SIZE 32
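+/* (presumably so the history index can wrap with a cheap mask, i.e.
+ * history[pos & (WB_HISTORY_SIZE - 1)], rather than a modulo)
+ */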
struct match_workbuf {
- unsigned int count;
unsigned int pos;
unsigned int len;
- unsigned int size; /* power of 2, same as history size */
- unsigned int history[WB_HISTORY_SIZE];
+ aa_state_t history[WB_HISTORY_SIZE];
};
#define DEFINE_MATCH_WB(N) \
struct match_workbuf N = { \
- .count = 0, \
.pos = 0, \
.len = 0, \
}
diff --git a/security/apparmor/include/net.h b/security/apparmor/include/net.h
index c42ed8a73f1c..0d0b0ce42723 100644
--- a/security/apparmor/include/net.h
+++ b/security/apparmor/include/net.h
@@ -47,8 +47,9 @@
#define NET_PEER_MASK (AA_MAY_SEND | AA_MAY_RECEIVE | AA_MAY_CONNECT | \
AA_MAY_ACCEPT)
struct aa_sk_ctx {
- struct aa_label *label;
- struct aa_label *peer;
+ struct aa_label __rcu *label;
+ struct aa_label __rcu *peer;
+ struct aa_label __rcu *peer_lastupdate; /* ptr cmp only, no deref */
};
static inline struct aa_sk_ctx *aa_sock(const struct sock *sk)
@@ -56,7 +57,7 @@ static inline struct aa_sk_ctx *aa_sock(const struct sock *sk)
return sk->sk_security + apparmor_blob_sizes.lbs_sock;
}
-#define DEFINE_AUDIT_NET(NAME, OP, SK, F, T, P) \
+#define DEFINE_AUDIT_NET(NAME, OP, CRED, SK, F, T, P) \
struct lsm_network_audit NAME ## _net = { .sk = (SK), \
.family = (F)}; \
DEFINE_AUDIT_DATA(NAME, \
@@ -65,24 +66,15 @@ static inline struct aa_sk_ctx *aa_sock(const struct sock *sk)
AA_CLASS_NET, \
OP); \
NAME.common.u.net = &(NAME ## _net); \
+ NAME.subj_cred = (CRED); \
NAME.net.type = (T); \
NAME.net.protocol = (P)
-#define DEFINE_AUDIT_SK(NAME, OP, SK) \
- DEFINE_AUDIT_NET(NAME, OP, SK, (SK)->sk_family, (SK)->sk_type, \
+#define DEFINE_AUDIT_SK(NAME, OP, CRED, SK) \
+ DEFINE_AUDIT_NET(NAME, OP, CRED, SK, (SK)->sk_family, (SK)->sk_type, \
(SK)->sk_protocol)
-#define af_select(FAMILY, FN, DEF_FN) \
-({ \
- int __e; \
- switch ((FAMILY)) { \
- default: \
- __e = DEF_FN; \
- } \
- __e; \
-})
-
struct aa_secmark {
u8 audit;
u8 deny;
@@ -91,11 +83,19 @@ struct aa_secmark {
};
extern struct aa_sfs_entry aa_sfs_entry_network[];
-
+extern struct aa_sfs_entry aa_sfs_entry_networkv9[];
+
+int aa_do_perms(struct aa_profile *profile, struct aa_policydb *policy,
+ aa_state_t state, u32 request, struct aa_perms *p,
+ struct apparmor_audit_data *ad);
+/* passing in state returned by XXX_mediates_AF() */
+aa_state_t aa_match_to_prot(struct aa_policydb *policy, aa_state_t state,
+ u32 request, u16 af, int type, int protocol,
+ struct aa_perms **p, const char **info);
void audit_net_cb(struct audit_buffer *ab, void *va);
int aa_profile_af_perm(struct aa_profile *profile,
struct apparmor_audit_data *ad,
- u32 request, u16 family, int type);
+ u32 request, u16 family, int type, int protocol);
int aa_af_perm(const struct cred *subj_cred, struct aa_label *label,
const char *op, u32 request, u16 family,
int type, int protocol);
@@ -105,13 +105,13 @@ static inline int aa_profile_af_sk_perm(struct aa_profile *profile,
struct sock *sk)
{
return aa_profile_af_perm(profile, ad, request, sk->sk_family,
- sk->sk_type);
+ sk->sk_type, sk->sk_protocol);
}
int aa_sk_perm(const char *op, u32 request, struct sock *sk);
int aa_sock_file_perm(const struct cred *subj_cred, struct aa_label *label,
const char *op, u32 request,
- struct socket *sock);
+ struct file *file);
int apparmor_secmark_check(struct aa_label *label, char *op, u32 request,
u32 secid, const struct sock *sk);
diff --git a/security/apparmor/include/path.h b/security/apparmor/include/path.h
index 343189903dba..8bb915d48dc7 100644
--- a/security/apparmor/include/path.h
+++ b/security/apparmor/include/path.h
@@ -13,6 +13,7 @@
enum path_flags {
PATH_IS_DIR = 0x1, /* path is a directory */
+ PATH_SOCK_COND = 0x2,
PATH_CONNECT_PATH = 0x4, /* connect disconnected paths to / */
PATH_CHROOT_REL = 0x8, /* do path lookup relative to chroot */
PATH_CHROOT_NSCONNECT = 0x10, /* connect paths that are at ns root */
diff --git a/security/apparmor/include/perms.h b/security/apparmor/include/perms.h
index bbaa7d39a39a..37a3781b99a0 100644
--- a/security/apparmor/include/perms.h
+++ b/security/apparmor/include/perms.h
@@ -101,8 +101,8 @@ extern struct aa_perms allperms;
/**
* aa_perms_accum_raw - accumulate perms without masking off overlapping perms
- * @accum - perms struct to accumulate into
- * @addend - perms struct to add to @accum
+ * @accum: perms struct to accumulate into
+ * @addend: perms struct to add to @accum
*/
static inline void aa_perms_accum_raw(struct aa_perms *accum,
struct aa_perms *addend)
@@ -128,8 +128,8 @@ static inline void aa_perms_accum_raw(struct aa_perms *accum,
/**
* aa_perms_accum - accumulate perms, masking off overlapping perms
- * @accum - perms struct to accumulate into
- * @addend - perms struct to add to @accum
+ * @accum: perms struct to accumulate into
+ * @addend: perms struct to add to @accum
*/
static inline void aa_perms_accum(struct aa_perms *accum,
struct aa_perms *addend)
diff --git a/security/apparmor/include/policy.h b/security/apparmor/include/policy.h
index 757e3c232c57..4c50875c9d13 100644
--- a/security/apparmor/include/policy.h
+++ b/security/apparmor/include/policy.h
@@ -59,6 +59,11 @@ extern const char *const aa_profile_mode_names[];
#define on_list_rcu(X) (!list_empty(X) && (X)->prev != LIST_POISON2)
+/* flags in the dfa accept2 table */
+enum dfa_accept_flags {
+ ACCEPT_FLAG_OWNER = 1,
+};
+
/*
* FIXME: currently need a clean way to replace and remove profiles as a
* set. It should be done at the namespace level.
@@ -124,6 +129,7 @@ static inline void aa_put_pdb(struct aa_policydb *pdb)
kref_put(&pdb->count, aa_pdb_free_kref);
}
+/* lookup perm that doesn't have an object conditional */
static inline struct aa_perms *aa_lookup_perms(struct aa_policydb *policy,
aa_state_t state)
{
@@ -135,7 +141,6 @@ static inline struct aa_perms *aa_lookup_perms(struct aa_policydb *policy,
return &(policy->perms[index]);
}
-
/* struct aa_data - generic data structure
* key: name for retrieving this data
* size: size of data in bytes
@@ -160,8 +165,6 @@ struct aa_data {
* @secmark: secmark label match info
*/
struct aa_ruleset {
- struct list_head list;
-
int size;
/* TODO: merge policy and file */
@@ -175,6 +178,7 @@ struct aa_ruleset {
struct aa_secmark *secmark;
};
+
/* struct aa_attachment - data and rules for a profile's attachment
* @list:
* @xmatch_str: human readable attachment string
@@ -193,7 +197,6 @@ struct aa_attachment {
/* struct aa_profile - basic confinement data
* @base - base components of the profile (name, refcount, lists, lock ...)
- * @label - label this profile is an extension of
* @parent: parent of profile
* @ns: namespace the profile is in
* @rename: optional profile name that this profile renamed
@@ -201,13 +204,20 @@ struct aa_attachment {
* @audit: the auditing mode of the profile
* @mode: the enforcement mode of the profile
* @path_flags: flags controlling path generation behavior
+ * @signal: the signal that should be used when kill is used
* @disconnected: what to prepend if attach_disconnected is specified
* @attach: attachment rules for the profile
* @rules: rules to be enforced
*
+ * learning_cache: the accesses learned in complain mode
+ * raw_data: rawdata of the loaded profile policy
+ * hash: cryptographic hash of the profile
 * @dents: dentries for the profile's file entries in apparmorfs
 * @dirname: name of the profile dir in apparmorfs
 * @data: hashtable for free-form policy aa_data
+ * @label: label this profile is an extension of
+ * @rules: vec of rulesets, stored at the end of @label
*
* The AppArmor profile contains the basic confinement data. Each profile
* has a name, and exists in a namespace. The @name and @exec_match are
@@ -231,16 +241,19 @@ struct aa_profile {
enum audit_mode audit;
long mode;
u32 path_flags;
+ int signal;
const char *disconnected;
struct aa_attachment attach;
- struct list_head rules;
struct aa_loaddata *rawdata;
unsigned char *hash;
char *dirname;
struct dentry *dents[AAFS_PROF_SIZEOF];
struct rhashtable *data;
+
+ int n_rules;
+	/* special - variable length; must be the last member of profile */
struct aa_label label;
};
@@ -298,24 +311,38 @@ static inline aa_state_t RULE_MEDIATES(struct aa_ruleset *rules,
rules->policy->start[0], &class, 1);
}
-static inline aa_state_t RULE_MEDIATES_AF(struct aa_ruleset *rules, u16 AF)
+static inline aa_state_t RULE_MEDIATES_v9NET(struct aa_ruleset *rules)
{
- aa_state_t state = RULE_MEDIATES(rules, AA_CLASS_NET);
- __be16 be_af = cpu_to_be16(AF);
+ return RULE_MEDIATES(rules, AA_CLASS_NETV9);
+}
+
+static inline aa_state_t RULE_MEDIATES_NET(struct aa_ruleset *rules)
+{
+	/* cannot use RULE_MEDIATE_v9AF here, because an AF match failure
+	 * cannot be distinguished from a class match failure, and we only
+	 * fall back to checking the older class on a class match failure
+ */
+ aa_state_t state = RULE_MEDIATES(rules, AA_CLASS_NETV9);
+ /* fallback and check v7/8 if v9 is NOT mediated */
if (!state)
- return DFA_NOMATCH;
- return aa_dfa_match_len(rules->policy->dfa, state, (char *) &be_af, 2);
+ state = RULE_MEDIATES(rules, AA_CLASS_NET);
+
+ return state;
}
-static inline aa_state_t ANY_RULE_MEDIATES(struct list_head *head,
- unsigned char class)
+
+void aa_compute_profile_mediates(struct aa_profile *profile);
+static inline bool profile_mediates(struct aa_profile *profile,
+ unsigned char class)
{
- struct aa_ruleset *rule;
+ return label_mediates(&profile->label, class);
+}
- /* TODO: change to list walk */
- rule = list_first_entry(head, typeof(*rule), list);
- return RULE_MEDIATES(rule, class);
+static inline bool profile_mediates_safe(struct aa_profile *profile,
+ unsigned char class)
+{
+ return label_mediates_safe(&profile->label, class);
}
/**
diff --git a/security/apparmor/include/sig_names.h b/security/apparmor/include/sig_names.h
index cbf7a997ed84..c772668cdc62 100644
--- a/security/apparmor/include/sig_names.h
+++ b/security/apparmor/include/sig_names.h
@@ -1,9 +1,5 @@
#include <linux/signal.h>
-
-#define SIGUNKNOWN 0
-#define MAXMAPPED_SIG 35
-#define MAXMAPPED_SIGNAME (MAXMAPPED_SIG + 1)
-#define SIGRT_BASE 128
+#include "signal.h"
/* provide a mapping of arch signal to internal signal # for mediation
* those that are always an alias: SIGCLD for SIGCHLD and SIGPOLL for SIGIO
diff --git a/security/apparmor/include/signal.h b/security/apparmor/include/signal.h
new file mode 100644
index 000000000000..729763fa7ce6
--- /dev/null
+++ b/security/apparmor/include/signal.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * AppArmor security module
+ *
+ * This file contains AppArmor signal mediation definitions.
+ *
+ * Copyright 2023 Canonical Ltd.
+ */
+
+#ifndef __AA_SIGNAL_H
+#define __AA_SIGNAL_H
+
+#define SIGUNKNOWN 0
+#define MAXMAPPED_SIG 35
+
+#define MAXMAPPED_SIGNAME (MAXMAPPED_SIG + 1)
+#define SIGRT_BASE 128
+
+#endif /* __AA_SIGNAL_H */
diff --git a/security/apparmor/ipc.c b/security/apparmor/ipc.c
index 0cdf4340b02d..df5712cea685 100644
--- a/security/apparmor/ipc.c
+++ b/security/apparmor/ipc.c
@@ -80,21 +80,20 @@ static int profile_signal_perm(const struct cred *cred,
struct aa_label *peer, u32 request,
struct apparmor_audit_data *ad)
{
- struct aa_ruleset *rules = list_first_entry(&profile->rules,
- typeof(*rules), list);
+ struct aa_ruleset *rules = profile->label.rules[0];
struct aa_perms perms;
aa_state_t state;
- if (profile_unconfined(profile) ||
- !ANY_RULE_MEDIATES(&profile->rules, AA_CLASS_SIGNAL))
+ if (profile_unconfined(profile))
return 0;
ad->subj_cred = cred;
ad->peer = peer;
/* TODO: secondary cache check <profile, profile, perm> */
- state = aa_dfa_next(rules->policy->dfa,
- rules->policy->start[AA_CLASS_SIGNAL],
- ad->signal);
+ state = RULE_MEDIATES(rules, AA_CLASS_SIGNAL);
+ if (!state)
+ return 0;
+ state = aa_dfa_next(rules->policy->dfa, state, ad->signal);
aa_label_match(profile, rules, peer, state, false, request, &perms);
aa_apply_modes_to_perms(profile, &perms);
return aa_check_perms(profile, &perms, request, ad, audit_signal_cb);
diff --git a/security/apparmor/label.c b/security/apparmor/label.c
index 91483ecacc16..913678f199c3 100644
--- a/security/apparmor/label.c
+++ b/security/apparmor/label.c
@@ -198,21 +198,25 @@ static bool vec_is_stale(struct aa_profile **vec, int n)
return false;
}
-static long accum_vec_flags(struct aa_profile **vec, int n)
+static void accum_label_info(struct aa_label *new)
{
long u = FLAG_UNCONFINED;
int i;
- AA_BUG(!vec);
+ AA_BUG(!new);
- for (i = 0; i < n; i++) {
- u |= vec[i]->label.flags & (FLAG_DEBUG1 | FLAG_DEBUG2 |
- FLAG_STALE);
- if (!(u & vec[i]->label.flags & FLAG_UNCONFINED))
+ /* size == 1 is a profile and flags must be set as part of creation */
+ if (new->size == 1)
+ return;
+
+ for (i = 0; i < new->size; i++) {
+ u |= new->vec[i]->label.flags & (FLAG_DEBUG1 | FLAG_DEBUG2 |
+ FLAG_STALE);
+ if (!(u & new->vec[i]->label.flags & FLAG_UNCONFINED))
u &= ~FLAG_UNCONFINED;
+ new->mediates |= new->vec[i]->label.mediates;
}
-
- return u;
+ new->flags |= u;
}
static int sort_cmp(const void *a, const void *b)
@@ -431,7 +435,7 @@ struct aa_label *aa_label_alloc(int size, struct aa_proxy *proxy, gfp_t gfp)
/* + 1 for null terminator entry on vec */
new = kzalloc(struct_size(new, vec, size + 1), gfp);
- AA_DEBUG("%s (%p)\n", __func__, new);
+ AA_DEBUG(DEBUG_LABEL, "%s (%p)\n", __func__, new);
if (!new)
goto fail;
@@ -645,6 +649,7 @@ static bool __label_replace(struct aa_label *old, struct aa_label *new)
rb_replace_node(&old->node, &new->node, &ls->root);
old->flags &= ~FLAG_IN_TREE;
new->flags |= FLAG_IN_TREE;
+ accum_label_info(new);
return true;
}
@@ -705,6 +710,7 @@ static struct aa_label *__label_insert(struct aa_labelset *ls,
rb_link_node(&label->node, parent, new);
rb_insert_color(&label->node, &ls->root);
label->flags |= FLAG_IN_TREE;
+ accum_label_info(label);
return aa_get_label(label);
}
@@ -1085,7 +1091,6 @@ static struct aa_label *label_merge_insert(struct aa_label *new,
else if (k == b->size)
return aa_get_label(b);
}
- new->flags |= accum_vec_flags(new->vec, new->size);
ls = labels_set(new);
write_lock_irqsave(&ls->lock, flags);
label = __label_insert(labels_set(new), new, false);
@@ -1456,7 +1461,7 @@ bool aa_update_label_name(struct aa_ns *ns, struct aa_label *label, gfp_t gfp)
/*
* cached label name is present and visible
- * @label->hname only exists if label is namespace hierachical
+ * @label->hname only exists if label is namespace hierarchical
*/
static inline bool use_label_hname(struct aa_ns *ns, struct aa_label *label,
int flags)
@@ -1617,7 +1622,7 @@ int aa_label_snxprint(char *str, size_t size, struct aa_ns *ns,
AA_BUG(!str && size != 0);
AA_BUG(!label);
- if (AA_DEBUG_LABEL && (flags & FLAG_ABS_ROOT)) {
+ if (DEBUG_ABS_ROOT && (flags & FLAG_ABS_ROOT)) {
ns = root_ns;
len = snprintf(str, size, "_");
update_for_len(total, len, size, str);
@@ -1731,7 +1736,7 @@ void aa_label_xaudit(struct audit_buffer *ab, struct aa_ns *ns,
display_mode(ns, label, flags)) {
len = aa_label_asxprint(&name, ns, label, flags, gfp);
if (len < 0) {
- AA_DEBUG("label print error");
+ AA_DEBUG(DEBUG_LABEL, "label print error");
return;
}
str = name;
@@ -1759,7 +1764,7 @@ void aa_label_seq_xprint(struct seq_file *f, struct aa_ns *ns,
len = aa_label_asxprint(&str, ns, label, flags, gfp);
if (len < 0) {
- AA_DEBUG("label print error");
+ AA_DEBUG(DEBUG_LABEL, "label print error");
return;
}
seq_puts(f, str);
@@ -1782,7 +1787,7 @@ void aa_label_xprintk(struct aa_ns *ns, struct aa_label *label, int flags,
len = aa_label_asxprint(&str, ns, label, flags, gfp);
if (len < 0) {
- AA_DEBUG("label print error");
+ AA_DEBUG(DEBUG_LABEL, "label print error");
return;
}
pr_info("%s", str);
@@ -1865,7 +1870,7 @@ struct aa_label *aa_label_strn_parse(struct aa_label *base, const char *str,
AA_BUG(!str);
str = skipn_spaces(str, n);
- if (str == NULL || (AA_DEBUG_LABEL && *str == '_' &&
+ if (str == NULL || (DEBUG_ABS_ROOT && *str == '_' &&
base != &root_ns->unconfined->label))
return ERR_PTR(-EINVAL);
diff --git a/security/apparmor/lib.c b/security/apparmor/lib.c
index 7db62213e352..82dbb97ad406 100644
--- a/security/apparmor/lib.c
+++ b/security/apparmor/lib.c
@@ -25,6 +25,120 @@ struct aa_perms allperms = { .allow = ALL_PERMS_MASK,
.quiet = ALL_PERMS_MASK,
.hide = ALL_PERMS_MASK };
+struct val_table_ent {
+ const char *str;
+ int value;
+};
+
+static struct val_table_ent debug_values_table[] = {
+ { "N", DEBUG_NONE },
+ { "none", DEBUG_NONE },
+ { "n", DEBUG_NONE },
+ { "0", DEBUG_NONE },
+ { "all", DEBUG_ALL },
+ { "Y", DEBUG_ALL },
+ { "y", DEBUG_ALL },
+ { "1", DEBUG_ALL },
+ { "abs_root", DEBUG_LABEL_ABS_ROOT },
+ { "label", DEBUG_LABEL },
+ { "domain", DEBUG_DOMAIN },
+ { "policy", DEBUG_POLICY },
+ { "interface", DEBUG_INTERFACE },
+ { NULL, 0 }
+};
+
+static struct val_table_ent *val_table_find_ent(struct val_table_ent *table,
+ const char *name, size_t len)
+{
+ struct val_table_ent *entry;
+
+ for (entry = table; entry->str != NULL; entry++) {
+ if (strncmp(entry->str, name, len) == 0 &&
+ strlen(entry->str) == len)
+ return entry;
+ }
+ return NULL;
+}
+
+int aa_parse_debug_params(const char *str)
+{
+ struct val_table_ent *ent;
+ const char *next;
+ int val = 0;
+
+ do {
+ size_t n = strcspn(str, "\r\n,");
+
+ next = str + n;
+ ent = val_table_find_ent(debug_values_table, str, next - str);
+ if (ent)
+ val |= ent->value;
+ else
+ AA_DEBUG(DEBUG_INTERFACE, "unknown debug type '%.*s'",
+ (int)(next - str), str);
+ str = next + 1;
+ } while (*next != 0);
+ return val;
+}
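The parser splits on commas and newlines with strcspn() and ORs together the flag values of recognized tokens. A user-space sketch of the same loop shape, with made-up flag values standing in for the kernel's DEBUG_* enum:

    #include <stdio.h>
    #include <string.h>

    /* stand-in values; the real DEBUG_* constants live in the kernel enum */
    enum { DBG_LABEL = 0x1, DBG_POLICY = 0x2 };

    static int lookup(const char *s, size_t n)
    {
            if (n == strlen("label") && !strncmp(s, "label", n))
                    return DBG_LABEL;
            if (n == strlen("policy") && !strncmp(s, "policy", n))
                    return DBG_POLICY;
            return 0; /* unknown token - the kernel logs it instead */
    }

    int main(void)
    {
            const char *str = "label,policy";
            const char *next;
            int val = 0;

            do {    /* same shape as aa_parse_debug_params() */
                    size_t n = strcspn(str, "\r\n,");

                    next = str + n;
                    val |= lookup(str, n);
                    str = next + 1;
            } while (*next != 0);
            printf("mask: 0x%x\n", val); /* 0x3 */
            return 0;
    }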
+
+/**
+ * val_mask_to_str - convert a value mask to a comma-separated string
+ * @str: character buffer to store the string in
+ * @size: size of the @str buffer
+ * @table: table of value/name entries, terminated by a NULL str entry (NOT NULL)
+ * @mask: value mask to convert
+ */
+static int val_mask_to_str(char *str, size_t size,
+ const struct val_table_ent *table, u32 mask)
+{
+ const struct val_table_ent *ent;
+ int total = 0;
+
+ for (ent = table; ent->str; ent++) {
+ if (ent->value && (ent->value & mask) == ent->value) {
+ int len = scnprintf(str, size, "%s%s", total ? "," : "",
+ ent->str);
+ size -= len;
+ str += len;
+ total += len;
+ mask &= ~ent->value;
+ }
+ }
+
+ return total;
+}
+
+int aa_print_debug_params(char *buffer)
+{
+ if (!aa_g_debug)
+ return sprintf(buffer, "N");
+ return val_mask_to_str(buffer, PAGE_SIZE, debug_values_table,
+ aa_g_debug);
+}
+
+bool aa_resize_str_table(struct aa_str_table *t, int newsize, gfp_t gfp)
+{
+ char **n;
+ int i;
+
+ if (t->size == newsize)
+ return true;
+ n = kcalloc(newsize, sizeof(*n), gfp);
+ if (!n)
+ return false;
+ for (i = 0; i < min(t->size, newsize); i++)
+ n[i] = t->table[i];
+ for (; i < t->size; i++)
+ kfree_sensitive(t->table[i]);
+ if (newsize > t->size)
+ memset(&n[t->size], 0, (newsize-t->size)*sizeof(*n));
+ kfree_sensitive(t->table);
+ t->table = n;
+ t->size = newsize;
+
+ return true;
+}
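aa_resize_str_table() keeps the overlapping entries, frees any that fall off the end of a shrink, and leaves newly grown slots NULL; note the trailing memset is redundant since kcalloc() already zeroes the allocation. A user-space sketch of the same semantics, with free() standing in for kfree_sensitive():

    #include <stdlib.h>
    #include <string.h>

    struct str_table { char **table; int size; };

    static int resize(struct str_table *t, int newsize)
    {
            char **n;
            int i;

            if (t->size == newsize)
                    return 1;
            n = calloc(newsize, sizeof(*n)); /* grown slots start NULL */
            if (!n)
                    return 0;
            for (i = 0; i < (t->size < newsize ? t->size : newsize); i++)
                    n[i] = t->table[i];      /* keep overlapping entries */
            for (; i < t->size; i++)
                    free(t->table[i]);       /* a shrink drops the tail */
            free(t->table);
            t->table = n;
            t->size = newsize;
            return 1;
    }

    int main(void)
    {
            struct str_table t = { calloc(2, sizeof(char *)), 2 };

            t.table[0] = strdup("a");
            t.table[1] = strdup("b");
            resize(&t, 4);  /* "a", "b", NULL, NULL */
            resize(&t, 1);  /* "a" kept, "b" freed */
            free(t.table[0]);
            free(t.table);
            return 0;
    }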
+
/**
* aa_free_str_table - free entries str table
* @t: the string table to free (MAYBE NULL)
diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c
index 9b6c2f157f83..8e1cc229b41b 100644
--- a/security/apparmor/lsm.c
+++ b/security/apparmor/lsm.c
@@ -26,6 +26,7 @@
#include <uapi/linux/mount.h>
#include <uapi/linux/lsm.h>
+#include "include/af_unix.h"
#include "include/apparmor.h"
#include "include/apparmorfs.h"
#include "include/audit.h"
@@ -126,14 +127,15 @@ static int apparmor_ptrace_access_check(struct task_struct *child,
struct aa_label *tracer, *tracee;
const struct cred *cred;
int error;
+ bool needput;
cred = get_task_cred(child);
tracee = cred_label(cred); /* ref count on cred */
- tracer = __begin_current_label_crit_section();
+ tracer = __begin_current_label_crit_section(&needput);
error = aa_may_ptrace(current_cred(), tracer, cred, tracee,
(mode & PTRACE_MODE_READ) ? AA_PTRACE_READ
: AA_PTRACE_TRACE);
- __end_current_label_crit_section(tracer);
+ __end_current_label_crit_section(tracer, needput);
put_cred(cred);
return error;
@@ -144,14 +146,15 @@ static int apparmor_ptrace_traceme(struct task_struct *parent)
struct aa_label *tracer, *tracee;
const struct cred *cred;
int error;
+ bool needput;
- tracee = __begin_current_label_crit_section();
+ tracee = __begin_current_label_crit_section(&needput);
cred = get_task_cred(parent);
tracer = cred_label(cred); /* ref count on cred */
error = aa_may_ptrace(cred, tracer, current_cred(), tracee,
AA_PTRACE_TRACE);
put_cred(cred);
- __end_current_label_crit_section(tracee);
+ __end_current_label_crit_section(tracee, needput);
return error;
}
@@ -176,15 +179,11 @@ static int apparmor_capget(const struct task_struct *target, kernel_cap_t *effec
struct label_it i;
label_for_each_confined(i, label, profile) {
- struct aa_ruleset *rules;
- if (COMPLAIN_MODE(profile))
- continue;
- rules = list_first_entry(&profile->rules,
- typeof(*rules), list);
- *effective = cap_intersect(*effective,
- rules->caps.allow);
- *permitted = cap_intersect(*permitted,
- rules->caps.allow);
+ kernel_cap_t allowed;
+
+ allowed = aa_profile_capget(profile);
+ *effective = cap_intersect(*effective, allowed);
+ *permitted = cap_intersect(*permitted, allowed);
}
}
rcu_read_unlock();
@@ -221,12 +220,13 @@ static int common_perm(const char *op, const struct path *path, u32 mask,
{
struct aa_label *label;
int error = 0;
+ bool needput;
- label = __begin_current_label_crit_section();
+ label = __begin_current_label_crit_section(&needput);
if (!unconfined(label))
error = aa_path_perm(op, current_cred(), label, path, 0, mask,
cond);
- __end_current_label_crit_section(label);
+ __end_current_label_crit_section(label, needput);
return error;
}
@@ -524,14 +524,15 @@ static int common_file_perm(const char *op, struct file *file, u32 mask,
{
struct aa_label *label;
int error = 0;
+ bool needput;
/* don't reaudit files closed during inheritance */
- if (file->f_path.dentry == aa_null.dentry)
+ if (unlikely(file->f_path.dentry == aa_null.dentry))
return -EACCES;
- label = __begin_current_label_crit_section();
+ label = __begin_current_label_crit_section(&needput);
error = aa_file_perm(op, current_cred(), label, file, mask, in_atomic);
- __end_current_label_crit_section(label);
+ __end_current_label_crit_section(label, needput);
return error;
}
@@ -633,7 +634,7 @@ static int profile_uring(struct aa_profile *profile, u32 request,
AA_BUG(!profile);
- rules = list_first_entry(&profile->rules, typeof(*rules), list);
+ rules = profile->label.rules[0];
state = RULE_MEDIATES(rules, AA_CLASS_IO_URING);
if (state) {
struct aa_perms perms = { };
@@ -664,15 +665,16 @@ static int apparmor_uring_override_creds(const struct cred *new)
struct aa_profile *profile;
struct aa_label *label;
int error;
+ bool needput;
DEFINE_AUDIT_DATA(ad, LSM_AUDIT_DATA_NONE, AA_CLASS_IO_URING,
OP_URING_OVERRIDE);
ad.uring.target = cred_label(new);
- label = __begin_current_label_crit_section();
+ label = __begin_current_label_crit_section(&needput);
error = fn_for_each(label, profile,
profile_uring(profile, AA_MAY_OVERRIDE_CRED,
cred_label(new), CAP_SYS_ADMIN, &ad));
- __end_current_label_crit_section(label);
+ __end_current_label_crit_section(label, needput);
return error;
}
@@ -688,14 +690,15 @@ static int apparmor_uring_sqpoll(void)
struct aa_profile *profile;
struct aa_label *label;
int error;
+ bool needput;
DEFINE_AUDIT_DATA(ad, LSM_AUDIT_DATA_NONE, AA_CLASS_IO_URING,
OP_URING_SQPOLL);
- label = __begin_current_label_crit_section();
+ label = __begin_current_label_crit_section(&needput);
error = fn_for_each(label, profile,
profile_uring(profile, AA_MAY_CREATE_SQPOLL,
NULL, CAP_SYS_ADMIN, &ad));
- __end_current_label_crit_section(label);
+ __end_current_label_crit_section(label, needput);
return error;
}
@@ -706,6 +709,7 @@ static int apparmor_sb_mount(const char *dev_name, const struct path *path,
{
struct aa_label *label;
int error = 0;
+ bool needput;
/* Discard magic */
if ((flags & MS_MGC_MSK) == MS_MGC_VAL)
@@ -713,7 +717,7 @@ static int apparmor_sb_mount(const char *dev_name, const struct path *path,
flags &= ~AA_MS_IGNORE_MASK;
- label = __begin_current_label_crit_section();
+ label = __begin_current_label_crit_section(&needput);
if (!unconfined(label)) {
if (flags & MS_REMOUNT)
error = aa_remount(current_cred(), label, path, flags,
@@ -732,7 +736,7 @@ static int apparmor_sb_mount(const char *dev_name, const struct path *path,
error = aa_new_mount(current_cred(), label, dev_name,
path, type, flags, data);
}
- __end_current_label_crit_section(label);
+ __end_current_label_crit_section(label, needput);
return error;
}
@@ -742,12 +746,13 @@ static int apparmor_move_mount(const struct path *from_path,
{
struct aa_label *label;
int error = 0;
+ bool needput;
- label = __begin_current_label_crit_section();
+ label = __begin_current_label_crit_section(&needput);
if (!unconfined(label))
error = aa_move_mount(current_cred(), label, from_path,
to_path);
- __end_current_label_crit_section(label);
+ __end_current_label_crit_section(label, needput);
return error;
}
@@ -756,11 +761,12 @@ static int apparmor_sb_umount(struct vfsmount *mnt, int flags)
{
struct aa_label *label;
int error = 0;
+ bool needput;
- label = __begin_current_label_crit_section();
+ label = __begin_current_label_crit_section(&needput);
if (!unconfined(label))
error = aa_umount(current_cred(), label, mnt, flags);
- __end_current_label_crit_section(label);
+ __end_current_label_crit_section(label, needput);
return error;
}
@@ -984,10 +990,12 @@ static void apparmor_bprm_committed_creds(const struct linux_binprm *bprm)
static void apparmor_current_getlsmprop_subj(struct lsm_prop *prop)
{
- struct aa_label *label = __begin_current_label_crit_section();
+ struct aa_label *label;
+ bool needput;
+ label = __begin_current_label_crit_section(&needput);
prop->apparmor.label = label;
- __end_current_label_crit_section(label);
+ __end_current_label_crit_section(label, needput);
}
static void apparmor_task_getlsmprop_obj(struct task_struct *p,
@@ -1002,13 +1010,16 @@ static void apparmor_task_getlsmprop_obj(struct task_struct *p,
static int apparmor_task_setrlimit(struct task_struct *task,
unsigned int resource, struct rlimit *new_rlim)
{
- struct aa_label *label = __begin_current_label_crit_section();
+ struct aa_label *label;
int error = 0;
+ bool needput;
+
+ label = __begin_current_label_crit_section(&needput);
if (!unconfined(label))
error = aa_task_setrlimit(current_cred(), label, task,
resource, new_rlim);
- __end_current_label_crit_section(label);
+ __end_current_label_crit_section(label, needput);
return error;
}
@@ -1019,6 +1030,7 @@ static int apparmor_task_kill(struct task_struct *target, struct kernel_siginfo
const struct cred *tc;
struct aa_label *cl, *tl;
int error;
+ bool needput;
tc = get_task_cred(target);
tl = aa_get_newest_cred_label(tc);
@@ -1030,9 +1042,9 @@ static int apparmor_task_kill(struct task_struct *target, struct kernel_siginfo
error = aa_may_signal(cred, cl, tc, tl, sig);
aa_put_label(cl);
} else {
- cl = __begin_current_label_crit_section();
+ cl = __begin_current_label_crit_section(&needput);
error = aa_may_signal(current_cred(), cl, tc, tl, sig);
- __end_current_label_crit_section(cl);
+ __end_current_label_crit_section(cl, needput);
}
aa_put_label(tl);
put_cred(tc);
@@ -1061,12 +1073,29 @@ static int apparmor_userns_create(const struct cred *cred)
return error;
}
+static int apparmor_sk_alloc_security(struct sock *sk, int family, gfp_t gfp)
+{
+ struct aa_sk_ctx *ctx = aa_sock(sk);
+ struct aa_label *label;
+ bool needput;
+
+ label = __begin_current_label_crit_section(&needput);
+ //spin_lock_init(&ctx->lock);
+ rcu_assign_pointer(ctx->label, aa_get_label(label));
+ rcu_assign_pointer(ctx->peer, NULL);
+ rcu_assign_pointer(ctx->peer_lastupdate, NULL);
+ __end_current_label_crit_section(label, needput);
+ return 0;
+}
+
static void apparmor_sk_free_security(struct sock *sk)
{
struct aa_sk_ctx *ctx = aa_sock(sk);
- aa_put_label(ctx->label);
- aa_put_label(ctx->peer);
+	/* sock is dead - these won't be updated any more */
+ aa_put_label(rcu_dereference_protected(ctx->label, true));
+ aa_put_label(rcu_dereference_protected(ctx->peer, true));
+ aa_put_label(rcu_dereference_protected(ctx->peer_lastupdate, true));
}
/**
@@ -1080,13 +1109,153 @@ static void apparmor_sk_clone_security(const struct sock *sk,
struct aa_sk_ctx *ctx = aa_sock(sk);
struct aa_sk_ctx *new = aa_sock(newsk);
- if (new->label)
- aa_put_label(new->label);
- new->label = aa_get_label(ctx->label);
+ /* not actually in use yet */
+ if (rcu_access_pointer(ctx->label) != rcu_access_pointer(new->label)) {
+ aa_put_label(rcu_dereference_protected(new->label, true));
+ rcu_assign_pointer(new->label, aa_get_label_rcu(&ctx->label));
+ }
+
+ if (rcu_access_pointer(ctx->peer) != rcu_access_pointer(new->peer)) {
+ aa_put_label(rcu_dereference_protected(new->peer, true));
+ rcu_assign_pointer(new->peer, aa_get_label_rcu(&ctx->peer));
+ }
+
+ if (rcu_access_pointer(ctx->peer_lastupdate) != rcu_access_pointer(new->peer_lastupdate)) {
+ aa_put_label(rcu_dereference_protected(new->peer_lastupdate, true));
+ rcu_assign_pointer(new->peer_lastupdate,
+ aa_get_label_rcu(&ctx->peer_lastupdate));
+ }
+}
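The clone path follows the usual RCU publish pattern: rcu_access_pointer() for a dereference-free equality test, rcu_dereference_protected() to take the old value while the new sock is still private, and rcu_assign_pointer() to publish. A hedged sketch of that pattern on a generic refcounted object (aa_get_label_rcu is assumed to take a reference under RCU; the real aa_label helpers may differ):

    #include <linux/rcupdate.h>
    #include <linux/refcount.h>
    #include <linux/slab.h>

    struct obj {
            refcount_t ref;
            struct rcu_head rcu;
    };

    struct holder {
            struct obj __rcu *cur;
    };

    /* sketch only - holder is assumed not yet visible to readers,
     * which is why rcu_dereference_protected(..., true) is safe */
    static void obj_replace(struct holder *h, struct obj *newobj)
    {
            struct obj *old = rcu_dereference_protected(h->cur, true);

            if (old == newobj)
                    return;
            refcount_inc(&newobj->ref);          /* ref held by the holder */
            rcu_assign_pointer(h->cur, newobj);  /* publish */
            if (old && refcount_dec_and_test(&old->ref))
                    kfree_rcu(old, rcu);         /* free after a grace period */
    }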
+
+static int unix_connect_perm(const struct cred *cred, struct aa_label *label,
+ struct sock *sk, struct sock *peer_sk)
+{
+ struct aa_sk_ctx *peer_ctx = aa_sock(peer_sk);
+ int error;
+
+ error = aa_unix_peer_perm(cred, label, OP_CONNECT,
+ (AA_MAY_CONNECT | AA_MAY_SEND | AA_MAY_RECEIVE),
+ sk, peer_sk,
+ rcu_dereference_protected(peer_ctx->label,
+ lockdep_is_held(&unix_sk(peer_sk)->lock)));
+ if (!is_unix_fs(peer_sk)) {
+ last_error(error,
+ aa_unix_peer_perm(cred,
+ rcu_dereference_protected(peer_ctx->label,
+ lockdep_is_held(&unix_sk(peer_sk)->lock)),
+ OP_CONNECT,
+ (AA_MAY_ACCEPT | AA_MAY_SEND | AA_MAY_RECEIVE),
+ peer_sk, sk, label));
+ }
+
+ return error;
+}
+
+/* lockdep check in unix_connect_perm - push sks here to check */
+static void unix_connect_peers(struct aa_sk_ctx *sk_ctx,
+ struct aa_sk_ctx *peer_ctx)
+{
+ /* Cross reference the peer labels for SO_PEERSEC */
+ struct aa_label *label = rcu_dereference_protected(sk_ctx->label, true);
+
+ aa_get_label(label);
+ aa_put_label(rcu_dereference_protected(peer_ctx->peer,
+ true));
+ rcu_assign_pointer(peer_ctx->peer, label); /* transfer cnt */
+
+ label = aa_get_label(rcu_dereference_protected(peer_ctx->label,
+ true));
+ //spin_unlock(&peer_ctx->lock);
+
+ //spin_lock(&sk_ctx->lock);
+ aa_put_label(rcu_dereference_protected(sk_ctx->peer,
+ true));
+ aa_put_label(rcu_dereference_protected(sk_ctx->peer_lastupdate,
+ true));
+
+ rcu_assign_pointer(sk_ctx->peer, aa_get_label(label));
+ rcu_assign_pointer(sk_ctx->peer_lastupdate, label); /* transfer cnt */
+ //spin_unlock(&sk_ctx->lock);
+}
+
+/**
+ * apparmor_unix_stream_connect - check perms before making unix domain conn
+ * @sk: sk attempting to connect
+ * @peer_sk: sk that is accepting the connection
+ * @newsk: new sk created for this connection
+ * peer is locked when this hook is called
+ *
+ * Return:
+ * 0 if connection is permitted
+ * error code on denial or failure
+ */
+static int apparmor_unix_stream_connect(struct sock *sk, struct sock *peer_sk,
+ struct sock *newsk)
+{
+ struct aa_sk_ctx *sk_ctx = aa_sock(sk);
+ struct aa_sk_ctx *peer_ctx = aa_sock(peer_sk);
+ struct aa_sk_ctx *new_ctx = aa_sock(newsk);
+ struct aa_label *label;
+ int error;
+ bool needput;
+
+ label = __begin_current_label_crit_section(&needput);
+ error = unix_connect_perm(current_cred(), label, sk, peer_sk);
+ __end_current_label_crit_section(label, needput);
+
+ if (error)
+ return error;
+
+ /* newsk doesn't go through post_create, but does go through
+ * security_sk_alloc()
+ */
+ rcu_assign_pointer(new_ctx->label,
+ aa_get_label(rcu_dereference_protected(peer_ctx->label,
+ true)));
+
+ /* Cross reference the peer labels for SO_PEERSEC */
+ unix_connect_peers(sk_ctx, new_ctx);
+
+ return 0;
+}
+
+/**
+ * apparmor_unix_may_send - check perms before conn or sending unix dgrams
+ * @sock: socket sending the message
+ * @peer: socket the message is being sent to
+ *
+ * Performs bidirectional permission checks for Unix domain socket communication:
+ * 1. Verifies sender has AA_MAY_SEND to target socket
+ * 2. Verifies receiver has AA_MAY_RECEIVE from source socket
+ *
+ * sock and peer are locked when this hook is called
+ * called by: dgram_connect peer setup but path not copied to newsk
+ *
+ * Return:
+ * 0 if transmission is permitted
+ * error code on denial or failure
+ */
+static int apparmor_unix_may_send(struct socket *sock, struct socket *peer)
+{
+ struct aa_sk_ctx *peer_ctx = aa_sock(peer->sk);
+ struct aa_label *label;
+ int error;
+ bool needput;
+
+ label = __begin_current_label_crit_section(&needput);
+ error = xcheck(aa_unix_peer_perm(current_cred(),
+ label, OP_SENDMSG, AA_MAY_SEND,
+ sock->sk, peer->sk,
+ rcu_dereference_protected(peer_ctx->label,
+ true)),
+ aa_unix_peer_perm(peer->file ? peer->file->f_cred : NULL,
+ rcu_dereference_protected(peer_ctx->label,
+ true),
+ OP_SENDMSG, AA_MAY_RECEIVE, peer->sk,
+ sock->sk, label));
+ __end_current_label_crit_section(label, needput);
- if (new->peer)
- aa_put_label(new->peer);
- new->peer = aa_get_label(ctx->peer);
+ return error;
}
static int apparmor_socket_create(int family, int type, int protocol, int kern)
@@ -1096,13 +1265,19 @@ static int apparmor_socket_create(int family, int type, int protocol, int kern)
AA_BUG(in_interrupt());
+ if (kern)
+ return 0;
+
label = begin_current_label_crit_section();
- if (!(kern || unconfined(label)))
- error = af_select(family,
- create_perm(label, family, type, protocol),
- aa_af_perm(current_cred(), label,
- OP_CREATE, AA_MAY_CREATE,
- family, type, protocol));
+ if (!unconfined(label)) {
+ if (family == PF_UNIX)
+ error = aa_unix_create_perm(label, family, type,
+ protocol);
+ else
+ error = aa_af_perm(current_cred(), label, OP_CREATE,
+ AA_MAY_CREATE, family, type,
+ protocol);
+ }
end_current_label_crit_section(label);
return error;
@@ -1135,14 +1310,58 @@ static int apparmor_socket_post_create(struct socket *sock, int family,
if (sock->sk) {
struct aa_sk_ctx *ctx = aa_sock(sock->sk);
- aa_put_label(ctx->label);
- ctx->label = aa_get_label(label);
+ /* still not live */
+ aa_put_label(rcu_dereference_protected(ctx->label, true));
+ rcu_assign_pointer(ctx->label, aa_get_label(label));
}
aa_put_label(label);
return 0;
}
+static int apparmor_socket_socketpair(struct socket *socka,
+ struct socket *sockb)
+{
+ struct aa_sk_ctx *a_ctx = aa_sock(socka->sk);
+ struct aa_sk_ctx *b_ctx = aa_sock(sockb->sk);
+ struct aa_label *label;
+
+ /* socks not live yet - initial values set in sk_alloc */
+ label = begin_current_label_crit_section();
+ if (rcu_access_pointer(a_ctx->label) != label) {
+ AA_BUG("a_ctx != label");
+ aa_put_label(rcu_dereference_protected(a_ctx->label, true));
+ rcu_assign_pointer(a_ctx->label, aa_get_label(label));
+ }
+ if (rcu_access_pointer(b_ctx->label) != label) {
+ AA_BUG("b_ctx != label");
+ aa_put_label(rcu_dereference_protected(b_ctx->label, true));
+ rcu_assign_pointer(b_ctx->label, aa_get_label(label));
+ }
+
+ if (socka->sk->sk_family == PF_UNIX) {
+		/* unix socket pairs bypass unix_stream_connect */
+ unix_connect_peers(a_ctx, b_ctx);
+ }
+ end_current_label_crit_section(label);
+
+ return 0;
+}
+
+/**
+ * apparmor_socket_bind - check perms before bind addr to socket
+ * @sock: socket to bind the address to (must be non-NULL)
+ * @address: address that is being bound (must be non-NULL)
+ * @addrlen: length of @address
+ *
+ * Performs security checks before allowing a socket to bind to an address.
+ * Handles Unix domain sockets specially through aa_unix_bind_perm().
+ * For other socket families, uses generic permission check via aa_sk_perm().
+ *
+ * Return:
+ * 0 if binding is permitted
+ * error code on denial or invalid parameters
+ */
static int apparmor_socket_bind(struct socket *sock,
struct sockaddr *address, int addrlen)
{
@@ -1151,9 +1370,9 @@ static int apparmor_socket_bind(struct socket *sock,
AA_BUG(!address);
AA_BUG(in_interrupt());
- return af_select(sock->sk->sk_family,
- bind_perm(sock, address, addrlen),
- aa_sk_perm(OP_BIND, AA_MAY_BIND, sock->sk));
+ if (sock->sk->sk_family == PF_UNIX)
+ return aa_unix_bind_perm(sock, address, addrlen);
+ return aa_sk_perm(OP_BIND, AA_MAY_BIND, sock->sk);
}
static int apparmor_socket_connect(struct socket *sock,
@@ -1164,9 +1383,10 @@ static int apparmor_socket_connect(struct socket *sock,
AA_BUG(!address);
AA_BUG(in_interrupt());
- return af_select(sock->sk->sk_family,
- connect_perm(sock, address, addrlen),
- aa_sk_perm(OP_CONNECT, AA_MAY_CONNECT, sock->sk));
+ /* PF_UNIX goes through unix_stream_connect && unix_may_send */
+ if (sock->sk->sk_family == PF_UNIX)
+ return 0;
+ return aa_sk_perm(OP_CONNECT, AA_MAY_CONNECT, sock->sk);
}
static int apparmor_socket_listen(struct socket *sock, int backlog)
@@ -1175,9 +1395,9 @@ static int apparmor_socket_listen(struct socket *sock, int backlog)
AA_BUG(!sock->sk);
AA_BUG(in_interrupt());
- return af_select(sock->sk->sk_family,
- listen_perm(sock, backlog),
- aa_sk_perm(OP_LISTEN, AA_MAY_LISTEN, sock->sk));
+ if (sock->sk->sk_family == PF_UNIX)
+ return aa_unix_listen_perm(sock, backlog);
+ return aa_sk_perm(OP_LISTEN, AA_MAY_LISTEN, sock->sk);
}
/*
@@ -1191,9 +1411,9 @@ static int apparmor_socket_accept(struct socket *sock, struct socket *newsock)
AA_BUG(!newsock);
AA_BUG(in_interrupt());
- return af_select(sock->sk->sk_family,
- accept_perm(sock, newsock),
- aa_sk_perm(OP_ACCEPT, AA_MAY_ACCEPT, sock->sk));
+ if (sock->sk->sk_family == PF_UNIX)
+ return aa_unix_accept_perm(sock, newsock);
+ return aa_sk_perm(OP_ACCEPT, AA_MAY_ACCEPT, sock->sk);
}
static int aa_sock_msg_perm(const char *op, u32 request, struct socket *sock,
@@ -1204,9 +1424,10 @@ static int aa_sock_msg_perm(const char *op, u32 request, struct socket *sock,
AA_BUG(!msg);
AA_BUG(in_interrupt());
- return af_select(sock->sk->sk_family,
- msg_perm(op, request, sock, msg, size),
- aa_sk_perm(op, request, sock->sk));
+ /* PF_UNIX goes through unix_may_send */
+ if (sock->sk->sk_family == PF_UNIX)
+ return 0;
+ return aa_sk_perm(op, request, sock->sk);
}
static int apparmor_socket_sendmsg(struct socket *sock,
@@ -1228,9 +1449,9 @@ static int aa_sock_perm(const char *op, u32 request, struct socket *sock)
AA_BUG(!sock->sk);
AA_BUG(in_interrupt());
- return af_select(sock->sk->sk_family,
- sock_perm(op, request, sock),
- aa_sk_perm(op, request, sock->sk));
+ if (sock->sk->sk_family == PF_UNIX)
+ return aa_unix_sock_perm(op, request, sock);
+ return aa_sk_perm(op, request, sock->sk);
}
static int apparmor_socket_getsockname(struct socket *sock)
@@ -1251,9 +1472,9 @@ static int aa_sock_opt_perm(const char *op, u32 request, struct socket *sock,
AA_BUG(!sock->sk);
AA_BUG(in_interrupt());
- return af_select(sock->sk->sk_family,
- opt_perm(op, request, sock, level, optname),
- aa_sk_perm(op, request, sock->sk));
+ if (sock->sk->sk_family == PF_UNIX)
+ return aa_unix_opt_perm(op, request, sock, level, optname);
+ return aa_sk_perm(op, request, sock->sk);
}
static int apparmor_socket_getsockopt(struct socket *sock, int level,
@@ -1289,6 +1510,7 @@ static int apparmor_socket_shutdown(struct socket *sock, int how)
static int apparmor_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
struct aa_sk_ctx *ctx = aa_sock(sk);
+ int error;
if (!skb->secmark)
return 0;
@@ -1297,23 +1519,31 @@ static int apparmor_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb)
* If we reach here before the socket_post_create hook is called,
* the label is null; drop the packet.
*/
- if (!ctx->label)
+ if (!rcu_access_pointer(ctx->label))
return -EACCES;
- return apparmor_secmark_check(ctx->label, OP_RECVMSG, AA_MAY_RECEIVE,
- skb->secmark, sk);
+ rcu_read_lock();
+ error = apparmor_secmark_check(rcu_dereference(ctx->label), OP_RECVMSG,
+ AA_MAY_RECEIVE, skb->secmark, sk);
+ rcu_read_unlock();
+
+ return error;
}
#endif
-static struct aa_label *sk_peer_label(struct sock *sk)
+static struct aa_label *sk_peer_get_label(struct sock *sk)
{
struct aa_sk_ctx *ctx = aa_sock(sk);
+ struct aa_label *label = ERR_PTR(-ENOPROTOOPT);
- if (ctx->peer)
- return ctx->peer;
+ if (rcu_access_pointer(ctx->peer))
+ return aa_get_label_rcu(&ctx->peer);
- return ERR_PTR(-ENOPROTOOPT);
+ if (sk->sk_family != PF_UNIX)
+ return ERR_PTR(-ENOPROTOOPT);
+
+ return label;
}
/**
@@ -1335,19 +1565,19 @@ static int apparmor_socket_getpeersec_stream(struct socket *sock,
struct aa_label *label;
struct aa_label *peer;
- label = begin_current_label_crit_section();
- peer = sk_peer_label(sock->sk);
+ peer = sk_peer_get_label(sock->sk);
if (IS_ERR(peer)) {
error = PTR_ERR(peer);
goto done;
}
+ label = begin_current_label_crit_section();
slen = aa_label_asxprint(&name, labels_ns(label), peer,
FLAG_SHOW_MODE | FLAG_VIEW_SUBNS |
FLAG_HIDDEN_UNCONFINED, GFP_KERNEL);
/* don't include terminating \0 in slen, it breaks some apps */
if (slen < 0) {
error = -ENOMEM;
- goto done;
+ goto done_put;
}
if (slen > len) {
error = -ERANGE;
@@ -1359,8 +1589,11 @@ static int apparmor_socket_getpeersec_stream(struct socket *sock,
done_len:
if (copy_to_sockptr(optlen, &slen, sizeof(slen)))
error = -EFAULT;
-done:
+
+done_put:
end_current_label_crit_section(label);
+ aa_put_label(peer);
+done:
kfree(name);
return error;
}
@@ -1396,8 +1629,9 @@ static void apparmor_sock_graft(struct sock *sk, struct socket *parent)
{
struct aa_sk_ctx *ctx = aa_sock(sk);
- if (!ctx->label)
- ctx->label = aa_get_current_label();
+ /* setup - not live */
+ if (!rcu_access_pointer(ctx->label))
+ rcu_assign_pointer(ctx->label, aa_get_current_label());
}
#ifdef CONFIG_NETWORK_SECMARK
@@ -1405,12 +1639,17 @@ static int apparmor_inet_conn_request(const struct sock *sk, struct sk_buff *skb
struct request_sock *req)
{
struct aa_sk_ctx *ctx = aa_sock(sk);
+ int error;
if (!skb->secmark)
return 0;
- return apparmor_secmark_check(ctx->label, OP_CONNECT, AA_MAY_CONNECT,
- skb->secmark, sk);
+ rcu_read_lock();
+ error = apparmor_secmark_check(rcu_dereference(ctx->label), OP_CONNECT,
+ AA_MAY_CONNECT, skb->secmark, sk);
+ rcu_read_unlock();
+
+ return error;
}
#endif
@@ -1467,11 +1706,16 @@ static struct security_hook_list apparmor_hooks[] __ro_after_init = {
LSM_HOOK_INIT(getprocattr, apparmor_getprocattr),
LSM_HOOK_INIT(setprocattr, apparmor_setprocattr),
+ LSM_HOOK_INIT(sk_alloc_security, apparmor_sk_alloc_security),
LSM_HOOK_INIT(sk_free_security, apparmor_sk_free_security),
LSM_HOOK_INIT(sk_clone_security, apparmor_sk_clone_security),
+ LSM_HOOK_INIT(unix_stream_connect, apparmor_unix_stream_connect),
+ LSM_HOOK_INIT(unix_may_send, apparmor_unix_may_send),
+
LSM_HOOK_INIT(socket_create, apparmor_socket_create),
LSM_HOOK_INIT(socket_post_create, apparmor_socket_post_create),
+ LSM_HOOK_INIT(socket_socketpair, apparmor_socket_socketpair),
LSM_HOOK_INIT(socket_bind, apparmor_socket_bind),
LSM_HOOK_INIT(socket_connect, apparmor_socket_connect),
LSM_HOOK_INIT(socket_listen, apparmor_socket_listen),
@@ -1571,6 +1815,9 @@ static const struct kernel_param_ops param_ops_aalockpolicy = {
.get = param_get_aalockpolicy
};
+static int param_set_debug(const char *val, const struct kernel_param *kp);
+static int param_get_debug(char *buffer, const struct kernel_param *kp);
+
static int param_set_audit(const char *val, const struct kernel_param *kp);
static int param_get_audit(char *buffer, const struct kernel_param *kp);
@@ -1604,8 +1851,9 @@ module_param_named(rawdata_compression_level, aa_g_rawdata_compression_level,
aacompressionlevel, 0400);
/* Debug mode */
-bool aa_g_debug = IS_ENABLED(CONFIG_SECURITY_APPARMOR_DEBUG_MESSAGES);
-module_param_named(debug, aa_g_debug, aabool, S_IRUSR | S_IWUSR);
+int aa_g_debug;
+module_param_call(debug, param_set_debug, param_get_debug,
+ &aa_g_debug, 0600);
/* Audit mode */
enum audit_mode aa_g_audit;
@@ -1798,6 +2046,34 @@ static int param_get_aacompressionlevel(char *buffer,
return param_get_int(buffer, kp);
}
+static int param_get_debug(char *buffer, const struct kernel_param *kp)
+{
+ if (!apparmor_enabled)
+ return -EINVAL;
+ if (apparmor_initialized && !aa_current_policy_view_capable(NULL))
+ return -EPERM;
+ return aa_print_debug_params(buffer);
+}
+
+static int param_set_debug(const char *val, const struct kernel_param *kp)
+{
+ int i;
+
+ if (!apparmor_enabled)
+ return -EINVAL;
+ if (!val)
+ return -EINVAL;
+ if (apparmor_initialized && !aa_current_policy_admin_capable(NULL))
+ return -EPERM;
+
+ i = aa_parse_debug_params(val);
+ if (i == DEBUG_PARSE_ERROR)
+ return -EINVAL;
+
+ aa_g_debug = i;
+ return 0;
+}
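With param_set_debug/param_get_debug wired up through module_param_call() above, the mask should be adjustable at runtime, e.g. by writing a comma-separated list such as "label,policy" to /sys/module/apparmor/parameters/debug (assuming the standard module-param sysfs path; the enabled/capability checks above still gate access).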
+
static int param_get_audit(char *buffer, const struct kernel_param *kp)
{
if (!apparmor_enabled)
@@ -2006,7 +2282,7 @@ static int __init alloc_buffers(void)
* two should be enough, with more CPUs it is possible that more
* buffers will be used simultaneously. The preallocated pool may grow.
* This preallocation also has the side-effect that AppArmor will be
- * disabled early at boot if aa_g_path_max is extremly high.
+ * disabled early at boot if aa_g_path_max is extremely high.
*/
if (num_online_cpus() > 1)
num = 4 + RESERVE_COUNT;
@@ -2082,6 +2358,7 @@ static unsigned int apparmor_ip_postroute(void *priv,
{
struct aa_sk_ctx *ctx;
struct sock *sk;
+ int error;
if (!skb->secmark)
return NF_ACCEPT;
@@ -2091,8 +2368,11 @@ static unsigned int apparmor_ip_postroute(void *priv,
return NF_ACCEPT;
ctx = aa_sock(sk);
- if (!apparmor_secmark_check(ctx->label, OP_SENDMSG, AA_MAY_SEND,
- skb->secmark, sk))
+ rcu_read_lock();
+ error = apparmor_secmark_check(rcu_dereference(ctx->label), OP_SENDMSG,
+ AA_MAY_SEND, skb->secmark, sk);
+ rcu_read_unlock();
+ if (!error)
return NF_ACCEPT;
return NF_DROP_ERR(-ECONNREFUSED);
@@ -2149,12 +2429,12 @@ static int __init apparmor_nf_ip_init(void)
__initcall(apparmor_nf_ip_init);
#endif
-static char nulldfa_src[] = {
+static char nulldfa_src[] __aligned(8) = {
#include "nulldfa.in"
};
static struct aa_dfa *nulldfa;
-static char stacksplitdfa_src[] = {
+static char stacksplitdfa_src[] __aligned(8) = {
#include "stacksplitdfa.in"
};
struct aa_dfa *stacksplitdfa;
diff --git a/security/apparmor/match.c b/security/apparmor/match.c
index f2d9c57f8794..c5a91600842a 100644
--- a/security/apparmor/match.c
+++ b/security/apparmor/match.c
@@ -679,34 +679,35 @@ aa_state_t aa_dfa_matchn_until(struct aa_dfa *dfa, aa_state_t start,
return state;
}
-#define inc_wb_pos(wb) \
-do { \
+#define inc_wb_pos(wb) \
+do { \
+ BUILD_BUG_ON_NOT_POWER_OF_2(WB_HISTORY_SIZE); \
wb->pos = (wb->pos + 1) & (WB_HISTORY_SIZE - 1); \
- wb->len = (wb->len + 1) & (WB_HISTORY_SIZE - 1); \
+ wb->len = (wb->len + 1) > WB_HISTORY_SIZE ? WB_HISTORY_SIZE : \
+ wb->len + 1; \
} while (0)
/* For DFAs that don't support extended tagging of states */
+/* adjust is only set if is_loop returns true */
static bool is_loop(struct match_workbuf *wb, aa_state_t state,
unsigned int *adjust)
{
- aa_state_t pos = wb->pos;
- aa_state_t i;
+ int pos = wb->pos;
+ int i;
if (wb->history[pos] < state)
return false;
- for (i = 0; i <= wb->len; i++) {
+ for (i = 0; i < wb->len; i++) {
if (wb->history[pos] == state) {
*adjust = i;
return true;
}
- if (pos == 0)
- pos = WB_HISTORY_SIZE;
- pos--;
+ /* -1 wraps to WB_HISTORY_SIZE - 1 */
+ pos = (pos - 1) & (WB_HISTORY_SIZE - 1);
}
- *adjust = i;
- return true;
+ return false;
}
static aa_state_t leftmatch_fb(struct aa_dfa *dfa, aa_state_t start,
diff --git a/security/apparmor/mount.c b/security/apparmor/mount.c
index bf8863253e07..523570aa1a5a 100644
--- a/security/apparmor/mount.c
+++ b/security/apparmor/mount.c
@@ -311,8 +311,7 @@ static int match_mnt_path_str(const struct cred *subj_cred,
{
struct aa_perms perms = { };
const char *mntpnt = NULL, *info = NULL;
- struct aa_ruleset *rules = list_first_entry(&profile->rules,
- typeof(*rules), list);
+ struct aa_ruleset *rules = profile->label.rules[0];
int pos, error;
AA_BUG(!profile);
@@ -371,8 +370,7 @@ static int match_mnt(const struct cred *subj_cred,
bool binary)
{
const char *devname = NULL, *info = NULL;
- struct aa_ruleset *rules = list_first_entry(&profile->rules,
- typeof(*rules), list);
+ struct aa_ruleset *rules = profile->label.rules[0];
int error = -EACCES;
AA_BUG(!profile);
@@ -604,8 +602,7 @@ static int profile_umount(const struct cred *subj_cred,
struct aa_profile *profile, const struct path *path,
char *buffer)
{
- struct aa_ruleset *rules = list_first_entry(&profile->rules,
- typeof(*rules), list);
+ struct aa_ruleset *rules = profile->label.rules[0];
struct aa_perms perms = { };
const char *name = NULL, *info = NULL;
aa_state_t state;
@@ -668,8 +665,7 @@ static struct aa_label *build_pivotroot(const struct cred *subj_cred,
const struct path *old_path,
char *old_buffer)
{
- struct aa_ruleset *rules = list_first_entry(&profile->rules,
- typeof(*rules), list);
+ struct aa_ruleset *rules = profile->label.rules[0];
const char *old_name, *new_name = NULL, *info = NULL;
const char *trans_name = NULL;
struct aa_perms perms = { };
diff --git a/security/apparmor/net.c b/security/apparmor/net.c
index 77413a519117..45cf25605c34 100644
--- a/security/apparmor/net.c
+++ b/security/apparmor/net.c
@@ -8,6 +8,7 @@
* Copyright 2009-2017 Canonical Ltd.
*/
+#include "include/af_unix.h"
#include "include/apparmor.h"
#include "include/audit.h"
#include "include/cred.h"
@@ -24,6 +25,12 @@ struct aa_sfs_entry aa_sfs_entry_network[] = {
{ }
};
+struct aa_sfs_entry aa_sfs_entry_networkv9[] = {
+ AA_SFS_FILE_STRING("af_mask", AA_SFS_AF_MASK),
+ AA_SFS_FILE_BOOLEAN("af_unix", 1),
+ { }
+};
+
static const char * const net_mask_names[] = {
"unknown",
"send",
@@ -66,6 +73,42 @@ static const char * const net_mask_names[] = {
"unknown",
};
+static void audit_unix_addr(struct audit_buffer *ab, const char *str,
+ struct sockaddr_un *addr, int addrlen)
+{
+ int len = unix_addr_len(addrlen);
+
+ if (!addr || len <= 0) {
+ audit_log_format(ab, " %s=none", str);
+ } else if (addr->sun_path[0]) {
+ audit_log_format(ab, " %s=", str);
+ audit_log_untrustedstring(ab, addr->sun_path);
+ } else {
+ audit_log_format(ab, " %s=\"@", str);
+ if (audit_string_contains_control(&addr->sun_path[1], len - 1))
+ audit_log_n_hex(ab, &addr->sun_path[1], len - 1);
+ else
+ audit_log_format(ab, "%.*s", len - 1,
+ &addr->sun_path[1]);
+ audit_log_format(ab, "\"");
+ }
+}
+
+static void audit_unix_sk_addr(struct audit_buffer *ab, const char *str,
+ const struct sock *sk)
+{
+ const struct unix_sock *u = unix_sk(sk);
+
+ if (u && u->addr) {
+ int addrlen;
+ struct sockaddr_un *addr = aa_sunaddr(u, &addrlen);
+
+ audit_unix_addr(ab, str, addr, addrlen);
+ } else {
+		audit_unix_addr(ab, str, NULL, 0);
+	}
+}
/* audit callback for net specific fields */
void audit_net_cb(struct audit_buffer *ab, void *va)
@@ -73,12 +116,12 @@ void audit_net_cb(struct audit_buffer *ab, void *va)
struct common_audit_data *sa = va;
struct apparmor_audit_data *ad = aad(sa);
- if (address_family_names[sa->u.net->family])
+ if (address_family_names[ad->common.u.net->family])
audit_log_format(ab, " family=\"%s\"",
- address_family_names[sa->u.net->family]);
+ address_family_names[ad->common.u.net->family]);
else
audit_log_format(ab, " family=\"unknown(%d)\"",
- sa->u.net->family);
+ ad->common.u.net->family);
if (sock_type_names[ad->net.type])
audit_log_format(ab, " sock_type=\"%s\"",
sock_type_names[ad->net.type]);
@@ -98,6 +141,19 @@ void audit_net_cb(struct audit_buffer *ab, void *va)
net_mask_names, NET_PERMS_MASK);
}
}
+ if (ad->common.u.net->family == PF_UNIX) {
+ if (ad->net.addr || !ad->common.u.net->sk)
+ audit_unix_addr(ab, "addr",
+ unix_addr(ad->net.addr),
+ ad->net.addrlen);
+ else
+ audit_unix_sk_addr(ab, "addr", ad->common.u.net->sk);
+ if (ad->request & NET_PEER_MASK) {
+ audit_unix_addr(ab, "peer_addr",
+ unix_addr(ad->net.peer.addr),
+ ad->net.peer.addrlen);
+ }
+ }
if (ad->peer) {
audit_log_format(ab, " peer=");
aa_label_xaudit(ab, labels_ns(ad->subj_label), ad->peer,
@@ -105,45 +161,123 @@ void audit_net_cb(struct audit_buffer *ab, void *va)
}
}
+/* standard permission lookup pattern - supports early bailout */
+int aa_do_perms(struct aa_profile *profile, struct aa_policydb *policy,
+ aa_state_t state, u32 request,
+ struct aa_perms *p, struct apparmor_audit_data *ad)
+{
+ struct aa_perms perms;
+
+ AA_BUG(!profile);
+ AA_BUG(!policy);
+
+ if (state || !p)
+ p = aa_lookup_perms(policy, state);
+ perms = *p;
+ aa_apply_modes_to_perms(profile, &perms);
+ return aa_check_perms(profile, &perms, request, ad,
+ audit_net_cb);
+}
+
+/* only continue the match if the perms at the current state are
+ * insufficient and the state indicates more perms in a later state.
+ * Returns: NULL to continue matching, else perms struct on early match
+ */
+static struct aa_perms *early_match(struct aa_policydb *policy,
+ aa_state_t state, u32 request)
+{
+ struct aa_perms *p;
+
+ p = aa_lookup_perms(policy, state);
+ if (((p->allow & request) != request) && (p->allow & AA_CONT_MATCH))
+ return NULL;
+ return p;
+}
+
+static aa_state_t aa_dfa_match_be16(struct aa_dfa *dfa, aa_state_t state,
+ u16 data)
+{
+ __be16 buffer = cpu_to_be16(data);
+
+ return aa_dfa_match_len(dfa, state, (char *) &buffer, 2);
+}
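The DFA consumes raw bytes, so 16-bit inputs are fed most-significant byte first; aa_dfa_match_be16() just serializes the value with cpu_to_be16() before matching both bytes. A user-space illustration of the byte order, with htons() standing in for cpu_to_be16():

    #include <arpa/inet.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint16_t af = 1; /* e.g. AF_UNIX */
            uint16_t be = htons(af);
            const unsigned char *b = (const unsigned char *)&be;

            /* the DFA walks b[0] then b[1]: 00 then 01,
             * regardless of host endianness */
            printf("%02x %02x\n", b[0], b[1]);
            return 0;
    }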
+
+/**
+ * aa_match_to_prot - match the af, type, protocol triplet
+ * @policy: policy being matched
+ * @state: state to start in
+ * @request: permissions being requested, ignored if @p == NULL
+ * @af: socket address family
+ * @type: socket type
+ * @protocol: socket protocol
+ * @p: output - pointer to permission associated with match
+ * @info: output - pointer to string describing failure
+ *
+ * RETURNS: the state the match stopped in.
+ *
+ * @p is only set if an early match occurs; in that case the returned
+ * state is the state the early match was found in. @p is not set on
+ * failure or when the match runs to completion.
+ */
+aa_state_t aa_match_to_prot(struct aa_policydb *policy, aa_state_t state,
+ u32 request, u16 af, int type, int protocol,
+ struct aa_perms **p, const char **info)
+{
+ state = aa_dfa_match_be16(policy->dfa, state, (u16)af);
+ if (!state) {
+ *info = "failed af match";
+ return state;
+ }
+ state = aa_dfa_match_be16(policy->dfa, state, (u16)type);
+ if (state) {
+ if (p)
+ *p = early_match(policy, state, request);
+ if (!p || !*p) {
+ state = aa_dfa_match_be16(policy->dfa, state, (u16)protocol);
+ if (!state)
+ *info = "failed protocol match";
+ }
+ } else {
+ *info = "failed type match";
+ }
+
+ return state;
+}
+
/* Generic af perm */
int aa_profile_af_perm(struct aa_profile *profile,
struct apparmor_audit_data *ad, u32 request, u16 family,
- int type)
+ int type, int protocol)
{
- struct aa_ruleset *rules = list_first_entry(&profile->rules,
- typeof(*rules), list);
- struct aa_perms perms = { };
+ struct aa_ruleset *rules = profile->label.rules[0];
+ struct aa_perms *p = NULL;
aa_state_t state;
- __be16 buffer[2];
AA_BUG(family >= AF_MAX);
AA_BUG(type < 0 || type >= SOCK_MAX);
+ AA_BUG(profile_unconfined(profile));
if (profile_unconfined(profile))
return 0;
- state = RULE_MEDIATES(rules, AA_CLASS_NET);
+ state = RULE_MEDIATES_NET(rules);
if (!state)
return 0;
-
- buffer[0] = cpu_to_be16(family);
- buffer[1] = cpu_to_be16((u16) type);
- state = aa_dfa_match_len(rules->policy->dfa, state, (char *) &buffer,
- 4);
- perms = *aa_lookup_perms(rules->policy, state);
- aa_apply_modes_to_perms(profile, &perms);
-
- return aa_check_perms(profile, &perms, request, ad, audit_net_cb);
+ state = aa_match_to_prot(rules->policy, state, request, family, type,
+ protocol, &p, &ad->info);
+ return aa_do_perms(profile, rules->policy, state, request, p, ad);
}
int aa_af_perm(const struct cred *subj_cred, struct aa_label *label,
const char *op, u32 request, u16 family, int type, int protocol)
{
struct aa_profile *profile;
- DEFINE_AUDIT_NET(ad, op, NULL, family, type, protocol);
+ DEFINE_AUDIT_NET(ad, op, subj_cred, NULL, family, type, protocol);
return fn_for_each_confined(label, profile,
aa_profile_af_perm(profile, &ad, request, family,
- type));
+ type, protocol));
}
static int aa_label_sk_perm(const struct cred *subj_cred,
@@ -157,9 +291,9 @@ static int aa_label_sk_perm(const struct cred *subj_cred,
AA_BUG(!label);
AA_BUG(!sk);
- if (ctx->label != kernel_t && !unconfined(label)) {
+ if (rcu_access_pointer(ctx->label) != kernel_t && !unconfined(label)) {
struct aa_profile *profile;
- DEFINE_AUDIT_SK(ad, op, sk);
+ DEFINE_AUDIT_SK(ad, op, subj_cred, sk);
ad.subj_cred = subj_cred;
error = fn_for_each_confined(label, profile,
@@ -187,12 +321,16 @@ int aa_sk_perm(const char *op, u32 request, struct sock *sk)
int aa_sock_file_perm(const struct cred *subj_cred, struct aa_label *label,
- const char *op, u32 request, struct socket *sock)
+ const char *op, u32 request, struct file *file)
{
+ struct socket *sock = (struct socket *) file->private_data;
+
AA_BUG(!label);
AA_BUG(!sock);
AA_BUG(!sock->sk);
+ if (sock->sk->sk_family == PF_UNIX)
+ return aa_unix_file_perm(subj_cred, label, op, request, file);
return aa_label_sk_perm(subj_cred, label, op, request, sock->sk);
}
@@ -223,8 +361,7 @@ static int aa_secmark_perm(struct aa_profile *profile, u32 request, u32 secid,
{
int i, ret;
struct aa_perms perms = { };
- struct aa_ruleset *rules = list_first_entry(&profile->rules,
- typeof(*rules), list);
+ struct aa_ruleset *rules = profile->label.rules[0];
if (rules->secmark_count == 0)
return 0;
@@ -257,7 +394,7 @@ int apparmor_secmark_check(struct aa_label *label, char *op, u32 request,
u32 secid, const struct sock *sk)
{
struct aa_profile *profile;
- DEFINE_AUDIT_SK(ad, op, sk);
+ DEFINE_AUDIT_SK(ad, op, NULL, sk);
return fn_for_each_confined(label, profile,
aa_secmark_perm(profile, request, secid,
diff --git a/security/apparmor/policy.c b/security/apparmor/policy.c
index d0244fab0653..50d5345ff5cb 100644
--- a/security/apparmor/policy.c
+++ b/security/apparmor/policy.c
@@ -243,6 +243,9 @@ static void free_ruleset(struct aa_ruleset *rules)
{
int i;
+ if (!rules)
+ return;
+
aa_put_pdb(rules->file);
aa_put_pdb(rules->policy);
aa_free_cap_rules(&rules->caps);
@@ -259,8 +262,6 @@ struct aa_ruleset *aa_alloc_ruleset(gfp_t gfp)
struct aa_ruleset *rules;
rules = kzalloc(sizeof(*rules), gfp);
- if (rules)
- INIT_LIST_HEAD(&rules->list);
return rules;
}
@@ -277,10 +278,9 @@ struct aa_ruleset *aa_alloc_ruleset(gfp_t gfp)
*/
void aa_free_profile(struct aa_profile *profile)
{
- struct aa_ruleset *rule, *tmp;
struct rhashtable *rht;
- AA_DEBUG("%s(%p)\n", __func__, profile);
+ AA_DEBUG(DEBUG_POLICY, "%s(%p)\n", __func__, profile);
if (!profile)
return;
@@ -299,10 +299,9 @@ void aa_free_profile(struct aa_profile *profile)
* at this point there are no tasks that can have a reference
* to rules
*/
- list_for_each_entry_safe(rule, tmp, &profile->rules, list) {
- list_del_init(&rule->list);
- free_ruleset(rule);
- }
+ for (int i = 0; i < profile->n_rules; i++)
+ free_ruleset(profile->label.rules[i]);
+
kfree_sensitive(profile->dirname);
if (profile->data) {
@@ -331,10 +330,12 @@ struct aa_profile *aa_alloc_profile(const char *hname, struct aa_proxy *proxy,
gfp_t gfp)
{
struct aa_profile *profile;
- struct aa_ruleset *rules;
- /* freed by free_profile - usually through aa_put_profile */
- profile = kzalloc(struct_size(profile, label.vec, 2), gfp);
+ /* freed by free_profile - usually through aa_put_profile
+ * this adds space for a single ruleset in the rules section of the
+ * label
+ */
+ profile = kzalloc(struct_size(profile, label.rules, 1), gfp);
if (!profile)
return NULL;
@@ -343,13 +344,11 @@ struct aa_profile *aa_alloc_profile(const char *hname, struct aa_proxy *proxy,
if (!aa_label_init(&profile->label, 1, gfp))
goto fail;
- INIT_LIST_HEAD(&profile->rules);
-
/* allocate the first ruleset, but leave it empty */
- rules = aa_alloc_ruleset(gfp);
- if (!rules)
+ profile->label.rules[0] = aa_alloc_ruleset(gfp);
+ if (!profile->label.rules[0])
goto fail;
- list_add(&rules->list, &profile->rules);
+ profile->n_rules = 1;
/* update being set needed by fs interface */
if (!proxy) {
@@ -364,6 +363,7 @@ struct aa_profile *aa_alloc_profile(const char *hname, struct aa_proxy *proxy,
profile->label.flags |= FLAG_PROFILE;
profile->label.vec[0] = profile;
+ profile->signal = SIGKILL;
/* refcount released by caller */
return profile;
@@ -373,6 +373,41 @@ fail:
return NULL;
}
+static inline bool ANY_RULE_MEDIATES(struct aa_profile *profile,
+ unsigned char class)
+{
+ int i;
+
+ for (i = 0; i < profile->n_rules; i++) {
+ if (RULE_MEDIATES(profile->label.rules[i], class))
+ return true;
+ }
+ return false;
+}
+
+/* set of classes that remain mediated even when a profile is unconfined */
+static int unconfined_mediates[] = { AA_CLASS_NS, AA_CLASS_IO_URING, 0 };
+
+/* must be called after profile rulesets and start information are set up */
+void aa_compute_profile_mediates(struct aa_profile *profile)
+{
+ int c;
+
+ if (profile_unconfined(profile)) {
+ int *pos;
+
+ for (pos = unconfined_mediates; *pos; pos++) {
+ if (ANY_RULE_MEDIATES(profile, *pos))
+				profile->label.mediates |= ((u64) 1) << *pos;
+ }
+ return;
+ }
+ for (c = 0; c <= AA_CLASS_LAST; c++) {
+ if (ANY_RULE_MEDIATES(profile, c))
+ profile->label.mediates |= ((u64) 1) << c;
+ }
+}
+
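
aa_compute_profile_mediates() folds the per-class "does any ruleset mediate
this?" query into a 64-bit bitmap on the label, so hot-path callers test one
bit instead of walking every ruleset's DFA. A plausible shape for the
label_mediates() consumer used later in this diff (the exact upstream
definition may differ):

	/* sketch: one bit per AA_CLASS_* value, populated at profile setup */
	static inline bool label_mediates(struct aa_label *label,
					  unsigned char class)
	{
		return !!(label->mediates & (((u64) 1) << class));
	}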
/* TODO: profile accounting - setup in remove */
/**
@@ -463,7 +498,7 @@ static struct aa_policy *__lookup_parent(struct aa_ns *ns,
}
/**
- * __create_missing_ancestors - create place holders for missing ancestores
+ * __create_missing_ancestors - create place holders for missing ancestors
* @ns: namespace to lookup profile in (NOT NULL)
* @hname: hierarchical profile name to find parent of (NOT NULL)
* @gfp: type of allocation.
@@ -621,13 +656,15 @@ struct aa_profile *aa_alloc_null(struct aa_profile *parent, const char *name,
/* TODO: ideally we should inherit abi from parent */
profile->label.flags |= FLAG_NULL;
profile->attach.xmatch = aa_get_pdb(nullpdb);
- rules = list_first_entry(&profile->rules, typeof(*rules), list);
+ rules = profile->label.rules[0];
rules->file = aa_get_pdb(nullpdb);
rules->policy = aa_get_pdb(nullpdb);
+ aa_compute_profile_mediates(profile);
if (parent) {
profile->path_flags = parent->path_flags;
-
+ /* override/inherit what is mediated from parent */
+ profile->label.mediates = parent->label.mediates;
/* released on free_profile */
rcu_assign_pointer(profile->parent, aa_get_profile(parent));
profile->ns = aa_get_ns(parent->ns);
@@ -833,8 +870,8 @@ bool aa_policy_admin_capable(const struct cred *subj_cred,
bool capable = policy_ns_capable(subj_cred, label, user_ns,
CAP_MAC_ADMIN) == 0;
- AA_DEBUG("cap_mac_admin? %d\n", capable);
- AA_DEBUG("policy locked? %d\n", aa_g_lock_policy);
+ AA_DEBUG(DEBUG_POLICY, "cap_mac_admin? %d\n", capable);
+ AA_DEBUG(DEBUG_POLICY, "policy locked? %d\n", aa_g_lock_policy);
return aa_policy_view_capable(subj_cred, label, ns) && capable &&
!aa_g_lock_policy;
@@ -843,11 +880,11 @@ bool aa_policy_admin_capable(const struct cred *subj_cred,
bool aa_current_policy_view_capable(struct aa_ns *ns)
{
struct aa_label *label;
- bool res;
+ bool needput, res;
- label = __begin_current_label_crit_section();
+ label = __begin_current_label_crit_section(&needput);
res = aa_policy_view_capable(current_cred(), label, ns);
- __end_current_label_crit_section(label);
+ __end_current_label_crit_section(label, needput);
return res;
}
@@ -855,11 +892,11 @@ bool aa_current_policy_view_capable(struct aa_ns *ns)
bool aa_current_policy_admin_capable(struct aa_ns *ns)
{
struct aa_label *label;
- bool res;
+ bool needput, res;
- label = __begin_current_label_crit_section();
+ label = __begin_current_label_crit_section(&needput);
res = aa_policy_admin_capable(current_cred(), label, ns);
- __end_current_label_crit_section(label);
+ __end_current_label_crit_section(label, needput);
return res;
}
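
__begin_current_label_crit_section() now reports through the needput
out-parameter whether it had to take a label reference, and the matching end
call drops the reference only when that flag is set. The calling pattern, as
used in the two hunks above (the permission check is a stand-in):

	bool needput, res;
	struct aa_label *label;

	label = __begin_current_label_crit_section(&needput);
	res = some_capability_check(current_cred(), label);
	__end_current_label_crit_section(label, needput);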
@@ -1068,7 +1105,7 @@ ssize_t aa_replace_profiles(struct aa_ns *policy_ns, struct aa_label *label,
goto out;
/* ensure that profiles are all for the same ns
- * TODO: update locking to remove this constaint. All profiles in
+ * TODO: update locking to remove this constraint. All profiles in
* the load set must succeed as a set or the load will
* fail. Sort ent list and take ns locks in hierarchy order
*/
diff --git a/security/apparmor/policy_compat.c b/security/apparmor/policy_compat.c
index 423227670e68..cfc2207e5a12 100644
--- a/security/apparmor/policy_compat.c
+++ b/security/apparmor/policy_compat.c
@@ -286,10 +286,10 @@ static void remap_dfa_accept(struct aa_dfa *dfa, unsigned int factor)
AA_BUG(!dfa);
- for (state = 0; state < state_count; state++)
+ for (state = 0; state < state_count; state++) {
ACCEPT_TABLE(dfa)[state] = state * factor;
- kvfree(dfa->tables[YYTD_ID_ACCEPT2]);
- dfa->tables[YYTD_ID_ACCEPT2] = NULL;
+ ACCEPT_TABLE2(dfa)[state] = factor > 1 ? ACCEPT_FLAG_OWNER : 0;
+ }
}
/* TODO: merge different dfa mappings into single map_policy fn */
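
Instead of freeing the ACCEPT2 table once permissions are remapped,
remap_dfa_accept() now reuses it as a per-state flags word, recording whether
the remapped entry carries owner-conditional permissions. In sketch form,
assuming ACCEPT_TABLE2() indexes the second accept table the same way
ACCEPT_TABLE() indexes the first:

	for (state = 0; state < state_count; state++) {
		ACCEPT_TABLE(dfa)[state] = state * factor;  /* perms index */
		/* factor > 1 means paired entries; flag the owner variant */
		ACCEPT_TABLE2(dfa)[state] = factor > 1 ? ACCEPT_FLAG_OWNER : 0;
	}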
diff --git a/security/apparmor/policy_ns.c b/security/apparmor/policy_ns.c
index 1f02cfe1d974..64783ca3b0f2 100644
--- a/security/apparmor/policy_ns.c
+++ b/security/apparmor/policy_ns.c
@@ -107,7 +107,7 @@ static struct aa_ns *alloc_ns(const char *prefix, const char *name)
struct aa_ns *ns;
ns = kzalloc(sizeof(*ns), GFP_KERNEL);
- AA_DEBUG("%s(%p)\n", __func__, ns);
+ AA_DEBUG(DEBUG_POLICY, "%s(%p)\n", __func__, ns);
if (!ns)
return NULL;
if (!aa_policy_init(&ns->base, prefix, name, GFP_KERNEL))
diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c
index 992b74c50d64..7523971e37d9 100644
--- a/security/apparmor/policy_unpack.c
+++ b/security/apparmor/policy_unpack.c
@@ -29,6 +29,7 @@
#include "include/policy.h"
#include "include/policy_unpack.h"
#include "include/policy_compat.h"
+#include "include/signal.h"
/* audit callback for unpack fields */
static void audit_cb(struct audit_buffer *ab, void *va)
@@ -598,8 +599,8 @@ static bool unpack_secmark(struct aa_ext *e, struct aa_ruleset *rules)
fail:
if (rules->secmark) {
for (i = 0; i < size; i++)
- kfree(rules->secmark[i].label);
- kfree(rules->secmark);
+ kfree_sensitive(rules->secmark[i].label);
+ kfree_sensitive(rules->secmark);
rules->secmark_count = 0;
rules->secmark = NULL;
}
@@ -716,6 +717,7 @@ static int unpack_pdb(struct aa_ext *e, struct aa_policydb **policy,
void *pos = e->pos;
int i, flags, error = -EPROTO;
ssize_t size;
+ u32 version = 0;
pdb = aa_alloc_pdb(GFP_KERNEL);
if (!pdb)
@@ -733,6 +735,9 @@ static int unpack_pdb(struct aa_ext *e, struct aa_policydb **policy,
if (pdb->perms) {
/* perms table present accept is index */
flags = TO_ACCEPT1_FLAG(YYTD_DATA32);
+ if (aa_unpack_u32(e, &version, "permsv") && version > 2)
+ /* accept2 used for dfa flags */
+ flags |= TO_ACCEPT2_FLAG(YYTD_DATA32);
} else {
/* packed perms in accept1 and accept2 */
flags = TO_ACCEPT1_FLAG(YYTD_DATA32) |
@@ -770,6 +775,21 @@ static int unpack_pdb(struct aa_ext *e, struct aa_policydb **policy,
}
}
+	/* accept2 may already be allocated in some cases, even with perms */
+ if (pdb->perms && !pdb->dfa->tables[YYTD_ID_ACCEPT2]) {
+ /* add dfa flags table missing in v2 */
+ u32 noents = pdb->dfa->tables[YYTD_ID_ACCEPT]->td_lolen;
+ u16 tdflags = pdb->dfa->tables[YYTD_ID_ACCEPT]->td_flags;
+ size_t tsize = table_size(noents, tdflags);
+
+ pdb->dfa->tables[YYTD_ID_ACCEPT2] = kvzalloc(tsize, GFP_KERNEL);
+ if (!pdb->dfa->tables[YYTD_ID_ACCEPT2]) {
+ *info = "failed to alloc dfa flags table";
+ goto out;
+ }
+ pdb->dfa->tables[YYTD_ID_ACCEPT2]->td_lolen = noents;
+ pdb->dfa->tables[YYTD_ID_ACCEPT2]->td_flags = tdflags;
+ }
/*
* Unfortunately due to a bug in earlier userspaces, a
* transition table may be present even when the dfa is
@@ -783,9 +803,13 @@ static int unpack_pdb(struct aa_ext *e, struct aa_policydb **policy,
if (!pdb->dfa && pdb->trans.table)
aa_free_str_table(&pdb->trans);
- /* TODO: move compat mapping here, requires dfa merging first */
- /* TODO: move verify here, it has to be done after compat mappings */
-
+ /* TODO:
+ * - move compat mapping here, requires dfa merging first
+ * - move verify here, it has to be done after compat mappings
+ * - move free of unneeded trans table here, has to be done
+ * after perm mapping.
+ */
+out:
*policy = pdb;
return 0;
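
For v2 policy (no "permsv" element, or a version of 2 or lower) the blob
carries no ACCEPT2 flags table, so unpack_pdb() synthesizes an empty one with
the same geometry as ACCEPT, sparing the rest of the code any version checks.
The sizing logic, assuming table_size(entries, flags) yields header plus
payload bytes as in the hunk:

	u32 noents  = pdb->dfa->tables[YYTD_ID_ACCEPT]->td_lolen;
	u16 tdflags = pdb->dfa->tables[YYTD_ID_ACCEPT]->td_flags;

	/* zeroed flags table, entry-for-entry parallel to ACCEPT */
	pdb->dfa->tables[YYTD_ID_ACCEPT2] =
		kvzalloc(table_size(noents, tdflags), GFP_KERNEL);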
@@ -862,7 +886,7 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name)
error = -ENOMEM;
goto fail;
}
- rules = list_first_entry(&profile->rules, typeof(*rules), list);
+ rules = profile->label.rules[0];
/* profile renaming is optional */
(void) aa_unpack_str(e, &profile->rename, "rename");
@@ -898,6 +922,12 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name)
(void) aa_unpack_strdup(e, &disconnected, "disconnected");
profile->disconnected = disconnected;
+ /* optional */
+ (void) aa_unpack_u32(e, &profile->signal, "kill");
+ if (profile->signal < 1 || profile->signal > MAXMAPPED_SIG) {
+ info = "profile kill.signal invalid value";
+ goto fail;
+ }
/* per profile debug flags (complain, audit) */
if (!aa_unpack_nameX(e, AA_STRUCT, "flags")) {
info = "profile missing flags";
@@ -1101,6 +1131,8 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name)
goto fail;
}
+ aa_compute_profile_mediates(profile);
+
return profile;
fail:
@@ -1215,21 +1247,32 @@ static bool verify_perm(struct aa_perms *perm)
static bool verify_perms(struct aa_policydb *pdb)
{
int i;
+ int xidx, xmax = -1;
for (i = 0; i < pdb->size; i++) {
if (!verify_perm(&pdb->perms[i]))
return false;
/* verify indexes into str table */
- if ((pdb->perms[i].xindex & AA_X_TYPE_MASK) == AA_X_TABLE &&
- (pdb->perms[i].xindex & AA_X_INDEX_MASK) >= pdb->trans.size)
- return false;
+ if ((pdb->perms[i].xindex & AA_X_TYPE_MASK) == AA_X_TABLE) {
+ xidx = pdb->perms[i].xindex & AA_X_INDEX_MASK;
+ if (xidx >= pdb->trans.size)
+ return false;
+ if (xmax < xidx)
+ xmax = xidx;
+ }
if (pdb->perms[i].tag && pdb->perms[i].tag >= pdb->trans.size)
return false;
if (pdb->perms[i].label &&
pdb->perms[i].label >= pdb->trans.size)
return false;
}
-
+ /* deal with incorrectly constructed string tables */
+ if (xmax == -1) {
+ aa_free_str_table(&pdb->trans);
+ } else if (pdb->trans.size > xmax + 1) {
+ if (!aa_resize_str_table(&pdb->trans, xmax + 1, GFP_KERNEL))
+ return false;
+ }
return true;
}
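
verify_perms() now records the highest transition-table index actually
referenced by an AA_X_TABLE xindex (xmax) and then normalizes pdb->trans:
freeing it when nothing references it, trimming it when userspace built it
larger than needed. The post-condition, in sketch form:

	/* after verify: trans is gone, or sized exactly to its last user */
	if (xmax == -1)
		aa_free_str_table(&pdb->trans);
	else if (pdb->trans.size > xmax + 1 &&
		 !aa_resize_str_table(&pdb->trans, xmax + 1, GFP_KERNEL))
		return false;	/* resize failure fails verification */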
@@ -1243,8 +1286,8 @@ static bool verify_perms(struct aa_policydb *pdb)
*/
static int verify_profile(struct aa_profile *profile)
{
- struct aa_ruleset *rules = list_first_entry(&profile->rules,
- typeof(*rules), list);
+ struct aa_ruleset *rules = profile->label.rules[0];
+
if (!rules)
return 0;
diff --git a/security/apparmor/policy_unpack_test.c b/security/apparmor/policy_unpack_test.c
index 5b2ba88ae9e2..cf18744dafe2 100644
--- a/security/apparmor/policy_unpack_test.c
+++ b/security/apparmor/policy_unpack_test.c
@@ -9,6 +9,8 @@
#include "include/policy.h"
#include "include/policy_unpack.h"
+#include <linux/unaligned.h>
+
#define TEST_STRING_NAME "TEST_STRING"
#define TEST_STRING_DATA "testing"
#define TEST_STRING_BUF_OFFSET \
@@ -80,7 +82,7 @@ static struct aa_ext *build_aa_ext_struct(struct policy_unpack_fixture *puf,
*(buf + 1) = strlen(TEST_U32_NAME) + 1;
strscpy(buf + 3, TEST_U32_NAME, e->end - (void *)(buf + 3));
*(buf + 3 + strlen(TEST_U32_NAME) + 1) = AA_U32;
- *((__le32 *)(buf + 3 + strlen(TEST_U32_NAME) + 2)) = cpu_to_le32(TEST_U32_DATA);
+ put_unaligned_le32(TEST_U32_DATA, buf + 3 + strlen(TEST_U32_NAME) + 2);
buf = e->start + TEST_NAMED_U64_BUF_OFFSET;
*buf = AA_NAME;
@@ -103,7 +105,7 @@ static struct aa_ext *build_aa_ext_struct(struct policy_unpack_fixture *puf,
*(buf + 1) = strlen(TEST_ARRAY_NAME) + 1;
strscpy(buf + 3, TEST_ARRAY_NAME, e->end - (void *)(buf + 3));
*(buf + 3 + strlen(TEST_ARRAY_NAME) + 1) = AA_ARRAY;
- *((__le16 *)(buf + 3 + strlen(TEST_ARRAY_NAME) + 2)) = cpu_to_le16(TEST_ARRAY_SIZE);
+ put_unaligned_le16(TEST_ARRAY_SIZE, buf + 3 + strlen(TEST_ARRAY_NAME) + 2);
return e;
}
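
The test fixture used to store values through __le32 */__le16 * casts at byte
offsets that are not naturally aligned; that is undefined behavior for the
compiler and a real trap on strict-alignment architectures. The
put_unaligned_le*() helpers from <linux/unaligned.h> perform byte-safe
little-endian stores. Equivalent usage (offsets here are illustrative):

	#include <linux/unaligned.h>

	put_unaligned_le32(0x12345678, buf + 5);  /* was *(__le32 *)(buf + 5) = ... */
	put_unaligned_le16(0x1234, buf + 11);     /* was *(__le16 *)(buf + 11) = ... */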
diff --git a/security/apparmor/procattr.c b/security/apparmor/procattr.c
index e3857e3d7c6c..ce40f15d4952 100644
--- a/security/apparmor/procattr.c
+++ b/security/apparmor/procattr.c
@@ -125,12 +125,14 @@ int aa_setprocattr_changehat(char *args, size_t size, int flags)
for (count = 0; (hat < end) && count < 16; ++count) {
char *next = hat + strlen(hat) + 1;
hats[count] = hat;
- AA_DEBUG("%s: (pid %d) Magic 0x%llx count %d hat '%s'\n"
+ AA_DEBUG(DEBUG_DOMAIN,
+ "%s: (pid %d) Magic 0x%llx count %d hat '%s'\n"
, __func__, current->pid, token, count, hat);
hat = next;
}
} else
- AA_DEBUG("%s: (pid %d) Magic 0x%llx count %d Hat '%s'\n",
+ AA_DEBUG(DEBUG_DOMAIN,
+ "%s: (pid %d) Magic 0x%llx count %d Hat '%s'\n",
__func__, current->pid, token, count, "<NULL>");
return aa_change_hat(hats, count, token, flags);
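
AA_DEBUG now takes a debug-class selector (DEBUG_DOMAIN, DEBUG_POLICY, ...) as
its first argument, so output can be enabled per subsystem instead of
globally. A plausible shape for the macro, assuming a module-parameter bitmask
named aa_g_debug (the exact upstream definition may differ):

	/* sketch: gate each message on a per-class bit in the debug mask */
	#define AA_DEBUG(opt, fmt, args...)				      \
		do {							      \
			if (aa_g_debug & (opt))				      \
				pr_debug("AppArmor: " fmt, ##args);	      \
		} while (0)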
diff --git a/security/apparmor/resource.c b/security/apparmor/resource.c
index dcc94c3153d5..8e80db3ae21c 100644
--- a/security/apparmor/resource.c
+++ b/security/apparmor/resource.c
@@ -89,8 +89,7 @@ static int profile_setrlimit(const struct cred *subj_cred,
struct aa_profile *profile, unsigned int resource,
struct rlimit *new_rlim)
{
- struct aa_ruleset *rules = list_first_entry(&profile->rules,
- typeof(*rules), list);
+ struct aa_ruleset *rules = profile->label.rules[0];
int e = 0;
if (rules->rlimits.mask & (1 << resource) && new_rlim->rlim_max >
@@ -165,9 +164,7 @@ void __aa_transition_rlimits(struct aa_label *old_l, struct aa_label *new_l)
* to the lesser of the tasks hard limit and the init tasks soft limit
*/
label_for_each_confined(i, old_l, old) {
- struct aa_ruleset *rules = list_first_entry(&old->rules,
- typeof(*rules),
- list);
+ struct aa_ruleset *rules = old->label.rules[0];
if (rules->rlimits.mask) {
int j;
@@ -185,9 +182,7 @@ void __aa_transition_rlimits(struct aa_label *old_l, struct aa_label *new_l)
/* set any new hard limits as dictated by the new profile */
label_for_each_confined(i, new_l, new) {
- struct aa_ruleset *rules = list_first_entry(&new->rules,
- typeof(*rules),
- list);
+ struct aa_ruleset *rules = new->label.rules[0];
int j;
if (!rules->rlimits.mask)
diff --git a/security/apparmor/task.c b/security/apparmor/task.c
index c87fb9f4ac18..c9bc9cc69475 100644
--- a/security/apparmor/task.c
+++ b/security/apparmor/task.c
@@ -228,8 +228,7 @@ static int profile_ptrace_perm(const struct cred *cred,
struct aa_label *peer, u32 request,
struct apparmor_audit_data *ad)
{
- struct aa_ruleset *rules = list_first_entry(&profile->rules,
- typeof(*rules), list);
+ struct aa_ruleset *rules = profile->label.rules[0];
struct aa_perms perms = { };
ad->subj_cred = cred;
@@ -246,7 +245,7 @@ static int profile_tracee_perm(const struct cred *cred,
struct apparmor_audit_data *ad)
{
if (profile_unconfined(tracee) || unconfined(tracer) ||
- !ANY_RULE_MEDIATES(&tracee->rules, AA_CLASS_PTRACE))
+ !label_mediates(&tracee->label, AA_CLASS_PTRACE))
return 0;
return profile_ptrace_perm(cred, tracee, tracer, request, ad);
@@ -260,7 +259,7 @@ static int profile_tracer_perm(const struct cred *cred,
if (profile_unconfined(tracer))
return 0;
- if (ANY_RULE_MEDIATES(&tracer->rules, AA_CLASS_PTRACE))
+ if (label_mediates(&tracer->label, AA_CLASS_PTRACE))
return profile_ptrace_perm(cred, tracer, tracee, request, ad);
/* profile uses the old style capability check for ptrace */
@@ -324,9 +323,7 @@ int aa_profile_ns_perm(struct aa_profile *profile,
ad->request = request;
if (!profile_unconfined(profile)) {
- struct aa_ruleset *rules = list_first_entry(&profile->rules,
- typeof(*rules),
- list);
+ struct aa_ruleset *rules = profile->label.rules[0];
aa_state_t state;
state = RULE_MEDIATES(rules, ad->class);