From 6549c46af8551b346bcc0b9043f93848319acd5c Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Sun, 27 Jun 2021 16:04:18 +0800 Subject: regulator: rt5033: Fix n_voltages settings for BUCK and LDO For linear regulators, the n_voltages should be (max - min) / step + 1. Buck voltage from 1v to 3V, per step 100mV, and vout mask is 0x1f. If value is from 20 to 31, the voltage will all be fixed to 3V. And LDO also, just vout range is different from 1.2v to 3v, step is the same. If value is from 18 to 31, the voltage will also be fixed to 3v. Signed-off-by: Axel Lin Reviewed-by: ChiYuan Huang Link: https://lore.kernel.org/r/20210627080418.1718127-1-axel.lin@ingics.com Signed-off-by: Mark Brown --- include/linux/mfd/rt5033-private.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/rt5033-private.h b/include/linux/mfd/rt5033-private.h index 2d1895c3efbf..40a0c2dfb80f 100644 --- a/include/linux/mfd/rt5033-private.h +++ b/include/linux/mfd/rt5033-private.h @@ -200,13 +200,13 @@ enum rt5033_reg { #define RT5033_REGULATOR_BUCK_VOLTAGE_MIN 1000000U #define RT5033_REGULATOR_BUCK_VOLTAGE_MAX 3000000U #define RT5033_REGULATOR_BUCK_VOLTAGE_STEP 100000U -#define RT5033_REGULATOR_BUCK_VOLTAGE_STEP_NUM 32 +#define RT5033_REGULATOR_BUCK_VOLTAGE_STEP_NUM 21 /* RT5033 regulator LDO output voltage uV */ #define RT5033_REGULATOR_LDO_VOLTAGE_MIN 1200000U #define RT5033_REGULATOR_LDO_VOLTAGE_MAX 3000000U #define RT5033_REGULATOR_LDO_VOLTAGE_STEP 100000U -#define RT5033_REGULATOR_LDO_VOLTAGE_STEP_NUM 32 +#define RT5033_REGULATOR_LDO_VOLTAGE_STEP_NUM 19 /* RT5033 regulator SAFE LDO output voltage uV */ #define RT5033_REGULATOR_SAFE_LDO_VOLTAGE 4900000U -- cgit v1.2.3 From 5d43f951b1ac797450bb4d230fdc960b739bea04 Mon Sep 17 00:00:00 2001 From: Yangbo Lu Date: Wed, 30 Jun 2021 16:11:52 +0800 Subject: ptp: add ptp virtual clock driver framework This patch is to add ptp virtual clock driver framework utilizing timecounter/cyclecounter. The patch just exports two essential APIs for PTP driver. - ptp_vclock_register() - ptp_vclock_unregister() Signed-off-by: Yangbo Lu Signed-off-by: David S. Miller --- drivers/ptp/Makefile | 2 +- drivers/ptp/ptp_private.h | 15 ++++ drivers/ptp/ptp_vclock.c | 150 +++++++++++++++++++++++++++++++++++++++ include/linux/ptp_clock_kernel.h | 4 +- 4 files changed, 169 insertions(+), 2 deletions(-) create mode 100644 drivers/ptp/ptp_vclock.c (limited to 'include/linux') diff --git a/drivers/ptp/Makefile b/drivers/ptp/Makefile index 8673d1743faa..28a6fe342d3e 100644 --- a/drivers/ptp/Makefile +++ b/drivers/ptp/Makefile @@ -3,7 +3,7 @@ # Makefile for PTP 1588 clock support. # -ptp-y := ptp_clock.o ptp_chardev.o ptp_sysfs.o +ptp-y := ptp_clock.o ptp_chardev.o ptp_sysfs.o ptp_vclock.o ptp_kvm-$(CONFIG_X86) := ptp_kvm_x86.o ptp_kvm_common.o ptp_kvm-$(CONFIG_HAVE_ARM_SMCCC) := ptp_kvm_arm.o ptp_kvm_common.o obj-$(CONFIG_PTP_1588_CLOCK) += ptp.o diff --git a/drivers/ptp/ptp_private.h b/drivers/ptp/ptp_private.h index 6b97155148f1..853b79b6b30e 100644 --- a/drivers/ptp/ptp_private.h +++ b/drivers/ptp/ptp_private.h @@ -48,6 +48,19 @@ struct ptp_clock { struct kthread_delayed_work aux_work; }; +#define info_to_vclock(d) container_of((d), struct ptp_vclock, info) +#define cc_to_vclock(d) container_of((d), struct ptp_vclock, cc) +#define dw_to_vclock(d) container_of((d), struct ptp_vclock, refresh_work) + +struct ptp_vclock { + struct ptp_clock *pclock; + struct ptp_clock_info info; + struct ptp_clock *clock; + struct cyclecounter cc; + struct timecounter tc; + spinlock_t lock; /* protects tc/cc */ +}; + /* * The function queue_cnt() is safe for readers to call without * holding q->lock. Readers use this function to verify that the queue @@ -89,4 +102,6 @@ extern const struct attribute_group *ptp_groups[]; int ptp_populate_pin_groups(struct ptp_clock *ptp); void ptp_cleanup_pin_groups(struct ptp_clock *ptp); +struct ptp_vclock *ptp_vclock_register(struct ptp_clock *pclock); +void ptp_vclock_unregister(struct ptp_vclock *vclock); #endif diff --git a/drivers/ptp/ptp_vclock.c b/drivers/ptp/ptp_vclock.c new file mode 100644 index 000000000000..fc9205cc504d --- /dev/null +++ b/drivers/ptp/ptp_vclock.c @@ -0,0 +1,150 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * PTP virtual clock driver + * + * Copyright 2021 NXP + */ +#include +#include "ptp_private.h" + +#define PTP_VCLOCK_CC_SHIFT 31 +#define PTP_VCLOCK_CC_MULT (1 << PTP_VCLOCK_CC_SHIFT) +#define PTP_VCLOCK_FADJ_SHIFT 9 +#define PTP_VCLOCK_FADJ_DENOMINATOR 15625ULL +#define PTP_VCLOCK_REFRESH_INTERVAL (HZ * 2) + +static int ptp_vclock_adjfine(struct ptp_clock_info *ptp, long scaled_ppm) +{ + struct ptp_vclock *vclock = info_to_vclock(ptp); + unsigned long flags; + s64 adj; + + adj = (s64)scaled_ppm << PTP_VCLOCK_FADJ_SHIFT; + adj = div_s64(adj, PTP_VCLOCK_FADJ_DENOMINATOR); + + spin_lock_irqsave(&vclock->lock, flags); + timecounter_read(&vclock->tc); + vclock->cc.mult = PTP_VCLOCK_CC_MULT + adj; + spin_unlock_irqrestore(&vclock->lock, flags); + + return 0; +} + +static int ptp_vclock_adjtime(struct ptp_clock_info *ptp, s64 delta) +{ + struct ptp_vclock *vclock = info_to_vclock(ptp); + unsigned long flags; + + spin_lock_irqsave(&vclock->lock, flags); + timecounter_adjtime(&vclock->tc, delta); + spin_unlock_irqrestore(&vclock->lock, flags); + + return 0; +} + +static int ptp_vclock_gettime(struct ptp_clock_info *ptp, + struct timespec64 *ts) +{ + struct ptp_vclock *vclock = info_to_vclock(ptp); + unsigned long flags; + u64 ns; + + spin_lock_irqsave(&vclock->lock, flags); + ns = timecounter_read(&vclock->tc); + spin_unlock_irqrestore(&vclock->lock, flags); + *ts = ns_to_timespec64(ns); + + return 0; +} + +static int ptp_vclock_settime(struct ptp_clock_info *ptp, + const struct timespec64 *ts) +{ + struct ptp_vclock *vclock = info_to_vclock(ptp); + u64 ns = timespec64_to_ns(ts); + unsigned long flags; + + spin_lock_irqsave(&vclock->lock, flags); + timecounter_init(&vclock->tc, &vclock->cc, ns); + spin_unlock_irqrestore(&vclock->lock, flags); + + return 0; +} + +static long ptp_vclock_refresh(struct ptp_clock_info *ptp) +{ + struct ptp_vclock *vclock = info_to_vclock(ptp); + struct timespec64 ts; + + ptp_vclock_gettime(&vclock->info, &ts); + + return PTP_VCLOCK_REFRESH_INTERVAL; +} + +static const struct ptp_clock_info ptp_vclock_info = { + .owner = THIS_MODULE, + .name = "ptp virtual clock", + /* The maximum ppb value that long scaled_ppm can support */ + .max_adj = 32767999, + .adjfine = ptp_vclock_adjfine, + .adjtime = ptp_vclock_adjtime, + .gettime64 = ptp_vclock_gettime, + .settime64 = ptp_vclock_settime, + .do_aux_work = ptp_vclock_refresh, +}; + +static u64 ptp_vclock_read(const struct cyclecounter *cc) +{ + struct ptp_vclock *vclock = cc_to_vclock(cc); + struct ptp_clock *ptp = vclock->pclock; + struct timespec64 ts = {}; + + if (ptp->info->gettimex64) + ptp->info->gettimex64(ptp->info, &ts, NULL); + else + ptp->info->gettime64(ptp->info, &ts); + + return timespec64_to_ns(&ts); +} + +static const struct cyclecounter ptp_vclock_cc = { + .read = ptp_vclock_read, + .mask = CYCLECOUNTER_MASK(32), + .mult = PTP_VCLOCK_CC_MULT, + .shift = PTP_VCLOCK_CC_SHIFT, +}; + +struct ptp_vclock *ptp_vclock_register(struct ptp_clock *pclock) +{ + struct ptp_vclock *vclock; + + vclock = kzalloc(sizeof(*vclock), GFP_KERNEL); + if (!vclock) + return NULL; + + vclock->pclock = pclock; + vclock->info = ptp_vclock_info; + vclock->cc = ptp_vclock_cc; + + snprintf(vclock->info.name, PTP_CLOCK_NAME_LEN, "ptp%d_virt", + pclock->index); + + spin_lock_init(&vclock->lock); + + vclock->clock = ptp_clock_register(&vclock->info, &pclock->dev); + if (IS_ERR_OR_NULL(vclock->clock)) { + kfree(vclock); + return NULL; + } + + timecounter_init(&vclock->tc, &vclock->cc, 0); + ptp_schedule_worker(vclock->clock, PTP_VCLOCK_REFRESH_INTERVAL); + + return vclock; +} + +void ptp_vclock_unregister(struct ptp_vclock *vclock) +{ + ptp_clock_unregister(vclock->clock); + kfree(vclock); +} diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h index aba237c0b3a2..b6fb771ee524 100644 --- a/include/linux/ptp_clock_kernel.h +++ b/include/linux/ptp_clock_kernel.h @@ -11,7 +11,9 @@ #include #include #include +#include +#define PTP_CLOCK_NAME_LEN 32 /** * struct ptp_clock_request - request PTP clock event * @@ -134,7 +136,7 @@ struct ptp_system_timestamp { struct ptp_clock_info { struct module *owner; - char name[16]; + char name[PTP_CLOCK_NAME_LEN]; s32 max_adj; int n_alarm; int n_ext_ts; -- cgit v1.2.3 From acb288e8047b7569fbc9af6fa6e9405315345103 Mon Sep 17 00:00:00 2001 From: Yangbo Lu Date: Wed, 30 Jun 2021 16:11:55 +0800 Subject: ptp: add kernel API ptp_get_vclocks_index() Add kernel API ptp_get_vclocks_index() to get all ptp vclocks index on pclock. This is preparation for supporting ptp vclocks info query through ethtool. Signed-off-by: Yangbo Lu Signed-off-by: David S. Miller --- drivers/ptp/ptp_clock.c | 3 ++- drivers/ptp/ptp_private.h | 2 ++ drivers/ptp/ptp_vclock.c | 35 +++++++++++++++++++++++++++++++++++ include/linux/ptp_clock_kernel.h | 14 ++++++++++++++ 4 files changed, 53 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/ptp/ptp_clock.c b/drivers/ptp/ptp_clock.c index 9205a9362a9d..f012fa581cf4 100644 --- a/drivers/ptp/ptp_clock.c +++ b/drivers/ptp/ptp_clock.c @@ -24,10 +24,11 @@ #define PTP_PPS_EVENT PPS_CAPTUREASSERT #define PTP_PPS_MODE (PTP_PPS_DEFAULTS | PPS_CANWAIT | PPS_TSFMT_TSPEC) +struct class *ptp_class; + /* private globals */ static dev_t ptp_devt; -static struct class *ptp_class; static DEFINE_IDA(ptp_clocks_map); diff --git a/drivers/ptp/ptp_private.h b/drivers/ptp/ptp_private.h index f75fadd9b244..dba6be477067 100644 --- a/drivers/ptp/ptp_private.h +++ b/drivers/ptp/ptp_private.h @@ -96,6 +96,8 @@ static inline bool ptp_vclock_in_use(struct ptp_clock *ptp) return in_use; } +extern struct class *ptp_class; + /* * see ptp_chardev.c */ diff --git a/drivers/ptp/ptp_vclock.c b/drivers/ptp/ptp_vclock.c index fc9205cc504d..cefab29a0592 100644 --- a/drivers/ptp/ptp_vclock.c +++ b/drivers/ptp/ptp_vclock.c @@ -148,3 +148,38 @@ void ptp_vclock_unregister(struct ptp_vclock *vclock) ptp_clock_unregister(vclock->clock); kfree(vclock); } + +int ptp_get_vclocks_index(int pclock_index, int **vclock_index) +{ + char name[PTP_CLOCK_NAME_LEN] = ""; + struct ptp_clock *ptp; + struct device *dev; + int num = 0; + + if (pclock_index < 0) + return num; + + snprintf(name, PTP_CLOCK_NAME_LEN, "ptp%d", pclock_index); + dev = class_find_device_by_name(ptp_class, name); + if (!dev) + return num; + + ptp = dev_get_drvdata(dev); + + if (mutex_lock_interruptible(&ptp->n_vclocks_mux)) { + put_device(dev); + return num; + } + + *vclock_index = kzalloc(sizeof(int) * ptp->n_vclocks, GFP_KERNEL); + if (!(*vclock_index)) + goto out; + + memcpy(*vclock_index, ptp->vclock_index, sizeof(int) * ptp->n_vclocks); + num = ptp->n_vclocks; +out: + mutex_unlock(&ptp->n_vclocks_mux); + put_device(dev); + return num; +} +EXPORT_SYMBOL(ptp_get_vclocks_index); diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h index b6fb771ee524..300a984fec87 100644 --- a/include/linux/ptp_clock_kernel.h +++ b/include/linux/ptp_clock_kernel.h @@ -306,6 +306,18 @@ int ptp_schedule_worker(struct ptp_clock *ptp, unsigned long delay); */ void ptp_cancel_worker_sync(struct ptp_clock *ptp); +/** + * ptp_get_vclocks_index() - get all vclocks index on pclock, and + * caller is responsible to free memory + * of vclock_index + * + * @pclock_index: phc index of ptp pclock. + * @vclock_index: pointer to pointer of vclock index. + * + * return number of vclocks. + */ +int ptp_get_vclocks_index(int pclock_index, int **vclock_index); + #else static inline struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info, struct device *parent) @@ -325,6 +337,8 @@ static inline int ptp_schedule_worker(struct ptp_clock *ptp, { return -EOPNOTSUPP; } static inline void ptp_cancel_worker_sync(struct ptp_clock *ptp) { } +static inline int ptp_get_vclocks_index(int pclock_index, int **vclock_index) +{ return 0; } #endif -- cgit v1.2.3 From c156174a67070042d51d2c866146d3c934d5468c Mon Sep 17 00:00:00 2001 From: Yangbo Lu Date: Wed, 30 Jun 2021 16:11:56 +0800 Subject: ethtool: add a new command for getting PHC virtual clocks Add an interface for getting PHC (PTP Hardware Clock) virtual clocks, which are based on PHC physical clock providing hardware timestamp to network packets. Signed-off-by: Yangbo Lu Signed-off-by: David S. Miller --- Documentation/networking/ethtool-netlink.rst | 22 +++++++ include/linux/ethtool.h | 10 +++ include/uapi/linux/ethtool_netlink.h | 15 +++++ net/ethtool/Makefile | 2 +- net/ethtool/common.c | 13 ++++ net/ethtool/netlink.c | 10 +++ net/ethtool/netlink.h | 2 + net/ethtool/phc_vclocks.c | 94 ++++++++++++++++++++++++++++ 8 files changed, 167 insertions(+), 1 deletion(-) create mode 100644 net/ethtool/phc_vclocks.c (limited to 'include/linux') diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst index 6ea91e41593f..c86628e6a235 100644 --- a/Documentation/networking/ethtool-netlink.rst +++ b/Documentation/networking/ethtool-netlink.rst @@ -212,6 +212,7 @@ Userspace to kernel: ``ETHTOOL_MSG_FEC_SET`` set FEC settings ``ETHTOOL_MSG_MODULE_EEPROM_GET`` read SFP module EEPROM ``ETHTOOL_MSG_STATS_GET`` get standard statistics + ``ETHTOOL_MSG_PHC_VCLOCKS_GET`` get PHC virtual clocks info ===================================== ================================ Kernel to userspace: @@ -250,6 +251,7 @@ Kernel to userspace: ``ETHTOOL_MSG_FEC_NTF`` FEC settings ``ETHTOOL_MSG_MODULE_EEPROM_GET_REPLY`` read SFP module EEPROM ``ETHTOOL_MSG_STATS_GET_REPLY`` standard statistics + ``ETHTOOL_MSG_PHC_VCLOCKS_GET_REPLY`` PHC virtual clocks info ======================================== ================================= ``GET`` requests are sent by userspace applications to retrieve device @@ -1477,6 +1479,25 @@ Low and high bounds are inclusive, for example: etherStatsPkts512to1023Octets 512 1023 ============================= ==== ==== +PHC_VCLOCKS_GET +=============== + +Query device PHC virtual clocks information. + +Request contents: + + ==================================== ====== ========================== + ``ETHTOOL_A_PHC_VCLOCKS_HEADER`` nested request header + ==================================== ====== ========================== + +Kernel response contents: + + ==================================== ====== ========================== + ``ETHTOOL_A_PHC_VCLOCKS_HEADER`` nested reply header + ``ETHTOOL_A_PHC_VCLOCKS_NUM`` u32 PHC virtual clocks number + ``ETHTOOL_A_PHC_VCLOCKS_INDEX`` s32 PHC index array + ==================================== ====== ========================== + Request translation =================== @@ -1575,4 +1596,5 @@ are netlink only. n/a ``ETHTOOL_MSG_CABLE_TEST_ACT`` n/a ``ETHTOOL_MSG_CABLE_TEST_TDR_ACT`` n/a ``ETHTOOL_MSG_TUNNEL_INFO_GET`` + n/a ``ETHTOOL_MSG_PHC_VCLOCKS_GET`` =================================== ===================================== diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 29dbb603bc91..232daaec56e4 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -757,6 +757,16 @@ void ethtool_params_from_link_mode(struct ethtool_link_ksettings *link_ksettings, enum ethtool_link_mode_bit_indices link_mode); +/** + * ethtool_get_phc_vclocks - Derive phc vclocks information, and caller + * is responsible to free memory of vclock_index + * @dev: pointer to net_device structure + * @vclock_index: pointer to pointer of vclock index + * + * Return number of phc vclocks + */ +int ethtool_get_phc_vclocks(struct net_device *dev, int **vclock_index); + /** * ethtool_sprintf - Write formatted string to ethtool string data * @data: Pointer to start of string to update diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index c7135c9c37a5..b3b93710eff7 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -46,6 +46,7 @@ enum { ETHTOOL_MSG_FEC_SET, ETHTOOL_MSG_MODULE_EEPROM_GET, ETHTOOL_MSG_STATS_GET, + ETHTOOL_MSG_PHC_VCLOCKS_GET, /* add new constants above here */ __ETHTOOL_MSG_USER_CNT, @@ -88,6 +89,7 @@ enum { ETHTOOL_MSG_FEC_NTF, ETHTOOL_MSG_MODULE_EEPROM_GET_REPLY, ETHTOOL_MSG_STATS_GET_REPLY, + ETHTOOL_MSG_PHC_VCLOCKS_GET_REPLY, /* add new constants above here */ __ETHTOOL_MSG_KERNEL_CNT, @@ -440,6 +442,19 @@ enum { ETHTOOL_A_TSINFO_MAX = (__ETHTOOL_A_TSINFO_CNT - 1) }; +/* PHC VCLOCKS */ + +enum { + ETHTOOL_A_PHC_VCLOCKS_UNSPEC, + ETHTOOL_A_PHC_VCLOCKS_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_PHC_VCLOCKS_NUM, /* u32 */ + ETHTOOL_A_PHC_VCLOCKS_INDEX, /* array, s32 */ + + /* add new constants above here */ + __ETHTOOL_A_PHC_VCLOCKS_CNT, + ETHTOOL_A_PHC_VCLOCKS_MAX = (__ETHTOOL_A_PHC_VCLOCKS_CNT - 1) +}; + /* CABLE TEST */ enum { diff --git a/net/ethtool/Makefile b/net/ethtool/Makefile index 723c9a8a8cdf..0a19470efbfb 100644 --- a/net/ethtool/Makefile +++ b/net/ethtool/Makefile @@ -7,4 +7,4 @@ obj-$(CONFIG_ETHTOOL_NETLINK) += ethtool_nl.o ethtool_nl-y := netlink.o bitset.o strset.o linkinfo.o linkmodes.o \ linkstate.o debug.o wol.o features.o privflags.o rings.o \ channels.o coalesce.o pause.o eee.o tsinfo.o cabletest.o \ - tunnels.o fec.o eeprom.o stats.o + tunnels.o fec.o eeprom.o stats.o phc_vclocks.o diff --git a/net/ethtool/common.c b/net/ethtool/common.c index f9dcbad84788..798231b07676 100644 --- a/net/ethtool/common.c +++ b/net/ethtool/common.c @@ -4,6 +4,7 @@ #include #include #include +#include #include "common.h" @@ -554,6 +555,18 @@ int __ethtool_get_ts_info(struct net_device *dev, struct ethtool_ts_info *info) return 0; } +int ethtool_get_phc_vclocks(struct net_device *dev, int **vclock_index) +{ + struct ethtool_ts_info info = { }; + int num = 0; + + if (!__ethtool_get_ts_info(dev, &info)) + num = ptp_get_vclocks_index(info.phc_index, vclock_index); + + return num; +} +EXPORT_SYMBOL(ethtool_get_phc_vclocks); + const struct ethtool_phy_ops *ethtool_phy_ops; void ethtool_set_ethtool_phy_ops(const struct ethtool_phy_ops *ops) diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index a7346346114f..73e0f5b626bf 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -248,6 +248,7 @@ ethnl_default_requests[__ETHTOOL_MSG_USER_CNT] = { [ETHTOOL_MSG_TSINFO_GET] = ðnl_tsinfo_request_ops, [ETHTOOL_MSG_MODULE_EEPROM_GET] = ðnl_module_eeprom_request_ops, [ETHTOOL_MSG_STATS_GET] = ðnl_stats_request_ops, + [ETHTOOL_MSG_PHC_VCLOCKS_GET] = ðnl_phc_vclocks_request_ops, }; static struct ethnl_dump_ctx *ethnl_dump_context(struct netlink_callback *cb) @@ -958,6 +959,15 @@ static const struct genl_ops ethtool_genl_ops[] = { .policy = ethnl_stats_get_policy, .maxattr = ARRAY_SIZE(ethnl_stats_get_policy) - 1, }, + { + .cmd = ETHTOOL_MSG_PHC_VCLOCKS_GET, + .doit = ethnl_default_doit, + .start = ethnl_default_start, + .dumpit = ethnl_default_dumpit, + .done = ethnl_default_done, + .policy = ethnl_phc_vclocks_get_policy, + .maxattr = ARRAY_SIZE(ethnl_phc_vclocks_get_policy) - 1, + }, }; static const struct genl_multicast_group ethtool_nl_mcgrps[] = { diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h index 3e25a47fd482..3fc395c86702 100644 --- a/net/ethtool/netlink.h +++ b/net/ethtool/netlink.h @@ -347,6 +347,7 @@ extern const struct ethnl_request_ops ethnl_tsinfo_request_ops; extern const struct ethnl_request_ops ethnl_fec_request_ops; extern const struct ethnl_request_ops ethnl_module_eeprom_request_ops; extern const struct ethnl_request_ops ethnl_stats_request_ops; +extern const struct ethnl_request_ops ethnl_phc_vclocks_request_ops; extern const struct nla_policy ethnl_header_policy[ETHTOOL_A_HEADER_FLAGS + 1]; extern const struct nla_policy ethnl_header_policy_stats[ETHTOOL_A_HEADER_FLAGS + 1]; @@ -382,6 +383,7 @@ extern const struct nla_policy ethnl_fec_get_policy[ETHTOOL_A_FEC_HEADER + 1]; extern const struct nla_policy ethnl_fec_set_policy[ETHTOOL_A_FEC_AUTO + 1]; extern const struct nla_policy ethnl_module_eeprom_get_policy[ETHTOOL_A_MODULE_EEPROM_I2C_ADDRESS + 1]; extern const struct nla_policy ethnl_stats_get_policy[ETHTOOL_A_STATS_GROUPS + 1]; +extern const struct nla_policy ethnl_phc_vclocks_get_policy[ETHTOOL_A_PHC_VCLOCKS_HEADER + 1]; int ethnl_set_linkinfo(struct sk_buff *skb, struct genl_info *info); int ethnl_set_linkmodes(struct sk_buff *skb, struct genl_info *info); diff --git a/net/ethtool/phc_vclocks.c b/net/ethtool/phc_vclocks.c new file mode 100644 index 000000000000..637b2f5297d5 --- /dev/null +++ b/net/ethtool/phc_vclocks.c @@ -0,0 +1,94 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2021 NXP + */ +#include "netlink.h" +#include "common.h" + +struct phc_vclocks_req_info { + struct ethnl_req_info base; +}; + +struct phc_vclocks_reply_data { + struct ethnl_reply_data base; + int num; + int *index; +}; + +#define PHC_VCLOCKS_REPDATA(__reply_base) \ + container_of(__reply_base, struct phc_vclocks_reply_data, base) + +const struct nla_policy ethnl_phc_vclocks_get_policy[] = { + [ETHTOOL_A_PHC_VCLOCKS_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy), +}; + +static int phc_vclocks_prepare_data(const struct ethnl_req_info *req_base, + struct ethnl_reply_data *reply_base, + struct genl_info *info) +{ + struct phc_vclocks_reply_data *data = PHC_VCLOCKS_REPDATA(reply_base); + struct net_device *dev = reply_base->dev; + int ret; + + ret = ethnl_ops_begin(dev); + if (ret < 0) + return ret; + data->num = ethtool_get_phc_vclocks(dev, &data->index); + ethnl_ops_complete(dev); + + return ret; +} + +static int phc_vclocks_reply_size(const struct ethnl_req_info *req_base, + const struct ethnl_reply_data *reply_base) +{ + const struct phc_vclocks_reply_data *data = + PHC_VCLOCKS_REPDATA(reply_base); + int len = 0; + + if (data->num > 0) { + len += nla_total_size(sizeof(u32)); + len += nla_total_size(sizeof(s32) * data->num); + } + + return len; +} + +static int phc_vclocks_fill_reply(struct sk_buff *skb, + const struct ethnl_req_info *req_base, + const struct ethnl_reply_data *reply_base) +{ + const struct phc_vclocks_reply_data *data = + PHC_VCLOCKS_REPDATA(reply_base); + + if (data->num <= 0) + return 0; + + if (nla_put_u32(skb, ETHTOOL_A_PHC_VCLOCKS_NUM, data->num) || + nla_put(skb, ETHTOOL_A_PHC_VCLOCKS_INDEX, + sizeof(s32) * data->num, data->index)) + return -EMSGSIZE; + + return 0; +} + +static void phc_vclocks_cleanup_data(struct ethnl_reply_data *reply_base) +{ + const struct phc_vclocks_reply_data *data = + PHC_VCLOCKS_REPDATA(reply_base); + + kfree(data->index); +} + +const struct ethnl_request_ops ethnl_phc_vclocks_request_ops = { + .request_cmd = ETHTOOL_MSG_PHC_VCLOCKS_GET, + .reply_cmd = ETHTOOL_MSG_PHC_VCLOCKS_GET_REPLY, + .hdr_attr = ETHTOOL_A_PHC_VCLOCKS_HEADER, + .req_info_size = sizeof(struct phc_vclocks_req_info), + .reply_data_size = sizeof(struct phc_vclocks_reply_data), + + .prepare_data = phc_vclocks_prepare_data, + .reply_size = phc_vclocks_reply_size, + .fill_reply = phc_vclocks_fill_reply, + .cleanup_data = phc_vclocks_cleanup_data, +}; -- cgit v1.2.3 From 895487a3a10fb3a177e20dcde875515d46ccd4df Mon Sep 17 00:00:00 2001 From: Yangbo Lu Date: Wed, 30 Jun 2021 16:11:57 +0800 Subject: ptp: add kernel API ptp_convert_timestamp() Add kernel API ptp_convert_timestamp() to convert raw hardware timestamp to a specified ptp vclock time. Signed-off-by: Yangbo Lu Signed-off-by: David S. Miller --- drivers/ptp/ptp_vclock.c | 34 ++++++++++++++++++++++++++++++++++ include/linux/ptp_clock_kernel.h | 13 +++++++++++++ 2 files changed, 47 insertions(+) (limited to 'include/linux') diff --git a/drivers/ptp/ptp_vclock.c b/drivers/ptp/ptp_vclock.c index cefab29a0592..e0f87c57749a 100644 --- a/drivers/ptp/ptp_vclock.c +++ b/drivers/ptp/ptp_vclock.c @@ -183,3 +183,37 @@ out: return num; } EXPORT_SYMBOL(ptp_get_vclocks_index); + +void ptp_convert_timestamp(struct skb_shared_hwtstamps *hwtstamps, + int vclock_index) +{ + char name[PTP_CLOCK_NAME_LEN] = ""; + struct ptp_vclock *vclock; + struct ptp_clock *ptp; + unsigned long flags; + struct device *dev; + u64 ns; + + snprintf(name, PTP_CLOCK_NAME_LEN, "ptp%d", vclock_index); + dev = class_find_device_by_name(ptp_class, name); + if (!dev) + return; + + ptp = dev_get_drvdata(dev); + if (!ptp->is_virtual_clock) { + put_device(dev); + return; + } + + vclock = info_to_vclock(ptp->info); + + ns = ktime_to_ns(hwtstamps->hwtstamp); + + spin_lock_irqsave(&vclock->lock, flags); + ns = timecounter_cyc2time(&vclock->tc, ns); + spin_unlock_irqrestore(&vclock->lock, flags); + + put_device(dev); + hwtstamps->hwtstamp = ns_to_ktime(ns); +} +EXPORT_SYMBOL(ptp_convert_timestamp); diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h index 300a984fec87..71fac9237725 100644 --- a/include/linux/ptp_clock_kernel.h +++ b/include/linux/ptp_clock_kernel.h @@ -12,6 +12,7 @@ #include #include #include +#include #define PTP_CLOCK_NAME_LEN 32 /** @@ -318,6 +319,15 @@ void ptp_cancel_worker_sync(struct ptp_clock *ptp); */ int ptp_get_vclocks_index(int pclock_index, int **vclock_index); +/** + * ptp_convert_timestamp() - convert timestamp to a ptp vclock time + * + * @hwtstamps: skb_shared_hwtstamps structure pointer + * @vclock_index: phc index of ptp vclock. + */ +void ptp_convert_timestamp(struct skb_shared_hwtstamps *hwtstamps, + int vclock_index); + #else static inline struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info, struct device *parent) @@ -339,6 +349,9 @@ static inline void ptp_cancel_worker_sync(struct ptp_clock *ptp) { } static inline int ptp_get_vclocks_index(int pclock_index, int **vclock_index) { return 0; } +static inline void ptp_convert_timestamp(struct skb_shared_hwtstamps *hwtstamps, + int vclock_index) +{ } #endif -- cgit v1.2.3 From b2aae654a4794ef898ad33a179f341eb610f6b85 Mon Sep 17 00:00:00 2001 From: Xiaoliang Yang Date: Mon, 5 Jul 2021 18:26:54 +0800 Subject: net: stmmac: add mutex lock to protect est parameters Add a mutex lock to protect est structure parameters so that the EST parameters can be updated by other threads. Signed-off-by: Xiaoliang Yang Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c | 12 +++++++++++- include/linux/stmmac.h | 1 + 2 files changed, 12 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c index 596626c71189..2e3cdf540168 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c @@ -796,14 +796,18 @@ static int tc_setup_taprio(struct stmmac_priv *priv, GFP_KERNEL); if (!plat->est) return -ENOMEM; + + mutex_init(&priv->plat->est->lock); } else { memset(plat->est, 0, sizeof(*plat->est)); } size = qopt->num_entries; + mutex_lock(&priv->plat->est->lock); priv->plat->est->gcl_size = size; priv->plat->est->enable = qopt->enable; + mutex_unlock(&priv->plat->est->lock); for (i = 0; i < size; i++) { s64 delta_ns = qopt->entries[i].interval; @@ -834,6 +838,7 @@ static int tc_setup_taprio(struct stmmac_priv *priv, priv->plat->est->gcl[i] = delta_ns | (gates << wid); } + mutex_lock(&priv->plat->est->lock); /* Adjust for real system time */ priv->ptp_clock_ops.gettime64(&priv->ptp_clock_ops, ¤t_time); current_time_ns = timespec64_to_ktime(current_time); @@ -847,8 +852,10 @@ static int tc_setup_taprio(struct stmmac_priv *priv, priv->plat->est->ctr[0] = do_div(ctr, NSEC_PER_SEC); priv->plat->est->ctr[1] = (u32)ctr; - if (fpe && !priv->dma_cap.fpesel) + if (fpe && !priv->dma_cap.fpesel) { + mutex_unlock(&priv->plat->est->lock); return -EOPNOTSUPP; + } /* Actual FPE register configuration will be done after FPE handshake * is success. @@ -857,6 +864,7 @@ static int tc_setup_taprio(struct stmmac_priv *priv, ret = stmmac_est_configure(priv, priv->ioaddr, priv->plat->est, priv->plat->clk_ptp_rate); + mutex_unlock(&priv->plat->est->lock); if (ret) { netdev_err(priv->dev, "failed to configure EST\n"); goto disable; @@ -872,9 +880,11 @@ static int tc_setup_taprio(struct stmmac_priv *priv, return 0; disable: + mutex_lock(&priv->plat->est->lock); priv->plat->est->enable = false; stmmac_est_configure(priv, priv->ioaddr, priv->plat->est, priv->plat->clk_ptp_rate); + mutex_unlock(&priv->plat->est->lock); priv->plat->fpe_cfg->enable = false; stmmac_fpe_configure(priv, priv->ioaddr, diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index d5ae621d66ba..09157b8a5810 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -115,6 +115,7 @@ struct stmmac_axi { #define EST_GCL 1024 struct stmmac_est { + struct mutex lock; int enable; u32 btr_offset[2]; u32 btr[2]; -- cgit v1.2.3 From e9e3720002f61cd637a49ecafae77cac230eefae Mon Sep 17 00:00:00 2001 From: Xiaoliang Yang Date: Mon, 5 Jul 2021 18:26:55 +0800 Subject: net: stmmac: ptp: update tas basetime after ptp adjust After adjusting the ptp time, the Qbv base time may be the past time of the new current time. dwmac5 hardware limited the base time cannot be set as past time. This patch add a btr_reserve to store the base time get from qopt, then calculate the base time and reset the Qbv configuration after ptp time adjust. Signed-off-by: Xiaoliang Yang Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c | 41 +++++++++++++++++++++++- drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c | 6 +++- include/linux/stmmac.h | 1 + 3 files changed, 46 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c index 4e86cdf2bc9f..580cc035536b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c @@ -62,7 +62,8 @@ static int stmmac_adjust_time(struct ptp_clock_info *ptp, s64 delta) u32 sec, nsec; u32 quotient, reminder; int neg_adj = 0; - bool xmac; + bool xmac, est_rst = false; + int ret; xmac = priv->plat->has_gmac4 || priv->plat->has_xgmac; @@ -75,10 +76,48 @@ static int stmmac_adjust_time(struct ptp_clock_info *ptp, s64 delta) sec = quotient; nsec = reminder; + /* If EST is enabled, disabled it before adjust ptp time. */ + if (priv->plat->est && priv->plat->est->enable) { + est_rst = true; + mutex_lock(&priv->plat->est->lock); + priv->plat->est->enable = false; + stmmac_est_configure(priv, priv->ioaddr, priv->plat->est, + priv->plat->clk_ptp_rate); + mutex_unlock(&priv->plat->est->lock); + } + spin_lock_irqsave(&priv->ptp_lock, flags); stmmac_adjust_systime(priv, priv->ptpaddr, sec, nsec, neg_adj, xmac); spin_unlock_irqrestore(&priv->ptp_lock, flags); + /* Caculate new basetime and re-configured EST after PTP time adjust. */ + if (est_rst) { + struct timespec64 current_time, time; + ktime_t current_time_ns, basetime; + u64 cycle_time; + + mutex_lock(&priv->plat->est->lock); + priv->ptp_clock_ops.gettime64(&priv->ptp_clock_ops, ¤t_time); + current_time_ns = timespec64_to_ktime(current_time); + time.tv_nsec = priv->plat->est->btr_reserve[0]; + time.tv_sec = priv->plat->est->btr_reserve[1]; + basetime = timespec64_to_ktime(time); + cycle_time = priv->plat->est->ctr[1] * NSEC_PER_SEC + + priv->plat->est->ctr[0]; + time = stmmac_calc_tas_basetime(basetime, + current_time_ns, + cycle_time); + + priv->plat->est->btr[0] = (u32)time.tv_nsec; + priv->plat->est->btr[1] = (u32)time.tv_sec; + priv->plat->est->enable = true; + ret = stmmac_est_configure(priv, priv->ioaddr, priv->plat->est, + priv->plat->clk_ptp_rate); + mutex_unlock(&priv->plat->est->lock); + if (ret) + netdev_err(priv->dev, "failed to configure EST\n"); + } + return 0; } diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c index 2e3cdf540168..4f3b6437b114 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c @@ -739,7 +739,7 @@ static int tc_setup_taprio(struct stmmac_priv *priv, { u32 size, wid = priv->dma_cap.estwid, dep = priv->dma_cap.estdep; struct plat_stmmacenet_data *plat = priv->plat; - struct timespec64 time, current_time; + struct timespec64 time, current_time, qopt_time; ktime_t current_time_ns; bool fpe = false; int i, ret = 0; @@ -848,6 +848,10 @@ static int tc_setup_taprio(struct stmmac_priv *priv, priv->plat->est->btr[0] = (u32)time.tv_nsec; priv->plat->est->btr[1] = (u32)time.tv_sec; + qopt_time = ktime_to_timespec64(qopt->base_time); + priv->plat->est->btr_reserve[0] = (u32)qopt_time.tv_nsec; + priv->plat->est->btr_reserve[1] = (u32)qopt_time.tv_sec; + ctr = qopt->cycle_time; priv->plat->est->ctr[0] = do_div(ctr, NSEC_PER_SEC); priv->plat->est->ctr[1] = (u32)ctr; diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 09157b8a5810..a6f03b36fc4f 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -117,6 +117,7 @@ struct stmmac_axi { struct stmmac_est { struct mutex lock; int enable; + u32 btr_reserve[2]; u32 btr_offset[2]; u32 btr[2]; u32 ctr[2]; -- cgit v1.2.3 From f263a81451c12da5a342d90572e317e611846f2c Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Wed, 7 Jul 2021 15:38:47 -0700 Subject: bpf: Track subprog poke descriptors correctly and fix use-after-free Subprograms are calling map_poke_track(), but on program release there is no hook to call map_poke_untrack(). However, on program release, the aux memory (and poke descriptor table) is freed even though we still have a reference to it in the element list of the map aux data. When we run map_poke_run(), we then end up accessing free'd memory, triggering KASAN in prog_array_map_poke_run(): [...] [ 402.824689] BUG: KASAN: use-after-free in prog_array_map_poke_run+0xc2/0x34e [ 402.824698] Read of size 4 at addr ffff8881905a7940 by task hubble-fgs/4337 [ 402.824705] CPU: 1 PID: 4337 Comm: hubble-fgs Tainted: G I 5.12.0+ #399 [ 402.824715] Call Trace: [ 402.824719] dump_stack+0x93/0xc2 [ 402.824727] print_address_description.constprop.0+0x1a/0x140 [ 402.824736] ? prog_array_map_poke_run+0xc2/0x34e [ 402.824740] ? prog_array_map_poke_run+0xc2/0x34e [ 402.824744] kasan_report.cold+0x7c/0xd8 [ 402.824752] ? prog_array_map_poke_run+0xc2/0x34e [ 402.824757] prog_array_map_poke_run+0xc2/0x34e [ 402.824765] bpf_fd_array_map_update_elem+0x124/0x1a0 [...] The elements concerned are walked as follows: for (i = 0; i < elem->aux->size_poke_tab; i++) { poke = &elem->aux->poke_tab[i]; [...] The access to size_poke_tab is a 4 byte read, verified by checking offsets in the KASAN dump: [ 402.825004] The buggy address belongs to the object at ffff8881905a7800 which belongs to the cache kmalloc-1k of size 1024 [ 402.825008] The buggy address is located 320 bytes inside of 1024-byte region [ffff8881905a7800, ffff8881905a7c00) The pahole output of bpf_prog_aux: struct bpf_prog_aux { [...] /* --- cacheline 5 boundary (320 bytes) --- */ u32 size_poke_tab; /* 320 4 */ [...] In general, subprograms do not necessarily manage their own data structures. For example, BTF func_info and linfo are just pointers to the main program structure. This allows reference counting and cleanup to be done on the latter which simplifies their management a bit. The aux->poke_tab struct, however, did not follow this logic. The initial proposed fix for this use-after-free bug further embedded poke data tracking into the subprogram with proper reference counting. However, Daniel and Alexei questioned why we were treating these objects special; I agree, its unnecessary. The fix here removes the per subprogram poke table allocation and map tracking and instead simply points the aux->poke_tab pointer at the main programs poke table. This way, map tracking is simplified to the main program and we do not need to manage them per subprogram. This also means, bpf_prog_free_deferred(), which unwinds the program reference counting and kfrees objects, needs to ensure that we don't try to double free the poke_tab when free'ing the subprog structures. This is easily solved by NULL'ing the poke_tab pointer. The second detail is to ensure that per subprogram JIT logic only does fixups on poke_tab[] entries it owns. To do this, we add a pointer in the poke structure to point at the subprogram value so JITs can easily check while walking the poke_tab structure if the current entry belongs to the current program. The aux pointer is stable and therefore suitable for such comparison. On the jit_subprogs() error path, we omit cleaning up the poke->aux field because these are only ever referenced from the JIT side, but on error we will never make it to the JIT, so its fine to leave them dangling. Removing these pointers would complicate the error path for no reason. However, we do need to untrack all poke descriptors from the main program as otherwise they could race with the freeing of JIT memory from the subprograms. Lastly, a748c6975dea3 ("bpf: propagate poke descriptors to subprograms") had an off-by-one on the subprogram instruction index range check as it was testing 'insn_idx >= subprog_start && insn_idx <= subprog_end'. However, subprog_end is the next subprogram's start instruction. Fixes: a748c6975dea3 ("bpf: propagate poke descriptors to subprograms") Signed-off-by: John Fastabend Signed-off-by: Alexei Starovoitov Co-developed-by: Daniel Borkmann Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20210707223848.14580-2-john.fastabend@gmail.com --- arch/x86/net/bpf_jit_comp.c | 3 +++ include/linux/bpf.h | 1 + kernel/bpf/core.c | 8 +++++- kernel/bpf/verifier.c | 60 ++++++++++++++++----------------------------- 4 files changed, 32 insertions(+), 40 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index e835164189f1..4b951458c9fc 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -570,6 +570,9 @@ static void bpf_tail_call_direct_fixup(struct bpf_prog *prog) for (i = 0; i < prog->aux->size_poke_tab; i++) { poke = &prog->aux->poke_tab[i]; + if (poke->aux && poke->aux != prog->aux) + continue; + WARN_ON_ONCE(READ_ONCE(poke->tailcall_target_stable)); if (poke->reason != BPF_POKE_REASON_TAIL_CALL) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index f309fc1509f2..e8e2b0393ca9 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -780,6 +780,7 @@ struct bpf_jit_poke_descriptor { void *tailcall_target; void *tailcall_bypass; void *bypass_addr; + void *aux; union { struct { struct bpf_map *map; diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 034ad93a1ad7..9b1577498373 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -2236,8 +2236,14 @@ static void bpf_prog_free_deferred(struct work_struct *work) #endif if (aux->dst_trampoline) bpf_trampoline_put(aux->dst_trampoline); - for (i = 0; i < aux->func_cnt; i++) + for (i = 0; i < aux->func_cnt; i++) { + /* We can just unlink the subprog poke descriptor table as + * it was originally linked to the main program and is also + * released along with it. + */ + aux->func[i]->aux->poke_tab = NULL; bpf_jit_free(aux->func[i]); + } if (aux->func_cnt) { kfree(aux->func); bpf_prog_unlock_free(aux->prog); diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index be38bb930bf1..42a4063de7cd 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -12121,33 +12121,19 @@ static int jit_subprogs(struct bpf_verifier_env *env) goto out_free; func[i]->is_func = 1; func[i]->aux->func_idx = i; - /* the btf and func_info will be freed only at prog->aux */ + /* Below members will be freed only at prog->aux */ func[i]->aux->btf = prog->aux->btf; func[i]->aux->func_info = prog->aux->func_info; + func[i]->aux->poke_tab = prog->aux->poke_tab; + func[i]->aux->size_poke_tab = prog->aux->size_poke_tab; for (j = 0; j < prog->aux->size_poke_tab; j++) { - u32 insn_idx = prog->aux->poke_tab[j].insn_idx; - int ret; + struct bpf_jit_poke_descriptor *poke; - if (!(insn_idx >= subprog_start && - insn_idx <= subprog_end)) - continue; - - ret = bpf_jit_add_poke_descriptor(func[i], - &prog->aux->poke_tab[j]); - if (ret < 0) { - verbose(env, "adding tail call poke descriptor failed\n"); - goto out_free; - } - - func[i]->insnsi[insn_idx - subprog_start].imm = ret + 1; - - map_ptr = func[i]->aux->poke_tab[ret].tail_call.map; - ret = map_ptr->ops->map_poke_track(map_ptr, func[i]->aux); - if (ret < 0) { - verbose(env, "tracking tail call prog failed\n"); - goto out_free; - } + poke = &prog->aux->poke_tab[j]; + if (poke->insn_idx < subprog_end && + poke->insn_idx >= subprog_start) + poke->aux = func[i]->aux; } /* Use bpf_prog_F_tag to indicate functions in stack traces. @@ -12178,18 +12164,6 @@ static int jit_subprogs(struct bpf_verifier_env *env) cond_resched(); } - /* Untrack main program's aux structs so that during map_poke_run() - * we will not stumble upon the unfilled poke descriptors; each - * of the main program's poke descs got distributed across subprogs - * and got tracked onto map, so we are sure that none of them will - * be missed after the operation below - */ - for (i = 0; i < prog->aux->size_poke_tab; i++) { - map_ptr = prog->aux->poke_tab[i].tail_call.map; - - map_ptr->ops->map_poke_untrack(map_ptr, prog->aux); - } - /* at this point all bpf functions were successfully JITed * now populate all bpf_calls with correct addresses and * run last pass of JIT @@ -12267,14 +12241,22 @@ static int jit_subprogs(struct bpf_verifier_env *env) bpf_prog_jit_attempt_done(prog); return 0; out_free: + /* We failed JIT'ing, so at this point we need to unregister poke + * descriptors from subprogs, so that kernel is not attempting to + * patch it anymore as we're freeing the subprog JIT memory. + */ + for (i = 0; i < prog->aux->size_poke_tab; i++) { + map_ptr = prog->aux->poke_tab[i].tail_call.map; + map_ptr->ops->map_poke_untrack(map_ptr, prog->aux); + } + /* At this point we're guaranteed that poke descriptors are not + * live anymore. We can just unlink its descriptor table as it's + * released with the main prog. + */ for (i = 0; i < env->subprog_cnt; i++) { if (!func[i]) continue; - - for (j = 0; j < func[i]->aux->size_poke_tab; j++) { - map_ptr = func[i]->aux->poke_tab[j].tail_call.map; - map_ptr->ops->map_poke_untrack(map_ptr, func[i]->aux); - } + func[i]->aux->poke_tab = NULL; bpf_jit_free(func[i]); } kfree(func); -- cgit v1.2.3 From a5de4be0aaaa66a2fa98e8a33bdbed3bd0682804 Mon Sep 17 00:00:00 2001 From: Marek BehĂșn Date: Sun, 11 Jul 2021 18:38:15 +0200 Subject: net: phy: marvell10g: fix differentiation of 88X3310 from 88X3340 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It seems that we cannot differentiate 88X3310 from 88X3340 by simply looking at bit 3 of revision ID. This only works on revisions A0 and A1. On revision B0, this bit is always 1. Instead use the 3.d00d register for differentiation, since this register contains information about number of ports on the device. Fixes: 9885d016ffa9 ("net: phy: marvell10g: add separate structure for 88X3340") Signed-off-by: Marek BehĂșn Reported-by: Matteo Croce Tested-by: Matteo Croce Signed-off-by: David S. Miller --- drivers/net/phy/marvell10g.c | 40 +++++++++++++++++++++++++++++++++++----- include/linux/marvell_phy.h | 6 +----- 2 files changed, 36 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/phy/marvell10g.c b/drivers/net/phy/marvell10g.c index bbbc6ac8fa82..53a433442803 100644 --- a/drivers/net/phy/marvell10g.c +++ b/drivers/net/phy/marvell10g.c @@ -78,6 +78,11 @@ enum { /* Temperature read register (88E2110 only) */ MV_PCS_TEMP = 0x8042, + /* Number of ports on the device */ + MV_PCS_PORT_INFO = 0xd00d, + MV_PCS_PORT_INFO_NPORTS_MASK = 0x0380, + MV_PCS_PORT_INFO_NPORTS_SHIFT = 7, + /* These registers appear at 0x800X and 0xa00X - the 0xa00X control * registers appear to set themselves to the 0x800X when AN is * restarted, but status registers appear readable from either. @@ -966,6 +971,30 @@ static const struct mv3310_chip mv2111_type = { #endif }; +static int mv3310_get_number_of_ports(struct phy_device *phydev) +{ + int ret; + + ret = phy_read_mmd(phydev, MDIO_MMD_PCS, MV_PCS_PORT_INFO); + if (ret < 0) + return ret; + + ret &= MV_PCS_PORT_INFO_NPORTS_MASK; + ret >>= MV_PCS_PORT_INFO_NPORTS_SHIFT; + + return ret + 1; +} + +static int mv3310_match_phy_device(struct phy_device *phydev) +{ + return mv3310_get_number_of_ports(phydev) == 1; +} + +static int mv3340_match_phy_device(struct phy_device *phydev) +{ + return mv3310_get_number_of_ports(phydev) == 4; +} + static int mv211x_match_phy_device(struct phy_device *phydev, bool has_5g) { int val; @@ -994,7 +1023,8 @@ static int mv2111_match_phy_device(struct phy_device *phydev) static struct phy_driver mv3310_drivers[] = { { .phy_id = MARVELL_PHY_ID_88X3310, - .phy_id_mask = MARVELL_PHY_ID_88X33X0_MASK, + .phy_id_mask = MARVELL_PHY_ID_MASK, + .match_phy_device = mv3310_match_phy_device, .name = "mv88x3310", .driver_data = &mv3310_type, .get_features = mv3310_get_features, @@ -1011,8 +1041,9 @@ static struct phy_driver mv3310_drivers[] = { .set_loopback = genphy_c45_loopback, }, { - .phy_id = MARVELL_PHY_ID_88X3340, - .phy_id_mask = MARVELL_PHY_ID_88X33X0_MASK, + .phy_id = MARVELL_PHY_ID_88X3310, + .phy_id_mask = MARVELL_PHY_ID_MASK, + .match_phy_device = mv3340_match_phy_device, .name = "mv88x3340", .driver_data = &mv3340_type, .get_features = mv3310_get_features, @@ -1069,8 +1100,7 @@ static struct phy_driver mv3310_drivers[] = { module_phy_driver(mv3310_drivers); static struct mdio_device_id __maybe_unused mv3310_tbl[] = { - { MARVELL_PHY_ID_88X3310, MARVELL_PHY_ID_88X33X0_MASK }, - { MARVELL_PHY_ID_88X3340, MARVELL_PHY_ID_88X33X0_MASK }, + { MARVELL_PHY_ID_88X3310, MARVELL_PHY_ID_MASK }, { MARVELL_PHY_ID_88E2110, MARVELL_PHY_ID_MASK }, { }, }; diff --git a/include/linux/marvell_phy.h b/include/linux/marvell_phy.h index acee44b9db26..0f06c2287b52 100644 --- a/include/linux/marvell_phy.h +++ b/include/linux/marvell_phy.h @@ -22,14 +22,10 @@ #define MARVELL_PHY_ID_88E1545 0x01410ea0 #define MARVELL_PHY_ID_88E1548P 0x01410ec0 #define MARVELL_PHY_ID_88E3016 0x01410e60 +#define MARVELL_PHY_ID_88X3310 0x002b09a0 #define MARVELL_PHY_ID_88E2110 0x002b09b0 #define MARVELL_PHY_ID_88X2222 0x01410f10 -/* PHY IDs and mask for Alaska 10G PHYs */ -#define MARVELL_PHY_ID_88X33X0_MASK 0xfffffff8 -#define MARVELL_PHY_ID_88X3310 0x002b09a0 -#define MARVELL_PHY_ID_88X3340 0x002b09a8 - /* Marvel 88E1111 in Finisar SFP module with modified PHY ID */ #define MARVELL_PHY_ID_88E1111_FINISAR 0x01ff0cc0 -- cgit v1.2.3 From 79789db03fdd77510cfb35cb4b3bd52b6c50c901 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 12 Jul 2021 16:32:07 +0100 Subject: mm: Make copy_huge_page() always available Rewrite copy_huge_page() and move it into mm/util.c so it's always available. Fixes an exposure of uninitialised memory on configurations with HUGETLB and UFFD enabled and MIGRATION disabled. Fixes: 8cc5fcbb5be8 ("mm, hugetlb: fix racy resv_huge_pages underflow on UFFDIO_COPY") Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Mike Kravetz Signed-off-by: Linus Torvalds --- include/linux/migrate.h | 5 ----- include/linux/mm.h | 1 + mm/migrate.c | 48 ------------------------------------------------ mm/util.c | 10 ++++++++++ 4 files changed, 11 insertions(+), 53 deletions(-) (limited to 'include/linux') diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 9b7b7cd3bae9..23dadf7aeba8 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -51,7 +51,6 @@ extern int migrate_huge_page_move_mapping(struct address_space *mapping, struct page *newpage, struct page *page); extern int migrate_page_move_mapping(struct address_space *mapping, struct page *newpage, struct page *page, int extra_count); -extern void copy_huge_page(struct page *dst, struct page *src); #else static inline void putback_movable_pages(struct list_head *l) {} @@ -77,10 +76,6 @@ static inline int migrate_huge_page_move_mapping(struct address_space *mapping, { return -ENOSYS; } - -static inline void copy_huge_page(struct page *dst, struct page *src) -{ -} #endif /* CONFIG_MIGRATION */ #ifdef CONFIG_COMPACTION diff --git a/include/linux/mm.h b/include/linux/mm.h index 57453dba41b9..7ca22e6e694a 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -906,6 +906,7 @@ void __put_page(struct page *page); void put_pages_list(struct list_head *pages); void split_page(struct page *page, unsigned int order); +void copy_huge_page(struct page *dst, struct page *src); /* * Compound pages have a destructor function. Provide a diff --git a/mm/migrate.c b/mm/migrate.c index 23cbd9de030b..34a9ad3e0a4f 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -536,54 +536,6 @@ int migrate_huge_page_move_mapping(struct address_space *mapping, return MIGRATEPAGE_SUCCESS; } -/* - * Gigantic pages are so large that we do not guarantee that page++ pointer - * arithmetic will work across the entire page. We need something more - * specialized. - */ -static void __copy_gigantic_page(struct page *dst, struct page *src, - int nr_pages) -{ - int i; - struct page *dst_base = dst; - struct page *src_base = src; - - for (i = 0; i < nr_pages; ) { - cond_resched(); - copy_highpage(dst, src); - - i++; - dst = mem_map_next(dst, dst_base, i); - src = mem_map_next(src, src_base, i); - } -} - -void copy_huge_page(struct page *dst, struct page *src) -{ - int i; - int nr_pages; - - if (PageHuge(src)) { - /* hugetlbfs page */ - struct hstate *h = page_hstate(src); - nr_pages = pages_per_huge_page(h); - - if (unlikely(nr_pages > MAX_ORDER_NR_PAGES)) { - __copy_gigantic_page(dst, src, nr_pages); - return; - } - } else { - /* thp page */ - BUG_ON(!PageTransHuge(src)); - nr_pages = thp_nr_pages(src); - } - - for (i = 0; i < nr_pages; i++) { - cond_resched(); - copy_highpage(dst + i, src + i); - } -} - /* * Copy the page to its new location */ diff --git a/mm/util.c b/mm/util.c index 99c6cc77de9e..9043d03750a7 100644 --- a/mm/util.c +++ b/mm/util.c @@ -731,6 +731,16 @@ int __page_mapcount(struct page *page) } EXPORT_SYMBOL_GPL(__page_mapcount); +void copy_huge_page(struct page *dst, struct page *src) +{ + unsigned i, nr = compound_nr(src); + + for (i = 0; i < nr; i++) { + cond_resched(); + copy_highpage(nth_page(dst, i), nth_page(src, i)); + } +} + int sysctl_overcommit_memory __read_mostly = OVERCOMMIT_GUESS; int sysctl_overcommit_ratio __read_mostly = 50; unsigned long sysctl_overcommit_kbytes __read_mostly; -- cgit v1.2.3 From 52f83955aaf91b22f46765b007b4404ce85b2133 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Mon, 12 Jul 2021 14:08:01 +0100 Subject: firmware: arm_scmi: Fix kernel doc warnings Kernel doc validation script is unhappy and complains with the below set of warnings. | Function parameter or member 'fast_switch_possible' not described in 'scmi_perf_proto_ops' | Function parameter or member 'power_scale_mw_get' not described in 'scmi_perf_proto_ops' | cannot understand function prototype: 'struct scmi_sensor_reading ' | cannot understand function prototype: 'struct scmi_range_attrs ' | cannot understand function prototype: 'struct scmi_sensor_axis_info ' | cannot understand function prototype: 'struct scmi_sensor_intervals_info ' Fix them adding appropriate documents or missing keywords. Link: https://lore.kernel.org/r/20210712130801.2436492-2-sudeep.holla@arm.com Reviewed-by: Cristian Marussi Signed-off-by: Sudeep Holla --- include/linux/scmi_protocol.h | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h index 79d0a1237e6c..80e781c51ddc 100644 --- a/include/linux/scmi_protocol.h +++ b/include/linux/scmi_protocol.h @@ -101,6 +101,10 @@ struct scmi_clk_proto_ops { * to sustained performance level mapping * @est_power_get: gets the estimated power cost for a given performance domain * at a given frequency + * @fast_switch_possible: indicates if fast DVFS switching is possible or not + * for a given device + * @power_scale_mw_get: indicates if the power values provided are in milliWatts + * or in some other (abstract) scale */ struct scmi_perf_proto_ops { int (*limits_set)(const struct scmi_protocol_handle *ph, u32 domain, @@ -153,7 +157,7 @@ struct scmi_power_proto_ops { }; /** - * scmi_sensor_reading - represent a timestamped read + * struct scmi_sensor_reading - represent a timestamped read * * Used by @reading_get_timestamped method. * @@ -167,7 +171,7 @@ struct scmi_sensor_reading { }; /** - * scmi_range_attrs - specifies a sensor or axis values' range + * struct scmi_range_attrs - specifies a sensor or axis values' range * @min_range: The minimum value which can be represented by the sensor/axis. * @max_range: The maximum value which can be represented by the sensor/axis. */ @@ -177,7 +181,7 @@ struct scmi_range_attrs { }; /** - * scmi_sensor_axis_info - describes one sensor axes + * struct scmi_sensor_axis_info - describes one sensor axes * @id: The axes ID. * @type: Axes type. Chosen amongst one of @enum scmi_sensor_class. * @scale: Power-of-10 multiplier applied to the axis unit. @@ -205,8 +209,8 @@ struct scmi_sensor_axis_info { }; /** - * scmi_sensor_intervals_info - describes number and type of available update - * intervals + * struct scmi_sensor_intervals_info - describes number and type of available + * update intervals * @segmented: Flag for segmented intervals' representation. When True there * will be exactly 3 intervals in @desc, with each entry * representing a member of a segment in this order: -- cgit v1.2.3 From 5ff6319d46cee22c9cd6f39a377e32c444f9a7b0 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Mon, 12 Jul 2021 11:27:48 +0100 Subject: firmware: arm_scpi: Fix kernel doc warnings Kernel doc validation script is unhappy and complains with the below set of warnings. | Function parameter or member 'device_domain_id' not described in 'scpi_ops' | Function parameter or member 'get_transition_latency' not described in 'scpi_ops' | Function parameter or member 'add_opps_to_device' not described in 'scpi_ops' | Function parameter or member 'sensor_get_capability' not described in 'scpi_ops' | Function parameter or member 'sensor_get_info' not described in 'scpi_ops' | Function parameter or member 'sensor_get_value' not described in 'scpi_ops' | Function parameter or member 'device_get_power_state' not described in 'scpi_ops' | Function parameter or member 'device_set_power_state' not described in 'scpi_ops' Fix them adding appropriate documents or missing keywords. Link: https://lore.kernel.org/r/20210712130801.2436492-1-sudeep.holla@arm.com Reviewed-by: Cristian Marussi Signed-off-by: Sudeep Holla --- include/linux/scpi_protocol.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/scpi_protocol.h b/include/linux/scpi_protocol.h index afbf8037d8db..d2176a56828a 100644 --- a/include/linux/scpi_protocol.h +++ b/include/linux/scpi_protocol.h @@ -51,6 +51,14 @@ struct scpi_sensor_info { * OPP is an index to the list return by @dvfs_get_info * @dvfs_get_info: returns the DVFS capabilities of the given power * domain. It includes the OPP list and the latency information + * @device_domain_id: gets the scpi domain id for a given device + * @get_transition_latency: gets the DVFS transition latency for a given device + * @add_opps_to_device: adds all the OPPs for a given device + * @sensor_get_capability: get the list of capabilities for the sensors + * @sensor_get_info: get the information of the specified sensor + * @sensor_get_value: gets the current value of the sensor + * @device_get_power_state: gets the power state of a power domain + * @device_set_power_state: sets the power state of a power domain */ struct scpi_ops { u32 (*get_version)(void); -- cgit v1.2.3 From d1d488d813703618f0dd93f0e4c4a05928114aa8 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 14 Jul 2021 15:47:50 +0200 Subject: fs: add vfs_parse_fs_param_source() helper Add a simple helper that filesystems can use in their parameter parser to parse the "source" parameter. A few places open-coded this function and that already caused a bug in the cgroup v1 parser that we fixed. Let's make it harder to get this wrong by introducing a helper which performs all necessary checks. Link: https://syzkaller.appspot.com/bug?id=6312526aba5beae046fdae8f00399f87aab48b12 Cc: Christoph Hellwig Cc: Alexander Viro Cc: Dmitry Vyukov Signed-off-by: Christian Brauner Signed-off-by: Linus Torvalds --- fs/fs_context.c | 54 ++++++++++++++++++++++++++++++---------------- include/linux/fs_context.h | 2 ++ kernel/cgroup/cgroup-v1.c | 14 +++++------- 3 files changed, 43 insertions(+), 27 deletions(-) (limited to 'include/linux') diff --git a/fs/fs_context.c b/fs/fs_context.c index 2834d1afa6e8..de1985eae535 100644 --- a/fs/fs_context.c +++ b/fs/fs_context.c @@ -79,6 +79,35 @@ static int vfs_parse_sb_flag(struct fs_context *fc, const char *key) return -ENOPARAM; } +/** + * vfs_parse_fs_param_source - Handle setting "source" via parameter + * @fc: The filesystem context to modify + * @param: The parameter + * + * This is a simple helper for filesystems to verify that the "source" they + * accept is sane. + * + * Returns 0 on success, -ENOPARAM if this is not "source" parameter, and + * -EINVAL otherwise. In the event of failure, supplementary error information + * is logged. + */ +int vfs_parse_fs_param_source(struct fs_context *fc, struct fs_parameter *param) +{ + if (strcmp(param->key, "source") != 0) + return -ENOPARAM; + + if (param->type != fs_value_is_string) + return invalf(fc, "Non-string source"); + + if (fc->source) + return invalf(fc, "Multiple sources"); + + fc->source = param->string; + param->string = NULL; + return 0; +} +EXPORT_SYMBOL(vfs_parse_fs_param_source); + /** * vfs_parse_fs_param - Add a single parameter to a superblock config * @fc: The filesystem context to modify @@ -122,15 +151,9 @@ int vfs_parse_fs_param(struct fs_context *fc, struct fs_parameter *param) /* If the filesystem doesn't take any arguments, give it the * default handling of source. */ - if (strcmp(param->key, "source") == 0) { - if (param->type != fs_value_is_string) - return invalf(fc, "VFS: Non-string source"); - if (fc->source) - return invalf(fc, "VFS: Multiple sources"); - fc->source = param->string; - param->string = NULL; - return 0; - } + ret = vfs_parse_fs_param_source(fc, param); + if (ret != -ENOPARAM) + return ret; return invalf(fc, "%s: Unknown parameter '%s'", fc->fs_type->name, param->key); @@ -504,16 +527,11 @@ static int legacy_parse_param(struct fs_context *fc, struct fs_parameter *param) struct legacy_fs_context *ctx = fc->fs_private; unsigned int size = ctx->data_size; size_t len = 0; + int ret; - if (strcmp(param->key, "source") == 0) { - if (param->type != fs_value_is_string) - return invalf(fc, "VFS: Legacy: Non-string source"); - if (fc->source) - return invalf(fc, "VFS: Legacy: Multiple sources"); - fc->source = param->string; - param->string = NULL; - return 0; - } + ret = vfs_parse_fs_param_source(fc, param); + if (ret != -ENOPARAM) + return ret; if (ctx->param_type == LEGACY_FS_MONOLITHIC_PARAMS) return invalf(fc, "VFS: Legacy: Can't mix monolithic and individual options"); diff --git a/include/linux/fs_context.h b/include/linux/fs_context.h index 37e1e8f7f08d..e2bc16300c82 100644 --- a/include/linux/fs_context.h +++ b/include/linux/fs_context.h @@ -139,6 +139,8 @@ extern int vfs_parse_fs_string(struct fs_context *fc, const char *key, extern int generic_parse_monolithic(struct fs_context *fc, void *data); extern int vfs_get_tree(struct fs_context *fc); extern void put_fs_context(struct fs_context *fc); +extern int vfs_parse_fs_param_source(struct fs_context *fc, + struct fs_parameter *param); /* * sget() wrappers to be called from the ->get_tree() op. diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c index 527917c0b30b..8d6bf56ed77a 100644 --- a/kernel/cgroup/cgroup-v1.c +++ b/kernel/cgroup/cgroup-v1.c @@ -911,15 +911,11 @@ int cgroup1_parse_param(struct fs_context *fc, struct fs_parameter *param) opt = fs_parse(fc, cgroup1_fs_parameters, param, &result); if (opt == -ENOPARAM) { - if (strcmp(param->key, "source") == 0) { - if (param->type != fs_value_is_string) - return invalf(fc, "Non-string source"); - if (fc->source) - return invalf(fc, "Multiple sources not supported"); - fc->source = param->string; - param->string = NULL; - return 0; - } + int ret; + + ret = vfs_parse_fs_param_source(fc, param); + if (ret != -ENOPARAM) + return ret; for_each_subsys(ss, i) { if (strcmp(param->key, ss->legacy_name)) continue; -- cgit v1.2.3 From 2db710cc846d3321a4dc0977fa13769bddba2351 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Wed, 14 Jul 2021 21:26:40 -0700 Subject: kasan: fix build by including kernel.h The header relies on _RET_IP_ being defined, and had been receiving that definition via inclusion of bug.h which includes kernel.h. However, since f39650de687e ("kernel.h: split out panic and oops helpers") that is no longer the case and get the following build error when building CONFIG_KASAN_HW_TAGS on arm64: In file included from arch/arm64/mm/kasan_init.c:10: include/linux/kasan.h: In function 'kasan_slab_free': include/linux/kasan.h:230:39: error: '_RET_IP_' undeclared (first use in this function) 230 | return __kasan_slab_free(s, object, _RET_IP_, init); Fix it by including kernel.h from kasan.h. Link: https://lkml.kernel.org/r/20210705072716.2125074-1-elver@google.com Fixes: f39650de687e ("kernel.h: split out panic and oops helpers") Signed-off-by: Marco Elver Reviewed-by: Andy Shevchenko Reviewed-by: Andrey Konovalov Cc: Alexander Potapenko Cc: Dmitry Vyukov Cc: Peter Collingbourne Cc: Catalin Marinas Cc: Vincenzo Frascino Cc: Andrey Ryabinin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kasan.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 5310e217bd74..dd874a1ee862 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -3,6 +3,7 @@ #define _LINUX_KASAN_H #include +#include #include #include -- cgit v1.2.3 From ab7965de1725cd8514f0edbced5c2fb793846078 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 14 Jul 2021 21:26:55 -0700 Subject: mm: fix the try_to_unmap prototype for !CONFIG_MMU Adjust the nommu stub of try_to_unmap to match the changed protype for the full version. Turn it into an inline instead of a macro to generally improve the type checking. Link: https://lkml.kernel.org/r/20210705053944.885828-1-hch@lst.de Fixes: 1fb08ac63bee ("mm: rmap: make try_to_unmap() void function") Signed-off-by: Christoph Hellwig Reviewed-by: Yang Shi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/rmap.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rmap.h b/include/linux/rmap.h index 83fb86133fe1..c976cc6de257 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -291,7 +291,9 @@ static inline int page_referenced(struct page *page, int is_locked, return 0; } -#define try_to_unmap(page, refs) false +static inline void try_to_unmap(struct page *page, enum ttu_flags flags) +{ +} static inline int page_mkclean(struct page *page) { -- cgit v1.2.3 From ec645dc96699ea6c37b6de86c84d7288ea9a4ddf Mon Sep 17 00:00:00 2001 From: Oleksandr Natalenko Date: Sat, 17 Jul 2021 14:33:28 +0200 Subject: block: increase BLKCG_MAX_POLS After mq-deadline learned to deal with cgroups, the BLKCG_MAX_POLS value became too small for all the elevators to be registered properly. The following issue is seen: ``` calling bfq_init+0x0/0x8b @ 1 blkcg_policy_register: BLKCG_MAX_POLS too small initcall bfq_init+0x0/0x8b returned -28 after 507 usecs ``` which renders BFQ non-functional. Increase BLKCG_MAX_POLS to allow enough space for everyone. Fixes: 08a9ad8bf607 ("block/mq-deadline: Add cgroup support") Link: https://lore.kernel.org/lkml/8988303.mDXGIdCtx8@natalenko.name/ Signed-off-by: Oleksandr Natalenko Link: https://lore.kernel.org/r/20210717123328.945810-1-oleksandr@natalenko.name Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index c454fb446fd0..2e12320cb121 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -57,7 +57,7 @@ struct blk_keyslot_manager; * Maximum number of blkcg policies allowed to be registered concurrently. * Defined here to simplify include dependency. */ -#define BLKCG_MAX_POLS 5 +#define BLKCG_MAX_POLS 6 typedef void (rq_end_io_fn)(struct request *, blk_status_t); -- cgit v1.2.3 From d8a719059b9dc963aa190598778ac804ff3e6a87 Mon Sep 17 00:00:00 2001 From: Jonathan Marek Date: Wed, 21 Jul 2021 17:02:13 +1000 Subject: Revert "mm/pgtable: add stubs for {pmd/pub}_{set/clear}_huge" This reverts commit c742199a014de23ee92055c2473d91fe5561ffdf. c742199a014d ("mm/pgtable: add stubs for {pmd/pub}_{set/clear}_huge") breaks arm64 in at least two ways for configurations where PUD or PMD folding occur: 1. We no longer install huge-vmap mappings and silently fall back to page-granular entries, despite being able to install block entries at what is effectively the PGD level. 2. If the linear map is backed with block mappings, these will now silently fail to be created in alloc_init_pud(), causing a panic early during boot. The pgtable selftests caught this, although a fix has not been forthcoming and Christophe is AWOL at the moment, so just revert the change for now to get a working -rc3 on which we can queue patches for 5.15. A simple revert breaks the build for 32-bit PowerPC 8xx machines, which rely on the default function definitions when the corresponding page-table levels are folded, since commit a6a8f7c4aa7e ("powerpc/8xx: add support for huge pages on VMAP and VMALLOC"), eg: powerpc64-linux-ld: mm/vmalloc.o: in function `vunmap_pud_range': linux/mm/vmalloc.c:362: undefined reference to `pud_clear_huge' To avoid that, add stubs for pud_clear_huge() and pmd_clear_huge() in arch/powerpc/mm/nohash/8xx.c as suggested by Christophe. Cc: Christophe Leroy Cc: Catalin Marinas Cc: Andrew Morton Cc: Nicholas Piggin Cc: Mike Rapoport Cc: Mark Rutland Cc: Geert Uytterhoeven Fixes: c742199a014d ("mm/pgtable: add stubs for {pmd/pub}_{set/clear}_huge") Signed-off-by: Jonathan Marek Reviewed-by: Ard Biesheuvel Acked-by: Marc Zyngier [mpe: Fold in 8xx.c changes from Christophe and mention in change log] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/linux-arm-kernel/CAMuHMdXShORDox-xxaeUfDW3wx2PeggFSqhVSHVZNKCGK-y_vQ@mail.gmail.com/ Link: https://lore.kernel.org/r/20210717160118.9855-1-jonathan@marek.ca Link: https://lore.kernel.org/r/87r1fs1762.fsf@mpe.ellerman.id.au Signed-off-by: Will Deacon --- arch/arm64/mm/mmu.c | 20 ++++++++------------ arch/powerpc/mm/nohash/8xx.c | 10 ++++++++++ arch/x86/mm/pgtable.c | 34 +++++++++++++++------------------- include/linux/pgtable.h | 26 +------------------------- 4 files changed, 34 insertions(+), 56 deletions(-) (limited to 'include/linux') diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index d74586508448..9ff0de1b2b93 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -1339,7 +1339,6 @@ void *__init fixmap_remap_fdt(phys_addr_t dt_phys, int *size, pgprot_t prot) return dt_virt; } -#if CONFIG_PGTABLE_LEVELS > 3 int pud_set_huge(pud_t *pudp, phys_addr_t phys, pgprot_t prot) { pud_t new_pud = pfn_pud(__phys_to_pfn(phys), mk_pud_sect_prot(prot)); @@ -1354,16 +1353,6 @@ int pud_set_huge(pud_t *pudp, phys_addr_t phys, pgprot_t prot) return 1; } -int pud_clear_huge(pud_t *pudp) -{ - if (!pud_sect(READ_ONCE(*pudp))) - return 0; - pud_clear(pudp); - return 1; -} -#endif - -#if CONFIG_PGTABLE_LEVELS > 2 int pmd_set_huge(pmd_t *pmdp, phys_addr_t phys, pgprot_t prot) { pmd_t new_pmd = pfn_pmd(__phys_to_pfn(phys), mk_pmd_sect_prot(prot)); @@ -1378,6 +1367,14 @@ int pmd_set_huge(pmd_t *pmdp, phys_addr_t phys, pgprot_t prot) return 1; } +int pud_clear_huge(pud_t *pudp) +{ + if (!pud_sect(READ_ONCE(*pudp))) + return 0; + pud_clear(pudp); + return 1; +} + int pmd_clear_huge(pmd_t *pmdp) { if (!pmd_sect(READ_ONCE(*pmdp))) @@ -1385,7 +1382,6 @@ int pmd_clear_huge(pmd_t *pmdp) pmd_clear(pmdp); return 1; } -#endif int pmd_free_pte_page(pmd_t *pmdp, unsigned long addr) { diff --git a/arch/powerpc/mm/nohash/8xx.c b/arch/powerpc/mm/nohash/8xx.c index 60780e089118..0df9fe29dd56 100644 --- a/arch/powerpc/mm/nohash/8xx.c +++ b/arch/powerpc/mm/nohash/8xx.c @@ -240,3 +240,13 @@ void __init setup_kuap(bool disabled) mtspr(SPRN_MD_AP, MD_APG_KUAP); } #endif + +int pud_clear_huge(pud_t *pud) +{ + return 0; +} + +int pmd_clear_huge(pmd_t *pmd) +{ + return 0; +} diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 3364fe62b903..3481b35cb4ec 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -682,7 +682,6 @@ int p4d_clear_huge(p4d_t *p4d) } #endif -#if CONFIG_PGTABLE_LEVELS > 3 /** * pud_set_huge - setup kernel PUD mapping * @@ -721,23 +720,6 @@ int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot) return 1; } -/** - * pud_clear_huge - clear kernel PUD mapping when it is set - * - * Returns 1 on success and 0 on failure (no PUD map is found). - */ -int pud_clear_huge(pud_t *pud) -{ - if (pud_large(*pud)) { - pud_clear(pud); - return 1; - } - - return 0; -} -#endif - -#if CONFIG_PGTABLE_LEVELS > 2 /** * pmd_set_huge - setup kernel PMD mapping * @@ -768,6 +750,21 @@ int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot) return 1; } +/** + * pud_clear_huge - clear kernel PUD mapping when it is set + * + * Returns 1 on success and 0 on failure (no PUD map is found). + */ +int pud_clear_huge(pud_t *pud) +{ + if (pud_large(*pud)) { + pud_clear(pud); + return 1; + } + + return 0; +} + /** * pmd_clear_huge - clear kernel PMD mapping when it is set * @@ -782,7 +779,6 @@ int pmd_clear_huge(pmd_t *pmd) return 0; } -#endif #ifdef CONFIG_X86_64 /** diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index d147480cdefc..e24d2c992b11 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -1397,34 +1397,10 @@ static inline int p4d_clear_huge(p4d_t *p4d) } #endif /* !__PAGETABLE_P4D_FOLDED */ -#ifndef __PAGETABLE_PUD_FOLDED int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot); -int pud_clear_huge(pud_t *pud); -#else -static inline int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot) -{ - return 0; -} -static inline int pud_clear_huge(pud_t *pud) -{ - return 0; -} -#endif /* !__PAGETABLE_PUD_FOLDED */ - -#ifndef __PAGETABLE_PMD_FOLDED int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot); +int pud_clear_huge(pud_t *pud); int pmd_clear_huge(pmd_t *pmd); -#else -static inline int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot) -{ - return 0; -} -static inline int pmd_clear_huge(pmd_t *pmd) -{ - return 0; -} -#endif /* !__PAGETABLE_PMD_FOLDED */ - int p4d_free_pud_page(p4d_t *p4d, unsigned long addr); int pud_free_pmd_page(pud_t *pud, unsigned long addr); int pmd_free_pte_page(pmd_t *pmd, unsigned long addr); -- cgit v1.2.3 From 8dad53a11f8d94dceb540a5f8f153484f42be84b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 23 Jul 2021 15:50:17 -0700 Subject: mm: call flush_dcache_page() in memcpy_to_page() and memzero_page() memcpy_to_page and memzero_page can write to arbitrary pages, which could be in the page cache or in high memory, so call flush_kernel_dcache_pages to flush the dcache. This is a problem when using these helpers on dcache challeneged architectures. Right now there are just a few users, chances are no one used the PC floppy driver, the aha1542 driver for an ISA SCSI HBA, and a few advanced and optional btrfs and ext4 features on those platforms yet since the conversion. Link: https://lkml.kernel.org/r/20210713055231.137602-2-hch@lst.de Fixes: bb90d4bc7b6a ("mm/highmem: Lift memcpy_[to|from]_page to core") Fixes: 28961998f858 ("iov_iter: lift memzero_page() to highmem.h") Signed-off-by: Christoph Hellwig Reviewed-by: Ira Weiny Cc: Chaitanya Kulkarni Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/highmem.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 8c6e8e996c87..8e7e50a53a12 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -318,6 +318,7 @@ static inline void memcpy_to_page(struct page *page, size_t offset, VM_BUG_ON(offset + len > PAGE_SIZE); memcpy(to + offset, from, len); + flush_dcache_page(page); kunmap_local(to); } @@ -325,6 +326,7 @@ static inline void memzero_page(struct page *page, size_t offset, size_t len) { char *addr = kmap_atomic(page); memset(addr + offset, 0, len); + flush_dcache_page(page); kunmap_atomic(addr); } -- cgit v1.2.3 From d9a42b53bdf7b0329dc09a59fc1b092640b6da19 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 23 Jul 2021 15:50:20 -0700 Subject: mm: use kmap_local_page in memzero_page The commit message introducing the global memzero_page explicitly mentions switching to kmap_local_page in the commit log but doesn't actually do that. Link: https://lkml.kernel.org/r/20210713055231.137602-3-hch@lst.de Fixes: 28961998f858 ("iov_iter: lift memzero_page() to highmem.h") Signed-off-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Reviewed-by: Ira Weiny Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/highmem.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 8e7e50a53a12..d9a606a9fc64 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -324,10 +324,10 @@ static inline void memcpy_to_page(struct page *page, size_t offset, static inline void memzero_page(struct page *page, size_t offset, size_t len) { - char *addr = kmap_atomic(page); + char *addr = kmap_local_page(page); memset(addr + offset, 0, len); flush_dcache_page(page); - kunmap_atomic(addr); + kunmap_local(addr); } #endif /* _LINUX_HIGHMEM_H */ -- cgit v1.2.3 From 79e482e9c3ae86e849c701c846592e72baddda5a Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Fri, 23 Jul 2021 15:50:26 -0700 Subject: memblock: make for_each_mem_range() traverse MEMBLOCK_HOTPLUG regions Commit b10d6bca8720 ("arch, drivers: replace for_each_membock() with for_each_mem_range()") didn't take into account that when there is movable_node parameter in the kernel command line, for_each_mem_range() would skip ranges marked with MEMBLOCK_HOTPLUG. The page table setup code in POWER uses for_each_mem_range() to create the linear mapping of the physical memory and since the regions marked as MEMORY_HOTPLUG are skipped, they never make it to the linear map. A later access to the memory in those ranges will fail: BUG: Unable to handle kernel data access on write at 0xc000000400000000 Faulting instruction address: 0xc00000000008a3c0 Oops: Kernel access of bad area, sig: 11 [#1] LE PAGE_SIZE=64K MMU=Radix SMP NR_CPUS=2048 NUMA pSeries Modules linked in: CPU: 0 PID: 53 Comm: kworker/u2:0 Not tainted 5.13.0 #7 NIP: c00000000008a3c0 LR: c0000000003c1ed8 CTR: 0000000000000040 REGS: c000000008a57770 TRAP: 0300 Not tainted (5.13.0) MSR: 8000000002009033 CR: 84222202 XER: 20040000 CFAR: c0000000003c1ed4 DAR: c000000400000000 DSISR: 42000000 IRQMASK: 0 GPR00: c0000000003c1ed8 c000000008a57a10 c0000000019da700 c000000400000000 GPR04: 0000000000000280 0000000000000180 0000000000000400 0000000000000200 GPR08: 0000000000000100 0000000000000080 0000000000000040 0000000000000300 GPR12: 0000000000000380 c000000001bc0000 c0000000001660c8 c000000006337e00 GPR16: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 GPR20: 0000000040000000 0000000020000000 c000000001a81990 c000000008c30000 GPR24: c000000008c20000 c000000001a81998 000fffffffff0000 c000000001a819a0 GPR28: c000000001a81908 c00c000001000000 c000000008c40000 c000000008a64680 NIP clear_user_page+0x50/0x80 LR __handle_mm_fault+0xc88/0x1910 Call Trace: __handle_mm_fault+0xc44/0x1910 (unreliable) handle_mm_fault+0x130/0x2a0 __get_user_pages+0x248/0x610 __get_user_pages_remote+0x12c/0x3e0 get_arg_page+0x54/0xf0 copy_string_kernel+0x11c/0x210 kernel_execve+0x16c/0x220 call_usermodehelper_exec_async+0x1b0/0x2f0 ret_from_kernel_thread+0x5c/0x70 Instruction dump: 79280fa4 79271764 79261f24 794ae8e2 7ca94214 7d683a14 7c893a14 7d893050 7d4903a6 60000000 60000000 60000000 <7c001fec> 7c091fec 7c081fec 7c051fec ---[ end trace 490b8c67e6075e09 ]--- Making for_each_mem_range() include MEMBLOCK_HOTPLUG regions in the traversal fixes this issue. Link: https://bugzilla.redhat.com/show_bug.cgi?id=1976100 Link: https://lkml.kernel.org/r/20210712071132.20902-1-rppt@kernel.org Fixes: b10d6bca8720 ("arch, drivers: replace for_each_membock() with for_each_mem_range()") Signed-off-by: Mike Rapoport Tested-by: Greg Kurz Reviewed-by: David Hildenbrand Cc: [5.10+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memblock.h | 4 ++-- mm/memblock.c | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index cbf46f56d105..4a53c3ca86bd 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -209,7 +209,7 @@ static inline void __next_physmem_range(u64 *idx, struct memblock_type *type, */ #define for_each_mem_range(i, p_start, p_end) \ __for_each_mem_range(i, &memblock.memory, NULL, NUMA_NO_NODE, \ - MEMBLOCK_NONE, p_start, p_end, NULL) + MEMBLOCK_HOTPLUG, p_start, p_end, NULL) /** * for_each_mem_range_rev - reverse iterate through memblock areas from @@ -220,7 +220,7 @@ static inline void __next_physmem_range(u64 *idx, struct memblock_type *type, */ #define for_each_mem_range_rev(i, p_start, p_end) \ __for_each_mem_range_rev(i, &memblock.memory, NULL, NUMA_NO_NODE, \ - MEMBLOCK_NONE, p_start, p_end, NULL) + MEMBLOCK_HOTPLUG, p_start, p_end, NULL) /** * for_each_reserved_mem_range - iterate over all reserved memblock areas diff --git a/mm/memblock.c b/mm/memblock.c index 0041ff62c584..de7b553baa50 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -947,7 +947,8 @@ static bool should_skip_region(struct memblock_type *type, return true; /* skip hotpluggable memory regions if needed */ - if (movable_node_is_enabled() && memblock_is_hotpluggable(m)) + if (movable_node_is_enabled() && memblock_is_hotpluggable(m) && + !(flags & MEMBLOCK_HOTPLUG)) return true; /* if we want mirror memory skip non-mirror memory regions */ -- cgit v1.2.3