From dce1eb93b19b2a1a441708f51c97c4a554054d00 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Mon, 2 Nov 2015 13:02:46 -0500 Subject: __div64_32(): make it overridable at compile time Some architectures may want to override the default implementation at compile time to do things inline. For example, ARM uses a non-standard calling convention for better efficiency in this case. Signed-off-by: Nicolas Pitre --- lib/div64.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/div64.c b/lib/div64.c index 62a698a432bc..7f345259c32f 100644 --- a/lib/div64.c +++ b/lib/div64.c @@ -13,7 +13,8 @@ * * Code generated for this function might be very inefficient * for some CPUs. __div64_32() can be overridden by linking arch-specific - * assembly versions such as arch/ppc/lib/div64.S and arch/sh/lib/div64.S. + * assembly versions such as arch/ppc/lib/div64.S and arch/sh/lib/div64.S + * or by defining a preprocessor macro in arch/include/asm/div64.h. */ #include @@ -23,6 +24,7 @@ /* Not needed on 64bit architectures */ #if BITS_PER_LONG == 32 +#ifndef __div64_32 uint32_t __attribute__((weak)) __div64_32(uint64_t *n, uint32_t base) { uint64_t rem = *n; @@ -55,8 +57,8 @@ uint32_t __attribute__((weak)) __div64_32(uint64_t *n, uint32_t base) *n = res; return rem; } - EXPORT_SYMBOL(__div64_32); +#endif #ifndef div_s64_rem s64 div_s64_rem(s64 dividend, s32 divisor, s32 *remainder) -- cgit v1.2.3 From bb649b34dd3d8f69308f5f193cb64457069c7222 Mon Sep 17 00:00:00 2001 From: Roland Kammerer Date: Thu, 16 Apr 2015 10:17:51 +0200 Subject: lru_cache: Converted lc_seq_printf_status to return void Fix the semantic of lc_seq_printf. Currently, it always returns 0 and the return value is unused, therefore, convert the return type to void. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg Signed-off-by: Jens Axboe --- include/linux/lru_cache.h | 2 +- lib/lru_cache.c | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/include/linux/lru_cache.h b/include/linux/lru_cache.h index 46262284de47..04fc6e6c7ff0 100644 --- a/include/linux/lru_cache.h +++ b/include/linux/lru_cache.h @@ -264,7 +264,7 @@ extern unsigned int lc_put(struct lru_cache *lc, struct lc_element *e); extern void lc_committed(struct lru_cache *lc); struct seq_file; -extern size_t lc_seq_printf_stats(struct seq_file *seq, struct lru_cache *lc); +extern void lc_seq_printf_stats(struct seq_file *seq, struct lru_cache *lc); extern void lc_seq_dump_details(struct seq_file *seq, struct lru_cache *lc, char *utext, void (*detail) (struct seq_file *, struct lc_element *)); diff --git a/lib/lru_cache.c b/lib/lru_cache.c index 028f5d996eef..28ba40b99337 100644 --- a/lib/lru_cache.c +++ b/lib/lru_cache.c @@ -238,7 +238,7 @@ void lc_reset(struct lru_cache *lc) * @seq: the seq_file to print into * @lc: the lru cache to print statistics of */ -size_t lc_seq_printf_stats(struct seq_file *seq, struct lru_cache *lc) +void lc_seq_printf_stats(struct seq_file *seq, struct lru_cache *lc) { /* NOTE: * total calls to lc_get are @@ -250,8 +250,6 @@ size_t lc_seq_printf_stats(struct seq_file *seq, struct lru_cache *lc) seq_printf(seq, "\t%s: used:%u/%u hits:%lu misses:%lu starving:%lu locked:%lu changed:%lu\n", lc->name, lc->used, lc->nr_elements, lc->hits, lc->misses, lc->starving, lc->locked, lc->changed); - - return 0; } static struct hlist_head *lc_hash_slot(struct lru_cache *lc, unsigned int enr) -- cgit v1.2.3 From 511cbce2ff8b9d322077909ee90c5d4b67b29b75 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 10 Nov 2015 14:56:14 +0100 Subject: irq_poll: make blk-iopoll available outside the block layer The new name is irq_poll as iopoll is already taken. Better suggestions welcome. Signed-off-by: Christoph Hellwig Reviewed-by: Bart Van Assche --- Documentation/kernel-per-CPU-kthreads.txt | 2 +- block/Makefile | 2 +- block/blk-iopoll.c | 224 ------------------------------ drivers/scsi/Kconfig | 1 + drivers/scsi/be2iscsi/Kconfig | 1 + drivers/scsi/be2iscsi/be.h | 4 +- drivers/scsi/be2iscsi/be_iscsi.c | 4 +- drivers/scsi/be2iscsi/be_main.c | 24 ++-- drivers/scsi/ipr.c | 28 ++-- drivers/scsi/ipr.h | 4 +- include/linux/blk-iopoll.h | 46 ------ include/linux/interrupt.h | 2 +- include/linux/irq_poll.h | 46 ++++++ include/trace/events/irq.h | 2 +- lib/Kconfig | 5 + lib/Makefile | 1 + lib/irq_poll.c | 221 +++++++++++++++++++++++++++++ tools/lib/traceevent/event-parse.c | 2 +- tools/perf/util/trace-event-parse.c | 2 +- 19 files changed, 313 insertions(+), 308 deletions(-) delete mode 100644 block/blk-iopoll.c delete mode 100644 include/linux/blk-iopoll.h create mode 100644 include/linux/irq_poll.h create mode 100644 lib/irq_poll.c (limited to 'lib') diff --git a/Documentation/kernel-per-CPU-kthreads.txt b/Documentation/kernel-per-CPU-kthreads.txt index f4cbfe0ba108..edec3a3e648d 100644 --- a/Documentation/kernel-per-CPU-kthreads.txt +++ b/Documentation/kernel-per-CPU-kthreads.txt @@ -90,7 +90,7 @@ BLOCK_SOFTIRQ: Do all of the following: from being initiated from tasks that might run on the CPU to be de-jittered. (It is OK to force this CPU offline and then bring it back online before you start your application.) -BLOCK_IOPOLL_SOFTIRQ: Do all of the following: +IRQ_POLL_SOFTIRQ: Do all of the following: 1. Force block-device interrupts onto some other CPU. 2. Initiate any block I/O and block-I/O polling on other CPUs. 3. Once your application has started, prevent CPU-hotplug operations diff --git a/block/Makefile b/block/Makefile index 00ecc97629db..e8504748c7cb 100644 --- a/block/Makefile +++ b/block/Makefile @@ -5,7 +5,7 @@ obj-$(CONFIG_BLOCK) := bio.o elevator.o blk-core.o blk-tag.o blk-sysfs.o \ blk-flush.o blk-settings.o blk-ioc.o blk-map.o \ blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \ - blk-iopoll.o blk-lib.o blk-mq.o blk-mq-tag.o \ + blk-lib.o blk-mq.o blk-mq-tag.o \ blk-mq-sysfs.o blk-mq-cpu.o blk-mq-cpumap.o ioctl.o \ genhd.o scsi_ioctl.o partition-generic.o ioprio.o \ partitions/ diff --git a/block/blk-iopoll.c b/block/blk-iopoll.c deleted file mode 100644 index 0736729d6494..000000000000 --- a/block/blk-iopoll.c +++ /dev/null @@ -1,224 +0,0 @@ -/* - * Functions related to interrupt-poll handling in the block layer. This - * is similar to NAPI for network devices. - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "blk.h" - -static unsigned int blk_iopoll_budget __read_mostly = 256; - -static DEFINE_PER_CPU(struct list_head, blk_cpu_iopoll); - -/** - * blk_iopoll_sched - Schedule a run of the iopoll handler - * @iop: The parent iopoll structure - * - * Description: - * Add this blk_iopoll structure to the pending poll list and trigger the - * raise of the blk iopoll softirq. The driver must already have gotten a - * successful return from blk_iopoll_sched_prep() before calling this. - **/ -void blk_iopoll_sched(struct blk_iopoll *iop) -{ - unsigned long flags; - - local_irq_save(flags); - list_add_tail(&iop->list, this_cpu_ptr(&blk_cpu_iopoll)); - __raise_softirq_irqoff(BLOCK_IOPOLL_SOFTIRQ); - local_irq_restore(flags); -} -EXPORT_SYMBOL(blk_iopoll_sched); - -/** - * __blk_iopoll_complete - Mark this @iop as un-polled again - * @iop: The parent iopoll structure - * - * Description: - * See blk_iopoll_complete(). This function must be called with interrupts - * disabled. - **/ -void __blk_iopoll_complete(struct blk_iopoll *iop) -{ - list_del(&iop->list); - smp_mb__before_atomic(); - clear_bit_unlock(IOPOLL_F_SCHED, &iop->state); -} -EXPORT_SYMBOL(__blk_iopoll_complete); - -/** - * blk_iopoll_complete - Mark this @iop as un-polled again - * @iop: The parent iopoll structure - * - * Description: - * If a driver consumes less than the assigned budget in its run of the - * iopoll handler, it'll end the polled mode by calling this function. The - * iopoll handler will not be invoked again before blk_iopoll_sched_prep() - * is called. - **/ -void blk_iopoll_complete(struct blk_iopoll *iop) -{ - unsigned long flags; - - local_irq_save(flags); - __blk_iopoll_complete(iop); - local_irq_restore(flags); -} -EXPORT_SYMBOL(blk_iopoll_complete); - -static void blk_iopoll_softirq(struct softirq_action *h) -{ - struct list_head *list = this_cpu_ptr(&blk_cpu_iopoll); - int rearm = 0, budget = blk_iopoll_budget; - unsigned long start_time = jiffies; - - local_irq_disable(); - - while (!list_empty(list)) { - struct blk_iopoll *iop; - int work, weight; - - /* - * If softirq window is exhausted then punt. - */ - if (budget <= 0 || time_after(jiffies, start_time)) { - rearm = 1; - break; - } - - local_irq_enable(); - - /* Even though interrupts have been re-enabled, this - * access is safe because interrupts can only add new - * entries to the tail of this list, and only ->poll() - * calls can remove this head entry from the list. - */ - iop = list_entry(list->next, struct blk_iopoll, list); - - weight = iop->weight; - work = 0; - if (test_bit(IOPOLL_F_SCHED, &iop->state)) - work = iop->poll(iop, weight); - - budget -= work; - - local_irq_disable(); - - /* - * Drivers must not modify the iopoll state, if they - * consume their assigned weight (or more, some drivers can't - * easily just stop processing, they have to complete an - * entire mask of commands).In such cases this code - * still "owns" the iopoll instance and therefore can - * move the instance around on the list at-will. - */ - if (work >= weight) { - if (blk_iopoll_disable_pending(iop)) - __blk_iopoll_complete(iop); - else - list_move_tail(&iop->list, list); - } - } - - if (rearm) - __raise_softirq_irqoff(BLOCK_IOPOLL_SOFTIRQ); - - local_irq_enable(); -} - -/** - * blk_iopoll_disable - Disable iopoll on this @iop - * @iop: The parent iopoll structure - * - * Description: - * Disable io polling and wait for any pending callbacks to have completed. - **/ -void blk_iopoll_disable(struct blk_iopoll *iop) -{ - set_bit(IOPOLL_F_DISABLE, &iop->state); - while (test_and_set_bit(IOPOLL_F_SCHED, &iop->state)) - msleep(1); - clear_bit(IOPOLL_F_DISABLE, &iop->state); -} -EXPORT_SYMBOL(blk_iopoll_disable); - -/** - * blk_iopoll_enable - Enable iopoll on this @iop - * @iop: The parent iopoll structure - * - * Description: - * Enable iopoll on this @iop. Note that the handler run will not be - * scheduled, it will only mark it as active. - **/ -void blk_iopoll_enable(struct blk_iopoll *iop) -{ - BUG_ON(!test_bit(IOPOLL_F_SCHED, &iop->state)); - smp_mb__before_atomic(); - clear_bit_unlock(IOPOLL_F_SCHED, &iop->state); -} -EXPORT_SYMBOL(blk_iopoll_enable); - -/** - * blk_iopoll_init - Initialize this @iop - * @iop: The parent iopoll structure - * @weight: The default weight (or command completion budget) - * @poll_fn: The handler to invoke - * - * Description: - * Initialize this blk_iopoll structure. Before being actively used, the - * driver must call blk_iopoll_enable(). - **/ -void blk_iopoll_init(struct blk_iopoll *iop, int weight, blk_iopoll_fn *poll_fn) -{ - memset(iop, 0, sizeof(*iop)); - INIT_LIST_HEAD(&iop->list); - iop->weight = weight; - iop->poll = poll_fn; - set_bit(IOPOLL_F_SCHED, &iop->state); -} -EXPORT_SYMBOL(blk_iopoll_init); - -static int blk_iopoll_cpu_notify(struct notifier_block *self, - unsigned long action, void *hcpu) -{ - /* - * If a CPU goes away, splice its entries to the current CPU - * and trigger a run of the softirq - */ - if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) { - int cpu = (unsigned long) hcpu; - - local_irq_disable(); - list_splice_init(&per_cpu(blk_cpu_iopoll, cpu), - this_cpu_ptr(&blk_cpu_iopoll)); - __raise_softirq_irqoff(BLOCK_IOPOLL_SOFTIRQ); - local_irq_enable(); - } - - return NOTIFY_OK; -} - -static struct notifier_block blk_iopoll_cpu_notifier = { - .notifier_call = blk_iopoll_cpu_notify, -}; - -static __init int blk_iopoll_setup(void) -{ - int i; - - for_each_possible_cpu(i) - INIT_LIST_HEAD(&per_cpu(blk_cpu_iopoll, i)); - - open_softirq(BLOCK_IOPOLL_SOFTIRQ, blk_iopoll_softirq); - register_hotcpu_notifier(&blk_iopoll_cpu_notifier); - return 0; -} -subsys_initcall(blk_iopoll_setup); diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig index 5f692ae40749..92099657b046 100644 --- a/drivers/scsi/Kconfig +++ b/drivers/scsi/Kconfig @@ -1102,6 +1102,7 @@ config SCSI_IPR tristate "IBM Power Linux RAID adapter support" depends on PCI && SCSI && ATA select FW_LOADER + select IRQ_POLL ---help--- This driver supports the IBM Power Linux family RAID adapters. This includes IBM pSeries 5712, 5703, 5709, and 570A, as well diff --git a/drivers/scsi/be2iscsi/Kconfig b/drivers/scsi/be2iscsi/Kconfig index 4e7cad272469..bad5f32e1f67 100644 --- a/drivers/scsi/be2iscsi/Kconfig +++ b/drivers/scsi/be2iscsi/Kconfig @@ -3,6 +3,7 @@ config BE2ISCSI depends on PCI && SCSI && NET select SCSI_ISCSI_ATTRS select ISCSI_BOOT_SYSFS + select IRQ_POLL help This driver implements the iSCSI functionality for Emulex diff --git a/drivers/scsi/be2iscsi/be.h b/drivers/scsi/be2iscsi/be.h index 77f992e74726..a41c6432f444 100644 --- a/drivers/scsi/be2iscsi/be.h +++ b/drivers/scsi/be2iscsi/be.h @@ -20,7 +20,7 @@ #include #include -#include +#include #define FW_VER_LEN 32 #define MCC_Q_LEN 128 #define MCC_CQ_LEN 256 @@ -101,7 +101,7 @@ struct be_eq_obj { struct beiscsi_hba *phba; struct be_queue_info *cq; struct work_struct work_cqs; /* Work Item */ - struct blk_iopoll iopoll; + struct irq_poll iopoll; }; struct be_mcc_obj { diff --git a/drivers/scsi/be2iscsi/be_iscsi.c b/drivers/scsi/be2iscsi/be_iscsi.c index b7087ba69d8d..022e87b62e40 100644 --- a/drivers/scsi/be2iscsi/be_iscsi.c +++ b/drivers/scsi/be2iscsi/be_iscsi.c @@ -1292,9 +1292,9 @@ static void beiscsi_flush_cq(struct beiscsi_hba *phba) for (i = 0; i < phba->num_cpus; i++) { pbe_eq = &phwi_context->be_eq[i]; - blk_iopoll_disable(&pbe_eq->iopoll); + irq_poll_disable(&pbe_eq->iopoll); beiscsi_process_cq(pbe_eq); - blk_iopoll_enable(&pbe_eq->iopoll); + irq_poll_enable(&pbe_eq->iopoll); } } diff --git a/drivers/scsi/be2iscsi/be_main.c b/drivers/scsi/be2iscsi/be_main.c index fe0c5143f8e6..1d879ef406d8 100644 --- a/drivers/scsi/be2iscsi/be_main.c +++ b/drivers/scsi/be2iscsi/be_main.c @@ -910,8 +910,8 @@ static irqreturn_t be_isr_msix(int irq, void *dev_id) num_eq_processed = 0; while (eqe->dw[offsetof(struct amap_eq_entry, valid) / 32] & EQE_VALID_MASK) { - if (!blk_iopoll_sched_prep(&pbe_eq->iopoll)) - blk_iopoll_sched(&pbe_eq->iopoll); + if (!irq_poll_sched_prep(&pbe_eq->iopoll)) + irq_poll_sched(&pbe_eq->iopoll); AMAP_SET_BITS(struct amap_eq_entry, valid, eqe, 0); queue_tail_inc(eq); @@ -972,8 +972,8 @@ static irqreturn_t be_isr(int irq, void *dev_id) spin_unlock_irqrestore(&phba->isr_lock, flags); num_mcceq_processed++; } else { - if (!blk_iopoll_sched_prep(&pbe_eq->iopoll)) - blk_iopoll_sched(&pbe_eq->iopoll); + if (!irq_poll_sched_prep(&pbe_eq->iopoll)) + irq_poll_sched(&pbe_eq->iopoll); num_ioeq_processed++; } AMAP_SET_BITS(struct amap_eq_entry, valid, eqe, 0); @@ -2295,7 +2295,7 @@ void beiscsi_process_all_cqs(struct work_struct *work) hwi_ring_eq_db(phba, pbe_eq->q.id, 0, 0, 1, 1); } -static int be_iopoll(struct blk_iopoll *iop, int budget) +static int be_iopoll(struct irq_poll *iop, int budget) { unsigned int ret; struct beiscsi_hba *phba; @@ -2306,7 +2306,7 @@ static int be_iopoll(struct blk_iopoll *iop, int budget) pbe_eq->cq_count += ret; if (ret < budget) { phba = pbe_eq->phba; - blk_iopoll_complete(iop); + irq_poll_complete(iop); beiscsi_log(phba, KERN_INFO, BEISCSI_LOG_CONFIG | BEISCSI_LOG_IO, "BM_%d : rearm pbe_eq->q.id =%d\n", @@ -5293,7 +5293,7 @@ static void beiscsi_quiesce(struct beiscsi_hba *phba, for (i = 0; i < phba->num_cpus; i++) { pbe_eq = &phwi_context->be_eq[i]; - blk_iopoll_disable(&pbe_eq->iopoll); + irq_poll_disable(&pbe_eq->iopoll); } if (unload_state == BEISCSI_CLEAN_UNLOAD) { @@ -5579,9 +5579,9 @@ static void beiscsi_eeh_resume(struct pci_dev *pdev) for (i = 0; i < phba->num_cpus; i++) { pbe_eq = &phwi_context->be_eq[i]; - blk_iopoll_init(&pbe_eq->iopoll, be_iopoll_budget, + irq_poll_init(&pbe_eq->iopoll, be_iopoll_budget, be_iopoll); - blk_iopoll_enable(&pbe_eq->iopoll); + irq_poll_enable(&pbe_eq->iopoll); } i = (phba->msix_enabled) ? i : 0; @@ -5752,9 +5752,9 @@ static int beiscsi_dev_probe(struct pci_dev *pcidev, for (i = 0; i < phba->num_cpus; i++) { pbe_eq = &phwi_context->be_eq[i]; - blk_iopoll_init(&pbe_eq->iopoll, be_iopoll_budget, + irq_poll_init(&pbe_eq->iopoll, be_iopoll_budget, be_iopoll); - blk_iopoll_enable(&pbe_eq->iopoll); + irq_poll_enable(&pbe_eq->iopoll); } i = (phba->msix_enabled) ? i : 0; @@ -5795,7 +5795,7 @@ free_blkenbld: destroy_workqueue(phba->wq); for (i = 0; i < phba->num_cpus; i++) { pbe_eq = &phwi_context->be_eq[i]; - blk_iopoll_disable(&pbe_eq->iopoll); + irq_poll_disable(&pbe_eq->iopoll); } free_twq: beiscsi_clean_port(phba); diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c index 536cd5a80422..6b9c738cdc18 100644 --- a/drivers/scsi/ipr.c +++ b/drivers/scsi/ipr.c @@ -3638,7 +3638,7 @@ static struct device_attribute ipr_ioa_reset_attr = { .store = ipr_store_reset_adapter }; -static int ipr_iopoll(struct blk_iopoll *iop, int budget); +static int ipr_iopoll(struct irq_poll *iop, int budget); /** * ipr_show_iopoll_weight - Show ipr polling mode * @dev: class device struct @@ -3681,34 +3681,34 @@ static ssize_t ipr_store_iopoll_weight(struct device *dev, int i; if (!ioa_cfg->sis64) { - dev_info(&ioa_cfg->pdev->dev, "blk-iopoll not supported on this adapter\n"); + dev_info(&ioa_cfg->pdev->dev, "irq_poll not supported on this adapter\n"); return -EINVAL; } if (kstrtoul(buf, 10, &user_iopoll_weight)) return -EINVAL; if (user_iopoll_weight > 256) { - dev_info(&ioa_cfg->pdev->dev, "Invalid blk-iopoll weight. It must be less than 256\n"); + dev_info(&ioa_cfg->pdev->dev, "Invalid irq_poll weight. It must be less than 256\n"); return -EINVAL; } if (user_iopoll_weight == ioa_cfg->iopoll_weight) { - dev_info(&ioa_cfg->pdev->dev, "Current blk-iopoll weight has the same weight\n"); + dev_info(&ioa_cfg->pdev->dev, "Current irq_poll weight has the same weight\n"); return strlen(buf); } if (ioa_cfg->iopoll_weight && ioa_cfg->sis64 && ioa_cfg->nvectors > 1) { for (i = 1; i < ioa_cfg->hrrq_num; i++) - blk_iopoll_disable(&ioa_cfg->hrrq[i].iopoll); + irq_poll_disable(&ioa_cfg->hrrq[i].iopoll); } spin_lock_irqsave(shost->host_lock, lock_flags); ioa_cfg->iopoll_weight = user_iopoll_weight; if (ioa_cfg->iopoll_weight && ioa_cfg->sis64 && ioa_cfg->nvectors > 1) { for (i = 1; i < ioa_cfg->hrrq_num; i++) { - blk_iopoll_init(&ioa_cfg->hrrq[i].iopoll, + irq_poll_init(&ioa_cfg->hrrq[i].iopoll, ioa_cfg->iopoll_weight, ipr_iopoll); - blk_iopoll_enable(&ioa_cfg->hrrq[i].iopoll); + irq_poll_enable(&ioa_cfg->hrrq[i].iopoll); } } spin_unlock_irqrestore(shost->host_lock, lock_flags); @@ -5569,7 +5569,7 @@ static int ipr_process_hrrq(struct ipr_hrr_queue *hrr_queue, int budget, return num_hrrq; } -static int ipr_iopoll(struct blk_iopoll *iop, int budget) +static int ipr_iopoll(struct irq_poll *iop, int budget) { struct ipr_ioa_cfg *ioa_cfg; struct ipr_hrr_queue *hrrq; @@ -5585,7 +5585,7 @@ static int ipr_iopoll(struct blk_iopoll *iop, int budget) completed_ops = ipr_process_hrrq(hrrq, budget, &doneq); if (completed_ops < budget) - blk_iopoll_complete(iop); + irq_poll_complete(iop); spin_unlock_irqrestore(hrrq->lock, hrrq_flags); list_for_each_entry_safe(ipr_cmd, temp, &doneq, queue) { @@ -5693,8 +5693,8 @@ static irqreturn_t ipr_isr_mhrrq(int irq, void *devp) if (ioa_cfg->iopoll_weight && ioa_cfg->sis64 && ioa_cfg->nvectors > 1) { if ((be32_to_cpu(*hrrq->hrrq_curr) & IPR_HRRQ_TOGGLE_BIT) == hrrq->toggle_bit) { - if (!blk_iopoll_sched_prep(&hrrq->iopoll)) - blk_iopoll_sched(&hrrq->iopoll); + if (!irq_poll_sched_prep(&hrrq->iopoll)) + irq_poll_sched(&hrrq->iopoll); spin_unlock_irqrestore(hrrq->lock, hrrq_flags); return IRQ_HANDLED; } @@ -10405,9 +10405,9 @@ static int ipr_probe(struct pci_dev *pdev, const struct pci_device_id *dev_id) if (ioa_cfg->iopoll_weight && ioa_cfg->sis64 && ioa_cfg->nvectors > 1) { for (i = 1; i < ioa_cfg->hrrq_num; i++) { - blk_iopoll_init(&ioa_cfg->hrrq[i].iopoll, + irq_poll_init(&ioa_cfg->hrrq[i].iopoll, ioa_cfg->iopoll_weight, ipr_iopoll); - blk_iopoll_enable(&ioa_cfg->hrrq[i].iopoll); + irq_poll_enable(&ioa_cfg->hrrq[i].iopoll); } } @@ -10436,7 +10436,7 @@ static void ipr_shutdown(struct pci_dev *pdev) if (ioa_cfg->iopoll_weight && ioa_cfg->sis64 && ioa_cfg->nvectors > 1) { ioa_cfg->iopoll_weight = 0; for (i = 1; i < ioa_cfg->hrrq_num; i++) - blk_iopoll_disable(&ioa_cfg->hrrq[i].iopoll); + irq_poll_disable(&ioa_cfg->hrrq[i].iopoll); } while (ioa_cfg->in_reset_reload) { diff --git a/drivers/scsi/ipr.h b/drivers/scsi/ipr.h index a34c7a5a995e..56c57068300a 100644 --- a/drivers/scsi/ipr.h +++ b/drivers/scsi/ipr.h @@ -32,7 +32,7 @@ #include #include #include -#include +#include #include #include @@ -517,7 +517,7 @@ struct ipr_hrr_queue { u8 allow_cmds:1; u8 removing_ioa:1; - struct blk_iopoll iopoll; + struct irq_poll iopoll; }; /* Command packet structure */ diff --git a/include/linux/blk-iopoll.h b/include/linux/blk-iopoll.h deleted file mode 100644 index 77ae77c0b704..000000000000 --- a/include/linux/blk-iopoll.h +++ /dev/null @@ -1,46 +0,0 @@ -#ifndef BLK_IOPOLL_H -#define BLK_IOPOLL_H - -struct blk_iopoll; -typedef int (blk_iopoll_fn)(struct blk_iopoll *, int); - -struct blk_iopoll { - struct list_head list; - unsigned long state; - unsigned long data; - int weight; - int max; - blk_iopoll_fn *poll; -}; - -enum { - IOPOLL_F_SCHED = 0, - IOPOLL_F_DISABLE = 1, -}; - -/* - * Returns 0 if we successfully set the IOPOLL_F_SCHED bit, indicating - * that we were the first to acquire this iop for scheduling. If this iop - * is currently disabled, return "failure". - */ -static inline int blk_iopoll_sched_prep(struct blk_iopoll *iop) -{ - if (!test_bit(IOPOLL_F_DISABLE, &iop->state)) - return test_and_set_bit(IOPOLL_F_SCHED, &iop->state); - - return 1; -} - -static inline int blk_iopoll_disable_pending(struct blk_iopoll *iop) -{ - return test_bit(IOPOLL_F_DISABLE, &iop->state); -} - -extern void blk_iopoll_sched(struct blk_iopoll *); -extern void blk_iopoll_init(struct blk_iopoll *, int, blk_iopoll_fn *); -extern void blk_iopoll_complete(struct blk_iopoll *); -extern void __blk_iopoll_complete(struct blk_iopoll *); -extern void blk_iopoll_enable(struct blk_iopoll *); -extern void blk_iopoll_disable(struct blk_iopoll *); - -#endif diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index ad16809c8596..7ff98c23199a 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -412,7 +412,7 @@ enum NET_TX_SOFTIRQ, NET_RX_SOFTIRQ, BLOCK_SOFTIRQ, - BLOCK_IOPOLL_SOFTIRQ, + IRQ_POLL_SOFTIRQ, TASKLET_SOFTIRQ, SCHED_SOFTIRQ, HRTIMER_SOFTIRQ, /* Unused, but kept as tools rely on the diff --git a/include/linux/irq_poll.h b/include/linux/irq_poll.h new file mode 100644 index 000000000000..50c39dcd2cba --- /dev/null +++ b/include/linux/irq_poll.h @@ -0,0 +1,46 @@ +#ifndef IRQ_POLL_H +#define IRQ_POLL_H + +struct irq_poll; +typedef int (irq_poll_fn)(struct irq_poll *, int); + +struct irq_poll { + struct list_head list; + unsigned long state; + unsigned long data; + int weight; + int max; + irq_poll_fn *poll; +}; + +enum { + IRQ_POLL_F_SCHED = 0, + IRQ_POLL_F_DISABLE = 1, +}; + +/* + * Returns 0 if we successfully set the IRQ_POLL_F_SCHED bit, indicating + * that we were the first to acquire this iop for scheduling. If this iop + * is currently disabled, return "failure". + */ +static inline int irq_poll_sched_prep(struct irq_poll *iop) +{ + if (!test_bit(IRQ_POLL_F_DISABLE, &iop->state)) + return test_and_set_bit(IRQ_POLL_F_SCHED, &iop->state); + + return 1; +} + +static inline int irq_poll_disable_pending(struct irq_poll *iop) +{ + return test_bit(IRQ_POLL_F_DISABLE, &iop->state); +} + +extern void irq_poll_sched(struct irq_poll *); +extern void irq_poll_init(struct irq_poll *, int, irq_poll_fn *); +extern void irq_poll_complete(struct irq_poll *); +extern void __irq_poll_complete(struct irq_poll *); +extern void irq_poll_enable(struct irq_poll *); +extern void irq_poll_disable(struct irq_poll *); + +#endif diff --git a/include/trace/events/irq.h b/include/trace/events/irq.h index ff8f6c091a15..f95f25e786ef 100644 --- a/include/trace/events/irq.h +++ b/include/trace/events/irq.h @@ -15,7 +15,7 @@ struct softirq_action; softirq_name(NET_TX) \ softirq_name(NET_RX) \ softirq_name(BLOCK) \ - softirq_name(BLOCK_IOPOLL) \ + softirq_name(IRQ_POLL) \ softirq_name(TASKLET) \ softirq_name(SCHED) \ softirq_name(HRTIMER) \ diff --git a/lib/Kconfig b/lib/Kconfig index f0df318104e7..e00e1960260a 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -475,6 +475,11 @@ config DDR information. This data is useful for drivers handling DDR SDRAM controllers. +config IRQ_POLL + bool "IRQ polling library" + help + Helper library to poll interrupt mitigation using polling. + config MPILIB tristate select CLZ_TAB diff --git a/lib/Makefile b/lib/Makefile index 7f1de26613d2..1478ae256561 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -164,6 +164,7 @@ obj-$(CONFIG_GENERIC_NET_UTILS) += net_utils.o obj-$(CONFIG_SG_SPLIT) += sg_split.o obj-$(CONFIG_STMP_DEVICE) += stmp_device.o +obj-$(CONFIG_IRQ_POLL) += irq_poll.o libfdt_files = fdt.o fdt_ro.o fdt_wip.o fdt_rw.o fdt_sw.o fdt_strerror.o \ fdt_empty_tree.o diff --git a/lib/irq_poll.c b/lib/irq_poll.c new file mode 100644 index 000000000000..e6fd1dc0908b --- /dev/null +++ b/lib/irq_poll.c @@ -0,0 +1,221 @@ +/* + * Functions related to interrupt-poll handling in the block layer. This + * is similar to NAPI for network devices. + */ +#include +#include +#include +#include +#include +#include +#include +#include + +static unsigned int irq_poll_budget __read_mostly = 256; + +static DEFINE_PER_CPU(struct list_head, blk_cpu_iopoll); + +/** + * irq_poll_sched - Schedule a run of the iopoll handler + * @iop: The parent iopoll structure + * + * Description: + * Add this irq_poll structure to the pending poll list and trigger the + * raise of the blk iopoll softirq. The driver must already have gotten a + * successful return from irq_poll_sched_prep() before calling this. + **/ +void irq_poll_sched(struct irq_poll *iop) +{ + unsigned long flags; + + local_irq_save(flags); + list_add_tail(&iop->list, this_cpu_ptr(&blk_cpu_iopoll)); + __raise_softirq_irqoff(IRQ_POLL_SOFTIRQ); + local_irq_restore(flags); +} +EXPORT_SYMBOL(irq_poll_sched); + +/** + * __irq_poll_complete - Mark this @iop as un-polled again + * @iop: The parent iopoll structure + * + * Description: + * See irq_poll_complete(). This function must be called with interrupts + * disabled. + **/ +void __irq_poll_complete(struct irq_poll *iop) +{ + list_del(&iop->list); + smp_mb__before_atomic(); + clear_bit_unlock(IRQ_POLL_F_SCHED, &iop->state); +} +EXPORT_SYMBOL(__irq_poll_complete); + +/** + * irq_poll_complete - Mark this @iop as un-polled again + * @iop: The parent iopoll structure + * + * Description: + * If a driver consumes less than the assigned budget in its run of the + * iopoll handler, it'll end the polled mode by calling this function. The + * iopoll handler will not be invoked again before irq_poll_sched_prep() + * is called. + **/ +void irq_poll_complete(struct irq_poll *iop) +{ + unsigned long flags; + + local_irq_save(flags); + __irq_poll_complete(iop); + local_irq_restore(flags); +} +EXPORT_SYMBOL(irq_poll_complete); + +static void irq_poll_softirq(struct softirq_action *h) +{ + struct list_head *list = this_cpu_ptr(&blk_cpu_iopoll); + int rearm = 0, budget = irq_poll_budget; + unsigned long start_time = jiffies; + + local_irq_disable(); + + while (!list_empty(list)) { + struct irq_poll *iop; + int work, weight; + + /* + * If softirq window is exhausted then punt. + */ + if (budget <= 0 || time_after(jiffies, start_time)) { + rearm = 1; + break; + } + + local_irq_enable(); + + /* Even though interrupts have been re-enabled, this + * access is safe because interrupts can only add new + * entries to the tail of this list, and only ->poll() + * calls can remove this head entry from the list. + */ + iop = list_entry(list->next, struct irq_poll, list); + + weight = iop->weight; + work = 0; + if (test_bit(IRQ_POLL_F_SCHED, &iop->state)) + work = iop->poll(iop, weight); + + budget -= work; + + local_irq_disable(); + + /* + * Drivers must not modify the iopoll state, if they + * consume their assigned weight (or more, some drivers can't + * easily just stop processing, they have to complete an + * entire mask of commands).In such cases this code + * still "owns" the iopoll instance and therefore can + * move the instance around on the list at-will. + */ + if (work >= weight) { + if (irq_poll_disable_pending(iop)) + __irq_poll_complete(iop); + else + list_move_tail(&iop->list, list); + } + } + + if (rearm) + __raise_softirq_irqoff(IRQ_POLL_SOFTIRQ); + + local_irq_enable(); +} + +/** + * irq_poll_disable - Disable iopoll on this @iop + * @iop: The parent iopoll structure + * + * Description: + * Disable io polling and wait for any pending callbacks to have completed. + **/ +void irq_poll_disable(struct irq_poll *iop) +{ + set_bit(IRQ_POLL_F_DISABLE, &iop->state); + while (test_and_set_bit(IRQ_POLL_F_SCHED, &iop->state)) + msleep(1); + clear_bit(IRQ_POLL_F_DISABLE, &iop->state); +} +EXPORT_SYMBOL(irq_poll_disable); + +/** + * irq_poll_enable - Enable iopoll on this @iop + * @iop: The parent iopoll structure + * + * Description: + * Enable iopoll on this @iop. Note that the handler run will not be + * scheduled, it will only mark it as active. + **/ +void irq_poll_enable(struct irq_poll *iop) +{ + BUG_ON(!test_bit(IRQ_POLL_F_SCHED, &iop->state)); + smp_mb__before_atomic(); + clear_bit_unlock(IRQ_POLL_F_SCHED, &iop->state); +} +EXPORT_SYMBOL(irq_poll_enable); + +/** + * irq_poll_init - Initialize this @iop + * @iop: The parent iopoll structure + * @weight: The default weight (or command completion budget) + * @poll_fn: The handler to invoke + * + * Description: + * Initialize this irq_poll structure. Before being actively used, the + * driver must call irq_poll_enable(). + **/ +void irq_poll_init(struct irq_poll *iop, int weight, irq_poll_fn *poll_fn) +{ + memset(iop, 0, sizeof(*iop)); + INIT_LIST_HEAD(&iop->list); + iop->weight = weight; + iop->poll = poll_fn; + set_bit(IRQ_POLL_F_SCHED, &iop->state); +} +EXPORT_SYMBOL(irq_poll_init); + +static int irq_poll_cpu_notify(struct notifier_block *self, + unsigned long action, void *hcpu) +{ + /* + * If a CPU goes away, splice its entries to the current CPU + * and trigger a run of the softirq + */ + if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) { + int cpu = (unsigned long) hcpu; + + local_irq_disable(); + list_splice_init(&per_cpu(blk_cpu_iopoll, cpu), + this_cpu_ptr(&blk_cpu_iopoll)); + __raise_softirq_irqoff(IRQ_POLL_SOFTIRQ); + local_irq_enable(); + } + + return NOTIFY_OK; +} + +static struct notifier_block irq_poll_cpu_notifier = { + .notifier_call = irq_poll_cpu_notify, +}; + +static __init int irq_poll_setup(void) +{ + int i; + + for_each_possible_cpu(i) + INIT_LIST_HEAD(&per_cpu(blk_cpu_iopoll, i)); + + open_softirq(IRQ_POLL_SOFTIRQ, irq_poll_softirq); + register_hotcpu_notifier(&irq_poll_cpu_notifier); + return 0; +} +subsys_initcall(irq_poll_setup); diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index 2a912df6771b..af5a31661086 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -3746,7 +3746,7 @@ static const struct flag flags[] = { { "NET_TX_SOFTIRQ", 2 }, { "NET_RX_SOFTIRQ", 3 }, { "BLOCK_SOFTIRQ", 4 }, - { "BLOCK_IOPOLL_SOFTIRQ", 5 }, + { "IRQ_POLL_SOFTIRQ", 5 }, { "TASKLET_SOFTIRQ", 6 }, { "SCHED_SOFTIRQ", 7 }, { "HRTIMER_SOFTIRQ", 8 }, diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c index 8ff7d620d942..33b52eaa39db 100644 --- a/tools/perf/util/trace-event-parse.c +++ b/tools/perf/util/trace-event-parse.c @@ -209,7 +209,7 @@ static const struct flag flags[] = { { "NET_TX_SOFTIRQ", 2 }, { "NET_RX_SOFTIRQ", 3 }, { "BLOCK_SOFTIRQ", 4 }, - { "BLOCK_IOPOLL_SOFTIRQ", 5 }, + { "IRQ_POLL_SOFTIRQ", 5 }, { "TASKLET_SOFTIRQ", 6 }, { "SCHED_SOFTIRQ", 7 }, { "HRTIMER_SOFTIRQ", 8 }, -- cgit v1.2.3 From 78d0264eb7a938f1eaf59fcb2d3f7da2567369d3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 7 Dec 2015 06:38:28 -0800 Subject: irq_poll: don't disable new irq_poll instances There is no good reason to start out disabled - drivers can control if the poll instance can be scheduled by simply not scheduling it yet. Signed-off-by: Christoph Hellwig Reviewed-by: Bart Van Assche --- drivers/scsi/be2iscsi/be_main.c | 2 -- drivers/scsi/ipr.c | 2 -- lib/irq_poll.c | 4 +--- 3 files changed, 1 insertion(+), 7 deletions(-) (limited to 'lib') diff --git a/drivers/scsi/be2iscsi/be_main.c b/drivers/scsi/be2iscsi/be_main.c index 1d879ef406d8..471e2b942435 100644 --- a/drivers/scsi/be2iscsi/be_main.c +++ b/drivers/scsi/be2iscsi/be_main.c @@ -5581,7 +5581,6 @@ static void beiscsi_eeh_resume(struct pci_dev *pdev) pbe_eq = &phwi_context->be_eq[i]; irq_poll_init(&pbe_eq->iopoll, be_iopoll_budget, be_iopoll); - irq_poll_enable(&pbe_eq->iopoll); } i = (phba->msix_enabled) ? i : 0; @@ -5754,7 +5753,6 @@ static int beiscsi_dev_probe(struct pci_dev *pcidev, pbe_eq = &phwi_context->be_eq[i]; irq_poll_init(&pbe_eq->iopoll, be_iopoll_budget, be_iopoll); - irq_poll_enable(&pbe_eq->iopoll); } i = (phba->msix_enabled) ? i : 0; diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c index 6b9c738cdc18..402e4ca32d70 100644 --- a/drivers/scsi/ipr.c +++ b/drivers/scsi/ipr.c @@ -3708,7 +3708,6 @@ static ssize_t ipr_store_iopoll_weight(struct device *dev, for (i = 1; i < ioa_cfg->hrrq_num; i++) { irq_poll_init(&ioa_cfg->hrrq[i].iopoll, ioa_cfg->iopoll_weight, ipr_iopoll); - irq_poll_enable(&ioa_cfg->hrrq[i].iopoll); } } spin_unlock_irqrestore(shost->host_lock, lock_flags); @@ -10407,7 +10406,6 @@ static int ipr_probe(struct pci_dev *pdev, const struct pci_device_id *dev_id) for (i = 1; i < ioa_cfg->hrrq_num; i++) { irq_poll_init(&ioa_cfg->hrrq[i].iopoll, ioa_cfg->iopoll_weight, ipr_iopoll); - irq_poll_enable(&ioa_cfg->hrrq[i].iopoll); } } diff --git a/lib/irq_poll.c b/lib/irq_poll.c index e6fd1dc0908b..88af87971e8c 100644 --- a/lib/irq_poll.c +++ b/lib/irq_poll.c @@ -170,8 +170,7 @@ EXPORT_SYMBOL(irq_poll_enable); * @poll_fn: The handler to invoke * * Description: - * Initialize this irq_poll structure. Before being actively used, the - * driver must call irq_poll_enable(). + * Initialize and enable this irq_poll structure. **/ void irq_poll_init(struct irq_poll *iop, int weight, irq_poll_fn *poll_fn) { @@ -179,7 +178,6 @@ void irq_poll_init(struct irq_poll *iop, int weight, irq_poll_fn *poll_fn) INIT_LIST_HEAD(&iop->list); iop->weight = weight; iop->poll = poll_fn; - set_bit(IRQ_POLL_F_SCHED, &iop->state); } EXPORT_SYMBOL(irq_poll_init); -- cgit v1.2.3 From ea51190c03150fce4d9e428bfb608abbe0991db8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 7 Dec 2015 06:41:11 -0800 Subject: irq_poll: fold irq_poll_sched_prep into irq_poll_sched There is no good reason to keep them apart, and this makes using the API a bit simpler. Signed-off-by: Christoph Hellwig Reviewed-by: Bart Van Assche --- drivers/scsi/be2iscsi/be_main.c | 6 ++---- drivers/scsi/ipr.c | 3 +-- include/linux/irq_poll.h | 13 ------------- lib/irq_poll.c | 10 +++++++--- 4 files changed, 10 insertions(+), 22 deletions(-) (limited to 'lib') diff --git a/drivers/scsi/be2iscsi/be_main.c b/drivers/scsi/be2iscsi/be_main.c index 471e2b942435..cb9072a841be 100644 --- a/drivers/scsi/be2iscsi/be_main.c +++ b/drivers/scsi/be2iscsi/be_main.c @@ -910,8 +910,7 @@ static irqreturn_t be_isr_msix(int irq, void *dev_id) num_eq_processed = 0; while (eqe->dw[offsetof(struct amap_eq_entry, valid) / 32] & EQE_VALID_MASK) { - if (!irq_poll_sched_prep(&pbe_eq->iopoll)) - irq_poll_sched(&pbe_eq->iopoll); + irq_poll_sched(&pbe_eq->iopoll); AMAP_SET_BITS(struct amap_eq_entry, valid, eqe, 0); queue_tail_inc(eq); @@ -972,8 +971,7 @@ static irqreturn_t be_isr(int irq, void *dev_id) spin_unlock_irqrestore(&phba->isr_lock, flags); num_mcceq_processed++; } else { - if (!irq_poll_sched_prep(&pbe_eq->iopoll)) - irq_poll_sched(&pbe_eq->iopoll); + irq_poll_sched(&pbe_eq->iopoll); num_ioeq_processed++; } AMAP_SET_BITS(struct amap_eq_entry, valid, eqe, 0); diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c index 402e4ca32d70..82031e00b2e9 100644 --- a/drivers/scsi/ipr.c +++ b/drivers/scsi/ipr.c @@ -5692,8 +5692,7 @@ static irqreturn_t ipr_isr_mhrrq(int irq, void *devp) if (ioa_cfg->iopoll_weight && ioa_cfg->sis64 && ioa_cfg->nvectors > 1) { if ((be32_to_cpu(*hrrq->hrrq_curr) & IPR_HRRQ_TOGGLE_BIT) == hrrq->toggle_bit) { - if (!irq_poll_sched_prep(&hrrq->iopoll)) - irq_poll_sched(&hrrq->iopoll); + irq_poll_sched(&hrrq->iopoll); spin_unlock_irqrestore(hrrq->lock, hrrq_flags); return IRQ_HANDLED; } diff --git a/include/linux/irq_poll.h b/include/linux/irq_poll.h index 50c39dcd2cba..57efae661400 100644 --- a/include/linux/irq_poll.h +++ b/include/linux/irq_poll.h @@ -18,19 +18,6 @@ enum { IRQ_POLL_F_DISABLE = 1, }; -/* - * Returns 0 if we successfully set the IRQ_POLL_F_SCHED bit, indicating - * that we were the first to acquire this iop for scheduling. If this iop - * is currently disabled, return "failure". - */ -static inline int irq_poll_sched_prep(struct irq_poll *iop) -{ - if (!test_bit(IRQ_POLL_F_DISABLE, &iop->state)) - return test_and_set_bit(IRQ_POLL_F_SCHED, &iop->state); - - return 1; -} - static inline int irq_poll_disable_pending(struct irq_poll *iop) { return test_bit(IRQ_POLL_F_DISABLE, &iop->state); diff --git a/lib/irq_poll.c b/lib/irq_poll.c index 88af87971e8c..43a3370a09fd 100644 --- a/lib/irq_poll.c +++ b/lib/irq_poll.c @@ -21,13 +21,17 @@ static DEFINE_PER_CPU(struct list_head, blk_cpu_iopoll); * * Description: * Add this irq_poll structure to the pending poll list and trigger the - * raise of the blk iopoll softirq. The driver must already have gotten a - * successful return from irq_poll_sched_prep() before calling this. + * raise of the blk iopoll softirq. **/ void irq_poll_sched(struct irq_poll *iop) { unsigned long flags; + if (test_bit(IRQ_POLL_F_DISABLE, &iop->state)) + return; + if (!test_and_set_bit(IRQ_POLL_F_SCHED, &iop->state)) + return; + local_irq_save(flags); list_add_tail(&iop->list, this_cpu_ptr(&blk_cpu_iopoll)); __raise_softirq_irqoff(IRQ_POLL_SOFTIRQ); @@ -58,7 +62,7 @@ EXPORT_SYMBOL(__irq_poll_complete); * Description: * If a driver consumes less than the assigned budget in its run of the * iopoll handler, it'll end the polled mode by calling this function. The - * iopoll handler will not be invoked again before irq_poll_sched_prep() + * iopoll handler will not be invoked again before irq_poll_sched() * is called. **/ void irq_poll_complete(struct irq_poll *iop) -- cgit v1.2.3 From 0bc92ace52ef3ed1c8eb9bcf36cd3d7ca72d5d14 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 7 Dec 2015 06:56:36 -0800 Subject: irq_poll: fold irq_poll_disable_pending into irq_poll_softirq Signed-off-by: Christoph Hellwig Reviewed-by: Bart Van Assche --- include/linux/irq_poll.h | 5 ----- lib/irq_poll.c | 2 +- 2 files changed, 1 insertion(+), 6 deletions(-) (limited to 'lib') diff --git a/include/linux/irq_poll.h b/include/linux/irq_poll.h index 57efae661400..b4ad03cee9d4 100644 --- a/include/linux/irq_poll.h +++ b/include/linux/irq_poll.h @@ -18,11 +18,6 @@ enum { IRQ_POLL_F_DISABLE = 1, }; -static inline int irq_poll_disable_pending(struct irq_poll *iop) -{ - return test_bit(IRQ_POLL_F_DISABLE, &iop->state); -} - extern void irq_poll_sched(struct irq_poll *); extern void irq_poll_init(struct irq_poll *, int, irq_poll_fn *); extern void irq_poll_complete(struct irq_poll *); diff --git a/lib/irq_poll.c b/lib/irq_poll.c index 43a3370a09fd..53d67e341ebb 100644 --- a/lib/irq_poll.c +++ b/lib/irq_poll.c @@ -122,7 +122,7 @@ static void irq_poll_softirq(struct softirq_action *h) * move the instance around on the list at-will. */ if (work >= weight) { - if (irq_poll_disable_pending(iop)) + if (test_bit(IRQ_POLL_F_DISABLE, &iop->state)) __irq_poll_complete(iop); else list_move_tail(&iop->list, list); -- cgit v1.2.3 From 83af187d1b776753d58b53d155318d94f9428e92 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 7 Dec 2015 06:57:25 -0800 Subject: irq_poll: mark __irq_poll_complete static Signed-off-by: Christoph Hellwig Reviewed-by: Bart Van Assche --- include/linux/irq_poll.h | 1 - lib/irq_poll.c | 3 +-- 2 files changed, 1 insertion(+), 3 deletions(-) (limited to 'lib') diff --git a/include/linux/irq_poll.h b/include/linux/irq_poll.h index b4ad03cee9d4..8c4b4087f1f2 100644 --- a/include/linux/irq_poll.h +++ b/include/linux/irq_poll.h @@ -21,7 +21,6 @@ enum { extern void irq_poll_sched(struct irq_poll *); extern void irq_poll_init(struct irq_poll *, int, irq_poll_fn *); extern void irq_poll_complete(struct irq_poll *); -extern void __irq_poll_complete(struct irq_poll *); extern void irq_poll_enable(struct irq_poll *); extern void irq_poll_disable(struct irq_poll *); diff --git a/lib/irq_poll.c b/lib/irq_poll.c index 53d67e341ebb..2836620e889f 100644 --- a/lib/irq_poll.c +++ b/lib/irq_poll.c @@ -47,13 +47,12 @@ EXPORT_SYMBOL(irq_poll_sched); * See irq_poll_complete(). This function must be called with interrupts * disabled. **/ -void __irq_poll_complete(struct irq_poll *iop) +static void __irq_poll_complete(struct irq_poll *iop) { list_del(&iop->list); smp_mb__before_atomic(); clear_bit_unlock(IRQ_POLL_F_SCHED, &iop->state); } -EXPORT_SYMBOL(__irq_poll_complete); /** * irq_poll_complete - Mark this @iop as un-polled again -- cgit v1.2.3 From 9fd4470ff4974c41b1db43c3b355b9085af9c12a Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 17 Dec 2015 10:05:19 -0800 Subject: Use the new batched user accesses in generic user string handling This converts the generic user string functions to use the batched user access functions. It makes a big difference on Skylake, which is the first x86 microarchitecture to implement SMAP. The STAC/CLAC instructions are not very fast, and doing them for each access inside the loop that copies strings from user space (which is what the pathname handling does for every pathname the kernel uses, for example) is very inefficient. Signed-off-by: Linus Torvalds --- lib/strncpy_from_user.c | 11 ++++++++--- lib/strnlen_user.c | 18 ++++++++++++++---- 2 files changed, 22 insertions(+), 7 deletions(-) (limited to 'lib') diff --git a/lib/strncpy_from_user.c b/lib/strncpy_from_user.c index e0af6ff73d14..33840324138c 100644 --- a/lib/strncpy_from_user.c +++ b/lib/strncpy_from_user.c @@ -39,7 +39,7 @@ static inline long do_strncpy_from_user(char *dst, const char __user *src, long unsigned long c, data; /* Fall back to byte-at-a-time if we get a page fault */ - if (unlikely(__get_user(c,(unsigned long __user *)(src+res)))) + if (unlikely(unsafe_get_user(c,(unsigned long __user *)(src+res)))) break; *(unsigned long *)(dst+res) = c; if (has_zero(c, &data, &constants)) { @@ -55,7 +55,7 @@ byte_at_a_time: while (max) { char c; - if (unlikely(__get_user(c,src+res))) + if (unlikely(unsafe_get_user(c,src+res))) return -EFAULT; dst[res] = c; if (!c) @@ -107,7 +107,12 @@ long strncpy_from_user(char *dst, const char __user *src, long count) src_addr = (unsigned long)src; if (likely(src_addr < max_addr)) { unsigned long max = max_addr - src_addr; - return do_strncpy_from_user(dst, src, count, max); + long retval; + + user_access_begin(); + retval = do_strncpy_from_user(dst, src, count, max); + user_access_end(); + return retval; } return -EFAULT; } diff --git a/lib/strnlen_user.c b/lib/strnlen_user.c index 3a5f2b366d84..2625943625d7 100644 --- a/lib/strnlen_user.c +++ b/lib/strnlen_user.c @@ -45,7 +45,7 @@ static inline long do_strnlen_user(const char __user *src, unsigned long count, src -= align; max += align; - if (unlikely(__get_user(c,(unsigned long __user *)src))) + if (unlikely(unsafe_get_user(c,(unsigned long __user *)src))) return 0; c |= aligned_byte_mask(align); @@ -61,7 +61,7 @@ static inline long do_strnlen_user(const char __user *src, unsigned long count, if (unlikely(max <= sizeof(unsigned long))) break; max -= sizeof(unsigned long); - if (unlikely(__get_user(c,(unsigned long __user *)(src+res)))) + if (unlikely(unsafe_get_user(c,(unsigned long __user *)(src+res)))) return 0; } res -= align; @@ -112,7 +112,12 @@ long strnlen_user(const char __user *str, long count) src_addr = (unsigned long)str; if (likely(src_addr < max_addr)) { unsigned long max = max_addr - src_addr; - return do_strnlen_user(str, count, max); + long retval; + + user_access_begin(); + retval = do_strnlen_user(str, count, max); + user_access_end(); + return retval; } return 0; } @@ -141,7 +146,12 @@ long strlen_user(const char __user *str) src_addr = (unsigned long)str; if (likely(src_addr < max_addr)) { unsigned long max = max_addr - src_addr; - return do_strnlen_user(str, ~0ul, max); + long retval; + + user_access_begin(); + retval = do_strnlen_user(str, ~0ul, max); + user_access_end(); + return retval; } return 0; } -- cgit v1.2.3 From 47e0bbb7fa985a0f1b5794a8653fae4f8f49de77 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Wed, 9 Dec 2015 14:50:25 -0800 Subject: test: firmware_class: report errors properly on failure request_firmware() failures currently won't get reported at all (the error code is discarded). What's more, we get confusing messages, like: # echo -n notafile > /sys/devices/virtual/misc/test_firmware/trigger_request [ 8280.311856] test_firmware: loading 'notafile' [ 8280.317042] test_firmware: load of 'notafile' failed: -2 [ 8280.322445] test_firmware: loaded: 0 # echo $? 0 Report the failures via write() errors, and don't say we "loaded" anything. Signed-off-by: Brian Norris Acked-by: Kees Cook Signed-off-by: Shuah Khan --- lib/test_firmware.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/test_firmware.c b/lib/test_firmware.c index 86374c1c49a4..841191061816 100644 --- a/lib/test_firmware.c +++ b/lib/test_firmware.c @@ -65,14 +65,19 @@ static ssize_t trigger_request_store(struct device *dev, release_firmware(test_firmware); test_firmware = NULL; rc = request_firmware(&test_firmware, name, dev); - if (rc) + if (rc) { pr_info("load of '%s' failed: %d\n", name, rc); - pr_info("loaded: %zu\n", test_firmware ? test_firmware->size : 0); + goto out; + } + pr_info("loaded: %zu\n", test_firmware->size); + rc = count; + +out: mutex_unlock(&test_fw_mutex); kfree(name); - return count; + return rc; } static DEVICE_ATTR_WO(trigger_request); -- cgit v1.2.3 From be4a1326d12cce8df1f57017bf4112eaab437a38 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Wed, 9 Dec 2015 14:50:26 -0800 Subject: test: firmware_class: use kstrndup() where appropriate We're essentially just doing an open-coded kstrndup(). The only differences are with what happens after the first '\0' character, but request_firmware() doesn't care about that. Suggested-by: Kees Cook Signed-off-by: Brian Norris Signed-off-by: Shuah Khan --- lib/test_firmware.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/test_firmware.c b/lib/test_firmware.c index 841191061816..690b9c35a274 100644 --- a/lib/test_firmware.c +++ b/lib/test_firmware.c @@ -54,10 +54,9 @@ static ssize_t trigger_request_store(struct device *dev, int rc; char *name; - name = kzalloc(count + 1, GFP_KERNEL); + name = kstrndup(buf, count, GFP_KERNEL); if (!name) return -ENOSPC; - memcpy(name, buf, count); pr_info("loading '%s'\n", name); -- cgit v1.2.3 From eb910947c82f93cee6ee00a59a4ed86f12cf6e7f Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Wed, 9 Dec 2015 14:50:27 -0800 Subject: test: firmware_class: add asynchronous request trigger We might want to test for bugs like that found in commit f9692b2699bd ("firmware: fix possible use after free on name on asynchronous request"), where the asynchronous request API had race conditions. Let's add a simple file that will launch the async request, then wait until it's complete and report the status. It's not a true async test (we're using a mutex + wait_for_completion(), so we can't get more than one going at the same time), but it does help make sure the basic API is sane, and it can catch some class of bugs. Signed-off-by: Brian Norris Cc: Luis R. Rodriguez Acked-by: Kees Cook Signed-off-by: Shuah Khan --- lib/test_firmware.c | 65 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) (limited to 'lib') diff --git a/lib/test_firmware.c b/lib/test_firmware.c index 690b9c35a274..a3e8ec3fb1c5 100644 --- a/lib/test_firmware.c +++ b/lib/test_firmware.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -80,6 +81,57 @@ out: } static DEVICE_ATTR_WO(trigger_request); +static DECLARE_COMPLETION(async_fw_done); + +static void trigger_async_request_cb(const struct firmware *fw, void *context) +{ + test_firmware = fw; + complete(&async_fw_done); +} + +static ssize_t trigger_async_request_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + int rc; + char *name; + + name = kstrndup(buf, count, GFP_KERNEL); + if (!name) + return -ENOSPC; + + pr_info("loading '%s'\n", name); + + mutex_lock(&test_fw_mutex); + release_firmware(test_firmware); + test_firmware = NULL; + rc = request_firmware_nowait(THIS_MODULE, 1, name, dev, GFP_KERNEL, + NULL, trigger_async_request_cb); + if (rc) { + pr_info("async load of '%s' failed: %d\n", name, rc); + kfree(name); + goto out; + } + /* Free 'name' ASAP, to test for race conditions */ + kfree(name); + + wait_for_completion(&async_fw_done); + + if (test_firmware) { + pr_info("loaded: %zu\n", test_firmware->size); + rc = count; + } else { + pr_err("failed to async load firmware\n"); + rc = -ENODEV; + } + +out: + mutex_unlock(&test_fw_mutex); + + return rc; +} +static DEVICE_ATTR_WO(trigger_async_request); + static int __init test_firmware_init(void) { int rc; @@ -96,9 +148,20 @@ static int __init test_firmware_init(void) goto dereg; } + rc = device_create_file(test_fw_misc_device.this_device, + &dev_attr_trigger_async_request); + if (rc) { + pr_err("could not create async sysfs interface: %d\n", rc); + goto remove_file; + } + pr_warn("interface ready\n"); return 0; + +remove_file: + device_remove_file(test_fw_misc_device.this_device, + &dev_attr_trigger_async_request); dereg: misc_deregister(&test_fw_misc_device); return rc; @@ -109,6 +172,8 @@ module_init(test_firmware_init); static void __exit test_firmware_exit(void) { release_firmware(test_firmware); + device_remove_file(test_fw_misc_device.this_device, + &dev_attr_trigger_async_request); device_remove_file(test_fw_misc_device.this_device, &dev_attr_trigger_request); misc_deregister(&test_fw_misc_device); -- cgit v1.2.3 From 95ad97554ac81b31139d4fe5ed8757a07087cd90 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Fri, 15 Jan 2016 16:51:21 -0800 Subject: page-flags: introduce page flags policies wrt compound pages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds a third argument to macros which create function definitions for page flags. This argument defines how page-flags helpers behave on compound functions. For now we define four policies: - PF_ANY: the helper function operates on the page it gets, regardless if it's non-compound, head or tail. - PF_HEAD: the helper function operates on the head page of the compound page if it gets tail page. - PF_NO_TAIL: only head and non-compond pages are acceptable for this helper function. - PF_NO_COMPOUND: only non-compound pages are acceptable for this helper function. For now we use policy PF_ANY for all helpers, which matches current behaviour. We do not enforce the policy for TESTPAGEFLAG, because we have flags checked for random pages all over the kernel. Noticeable exception to this is PageTransHuge() which triggers VM_BUG_ON() for tail page. Signed-off-by: Kirill A. Shutemov Cc: Andrea Arcangeli Cc: Hugh Dickins Cc: Dave Hansen Cc: Mel Gorman Cc: Rik van Riel Cc: Vlastimil Babka Cc: Christoph Lameter Cc: Naoya Horiguchi Cc: Steve Capper Cc: "Aneesh Kumar K.V" Cc: Johannes Weiner Cc: Michal Hocko Cc: Jerome Marchand Cc: Jérôme Glisse Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmdebug.h | 6 ++ include/linux/page-flags.h | 166 ++++++++++++++++++++++++++++----------------- lib/Kconfig.debug | 8 +++ 3 files changed, 116 insertions(+), 64 deletions(-) (limited to 'lib') diff --git a/include/linux/mmdebug.h b/include/linux/mmdebug.h index 772362adf471..053824b0a412 100644 --- a/include/linux/mmdebug.h +++ b/include/linux/mmdebug.h @@ -56,4 +56,10 @@ void dump_mm(const struct mm_struct *mm); #define VIRTUAL_BUG_ON(cond) do { } while (0) #endif +#ifdef CONFIG_DEBUG_VM_PGFLAGS +#define VM_BUG_ON_PGFLAGS(cond, page) VM_BUG_ON_PAGE(cond, page) +#else +#define VM_BUG_ON_PGFLAGS(cond, page) BUILD_BUG_ON_INVALID(cond) +#endif + #endif diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 83161a22509c..12ab023b67f2 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -154,49 +154,80 @@ static inline int PageCompound(struct page *page) return test_bit(PG_head, &page->flags) || PageTail(page); } +/* + * Page flags policies wrt compound pages + * + * PF_ANY: + * the page flag is relevant for small, head and tail pages. + * + * PF_HEAD: + * for compound page all operations related to the page flag applied to + * head page. + * + * PF_NO_TAIL: + * modifications of the page flag must be done on small or head pages, + * checks can be done on tail pages too. + * + * PF_NO_COMPOUND: + * the page flag is not relevant for compound pages. + */ +#define PF_ANY(page, enforce) page +#define PF_HEAD(page, enforce) compound_head(page) +#define PF_NO_TAIL(page, enforce) ({ \ + VM_BUG_ON_PGFLAGS(enforce && PageTail(page), page); \ + compound_head(page);}) +#define PF_NO_COMPOUND(page, enforce) ({ \ + VM_BUG_ON_PGFLAGS(enforce && PageCompound(page), page); \ + page;}) + /* * Macros to create function definitions for page flags */ -#define TESTPAGEFLAG(uname, lname) \ -static inline int Page##uname(const struct page *page) \ - { return test_bit(PG_##lname, &page->flags); } +#define TESTPAGEFLAG(uname, lname, policy) \ +static inline int Page##uname(struct page *page) \ + { return test_bit(PG_##lname, &policy(page, 0)->flags); } -#define SETPAGEFLAG(uname, lname) \ +#define SETPAGEFLAG(uname, lname, policy) \ static inline void SetPage##uname(struct page *page) \ - { set_bit(PG_##lname, &page->flags); } + { set_bit(PG_##lname, &policy(page, 1)->flags); } -#define CLEARPAGEFLAG(uname, lname) \ +#define CLEARPAGEFLAG(uname, lname, policy) \ static inline void ClearPage##uname(struct page *page) \ - { clear_bit(PG_##lname, &page->flags); } + { clear_bit(PG_##lname, &policy(page, 1)->flags); } -#define __SETPAGEFLAG(uname, lname) \ +#define __SETPAGEFLAG(uname, lname, policy) \ static inline void __SetPage##uname(struct page *page) \ - { __set_bit(PG_##lname, &page->flags); } + { __set_bit(PG_##lname, &policy(page, 1)->flags); } -#define __CLEARPAGEFLAG(uname, lname) \ +#define __CLEARPAGEFLAG(uname, lname, policy) \ static inline void __ClearPage##uname(struct page *page) \ - { __clear_bit(PG_##lname, &page->flags); } + { __clear_bit(PG_##lname, &policy(page, 1)->flags); } -#define TESTSETFLAG(uname, lname) \ +#define TESTSETFLAG(uname, lname, policy) \ static inline int TestSetPage##uname(struct page *page) \ - { return test_and_set_bit(PG_##lname, &page->flags); } + { return test_and_set_bit(PG_##lname, &policy(page, 1)->flags); } -#define TESTCLEARFLAG(uname, lname) \ +#define TESTCLEARFLAG(uname, lname, policy) \ static inline int TestClearPage##uname(struct page *page) \ - { return test_and_clear_bit(PG_##lname, &page->flags); } + { return test_and_clear_bit(PG_##lname, &policy(page, 1)->flags); } -#define __TESTCLEARFLAG(uname, lname) \ +#define __TESTCLEARFLAG(uname, lname, policy) \ static inline int __TestClearPage##uname(struct page *page) \ - { return __test_and_clear_bit(PG_##lname, &page->flags); } + { return __test_and_clear_bit(PG_##lname, &policy(page, 1)->flags); } -#define PAGEFLAG(uname, lname) TESTPAGEFLAG(uname, lname) \ - SETPAGEFLAG(uname, lname) CLEARPAGEFLAG(uname, lname) +#define PAGEFLAG(uname, lname, policy) \ + TESTPAGEFLAG(uname, lname, policy) \ + SETPAGEFLAG(uname, lname, policy) \ + CLEARPAGEFLAG(uname, lname, policy) -#define __PAGEFLAG(uname, lname) TESTPAGEFLAG(uname, lname) \ - __SETPAGEFLAG(uname, lname) __CLEARPAGEFLAG(uname, lname) +#define __PAGEFLAG(uname, lname, policy) \ + TESTPAGEFLAG(uname, lname, policy) \ + __SETPAGEFLAG(uname, lname, policy) \ + __CLEARPAGEFLAG(uname, lname, policy) -#define TESTSCFLAG(uname, lname) \ - TESTSETFLAG(uname, lname) TESTCLEARFLAG(uname, lname) +#define TESTSCFLAG(uname, lname, policy) \ + TESTSETFLAG(uname, lname, policy) \ + TESTCLEARFLAG(uname, lname, policy) #define TESTPAGEFLAG_FALSE(uname) \ static inline int Page##uname(const struct page *page) { return 0; } @@ -225,46 +256,48 @@ static inline int __TestClearPage##uname(struct page *page) { return 0; } #define TESTSCFLAG_FALSE(uname) \ TESTSETFLAG_FALSE(uname) TESTCLEARFLAG_FALSE(uname) - -TESTPAGEFLAG(Locked, locked) -PAGEFLAG(Error, error) TESTCLEARFLAG(Error, error) -PAGEFLAG(Referenced, referenced) TESTCLEARFLAG(Referenced, referenced) - __SETPAGEFLAG(Referenced, referenced) -PAGEFLAG(Dirty, dirty) TESTSCFLAG(Dirty, dirty) __CLEARPAGEFLAG(Dirty, dirty) -PAGEFLAG(LRU, lru) __CLEARPAGEFLAG(LRU, lru) -PAGEFLAG(Active, active) __CLEARPAGEFLAG(Active, active) - TESTCLEARFLAG(Active, active) -__PAGEFLAG(Slab, slab) -PAGEFLAG(Checked, checked) /* Used by some filesystems */ -PAGEFLAG(Pinned, pinned) TESTSCFLAG(Pinned, pinned) /* Xen */ -PAGEFLAG(SavePinned, savepinned); /* Xen */ -PAGEFLAG(Foreign, foreign); /* Xen */ -PAGEFLAG(Reserved, reserved) __CLEARPAGEFLAG(Reserved, reserved) -PAGEFLAG(SwapBacked, swapbacked) __CLEARPAGEFLAG(SwapBacked, swapbacked) - __SETPAGEFLAG(SwapBacked, swapbacked) - -__PAGEFLAG(SlobFree, slob_free) +TESTPAGEFLAG(Locked, locked, PF_ANY) +PAGEFLAG(Error, error, PF_ANY) TESTCLEARFLAG(Error, error, PF_ANY) +PAGEFLAG(Referenced, referenced, PF_ANY) TESTCLEARFLAG(Referenced, referenced, PF_ANY) + __SETPAGEFLAG(Referenced, referenced, PF_ANY) +PAGEFLAG(Dirty, dirty, PF_ANY) TESTSCFLAG(Dirty, dirty, PF_ANY) + __CLEARPAGEFLAG(Dirty, dirty, PF_ANY) +PAGEFLAG(LRU, lru, PF_ANY) __CLEARPAGEFLAG(LRU, lru, PF_ANY) +PAGEFLAG(Active, active, PF_ANY) __CLEARPAGEFLAG(Active, active, PF_ANY) + TESTCLEARFLAG(Active, active, PF_ANY) +__PAGEFLAG(Slab, slab, PF_ANY) +PAGEFLAG(Checked, checked, PF_ANY) /* Used by some filesystems */ +PAGEFLAG(Pinned, pinned, PF_ANY) TESTSCFLAG(Pinned, pinned, PF_ANY) /* Xen */ +PAGEFLAG(SavePinned, savepinned, PF_ANY); /* Xen */ +PAGEFLAG(Foreign, foreign, PF_ANY); /* Xen */ +PAGEFLAG(Reserved, reserved, PF_ANY) __CLEARPAGEFLAG(Reserved, reserved, PF_ANY) +PAGEFLAG(SwapBacked, swapbacked, PF_ANY) + __CLEARPAGEFLAG(SwapBacked, swapbacked, PF_ANY) + __SETPAGEFLAG(SwapBacked, swapbacked, PF_ANY) + +__PAGEFLAG(SlobFree, slob_free, PF_ANY) /* * Private page markings that may be used by the filesystem that owns the page * for its own purposes. * - PG_private and PG_private_2 cause releasepage() and co to be invoked */ -PAGEFLAG(Private, private) __SETPAGEFLAG(Private, private) - __CLEARPAGEFLAG(Private, private) -PAGEFLAG(Private2, private_2) TESTSCFLAG(Private2, private_2) -PAGEFLAG(OwnerPriv1, owner_priv_1) TESTCLEARFLAG(OwnerPriv1, owner_priv_1) +PAGEFLAG(Private, private, PF_ANY) __SETPAGEFLAG(Private, private, PF_ANY) + __CLEARPAGEFLAG(Private, private, PF_ANY) +PAGEFLAG(Private2, private_2, PF_ANY) TESTSCFLAG(Private2, private_2, PF_ANY) +PAGEFLAG(OwnerPriv1, owner_priv_1, PF_ANY) + TESTCLEARFLAG(OwnerPriv1, owner_priv_1, PF_ANY) /* * Only test-and-set exist for PG_writeback. The unconditional operators are * risky: they bypass page accounting. */ -TESTPAGEFLAG(Writeback, writeback) TESTSCFLAG(Writeback, writeback) -PAGEFLAG(MappedToDisk, mappedtodisk) +TESTPAGEFLAG(Writeback, writeback, PF_ANY) TESTSCFLAG(Writeback, writeback, PF_ANY) +PAGEFLAG(MappedToDisk, mappedtodisk, PF_ANY) /* PG_readahead is only used for reads; PG_reclaim is only for writes */ -PAGEFLAG(Reclaim, reclaim) TESTCLEARFLAG(Reclaim, reclaim) -PAGEFLAG(Readahead, reclaim) TESTCLEARFLAG(Readahead, reclaim) +PAGEFLAG(Reclaim, reclaim, PF_ANY) TESTCLEARFLAG(Reclaim, reclaim, PF_ANY) +PAGEFLAG(Readahead, reclaim, PF_ANY) TESTCLEARFLAG(Readahead, reclaim, PF_ANY) #ifdef CONFIG_HIGHMEM /* @@ -277,31 +310,32 @@ PAGEFLAG_FALSE(HighMem) #endif #ifdef CONFIG_SWAP -PAGEFLAG(SwapCache, swapcache) +PAGEFLAG(SwapCache, swapcache, PF_ANY) #else PAGEFLAG_FALSE(SwapCache) #endif -PAGEFLAG(Unevictable, unevictable) __CLEARPAGEFLAG(Unevictable, unevictable) - TESTCLEARFLAG(Unevictable, unevictable) +PAGEFLAG(Unevictable, unevictable, PF_ANY) + __CLEARPAGEFLAG(Unevictable, unevictable, PF_ANY) + TESTCLEARFLAG(Unevictable, unevictable, PF_ANY) #ifdef CONFIG_MMU -PAGEFLAG(Mlocked, mlocked) __CLEARPAGEFLAG(Mlocked, mlocked) - TESTSCFLAG(Mlocked, mlocked) __TESTCLEARFLAG(Mlocked, mlocked) +PAGEFLAG(Mlocked, mlocked, PF_ANY) __CLEARPAGEFLAG(Mlocked, mlocked, PF_ANY) + TESTSCFLAG(Mlocked, mlocked, PF_ANY) __TESTCLEARFLAG(Mlocked, mlocked, PF_ANY) #else PAGEFLAG_FALSE(Mlocked) __CLEARPAGEFLAG_NOOP(Mlocked) TESTSCFLAG_FALSE(Mlocked) __TESTCLEARFLAG_FALSE(Mlocked) #endif #ifdef CONFIG_ARCH_USES_PG_UNCACHED -PAGEFLAG(Uncached, uncached) +PAGEFLAG(Uncached, uncached, PF_ANY) #else PAGEFLAG_FALSE(Uncached) #endif #ifdef CONFIG_MEMORY_FAILURE -PAGEFLAG(HWPoison, hwpoison) -TESTSCFLAG(HWPoison, hwpoison) +PAGEFLAG(HWPoison, hwpoison, PF_ANY) +TESTSCFLAG(HWPoison, hwpoison, PF_ANY) #define __PG_HWPOISON (1UL << PG_hwpoison) #else PAGEFLAG_FALSE(HWPoison) @@ -309,10 +343,10 @@ PAGEFLAG_FALSE(HWPoison) #endif #if defined(CONFIG_IDLE_PAGE_TRACKING) && defined(CONFIG_64BIT) -TESTPAGEFLAG(Young, young) -SETPAGEFLAG(Young, young) -TESTCLEARFLAG(Young, young) -PAGEFLAG(Idle, idle) +TESTPAGEFLAG(Young, young, PF_ANY) +SETPAGEFLAG(Young, young, PF_ANY) +TESTCLEARFLAG(Young, young, PF_ANY) +PAGEFLAG(Idle, idle, PF_ANY) #endif /* @@ -393,7 +427,7 @@ static inline void SetPageUptodate(struct page *page) set_bit(PG_uptodate, &(page)->flags); } -CLEARPAGEFLAG(Uptodate, uptodate) +CLEARPAGEFLAG(Uptodate, uptodate, PF_ANY) int test_clear_page_writeback(struct page *page); int __test_set_page_writeback(struct page *page, bool keep_write); @@ -413,7 +447,7 @@ static inline void set_page_writeback_keepwrite(struct page *page) test_set_page_writeback_keepwrite(page); } -__PAGEFLAG(Head, head) CLEARPAGEFLAG(Head, head) +__PAGEFLAG(Head, head, PF_ANY) CLEARPAGEFLAG(Head, head, PF_ANY) static inline void set_compound_head(struct page *page, struct page *head) { @@ -615,6 +649,10 @@ static inline int page_has_private(struct page *page) return !!(page->flags & PAGE_FLAGS_PRIVATE); } +#undef PF_ANY +#undef PF_HEAD +#undef PF_NO_TAIL +#undef PF_NO_COMPOUND #endif /* !__GENERATING_BOUNDS_H */ #endif /* PAGE_FLAGS_H */ diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index ee1ac1cc082c..8fbdef1980a5 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -580,6 +580,14 @@ config DEBUG_VM_RB If unsure, say N. +config DEBUG_VM_PGFLAGS + bool "Debug page-flags operations" + depends on DEBUG_VM + help + Enables extra validation on page flags operations. + + If unsure, say N. + config DEBUG_VIRTUAL bool "Debug VM translations" depends on DEBUG_KERNEL && X86 -- cgit v1.2.3 From 5c2c2587b13235bf8b5c9027589f22eff68bdf49 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 15 Jan 2016 16:56:49 -0800 Subject: mm, dax, pmem: introduce {get|put}_dev_pagemap() for dax-gup get_dev_page() enables paths like get_user_pages() to pin a dynamically mapped pfn-range (devm_memremap_pages()) while the resulting struct page objects are in use. Unlike get_page() it may fail if the device is, or is in the process of being, disabled. While the initial lookup of the range may be an expensive list walk, the result is cached to speed up subsequent lookups which are likely to be in the same mapped range. devm_memremap_pages() now requires a reference counter to be specified at init time. For pmem this means moving request_queue allocation into pmem_alloc() so the existing queue usage counter can track "device pages". ZONE_DEVICE pages always have an elevated count and will never be on an lru reclaim list. That space in 'struct page' can be redirected for other uses, but for safety introduce a poison value that will always trip __list_add() to assert. This allows half of the struct list_head storage to be reclaimed with some assurance to back up the assumption that the page count never goes to zero and a list_add() is never attempted. Signed-off-by: Dan Williams Tested-by: Logan Gunthorpe Cc: Dave Hansen Cc: Matthew Wilcox Cc: Ross Zwisler Cc: Alexander Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/nvdimm/pmem.c | 6 ++++-- include/linux/list.h | 11 ++++++++++ include/linux/memremap.h | 49 ++++++++++++++++++++++++++++++++++++++++++-- include/linux/mm_types.h | 5 +++++ kernel/memremap.c | 53 ++++++++++++++++++++++++++++++++++++++++++++---- lib/list_debug.c | 9 ++++++++ 6 files changed, 125 insertions(+), 8 deletions(-) (limited to 'lib') diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 328173d7e1ac..7edf31671dab 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -184,7 +184,7 @@ static struct pmem_device *pmem_alloc(struct device *dev, pmem->pfn_flags = PFN_DEV; if (pmem_should_map_pages(dev)) { pmem->virt_addr = (void __pmem *) devm_memremap_pages(dev, res, - NULL); + &q->q_usage_counter, NULL); pmem->pfn_flags |= PFN_MAP; } else pmem->virt_addr = (void __pmem *) devm_memremap(dev, @@ -365,6 +365,7 @@ static int nvdimm_namespace_attach_pfn(struct nd_namespace_common *ndns) struct vmem_altmap *altmap; struct nd_pfn_sb *pfn_sb; struct pmem_device *pmem; + struct request_queue *q; phys_addr_t offset; int rc; struct vmem_altmap __altmap = { @@ -406,9 +407,10 @@ static int nvdimm_namespace_attach_pfn(struct nd_namespace_common *ndns) /* establish pfn range for lookup, and switch to direct map */ pmem = dev_get_drvdata(dev); + q = pmem->pmem_queue; devm_memunmap(dev, (void __force *) pmem->virt_addr); pmem->virt_addr = (void __pmem *) devm_memremap_pages(dev, &nsio->res, - altmap); + &q->q_usage_counter, altmap); pmem->pfn_flags |= PFN_MAP; if (IS_ERR(pmem->virt_addr)) { rc = PTR_ERR(pmem->virt_addr); diff --git a/include/linux/list.h b/include/linux/list.h index 5356f4d661a7..30cf4200ab40 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -113,6 +113,17 @@ extern void __list_del_entry(struct list_head *entry); extern void list_del(struct list_head *entry); #endif +#ifdef CONFIG_DEBUG_LIST +/* + * See devm_memremap_pages() which wants DEBUG_LIST=y to assert if one + * of the pages it allocates is ever passed to list_add() + */ +extern void list_force_poison(struct list_head *entry); +#else +/* fallback to the less strict LIST_POISON* definitions */ +#define list_force_poison list_del +#endif + /** * list_replace - replace old entry by new one * @old : the element to be replaced diff --git a/include/linux/memremap.h b/include/linux/memremap.h index aa3e82a80d7b..bcaa634139a9 100644 --- a/include/linux/memremap.h +++ b/include/linux/memremap.h @@ -1,6 +1,8 @@ #ifndef _LINUX_MEMREMAP_H_ #define _LINUX_MEMREMAP_H_ #include +#include +#include struct resource; struct device; @@ -36,21 +38,25 @@ static inline struct vmem_altmap *to_vmem_altmap(unsigned long memmap_start) /** * struct dev_pagemap - metadata for ZONE_DEVICE mappings * @altmap: pre-allocated/reserved memory for vmemmap allocations + * @res: physical address range covered by @ref + * @ref: reference count that pins the devm_memremap_pages() mapping * @dev: host device of the mapping for debug */ struct dev_pagemap { struct vmem_altmap *altmap; const struct resource *res; + struct percpu_ref *ref; struct device *dev; }; #ifdef CONFIG_ZONE_DEVICE void *devm_memremap_pages(struct device *dev, struct resource *res, - struct vmem_altmap *altmap); + struct percpu_ref *ref, struct vmem_altmap *altmap); struct dev_pagemap *find_dev_pagemap(resource_size_t phys); #else static inline void *devm_memremap_pages(struct device *dev, - struct resource *res, struct vmem_altmap *altmap) + struct resource *res, struct percpu_ref *ref, + struct vmem_altmap *altmap) { /* * Fail attempts to call devm_memremap_pages() without @@ -66,4 +72,43 @@ static inline struct dev_pagemap *find_dev_pagemap(resource_size_t phys) return NULL; } #endif + +/** + * get_dev_pagemap() - take a new live reference on the dev_pagemap for @pfn + * @pfn: page frame number to lookup page_map + * @pgmap: optional known pgmap that already has a reference + * + * @pgmap allows the overhead of a lookup to be bypassed when @pfn lands in the + * same mapping. + */ +static inline struct dev_pagemap *get_dev_pagemap(unsigned long pfn, + struct dev_pagemap *pgmap) +{ + const struct resource *res = pgmap ? pgmap->res : NULL; + resource_size_t phys = PFN_PHYS(pfn); + + /* + * In the cached case we're already holding a live reference so + * we can simply do a blind increment + */ + if (res && phys >= res->start && phys <= res->end) { + percpu_ref_get(pgmap->ref); + return pgmap; + } + + /* fall back to slow path lookup */ + rcu_read_lock(); + pgmap = find_dev_pagemap(phys); + if (pgmap && !percpu_ref_tryget_live(pgmap->ref)) + pgmap = NULL; + rcu_read_unlock(); + + return pgmap; +} + +static inline void put_dev_pagemap(struct dev_pagemap *pgmap) +{ + if (pgmap) + percpu_ref_put(pgmap->ref); +} #endif /* _LINUX_MEMREMAP_H_ */ diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 2dd9c313a8c0..d3ebb9d21a53 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -116,6 +116,11 @@ struct page { * Can be used as a generic list * by the page owner. */ + struct dev_pagemap *pgmap; /* ZONE_DEVICE pages are never on an + * lru or handled by a slab + * allocator, this points to the + * hosting device page map. + */ struct { /* slub per cpu partial pages */ struct page *next; /* Next partial slab */ #ifdef CONFIG_64BIT diff --git a/kernel/memremap.c b/kernel/memremap.c index 562f6471fe90..3eb8944265d5 100644 --- a/kernel/memremap.c +++ b/kernel/memremap.c @@ -179,6 +179,29 @@ static void pgmap_radix_release(struct resource *res) mutex_unlock(&pgmap_lock); } +static unsigned long pfn_first(struct page_map *page_map) +{ + struct dev_pagemap *pgmap = &page_map->pgmap; + const struct resource *res = &page_map->res; + struct vmem_altmap *altmap = pgmap->altmap; + unsigned long pfn; + + pfn = res->start >> PAGE_SHIFT; + if (altmap) + pfn += vmem_altmap_offset(altmap); + return pfn; +} + +static unsigned long pfn_end(struct page_map *page_map) +{ + const struct resource *res = &page_map->res; + + return (res->start + resource_size(res)) >> PAGE_SHIFT; +} + +#define for_each_device_pfn(pfn, map) \ + for (pfn = pfn_first(map); pfn < pfn_end(map); pfn++) + static void devm_memremap_pages_release(struct device *dev, void *data) { struct page_map *page_map = data; @@ -186,6 +209,11 @@ static void devm_memremap_pages_release(struct device *dev, void *data) resource_size_t align_start, align_size; struct dev_pagemap *pgmap = &page_map->pgmap; + if (percpu_ref_tryget_live(pgmap->ref)) { + dev_WARN(dev, "%s: page mapping is still live!\n", __func__); + percpu_ref_put(pgmap->ref); + } + pgmap_radix_release(res); /* pages are dead and unused, undo the arch mapping */ @@ -211,20 +239,26 @@ struct dev_pagemap *find_dev_pagemap(resource_size_t phys) * devm_memremap_pages - remap and provide memmap backing for the given resource * @dev: hosting device for @res * @res: "host memory" address range + * @ref: a live per-cpu reference count * @altmap: optional descriptor for allocating the memmap from @res * - * Note, the expectation is that @res is a host memory range that could - * feasibly be treated as a "System RAM" range, i.e. not a device mmio - * range, but this is not enforced. + * Notes: + * 1/ @ref must be 'live' on entry and 'dead' before devm_memunmap_pages() time + * (or devm release event). + * + * 2/ @res is expected to be a host memory range that could feasibly be + * treated as a "System RAM" range, i.e. not a device mmio range, but + * this is not enforced. */ void *devm_memremap_pages(struct device *dev, struct resource *res, - struct vmem_altmap *altmap) + struct percpu_ref *ref, struct vmem_altmap *altmap) { int is_ram = region_intersects(res->start, resource_size(res), "System RAM"); resource_size_t key, align_start, align_size; struct dev_pagemap *pgmap; struct page_map *page_map; + unsigned long pfn; int error, nid; if (is_ram == REGION_MIXED) { @@ -242,6 +276,9 @@ void *devm_memremap_pages(struct device *dev, struct resource *res, return ERR_PTR(-ENXIO); } + if (!ref) + return ERR_PTR(-EINVAL); + page_map = devres_alloc_node(devm_memremap_pages_release, sizeof(*page_map), GFP_KERNEL, dev_to_node(dev)); if (!page_map) @@ -255,6 +292,7 @@ void *devm_memremap_pages(struct device *dev, struct resource *res, memcpy(&page_map->altmap, altmap, sizeof(*altmap)); pgmap->altmap = &page_map->altmap; } + pgmap->ref = ref; pgmap->res = &page_map->res; mutex_lock(&pgmap_lock); @@ -292,6 +330,13 @@ void *devm_memremap_pages(struct device *dev, struct resource *res, if (error) goto err_add_memory; + for_each_device_pfn(pfn, page_map) { + struct page *page = pfn_to_page(pfn); + + /* ZONE_DEVICE pages must never appear on a slab lru */ + list_force_poison(&page->lru); + page->pgmap = pgmap; + } devres_add(dev, page_map); return __va(res->start); diff --git a/lib/list_debug.c b/lib/list_debug.c index 3859bf63561c..3345a089ef7b 100644 --- a/lib/list_debug.c +++ b/lib/list_debug.c @@ -12,6 +12,13 @@ #include #include +static struct list_head force_poison; +void list_force_poison(struct list_head *entry) +{ + entry->next = &force_poison; + entry->prev = &force_poison; +} + /* * Insert a new entry between two known consecutive entries. * @@ -23,6 +30,8 @@ void __list_add(struct list_head *new, struct list_head *prev, struct list_head *next) { + WARN(new->next == &force_poison || new->prev == &force_poison, + "list_add attempted on force-poisoned entry\n"); WARN(next->prev != prev, "list_add corruption. next->prev should be " "prev (%p), but was %p. (next=%p).\n", -- cgit v1.2.3 From da48d094ce5d7c7dcdad9011648a81c42fd1c2ef Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 15 Jan 2016 16:58:07 -0800 Subject: Kconfig: remove HAVE_LATENCYTOP_SUPPORT As illustrated by commit a3afe70b83fd ("[S390] latencytop s390 support."), HAVE_LATENCYTOP_SUPPORT is defined by an architecture to advertise an implementation of save_stack_trace_tsk. However, as of 9212ddb5eada ("stacktrace: provide save_stack_trace_tsk() weak alias") a dummy implementation is provided if STACKTRACE=y. Given that LATENCYTOP already depends on STACKTRACE_SUPPORT and selects STACKTRACE, we can remove HAVE_LATENCYTOP_SUPPORT altogether. Signed-off-by: Will Deacon Acked-by: Heiko Carstens Cc: Vineet Gupta Cc: Russell King Cc: James Hogan Cc: Michal Simek Cc: Helge Deller Acked-by: Michael Ellerman Cc: "David S. Miller" Cc: Guan Xuetao Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arc/Kconfig | 3 --- arch/arm/Kconfig | 5 ----- arch/metag/Kconfig | 3 --- arch/microblaze/Kconfig | 3 --- arch/parisc/Kconfig | 3 --- arch/powerpc/Kconfig | 3 --- arch/s390/Kconfig | 3 --- arch/sh/Kconfig | 3 --- arch/sparc/Kconfig | 4 ---- arch/unicore32/Kconfig | 3 --- arch/x86/Kconfig | 3 --- lib/Kconfig.debug | 1 - 12 files changed, 37 deletions(-) (limited to 'lib') diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index 6312f607932f..76dde9db7934 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -73,9 +73,6 @@ config STACKTRACE_SUPPORT def_bool y select STACKTRACE -config HAVE_LATENCYTOP_SUPPORT - def_bool y - config HAVE_ARCH_TRANSPARENT_HUGEPAGE def_bool y depends on ARC_MMU_V4 diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 4e489cc5c45e..6a889afa6a2c 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -168,11 +168,6 @@ config STACKTRACE_SUPPORT bool default y -config HAVE_LATENCYTOP_SUPPORT - bool - depends on !SMP - default y - config LOCKDEP_SUPPORT bool default y diff --git a/arch/metag/Kconfig b/arch/metag/Kconfig index 0b389a81c43a..a0fa88da3e31 100644 --- a/arch/metag/Kconfig +++ b/arch/metag/Kconfig @@ -36,9 +36,6 @@ config STACKTRACE_SUPPORT config LOCKDEP_SUPPORT def_bool y -config HAVE_LATENCYTOP_SUPPORT - def_bool y - config RWSEM_GENERIC_SPINLOCK def_bool y diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig index 0bce820428fc..5ecd0287a874 100644 --- a/arch/microblaze/Kconfig +++ b/arch/microblaze/Kconfig @@ -67,9 +67,6 @@ config STACKTRACE_SUPPORT config LOCKDEP_SUPPORT def_bool y -config HAVE_LATENCYTOP_SUPPORT - def_bool y - source "init/Kconfig" source "kernel/Kconfig.freezer" diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 729f89163bc3..7c34cafdf301 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -79,9 +79,6 @@ config TIME_LOW_RES depends on SMP default y -config HAVE_LATENCYTOP_SUPPORT - def_bool y - # unless you want to implement ACPI on PA-RISC ... ;-) config PM bool diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 7d5a8350f913..94f6c5089e0c 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -47,9 +47,6 @@ config STACKTRACE_SUPPORT bool default y -config HAVE_LATENCYTOP_SUPPORT - def_bool y - config TRACE_IRQFLAGS_SUPPORT bool default y diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 24490344c30f..dbeeb3a049f2 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -10,9 +10,6 @@ config LOCKDEP_SUPPORT config STACKTRACE_SUPPORT def_bool y -config HAVE_LATENCYTOP_SUPPORT - def_bool y - config RWSEM_GENERIC_SPINLOCK bool diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index d514df7e04dd..6c391a5d3e5c 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -130,9 +130,6 @@ config STACKTRACE_SUPPORT config LOCKDEP_SUPPORT def_bool y -config HAVE_LATENCYTOP_SUPPORT - def_bool y - config ARCH_HAS_ILOG2_U32 def_bool n diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 56442d2d7bbc..3203e42190dd 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -101,10 +101,6 @@ config LOCKDEP_SUPPORT bool default y if SPARC64 -config HAVE_LATENCYTOP_SUPPORT - bool - default y if SPARC64 - config ARCH_HIBERNATION_POSSIBLE def_bool y if SPARC64 diff --git a/arch/unicore32/Kconfig b/arch/unicore32/Kconfig index 5dc4c0a43ccd..877342640b6e 100644 --- a/arch/unicore32/Kconfig +++ b/arch/unicore32/Kconfig @@ -34,9 +34,6 @@ config NO_IOPORT_MAP config STACKTRACE_SUPPORT def_bool y -config HAVE_LATENCYTOP_SUPPORT - def_bool y - config LOCKDEP_SUPPORT def_bool y diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 24f362bf3ec6..4a10ba9e95da 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -180,9 +180,6 @@ config LOCKDEP_SUPPORT config STACKTRACE_SUPPORT def_bool y -config HAVE_LATENCYTOP_SUPPORT - def_bool y - config MMU def_bool y diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 8fbdef1980a5..f75a33f29f6e 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1597,7 +1597,6 @@ config FAULT_INJECTION_STACKTRACE_FILTER config LATENCYTOP bool "Latency measuring infrastructure" - depends on HAVE_LATENCYTOP_SUPPORT depends on DEBUG_KERNEL depends on STACKTRACE_SUPPORT depends on PROC_FS -- cgit v1.2.3 From cfccde04e28d26e9f102e535b7358bf6cf3bc6fb Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Fri, 15 Jan 2016 16:58:28 -0800 Subject: lib/vsprintf.c: pull out padding code from dentry_name() Pull out the logic in dentry_name() which handles field width space padding, in preparation for reusing it from string(). Rename the widen() helper to move_right(), since it is used for handling the !(flags & LEFT) case. Signed-off-by: Rasmus Villemoes Cc: Al Viro Cc: Andy Shevchenko Cc: Ingo Molnar Cc: Joe Perches Cc: Kees Cook Cc: Maurizio Lombardi Cc: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/vsprintf.c | 46 +++++++++++++++++++++++++++++++--------------- 1 file changed, 31 insertions(+), 15 deletions(-) (limited to 'lib') diff --git a/lib/vsprintf.c b/lib/vsprintf.c index ac3f9476b776..3f37b478c429 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -541,7 +541,7 @@ char *string(char *buf, char *end, const char *s, struct printf_spec spec) return buf; } -static void widen(char *buf, char *end, unsigned len, unsigned spaces) +static void move_right(char *buf, char *end, unsigned len, unsigned spaces) { size_t size; if (buf >= end) /* nowhere to put anything */ @@ -559,6 +559,35 @@ static void widen(char *buf, char *end, unsigned len, unsigned spaces) memset(buf, ' ', spaces); } +/* + * Handle field width padding for a string. + * @buf: current buffer position + * @n: length of string + * @end: end of output buffer + * @spec: for field width and flags + * Returns: new buffer position after padding. + */ +static noinline_for_stack +char *widen_string(char *buf, int n, char *end, struct printf_spec spec) +{ + unsigned spaces; + + if (likely(n >= spec.field_width)) + return buf; + /* we want to pad the sucker */ + spaces = spec.field_width - n; + if (!(spec.flags & LEFT)) { + move_right(buf - n, end, n, spaces); + return buf + spaces; + } + while (spaces--) { + if (buf < end) + *buf = ' '; + ++buf; + } + return buf; +} + static noinline_for_stack char *dentry_name(char *buf, char *end, const struct dentry *d, struct printf_spec spec, const char *fmt) @@ -600,20 +629,7 @@ char *dentry_name(char *buf, char *end, const struct dentry *d, struct printf_sp *buf = c; } rcu_read_unlock(); - if (n < spec.field_width) { - /* we want to pad the sucker */ - unsigned spaces = spec.field_width - n; - if (!(spec.flags & LEFT)) { - widen(buf - n, end, n, spaces); - return buf + spaces; - } - while (spaces--) { - if (buf < end) - *buf = ' '; - ++buf; - } - } - return buf; + return widen_string(buf, n, end, spec); } #ifdef CONFIG_BLOCK -- cgit v1.2.3 From 95508cfa10503bbe5adb78bd1b2b56a01ab3dd2d Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Fri, 15 Jan 2016 16:58:31 -0800 Subject: lib/vsprintf.c: move string() below widen_string() This is pure code movement, making sure the widen_string() helper is defined before the string() function. Signed-off-by: Rasmus Villemoes Cc: Al Viro Cc: Andy Shevchenko Cc: Ingo Molnar Cc: Joe Perches Cc: Kees Cook Cc: Maurizio Lombardi Cc: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/vsprintf.c | 62 +++++++++++++++++++++++++++++----------------------------- 1 file changed, 31 insertions(+), 31 deletions(-) (limited to 'lib') diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 3f37b478c429..854d597ba23c 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -510,37 +510,6 @@ char *number(char *buf, char *end, unsigned long long num, return buf; } -static noinline_for_stack -char *string(char *buf, char *end, const char *s, struct printf_spec spec) -{ - int len, i; - - if ((unsigned long)s < PAGE_SIZE) - s = "(null)"; - - len = strnlen(s, spec.precision); - - if (!(spec.flags & LEFT)) { - while (len < spec.field_width--) { - if (buf < end) - *buf = ' '; - ++buf; - } - } - for (i = 0; i < len; ++i) { - if (buf < end) - *buf = *s; - ++buf; ++s; - } - while (len < spec.field_width--) { - if (buf < end) - *buf = ' '; - ++buf; - } - - return buf; -} - static void move_right(char *buf, char *end, unsigned len, unsigned spaces) { size_t size; @@ -588,6 +557,37 @@ char *widen_string(char *buf, int n, char *end, struct printf_spec spec) return buf; } +static noinline_for_stack +char *string(char *buf, char *end, const char *s, struct printf_spec spec) +{ + int len, i; + + if ((unsigned long)s < PAGE_SIZE) + s = "(null)"; + + len = strnlen(s, spec.precision); + + if (!(spec.flags & LEFT)) { + while (len < spec.field_width--) { + if (buf < end) + *buf = ' '; + ++buf; + } + } + for (i = 0; i < len; ++i) { + if (buf < end) + *buf = *s; + ++buf; ++s; + } + while (len < spec.field_width--) { + if (buf < end) + *buf = ' '; + ++buf; + } + + return buf; +} + static noinline_for_stack char *dentry_name(char *buf, char *end, const struct dentry *d, struct printf_spec spec, const char *fmt) -- cgit v1.2.3 From 34fc8b9076254513e00607134792a1b67483d2cf Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Fri, 15 Jan 2016 16:58:34 -0800 Subject: lib/vsprintf.c: eliminate potential race in string() If the string corresponding to a %s specifier can change under us, we might end up copying a \0 byte to the output buffer. There might be callers who expect the output buffer to contain a genuine C string whose length is exactly the snprintf return value (assuming truncation hasn't happened or has been checked for). We can avoid this by only passing over the source string once, stopping the first time we meet a nul byte (or when we reach the given precision), and then letting widen_string() handle left/right space padding. As a small bonus, this code reuse also makes the generated code slightly smaller. Signed-off-by: Rasmus Villemoes Cc: Al Viro Cc: Andy Shevchenko Cc: Ingo Molnar Cc: Joe Perches Cc: Kees Cook Cc: Maurizio Lombardi Cc: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/vsprintf.c | 28 +++++++++------------------- 1 file changed, 9 insertions(+), 19 deletions(-) (limited to 'lib') diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 854d597ba23c..c537a73a62e8 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -560,32 +560,22 @@ char *widen_string(char *buf, int n, char *end, struct printf_spec spec) static noinline_for_stack char *string(char *buf, char *end, const char *s, struct printf_spec spec) { - int len, i; + int len = 0; + size_t lim = spec.precision; if ((unsigned long)s < PAGE_SIZE) s = "(null)"; - len = strnlen(s, spec.precision); - - if (!(spec.flags & LEFT)) { - while (len < spec.field_width--) { - if (buf < end) - *buf = ' '; - ++buf; - } - } - for (i = 0; i < len; ++i) { - if (buf < end) - *buf = *s; - ++buf; ++s; - } - while (len < spec.field_width--) { + while (lim--) { + char c = *s++; + if (!c) + break; if (buf < end) - *buf = ' '; + *buf = c; ++buf; + ++len; } - - return buf; + return widen_string(buf, len, end, spec); } static noinline_for_stack -- cgit v1.2.3 From d048419311ff16ba420f4b1bdf93a3d74057b53a Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Fri, 15 Jan 2016 16:58:37 -0800 Subject: lib/vsprintf.c: expand field_width to 24 bits Maurizio Lombardi reported a problem [1] with the %pb extension: It doesn't work for sufficiently large bitmaps, since the size is stashed in the field_width field of the struct printf_spec, which is currently an s16. Concretely, this manifested itself in /sys/bus/pseudo/drivers/scsi_debug/map being empty, since the bitmap printer got a size of 0, which is the 16 bit truncation of the actual bitmap size. We do want to keep struct printf_spec at 8 bytes so that it can cheaply be passed by value. The qualifier field is only used for internal bookkeeping in format_decode, so we might as well use a local variable for that. This gives us an additional 8 bits, which we can then use for the field width. To stay in 8 bytes, we need to do a little rearranging and make the type member a bitfield as well. For consistency, change all the members to bit fields. gcc doesn't generate much worse code with these changes (in fact, bloat-o-meter says we save 300 bytes - which I think is a little surprising). I didn't find a BUILD_BUG/compiletime_assertion/... which would work outside function context, so for now I just open-coded it. [1] http://thread.gmane.org/gmane.linux.kernel/2034835 [akpm@linux-foundation.org: avoid open-coded BUILD_BUG_ON] Signed-off-by: Rasmus Villemoes Reported-by: Maurizio Lombardi Acked-by: Tejun Heo Cc: Al Viro Cc: Andy Shevchenko Cc: Ingo Molnar Cc: Joe Perches Cc: Kees Cook Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/vsprintf.c | 42 ++++++++++++++++++++++-------------------- 1 file changed, 22 insertions(+), 20 deletions(-) (limited to 'lib') diff --git a/lib/vsprintf.c b/lib/vsprintf.c index c537a73a62e8..c09182e07b05 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -383,13 +383,12 @@ enum format_type { }; struct printf_spec { - u8 type; /* format_type enum */ - u8 flags; /* flags to number() */ - u8 base; /* number base, 8, 10 or 16 only */ - u8 qualifier; /* number qualifier, one of 'hHlLtzZ' */ - s16 field_width; /* width of output field */ - s16 precision; /* # of digits/chars */ -}; + unsigned int type:8; /* format_type enum */ + signed int field_width:24; /* width of output field */ + unsigned int flags:8; /* flags to number() */ + unsigned int base:8; /* number base, 8, 10 or 16 only */ + signed int precision:16; /* # of digits/chars */ +} __packed; static noinline_for_stack char *number(char *buf, char *end, unsigned long long num, @@ -403,6 +402,8 @@ char *number(char *buf, char *end, unsigned long long num, int i; bool is_zero = num == 0LL; + BUILD_BUG_ON(sizeof(struct printf_spec) != 8); + /* locase = 0 or 0x20. ORing digits or letters with 'locase' * produces same digits or (maybe lowercased) letters */ locase = (spec.flags & SMALL); @@ -1670,6 +1671,7 @@ static noinline_for_stack int format_decode(const char *fmt, struct printf_spec *spec) { const char *start = fmt; + char qualifier; /* we finished early by reading the field width */ if (spec->type == FORMAT_TYPE_WIDTH) { @@ -1752,16 +1754,16 @@ precision: qualifier: /* get the conversion qualifier */ - spec->qualifier = -1; + qualifier = 0; if (*fmt == 'h' || _tolower(*fmt) == 'l' || _tolower(*fmt) == 'z' || *fmt == 't') { - spec->qualifier = *fmt++; - if (unlikely(spec->qualifier == *fmt)) { - if (spec->qualifier == 'l') { - spec->qualifier = 'L'; + qualifier = *fmt++; + if (unlikely(qualifier == *fmt)) { + if (qualifier == 'l') { + qualifier = 'L'; ++fmt; - } else if (spec->qualifier == 'h') { - spec->qualifier = 'H'; + } else if (qualifier == 'h') { + qualifier = 'H'; ++fmt; } } @@ -1818,19 +1820,19 @@ qualifier: return fmt - start; } - if (spec->qualifier == 'L') + if (qualifier == 'L') spec->type = FORMAT_TYPE_LONG_LONG; - else if (spec->qualifier == 'l') { + else if (qualifier == 'l') { BUILD_BUG_ON(FORMAT_TYPE_ULONG + SIGN != FORMAT_TYPE_LONG); spec->type = FORMAT_TYPE_ULONG + (spec->flags & SIGN); - } else if (_tolower(spec->qualifier) == 'z') { + } else if (_tolower(qualifier) == 'z') { spec->type = FORMAT_TYPE_SIZE_T; - } else if (spec->qualifier == 't') { + } else if (qualifier == 't') { spec->type = FORMAT_TYPE_PTRDIFF; - } else if (spec->qualifier == 'H') { + } else if (qualifier == 'H') { BUILD_BUG_ON(FORMAT_TYPE_UBYTE + SIGN != FORMAT_TYPE_BYTE); spec->type = FORMAT_TYPE_UBYTE + (spec->flags & SIGN); - } else if (spec->qualifier == 'h') { + } else if (qualifier == 'h') { BUILD_BUG_ON(FORMAT_TYPE_USHORT + SIGN != FORMAT_TYPE_SHORT); spec->type = FORMAT_TYPE_USHORT + (spec->flags & SIGN); } else { -- cgit v1.2.3 From 1c7a8e622e84c9164dd665f5ad4879eac71bdc1e Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Fri, 15 Jan 2016 16:58:41 -0800 Subject: lib/vsprintf.c: help gcc make number() smaller One consequence of the reorganization of struct printf_spec to make field_width 24 bits was that number() gained about 180 bytes. Since spec is never passed to other functions, we can help gcc make number() lose most of that extra weight by using local variables for the field width and precision. Reviewed-by: Andy Shevchenko Signed-off-by: Rasmus Villemoes Cc: Al Viro Cc: Ingo Molnar Cc: Joe Perches Cc: Kees Cook Cc: Maurizio Lombardi Cc: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/vsprintf.c | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) (limited to 'lib') diff --git a/lib/vsprintf.c b/lib/vsprintf.c index c09182e07b05..8056dff6de64 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -401,6 +401,8 @@ char *number(char *buf, char *end, unsigned long long num, int need_pfx = ((spec.flags & SPECIAL) && spec.base != 10); int i; bool is_zero = num == 0LL; + int field_width = spec.field_width; + int precision = spec.precision; BUILD_BUG_ON(sizeof(struct printf_spec) != 8); @@ -414,20 +416,20 @@ char *number(char *buf, char *end, unsigned long long num, if ((signed long long)num < 0) { sign = '-'; num = -(signed long long)num; - spec.field_width--; + field_width--; } else if (spec.flags & PLUS) { sign = '+'; - spec.field_width--; + field_width--; } else if (spec.flags & SPACE) { sign = ' '; - spec.field_width--; + field_width--; } } if (need_pfx) { if (spec.base == 16) - spec.field_width -= 2; + field_width -= 2; else if (!is_zero) - spec.field_width--; + field_width--; } /* generate full string in tmp[], in reverse order */ @@ -449,12 +451,12 @@ char *number(char *buf, char *end, unsigned long long num, } /* printing 100 using %2d gives "100", not "00" */ - if (i > spec.precision) - spec.precision = i; + if (i > precision) + precision = i; /* leading space padding */ - spec.field_width -= spec.precision; + field_width -= precision; if (!(spec.flags & (ZEROPAD | LEFT))) { - while (--spec.field_width >= 0) { + while (--field_width >= 0) { if (buf < end) *buf = ' '; ++buf; @@ -483,14 +485,14 @@ char *number(char *buf, char *end, unsigned long long num, if (!(spec.flags & LEFT)) { char c = ' ' + (spec.flags & ZEROPAD); BUILD_BUG_ON(' ' + ZEROPAD != '0'); - while (--spec.field_width >= 0) { + while (--field_width >= 0) { if (buf < end) *buf = c; ++buf; } } /* hmm even more zero padding? */ - while (i <= --spec.precision) { + while (i <= --precision) { if (buf < end) *buf = '0'; ++buf; @@ -502,7 +504,7 @@ char *number(char *buf, char *end, unsigned long long num, ++buf; } /* trailing space padding */ - while (--spec.field_width >= 0) { + while (--field_width >= 0) { if (buf < end) *buf = ' '; ++buf; -- cgit v1.2.3 From 4d72ba014b4b0913f448ccaaaa2e8b39c54e3738 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Fri, 15 Jan 2016 16:58:44 -0800 Subject: lib/vsprintf.c: warn about too large precisions and field widths The field width is overloaded to pass some extra information for some %p extensions (e.g. #bits for %pb). But we might silently truncate the passed value when we stash it in struct printf_spec (see e.g. "lib/vsprintf.c: expand field_width to 24 bits"). Hopefully 23 value bits should now be enough for everybody, but if not, let's make some noise. Do the same for the precision. In both cases, clamping seems more sensible than truncating. While, according to POSIX, "A negative precision is taken as if the precision were omitted.", the kernel's printf has always treated that case as if the precision was 0, so we use that as lower bound. For the field width, the smallest representable value is actually -(1<<23), but a negative field width means 'set the LEFT flag and use the absolute value', so we want the absolute value to fit. Signed-off-by: Rasmus Villemoes Cc: Al Viro Cc: Andy Shevchenko Cc: Ingo Molnar Cc: Joe Perches Cc: Kees Cook Cc: Maurizio Lombardi Cc: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/vsprintf.c | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 8056dff6de64..bdc4985b1ea7 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -389,6 +389,8 @@ struct printf_spec { unsigned int base:8; /* number base, 8, 10 or 16 only */ signed int precision:16; /* # of digits/chars */ } __packed; +#define FIELD_WIDTH_MAX ((1 << 23) - 1) +#define PRECISION_MAX ((1 << 15) - 1) static noinline_for_stack char *number(char *buf, char *end, unsigned long long num, @@ -1845,6 +1847,24 @@ qualifier: return ++fmt - start; } +static void +set_field_width(struct printf_spec *spec, int width) +{ + spec->field_width = width; + if (WARN_ONCE(spec->field_width != width, "field width %d too large", width)) { + spec->field_width = clamp(width, -FIELD_WIDTH_MAX, FIELD_WIDTH_MAX); + } +} + +static void +set_precision(struct printf_spec *spec, int prec) +{ + spec->precision = prec; + if (WARN_ONCE(spec->precision != prec, "precision %d too large", prec)) { + spec->precision = clamp(prec, 0, PRECISION_MAX); + } +} + /** * vsnprintf - Format a string and place it in a buffer * @buf: The buffer to place the result into @@ -1912,11 +1932,11 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args) } case FORMAT_TYPE_WIDTH: - spec.field_width = va_arg(args, int); + set_field_width(&spec, va_arg(args, int)); break; case FORMAT_TYPE_PRECISION: - spec.precision = va_arg(args, int); + set_precision(&spec, va_arg(args, int)); break; case FORMAT_TYPE_CHAR: { @@ -2356,11 +2376,11 @@ int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf) } case FORMAT_TYPE_WIDTH: - spec.field_width = get_arg(int); + set_field_width(&spec, get_arg(int)); break; case FORMAT_TYPE_PRECISION: - spec.precision = get_arg(int); + set_precision(&spec, get_arg(int)); break; case FORMAT_TYPE_CHAR: { -- cgit v1.2.3 From 8e2a2bfdb86ecb2421e3dd18d0fbbb42f2d943ad Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Fri, 15 Jan 2016 16:58:47 -0800 Subject: lib/kasprintf.c: add sanity check to kvasprintf kasprintf relies on being able to replay the formatting and getting the same result (in particular, the same length). This will almost always work, but it is possible that the object pointed to by a %s or %p argument changed under us (so we might get truncated output). Add a somewhat paranoid sanity check and let's see if it ever triggers. Signed-off-by: Rasmus Villemoes Cc: Al Viro Cc: Andy Shevchenko Cc: Ingo Molnar Cc: Joe Perches Cc: Kees Cook Cc: Maurizio Lombardi Cc: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/kasprintf.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/kasprintf.c b/lib/kasprintf.c index f194e6e593e1..7f6c506a4942 100644 --- a/lib/kasprintf.c +++ b/lib/kasprintf.c @@ -13,19 +13,21 @@ /* Simplified asprintf. */ char *kvasprintf(gfp_t gfp, const char *fmt, va_list ap) { - unsigned int len; + unsigned int first, second; char *p; va_list aq; va_copy(aq, ap); - len = vsnprintf(NULL, 0, fmt, aq); + first = vsnprintf(NULL, 0, fmt, aq); va_end(aq); - p = kmalloc_track_caller(len+1, gfp); + p = kmalloc_track_caller(first+1, gfp); if (!p) return NULL; - vsnprintf(p, len+1, fmt, ap); + second = vsnprintf(p, first+1, fmt, ap); + WARN(first != second, "different return values (%u and %u) from vsnprintf(\"%s\", ...)", + first, second, fmt); return p; } -- cgit v1.2.3 From fd0515d50ff33865d0f8cdd74510e8bd1aee88ac Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Fri, 15 Jan 2016 16:58:50 -0800 Subject: lib/test_printf.c: don't BUG BUG is a completely unnecessarily big hammer, and we're more likely to get the internal bug reported if we just pr_err() and ensure the test suite fails. Signed-off-by: Rasmus Villemoes Acked-by: Kees Cook Cc: Al Viro Cc: Andy Shevchenko Cc: Ingo Molnar Cc: Joe Perches Cc: Maurizio Lombardi Cc: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/test_printf.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/test_printf.c b/lib/test_printf.c index c5a666af9ba5..9232a2add28c 100644 --- a/lib/test_printf.c +++ b/lib/test_printf.c @@ -91,7 +91,12 @@ __test(const char *expect, int elen, const char *fmt, ...) int rand; char *p; - BUG_ON(elen >= BUF_SIZE); + if (elen >= BUF_SIZE) { + pr_err("error in test suite: expected output length %d too long. Format was '%s'.\n", + elen, fmt); + failed_tests++; + return; + } va_start(ap, fmt); -- cgit v1.2.3 From 331e4deb6dfdac50d3f9c4ccbc41b1427335e212 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Fri, 15 Jan 2016 16:58:53 -0800 Subject: lib/test_printf.c: check for out-of-bound writes Add a few padding bytes on either side of the test buffer, and check that these (and the part of the buffer not used) are untouched by vsnprintf. Signed-off-by: Rasmus Villemoes Acked-by: Kees Cook Cc: Al Viro Cc: Andy Shevchenko Cc: Ingo Molnar Cc: Joe Perches Cc: Maurizio Lombardi Cc: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/test_printf.c | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) (limited to 'lib') diff --git a/lib/test_printf.c b/lib/test_printf.c index 9232a2add28c..1ce1a1dd8faf 100644 --- a/lib/test_printf.c +++ b/lib/test_printf.c @@ -16,6 +16,7 @@ #include #define BUF_SIZE 256 +#define PAD_SIZE 16 #define FILL_CHAR '$' #define PTR1 ((void*)0x01234567) @@ -39,6 +40,7 @@ static unsigned total_tests __initdata; static unsigned failed_tests __initdata; static char *test_buffer __initdata; +static char *alloced_buffer __initdata; static int __printf(4, 0) __init do_test(int bufsize, const char *expect, int elen, @@ -49,7 +51,7 @@ do_test(int bufsize, const char *expect, int elen, total_tests++; - memset(test_buffer, FILL_CHAR, BUF_SIZE); + memset(alloced_buffer, FILL_CHAR, BUF_SIZE + 2*PAD_SIZE); va_copy(aq, ap); ret = vsnprintf(test_buffer, bufsize, fmt, aq); va_end(aq); @@ -60,8 +62,13 @@ do_test(int bufsize, const char *expect, int elen, return 1; } + if (memchr_inv(alloced_buffer, FILL_CHAR, PAD_SIZE)) { + pr_warn("vsnprintf(buf, %d, \"%s\", ...) wrote before buffer\n", bufsize, fmt); + return 1; + } + if (!bufsize) { - if (memchr_inv(test_buffer, FILL_CHAR, BUF_SIZE)) { + if (memchr_inv(test_buffer, FILL_CHAR, BUF_SIZE + PAD_SIZE)) { pr_warn("vsnprintf(buf, 0, \"%s\", ...) wrote to buffer\n", fmt); return 1; @@ -76,6 +83,12 @@ do_test(int bufsize, const char *expect, int elen, return 1; } + if (memchr_inv(test_buffer + written + 1, FILL_CHAR, BUF_SIZE + PAD_SIZE - (written + 1))) { + pr_warn("vsnprintf(buf, %d, \"%s\", ...) wrote beyond the nul-terminator\n", + bufsize, fmt); + return 1; + } + if (memcmp(test_buffer, expect, written)) { pr_warn("vsnprintf(buf, %d, \"%s\", ...) wrote '%s', expected '%.*s'\n", bufsize, fmt, test_buffer, written, expect); @@ -342,16 +355,17 @@ test_pointer(void) static int __init test_printf_init(void) { - test_buffer = kmalloc(BUF_SIZE, GFP_KERNEL); - if (!test_buffer) + alloced_buffer = kmalloc(BUF_SIZE + 2*PAD_SIZE, GFP_KERNEL); + if (!alloced_buffer) return -ENOMEM; + test_buffer = alloced_buffer + PAD_SIZE; test_basic(); test_number(); test_string(); test_pointer(); - kfree(test_buffer); + kfree(alloced_buffer); if (failed_tests == 0) pr_info("all %u tests passed\n", total_tests); -- cgit v1.2.3 From f176eb4ce9d433184178d37a598ffdfe1cee1463 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Fri, 15 Jan 2016 16:58:56 -0800 Subject: lib/test_printf.c: test precision quirks The kernel's printf doesn't follow the standards in a few corner cases (which are probably mostly irrelevant). Add tests that document the current behaviour. Signed-off-by: Rasmus Villemoes Cc: Al Viro Cc: Andy Shevchenko Cc: Ingo Molnar Cc: Joe Perches Cc: Kees Cook Cc: Maurizio Lombardi Cc: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/test_printf.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/test_printf.c b/lib/test_printf.c index 1ce1a1dd8faf..3393d667c6b8 100644 --- a/lib/test_printf.c +++ b/lib/test_printf.c @@ -166,14 +166,23 @@ test_string(void) test("", "%s%.0s", "", "123"); test("ABCD|abc|123", "%s|%.3s|%.*s", "ABCD", "abcdef", 3, "123456"); test("1 | 2|3 | 4|5 ", "%-3s|%3s|%-*s|%*s|%*s", "1", "2", 3, "3", 3, "4", -3, "5"); + test("1234 ", "%-10.4s", "123456"); + test(" 1234", "%10.4s", "123456"); /* - * POSIX and C99 say that a missing precision should be - * treated as a precision of 0. However, the kernel's printf - * implementation treats this case as if the . wasn't - * present. Let's add a test case documenting the current - * behaviour; should anyone ever feel the need to follow the - * standards more closely, this can be revisited. + * POSIX and C99 say that a negative precision (which is only + * possible to pass via a * argument) should be treated as if + * the precision wasn't present, and that if the precision is + * omitted (as in %.s), the precision should be taken to be + * 0. However, the kernel's printf behave exactly opposite, + * treating a negative precision as 0 and treating an omitted + * precision specifier as if no precision was given. + * + * These test cases document the current behaviour; should + * anyone ever feel the need to follow the standards more + * closely, this can be revisited. */ + test(" ", "%4.*s", -5, "123456"); + test("123456", "%.s", "123456"); test("a||", "%.s|%.0s|%.*s", "a", "b", 0, "c"); test("a | | ", "%-3.s|%-3.0s|%-3.*s", "a", "b", 0, "c"); } -- cgit v1.2.3 From 1ca8e8ebe991ff628d7e03ad4b5a11817a9655d5 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Fri, 15 Jan 2016 16:58:59 -0800 Subject: lib/test_printf.c: add a few number() tests This adds a few tests to test_number, one of which serves to document another deviation from POSIX/C99 (printing 0 with an explicit precision of 0). Signed-off-by: Rasmus Villemoes Cc: Al Viro Cc: Andy Shevchenko Cc: Ingo Molnar Cc: Joe Perches Cc: Kees Cook Cc: Maurizio Lombardi Cc: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/test_printf.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'lib') diff --git a/lib/test_printf.c b/lib/test_printf.c index 3393d667c6b8..b23ce824766f 100644 --- a/lib/test_printf.c +++ b/lib/test_printf.c @@ -158,6 +158,30 @@ test_number(void) test("0x1234abcd ", "%#-12x", 0x1234abcd); test(" 0x1234abcd", "%#12x", 0x1234abcd); test("0|001| 12|+123| 1234|-123|-1234", "%d|%03d|%3d|%+d|% d|%+d|% d", 0, 1, 12, 123, 1234, -123, -1234); + test("0|1|1|128|255", "%hhu|%hhu|%hhu|%hhu|%hhu", 0, 1, 257, 128, -1); + test("0|1|1|-128|-1", "%hhd|%hhd|%hhd|%hhd|%hhd", 0, 1, 257, 128, -1); + test("2015122420151225", "%ho%ho%#ho", 1037, 5282, -11627); + /* + * POSIX/C99: »The result of converting zero with an explicit + * precision of zero shall be no characters.« Hence the output + * from the below test should really be "00|0||| ". However, + * the kernel's printf also produces a single 0 in that + * case. This test case simply documents the current + * behaviour. + */ + test("00|0|0|0|0", "%.2d|%.1d|%.0d|%.*d|%1.0d", 0, 0, 0, 0, 0, 0); +#ifndef __CHAR_UNSIGNED__ + { + /* + * Passing a 'char' to a %02x specifier doesn't do + * what was presumably the intention when char is + * signed and the value is negative. One must either & + * with 0xff or cast to u8. + */ + char val = -16; + test("0xfffffff0|0xf0|0xf0", "%#02x|%#02x|%#02x", val, val & 0xff, (u8)val); + } +#endif } static void __init -- cgit v1.2.3 From b79a7db37d560c9e4b6fdb5314cb8b226a2d6567 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Fri, 15 Jan 2016 16:59:02 -0800 Subject: lib/test_printf.c: account for kvasprintf tests These should also count as performed tests. Signed-off-by: Rasmus Villemoes Acked-by: Kees Cook Cc: Al Viro Cc: Andy Shevchenko Cc: Ingo Molnar Cc: Joe Perches Cc: Maurizio Lombardi Cc: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/test_printf.c | 1 + 1 file changed, 1 insertion(+) (limited to 'lib') diff --git a/lib/test_printf.c b/lib/test_printf.c index b23ce824766f..3e21170d327d 100644 --- a/lib/test_printf.c +++ b/lib/test_printf.c @@ -127,6 +127,7 @@ __test(const char *expect, int elen, const char *fmt, ...) p = kvasprintf(GFP_KERNEL, fmt, ap); if (p) { + total_tests++; if (memcmp(p, expect, elen+1)) { pr_warn("kvasprintf(..., \"%s\", ...) returned '%s', expected '%s'\n", fmt, p, expect); -- cgit v1.2.3 From 857cca4d565d85857597ccf0b6f72cf0f06e046c Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Fri, 15 Jan 2016 16:59:06 -0800 Subject: lib/test_printf.c: add test for large bitmaps Following "lib/vsprintf.c: expand field_width to 24 bits", let's add a test to see that we now actually support bitmaps with 65536 bits. Signed-off-by: Rasmus Villemoes Acked-by: Kees Cook Cc: Al Viro Cc: Andy Shevchenko Cc: Ingo Molnar Cc: Joe Perches Cc: Maurizio Lombardi Cc: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/test_printf.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'lib') diff --git a/lib/test_printf.c b/lib/test_printf.c index 3e21170d327d..60740c10c3e8 100644 --- a/lib/test_printf.c +++ b/lib/test_printf.c @@ -12,6 +12,7 @@ #include #include +#include #include #include @@ -340,6 +341,20 @@ struct_clk(void) { } +static void __init +large_bitmap(void) +{ + const int nbits = 1 << 16; + unsigned long *bits = kcalloc(BITS_TO_LONGS(nbits), sizeof(long), GFP_KERNEL); + if (!bits) + return; + + bitmap_set(bits, 1, 20); + bitmap_set(bits, 60000, 15); + test("1-20,60000-60014", "%*pbl", nbits, bits); + kfree(bits); +} + static void __init bitmap(void) { @@ -359,6 +374,8 @@ bitmap(void) bitmap_fill(bits, 20); test("fffff|fffff", "%20pb|%*pb", bits, 20, bits); test("0-19|0-19", "%20pbl|%*pbl", bits, 20, bits); + + large_bitmap(); } static void __init -- cgit v1.2.3 From 251c72345579b82df59eddfa184af53d6830959c Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Fri, 15 Jan 2016 16:59:09 -0800 Subject: lib/test_printf.c: test dentry printing Signed-off-by: Rasmus Villemoes Cc: Al Viro Cc: Andy Shevchenko Cc: Ingo Molnar Cc: Joe Perches Cc: Kees Cook Cc: Maurizio Lombardi Cc: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/test_printf.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'lib') diff --git a/lib/test_printf.c b/lib/test_printf.c index 60740c10c3e8..4f6ae60433bc 100644 --- a/lib/test_printf.c +++ b/lib/test_printf.c @@ -13,6 +13,7 @@ #include #include +#include #include #include @@ -326,9 +327,35 @@ uuid(void) test("03020100-0504-0706-0809-0A0B0C0D0E0F", "%pUL", uuid); } +static struct dentry test_dentry[4] __initdata = { + { .d_parent = &test_dentry[0], + .d_name = QSTR_INIT(test_dentry[0].d_iname, 3), + .d_iname = "foo" }, + { .d_parent = &test_dentry[0], + .d_name = QSTR_INIT(test_dentry[1].d_iname, 5), + .d_iname = "bravo" }, + { .d_parent = &test_dentry[1], + .d_name = QSTR_INIT(test_dentry[2].d_iname, 4), + .d_iname = "alfa" }, + { .d_parent = &test_dentry[2], + .d_name = QSTR_INIT(test_dentry[3].d_iname, 5), + .d_iname = "romeo" }, +}; + static void __init dentry(void) { + test("foo", "%pd", &test_dentry[0]); + test("foo", "%pd2", &test_dentry[0]); + + test("romeo", "%pd", &test_dentry[3]); + test("alfa/romeo", "%pd2", &test_dentry[3]); + test("bravo/alfa/romeo", "%pd3", &test_dentry[3]); + test("/bravo/alfa/romeo", "%pd4", &test_dentry[3]); + test("/bravo/alfa", "%pd4", &test_dentry[2]); + + test("bravo/alfa |bravo/alfa ", "%-12pd2|%*pd2", &test_dentry[2], -12, &test_dentry[2]); + test(" bravo/alfa| bravo/alfa", "%12pd2|%*pd2", &test_dentry[2], 12, &test_dentry[2]); } static void __init -- cgit v1.2.3 From 3cab1e711297c4fbc526ca9b802ef61894c06545 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 15 Jan 2016 16:59:18 -0800 Subject: lib/vsprintf: refactor duplicate code to special_hex_number() special_hex_number() is a helper to print a fixed size type in a hex format with '0x' prefix, zero padding, and small letters. In the module we have already several copies of such code. Consolidate them under special_hex_number() helper. There are couple of differences though. It seems nobody cared about the output in case of CONFIG_KALLSYMS=n, when printing symbol address, because the asked field width is not enough to care last 2 characters in the string represantation of the pointer. Fixed here. The %pNF specifier used to be allowed with a specific field width, though there is neither any user of it nor mention the possibility in the documentation. Signed-off-by: Andy Shevchenko Cc: Rasmus Villemoes Cc: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/vsprintf.c | 53 +++++++++++++++++++++++++++-------------------------- 1 file changed, 27 insertions(+), 26 deletions(-) (limited to 'lib') diff --git a/lib/vsprintf.c b/lib/vsprintf.c index bdc4985b1ea7..b30203b00adf 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -515,6 +515,20 @@ char *number(char *buf, char *end, unsigned long long num, return buf; } +static noinline_for_stack +char *special_hex_number(char *buf, char *end, unsigned long long num, int size) +{ + struct printf_spec spec; + + spec.type = FORMAT_TYPE_PTR; + spec.field_width = 2 + 2 * size; /* 0x + hex */ + spec.flags = SPECIAL | SMALL | ZEROPAD; + spec.base = 16; + spec.precision = -1; + + return number(buf, end, num, spec); +} + static void move_right(char *buf, char *end, unsigned len, unsigned spaces) { size_t size; @@ -670,11 +684,7 @@ char *symbol_string(char *buf, char *end, void *ptr, return string(buf, end, sym, spec); #else - spec.field_width = 2 * sizeof(void *); - spec.flags |= SPECIAL | SMALL | ZEROPAD; - spec.base = 16; - - return number(buf, end, value, spec); + return special_hex_number(buf, end, value, sizeof(void *)); #endif } @@ -1336,39 +1346,33 @@ char *uuid_string(char *buf, char *end, const u8 *addr, } static -char *netdev_feature_string(char *buf, char *end, const u8 *addr, - struct printf_spec spec) +char *netdev_feature_string(char *buf, char *end, const void *addr) { - spec.flags |= SPECIAL | SMALL | ZEROPAD; - if (spec.field_width == -1) - spec.field_width = 2 + 2 * sizeof(netdev_features_t); - spec.base = 16; + unsigned long long num = *(const netdev_features_t *)addr; + int size = sizeof(netdev_features_t); - return number(buf, end, *(const netdev_features_t *)addr, spec); + return special_hex_number(buf, end, num, size); } static noinline_for_stack -char *address_val(char *buf, char *end, const void *addr, - struct printf_spec spec, const char *fmt) +char *address_val(char *buf, char *end, const void *addr, const char *fmt) { unsigned long long num; - - spec.flags |= SPECIAL | SMALL | ZEROPAD; - spec.base = 16; + int size; switch (fmt[1]) { case 'd': num = *(const dma_addr_t *)addr; - spec.field_width = sizeof(dma_addr_t) * 2 + 2; + size = sizeof(dma_addr_t); break; case 'p': default: num = *(const phys_addr_t *)addr; - spec.field_width = sizeof(phys_addr_t) * 2 + 2; + size = sizeof(phys_addr_t); break; } - return number(buf, end, num, spec); + return special_hex_number(buf, end, num, size); } static noinline_for_stack @@ -1387,10 +1391,7 @@ char *clock(char *buf, char *end, struct clk *clk, struct printf_spec spec, #ifdef CONFIG_COMMON_CLK return string(buf, end, __clk_get_name(clk), spec); #else - spec.base = 16; - spec.field_width = sizeof(unsigned long) * 2 + 2; - spec.flags |= SPECIAL | SMALL | ZEROPAD; - return number(buf, end, (unsigned long)clk, spec); + return special_hex_number(buf, end, (unsigned long)clk, sizeof(unsigned long)); #endif } } @@ -1622,11 +1623,11 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr, case 'N': switch (fmt[1]) { case 'F': - return netdev_feature_string(buf, end, ptr, spec); + return netdev_feature_string(buf, end, ptr); } break; case 'a': - return address_val(buf, end, ptr, spec, fmt); + return address_val(buf, end, ptr, fmt); case 'd': return dentry_name(buf, end, ptr, spec, fmt); case 'C': -- cgit v1.2.3 From 5b17aecfcdd3dc82c237677f5efc9a44deeaa39e Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 15 Jan 2016 16:59:20 -0800 Subject: lib/vsprintf: factor out %pN[F] handler as netdev_bits() Move switch case to the netdev_features_string() and rename it to netdev_bits(). In the future we can extend it as needed. Here we replace the fallback of %pN from '%p' with possible flags to sticter '0x%p' without any flags variation. Signed-off-by: Andy Shevchenko Cc: Rasmus Villemoes Cc: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/vsprintf.c | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) (limited to 'lib') diff --git a/lib/vsprintf.c b/lib/vsprintf.c index b30203b00adf..48ff9c36644d 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -1345,11 +1345,22 @@ char *uuid_string(char *buf, char *end, const u8 *addr, return string(buf, end, uuid, spec); } -static -char *netdev_feature_string(char *buf, char *end, const void *addr) +static noinline_for_stack +char *netdev_bits(char *buf, char *end, const void *addr, const char *fmt) { - unsigned long long num = *(const netdev_features_t *)addr; - int size = sizeof(netdev_features_t); + unsigned long long num; + int size; + + switch (fmt[1]) { + case 'F': + num = *(const netdev_features_t *)addr; + size = sizeof(netdev_features_t); + break; + default: + num = (unsigned long)addr; + size = sizeof(unsigned long); + break; + } return special_hex_number(buf, end, num, size); } @@ -1621,11 +1632,7 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr, break; case 'N': - switch (fmt[1]) { - case 'F': - return netdev_feature_string(buf, end, ptr); - } - break; + return netdev_bits(buf, end, ptr, fmt); case 'a': return address_val(buf, end, ptr, fmt); case 'd': -- cgit v1.2.3 From 5b57167749274961baf15ed1f05a4996b3ab0487 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 13 Jan 2016 23:24:02 +0100 Subject: lib: sw842: select crc32 The sw842 library code was merged in linux-4.1 and causes a very rare randconfig failure when CONFIG_CRC32 is not set: lib/built-in.o: In function `sw842_compress': oid_registry.c:(.text+0x12ddc): undefined reference to `crc32_be' lib/built-in.o: In function `sw842_decompress': oid_registry.c:(.text+0x137e4): undefined reference to `crc32_be' This adds an explict 'select CRC32' statement, similar to what the other users of the crc32 code have. In practice, CRC32 is always enabled anyway because over 100 other symbols select it. Cc: stable@vger.kernel.org Signed-off-by: Arnd Bergmann Fixes: 2da572c959dd ("lib: add software 842 compression/decompression") Acked-by: Dan Streetman Signed-off-by: Herbert Xu --- lib/Kconfig | 2 ++ 1 file changed, 2 insertions(+) (limited to 'lib') diff --git a/lib/Kconfig b/lib/Kconfig index f0df318104e7..1a48744253d7 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -210,9 +210,11 @@ config RANDOM32_SELFTEST # compression support is select'ed if needed # config 842_COMPRESS + select CRC32 tristate config 842_DECOMPRESS + select CRC32 tristate config ZLIB_INFLATE -- cgit v1.2.3 From fd7f6727102a1ccf6b4c1dfcc631f9b546526b26 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Mon, 18 Jan 2016 17:06:05 +0100 Subject: crypto: crc32c - Fix crc32c soft dependency I don't think it makes sense for a module to have a soft dependency on itself. This seems quite cyclic by nature and I can't see what purpose it could serve. OTOH libcrc32c calls crypto_alloc_shash("crc32c", 0, 0) so it pretty much assumes that some incarnation of the "crc32c" hash algorithm has been loaded. Therefore it makes sense to have the soft dependency there (as crc-t10dif does.) Cc: stable@vger.kernel.org Cc: Tim Chen Cc: "David S. Miller" Signed-off-by: Jean Delvare Signed-off-by: Herbert Xu --- crypto/crc32c_generic.c | 1 - lib/libcrc32c.c | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/crypto/crc32c_generic.c b/crypto/crc32c_generic.c index 06f1b60f02b2..4c0a0e271876 100644 --- a/crypto/crc32c_generic.c +++ b/crypto/crc32c_generic.c @@ -172,4 +172,3 @@ MODULE_DESCRIPTION("CRC32c (Castagnoli) calculations wrapper for lib/crc32c"); MODULE_LICENSE("GPL"); MODULE_ALIAS_CRYPTO("crc32c"); MODULE_ALIAS_CRYPTO("crc32c-generic"); -MODULE_SOFTDEP("pre: crc32c"); diff --git a/lib/libcrc32c.c b/lib/libcrc32c.c index 6a08ce7d6adc..acf9da449f81 100644 --- a/lib/libcrc32c.c +++ b/lib/libcrc32c.c @@ -74,3 +74,4 @@ module_exit(libcrc32c_mod_fini); MODULE_AUTHOR("Clay Haapala "); MODULE_DESCRIPTION("CRC32c (Castagnoli) calculations"); MODULE_LICENSE("GPL"); +MODULE_SOFTDEP("pre: crc32c"); -- cgit v1.2.3 From 2ee177e945e000a40b520ea667fd4fbe7a61634e Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 31 Dec 2015 09:56:03 +0100 Subject: irq_poll: Fix irq_poll_sched() The IRQ_POLL_F_SCHED bit is set as long as polling is ongoing. This means that irq_poll_sched() must proceed if this bit has not yet been set. Fixes: commit ea51190c0315 ("irq_poll: fold irq_poll_sched_prep into irq_poll_sched"). Signed-off-by: Bart Van Assche Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Doug Ledford --- lib/irq_poll.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/irq_poll.c b/lib/irq_poll.c index 2836620e889f..836f7db4e548 100644 --- a/lib/irq_poll.c +++ b/lib/irq_poll.c @@ -29,7 +29,7 @@ void irq_poll_sched(struct irq_poll *iop) if (test_bit(IRQ_POLL_F_DISABLE, &iop->state)) return; - if (!test_and_set_bit(IRQ_POLL_F_SCHED, &iop->state)) + if (test_and_set_bit(IRQ_POLL_F_SCHED, &iop->state)) return; local_irq_save(flags); -- cgit v1.2.3 From 290e0e0f2b54b2eed5018f921c585bb694f9e68a Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Wed, 20 Jan 2016 14:58:06 -0800 Subject: lib/libcrc32c.c: fix build warning Fix the following build warning: lib/libcrc32c.c:42:5: warning: no previous prototype for "crc32c" [-Wmissing-prototypes] u32 crc32c(u32 crc, const void *address, unsigned int length) ^ Signed-off-by: Jean Delvare Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/libcrc32c.c | 1 + 1 file changed, 1 insertion(+) (limited to 'lib') diff --git a/lib/libcrc32c.c b/lib/libcrc32c.c index 6a08ce7d6adc..31ce853fbfb1 100644 --- a/lib/libcrc32c.c +++ b/lib/libcrc32c.c @@ -36,6 +36,7 @@ #include #include #include +#include static struct crypto_shash *tfm; -- cgit v1.2.3 From 564b026fbd0d28e9f70fb3831293d2922bb7855b Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Wed, 20 Jan 2016 14:58:29 -0800 Subject: string_helpers: fix precision loss for some inputs It was noticed that we lose precision in the final calculation for some inputs. The most egregious example is size=3000 blk_size=1900 in units of 10 should yield 5.70 MB but in fact yields 3.00 MB (oops). This is because the current algorithm doesn't correctly account for all the remainders in the logarithms. Fix this by doing a correct calculation in the remainders based on napier's algorithm. Additionally, now we have the correct result, we have to account for arithmetic rounding because we're printing 3 digits of precision. This means that if the fourth digit is five or greater, we have to round up, so add a section to ensure correct rounding. Finally account for all possible inputs correctly, including zero for block size. Fixes: b9f28d863594c429e1df35a0474d2663ca28b307 Signed-off-by: James Bottomley Reported-by: Vitaly Kuznetsov Cc: [delay until after 4.4 release] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/string_helpers.c | 63 +++++++++++++++++++++++++++++++++++----------------- 1 file changed, 43 insertions(+), 20 deletions(-) (limited to 'lib') diff --git a/lib/string_helpers.c b/lib/string_helpers.c index 5939f63d90cd..5c88204b6f1f 100644 --- a/lib/string_helpers.c +++ b/lib/string_helpers.c @@ -43,50 +43,73 @@ void string_get_size(u64 size, u64 blk_size, const enum string_size_units units, [STRING_UNITS_10] = 1000, [STRING_UNITS_2] = 1024, }; - int i, j; - u32 remainder = 0, sf_cap, exp; + static const unsigned int rounding[] = { 500, 50, 5 }; + int i = 0, j; + u32 remainder = 0, sf_cap; char tmp[8]; const char *unit; tmp[0] = '\0'; - i = 0; - if (!size) + + if (blk_size == 0) + size = 0; + if (size == 0) goto out; - while (blk_size >= divisor[units]) { - remainder = do_div(blk_size, divisor[units]); + /* This is Napier's algorithm. Reduce the original block size to + * + * coefficient * divisor[units]^i + * + * we do the reduction so both coefficients are just under 32 bits so + * that multiplying them together won't overflow 64 bits and we keep + * as much precision as possible in the numbers. + * + * Note: it's safe to throw away the remainders here because all the + * precision is in the coefficients. + */ + while (blk_size >> 32) { + do_div(blk_size, divisor[units]); i++; } - exp = divisor[units] / (u32)blk_size; - /* - * size must be strictly greater than exp here to ensure that remainder - * is greater than divisor[units] coming out of the if below. - */ - if (size > exp) { - remainder = do_div(size, divisor[units]); - remainder *= blk_size; + while (size >> 32) { + do_div(size, divisor[units]); i++; - } else { - remainder *= size; } + /* now perform the actual multiplication keeping i as the sum of the + * two logarithms */ size *= blk_size; - size += remainder / divisor[units]; - remainder %= divisor[units]; + /* and logarithmically reduce it until it's just under the divisor */ while (size >= divisor[units]) { remainder = do_div(size, divisor[units]); i++; } + /* work out in j how many digits of precision we need from the + * remainder */ sf_cap = size; for (j = 0; sf_cap*10 < 1000; j++) sf_cap *= 10; - if (j) { + if (units == STRING_UNITS_2) { + /* express the remainder as a decimal. It's currently the + * numerator of a fraction whose denominator is + * divisor[units], which is 1 << 10 for STRING_UNITS_2 */ remainder *= 1000; - remainder /= divisor[units]; + remainder >>= 10; + } + + /* add a 5 to the digit below what will be printed to ensure + * an arithmetical round up and carry it through to size */ + remainder += rounding[j]; + if (remainder >= 1000) { + remainder -= 1000; + size += 1; + } + + if (j) { snprintf(tmp, sizeof(tmp), ".%03u", remainder); tmp[j+1] = '\0'; } -- cgit v1.2.3 From a9aec5881b9d4aca184b29d33484a6a58d23f7f2 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Wed, 20 Jan 2016 14:58:35 -0800 Subject: lib/iomap_copy.c: add __ioread32_copy() Some drivers need to read data out of iomem areas 32-bits at a time. Add an API to do this. Signed-off-by: Stephen Boyd Cc: Bjorn Andersson Cc: Cc: David Howells Cc: Hauke Mehrtens Cc: Paul Walmsley Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/io.h | 1 + lib/iomap_copy.c | 21 +++++++++++++++++++++ 2 files changed, 22 insertions(+) (limited to 'lib') diff --git a/include/linux/io.h b/include/linux/io.h index fffd88d7f426..32403b5716e5 100644 --- a/include/linux/io.h +++ b/include/linux/io.h @@ -29,6 +29,7 @@ struct device; struct resource; __visible void __iowrite32_copy(void __iomem *to, const void *from, size_t count); +void __ioread32_copy(void *to, const void __iomem *from, size_t count); void __iowrite64_copy(void __iomem *to, const void *from, size_t count); #ifdef CONFIG_MMU diff --git a/lib/iomap_copy.c b/lib/iomap_copy.c index 4527e751b5e0..b8f1d6cbb200 100644 --- a/lib/iomap_copy.c +++ b/lib/iomap_copy.c @@ -41,6 +41,27 @@ void __attribute__((weak)) __iowrite32_copy(void __iomem *to, } EXPORT_SYMBOL_GPL(__iowrite32_copy); +/** + * __ioread32_copy - copy data from MMIO space, in 32-bit units + * @to: destination (must be 32-bit aligned) + * @from: source, in MMIO space (must be 32-bit aligned) + * @count: number of 32-bit quantities to copy + * + * Copy data from MMIO space to kernel space, in units of 32 bits at a + * time. Order of access is not guaranteed, nor is a memory barrier + * performed afterwards. + */ +void __ioread32_copy(void *to, const void __iomem *from, size_t count) +{ + u32 *dst = to; + const u32 __iomem *src = from; + const u32 __iomem *end = src + count; + + while (src < end) + *dst++ = __raw_readl(src++); +} +EXPORT_SYMBOL_GPL(__ioread32_copy); + /** * __iowrite64_copy - copy data to MMIO space, in 64-bit or 32-bit units * @to: destination, in MMIO space (must be 64-bit aligned) -- cgit v1.2.3 From 60b2e8f4f71a21b96306a8a3ea4dd345ea3bfb46 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 20 Jan 2016 14:58:44 -0800 Subject: test_hexdump: rename to test_hexdump The test suite currently doesn't cover many corner cases when hex_dump_to_buffer() runs into overflow. Refactor and amend test suite to cover most of the cases. This patch (of 9): Just to follow the scheme that most of the test modules are using. There is no fuctional change. Signed-off-by: Andy Shevchenko Acked-by: Rasmus Villemoes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/Makefile | 2 +- lib/test-hexdump.c | 180 ----------------------------------------------------- lib/test_hexdump.c | 180 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 181 insertions(+), 181 deletions(-) delete mode 100644 lib/test-hexdump.c create mode 100644 lib/test_hexdump.c (limited to 'lib') diff --git a/lib/Makefile b/lib/Makefile index 180dd4d0dd41..fdeb7e304554 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -31,7 +31,7 @@ obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \ obj-y += string_helpers.o obj-$(CONFIG_TEST_STRING_HELPERS) += test-string_helpers.o obj-y += hexdump.o -obj-$(CONFIG_TEST_HEXDUMP) += test-hexdump.o +obj-$(CONFIG_TEST_HEXDUMP) += test_hexdump.o obj-y += kstrtox.o obj-$(CONFIG_TEST_BPF) += test_bpf.o obj-$(CONFIG_TEST_FIRMWARE) += test_firmware.o diff --git a/lib/test-hexdump.c b/lib/test-hexdump.c deleted file mode 100644 index 5241df36eedf..000000000000 --- a/lib/test-hexdump.c +++ /dev/null @@ -1,180 +0,0 @@ -/* - * Test cases for lib/hexdump.c module. - */ -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include -#include -#include -#include -#include - -static const unsigned char data_b[] = { - '\xbe', '\x32', '\xdb', '\x7b', '\x0a', '\x18', '\x93', '\xb2', /* 00 - 07 */ - '\x70', '\xba', '\xc4', '\x24', '\x7d', '\x83', '\x34', '\x9b', /* 08 - 0f */ - '\xa6', '\x9c', '\x31', '\xad', '\x9c', '\x0f', '\xac', '\xe9', /* 10 - 17 */ - '\x4c', '\xd1', '\x19', '\x99', '\x43', '\xb1', '\xaf', '\x0c', /* 18 - 1f */ -}; - -static const unsigned char data_a[] = ".2.{....p..$}.4...1.....L...C..."; - -static const char * const test_data_1_le[] __initconst = { - "be", "32", "db", "7b", "0a", "18", "93", "b2", - "70", "ba", "c4", "24", "7d", "83", "34", "9b", - "a6", "9c", "31", "ad", "9c", "0f", "ac", "e9", - "4c", "d1", "19", "99", "43", "b1", "af", "0c", -}; - -static const char * const test_data_2_le[] __initconst = { - "32be", "7bdb", "180a", "b293", - "ba70", "24c4", "837d", "9b34", - "9ca6", "ad31", "0f9c", "e9ac", - "d14c", "9919", "b143", "0caf", -}; - -static const char * const test_data_4_le[] __initconst = { - "7bdb32be", "b293180a", "24c4ba70", "9b34837d", - "ad319ca6", "e9ac0f9c", "9919d14c", "0cafb143", -}; - -static const char * const test_data_8_le[] __initconst = { - "b293180a7bdb32be", "9b34837d24c4ba70", - "e9ac0f9cad319ca6", "0cafb1439919d14c", -}; - -static void __init test_hexdump(size_t len, int rowsize, int groupsize, - bool ascii) -{ - char test[32 * 3 + 2 + 32 + 1]; - char real[32 * 3 + 2 + 32 + 1]; - char *p; - const char * const *result; - size_t l = len; - int gs = groupsize, rs = rowsize; - unsigned int i; - - hex_dump_to_buffer(data_b, l, rs, gs, real, sizeof(real), ascii); - - if (rs != 16 && rs != 32) - rs = 16; - - if (l > rs) - l = rs; - - if (!is_power_of_2(gs) || gs > 8 || (len % gs != 0)) - gs = 1; - - if (gs == 8) - result = test_data_8_le; - else if (gs == 4) - result = test_data_4_le; - else if (gs == 2) - result = test_data_2_le; - else - result = test_data_1_le; - - memset(test, ' ', sizeof(test)); - - /* hex dump */ - p = test; - for (i = 0; i < l / gs; i++) { - const char *q = *result++; - size_t amount = strlen(q); - - strncpy(p, q, amount); - p += amount + 1; - } - if (i) - p--; - - /* ASCII part */ - if (ascii) { - p = test + rs * 2 + rs / gs + 1; - strncpy(p, data_a, l); - p += l; - } - - *p = '\0'; - - if (strcmp(test, real)) { - pr_err("Len: %zu row: %d group: %d\n", len, rowsize, groupsize); - pr_err("Result: '%s'\n", real); - pr_err("Expect: '%s'\n", test); - } -} - -static void __init test_hexdump_set(int rowsize, bool ascii) -{ - size_t d = min_t(size_t, sizeof(data_b), rowsize); - size_t len = get_random_int() % d + 1; - - test_hexdump(len, rowsize, 4, ascii); - test_hexdump(len, rowsize, 2, ascii); - test_hexdump(len, rowsize, 8, ascii); - test_hexdump(len, rowsize, 1, ascii); -} - -static void __init test_hexdump_overflow(bool ascii) -{ - char buf[56]; - const char *t = test_data_1_le[0]; - size_t l = get_random_int() % sizeof(buf); - bool a; - int e, r; - - memset(buf, ' ', sizeof(buf)); - - r = hex_dump_to_buffer(data_b, 1, 16, 1, buf, l, ascii); - - if (ascii) - e = 50; - else - e = 2; - buf[e + 2] = '\0'; - - if (!l) { - a = r == e && buf[0] == ' '; - } else if (l < 3) { - a = r == e && buf[0] == '\0'; - } else if (l < 4) { - a = r == e && !strcmp(buf, t); - } else if (ascii) { - if (l < 51) - a = r == e && buf[l - 1] == '\0' && buf[l - 2] == ' '; - else - a = r == e && buf[50] == '\0' && buf[49] == '.'; - } else { - a = r == e && buf[e] == '\0'; - } - - if (!a) { - pr_err("Len: %zu rc: %u strlen: %zu\n", l, r, strlen(buf)); - pr_err("Result: '%s'\n", buf); - } -} - -static int __init test_hexdump_init(void) -{ - unsigned int i; - int rowsize; - - pr_info("Running tests...\n"); - - rowsize = (get_random_int() % 2 + 1) * 16; - for (i = 0; i < 16; i++) - test_hexdump_set(rowsize, false); - - rowsize = (get_random_int() % 2 + 1) * 16; - for (i = 0; i < 16; i++) - test_hexdump_set(rowsize, true); - - for (i = 0; i < 16; i++) - test_hexdump_overflow(false); - - for (i = 0; i < 16; i++) - test_hexdump_overflow(true); - - return -EINVAL; -} -module_init(test_hexdump_init); -MODULE_LICENSE("Dual BSD/GPL"); diff --git a/lib/test_hexdump.c b/lib/test_hexdump.c new file mode 100644 index 000000000000..5241df36eedf --- /dev/null +++ b/lib/test_hexdump.c @@ -0,0 +1,180 @@ +/* + * Test cases for lib/hexdump.c module. + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include + +static const unsigned char data_b[] = { + '\xbe', '\x32', '\xdb', '\x7b', '\x0a', '\x18', '\x93', '\xb2', /* 00 - 07 */ + '\x70', '\xba', '\xc4', '\x24', '\x7d', '\x83', '\x34', '\x9b', /* 08 - 0f */ + '\xa6', '\x9c', '\x31', '\xad', '\x9c', '\x0f', '\xac', '\xe9', /* 10 - 17 */ + '\x4c', '\xd1', '\x19', '\x99', '\x43', '\xb1', '\xaf', '\x0c', /* 18 - 1f */ +}; + +static const unsigned char data_a[] = ".2.{....p..$}.4...1.....L...C..."; + +static const char * const test_data_1_le[] __initconst = { + "be", "32", "db", "7b", "0a", "18", "93", "b2", + "70", "ba", "c4", "24", "7d", "83", "34", "9b", + "a6", "9c", "31", "ad", "9c", "0f", "ac", "e9", + "4c", "d1", "19", "99", "43", "b1", "af", "0c", +}; + +static const char * const test_data_2_le[] __initconst = { + "32be", "7bdb", "180a", "b293", + "ba70", "24c4", "837d", "9b34", + "9ca6", "ad31", "0f9c", "e9ac", + "d14c", "9919", "b143", "0caf", +}; + +static const char * const test_data_4_le[] __initconst = { + "7bdb32be", "b293180a", "24c4ba70", "9b34837d", + "ad319ca6", "e9ac0f9c", "9919d14c", "0cafb143", +}; + +static const char * const test_data_8_le[] __initconst = { + "b293180a7bdb32be", "9b34837d24c4ba70", + "e9ac0f9cad319ca6", "0cafb1439919d14c", +}; + +static void __init test_hexdump(size_t len, int rowsize, int groupsize, + bool ascii) +{ + char test[32 * 3 + 2 + 32 + 1]; + char real[32 * 3 + 2 + 32 + 1]; + char *p; + const char * const *result; + size_t l = len; + int gs = groupsize, rs = rowsize; + unsigned int i; + + hex_dump_to_buffer(data_b, l, rs, gs, real, sizeof(real), ascii); + + if (rs != 16 && rs != 32) + rs = 16; + + if (l > rs) + l = rs; + + if (!is_power_of_2(gs) || gs > 8 || (len % gs != 0)) + gs = 1; + + if (gs == 8) + result = test_data_8_le; + else if (gs == 4) + result = test_data_4_le; + else if (gs == 2) + result = test_data_2_le; + else + result = test_data_1_le; + + memset(test, ' ', sizeof(test)); + + /* hex dump */ + p = test; + for (i = 0; i < l / gs; i++) { + const char *q = *result++; + size_t amount = strlen(q); + + strncpy(p, q, amount); + p += amount + 1; + } + if (i) + p--; + + /* ASCII part */ + if (ascii) { + p = test + rs * 2 + rs / gs + 1; + strncpy(p, data_a, l); + p += l; + } + + *p = '\0'; + + if (strcmp(test, real)) { + pr_err("Len: %zu row: %d group: %d\n", len, rowsize, groupsize); + pr_err("Result: '%s'\n", real); + pr_err("Expect: '%s'\n", test); + } +} + +static void __init test_hexdump_set(int rowsize, bool ascii) +{ + size_t d = min_t(size_t, sizeof(data_b), rowsize); + size_t len = get_random_int() % d + 1; + + test_hexdump(len, rowsize, 4, ascii); + test_hexdump(len, rowsize, 2, ascii); + test_hexdump(len, rowsize, 8, ascii); + test_hexdump(len, rowsize, 1, ascii); +} + +static void __init test_hexdump_overflow(bool ascii) +{ + char buf[56]; + const char *t = test_data_1_le[0]; + size_t l = get_random_int() % sizeof(buf); + bool a; + int e, r; + + memset(buf, ' ', sizeof(buf)); + + r = hex_dump_to_buffer(data_b, 1, 16, 1, buf, l, ascii); + + if (ascii) + e = 50; + else + e = 2; + buf[e + 2] = '\0'; + + if (!l) { + a = r == e && buf[0] == ' '; + } else if (l < 3) { + a = r == e && buf[0] == '\0'; + } else if (l < 4) { + a = r == e && !strcmp(buf, t); + } else if (ascii) { + if (l < 51) + a = r == e && buf[l - 1] == '\0' && buf[l - 2] == ' '; + else + a = r == e && buf[50] == '\0' && buf[49] == '.'; + } else { + a = r == e && buf[e] == '\0'; + } + + if (!a) { + pr_err("Len: %zu rc: %u strlen: %zu\n", l, r, strlen(buf)); + pr_err("Result: '%s'\n", buf); + } +} + +static int __init test_hexdump_init(void) +{ + unsigned int i; + int rowsize; + + pr_info("Running tests...\n"); + + rowsize = (get_random_int() % 2 + 1) * 16; + for (i = 0; i < 16; i++) + test_hexdump_set(rowsize, false); + + rowsize = (get_random_int() % 2 + 1) * 16; + for (i = 0; i < 16; i++) + test_hexdump_set(rowsize, true); + + for (i = 0; i < 16; i++) + test_hexdump_overflow(false); + + for (i = 0; i < 16; i++) + test_hexdump_overflow(true); + + return -EINVAL; +} +module_init(test_hexdump_init); +MODULE_LICENSE("Dual BSD/GPL"); -- cgit v1.2.3 From 87977ca6bcd051b8bd20adff0a023548ff25902c Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 20 Jan 2016 14:58:47 -0800 Subject: test_hexdump: introduce test_hexdump_prepare_test() helper The function prepares the expected result in the provided buffer. Signed-off-by: Andy Shevchenko Acked-by: Rasmus Villemoes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/test_hexdump.c | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) (limited to 'lib') diff --git a/lib/test_hexdump.c b/lib/test_hexdump.c index 5241df36eedf..ed7c6a704f34 100644 --- a/lib/test_hexdump.c +++ b/lib/test_hexdump.c @@ -42,19 +42,16 @@ static const char * const test_data_8_le[] __initconst = { "e9ac0f9cad319ca6", "0cafb1439919d14c", }; -static void __init test_hexdump(size_t len, int rowsize, int groupsize, - bool ascii) +static void __init test_hexdump_prepare_test(size_t len, int rowsize, + int groupsize, char *test, + size_t testlen, bool ascii) { - char test[32 * 3 + 2 + 32 + 1]; - char real[32 * 3 + 2 + 32 + 1]; char *p; const char * const *result; size_t l = len; int gs = groupsize, rs = rowsize; unsigned int i; - hex_dump_to_buffer(data_b, l, rs, gs, real, sizeof(real), ascii); - if (rs != 16 && rs != 32) rs = 16; @@ -73,7 +70,7 @@ static void __init test_hexdump(size_t len, int rowsize, int groupsize, else result = test_data_1_le; - memset(test, ' ', sizeof(test)); + memset(test, ' ', testlen); /* hex dump */ p = test; @@ -95,6 +92,21 @@ static void __init test_hexdump(size_t len, int rowsize, int groupsize, } *p = '\0'; +} + +#define TEST_HEXDUMP_BUF_SIZE (32 * 3 + 2 + 32 + 1) + +static void __init test_hexdump(size_t len, int rowsize, int groupsize, + bool ascii) +{ + char test[TEST_HEXDUMP_BUF_SIZE]; + char real[TEST_HEXDUMP_BUF_SIZE]; + + hex_dump_to_buffer(data_b, len, rowsize, groupsize, real, sizeof(real), + ascii); + + test_hexdump_prepare_test(len, rowsize, groupsize, test, sizeof(test), + ascii); if (strcmp(test, real)) { pr_err("Len: %zu row: %d group: %d\n", len, rowsize, groupsize); -- cgit v1.2.3 From 3db4a987180acfba3bc117575bfedb81e055778c Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 20 Jan 2016 14:58:50 -0800 Subject: test_hexdump: define FILL_CHAR constant Define a character to fill the test buffers. Though the character should be printable since it's used when errors are reported. It should neither be from hex digit [a-fA-F0-9] dictionary nor space. It is recommended not to use one which is present in ASCII part of the test data. Later on we might switch to unprintable character to make test case more robust. Signed-off-by: Andy Shevchenko Suggested-by: Rasmus Villemoes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/test_hexdump.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/test_hexdump.c b/lib/test_hexdump.c index ed7c6a704f34..1ecdb97b370c 100644 --- a/lib/test_hexdump.c +++ b/lib/test_hexdump.c @@ -42,6 +42,8 @@ static const char * const test_data_8_le[] __initconst = { "e9ac0f9cad319ca6", "0cafb1439919d14c", }; +#define FILL_CHAR '#' + static void __init test_hexdump_prepare_test(size_t len, int rowsize, int groupsize, char *test, size_t testlen, bool ascii) @@ -70,7 +72,7 @@ static void __init test_hexdump_prepare_test(size_t len, int rowsize, else result = test_data_1_le; - memset(test, ' ', testlen); + memset(test, FILL_CHAR, testlen); /* hex dump */ p = test; @@ -79,14 +81,19 @@ static void __init test_hexdump_prepare_test(size_t len, int rowsize, size_t amount = strlen(q); strncpy(p, q, amount); - p += amount + 1; + p += amount; + + *p++ = ' '; } if (i) p--; /* ASCII part */ if (ascii) { - p = test + rs * 2 + rs / gs + 1; + do { + *p++ = ' '; + } while (p < test + rs * 2 + rs / gs + 1); + strncpy(p, data_a, l); p += l; } @@ -134,7 +141,7 @@ static void __init test_hexdump_overflow(bool ascii) bool a; int e, r; - memset(buf, ' ', sizeof(buf)); + memset(buf, FILL_CHAR, sizeof(buf)); r = hex_dump_to_buffer(data_b, 1, 16, 1, buf, l, ascii); @@ -145,14 +152,14 @@ static void __init test_hexdump_overflow(bool ascii) buf[e + 2] = '\0'; if (!l) { - a = r == e && buf[0] == ' '; + a = r == e && buf[0] == FILL_CHAR; } else if (l < 3) { a = r == e && buf[0] == '\0'; } else if (l < 4) { a = r == e && !strcmp(buf, t); } else if (ascii) { if (l < 51) - a = r == e && buf[l - 1] == '\0' && buf[l - 2] == ' '; + a = r == e && buf[l - 1] == '\0' && buf[l - 2] == FILL_CHAR; else a = r == e && buf[50] == '\0' && buf[49] == '.'; } else { -- cgit v1.2.3 From a3d601fcc2f94fd1583053a1b1aea5de66ffc79c Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 20 Jan 2016 14:58:53 -0800 Subject: test_hexdump: go through all possible lengths of buffer When test for overflow do iterate the buffer length in a range 0 .. BUF_SIZE. Signed-off-by: Andy Shevchenko Cc: Rasmus Villemoes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/test_hexdump.c | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) (limited to 'lib') diff --git a/lib/test_hexdump.c b/lib/test_hexdump.c index 1ecdb97b370c..940b1d318831 100644 --- a/lib/test_hexdump.c +++ b/lib/test_hexdump.c @@ -133,17 +133,16 @@ static void __init test_hexdump_set(int rowsize, bool ascii) test_hexdump(len, rowsize, 1, ascii); } -static void __init test_hexdump_overflow(bool ascii) +static void __init test_hexdump_overflow(size_t buflen, bool ascii) { - char buf[56]; + char buf[TEST_HEXDUMP_BUF_SIZE]; const char *t = test_data_1_le[0]; - size_t l = get_random_int() % sizeof(buf); bool a; int e, r; memset(buf, FILL_CHAR, sizeof(buf)); - r = hex_dump_to_buffer(data_b, 1, 16, 1, buf, l, ascii); + r = hex_dump_to_buffer(data_b, 1, 16, 1, buf, buflen, ascii); if (ascii) e = 50; @@ -151,15 +150,15 @@ static void __init test_hexdump_overflow(bool ascii) e = 2; buf[e + 2] = '\0'; - if (!l) { + if (!buflen) { a = r == e && buf[0] == FILL_CHAR; - } else if (l < 3) { + } else if (buflen < 3) { a = r == e && buf[0] == '\0'; - } else if (l < 4) { + } else if (buflen < 4) { a = r == e && !strcmp(buf, t); } else if (ascii) { - if (l < 51) - a = r == e && buf[l - 1] == '\0' && buf[l - 2] == FILL_CHAR; + if (buflen < 51) + a = r == e && buf[buflen - 1] == '\0' && buf[buflen - 2] == FILL_CHAR; else a = r == e && buf[50] == '\0' && buf[49] == '.'; } else { @@ -167,7 +166,7 @@ static void __init test_hexdump_overflow(bool ascii) } if (!a) { - pr_err("Len: %zu rc: %u strlen: %zu\n", l, r, strlen(buf)); + pr_err("Len: %zu rc: %u strlen: %zu\n", buflen, r, strlen(buf)); pr_err("Result: '%s'\n", buf); } } @@ -187,11 +186,11 @@ static int __init test_hexdump_init(void) for (i = 0; i < 16; i++) test_hexdump_set(rowsize, true); - for (i = 0; i < 16; i++) - test_hexdump_overflow(false); + for (i = 0; i <= TEST_HEXDUMP_BUF_SIZE; i++) + test_hexdump_overflow(i, false); - for (i = 0; i < 16; i++) - test_hexdump_overflow(true); + for (i = 0; i <= TEST_HEXDUMP_BUF_SIZE; i++) + test_hexdump_overflow(i, true); return -EINVAL; } -- cgit v1.2.3 From ad27a7559a85309a4775389d012f3728c92f5eb0 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 20 Jan 2016 14:58:56 -0800 Subject: test_hexdump: replace magic numbers by their meaning The magic numbers of the length are converted to their actual meaning, such as end of the buffer with and without ASCII part. We don't touch the rest of the magic constants that will be removed in the following commits. Signed-off-by: Andy Shevchenko Cc: Rasmus Villemoes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/test_hexdump.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/test_hexdump.c b/lib/test_hexdump.c index 940b1d318831..141d031e6a13 100644 --- a/lib/test_hexdump.c +++ b/lib/test_hexdump.c @@ -137,17 +137,26 @@ static void __init test_hexdump_overflow(size_t buflen, bool ascii) { char buf[TEST_HEXDUMP_BUF_SIZE]; const char *t = test_data_1_le[0]; + size_t len = 1; + int rs = 16, gs = 1; + int ae, he, e, r; bool a; - int e, r; memset(buf, FILL_CHAR, sizeof(buf)); - r = hex_dump_to_buffer(data_b, 1, 16, 1, buf, buflen, ascii); + r = hex_dump_to_buffer(data_b, len, rs, gs, buf, buflen, ascii); + + /* + * Caller must provide the data length multiple of groupsize. The + * calculations below are made with that assumption in mind. + */ + ae = rs * 2 /* hex */ + rs / gs /* spaces */ + 1 /* space */ + len /* ascii */; + he = (gs * 2 /* hex */ + 1 /* space */) * len / gs - 1 /* no trailing space */; if (ascii) - e = 50; + e = ae; else - e = 2; + e = he; buf[e + 2] = '\0'; if (!buflen) { -- cgit v1.2.3 From 7047d813718c8e40929b7267a8d20cbf212f8565 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 20 Jan 2016 14:58:58 -0800 Subject: test_hexdump: switch to memcmp() Better to use memcmp() against entire buffer to check that nothing is happened to the data in the tail. Signed-off-by: Andy Shevchenko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/test_hexdump.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/test_hexdump.c b/lib/test_hexdump.c index 141d031e6a13..4b949aac6f08 100644 --- a/lib/test_hexdump.c +++ b/lib/test_hexdump.c @@ -72,8 +72,6 @@ static void __init test_hexdump_prepare_test(size_t len, int rowsize, else result = test_data_1_le; - memset(test, FILL_CHAR, testlen); - /* hex dump */ p = test; for (i = 0; i < l / gs; i++) { @@ -109,13 +107,15 @@ static void __init test_hexdump(size_t len, int rowsize, int groupsize, char test[TEST_HEXDUMP_BUF_SIZE]; char real[TEST_HEXDUMP_BUF_SIZE]; + memset(real, FILL_CHAR, sizeof(real)); hex_dump_to_buffer(data_b, len, rowsize, groupsize, real, sizeof(real), ascii); + memset(test, FILL_CHAR, sizeof(test)); test_hexdump_prepare_test(len, rowsize, groupsize, test, sizeof(test), ascii); - if (strcmp(test, real)) { + if (memcmp(test, real, TEST_HEXDUMP_BUF_SIZE)) { pr_err("Len: %zu row: %d group: %d\n", len, rowsize, groupsize); pr_err("Result: '%s'\n", real); pr_err("Expect: '%s'\n", test); -- cgit v1.2.3 From cc77a719a5cfd419d057277fd0fdfca568bcfdd2 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 20 Jan 2016 14:59:01 -0800 Subject: test_hexdump: check all bytes in real buffer After processing by hex_dump_to_buffer() check all the parts to be expected. Part 1. The actual expected hex dump with or without ASCII part. Part 2. Check if the buffer is dirty beyond needed. Part 3. Return code should be as expected. This is done by using comparison of the return code and memcmp() against the test buffer. We fill the buffer by FILL_CHAR ('#') characters, so, we expect to have a tail of the buffer will be left untouched. The terminating NUL is also checked by memcmp(). Signed-off-by: Andy Shevchenko Cc: Rasmus Villemoes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/test_hexdump.c | 38 +++++++++++++++++--------------------- 1 file changed, 17 insertions(+), 21 deletions(-) (limited to 'lib') diff --git a/lib/test_hexdump.c b/lib/test_hexdump.c index 4b949aac6f08..16a759374730 100644 --- a/lib/test_hexdump.c +++ b/lib/test_hexdump.c @@ -135,11 +135,10 @@ static void __init test_hexdump_set(int rowsize, bool ascii) static void __init test_hexdump_overflow(size_t buflen, bool ascii) { + char test[TEST_HEXDUMP_BUF_SIZE]; char buf[TEST_HEXDUMP_BUF_SIZE]; - const char *t = test_data_1_le[0]; - size_t len = 1; - int rs = 16, gs = 1; - int ae, he, e, r; + int rs = rowsize, gs = groupsize; + int ae, he, e, f, r; bool a; memset(buf, FILL_CHAR, sizeof(buf)); @@ -157,26 +156,23 @@ static void __init test_hexdump_overflow(size_t buflen, bool ascii) e = ae; else e = he; - buf[e + 2] = '\0'; - - if (!buflen) { - a = r == e && buf[0] == FILL_CHAR; - } else if (buflen < 3) { - a = r == e && buf[0] == '\0'; - } else if (buflen < 4) { - a = r == e && !strcmp(buf, t); - } else if (ascii) { - if (buflen < 51) - a = r == e && buf[buflen - 1] == '\0' && buf[buflen - 2] == FILL_CHAR; - else - a = r == e && buf[50] == '\0' && buf[49] == '.'; - } else { - a = r == e && buf[e] == '\0'; + + f = min_t(int, e + 1, buflen); + if (buflen) { + test_hexdump_prepare_test(len, rs, gs, test, sizeof(test), ascii); + test[f - 1] = '\0'; } + memset(test + f, FILL_CHAR, sizeof(test) - f); + + a = r == e && !memcmp(test, buf, TEST_HEXDUMP_BUF_SIZE); + + buf[sizeof(buf) - 1] = '\0'; if (!a) { - pr_err("Len: %zu rc: %u strlen: %zu\n", buflen, r, strlen(buf)); - pr_err("Result: '%s'\n", buf); + pr_err("Len: %zu buflen: %zu strlen: %zu\n", + len, buflen, strnlen(buf, sizeof(buf))); + pr_err("Result: %d '%s'\n", r, buf); + pr_err("Expect: %d '%s'\n", e, test); } } -- cgit v1.2.3 From 1dacd9ddd359eed63b210bd9b5000c2cfae287ff Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 20 Jan 2016 14:59:04 -0800 Subject: test_hexdump: test all possible group sizes for overflow Currently the only one combination is tested for overflow, i.e. rowsize = 16, groupsize = 1, len = 1. Do various test to go through all possible branches. Signed-off-by: Andy Shevchenko Cc: Rasmus Villemoes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/test_hexdump.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/test_hexdump.c b/lib/test_hexdump.c index 16a759374730..11d45f510d6c 100644 --- a/lib/test_hexdump.c +++ b/lib/test_hexdump.c @@ -133,7 +133,9 @@ static void __init test_hexdump_set(int rowsize, bool ascii) test_hexdump(len, rowsize, 1, ascii); } -static void __init test_hexdump_overflow(size_t buflen, bool ascii) +static void __init test_hexdump_overflow(size_t buflen, size_t len, + int rowsize, int groupsize, + bool ascii) { char test[TEST_HEXDUMP_BUF_SIZE]; char buf[TEST_HEXDUMP_BUF_SIZE]; @@ -176,6 +178,19 @@ static void __init test_hexdump_overflow(size_t buflen, bool ascii) } } +static void __init test_hexdump_overflow_set(size_t buflen, bool ascii) +{ + unsigned int i = 0; + int rs = (get_random_int() % 2 + 1) * 16; + + do { + int gs = 1 << i; + size_t len = get_random_int() % rs + gs; + + test_hexdump_overflow(buflen, rounddown(len, gs), rs, gs, ascii); + } while (i++ < 3); +} + static int __init test_hexdump_init(void) { unsigned int i; @@ -192,10 +207,10 @@ static int __init test_hexdump_init(void) test_hexdump_set(rowsize, true); for (i = 0; i <= TEST_HEXDUMP_BUF_SIZE; i++) - test_hexdump_overflow(i, false); + test_hexdump_overflow_set(i, false); for (i = 0; i <= TEST_HEXDUMP_BUF_SIZE; i++) - test_hexdump_overflow(i, true); + test_hexdump_overflow_set(i, true); return -EINVAL; } -- cgit v1.2.3 From 7aaf4c3e1235cca77dcc1c5a0848687e7d26a42f Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 20 Jan 2016 14:59:07 -0800 Subject: test_hexdump: print statistics at the end Like others test are doing print the gathered statistics after test module is finished. Return from the module based on the result. Signed-off-by: Andy Shevchenko Acked-by: Rasmus Villemoes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/test_hexdump.c | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/test_hexdump.c b/lib/test_hexdump.c index 11d45f510d6c..3f415d8101f3 100644 --- a/lib/test_hexdump.c +++ b/lib/test_hexdump.c @@ -44,6 +44,9 @@ static const char * const test_data_8_le[] __initconst = { #define FILL_CHAR '#' +static unsigned total_tests __initdata; +static unsigned failed_tests __initdata; + static void __init test_hexdump_prepare_test(size_t len, int rowsize, int groupsize, char *test, size_t testlen, bool ascii) @@ -107,6 +110,8 @@ static void __init test_hexdump(size_t len, int rowsize, int groupsize, char test[TEST_HEXDUMP_BUF_SIZE]; char real[TEST_HEXDUMP_BUF_SIZE]; + total_tests++; + memset(real, FILL_CHAR, sizeof(real)); hex_dump_to_buffer(data_b, len, rowsize, groupsize, real, sizeof(real), ascii); @@ -119,6 +124,7 @@ static void __init test_hexdump(size_t len, int rowsize, int groupsize, pr_err("Len: %zu row: %d group: %d\n", len, rowsize, groupsize); pr_err("Result: '%s'\n", real); pr_err("Expect: '%s'\n", test); + failed_tests++; } } @@ -143,6 +149,8 @@ static void __init test_hexdump_overflow(size_t buflen, size_t len, int ae, he, e, f, r; bool a; + total_tests++; + memset(buf, FILL_CHAR, sizeof(buf)); r = hex_dump_to_buffer(data_b, len, rs, gs, buf, buflen, ascii); @@ -175,6 +183,7 @@ static void __init test_hexdump_overflow(size_t buflen, size_t len, len, buflen, strnlen(buf, sizeof(buf))); pr_err("Result: %d '%s'\n", r, buf); pr_err("Expect: %d '%s'\n", e, test); + failed_tests++; } } @@ -196,8 +205,6 @@ static int __init test_hexdump_init(void) unsigned int i; int rowsize; - pr_info("Running tests...\n"); - rowsize = (get_random_int() % 2 + 1) * 16; for (i = 0; i < 16; i++) test_hexdump_set(rowsize, false); @@ -212,7 +219,20 @@ static int __init test_hexdump_init(void) for (i = 0; i <= TEST_HEXDUMP_BUF_SIZE; i++) test_hexdump_overflow_set(i, true); - return -EINVAL; + if (failed_tests == 0) + pr_info("all %u tests passed\n", total_tests); + else + pr_err("failed %u out of %u tests\n", failed_tests, total_tests); + + return failed_tests ? -EINVAL : 0; } module_init(test_hexdump_init); + +static void __exit test_hexdump_exit(void) +{ + /* do nothing */ +} +module_exit(test_hexdump_exit); + +MODULE_AUTHOR("Andy Shevchenko "); MODULE_LICENSE("Dual BSD/GPL"); -- cgit v1.2.3 From f5948701891322770ad6ede317da5fc9cf33d2f0 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Wed, 20 Jan 2016 14:59:12 -0800 Subject: lib/clz_tab.c: put in lib-y rather than obj-y The clz table (__clz_tab) in lib/clz_tab.c is also provided as part of libgcc.a, and many architectures link against libgcc. To allow the linker to avoid a multiple-definition link failure, clz_tab.o has to be in lib/lib.a rather than lib/builtin.o. The specific issue is that libgcc.a comes before lib/builtin.o on vmlinux.o's link command line, so its _clz.o is pulled to satisfy __clz_tab, and then when the remainder of lib/builtin.o is pulled in to satisfy all the other dependencies, the __clz_tab symbols conflict. By putting clz_tab.o in lib.a, the linker can simply avoid pulling it into vmlinux.o when this situation arises. The definitions of __clz_tab are the same in libgcc.a and in the kernel; arguably we could also simply rename the kernel version, but it's unlikely the libgcc version will ever change to become incompatible, so just using it seems reasonably safe. Signed-off-by: Chris Metcalf Acked-by: David S. Miller Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Makefile b/lib/Makefile index fdeb7e304554..b2a82e600987 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -154,7 +154,7 @@ obj-$(CONFIG_GLOB) += glob.o obj-$(CONFIG_MPILIB) += mpi/ obj-$(CONFIG_SIGNATURE) += digsig.o -obj-$(CONFIG_CLZ_TAB) += clz_tab.o +lib-$(CONFIG_CLZ_TAB) += clz_tab.o obj-$(CONFIG_DDR) += jedec_ddr_data.o -- cgit v1.2.3 From c6d308534aef6c99904bf5862066360ae067abc4 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Wed, 20 Jan 2016 15:00:55 -0800 Subject: UBSAN: run-time undefined behavior sanity checker UBSAN uses compile-time instrumentation to catch undefined behavior (UB). Compiler inserts code that perform certain kinds of checks before operations that could cause UB. If check fails (i.e. UB detected) __ubsan_handle_* function called to print error message. So the most of the work is done by compiler. This patch just implements ubsan handlers printing errors. GCC has this capability since 4.9.x [1] (see -fsanitize=undefined option and its suboptions). However GCC 5.x has more checkers implemented [2]. Article [3] has a bit more details about UBSAN in the GCC. [1] - https://gcc.gnu.org/onlinedocs/gcc-4.9.0/gcc/Debugging-Options.html [2] - https://gcc.gnu.org/onlinedocs/gcc/Debugging-Options.html [3] - http://developerblog.redhat.com/2014/10/16/gcc-undefined-behavior-sanitizer-ubsan/ Issues which UBSAN has found thus far are: Found bugs: * out-of-bounds access - 97840cb67ff5 ("netfilter: nfnetlink: fix insufficient validation in nfnetlink_bind") undefined shifts: * d48458d4a768 ("jbd2: use a better hash function for the revoke table") * 10632008b9e1 ("clockevents: Prevent shift out of bounds") * 'x << -1' shift in ext4 - http://lkml.kernel.org/r/<5444EF21.8020501@samsung.com> * undefined rol32(0) - http://lkml.kernel.org/r/<1449198241-20654-1-git-send-email-sasha.levin@oracle.com> * undefined dirty_ratelimit calculation - http://lkml.kernel.org/r/<566594E2.3050306@odin.com> * undefined roundown_pow_of_two(0) - http://lkml.kernel.org/r/<1449156616-11474-1-git-send-email-sasha.levin@oracle.com> * [WONTFIX] undefined shift in __bpf_prog_run - http://lkml.kernel.org/r/ WONTFIX here because it should be fixed in bpf program, not in kernel. signed overflows: * 32a8df4e0b33f ("sched: Fix odd values in effective_load() calculations") * mul overflow in ntp - http://lkml.kernel.org/r/<1449175608-1146-1-git-send-email-sasha.levin@oracle.com> * incorrect conversion into rtc_time in rtc_time64_to_tm() - http://lkml.kernel.org/r/<1449187944-11730-1-git-send-email-sasha.levin@oracle.com> * unvalidated timespec in io_getevents() - http://lkml.kernel.org/r/ * [NOTABUG] signed overflow in ktime_add_safe() - http://lkml.kernel.org/r/ [akpm@linux-foundation.org: fix unused local warning] [akpm@linux-foundation.org: fix __int128 build woes] Signed-off-by: Andrey Ryabinin Cc: Peter Zijlstra Cc: Sasha Levin Cc: Randy Dunlap Cc: Rasmus Villemoes Cc: Jonathan Corbet Cc: Michal Marek Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Yury Gribov Cc: Dmitry Vyukov Cc: Konstantin Khlebnikov Cc: Kostya Serebryany Cc: Johannes Berg Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/ubsan.txt | 84 +++++++ Makefile | 3 +- arch/x86/Kconfig | 1 + arch/x86/boot/Makefile | 1 + arch/x86/boot/compressed/Makefile | 1 + arch/x86/entry/vdso/Makefile | 1 + arch/x86/realmode/rm/Makefile | 1 + drivers/firmware/efi/libstub/Makefile | 1 + include/linux/sched.h | 3 + lib/Kconfig.debug | 2 + lib/Kconfig.ubsan | 29 +++ lib/Makefile | 3 + lib/ubsan.c | 456 ++++++++++++++++++++++++++++++++++ lib/ubsan.h | 84 +++++++ mm/kasan/Makefile | 1 + scripts/Makefile.lib | 6 + scripts/Makefile.ubsan | 17 ++ 17 files changed, 693 insertions(+), 1 deletion(-) create mode 100644 Documentation/ubsan.txt create mode 100644 lib/Kconfig.ubsan create mode 100644 lib/ubsan.c create mode 100644 lib/ubsan.h create mode 100644 scripts/Makefile.ubsan (limited to 'lib') diff --git a/Documentation/ubsan.txt b/Documentation/ubsan.txt new file mode 100644 index 000000000000..f58215ef5797 --- /dev/null +++ b/Documentation/ubsan.txt @@ -0,0 +1,84 @@ +Undefined Behavior Sanitizer - UBSAN + +Overview +-------- + +UBSAN is a runtime undefined behaviour checker. + +UBSAN uses compile-time instrumentation to catch undefined behavior (UB). +Compiler inserts code that perform certain kinds of checks before operations +that may cause UB. If check fails (i.e. UB detected) __ubsan_handle_* +function called to print error message. + +GCC has that feature since 4.9.x [1] (see -fsanitize=undefined option and +its suboptions). GCC 5.x has more checkers implemented [2]. + +Report example +--------------- + + ================================================================================ + UBSAN: Undefined behaviour in ../include/linux/bitops.h:110:33 + shift exponent 32 is to large for 32-bit type 'unsigned int' + CPU: 0 PID: 0 Comm: swapper Not tainted 4.4.0-rc1+ #26 + 0000000000000000 ffffffff82403cc8 ffffffff815e6cd6 0000000000000001 + ffffffff82403cf8 ffffffff82403ce0 ffffffff8163a5ed 0000000000000020 + ffffffff82403d78 ffffffff8163ac2b ffffffff815f0001 0000000000000002 + Call Trace: + [] dump_stack+0x45/0x5f + [] ubsan_epilogue+0xd/0x40 + [] __ubsan_handle_shift_out_of_bounds+0xeb/0x130 + [] ? radix_tree_gang_lookup_slot+0x51/0x150 + [] _mix_pool_bytes+0x1e6/0x480 + [] ? dmi_walk_early+0x48/0x5c + [] add_device_randomness+0x61/0x130 + [] ? dmi_save_one_device+0xaa/0xaa + [] dmi_walk_early+0x48/0x5c + [] dmi_scan_machine+0x278/0x4b4 + [] ? vprintk_default+0x1a/0x20 + [] ? early_idt_handler_array+0x120/0x120 + [] setup_arch+0x405/0xc2c + [] ? early_idt_handler_array+0x120/0x120 + [] start_kernel+0x83/0x49a + [] ? early_idt_handler_array+0x120/0x120 + [] x86_64_start_reservations+0x2a/0x2c + [] x86_64_start_kernel+0x16b/0x17a + ================================================================================ + +Usage +----- + +To enable UBSAN configure kernel with: + + CONFIG_UBSAN=y + +and to check the entire kernel: + + CONFIG_UBSAN_SANITIZE_ALL=y + +To enable instrumentation for specific files or directories, add a line +similar to the following to the respective kernel Makefile: + + For a single file (e.g. main.o): + UBSAN_SANITIZE_main.o := y + + For all files in one directory: + UBSAN_SANITIZE := y + +To exclude files from being instrumented even if +CONFIG_UBSAN_SANITIZE_ALL=y, use: + + UBSAN_SANITIZE_main.o := n + and: + UBSAN_SANITIZE := n + +Detection of unaligned accesses controlled through the separate option - +CONFIG_UBSAN_ALIGNMENT. It's off by default on architectures that support +unaligned accesses (CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS=y). One could +still enable it in config, just note that it will produce a lot of UBSAN +reports. + +References +---------- + +[1] - https://gcc.gnu.org/onlinedocs/gcc-4.9.0/gcc/Debugging-Options.html +[2] - https://gcc.gnu.org/onlinedocs/gcc/Debugging-Options.html diff --git a/Makefile b/Makefile index 7f4ac1ee4a2b..abfb3e8eb0b1 100644 --- a/Makefile +++ b/Makefile @@ -411,7 +411,7 @@ export MAKE AWK GENKSYMS INSTALLKERNEL PERL PYTHON UTS_MACHINE export HOSTCXX HOSTCXXFLAGS LDFLAGS_MODULE CHECK CHECKFLAGS export KBUILD_CPPFLAGS NOSTDINC_FLAGS LINUXINCLUDE OBJCOPYFLAGS LDFLAGS -export KBUILD_CFLAGS CFLAGS_KERNEL CFLAGS_MODULE CFLAGS_GCOV CFLAGS_KASAN +export KBUILD_CFLAGS CFLAGS_KERNEL CFLAGS_MODULE CFLAGS_GCOV CFLAGS_KASAN CFLAGS_UBSAN export KBUILD_AFLAGS AFLAGS_KERNEL AFLAGS_MODULE export KBUILD_AFLAGS_MODULE KBUILD_CFLAGS_MODULE KBUILD_LDFLAGS_MODULE export KBUILD_AFLAGS_KERNEL KBUILD_CFLAGS_KERNEL @@ -784,6 +784,7 @@ endif include scripts/Makefile.kasan include scripts/Makefile.extrawarn +include scripts/Makefile.ubsan # Add any arch overrides and user supplied CPPFLAGS, AFLAGS and CFLAGS as the # last assignments diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 4a10ba9e95da..92b2a73162ee 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -31,6 +31,7 @@ config X86 select ARCH_HAS_PMEM_API if X86_64 select ARCH_HAS_MMIO_FLUSH select ARCH_HAS_SG_CHAIN + select ARCH_HAS_UBSAN_SANITIZE_ALL select ARCH_HAVE_NMI_SAFE_CMPXCHG select ARCH_MIGHT_HAVE_ACPI_PDC if ACPI select ARCH_MIGHT_HAVE_PC_PARPORT diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile index 2ee62dba0373..bbe1a62efc02 100644 --- a/arch/x86/boot/Makefile +++ b/arch/x86/boot/Makefile @@ -60,6 +60,7 @@ clean-files += cpustr.h KBUILD_CFLAGS := $(USERINCLUDE) $(REALMODE_CFLAGS) -D_SETUP KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__ GCOV_PROFILE := n +UBSAN_SANITIZE := n $(obj)/bzImage: asflags-y := $(SVGA_MODE) diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index 0a291cdfaf77..f9ce75d80101 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -33,6 +33,7 @@ KBUILD_CFLAGS += $(call cc-option,-fno-stack-protector) KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__ GCOV_PROFILE := n +UBSAN_SANITIZE :=n LDFLAGS := -m elf_$(UTS_MACHINE) LDFLAGS_vmlinux := -T diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile index 265c0ed68118..c854541d93ff 100644 --- a/arch/x86/entry/vdso/Makefile +++ b/arch/x86/entry/vdso/Makefile @@ -4,6 +4,7 @@ KBUILD_CFLAGS += $(DISABLE_LTO) KASAN_SANITIZE := n +UBSAN_SANITIZE := n VDSO64-$(CONFIG_X86_64) := y VDSOX32-$(CONFIG_X86_X32_ABI) := y diff --git a/arch/x86/realmode/rm/Makefile b/arch/x86/realmode/rm/Makefile index 2730d775ef9a..3e75fcf6b836 100644 --- a/arch/x86/realmode/rm/Makefile +++ b/arch/x86/realmode/rm/Makefile @@ -70,3 +70,4 @@ KBUILD_CFLAGS := $(LINUXINCLUDE) $(REALMODE_CFLAGS) -D_SETUP -D_WAKEUP \ -I$(srctree)/arch/x86/boot KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__ GCOV_PROFILE := n +UBSAN_SANITIZE := n diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile index 9c12e18031d5..aaf9c0bab42e 100644 --- a/drivers/firmware/efi/libstub/Makefile +++ b/drivers/firmware/efi/libstub/Makefile @@ -22,6 +22,7 @@ KBUILD_CFLAGS := $(cflags-y) -DDISABLE_BRANCH_PROFILING \ GCOV_PROFILE := n KASAN_SANITIZE := n +UBSAN_SANITIZE := n lib-y := efi-stub-helper.o diff --git a/include/linux/sched.h b/include/linux/sched.h index 61aa9bbea871..02dabf281b2f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1643,6 +1643,9 @@ struct task_struct { struct held_lock held_locks[MAX_LOCK_DEPTH]; gfp_t lockdep_reclaim_gfp; #endif +#ifdef CONFIG_UBSAN + unsigned int in_ubsan; +#endif /* journalling filesystem info */ void *journal_info; diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index f75a33f29f6e..157220b9ff05 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1893,6 +1893,8 @@ source "samples/Kconfig" source "lib/Kconfig.kgdb" +source "lib/Kconfig.ubsan" + config ARCH_HAS_DEVMEM_IS_ALLOWED bool diff --git a/lib/Kconfig.ubsan b/lib/Kconfig.ubsan new file mode 100644 index 000000000000..49518fb48cab --- /dev/null +++ b/lib/Kconfig.ubsan @@ -0,0 +1,29 @@ +config ARCH_HAS_UBSAN_SANITIZE_ALL + bool + +config UBSAN + bool "Undefined behaviour sanity checker" + help + This option enables undefined behaviour sanity checker + Compile-time instrumentation is used to detect various undefined + behaviours in runtime. Various types of checks may be enabled + via boot parameter ubsan_handle (see: Documentation/ubsan.txt). + +config UBSAN_SANITIZE_ALL + bool "Enable instrumentation for the entire kernel" + depends on UBSAN + depends on ARCH_HAS_UBSAN_SANITIZE_ALL + default y + help + This option activates instrumentation for the entire kernel. + If you don't enable this option, you have to explicitly specify + UBSAN_SANITIZE := y for the files/directories you want to check for UB. + +config UBSAN_ALIGNMENT + bool "Enable checking of pointers alignment" + depends on UBSAN + default y if !HAVE_EFFICIENT_UNALIGNED_ACCESS + help + This option enables detection of unaligned memory accesses. + Enabling this option on architectures that support unalligned + accesses may produce a lot of false positives. diff --git a/lib/Makefile b/lib/Makefile index b2a82e600987..2d4bc33d09b4 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -209,3 +209,6 @@ quiet_cmd_build_OID_registry = GEN $@ clean-files += oid_registry_data.c obj-$(CONFIG_UCS2_STRING) += ucs2_string.o +obj-$(CONFIG_UBSAN) += ubsan.o + +UBSAN_SANITIZE_ubsan.o := n diff --git a/lib/ubsan.c b/lib/ubsan.c new file mode 100644 index 000000000000..8799ae5e2e42 --- /dev/null +++ b/lib/ubsan.c @@ -0,0 +1,456 @@ +/* + * UBSAN error reporting functions + * + * Copyright (c) 2014 Samsung Electronics Co., Ltd. + * Author: Andrey Ryabinin + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "ubsan.h" + +const char *type_check_kinds[] = { + "load of", + "store to", + "reference binding to", + "member access within", + "member call on", + "constructor call on", + "downcast of", + "downcast of" +}; + +#define REPORTED_BIT 31 + +#if (BITS_PER_LONG == 64) && defined(__BIG_ENDIAN) +#define COLUMN_MASK (~(1U << REPORTED_BIT)) +#define LINE_MASK (~0U) +#else +#define COLUMN_MASK (~0U) +#define LINE_MASK (~(1U << REPORTED_BIT)) +#endif + +#define VALUE_LENGTH 40 + +static bool was_reported(struct source_location *location) +{ + return test_and_set_bit(REPORTED_BIT, &location->reported); +} + +static void print_source_location(const char *prefix, + struct source_location *loc) +{ + pr_err("%s %s:%d:%d\n", prefix, loc->file_name, + loc->line & LINE_MASK, loc->column & COLUMN_MASK); +} + +static bool suppress_report(struct source_location *loc) +{ + return current->in_ubsan || was_reported(loc); +} + +static bool type_is_int(struct type_descriptor *type) +{ + return type->type_kind == type_kind_int; +} + +static bool type_is_signed(struct type_descriptor *type) +{ + WARN_ON(!type_is_int(type)); + return type->type_info & 1; +} + +static unsigned type_bit_width(struct type_descriptor *type) +{ + return 1 << (type->type_info >> 1); +} + +static bool is_inline_int(struct type_descriptor *type) +{ + unsigned inline_bits = sizeof(unsigned long)*8; + unsigned bits = type_bit_width(type); + + WARN_ON(!type_is_int(type)); + + return bits <= inline_bits; +} + +static s_max get_signed_val(struct type_descriptor *type, unsigned long val) +{ + if (is_inline_int(type)) { + unsigned extra_bits = sizeof(s_max)*8 - type_bit_width(type); + return ((s_max)val) << extra_bits >> extra_bits; + } + + if (type_bit_width(type) == 64) + return *(s64 *)val; + + return *(s_max *)val; +} + +static bool val_is_negative(struct type_descriptor *type, unsigned long val) +{ + return type_is_signed(type) && get_signed_val(type, val) < 0; +} + +static u_max get_unsigned_val(struct type_descriptor *type, unsigned long val) +{ + if (is_inline_int(type)) + return val; + + if (type_bit_width(type) == 64) + return *(u64 *)val; + + return *(u_max *)val; +} + +static void val_to_string(char *str, size_t size, struct type_descriptor *type, + unsigned long value) +{ + if (type_is_int(type)) { + if (type_bit_width(type) == 128) { +#if defined(CONFIG_ARCH_SUPPORTS_INT128) && defined(__SIZEOF_INT128__) + u_max val = get_unsigned_val(type, value); + + scnprintf(str, size, "0x%08x%08x%08x%08x", + (u32)(val >> 96), + (u32)(val >> 64), + (u32)(val >> 32), + (u32)(val)); +#else + WARN_ON(1); +#endif + } else if (type_is_signed(type)) { + scnprintf(str, size, "%lld", + (s64)get_signed_val(type, value)); + } else { + scnprintf(str, size, "%llu", + (u64)get_unsigned_val(type, value)); + } + } +} + +static bool location_is_valid(struct source_location *loc) +{ + return loc->file_name != NULL; +} + +static DEFINE_SPINLOCK(report_lock); + +static void ubsan_prologue(struct source_location *location, + unsigned long *flags) +{ + current->in_ubsan++; + spin_lock_irqsave(&report_lock, *flags); + + pr_err("========================================" + "========================================\n"); + print_source_location("UBSAN: Undefined behaviour in", location); +} + +static void ubsan_epilogue(unsigned long *flags) +{ + dump_stack(); + pr_err("========================================" + "========================================\n"); + spin_unlock_irqrestore(&report_lock, *flags); + current->in_ubsan--; +} + +static void handle_overflow(struct overflow_data *data, unsigned long lhs, + unsigned long rhs, char op) +{ + + struct type_descriptor *type = data->type; + unsigned long flags; + char lhs_val_str[VALUE_LENGTH]; + char rhs_val_str[VALUE_LENGTH]; + + if (suppress_report(&data->location)) + return; + + ubsan_prologue(&data->location, &flags); + + val_to_string(lhs_val_str, sizeof(lhs_val_str), type, lhs); + val_to_string(rhs_val_str, sizeof(rhs_val_str), type, rhs); + pr_err("%s integer overflow:\n", + type_is_signed(type) ? "signed" : "unsigned"); + pr_err("%s %c %s cannot be represented in type %s\n", + lhs_val_str, + op, + rhs_val_str, + type->type_name); + + ubsan_epilogue(&flags); +} + +void __ubsan_handle_add_overflow(struct overflow_data *data, + unsigned long lhs, + unsigned long rhs) +{ + + handle_overflow(data, lhs, rhs, '+'); +} +EXPORT_SYMBOL(__ubsan_handle_add_overflow); + +void __ubsan_handle_sub_overflow(struct overflow_data *data, + unsigned long lhs, + unsigned long rhs) +{ + handle_overflow(data, lhs, rhs, '-'); +} +EXPORT_SYMBOL(__ubsan_handle_sub_overflow); + +void __ubsan_handle_mul_overflow(struct overflow_data *data, + unsigned long lhs, + unsigned long rhs) +{ + handle_overflow(data, lhs, rhs, '*'); +} +EXPORT_SYMBOL(__ubsan_handle_mul_overflow); + +void __ubsan_handle_negate_overflow(struct overflow_data *data, + unsigned long old_val) +{ + unsigned long flags; + char old_val_str[VALUE_LENGTH]; + + if (suppress_report(&data->location)) + return; + + ubsan_prologue(&data->location, &flags); + + val_to_string(old_val_str, sizeof(old_val_str), data->type, old_val); + + pr_err("negation of %s cannot be represented in type %s:\n", + old_val_str, data->type->type_name); + + ubsan_epilogue(&flags); +} +EXPORT_SYMBOL(__ubsan_handle_negate_overflow); + + +void __ubsan_handle_divrem_overflow(struct overflow_data *data, + unsigned long lhs, + unsigned long rhs) +{ + unsigned long flags; + char rhs_val_str[VALUE_LENGTH]; + + if (suppress_report(&data->location)) + return; + + ubsan_prologue(&data->location, &flags); + + val_to_string(rhs_val_str, sizeof(rhs_val_str), data->type, rhs); + + if (type_is_signed(data->type) && get_signed_val(data->type, rhs) == -1) + pr_err("division of %s by -1 cannot be represented in type %s\n", + rhs_val_str, data->type->type_name); + else + pr_err("division by zero\n"); + + ubsan_epilogue(&flags); +} +EXPORT_SYMBOL(__ubsan_handle_divrem_overflow); + +static void handle_null_ptr_deref(struct type_mismatch_data *data) +{ + unsigned long flags; + + if (suppress_report(&data->location)) + return; + + ubsan_prologue(&data->location, &flags); + + pr_err("%s null pointer of type %s\n", + type_check_kinds[data->type_check_kind], + data->type->type_name); + + ubsan_epilogue(&flags); +} + +static void handle_missaligned_access(struct type_mismatch_data *data, + unsigned long ptr) +{ + unsigned long flags; + + if (suppress_report(&data->location)) + return; + + ubsan_prologue(&data->location, &flags); + + pr_err("%s misaligned address %p for type %s\n", + type_check_kinds[data->type_check_kind], + (void *)ptr, data->type->type_name); + pr_err("which requires %ld byte alignment\n", data->alignment); + + ubsan_epilogue(&flags); +} + +static void handle_object_size_mismatch(struct type_mismatch_data *data, + unsigned long ptr) +{ + unsigned long flags; + + if (suppress_report(&data->location)) + return; + + ubsan_prologue(&data->location, &flags); + pr_err("%s address %pk with insufficient space\n", + type_check_kinds[data->type_check_kind], + (void *) ptr); + pr_err("for an object of type %s\n", data->type->type_name); + ubsan_epilogue(&flags); +} + +void __ubsan_handle_type_mismatch(struct type_mismatch_data *data, + unsigned long ptr) +{ + + if (!ptr) + handle_null_ptr_deref(data); + else if (data->alignment && !IS_ALIGNED(ptr, data->alignment)) + handle_missaligned_access(data, ptr); + else + handle_object_size_mismatch(data, ptr); +} +EXPORT_SYMBOL(__ubsan_handle_type_mismatch); + +void __ubsan_handle_nonnull_return(struct nonnull_return_data *data) +{ + unsigned long flags; + + if (suppress_report(&data->location)) + return; + + ubsan_prologue(&data->location, &flags); + + pr_err("null pointer returned from function declared to never return null\n"); + + if (location_is_valid(&data->attr_location)) + print_source_location("returns_nonnull attribute specified in", + &data->attr_location); + + ubsan_epilogue(&flags); +} +EXPORT_SYMBOL(__ubsan_handle_nonnull_return); + +void __ubsan_handle_vla_bound_not_positive(struct vla_bound_data *data, + unsigned long bound) +{ + unsigned long flags; + char bound_str[VALUE_LENGTH]; + + if (suppress_report(&data->location)) + return; + + ubsan_prologue(&data->location, &flags); + + val_to_string(bound_str, sizeof(bound_str), data->type, bound); + pr_err("variable length array bound value %s <= 0\n", bound_str); + + ubsan_epilogue(&flags); +} +EXPORT_SYMBOL(__ubsan_handle_vla_bound_not_positive); + +void __ubsan_handle_out_of_bounds(struct out_of_bounds_data *data, + unsigned long index) +{ + unsigned long flags; + char index_str[VALUE_LENGTH]; + + if (suppress_report(&data->location)) + return; + + ubsan_prologue(&data->location, &flags); + + val_to_string(index_str, sizeof(index_str), data->index_type, index); + pr_err("index %s is out of range for type %s\n", index_str, + data->array_type->type_name); + ubsan_epilogue(&flags); +} +EXPORT_SYMBOL(__ubsan_handle_out_of_bounds); + +void __ubsan_handle_shift_out_of_bounds(struct shift_out_of_bounds_data *data, + unsigned long lhs, unsigned long rhs) +{ + unsigned long flags; + struct type_descriptor *rhs_type = data->rhs_type; + struct type_descriptor *lhs_type = data->lhs_type; + char rhs_str[VALUE_LENGTH]; + char lhs_str[VALUE_LENGTH]; + + if (suppress_report(&data->location)) + return; + + ubsan_prologue(&data->location, &flags); + + val_to_string(rhs_str, sizeof(rhs_str), rhs_type, rhs); + val_to_string(lhs_str, sizeof(lhs_str), lhs_type, lhs); + + if (val_is_negative(rhs_type, rhs)) + pr_err("shift exponent %s is negative\n", rhs_str); + + else if (get_unsigned_val(rhs_type, rhs) >= + type_bit_width(lhs_type)) + pr_err("shift exponent %s is too large for %u-bit type %s\n", + rhs_str, + type_bit_width(lhs_type), + lhs_type->type_name); + else if (val_is_negative(lhs_type, lhs)) + pr_err("left shift of negative value %s\n", + lhs_str); + else + pr_err("left shift of %s by %s places cannot be" + " represented in type %s\n", + lhs_str, rhs_str, + lhs_type->type_name); + + ubsan_epilogue(&flags); +} +EXPORT_SYMBOL(__ubsan_handle_shift_out_of_bounds); + + +void __noreturn +__ubsan_handle_builtin_unreachable(struct unreachable_data *data) +{ + unsigned long flags; + + ubsan_prologue(&data->location, &flags); + pr_err("calling __builtin_unreachable()\n"); + ubsan_epilogue(&flags); + panic("can't return from __builtin_unreachable()"); +} +EXPORT_SYMBOL(__ubsan_handle_builtin_unreachable); + +void __ubsan_handle_load_invalid_value(struct invalid_value_data *data, + unsigned long val) +{ + unsigned long flags; + char val_str[VALUE_LENGTH]; + + if (suppress_report(&data->location)) + return; + + ubsan_prologue(&data->location, &flags); + + val_to_string(val_str, sizeof(val_str), data->type, val); + + pr_err("load of value %s is not a valid value for type %s\n", + val_str, data->type->type_name); + + ubsan_epilogue(&flags); +} +EXPORT_SYMBOL(__ubsan_handle_load_invalid_value); diff --git a/lib/ubsan.h b/lib/ubsan.h new file mode 100644 index 000000000000..b2d18d4a53f5 --- /dev/null +++ b/lib/ubsan.h @@ -0,0 +1,84 @@ +#ifndef _LIB_UBSAN_H +#define _LIB_UBSAN_H + +enum { + type_kind_int = 0, + type_kind_float = 1, + type_unknown = 0xffff +}; + +struct type_descriptor { + u16 type_kind; + u16 type_info; + char type_name[1]; +}; + +struct source_location { + const char *file_name; + union { + unsigned long reported; + struct { + u32 line; + u32 column; + }; + }; +}; + +struct overflow_data { + struct source_location location; + struct type_descriptor *type; +}; + +struct type_mismatch_data { + struct source_location location; + struct type_descriptor *type; + unsigned long alignment; + unsigned char type_check_kind; +}; + +struct nonnull_arg_data { + struct source_location location; + struct source_location attr_location; + int arg_index; +}; + +struct nonnull_return_data { + struct source_location location; + struct source_location attr_location; +}; + +struct vla_bound_data { + struct source_location location; + struct type_descriptor *type; +}; + +struct out_of_bounds_data { + struct source_location location; + struct type_descriptor *array_type; + struct type_descriptor *index_type; +}; + +struct shift_out_of_bounds_data { + struct source_location location; + struct type_descriptor *lhs_type; + struct type_descriptor *rhs_type; +}; + +struct unreachable_data { + struct source_location location; +}; + +struct invalid_value_data { + struct source_location location; + struct type_descriptor *type; +}; + +#if defined(CONFIG_ARCH_SUPPORTS_INT128) && defined(__SIZEOF_INT128__) +typedef __int128 s_max; +typedef unsigned __int128 u_max; +#else +typedef s64 s_max; +typedef u64 u_max; +#endif + +#endif diff --git a/mm/kasan/Makefile b/mm/kasan/Makefile index 64710148941e..a61460d9f5b0 100644 --- a/mm/kasan/Makefile +++ b/mm/kasan/Makefile @@ -1,4 +1,5 @@ KASAN_SANITIZE := n +UBSAN_SANITIZE_kasan.o := n CFLAGS_REMOVE_kasan.o = -pg # Function splitter causes unnecessary splits in __asan_load1/__asan_store1 diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index 39d6bb18ce76..2edbcadb3d7f 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -130,6 +130,12 @@ _c_flags += $(if $(patsubst n%,, \ $(CFLAGS_KASAN)) endif +ifeq ($(CONFIG_UBSAN),y) +_c_flags += $(if $(patsubst n%,, \ + $(UBSAN_SANITIZE_$(basetarget).o)$(UBSAN_SANITIZE)$(CONFIG_UBSAN_SANITIZE_ALL)), \ + $(CFLAGS_UBSAN)) +endif + # If building the kernel in a separate objtree expand all occurrences # of -Idir to -I$(srctree)/dir except for absolute paths (starting with '/'). diff --git a/scripts/Makefile.ubsan b/scripts/Makefile.ubsan new file mode 100644 index 000000000000..8ab68679cfb5 --- /dev/null +++ b/scripts/Makefile.ubsan @@ -0,0 +1,17 @@ +ifdef CONFIG_UBSAN + CFLAGS_UBSAN += $(call cc-option, -fsanitize=shift) + CFLAGS_UBSAN += $(call cc-option, -fsanitize=integer-divide-by-zero) + CFLAGS_UBSAN += $(call cc-option, -fsanitize=unreachable) + CFLAGS_UBSAN += $(call cc-option, -fsanitize=vla-bound) + CFLAGS_UBSAN += $(call cc-option, -fsanitize=null) + CFLAGS_UBSAN += $(call cc-option, -fsanitize=signed-integer-overflow) + CFLAGS_UBSAN += $(call cc-option, -fsanitize=bounds) + CFLAGS_UBSAN += $(call cc-option, -fsanitize=object-size) + CFLAGS_UBSAN += $(call cc-option, -fsanitize=returns-nonnull-attribute) + CFLAGS_UBSAN += $(call cc-option, -fsanitize=bool) + CFLAGS_UBSAN += $(call cc-option, -fsanitize=enum) + +ifdef CONFIG_UBSAN_ALIGNMENT + CFLAGS_UBSAN += $(call cc-option, -fsanitize=alignment) +endif +endif -- cgit v1.2.3 From 19a3dd7621af01b7e44a70d16beab59326c38824 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 20 Jan 2016 14:37:22 -0800 Subject: Do not enable CONFIG_IO_STRICT_DEVMEM by default Larry Finger reports: "My PowerBook G4 Aluminum with a 32-bit PPC processor fails to boot for the 4.4-git series". This is likely due to X still needing /dev/mem access on this platform. CONFIG_IO_STRICT_DEVMEM is not yet safe to turn on when CONFIG_STRICT_DEVMEM=y. Remove the default so that old configurations do not change behavior. Fixes: 90a545e98126 ("restrict /dev/mem to idle io memory ranges") Reported-by: Larry Finger Tested-by: Larry Finger Link: http://marc.info/?l=linux-kernel&m=145332012023825&w=2 Acked-by: Kees Cook Cc: Arnd Bergmann Cc: Ingo Molnar Cc: Russell King Cc: Andrew Morton Cc: Greg Kroah-Hartman Signed-off-by: Dan Williams Signed-off-by: Linus Torvalds --- lib/Kconfig.debug | 1 - 1 file changed, 1 deletion(-) (limited to 'lib') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index f75a33f29f6e..7d0b49c536c5 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1919,7 +1919,6 @@ config STRICT_DEVMEM config IO_STRICT_DEVMEM bool "Filter I/O access to /dev/mem" depends on STRICT_DEVMEM - default STRICT_DEVMEM ---help--- If this option is disabled, you allow userspace (root) access to all io-memory regardless of whether a driver is actively using that -- cgit v1.2.3 From c2594bc37f4464bc74f2c119eb3269a643400aa0 Mon Sep 17 00:00:00 2001 From: Jaewon Kim Date: Thu, 21 Jan 2016 16:55:07 -0800 Subject: ratelimit: fix bug in time interval by resetting right begin time rs->begin in ratelimit is set in two cases. 1) when rs->begin was not initialized 2) when rs->interval was passed For case #2, current ratelimit sets the begin to 0. This incurrs improper suppression. The begin value will be set in the next ratelimit call by 1). Then the time interval check will be always false, and rs->printed will not be initialized. Although enough time passed, ratelimit may return 0 if rs->printed is not less than rs->burst. To reset interval properly, begin should be jiffies rather than 0. For an example code below: static DEFINE_RATELIMIT_STATE(mylimit, 1, 1); for (i = 1; i <= 10; i++) { if (__ratelimit(&mylimit)) printk("ratelimit test count %d\n", i); msleep(3000); } test result in the current code shows suppression even there is 3 seconds sleep. [ 78.391148] ratelimit test count 1 [ 81.295988] ratelimit test count 2 [ 87.315981] ratelimit test count 4 [ 93.336267] ratelimit test count 6 [ 99.356031] ratelimit test count 8 [ 105.376367] ratelimit test count 10 Signed-off-by: Jaewon Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/ratelimit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/ratelimit.c b/lib/ratelimit.c index 40e03ea2a967..2c5de86460c5 100644 --- a/lib/ratelimit.c +++ b/lib/ratelimit.c @@ -49,7 +49,7 @@ int ___ratelimit(struct ratelimit_state *rs, const char *func) if (rs->missed) printk(KERN_WARNING "%s: %d callbacks suppressed\n", func, rs->missed); - rs->begin = 0; + rs->begin = jiffies; rs->printed = 0; rs->missed = 0; } -- cgit v1.2.3 From 0b6ec8c0a3708f0a54b75ee1200772ec5226ec49 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Wed, 27 Jan 2016 15:37:58 +0100 Subject: debugobjects: Allow bigger number of early boot objects On my bigger s390 systems I always get "Out of memory. ODEBUG disabled". Since the number of objects is needed at compile time, we can not change the size dynamically before the caches etc are available. Doubling the size seems to do the trick. Since it is init data it will be freed anyway, this should be ok. Signed-off-by: Christian Borntraeger Link: http://lkml.kernel.org/r/1453905478-13409-1-git-send-email-borntraeger@de.ibm.com Signed-off-by: Thomas Gleixner --- lib/debugobjects.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/debugobjects.c b/lib/debugobjects.c index 547f7f923dbc..519b5a10fd70 100644 --- a/lib/debugobjects.c +++ b/lib/debugobjects.c @@ -21,7 +21,7 @@ #define ODEBUG_HASH_BITS 14 #define ODEBUG_HASH_SIZE (1 << ODEBUG_HASH_BITS) -#define ODEBUG_POOL_SIZE 512 +#define ODEBUG_POOL_SIZE 1024 #define ODEBUG_POOL_MIN_LEVEL 256 #define ODEBUG_CHUNK_SHIFT PAGE_SHIFT -- cgit v1.2.3