From 529a781ee07aaa58be8164d75ba5998eb7dd216c Mon Sep 17 00:00:00 2001 From: "zhangyi (F)" Date: Sat, 20 Jun 2020 10:54:27 +0800 Subject: jbd2: remove unused parameter in jbd2_journal_try_to_free_buffers() Parameter gfp_mask in jbd2_journal_try_to_free_buffers() is no longer used after commit <536fc240e7147> ("jbd2: clean up jbd2_journal_try_to_free_buffers()"), so just remove it. Signed-off-by: zhangyi (F) Link: https://lore.kernel.org/r/20200620025427.1756360-6-yi.zhang@huawei.com Signed-off-by: Theodore Ts'o --- include/linux/jbd2.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index d56128df2aff..a756a4cdf939 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1380,7 +1380,7 @@ extern int jbd2_journal_dirty_metadata (handle_t *, struct buffer_head *); extern int jbd2_journal_forget (handle_t *, struct buffer_head *); extern int jbd2_journal_invalidatepage(journal_t *, struct page *, unsigned int, unsigned int); -extern int jbd2_journal_try_to_free_buffers(journal_t *, struct page *, gfp_t); +extern int jbd2_journal_try_to_free_buffers(journal_t *journal, struct page *page); extern int jbd2_journal_stop(handle_t *); extern int jbd2_journal_flush (journal_t *); extern void jbd2_journal_lock_updates (journal_t *); -- cgit v1.2.3 From 2404b73c3f1a5f15726c6ecd226b56f6f992767f Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 10 Aug 2020 13:52:15 +0200 Subject: netfilter: avoid ipv6 -> nf_defrag_ipv6 module dependency nf_ct_frag6_gather is part of nf_defrag_ipv6.ko, not ipv6 core. The current use of the netfilter ipv6 stub indirections causes a module dependency between ipv6 and nf_defrag_ipv6. This prevents nf_defrag_ipv6 module from being removed because ipv6 can't be unloaded. Remove the indirection and always use a direct call. This creates a depency from nf_conntrack_bridge to nf_defrag_ipv6 instead: modinfo nf_conntrack depends: nf_conntrack,nf_defrag_ipv6,bridge .. and nf_conntrack already depends on nf_defrag_ipv6 anyway. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_ipv6.h | 18 ------------------ net/bridge/netfilter/nf_conntrack_bridge.c | 8 ++++++-- net/ipv6/netfilter.c | 3 --- 3 files changed, 6 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h index aac42c28fe62..9b67394471e1 100644 --- a/include/linux/netfilter_ipv6.h +++ b/include/linux/netfilter_ipv6.h @@ -58,7 +58,6 @@ struct nf_ipv6_ops { int (*output)(struct net *, struct sock *, struct sk_buff *)); int (*reroute)(struct sk_buff *skb, const struct nf_queue_entry *entry); #if IS_MODULE(CONFIG_IPV6) - int (*br_defrag)(struct net *net, struct sk_buff *skb, u32 user); int (*br_fragment)(struct net *net, struct sock *sk, struct sk_buff *skb, struct nf_bridge_frag_data *data, @@ -117,23 +116,6 @@ static inline int nf_ip6_route(struct net *net, struct dst_entry **dst, #include -static inline int nf_ipv6_br_defrag(struct net *net, struct sk_buff *skb, - u32 user) -{ -#if IS_MODULE(CONFIG_IPV6) - const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops(); - - if (!v6_ops) - return 1; - - return v6_ops->br_defrag(net, skb, user); -#elif IS_BUILTIN(CONFIG_IPV6) - return nf_ct_frag6_gather(net, skb, user); -#else - return 1; -#endif -} - int br_ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, struct nf_bridge_frag_data *data, int (*output)(struct net *, struct sock *sk, diff --git a/net/bridge/netfilter/nf_conntrack_bridge.c b/net/bridge/netfilter/nf_conntrack_bridge.c index 809673222382..8d033a75a766 100644 --- a/net/bridge/netfilter/nf_conntrack_bridge.c +++ b/net/bridge/netfilter/nf_conntrack_bridge.c @@ -168,6 +168,7 @@ static unsigned int nf_ct_br_defrag4(struct sk_buff *skb, static unsigned int nf_ct_br_defrag6(struct sk_buff *skb, const struct nf_hook_state *state) { +#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) u16 zone_id = NF_CT_DEFAULT_ZONE_ID; enum ip_conntrack_info ctinfo; struct br_input_skb_cb cb; @@ -180,14 +181,17 @@ static unsigned int nf_ct_br_defrag6(struct sk_buff *skb, br_skb_cb_save(skb, &cb, sizeof(struct inet6_skb_parm)); - err = nf_ipv6_br_defrag(state->net, skb, - IP_DEFRAG_CONNTRACK_BRIDGE_IN + zone_id); + err = nf_ct_frag6_gather(state->net, skb, + IP_DEFRAG_CONNTRACK_BRIDGE_IN + zone_id); /* queued */ if (err == -EINPROGRESS) return NF_STOLEN; br_skb_cb_restore(skb, &cb, IP6CB(skb)->frag_max_size); return err == 0 ? NF_ACCEPT : NF_DROP; +#else + return NF_ACCEPT; +#endif } static int nf_ct_br_ip_check(const struct sk_buff *skb) diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index 409e79b84a83..6d0e942d082d 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -245,9 +245,6 @@ static const struct nf_ipv6_ops ipv6ops = { .route_input = ip6_route_input, .fragment = ip6_fragment, .reroute = nf_ip6_reroute, -#if IS_MODULE(CONFIG_IPV6) && IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) - .br_defrag = nf_ct_frag6_gather, -#endif #if IS_MODULE(CONFIG_IPV6) .br_fragment = br_ip6_fragment, #endif -- cgit v1.2.3 From 9420139f516d7fbc248ce17f35275cb005ed98ea Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 14 Aug 2020 12:26:24 +0200 Subject: dma-pool: fix coherent pool allocations for IOMMU mappings When allocating coherent pool memory for an IOMMU mapping we don't care about the DMA mask. Move the guess for the initial GFP mask into the dma_direct_alloc_pages and pass dma_coherent_ok as a function pointer argument so that it doesn't get applied to the IOMMU case. Signed-off-by: Christoph Hellwig Tested-by: Amit Pundir --- drivers/iommu/dma-iommu.c | 4 +- include/linux/dma-direct.h | 3 -- include/linux/dma-mapping.h | 5 +- kernel/dma/direct.c | 13 +++-- kernel/dma/pool.c | 114 +++++++++++++++++++------------------------- 5 files changed, 62 insertions(+), 77 deletions(-) (limited to 'include/linux') diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 4959f5df21bd..5141d49a046b 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -1035,8 +1035,8 @@ static void *iommu_dma_alloc(struct device *dev, size_t size, if (IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) && !gfpflags_allow_blocking(gfp) && !coherent) - cpu_addr = dma_alloc_from_pool(dev, PAGE_ALIGN(size), &page, - gfp); + page = dma_alloc_from_pool(dev, PAGE_ALIGN(size), &cpu_addr, + gfp, NULL); else cpu_addr = iommu_dma_alloc_pages(dev, size, &page, gfp, attrs); if (!cpu_addr) diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h index 5a3ce2a24794..6e87225600ae 100644 --- a/include/linux/dma-direct.h +++ b/include/linux/dma-direct.h @@ -73,9 +73,6 @@ static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size, } u64 dma_direct_get_required_mask(struct device *dev); -gfp_t dma_direct_optimal_gfp_mask(struct device *dev, u64 dma_mask, - u64 *phys_mask); -bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size); void *dma_direct_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs); void dma_direct_free(struct device *dev, size_t size, void *cpu_addr, diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 016b96b384bd..52635e91143b 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -522,8 +522,9 @@ void *dma_common_pages_remap(struct page **pages, size_t size, pgprot_t prot, const void *caller); void dma_common_free_remap(void *cpu_addr, size_t size); -void *dma_alloc_from_pool(struct device *dev, size_t size, - struct page **ret_page, gfp_t flags); +struct page *dma_alloc_from_pool(struct device *dev, size_t size, + void **cpu_addr, gfp_t flags, + bool (*phys_addr_ok)(struct device *, phys_addr_t, size_t)); bool dma_free_from_pool(struct device *dev, void *start, size_t size); int diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c index bb0041e99659..db6ef07aec3b 100644 --- a/kernel/dma/direct.c +++ b/kernel/dma/direct.c @@ -43,7 +43,7 @@ u64 dma_direct_get_required_mask(struct device *dev) return (1ULL << (fls64(max_dma) - 1)) * 2 - 1; } -gfp_t dma_direct_optimal_gfp_mask(struct device *dev, u64 dma_mask, +static gfp_t dma_direct_optimal_gfp_mask(struct device *dev, u64 dma_mask, u64 *phys_limit) { u64 dma_limit = min_not_zero(dma_mask, dev->bus_dma_limit); @@ -68,7 +68,7 @@ gfp_t dma_direct_optimal_gfp_mask(struct device *dev, u64 dma_mask, return 0; } -bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size) +static bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size) { return phys_to_dma_direct(dev, phys) + size - 1 <= min_not_zero(dev->coherent_dma_mask, dev->bus_dma_limit); @@ -161,8 +161,13 @@ void *dma_direct_alloc_pages(struct device *dev, size_t size, size = PAGE_ALIGN(size); if (dma_should_alloc_from_pool(dev, gfp, attrs)) { - ret = dma_alloc_from_pool(dev, size, &page, gfp); - if (!ret) + u64 phys_mask; + + gfp |= dma_direct_optimal_gfp_mask(dev, dev->coherent_dma_mask, + &phys_mask); + page = dma_alloc_from_pool(dev, size, &ret, gfp, + dma_coherent_ok); + if (!page) return NULL; goto done; } diff --git a/kernel/dma/pool.c b/kernel/dma/pool.c index 6bc74a2d5127..5d071d4a3cba 100644 --- a/kernel/dma/pool.c +++ b/kernel/dma/pool.c @@ -196,93 +196,75 @@ static int __init dma_atomic_pool_init(void) } postcore_initcall(dma_atomic_pool_init); -static inline struct gen_pool *dma_guess_pool_from_device(struct device *dev) +static inline struct gen_pool *dma_guess_pool(struct gen_pool *prev, gfp_t gfp) { - u64 phys_mask; - gfp_t gfp; - - gfp = dma_direct_optimal_gfp_mask(dev, dev->coherent_dma_mask, - &phys_mask); - if (IS_ENABLED(CONFIG_ZONE_DMA) && gfp == GFP_DMA) + if (prev == NULL) { + if (IS_ENABLED(CONFIG_ZONE_DMA32) && (gfp & GFP_DMA32)) + return atomic_pool_dma32; + if (IS_ENABLED(CONFIG_ZONE_DMA) && (gfp & GFP_DMA)) + return atomic_pool_dma; + return atomic_pool_kernel; + } + if (prev == atomic_pool_kernel) + return atomic_pool_dma32 ? atomic_pool_dma32 : atomic_pool_dma; + if (prev == atomic_pool_dma32) return atomic_pool_dma; - if (IS_ENABLED(CONFIG_ZONE_DMA32) && gfp == GFP_DMA32) - return atomic_pool_dma32; - return atomic_pool_kernel; + return NULL; } -static inline struct gen_pool *dma_get_safer_pool(struct gen_pool *bad_pool) +static struct page *__dma_alloc_from_pool(struct device *dev, size_t size, + struct gen_pool *pool, void **cpu_addr, + bool (*phys_addr_ok)(struct device *, phys_addr_t, size_t)) { - if (bad_pool == atomic_pool_kernel) - return atomic_pool_dma32 ? : atomic_pool_dma; + unsigned long addr; + phys_addr_t phys; - if (bad_pool == atomic_pool_dma32) - return atomic_pool_dma; + addr = gen_pool_alloc(pool, size); + if (!addr) + return NULL; - return NULL; -} + phys = gen_pool_virt_to_phys(pool, addr); + if (phys_addr_ok && !phys_addr_ok(dev, phys, size)) { + gen_pool_free(pool, addr, size); + return NULL; + } -static inline struct gen_pool *dma_guess_pool(struct device *dev, - struct gen_pool *bad_pool) -{ - if (bad_pool) - return dma_get_safer_pool(bad_pool); + if (gen_pool_avail(pool) < atomic_pool_size) + schedule_work(&atomic_pool_work); - return dma_guess_pool_from_device(dev); + *cpu_addr = (void *)addr; + memset(*cpu_addr, 0, size); + return pfn_to_page(__phys_to_pfn(phys)); } -void *dma_alloc_from_pool(struct device *dev, size_t size, - struct page **ret_page, gfp_t flags) +struct page *dma_alloc_from_pool(struct device *dev, size_t size, + void **cpu_addr, gfp_t gfp, + bool (*phys_addr_ok)(struct device *, phys_addr_t, size_t)) { struct gen_pool *pool = NULL; - unsigned long val = 0; - void *ptr = NULL; - phys_addr_t phys; - - while (1) { - pool = dma_guess_pool(dev, pool); - if (!pool) { - WARN(1, "Failed to get suitable pool for %s\n", - dev_name(dev)); - break; - } - - val = gen_pool_alloc(pool, size); - if (!val) - continue; - - phys = gen_pool_virt_to_phys(pool, val); - if (dma_coherent_ok(dev, phys, size)) - break; - - gen_pool_free(pool, val, size); - val = 0; - } - - - if (val) { - *ret_page = pfn_to_page(__phys_to_pfn(phys)); - ptr = (void *)val; - memset(ptr, 0, size); + struct page *page; - if (gen_pool_avail(pool) < atomic_pool_size) - schedule_work(&atomic_pool_work); + while ((pool = dma_guess_pool(pool, gfp))) { + page = __dma_alloc_from_pool(dev, size, pool, cpu_addr, + phys_addr_ok); + if (page) + return page; } - return ptr; + WARN(1, "Failed to get suitable pool for %s\n", dev_name(dev)); + return NULL; } bool dma_free_from_pool(struct device *dev, void *start, size_t size) { struct gen_pool *pool = NULL; - while (1) { - pool = dma_guess_pool(dev, pool); - if (!pool) - return false; - - if (gen_pool_has_addr(pool, (unsigned long)start, size)) { - gen_pool_free(pool, (unsigned long)start, size); - return true; - } + while ((pool = dma_guess_pool(pool, 0))) { + if (!gen_pool_has_addr(pool, (unsigned long)start, size)) + continue; + gen_pool_free(pool, (unsigned long)start, size); + return true; } + + return false; } -- cgit v1.2.3 From 29e44f4535faa71a70827af3639b5e6762d8f02a Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 17 Aug 2020 11:07:28 +0100 Subject: watch_queue: Limit the number of watches a user can hold Impose a limit on the number of watches that a user can hold so that they can't use this mechanism to fill up all the available memory. This is done by putting a counter in user_struct that's incremented when a watch is allocated and decreased when it is released. If the number exceeds the RLIMIT_NOFILE limit, the watch is rejected with EAGAIN. This can be tested by the following means: (1) Create a watch queue and attach it to fd 5 in the program given - in this case, bash: keyctl watch_session /tmp/nlog /tmp/gclog 5 bash (2) In the shell, set the maximum number of files to, say, 99: ulimit -n 99 (3) Add 200 keyrings: for ((i=0; i<200; i++)); do keyctl newring a$i @s || break; done (4) Try to watch all of the keyrings: for ((i=0; i<200; i++)); do echo $i; keyctl watch_add 5 %:a$i || break; done This should fail when the number of watches belonging to the user hits 99. (5) Remove all the keyrings and all of those watches should go away: for ((i=0; i<200; i++)); do keyctl unlink %:a$i; done (6) Kill off the watch queue by exiting the shell spawned by watch_session. Fixes: c73be61cede5 ("pipe: Add general notification queue support") Reported-by: Linus Torvalds Signed-off-by: David Howells Reviewed-by: Jarkko Sakkinen Signed-off-by: Linus Torvalds --- include/linux/sched/user.h | 3 +++ kernel/watch_queue.c | 8 ++++++++ 2 files changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched/user.h b/include/linux/sched/user.h index 917d88edb7b9..a8ec3b6093fc 100644 --- a/include/linux/sched/user.h +++ b/include/linux/sched/user.h @@ -36,6 +36,9 @@ struct user_struct { defined(CONFIG_NET) || defined(CONFIG_IO_URING) atomic_long_t locked_vm; #endif +#ifdef CONFIG_WATCH_QUEUE + atomic_t nr_watches; /* The number of watches this user currently has */ +#endif /* Miscellaneous per-user rate limit */ struct ratelimit_state ratelimit; diff --git a/kernel/watch_queue.c b/kernel/watch_queue.c index f74020f6bd9d..0ef8f65bd2d7 100644 --- a/kernel/watch_queue.c +++ b/kernel/watch_queue.c @@ -393,6 +393,7 @@ static void free_watch(struct rcu_head *rcu) struct watch *watch = container_of(rcu, struct watch, rcu); put_watch_queue(rcu_access_pointer(watch->queue)); + atomic_dec(&watch->cred->user->nr_watches); put_cred(watch->cred); } @@ -452,6 +453,13 @@ int add_watch_to_object(struct watch *watch, struct watch_list *wlist) watch->cred = get_current_cred(); rcu_assign_pointer(watch->watch_list, wlist); + if (atomic_inc_return(&watch->cred->user->nr_watches) > + task_rlimit(current, RLIMIT_NOFILE)) { + atomic_dec(&watch->cred->user->nr_watches); + put_cred(watch->cred); + return -EAGAIN; + } + spin_lock_bh(&wqueue->lock); kref_get(&wqueue->usage); kref_get(&watch->usage); -- cgit v1.2.3 From 0b76e642f9ad2471e899e2dd71b9543b7e85e9f6 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 16 Aug 2020 15:25:49 -0700 Subject: phylink: : fix function prototype kernel-doc warning Fix a kernel-doc warning for the pcs_config() function prototype: ../include/linux/phylink.h:406: warning: Excess function parameter 'permit_pause_to_mac' description in 'pcs_config' Fixes: 7137e18f6f88 ("net: phylink: add struct phylink_pcs") Signed-off-by: Randy Dunlap Cc: Russell King Cc: David S. Miller Cc: netdev@vger.kernel.org Signed-off-by: David S. Miller --- include/linux/phylink.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/phylink.h b/include/linux/phylink.h index a8e876317e25..c36fb41a7d90 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -402,7 +402,8 @@ void pcs_get_state(struct phylink_pcs *pcs, * For most 10GBASE-R, there is no advertisement. */ int pcs_config(struct phylink_pcs *pcs, unsigned int mode, - phy_interface_t interface, const unsigned long *advertising); + phy_interface_t interface, const unsigned long *advertising, + bool permit_pause_to_mac); /** * pcs_an_restart() - restart 802.3z BaseX autonegotiation -- cgit v1.2.3 From bd05220c7be3356046861c317d9c287ca50445ba Mon Sep 17 00:00:00 2001 From: Jessica Clarke Date: Tue, 11 Aug 2020 19:24:57 +0100 Subject: arch/ia64: Restore arch-specific pgd_offset_k implementation IA-64 is special and treats pgd_offset_k() differently to pgd_offset(), using different formulae to calculate the indices into the kernel and user PGDs. The index into the user PGDs takes into account the region number, but the index into the kernel (init_mm) PGD always assumes a predefined kernel region number. Commit 974b9b2c68f3 ("mm: consolidate pte_index() and pte_offset_*() definitions") made IA-64 use a generic pgd_offset_k() which incorrectly used pgd_index() for kernel page tables. As a result, the index into the kernel PGD was going out of bounds and the kernel hung during early boot. Allow overrides of pgd_offset_k() and override it on IA-64 with the old implementation that will correctly index the kernel PGD. Fixes: 974b9b2c68f3 ("mm: consolidate pte_index() and pte_offset_*() definitions") Reported-by: John Paul Adrian Glaubitz Signed-off-by: Jessica Clarke Tested-by: John Paul Adrian Glaubitz Acked-by: Tony Luck Signed-off-by: Mike Rapoport --- arch/ia64/include/asm/pgtable.h | 9 +++++++++ include/linux/pgtable.h | 2 ++ 2 files changed, 11 insertions(+) (limited to 'include/linux') diff --git a/arch/ia64/include/asm/pgtable.h b/arch/ia64/include/asm/pgtable.h index 10850897a91c..779b6972aa84 100644 --- a/arch/ia64/include/asm/pgtable.h +++ b/arch/ia64/include/asm/pgtable.h @@ -366,6 +366,15 @@ pgd_index (unsigned long address) } #define pgd_index pgd_index +/* + * In the kernel's mapped region we know everything is in region number 5, so + * as an optimisation its PGD already points to the area for that region. + * However, this also means that we cannot use pgd_index() and we must + * never add the region here. + */ +#define pgd_offset_k(addr) \ + (init_mm.pgd + (((addr) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1))) + /* Look up a pgd entry in the gate area. On IA-64, the gate-area resides in the kernel-mapped segment, hence we use pgd_offset_k() here. */ diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index a124c21e3204..e8cbc2e795d5 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -117,7 +117,9 @@ static inline pgd_t *pgd_offset_pgd(pgd_t *pgd, unsigned long address) * a shortcut which implies the use of the kernel's pgd, instead * of a process's */ +#ifndef pgd_offset_k #define pgd_offset_k(address) pgd_offset(&init_mm, (address)) +#endif /* * In many cases it is known that a virtual address is mapped at PMD or PTE -- cgit v1.2.3 From 2ac6795fcc085e8d03649f1bbd0d70aaff612cad Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Mon, 17 Aug 2020 18:12:49 +0530 Subject: clocksource/drivers: Add CLINT timer driver We add a separate CLINT timer driver for Linux RISC-V M-mode (i.e. RISC-V NoMMU kernel). The CLINT MMIO device provides three things: 1. 64bit free running counter register 2. 64bit per-CPU time compare registers 3. 32bit per-CPU inter-processor interrupt registers Unlike other timer devices, CLINT provides IPI registers along with timer registers. To use CLINT IPI registers, the CLINT timer driver provides IPI related callbacks to arch/riscv. Signed-off-by: Anup Patel Tested-by: Emil Renner Berhing Acked-by: Daniel Lezcano Reviewed-by: Atish Patra Reviewed-by: Palmer Dabbelt Signed-off-by: Palmer Dabbelt --- drivers/clocksource/Kconfig | 9 ++ drivers/clocksource/Makefile | 1 + drivers/clocksource/timer-clint.c | 226 ++++++++++++++++++++++++++++++++++++++ include/linux/cpuhotplug.h | 1 + 4 files changed, 237 insertions(+) create mode 100644 drivers/clocksource/timer-clint.c (limited to 'include/linux') diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig index 3576ad7bd380..d95cc7234a66 100644 --- a/drivers/clocksource/Kconfig +++ b/drivers/clocksource/Kconfig @@ -663,6 +663,15 @@ config RISCV_TIMER is accessed via both the SBI and the rdcycle instruction. This is required for all RISC-V systems. +config CLINT_TIMER + bool "CLINT Timer for the RISC-V platform" if COMPILE_TEST + depends on GENERIC_SCHED_CLOCK && RISCV + select TIMER_PROBE + select TIMER_OF + help + This option enables the CLINT timer for RISC-V systems. The CLINT + driver is usually used for NoMMU RISC-V systems. + config CSKY_MP_TIMER bool "SMP Timer for the C-SKY platform" if COMPILE_TEST depends on CSKY diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile index eaedb7240ae7..1c444cc3bb44 100644 --- a/drivers/clocksource/Makefile +++ b/drivers/clocksource/Makefile @@ -89,6 +89,7 @@ obj-$(CONFIG_CLKSRC_ST_LPC) += clksrc_st_lpc.o obj-$(CONFIG_X86_NUMACHIP) += numachip.o obj-$(CONFIG_ATCPIT100_TIMER) += timer-atcpit100.o obj-$(CONFIG_RISCV_TIMER) += timer-riscv.o +obj-$(CONFIG_CLINT_TIMER) += timer-clint.o obj-$(CONFIG_CSKY_MP_TIMER) += timer-mp-csky.o obj-$(CONFIG_GX6605S_TIMER) += timer-gx6605s.o obj-$(CONFIG_HYPERV_TIMER) += hyperv_timer.o diff --git a/drivers/clocksource/timer-clint.c b/drivers/clocksource/timer-clint.c new file mode 100644 index 000000000000..8eeafa82c03d --- /dev/null +++ b/drivers/clocksource/timer-clint.c @@ -0,0 +1,226 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020 Western Digital Corporation or its affiliates. + * + * Most of the M-mode (i.e. NoMMU) RISC-V systems usually have a + * CLINT MMIO timer device. + */ + +#define pr_fmt(fmt) "clint: " fmt +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define CLINT_IPI_OFF 0 +#define CLINT_TIMER_CMP_OFF 0x4000 +#define CLINT_TIMER_VAL_OFF 0xbff8 + +/* CLINT manages IPI and Timer for RISC-V M-mode */ +static u32 __iomem *clint_ipi_base; +static u64 __iomem *clint_timer_cmp; +static u64 __iomem *clint_timer_val; +static unsigned long clint_timer_freq; +static unsigned int clint_timer_irq; + +static void clint_send_ipi(const struct cpumask *target) +{ + unsigned int cpu; + + for_each_cpu(cpu, target) + writel(1, clint_ipi_base + cpuid_to_hartid_map(cpu)); +} + +static void clint_clear_ipi(void) +{ + writel(0, clint_ipi_base + cpuid_to_hartid_map(smp_processor_id())); +} + +static struct riscv_ipi_ops clint_ipi_ops = { + .ipi_inject = clint_send_ipi, + .ipi_clear = clint_clear_ipi, +}; + +#ifdef CONFIG_64BIT +#define clint_get_cycles() readq_relaxed(clint_timer_val) +#else +#define clint_get_cycles() readl_relaxed(clint_timer_val) +#define clint_get_cycles_hi() readl_relaxed(((u32 *)clint_timer_val) + 1) +#endif + +#ifdef CONFIG_64BIT +static u64 notrace clint_get_cycles64(void) +{ + return clint_get_cycles(); +} +#else /* CONFIG_64BIT */ +static u64 notrace clint_get_cycles64(void) +{ + u32 hi, lo; + + do { + hi = clint_get_cycles_hi(); + lo = clint_get_cycles(); + } while (hi != clint_get_cycles_hi()); + + return ((u64)hi << 32) | lo; +} +#endif /* CONFIG_64BIT */ + +static u64 clint_rdtime(struct clocksource *cs) +{ + return clint_get_cycles64(); +} + +static struct clocksource clint_clocksource = { + .name = "clint_clocksource", + .rating = 300, + .mask = CLOCKSOURCE_MASK(64), + .flags = CLOCK_SOURCE_IS_CONTINUOUS, + .read = clint_rdtime, +}; + +static int clint_clock_next_event(unsigned long delta, + struct clock_event_device *ce) +{ + void __iomem *r = clint_timer_cmp + + cpuid_to_hartid_map(smp_processor_id()); + + csr_set(CSR_IE, IE_TIE); + writeq_relaxed(clint_get_cycles64() + delta, r); + return 0; +} + +static DEFINE_PER_CPU(struct clock_event_device, clint_clock_event) = { + .name = "clint_clockevent", + .features = CLOCK_EVT_FEAT_ONESHOT, + .rating = 100, + .set_next_event = clint_clock_next_event, +}; + +static int clint_timer_starting_cpu(unsigned int cpu) +{ + struct clock_event_device *ce = per_cpu_ptr(&clint_clock_event, cpu); + + ce->cpumask = cpumask_of(cpu); + clockevents_config_and_register(ce, clint_timer_freq, 100, 0x7fffffff); + + enable_percpu_irq(clint_timer_irq, + irq_get_trigger_type(clint_timer_irq)); + return 0; +} + +static int clint_timer_dying_cpu(unsigned int cpu) +{ + disable_percpu_irq(clint_timer_irq); + return 0; +} + +static irqreturn_t clint_timer_interrupt(int irq, void *dev_id) +{ + struct clock_event_device *evdev = this_cpu_ptr(&clint_clock_event); + + csr_clear(CSR_IE, IE_TIE); + evdev->event_handler(evdev); + + return IRQ_HANDLED; +} + +static int __init clint_timer_init_dt(struct device_node *np) +{ + int rc; + u32 i, nr_irqs; + void __iomem *base; + struct of_phandle_args oirq; + + /* + * Ensure that CLINT device interrupts are either RV_IRQ_TIMER or + * RV_IRQ_SOFT. If it's anything else then we ignore the device. + */ + nr_irqs = of_irq_count(np); + for (i = 0; i < nr_irqs; i++) { + if (of_irq_parse_one(np, i, &oirq)) { + pr_err("%pOFP: failed to parse irq %d.\n", np, i); + continue; + } + + if ((oirq.args_count != 1) || + (oirq.args[0] != RV_IRQ_TIMER && + oirq.args[0] != RV_IRQ_SOFT)) { + pr_err("%pOFP: invalid irq %d (hwirq %d)\n", + np, i, oirq.args[0]); + return -ENODEV; + } + + /* Find parent irq domain and map timer irq */ + if (!clint_timer_irq && + oirq.args[0] == RV_IRQ_TIMER && + irq_find_host(oirq.np)) + clint_timer_irq = irq_of_parse_and_map(np, i); + } + + /* If CLINT timer irq not found then fail */ + if (!clint_timer_irq) { + pr_err("%pOFP: timer irq not found\n", np); + return -ENODEV; + } + + base = of_iomap(np, 0); + if (!base) { + pr_err("%pOFP: could not map registers\n", np); + return -ENODEV; + } + + clint_ipi_base = base + CLINT_IPI_OFF; + clint_timer_cmp = base + CLINT_TIMER_CMP_OFF; + clint_timer_val = base + CLINT_TIMER_VAL_OFF; + clint_timer_freq = riscv_timebase; + + pr_info("%pOFP: timer running at %ld Hz\n", np, clint_timer_freq); + + rc = clocksource_register_hz(&clint_clocksource, clint_timer_freq); + if (rc) { + pr_err("%pOFP: clocksource register failed [%d]\n", np, rc); + goto fail_iounmap; + } + + sched_clock_register(clint_get_cycles64, 64, clint_timer_freq); + + rc = request_percpu_irq(clint_timer_irq, clint_timer_interrupt, + "clint-timer", &clint_clock_event); + if (rc) { + pr_err("registering percpu irq failed [%d]\n", rc); + goto fail_iounmap; + } + + rc = cpuhp_setup_state(CPUHP_AP_CLINT_TIMER_STARTING, + "clockevents/clint/timer:starting", + clint_timer_starting_cpu, + clint_timer_dying_cpu); + if (rc) { + pr_err("%pOFP: cpuhp setup state failed [%d]\n", np, rc); + goto fail_free_irq; + } + + riscv_set_ipi_ops(&clint_ipi_ops); + clint_clear_ipi(); + + return 0; + +fail_free_irq: + free_irq(clint_timer_irq, &clint_clock_event); +fail_iounmap: + iounmap(base); + return rc; +} + +TIMER_OF_DECLARE(clint_timer, "riscv,clint0", clint_timer_init_dt); +TIMER_OF_DECLARE(clint_timer1, "sifive,clint0", clint_timer_init_dt); diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index a2710e654b64..3215023d4852 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -132,6 +132,7 @@ enum cpuhp_state { CPUHP_AP_MIPS_GIC_TIMER_STARTING, CPUHP_AP_ARC_TIMER_STARTING, CPUHP_AP_RISCV_TIMER_STARTING, + CPUHP_AP_CLINT_TIMER_STARTING, CPUHP_AP_CSKY_TIMER_STARTING, CPUHP_AP_HYPERV_TIMER_STARTING, CPUHP_AP_KVM_STARTING, -- cgit v1.2.3