From d0bfb940ecabf0b44fb1fd80d8d60594e569e5ec Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Mon, 15 Dec 2008 13:52:10 +0100 Subject: KVM: New guest debug interface This rips out the support for KVM_DEBUG_GUEST and introduces a new IOCTL instead: KVM_SET_GUEST_DEBUG. The IOCTL payload consists of a generic part, controlling the "main switch" and the single-step feature. The arch specific part adds an x86 interface for intercepting both types of debug exceptions separately and re-injecting them when the host was not interested. Moveover, the foundation for guest debugging via debug registers is layed. To signal breakpoint events properly back to userland, an arch-specific data block is now returned along KVM_EXIT_DEBUG. For x86, the arch block contains the PC, the debug exception, and relevant debug registers to tell debug events properly apart. The availability of this new interface is signaled by KVM_CAP_SET_GUEST_DEBUG. Empty stubs for not yet supported archs are provided. Note that both SVM and VTX are supported, but only the latter was tested yet. Based on the experience with all those VTX corner case, I would be fairly surprised if SVM will work out of the box. Signed-off-by: Jan Kiszka Signed-off-by: Avi Kivity --- include/linux/kvm_host.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux/kvm_host.h') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index bf6f703642fc..e92212f970db 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -73,7 +73,7 @@ struct kvm_vcpu { struct kvm_run *run; int guest_mode; unsigned long requests; - struct kvm_guest_debug guest_debug; + unsigned long guest_debug; int fpu_active; int guest_fpu_loaded; wait_queue_head_t wq; @@ -255,8 +255,8 @@ int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, struct kvm_mp_state *mp_state); int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, struct kvm_mp_state *mp_state); -int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu, - struct kvm_debug_guest *dbg); +int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, + struct kvm_guest_debug *dbg); int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run); int kvm_arch_init(void *opaque); -- cgit v1.2.3 From 67346440e83d2a2f2e9801f370b6240317c7d9bd Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Tue, 6 Jan 2009 10:03:01 +0800 Subject: KVM: Remove duplicated prototype of kvm_arch_destroy_vm Signed-off-by: Sheng Yang Signed-off-by: Avi Kivity --- include/linux/kvm_host.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux/kvm_host.h') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index e92212f970db..3cf0ede3fd73 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -237,7 +237,6 @@ int kvm_vm_ioctl_set_memory_region(struct kvm *kvm, int user_alloc); long kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg); -void kvm_arch_destroy_vm(struct kvm *kvm); int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu); int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu); -- cgit v1.2.3 From 75858a84a6207f5e60196f6bbd18fde4250e5759 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 4 Jan 2009 17:10:50 +0200 Subject: KVM: Interrupt mask notifiers for ioapic Allow clients to request notifications when the guest masks or unmasks a particular irq line. This complements irq ack notifications, as the guest will not ack an irq line that is masked. Currently implemented for the ioapic only. Signed-off-by: Avi Kivity --- include/linux/kvm_host.h | 17 +++++++++++++++++ virt/kvm/ioapic.c | 6 ++++++ virt/kvm/irq_comm.c | 24 ++++++++++++++++++++++++ virt/kvm/kvm_main.c | 3 +++ 4 files changed, 50 insertions(+) (limited to 'include/linux/kvm_host.h') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 3cf0ede3fd73..99963f36a6db 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -127,6 +127,10 @@ struct kvm { struct kvm_coalesced_mmio_ring *coalesced_mmio_ring; #endif +#ifdef CONFIG_HAVE_KVM_IRQCHIP + struct hlist_head mask_notifier_list; +#endif + #ifdef KVM_ARCH_WANT_MMU_NOTIFIER struct mmu_notifier mmu_notifier; unsigned long mmu_notifier_seq; @@ -320,6 +324,19 @@ struct kvm_assigned_dev_kernel { struct pci_dev *dev; struct kvm *kvm; }; + +struct kvm_irq_mask_notifier { + void (*func)(struct kvm_irq_mask_notifier *kimn, bool masked); + int irq; + struct hlist_node link; +}; + +void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq, + struct kvm_irq_mask_notifier *kimn); +void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq, + struct kvm_irq_mask_notifier *kimn); +void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask); + void kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level); void kvm_notify_acked_irq(struct kvm *kvm, unsigned gsi); void kvm_register_irq_ack_notifier(struct kvm *kvm, diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c index 23b81cf242af..e85a2bcd2db1 100644 --- a/virt/kvm/ioapic.c +++ b/virt/kvm/ioapic.c @@ -101,6 +101,7 @@ static void ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx) static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) { unsigned index; + bool mask_before, mask_after; switch (ioapic->ioregsel) { case IOAPIC_REG_VERSION: @@ -120,6 +121,7 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) ioapic_debug("change redir index %x val %x\n", index, val); if (index >= IOAPIC_NUM_PINS) return; + mask_before = ioapic->redirtbl[index].fields.mask; if (ioapic->ioregsel & 1) { ioapic->redirtbl[index].bits &= 0xffffffff; ioapic->redirtbl[index].bits |= (u64) val << 32; @@ -128,6 +130,9 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) ioapic->redirtbl[index].bits |= (u32) val; ioapic->redirtbl[index].fields.remote_irr = 0; } + mask_after = ioapic->redirtbl[index].fields.mask; + if (mask_before != mask_after) + kvm_fire_mask_notifiers(ioapic->kvm, index, mask_after); if (ioapic->irr & (1 << index)) ioapic_service(ioapic, index); break; @@ -426,3 +431,4 @@ int kvm_ioapic_init(struct kvm *kvm) kvm_io_bus_register_dev(&kvm->mmio_bus, &ioapic->dev); return 0; } + diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index aa5d1e5c497e..5162a411e4d2 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c @@ -99,3 +99,27 @@ void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id) clear_bit(irq_source_id, &kvm->arch.irq_states[i]); clear_bit(irq_source_id, &kvm->arch.irq_sources_bitmap); } + +void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq, + struct kvm_irq_mask_notifier *kimn) +{ + kimn->irq = irq; + hlist_add_head(&kimn->link, &kvm->mask_notifier_list); +} + +void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq, + struct kvm_irq_mask_notifier *kimn) +{ + hlist_del(&kimn->link); +} + +void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask) +{ + struct kvm_irq_mask_notifier *kimn; + struct hlist_node *n; + + hlist_for_each_entry(kimn, n, &kvm->mask_notifier_list, link) + if (kimn->irq == irq) + kimn->func(kimn, mask); +} + diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 04401e17c758..786a3ae373b0 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -842,6 +842,9 @@ static struct kvm *kvm_create_vm(void) if (IS_ERR(kvm)) goto out; +#ifdef CONFIG_HAVE_KVM_IRQCHIP + INIT_HLIST_HEAD(&kvm->mask_notifier_list); +#endif #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET page = alloc_page(GFP_KERNEL | __GFP_ZERO); -- cgit v1.2.3 From 399ec807ddc38ecccf8c06dbde04531cbdc63e11 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 19 Nov 2008 13:58:46 +0200 Subject: KVM: Userspace controlled irq routing Currently KVM has a static routing from GSI numbers to interrupts (namely, 0-15 are mapped 1:1 to both PIC and IOAPIC, and 16:23 are mapped 1:1 to the IOAPIC). This is insufficient for several reasons: - HPET requires non 1:1 mapping for the timer interrupt - MSIs need a new method to assign interrupt numbers and dispatch them - ACPI APIC mode needs to be able to reassign the PCI LINK interrupts to the ioapics This patch implements an interrupt routing table (as a linked list, but this can be easily changed) and a userspace interface to replace the table. The routing table is initialized according to the current hardwired mapping. Signed-off-by: Avi Kivity --- arch/ia64/kvm/kvm-ia64.c | 5 ++ arch/x86/kvm/x86.c | 6 ++ include/linux/kvm.h | 33 ++++++++++ include/linux/kvm_host.h | 31 +++++++++ virt/kvm/irq_comm.c | 168 +++++++++++++++++++++++++++++++++++++++++++++-- virt/kvm/kvm_main.c | 36 ++++++++++ 6 files changed, 275 insertions(+), 4 deletions(-) (limited to 'include/linux/kvm_host.h') diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index 1477f91617a5..dbf527a57341 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -919,6 +919,11 @@ long kvm_arch_vm_ioctl(struct file *filp, r = kvm_ioapic_init(kvm); if (r) goto out; + r = kvm_setup_default_irq_routing(kvm); + if (r) { + kfree(kvm->arch.vioapic); + goto out; + } break; case KVM_IRQ_LINE: { struct kvm_irq_level irq_event; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 141a0166e51c..32e3a7ec6ad2 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1835,6 +1835,12 @@ long kvm_arch_vm_ioctl(struct file *filp, } } else goto out; + r = kvm_setup_default_irq_routing(kvm); + if (r) { + kfree(kvm->arch.vpic); + kfree(kvm->arch.vioapic); + goto out; + } break; case KVM_CREATE_PIT: mutex_lock(&kvm->lock); diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 73f348066866..7a5d73a8d4fa 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -398,6 +398,38 @@ struct kvm_trace_rec { #endif #if defined(CONFIG_X86) #define KVM_CAP_REINJECT_CONTROL 24 +#endif +#if defined(CONFIG_X86)||defined(CONFIG_IA64) +#define KVM_CAP_IRQ_ROUTING 25 +#endif + +#ifdef KVM_CAP_IRQ_ROUTING + +struct kvm_irq_routing_irqchip { + __u32 irqchip; + __u32 pin; +}; + +/* gsi routing entry types */ +#define KVM_IRQ_ROUTING_IRQCHIP 1 + +struct kvm_irq_routing_entry { + __u32 gsi; + __u32 type; + __u32 flags; + __u32 pad; + union { + struct kvm_irq_routing_irqchip irqchip; + __u32 pad[8]; + } u; +}; + +struct kvm_irq_routing { + __u32 nr; + __u32 flags; + struct kvm_irq_routing_entry entries[0]; +}; + #endif /* @@ -430,6 +462,7 @@ struct kvm_trace_rec { _IOW(KVMIO, 0x68, struct kvm_coalesced_mmio_zone) #define KVM_ASSIGN_PCI_DEVICE _IOR(KVMIO, 0x69, \ struct kvm_assigned_pci_dev) +#define KVM_SET_GSI_ROUTING _IOW(KVMIO, 0x6a, struct kvm_irq_routing) #define KVM_ASSIGN_IRQ _IOR(KVMIO, 0x70, \ struct kvm_assigned_irq) #define KVM_REINJECT_CONTROL _IO(KVMIO, 0x71) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 99963f36a6db..ce285e01bd57 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -107,6 +107,19 @@ struct kvm_memory_slot { int user_alloc; }; +struct kvm_kernel_irq_routing_entry { + u32 gsi; + void (*set)(struct kvm_kernel_irq_routing_entry *e, + struct kvm *kvm, int level); + union { + struct { + unsigned irqchip; + unsigned pin; + } irqchip; + }; + struct list_head link; +}; + struct kvm { struct mutex lock; /* protects the vcpus array and APIC accesses */ spinlock_t mmu_lock; @@ -128,6 +141,7 @@ struct kvm { #endif #ifdef CONFIG_HAVE_KVM_IRQCHIP + struct list_head irq_routing; /* of kvm_kernel_irq_routing_entry */ struct hlist_head mask_notifier_list; #endif @@ -480,4 +494,21 @@ static inline int mmu_notifier_retry(struct kvm_vcpu *vcpu, unsigned long mmu_se } #endif +#ifdef CONFIG_HAVE_KVM_IRQCHIP + +#define KVM_MAX_IRQ_ROUTES 1024 + +int kvm_setup_default_irq_routing(struct kvm *kvm); +int kvm_set_irq_routing(struct kvm *kvm, + const struct kvm_irq_routing_entry *entries, + unsigned nr, + unsigned flags); +void kvm_free_irq_routing(struct kvm *kvm); + +#else + +static inline void kvm_free_irq_routing(struct kvm *kvm) {} + +#endif + #endif diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index 5162a411e4d2..a797fa5e6420 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c @@ -24,9 +24,24 @@ #include "ioapic.h" +static void kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e, + struct kvm *kvm, int level) +{ +#ifdef CONFIG_X86 + kvm_pic_set_irq(pic_irqchip(kvm), e->irqchip.pin, level); +#endif +} + +static void kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e, + struct kvm *kvm, int level) +{ + kvm_ioapic_set_irq(kvm->arch.vioapic, e->irqchip.pin, level); +} + /* This should be called with the kvm->lock mutex held */ void kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level) { + struct kvm_kernel_irq_routing_entry *e; unsigned long *irq_state = (unsigned long *)&kvm->arch.irq_states[irq]; /* Logical OR for level trig interrupt */ @@ -39,10 +54,9 @@ void kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level) * IOAPIC. So set the bit in both. The guest will ignore * writes to the unused one. */ - kvm_ioapic_set_irq(kvm->arch.vioapic, irq, !!(*irq_state)); -#ifdef CONFIG_X86 - kvm_pic_set_irq(pic_irqchip(kvm), irq, !!(*irq_state)); -#endif + list_for_each_entry(e, &kvm->irq_routing, link) + if (e->gsi == irq) + e->set(e, kvm, !!(*irq_state)); } void kvm_notify_acked_irq(struct kvm *kvm, unsigned gsi) @@ -123,3 +137,149 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask) kimn->func(kimn, mask); } +static void __kvm_free_irq_routing(struct list_head *irq_routing) +{ + struct kvm_kernel_irq_routing_entry *e, *n; + + list_for_each_entry_safe(e, n, irq_routing, link) + kfree(e); +} + +void kvm_free_irq_routing(struct kvm *kvm) +{ + __kvm_free_irq_routing(&kvm->irq_routing); +} + +int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e, + const struct kvm_irq_routing_entry *ue) +{ + int r = -EINVAL; + int delta; + + e->gsi = ue->gsi; + switch (ue->type) { + case KVM_IRQ_ROUTING_IRQCHIP: + delta = 0; + switch (ue->u.irqchip.irqchip) { + case KVM_IRQCHIP_PIC_MASTER: + e->set = kvm_set_pic_irq; + break; + case KVM_IRQCHIP_PIC_SLAVE: + e->set = kvm_set_pic_irq; + delta = 8; + break; + case KVM_IRQCHIP_IOAPIC: + e->set = kvm_set_ioapic_irq; + break; + default: + goto out; + } + e->irqchip.irqchip = ue->u.irqchip.irqchip; + e->irqchip.pin = ue->u.irqchip.pin + delta; + break; + default: + goto out; + } + r = 0; +out: + return r; +} + + +int kvm_set_irq_routing(struct kvm *kvm, + const struct kvm_irq_routing_entry *ue, + unsigned nr, + unsigned flags) +{ + struct list_head irq_list = LIST_HEAD_INIT(irq_list); + struct list_head tmp = LIST_HEAD_INIT(tmp); + struct kvm_kernel_irq_routing_entry *e = NULL; + unsigned i; + int r; + + for (i = 0; i < nr; ++i) { + r = -EINVAL; + if (ue->gsi >= KVM_MAX_IRQ_ROUTES) + goto out; + if (ue->flags) + goto out; + r = -ENOMEM; + e = kzalloc(sizeof(*e), GFP_KERNEL); + if (!e) + goto out; + r = setup_routing_entry(e, ue); + if (r) + goto out; + ++ue; + list_add(&e->link, &irq_list); + e = NULL; + } + + mutex_lock(&kvm->lock); + list_splice(&kvm->irq_routing, &tmp); + INIT_LIST_HEAD(&kvm->irq_routing); + list_splice(&irq_list, &kvm->irq_routing); + INIT_LIST_HEAD(&irq_list); + list_splice(&tmp, &irq_list); + mutex_unlock(&kvm->lock); + + r = 0; + +out: + kfree(e); + __kvm_free_irq_routing(&irq_list); + return r; +} + +#define IOAPIC_ROUTING_ENTRY(irq) \ + { .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP, \ + .u.irqchip.irqchip = KVM_IRQCHIP_IOAPIC, .u.irqchip.pin = (irq) } +#define ROUTING_ENTRY1(irq) IOAPIC_ROUTING_ENTRY(irq) + +#ifdef CONFIG_X86 +#define SELECT_PIC(irq) \ + ((irq) < 8 ? KVM_IRQCHIP_PIC_MASTER : KVM_IRQCHIP_PIC_SLAVE) +# define PIC_ROUTING_ENTRY(irq) \ + { .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP, \ + .u.irqchip.irqchip = SELECT_PIC(irq), .u.irqchip.pin = (irq) % 8 } +# define ROUTING_ENTRY2(irq) \ + IOAPIC_ROUTING_ENTRY(irq), PIC_ROUTING_ENTRY(irq) +#else +# define ROUTING_ENTRY2(irq) \ + IOAPIC_ROUTING_ENTRY(irq) +#endif + +static const struct kvm_irq_routing_entry default_routing[] = { + ROUTING_ENTRY2(0), ROUTING_ENTRY2(1), + ROUTING_ENTRY2(2), ROUTING_ENTRY2(3), + ROUTING_ENTRY2(4), ROUTING_ENTRY2(5), + ROUTING_ENTRY2(6), ROUTING_ENTRY2(7), + ROUTING_ENTRY2(8), ROUTING_ENTRY2(9), + ROUTING_ENTRY2(10), ROUTING_ENTRY2(11), + ROUTING_ENTRY2(12), ROUTING_ENTRY2(13), + ROUTING_ENTRY2(14), ROUTING_ENTRY2(15), + ROUTING_ENTRY1(16), ROUTING_ENTRY1(17), + ROUTING_ENTRY1(18), ROUTING_ENTRY1(19), + ROUTING_ENTRY1(20), ROUTING_ENTRY1(21), + ROUTING_ENTRY1(22), ROUTING_ENTRY1(23), +#ifdef CONFIG_IA64 + ROUTING_ENTRY1(24), ROUTING_ENTRY1(25), + ROUTING_ENTRY1(26), ROUTING_ENTRY1(27), + ROUTING_ENTRY1(28), ROUTING_ENTRY1(29), + ROUTING_ENTRY1(30), ROUTING_ENTRY1(31), + ROUTING_ENTRY1(32), ROUTING_ENTRY1(33), + ROUTING_ENTRY1(34), ROUTING_ENTRY1(35), + ROUTING_ENTRY1(36), ROUTING_ENTRY1(37), + ROUTING_ENTRY1(38), ROUTING_ENTRY1(39), + ROUTING_ENTRY1(40), ROUTING_ENTRY1(41), + ROUTING_ENTRY1(42), ROUTING_ENTRY1(43), + ROUTING_ENTRY1(44), ROUTING_ENTRY1(45), + ROUTING_ENTRY1(46), ROUTING_ENTRY1(47), +#endif +}; + +int kvm_setup_default_irq_routing(struct kvm *kvm) +{ + return kvm_set_irq_routing(kvm, default_routing, + ARRAY_SIZE(default_routing), 0); +} diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 786a3ae373b0..c65484b471c6 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -843,6 +843,7 @@ static struct kvm *kvm_create_vm(void) if (IS_ERR(kvm)) goto out; #ifdef CONFIG_HAVE_KVM_IRQCHIP + INIT_LIST_HEAD(&kvm->irq_routing); INIT_HLIST_HEAD(&kvm->mask_notifier_list); #endif @@ -926,6 +927,7 @@ static void kvm_destroy_vm(struct kvm *kvm) spin_lock(&kvm_lock); list_del(&kvm->vm_list); spin_unlock(&kvm_lock); + kvm_free_irq_routing(kvm); kvm_io_bus_destroy(&kvm->pio_bus); kvm_io_bus_destroy(&kvm->mmio_bus); #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET @@ -1945,6 +1947,36 @@ static long kvm_vm_ioctl(struct file *filp, goto out; break; } +#endif +#ifdef KVM_CAP_IRQ_ROUTING + case KVM_SET_GSI_ROUTING: { + struct kvm_irq_routing routing; + struct kvm_irq_routing __user *urouting; + struct kvm_irq_routing_entry *entries; + + r = -EFAULT; + if (copy_from_user(&routing, argp, sizeof(routing))) + goto out; + r = -EINVAL; + if (routing.nr >= KVM_MAX_IRQ_ROUTES) + goto out; + if (routing.flags) + goto out; + r = -ENOMEM; + entries = vmalloc(routing.nr * sizeof(*entries)); + if (!entries) + goto out; + r = -EFAULT; + urouting = argp; + if (copy_from_user(entries, urouting->entries, + routing.nr * sizeof(*entries))) + goto out_free_irq_routing; + r = kvm_set_irq_routing(kvm, entries, routing.nr, + routing.flags); + out_free_irq_routing: + vfree(entries); + break; + } #endif default: r = kvm_arch_vm_ioctl(filp, ioctl, arg); @@ -2012,6 +2044,10 @@ static long kvm_dev_ioctl_check_extension_generic(long arg) case KVM_CAP_USER_MEMORY: case KVM_CAP_DESTROY_MEMORY_REGION_WORKS: return 1; +#ifdef CONFIG_HAVE_KVM_IRQCHIP + case KVM_CAP_IRQ_ROUTING: + return 1; +#endif default: break; } -- cgit v1.2.3 From 44882eed2ebe7f75f8cdae5671ab1d6e0fa40dbc Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Tue, 27 Jan 2009 15:12:38 -0200 Subject: KVM: make irq ack notifications aware of routing table IRQ ack notifications assume an identity mapping between pin->gsi, which might not be the case with, for example, HPET. Translate before acking. Signed-off-by: Marcelo Tosatti Acked-by: Gleb Natapov --- arch/x86/kvm/i8259.c | 5 +++-- arch/x86/kvm/irq.h | 2 ++ include/linux/kvm_host.h | 2 +- virt/kvm/ioapic.c | 10 +++++----- virt/kvm/irq_comm.c | 13 ++++++++++--- 5 files changed, 21 insertions(+), 11 deletions(-) (limited to 'include/linux/kvm_host.h') diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c index 179dcb0103fd..93160375c841 100644 --- a/arch/x86/kvm/i8259.c +++ b/arch/x86/kvm/i8259.c @@ -49,7 +49,8 @@ static void pic_unlock(struct kvm_pic *s) spin_unlock(&s->lock); while (acks) { - kvm_notify_acked_irq(kvm, __ffs(acks)); + kvm_notify_acked_irq(kvm, SELECT_PIC(__ffs(acks)), + __ffs(acks)); acks &= acks - 1; } @@ -232,7 +233,7 @@ int kvm_pic_read_irq(struct kvm *kvm) } pic_update_irq(s); pic_unlock(s); - kvm_notify_acked_irq(kvm, irq); + kvm_notify_acked_irq(kvm, SELECT_PIC(irq), irq); return intno; } diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h index 82579ee538d0..9f593188129e 100644 --- a/arch/x86/kvm/irq.h +++ b/arch/x86/kvm/irq.h @@ -32,6 +32,8 @@ #include "lapic.h" #define PIC_NUM_PINS 16 +#define SELECT_PIC(irq) \ + ((irq) < 8 ? KVM_IRQCHIP_PIC_MASTER : KVM_IRQCHIP_PIC_SLAVE) struct kvm; struct kvm_vcpu; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index ce285e01bd57..c03a0a9a8584 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -352,7 +352,7 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq, void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask); void kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level); -void kvm_notify_acked_irq(struct kvm *kvm, unsigned gsi); +void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin); void kvm_register_irq_ack_notifier(struct kvm *kvm, struct kvm_irq_ack_notifier *kian); void kvm_unregister_irq_ack_notifier(struct kvm_irq_ack_notifier *kian); diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c index e85a2bcd2db1..1c986ac59ad6 100644 --- a/virt/kvm/ioapic.c +++ b/virt/kvm/ioapic.c @@ -293,20 +293,20 @@ void kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level) } } -static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int gsi, +static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int pin, int trigger_mode) { union ioapic_redir_entry *ent; - ent = &ioapic->redirtbl[gsi]; + ent = &ioapic->redirtbl[pin]; - kvm_notify_acked_irq(ioapic->kvm, gsi); + kvm_notify_acked_irq(ioapic->kvm, KVM_IRQCHIP_IOAPIC, pin); if (trigger_mode == IOAPIC_LEVEL_TRIG) { ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG); ent->fields.remote_irr = 0; - if (!ent->fields.mask && (ioapic->irr & (1 << gsi))) - ioapic_service(ioapic, gsi); + if (!ent->fields.mask && (ioapic->irr & (1 << pin))) + ioapic_service(ioapic, pin); } } diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index a797fa5e6420..7aa5086c8622 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c @@ -59,10 +59,19 @@ void kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level) e->set(e, kvm, !!(*irq_state)); } -void kvm_notify_acked_irq(struct kvm *kvm, unsigned gsi) +void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) { + struct kvm_kernel_irq_routing_entry *e; struct kvm_irq_ack_notifier *kian; struct hlist_node *n; + unsigned gsi = pin; + + list_for_each_entry(e, &kvm->irq_routing, link) + if (e->irqchip.irqchip == irqchip && + e->irqchip.pin == pin) { + gsi = e->gsi; + break; + } hlist_for_each_entry(kian, n, &kvm->arch.irq_ack_notifier_list, link) if (kian->gsi == gsi) @@ -237,8 +246,6 @@ out: #define ROUTING_ENTRY1(irq) IOAPIC_ROUTING_ENTRY(irq) #ifdef CONFIG_X86 -#define SELECT_PIC(irq) \ - ((irq) < 8 ? KVM_IRQCHIP_PIC_MASTER : KVM_IRQCHIP_PIC_SLAVE) # define PIC_ROUTING_ENTRY(irq) \ { .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP, \ .u.irqchip.irqchip = SELECT_PIC(irq), .u.irqchip.pin = (irq) % 8 } -- cgit v1.2.3 From 79950e1073150909619b7c0f9a39a2fea83a42d8 Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Tue, 10 Feb 2009 13:57:06 +0800 Subject: KVM: Use irq routing API for MSI Merge MSI userspace interface with IRQ routing table. Notice the API have been changed, and using IRQ routing table would be the only interface kvm-userspace supported. Signed-off-by: Sheng Yang Signed-off-by: Avi Kivity --- include/linux/kvm.h | 9 ++++++ include/linux/kvm_host.h | 2 +- virt/kvm/irq_comm.c | 78 +++++++++++++++++++++++++++++++++++++++++++----- virt/kvm/kvm_main.c | 70 ++++--------------------------------------- 4 files changed, 86 insertions(+), 73 deletions(-) (limited to 'include/linux/kvm_host.h') diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 869462ca7625..2163b3dd36e7 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -410,8 +410,16 @@ struct kvm_irq_routing_irqchip { __u32 pin; }; +struct kvm_irq_routing_msi { + __u32 address_lo; + __u32 address_hi; + __u32 data; + __u32 pad; +}; + /* gsi routing entry types */ #define KVM_IRQ_ROUTING_IRQCHIP 1 +#define KVM_IRQ_ROUTING_MSI 2 struct kvm_irq_routing_entry { __u32 gsi; @@ -420,6 +428,7 @@ struct kvm_irq_routing_entry { __u32 pad; union { struct kvm_irq_routing_irqchip irqchip; + struct kvm_irq_routing_msi msi; __u32 pad[8]; } u; }; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index c03a0a9a8584..339eda3ca6ee 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -116,6 +116,7 @@ struct kvm_kernel_irq_routing_entry { unsigned irqchip; unsigned pin; } irqchip; + struct msi_msg msi; }; struct list_head link; }; @@ -327,7 +328,6 @@ struct kvm_assigned_dev_kernel { int host_irq; bool host_irq_disabled; int guest_irq; - struct msi_msg guest_msi; #define KVM_ASSIGNED_DEV_GUEST_INTX (1 << 0) #define KVM_ASSIGNED_DEV_GUEST_MSI (1 << 1) #define KVM_ASSIGNED_DEV_HOST_INTX (1 << 8) diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index 7aa5086c8622..6bc7439eff6e 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c @@ -20,6 +20,11 @@ */ #include + +#ifdef CONFIG_X86 +#include +#endif + #include "irq.h" #include "ioapic.h" @@ -38,17 +43,70 @@ static void kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e, kvm_ioapic_set_irq(kvm->arch.vioapic, e->irqchip.pin, level); } +static void kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, + struct kvm *kvm, int level) +{ + int vcpu_id; + struct kvm_vcpu *vcpu; + struct kvm_ioapic *ioapic = ioapic_irqchip(kvm); + int dest_id = (e->msi.address_lo & MSI_ADDR_DEST_ID_MASK) + >> MSI_ADDR_DEST_ID_SHIFT; + int vector = (e->msi.data & MSI_DATA_VECTOR_MASK) + >> MSI_DATA_VECTOR_SHIFT; + int dest_mode = test_bit(MSI_ADDR_DEST_MODE_SHIFT, + (unsigned long *)&e->msi.address_lo); + int trig_mode = test_bit(MSI_DATA_TRIGGER_SHIFT, + (unsigned long *)&e->msi.data); + int delivery_mode = test_bit(MSI_DATA_DELIVERY_MODE_SHIFT, + (unsigned long *)&e->msi.data); + u32 deliver_bitmask; + + BUG_ON(!ioapic); + + deliver_bitmask = kvm_ioapic_get_delivery_bitmask(ioapic, + dest_id, dest_mode); + /* IOAPIC delivery mode value is the same as MSI here */ + switch (delivery_mode) { + case IOAPIC_LOWEST_PRIORITY: + vcpu = kvm_get_lowest_prio_vcpu(ioapic->kvm, vector, + deliver_bitmask); + if (vcpu != NULL) + kvm_apic_set_irq(vcpu, vector, trig_mode); + else + printk(KERN_INFO "kvm: null lowest priority vcpu!\n"); + break; + case IOAPIC_FIXED: + for (vcpu_id = 0; deliver_bitmask != 0; vcpu_id++) { + if (!(deliver_bitmask & (1 << vcpu_id))) + continue; + deliver_bitmask &= ~(1 << vcpu_id); + vcpu = ioapic->kvm->vcpus[vcpu_id]; + if (vcpu) + kvm_apic_set_irq(vcpu, vector, trig_mode); + } + break; + default: + break; + } +} + /* This should be called with the kvm->lock mutex held */ void kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level) { struct kvm_kernel_irq_routing_entry *e; - unsigned long *irq_state = (unsigned long *)&kvm->arch.irq_states[irq]; + unsigned long *irq_state, sig_level; + + if (irq < KVM_IOAPIC_NUM_PINS) { + irq_state = (unsigned long *)&kvm->arch.irq_states[irq]; - /* Logical OR for level trig interrupt */ - if (level) - set_bit(irq_source_id, irq_state); - else - clear_bit(irq_source_id, irq_state); + /* Logical OR for level trig interrupt */ + if (level) + set_bit(irq_source_id, irq_state); + else + clear_bit(irq_source_id, irq_state); + sig_level = !!(*irq_state); + } else /* Deal with MSI/MSI-X */ + sig_level = 1; /* Not possible to detect if the guest uses the PIC or the * IOAPIC. So set the bit in both. The guest will ignore @@ -56,7 +114,7 @@ void kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level) */ list_for_each_entry(e, &kvm->irq_routing, link) if (e->gsi == irq) - e->set(e, kvm, !!(*irq_state)); + e->set(e, kvm, sig_level); } void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) @@ -186,6 +244,12 @@ int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e, e->irqchip.irqchip = ue->u.irqchip.irqchip; e->irqchip.pin = ue->u.irqchip.pin + delta; break; + case KVM_IRQ_ROUTING_MSI: + e->set = kvm_set_msi; + e->msi.address_lo = ue->u.msi.address_lo; + e->msi.address_hi = ue->u.msi.address_hi; + e->msi.data = ue->u.msi.data; + break; default: goto out; } diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index c65484b471c6..266bdaf0ce44 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -47,10 +47,6 @@ #include #include -#ifdef CONFIG_X86 -#include -#endif - #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET #include "coalesced_mmio.h" #endif @@ -85,57 +81,6 @@ static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl, static bool kvm_rebooting; #ifdef KVM_CAP_DEVICE_ASSIGNMENT - -#ifdef CONFIG_X86 -static void assigned_device_msi_dispatch(struct kvm_assigned_dev_kernel *dev) -{ - int vcpu_id; - struct kvm_vcpu *vcpu; - struct kvm_ioapic *ioapic = ioapic_irqchip(dev->kvm); - int dest_id = (dev->guest_msi.address_lo & MSI_ADDR_DEST_ID_MASK) - >> MSI_ADDR_DEST_ID_SHIFT; - int vector = (dev->guest_msi.data & MSI_DATA_VECTOR_MASK) - >> MSI_DATA_VECTOR_SHIFT; - int dest_mode = test_bit(MSI_ADDR_DEST_MODE_SHIFT, - (unsigned long *)&dev->guest_msi.address_lo); - int trig_mode = test_bit(MSI_DATA_TRIGGER_SHIFT, - (unsigned long *)&dev->guest_msi.data); - int delivery_mode = test_bit(MSI_DATA_DELIVERY_MODE_SHIFT, - (unsigned long *)&dev->guest_msi.data); - u32 deliver_bitmask; - - BUG_ON(!ioapic); - - deliver_bitmask = kvm_ioapic_get_delivery_bitmask(ioapic, - dest_id, dest_mode); - /* IOAPIC delivery mode value is the same as MSI here */ - switch (delivery_mode) { - case IOAPIC_LOWEST_PRIORITY: - vcpu = kvm_get_lowest_prio_vcpu(ioapic->kvm, vector, - deliver_bitmask); - if (vcpu != NULL) - kvm_apic_set_irq(vcpu, vector, trig_mode); - else - printk(KERN_INFO "kvm: null lowest priority vcpu!\n"); - break; - case IOAPIC_FIXED: - for (vcpu_id = 0; deliver_bitmask != 0; vcpu_id++) { - if (!(deliver_bitmask & (1 << vcpu_id))) - continue; - deliver_bitmask &= ~(1 << vcpu_id); - vcpu = ioapic->kvm->vcpus[vcpu_id]; - if (vcpu) - kvm_apic_set_irq(vcpu, vector, trig_mode); - } - break; - default: - printk(KERN_INFO "kvm: unsupported MSI delivery mode\n"); - } -} -#else -static void assigned_device_msi_dispatch(struct kvm_assigned_dev_kernel *dev) {} -#endif - static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head, int assigned_dev_id) { @@ -162,13 +107,10 @@ static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work) * finer-grained lock, update this */ mutex_lock(&assigned_dev->kvm->lock); - if (assigned_dev->irq_requested_type & KVM_ASSIGNED_DEV_GUEST_INTX) - kvm_set_irq(assigned_dev->kvm, - assigned_dev->irq_source_id, - assigned_dev->guest_irq, 1); - else if (assigned_dev->irq_requested_type & - KVM_ASSIGNED_DEV_GUEST_MSI) { - assigned_device_msi_dispatch(assigned_dev); + kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id, + assigned_dev->guest_irq, 1); + + if (assigned_dev->irq_requested_type & KVM_ASSIGNED_DEV_GUEST_MSI) { enable_irq(assigned_dev->host_irq); assigned_dev->host_irq_disabled = false; } @@ -331,17 +273,15 @@ static int assigned_device_update_msi(struct kvm *kvm, { int r; + adev->guest_irq = airq->guest_irq; if (airq->flags & KVM_DEV_IRQ_ASSIGN_ENABLE_MSI) { /* x86 don't care upper address of guest msi message addr */ adev->irq_requested_type |= KVM_ASSIGNED_DEV_GUEST_MSI; adev->irq_requested_type &= ~KVM_ASSIGNED_DEV_GUEST_INTX; - adev->guest_msi.address_lo = airq->guest_msi.addr_lo; - adev->guest_msi.data = airq->guest_msi.data; adev->ack_notifier.gsi = -1; } else if (msi2intx) { adev->irq_requested_type |= KVM_ASSIGNED_DEV_GUEST_INTX; adev->irq_requested_type &= ~KVM_ASSIGNED_DEV_GUEST_MSI; - adev->guest_irq = airq->guest_irq; adev->ack_notifier.gsi = airq->guest_irq; } else { /* -- cgit v1.2.3 From c807660407a695f390034e402edfe544a1d2e40c Mon Sep 17 00:00:00 2001 From: Gerd Hoffmann Date: Wed, 4 Feb 2009 17:52:04 +0100 Subject: KVM: Fix kvmclock on !constant_tsc boxes kvmclock currently falls apart on machines without constant tsc. This patch fixes it. Changes: * keep tsc frequency in a per-cpu variable. * handle kvmclock update using a new request flag, thus checking whenever we need an update each time we enter guest context. * use a cpufreq notifier to track frequency changes and force kvmclock updates. * send ipis to kick cpu out of guest context if needed to make sure the guest doesn't see stale values. Signed-off-by: Gerd Hoffmann Signed-off-by: Avi Kivity --- arch/x86/kvm/x86.c | 103 ++++++++++++++++++++++++++++++++++++++++++----- include/linux/kvm_host.h | 1 + 2 files changed, 95 insertions(+), 9 deletions(-) (limited to 'include/linux/kvm_host.h') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 8f83590b47dd..05d7be89b5eb 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include @@ -617,6 +618,8 @@ static void kvm_set_time_scale(uint32_t tsc_khz, struct pvclock_vcpu_time_info * hv_clock->tsc_to_system_mul); } +static DEFINE_PER_CPU(unsigned long, cpu_tsc_khz); + static void kvm_write_guest_time(struct kvm_vcpu *v) { struct timespec ts; @@ -627,9 +630,9 @@ static void kvm_write_guest_time(struct kvm_vcpu *v) if ((!vcpu->time_page)) return; - if (unlikely(vcpu->hv_clock_tsc_khz != tsc_khz)) { - kvm_set_time_scale(tsc_khz, &vcpu->hv_clock); - vcpu->hv_clock_tsc_khz = tsc_khz; + if (unlikely(vcpu->hv_clock_tsc_khz != __get_cpu_var(cpu_tsc_khz))) { + kvm_set_time_scale(__get_cpu_var(cpu_tsc_khz), &vcpu->hv_clock); + vcpu->hv_clock_tsc_khz = __get_cpu_var(cpu_tsc_khz); } /* Keep irq disabled to prevent changes to the clock */ @@ -660,6 +663,16 @@ static void kvm_write_guest_time(struct kvm_vcpu *v) mark_page_dirty(v->kvm, vcpu->time >> PAGE_SHIFT); } +static int kvm_request_guest_time_update(struct kvm_vcpu *v) +{ + struct kvm_vcpu_arch *vcpu = &v->arch; + + if (!vcpu->time_page) + return 0; + set_bit(KVM_REQ_KVMCLOCK_UPDATE, &v->requests); + return 1; +} + static bool msr_mtrr_valid(unsigned msr) { switch (msr) { @@ -790,7 +803,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) vcpu->arch.time_page = NULL; } - kvm_write_guest_time(vcpu); + kvm_request_guest_time_update(vcpu); break; } default: @@ -1000,6 +1013,7 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_MMU_SHADOW_CACHE_CONTROL: case KVM_CAP_SET_TSS_ADDR: case KVM_CAP_EXT_CPUID: + case KVM_CAP_CLOCKSOURCE: case KVM_CAP_PIT: case KVM_CAP_NOP_IO_DELAY: case KVM_CAP_MP_STATE: @@ -1025,9 +1039,6 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_IOMMU: r = iommu_found(); break; - case KVM_CAP_CLOCKSOURCE: - r = boot_cpu_has(X86_FEATURE_CONSTANT_TSC); - break; default: r = 0; break; @@ -1098,7 +1109,7 @@ out: void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { kvm_x86_ops->vcpu_load(vcpu, cpu); - kvm_write_guest_time(vcpu); + kvm_request_guest_time_update(vcpu); } void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) @@ -2642,9 +2653,72 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, } EXPORT_SYMBOL_GPL(kvm_emulate_pio_string); +static void bounce_off(void *info) +{ + /* nothing */ +} + +static unsigned int ref_freq; +static unsigned long tsc_khz_ref; + +static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val, + void *data) +{ + struct cpufreq_freqs *freq = data; + struct kvm *kvm; + struct kvm_vcpu *vcpu; + int i, send_ipi = 0; + + if (!ref_freq) + ref_freq = freq->old; + + if (val == CPUFREQ_PRECHANGE && freq->old > freq->new) + return 0; + if (val == CPUFREQ_POSTCHANGE && freq->old < freq->new) + return 0; + per_cpu(cpu_tsc_khz, freq->cpu) = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new); + + spin_lock(&kvm_lock); + list_for_each_entry(kvm, &vm_list, vm_list) { + for (i = 0; i < KVM_MAX_VCPUS; ++i) { + vcpu = kvm->vcpus[i]; + if (!vcpu) + continue; + if (vcpu->cpu != freq->cpu) + continue; + if (!kvm_request_guest_time_update(vcpu)) + continue; + if (vcpu->cpu != smp_processor_id()) + send_ipi++; + } + } + spin_unlock(&kvm_lock); + + if (freq->old < freq->new && send_ipi) { + /* + * We upscale the frequency. Must make the guest + * doesn't see old kvmclock values while running with + * the new frequency, otherwise we risk the guest sees + * time go backwards. + * + * In case we update the frequency for another cpu + * (which might be in guest context) send an interrupt + * to kick the cpu out of guest context. Next time + * guest context is entered kvmclock will be updated, + * so the guest will not see stale values. + */ + smp_call_function_single(freq->cpu, bounce_off, NULL, 1); + } + return 0; +} + +static struct notifier_block kvmclock_cpufreq_notifier_block = { + .notifier_call = kvmclock_cpufreq_notifier +}; + int kvm_arch_init(void *opaque) { - int r; + int r, cpu; struct kvm_x86_ops *ops = (struct kvm_x86_ops *)opaque; if (kvm_x86_ops) { @@ -2675,6 +2749,15 @@ int kvm_arch_init(void *opaque) kvm_mmu_set_base_ptes(PT_PRESENT_MASK); kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK, PT_DIRTY_MASK, PT64_NX_MASK, 0, 0); + + for_each_possible_cpu(cpu) + per_cpu(cpu_tsc_khz, cpu) = tsc_khz; + if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) { + tsc_khz_ref = tsc_khz; + cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block, + CPUFREQ_TRANSITION_NOTIFIER); + } + return 0; out: @@ -3010,6 +3093,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) if (vcpu->requests) { if (test_and_clear_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests)) __kvm_migrate_timers(vcpu); + if (test_and_clear_bit(KVM_REQ_KVMCLOCK_UPDATE, &vcpu->requests)) + kvm_write_guest_time(vcpu); if (test_and_clear_bit(KVM_REQ_MMU_SYNC, &vcpu->requests)) kvm_mmu_sync_roots(vcpu); if (test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests)) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 339eda3ca6ee..18b4df8264cf 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -37,6 +37,7 @@ #define KVM_REQ_PENDING_TIMER 5 #define KVM_REQ_UNHALT 6 #define KVM_REQ_MMU_SYNC 7 +#define KVM_REQ_KVMCLOCK_UPDATE 8 #define KVM_USERSPACE_IRQ_SOURCE_ID 0 -- cgit v1.2.3 From 4925663a079c77d95d8685228ad6675fc5639c8e Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Wed, 4 Feb 2009 17:28:14 +0200 Subject: KVM: Report IRQ injection status to userspace. IRQ injection status is either -1 (if there was no CPU found that should except the interrupt because IRQ was masked or ioapic was misconfigured or ...) or >= 0 in that case the number indicates to how many CPUs interrupt was injected. If the value is 0 it means that the interrupt was coalesced and probably should be reinjected. Signed-off-by: Gleb Natapov Signed-off-by: Avi Kivity --- arch/ia64/kvm/kvm-ia64.c | 12 ++++++++++-- arch/x86/include/asm/kvm_host.h | 2 +- arch/x86/kvm/i8259.c | 18 +++++++++++++----- arch/x86/kvm/x86.c | 13 +++++++++++-- include/linux/kvm.h | 7 ++++++- include/linux/kvm_host.h | 4 ++-- virt/kvm/ioapic.c | 23 ++++++++++++++++------- virt/kvm/ioapic.h | 2 +- virt/kvm/irq_comm.c | 41 ++++++++++++++++++++++++++++------------- 9 files changed, 88 insertions(+), 34 deletions(-) (limited to 'include/linux/kvm_host.h') diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index 9c77e3939e97..076b00d1dbff 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -182,7 +182,7 @@ int kvm_dev_ioctl_check_extension(long ext) switch (ext) { case KVM_CAP_IRQCHIP: case KVM_CAP_MP_STATE: - + case KVM_CAP_IRQ_INJECT_STATUS: r = 1; break; case KVM_CAP_COALESCED_MMIO: @@ -927,6 +927,7 @@ long kvm_arch_vm_ioctl(struct file *filp, goto out; } break; + case KVM_IRQ_LINE_STATUS: case KVM_IRQ_LINE: { struct kvm_irq_level irq_event; @@ -934,10 +935,17 @@ long kvm_arch_vm_ioctl(struct file *filp, if (copy_from_user(&irq_event, argp, sizeof irq_event)) goto out; if (irqchip_in_kernel(kvm)) { + __s32 status; mutex_lock(&kvm->lock); - kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, + status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irq_event.irq, irq_event.level); mutex_unlock(&kvm->lock); + if (ioctl == KVM_IRQ_LINE_STATUS) { + irq_event.status = status; + if (copy_to_user(argp, &irq_event, + sizeof irq_event)) + goto out; + } r = 0; } break; diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 55fd4c5fd388..f0faf58044ff 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -616,7 +616,7 @@ void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code); void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long cr2, u32 error_code); -void kvm_pic_set_irq(void *opaque, int irq, int level); +int kvm_pic_set_irq(void *opaque, int irq, int level); void kvm_inject_nmi(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c index 93160375c841..b4e662e94ddc 100644 --- a/arch/x86/kvm/i8259.c +++ b/arch/x86/kvm/i8259.c @@ -77,12 +77,13 @@ void kvm_pic_clear_isr_ack(struct kvm *kvm) /* * set irq level. If an edge is detected, then the IRR is set to 1 */ -static inline void pic_set_irq1(struct kvm_kpic_state *s, int irq, int level) +static inline int pic_set_irq1(struct kvm_kpic_state *s, int irq, int level) { - int mask; + int mask, ret = 1; mask = 1 << irq; if (s->elcr & mask) /* level triggered */ if (level) { + ret = !(s->irr & mask); s->irr |= mask; s->last_irr |= mask; } else { @@ -91,11 +92,15 @@ static inline void pic_set_irq1(struct kvm_kpic_state *s, int irq, int level) } else /* edge triggered */ if (level) { - if ((s->last_irr & mask) == 0) + if ((s->last_irr & mask) == 0) { + ret = !(s->irr & mask); s->irr |= mask; + } s->last_irr |= mask; } else s->last_irr &= ~mask; + + return (s->imr & mask) ? -1 : ret; } /* @@ -172,16 +177,19 @@ void kvm_pic_update_irq(struct kvm_pic *s) pic_unlock(s); } -void kvm_pic_set_irq(void *opaque, int irq, int level) +int kvm_pic_set_irq(void *opaque, int irq, int level) { struct kvm_pic *s = opaque; + int ret = -1; pic_lock(s); if (irq >= 0 && irq < PIC_NUM_PINS) { - pic_set_irq1(&s->pics[irq >> 3], irq & 7, level); + ret = pic_set_irq1(&s->pics[irq >> 3], irq & 7, level); pic_update_irq(s); } pic_unlock(s); + + return ret; } /* diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 05d7be89b5eb..e4db5be7c953 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1019,6 +1019,7 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_MP_STATE: case KVM_CAP_SYNC_MMU: case KVM_CAP_REINJECT_CONTROL: + case KVM_CAP_IRQ_INJECT_STATUS: r = 1; break; case KVM_CAP_COALESCED_MMIO: @@ -1877,6 +1878,7 @@ long kvm_arch_vm_ioctl(struct file *filp, create_pit_unlock: mutex_unlock(&kvm->lock); break; + case KVM_IRQ_LINE_STATUS: case KVM_IRQ_LINE: { struct kvm_irq_level irq_event; @@ -1884,10 +1886,17 @@ long kvm_arch_vm_ioctl(struct file *filp, if (copy_from_user(&irq_event, argp, sizeof irq_event)) goto out; if (irqchip_in_kernel(kvm)) { + __s32 status; mutex_lock(&kvm->lock); - kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, - irq_event.irq, irq_event.level); + status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, + irq_event.irq, irq_event.level); mutex_unlock(&kvm->lock); + if (ioctl == KVM_IRQ_LINE_STATUS) { + irq_event.status = status; + if (copy_to_user(argp, &irq_event, + sizeof irq_event)) + goto out; + } r = 0; } break; diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 2163b3dd36e7..dd48225d1824 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -48,7 +48,10 @@ struct kvm_irq_level { * For IA-64 (APIC model) IOAPIC0: irq 0-23; IOAPIC1: irq 24-47.. * For X86 (standard AT mode) PIC0/1: irq 0-15. IOAPIC0: 0-23.. */ - __u32 irq; + union { + __u32 irq; + __s32 status; + }; __u32 level; }; @@ -402,6 +405,7 @@ struct kvm_trace_rec { #ifdef __KVM_HAVE_IOAPIC #define KVM_CAP_IRQ_ROUTING 25 #endif +#define KVM_CAP_IRQ_INJECT_STATUS 26 #ifdef KVM_CAP_IRQ_ROUTING @@ -465,6 +469,7 @@ struct kvm_irq_routing { #define KVM_CREATE_PIT _IO(KVMIO, 0x64) #define KVM_GET_PIT _IOWR(KVMIO, 0x65, struct kvm_pit_state) #define KVM_SET_PIT _IOR(KVMIO, 0x66, struct kvm_pit_state) +#define KVM_IRQ_LINE_STATUS _IOWR(KVMIO, 0x67, struct kvm_irq_level) #define KVM_REGISTER_COALESCED_MMIO \ _IOW(KVMIO, 0x67, struct kvm_coalesced_mmio_zone) #define KVM_UNREGISTER_COALESCED_MMIO \ diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 18b4df8264cf..894a56e365e8 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -110,7 +110,7 @@ struct kvm_memory_slot { struct kvm_kernel_irq_routing_entry { u32 gsi; - void (*set)(struct kvm_kernel_irq_routing_entry *e, + int (*set)(struct kvm_kernel_irq_routing_entry *e, struct kvm *kvm, int level); union { struct { @@ -352,7 +352,7 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq, struct kvm_irq_mask_notifier *kimn); void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask); -void kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level); +int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level); void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin); void kvm_register_irq_ack_notifier(struct kvm *kvm, struct kvm_irq_ack_notifier *kian); diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c index 1c986ac59ad6..c3b99def9cbc 100644 --- a/virt/kvm/ioapic.c +++ b/virt/kvm/ioapic.c @@ -83,19 +83,22 @@ static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic, return result; } -static void ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx) +static int ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx) { union ioapic_redir_entry *pent; + int injected = -1; pent = &ioapic->redirtbl[idx]; if (!pent->fields.mask) { - int injected = ioapic_deliver(ioapic, idx); + injected = ioapic_deliver(ioapic, idx); if (injected && pent->fields.trig_mode == IOAPIC_LEVEL_TRIG) pent->fields.remote_irr = 1; } if (!pent->fields.trig_mode) ioapic->irr &= ~(1 << idx); + + return injected; } static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) @@ -207,7 +210,7 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq) u8 trig_mode = ioapic->redirtbl[irq].fields.trig_mode; u32 deliver_bitmask; struct kvm_vcpu *vcpu; - int vcpu_id, r = 0; + int vcpu_id, r = -1; ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x " "vector=%x trig_mode=%x\n", @@ -247,7 +250,9 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq) deliver_bitmask &= ~(1 << vcpu_id); vcpu = ioapic->kvm->vcpus[vcpu_id]; if (vcpu) { - r = ioapic_inj_irq(ioapic, vcpu, vector, + if (r < 0) + r = 0; + r += ioapic_inj_irq(ioapic, vcpu, vector, trig_mode, delivery_mode); } } @@ -258,8 +263,10 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq) continue; deliver_bitmask &= ~(1 << vcpu_id); vcpu = ioapic->kvm->vcpus[vcpu_id]; - if (vcpu) + if (vcpu) { ioapic_inj_nmi(vcpu); + r = 1; + } else ioapic_debug("NMI to vcpu %d failed\n", vcpu->vcpu_id); @@ -273,11 +280,12 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq) return r; } -void kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level) +int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level) { u32 old_irr = ioapic->irr; u32 mask = 1 << irq; union ioapic_redir_entry entry; + int ret = 1; if (irq >= 0 && irq < IOAPIC_NUM_PINS) { entry = ioapic->redirtbl[irq]; @@ -288,9 +296,10 @@ void kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level) ioapic->irr |= mask; if ((!entry.fields.trig_mode && old_irr != ioapic->irr) || !entry.fields.remote_irr) - ioapic_service(ioapic, irq); + ret = ioapic_service(ioapic, irq); } } + return ret; } static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int pin, diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h index 49c9581d2586..a34bd5e6436b 100644 --- a/virt/kvm/ioapic.h +++ b/virt/kvm/ioapic.h @@ -83,7 +83,7 @@ struct kvm_vcpu *kvm_get_lowest_prio_vcpu(struct kvm *kvm, u8 vector, unsigned long bitmap); void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode); int kvm_ioapic_init(struct kvm *kvm); -void kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level); +int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level); void kvm_ioapic_reset(struct kvm_ioapic *ioapic); u32 kvm_ioapic_get_delivery_bitmask(struct kvm_ioapic *ioapic, u8 dest, u8 dest_mode); diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index 6bc7439eff6e..be8aba791554 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c @@ -29,22 +29,24 @@ #include "ioapic.h" -static void kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e, - struct kvm *kvm, int level) +static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e, + struct kvm *kvm, int level) { #ifdef CONFIG_X86 - kvm_pic_set_irq(pic_irqchip(kvm), e->irqchip.pin, level); + return kvm_pic_set_irq(pic_irqchip(kvm), e->irqchip.pin, level); +#else + return -1; #endif } -static void kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e, - struct kvm *kvm, int level) +static int kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e, + struct kvm *kvm, int level) { - kvm_ioapic_set_irq(kvm->arch.vioapic, e->irqchip.pin, level); + return kvm_ioapic_set_irq(kvm->arch.vioapic, e->irqchip.pin, level); } -static void kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, - struct kvm *kvm, int level) +static int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, + struct kvm *kvm, int level) { int vcpu_id; struct kvm_vcpu *vcpu; @@ -88,13 +90,20 @@ static void kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, default: break; } + return 1; } -/* This should be called with the kvm->lock mutex held */ -void kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level) +/* This should be called with the kvm->lock mutex held + * Return value: + * < 0 Interrupt was ignored (masked or not delivered for other reasons) + * = 0 Interrupt was coalesced (previous irq is still pending) + * > 0 Number of CPUs interrupt was delivered to + */ +int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level) { struct kvm_kernel_irq_routing_entry *e; unsigned long *irq_state, sig_level; + int ret = -1; if (irq < KVM_IOAPIC_NUM_PINS) { irq_state = (unsigned long *)&kvm->arch.irq_states[irq]; @@ -113,8 +122,14 @@ void kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level) * writes to the unused one. */ list_for_each_entry(e, &kvm->irq_routing, link) - if (e->gsi == irq) - e->set(e, kvm, sig_level); + if (e->gsi == irq) { + int r = e->set(e, kvm, sig_level); + if (r < 0) + continue; + + ret = r + ((ret < 0) ? 0 : ret); + } + return ret; } void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) @@ -232,7 +247,7 @@ int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e, e->set = kvm_set_pic_irq; break; case KVM_IRQCHIP_PIC_SLAVE: - e->set = kvm_set_pic_irq; + e->set = kvm_set_pic_irq; delta = 8; break; case KVM_IRQCHIP_IOAPIC: -- cgit v1.2.3