From c10207fe86b1761c3ad135eb922fdb41bbde3025 Mon Sep 17 00:00:00 2001
From: Alexander Graf
Date: Fri, 19 Feb 2010 11:00:45 +0100
Subject: KVM: PPC: Add capability for paired singles

We need to tell userspace that we can emulate paired single instructions.
So let's add a capability export.

Signed-off-by: Alexander Graf
Signed-off-by: Avi Kivity
---
 include/linux/kvm.h | 1 +
 1 file changed, 1 insertion(+)
(limited to 'include/linux')

diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 60df9c84ecae..360f85e8c435 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -501,6 +501,7 @@ struct kvm_ioeventfd {
 #define KVM_CAP_HYPERV_VAPIC 45
 #define KVM_CAP_HYPERV_SPIN 46
 #define KVM_CAP_PCI_SEGMENT 47
+#define KVM_CAP_PPC_PAIRED_SINGLES 48
 #define KVM_CAP_X86_ROBUST_SINGLESTEP 51
 
 #ifdef KVM_CAP_IRQ_ROUTING
--
cgit v1.2.3

From 48005f64d0ea965d454e38b5181af4aba9bdef5b Mon Sep 17 00:00:00 2001
From: Jan Kiszka
Date: Fri, 19 Feb 2010 19:38:07 +0100
Subject: KVM: x86: Save&restore interrupt shadow mask

The interrupt shadow created by STI or MOV-SS-like operations is part of
the VCPU state and must be preserved across migration. Transfer it in
the spare padding field of kvm_vcpu_events.interrupt.

As a side effect we now have to make vmx_set_interrupt_shadow robust
against both shadow types being set. Give MOV SS a higher priority and
skip STI in that case to avoid VMX throwing a fault on the next entry.

Signed-off-by: Jan Kiszka
Signed-off-by: Avi Kivity
---
 Documentation/kvm/api.txt          | 11 ++++++++++-
 arch/x86/include/asm/kvm.h         |  7 ++++++-
 arch/x86/include/asm/kvm_emulate.h |  3 ---
 arch/x86/kvm/emulate.c             |  4 ++--
 arch/x86/kvm/svm.c                 |  2 +-
 arch/x86/kvm/vmx.c                 |  8 ++++----
 arch/x86/kvm/x86.c                 | 12 ++++++++++--
 include/linux/kvm.h                |  1 +
 8 files changed, 34 insertions(+), 14 deletions(-)
(limited to 'include/linux')

diff --git a/Documentation/kvm/api.txt b/Documentation/kvm/api.txt
index beb444a95013..9e5de5a1c4ef 100644
--- a/Documentation/kvm/api.txt
+++ b/Documentation/kvm/api.txt
@@ -656,6 +656,7 @@ struct kvm_clock_data {
 4.29 KVM_GET_VCPU_EVENTS
 
 Capability: KVM_CAP_VCPU_EVENTS
+Extended by: KVM_CAP_INTR_SHADOW
 Architectures: x86
 Type: vcpu ioctl
 Parameters: struct kvm_vcpu_events (out)
@@ -676,7 +677,7 @@ struct kvm_vcpu_events {
                 __u8 injected;
                 __u8 nr;
                 __u8 soft;
-                __u8 pad;
+                __u8 shadow;
         } interrupt;
         struct {
                 __u8 injected;
@@ -688,9 +689,13 @@ struct kvm_vcpu_events {
         __u32 flags;
 };
 
+KVM_VCPUEVENT_VALID_SHADOW may be set in the flags field to signal that
+interrupt.shadow contains a valid state. Otherwise, this field is undefined.
+
 4.30 KVM_SET_VCPU_EVENTS
 
 Capability: KVM_CAP_VCPU_EVENTS
+Extended by: KVM_CAP_INTR_SHADOW
 Architectures: x86
 Type: vcpu ioctl
 Parameters: struct kvm_vcpu_events (in)
@@ -709,6 +714,10 @@ current in-kernel state. The bits are:
 KVM_VCPUEVENT_VALID_NMI_PENDING - transfer nmi.pending to the kernel
 KVM_VCPUEVENT_VALID_SIPI_VECTOR - transfer sipi_vector
 
+If KVM_CAP_INTR_SHADOW is available, KVM_VCPUEVENT_VALID_SHADOW can be set in
+the flags field to signal that interrupt.shadow contains a valid state and
+shall be written into the VCPU.
+
 5. The kvm_run structure
diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h
index f46b79f6c16c..fb6117063ea3 100644
--- a/arch/x86/include/asm/kvm.h
+++ b/arch/x86/include/asm/kvm.h
@@ -257,6 +257,11 @@ struct kvm_reinject_control {
 /* When set in flags, include corresponding fields on KVM_SET_VCPU_EVENTS */
 #define KVM_VCPUEVENT_VALID_NMI_PENDING 0x00000001
 #define KVM_VCPUEVENT_VALID_SIPI_VECTOR 0x00000002
+#define KVM_VCPUEVENT_VALID_SHADOW 0x00000004
+
+/* Interrupt shadow states */
+#define KVM_X86_SHADOW_INT_MOV_SS 0x01
+#define KVM_X86_SHADOW_INT_STI 0x02
 
 /* for KVM_GET/SET_VCPU_EVENTS */
 struct kvm_vcpu_events {
@@ -271,7 +276,7 @@ struct kvm_vcpu_events {
                 __u8 injected;
                 __u8 nr;
                 __u8 soft;
-                __u8 pad;
+                __u8 shadow;
         } interrupt;
         struct {
                 __u8 injected;
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index 7a6f54fa13ba..2666d7ac3229 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -153,9 +153,6 @@ struct decode_cache {
         struct fetch_cache fetch;
 };
 
-#define X86_SHADOW_INT_MOV_SS 1
-#define X86_SHADOW_INT_STI 2
-
 struct x86_emulate_ctxt {
         /* Register state before/after emulation. */
         struct kvm_vcpu *vcpu;
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 35f7acd4a91f..c9f604b0819c 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -2128,7 +2128,7 @@ special_insn:
         }
 
         if (c->modrm_reg == VCPU_SREG_SS)
-                toggle_interruptibility(ctxt, X86_SHADOW_INT_MOV_SS);
+                toggle_interruptibility(ctxt, KVM_X86_SHADOW_INT_MOV_SS);
 
         rc = kvm_load_segment_descriptor(ctxt->vcpu, sel, c->modrm_reg);
@@ -2366,7 +2366,7 @@ special_insn:
         if (emulator_bad_iopl(ctxt))
                 kvm_inject_gp(ctxt->vcpu, 0);
         else {
-                toggle_interruptibility(ctxt, X86_SHADOW_INT_STI);
+                toggle_interruptibility(ctxt, KVM_X86_SHADOW_INT_STI);
                 ctxt->eflags |= X86_EFLAGS_IF;
                 c->dst.type = OP_NONE; /* Disable writeback. */
         }
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 294bbca34173..bd8f52f0823f 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -265,7 +265,7 @@ static u32 svm_get_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
         u32 ret = 0;
 
         if (svm->vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK)
-                ret |= X86_SHADOW_INT_STI | X86_SHADOW_INT_MOV_SS;
+                ret |= KVM_X86_SHADOW_INT_STI | KVM_X86_SHADOW_INT_MOV_SS;
         return ret & mask;
 }
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 68f895b00450..61f03980adae 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -846,9 +846,9 @@ static u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
         int ret = 0;
 
         if (interruptibility & GUEST_INTR_STATE_STI)
-                ret |= X86_SHADOW_INT_STI;
+                ret |= KVM_X86_SHADOW_INT_STI;
         if (interruptibility & GUEST_INTR_STATE_MOV_SS)
-                ret |= X86_SHADOW_INT_MOV_SS;
+                ret |= KVM_X86_SHADOW_INT_MOV_SS;
 
         return ret & mask;
 }
@@ -860,9 +860,9 @@ static void vmx_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
 
         interruptibility &= ~(GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS);
 
-        if (mask & X86_SHADOW_INT_MOV_SS)
+        if (mask & KVM_X86_SHADOW_INT_MOV_SS)
                 interruptibility |= GUEST_INTR_STATE_MOV_SS;
-        if (mask & X86_SHADOW_INT_STI)
+        else if (mask & KVM_X86_SHADOW_INT_STI)
                 interruptibility |= GUEST_INTR_STATE_STI;
 
         if ((interruptibility != interruptibility_old))
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 2b1c9f2fb8dd..84ffd95ee198 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2111,6 +2111,9 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
                 vcpu->arch.interrupt.pending && !vcpu->arch.interrupt.soft;
         events->interrupt.nr = vcpu->arch.interrupt.nr;
         events->interrupt.soft = 0;
+        events->interrupt.shadow =
+                kvm_x86_ops->get_interrupt_shadow(vcpu,
+                        KVM_X86_SHADOW_INT_MOV_SS | KVM_X86_SHADOW_INT_STI);
 
         events->nmi.injected = vcpu->arch.nmi_injected;
         events->nmi.pending = vcpu->arch.nmi_pending;
@@ -2119,7 +2122,8 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
         events->sipi_vector = vcpu->arch.sipi_vector;
 
         events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING
-                         | KVM_VCPUEVENT_VALID_SIPI_VECTOR);
+                         | KVM_VCPUEVENT_VALID_SIPI_VECTOR
+                         | KVM_VCPUEVENT_VALID_SHADOW);
 
         vcpu_put(vcpu);
 }
@@ -2128,7 +2132,8 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
                                               struct kvm_vcpu_events *events)
 {
         if (events->flags & ~(KVM_VCPUEVENT_VALID_NMI_PENDING
-                              | KVM_VCPUEVENT_VALID_SIPI_VECTOR))
+                              | KVM_VCPUEVENT_VALID_SIPI_VECTOR
+                              | KVM_VCPUEVENT_VALID_SHADOW))
                 return -EINVAL;
 
         vcpu_load(vcpu);
@@ -2143,6 +2148,9 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
         vcpu->arch.interrupt.soft = events->interrupt.soft;
         if (vcpu->arch.interrupt.pending && irqchip_in_kernel(vcpu->kvm))
                 kvm_pic_clear_isr_ack(vcpu->kvm);
+        if (events->flags & KVM_VCPUEVENT_VALID_SHADOW)
+                kvm_x86_ops->set_interrupt_shadow(vcpu,
+                        events->interrupt.shadow);
 
         vcpu->arch.nmi_injected = events->nmi.injected;
         if (events->flags & KVM_VCPUEVENT_VALID_NMI_PENDING)
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 360f85e8c435..48516a2a0b84 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -502,6 +502,7 @@ struct kvm_ioeventfd {
 #define KVM_CAP_HYPERV_SPIN 46
 #define KVM_CAP_PCI_SEGMENT 47
 #define KVM_CAP_PPC_PAIRED_SINGLES 48
+#define KVM_CAP_INTR_SHADOW 49
 #define KVM_CAP_X86_ROBUST_SINGLESTEP 51
 
 #ifdef KVM_CAP_IRQ_ROUTING
--
cgit v1.2.3

From a1efbe77c1fd7c34a97a76a61520bf23fb3663f6 Mon Sep 17 00:00:00 2001
From: Jan Kiszka
Date: Mon, 15 Feb 2010 10:45:43 +0100
Subject: KVM: x86: Add support for saving&restoring debug registers

So far user space was not able to save and restore debug registers for
migration or after reset. Plug this hole.

Signed-off-by: Jan Kiszka
Signed-off-by: Avi Kivity
---
 Documentation/kvm/api.txt  | 31 ++++++++++++++++++++++++++
 arch/x86/include/asm/kvm.h | 10 +++++++++
 arch/x86/kvm/x86.c         | 54 ++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/kvm.h        |  6 ++++++
 4 files changed, 101 insertions(+)
(limited to 'include/linux')

diff --git a/Documentation/kvm/api.txt b/Documentation/kvm/api.txt
index 9e5de5a1c4ef..d170cb435545 100644
--- a/Documentation/kvm/api.txt
+++ b/Documentation/kvm/api.txt
@@ -718,6 +718,37 @@ If KVM_CAP_INTR_SHADOW is available, KVM_VCPUEVENT_VALID_SHADOW can be set in
 the flags field to signal that interrupt.shadow contains a valid state and
 shall be written into the VCPU.
 
+4.32 KVM_GET_DEBUGREGS
+
+Capability: KVM_CAP_DEBUGREGS
+Architectures: x86
+Type: vcpu ioctl
+Parameters: struct kvm_debugregs (out)
+Returns: 0 on success, -1 on error
+
+Reads debug registers from the vcpu.
+
+struct kvm_debugregs {
+        __u64 db[4];
+        __u64 dr6;
+        __u64 dr7;
+        __u64 flags;
+        __u64 reserved[9];
+};
+
+4.33 KVM_SET_DEBUGREGS
+
+Capability: KVM_CAP_DEBUGREGS
+Architectures: x86
+Type: vcpu ioctl
+Parameters: struct kvm_debugregs (in)
+Returns: 0 on success, -1 on error
+
+Writes debug registers into the vcpu.
+
+See KVM_GET_DEBUGREGS for the data structure. The flags field is not yet
+used and must be cleared on entry.
+
 5. The kvm_run structure
diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h
index fb6117063ea3..ff90055c7f0b 100644
--- a/arch/x86/include/asm/kvm.h
+++ b/arch/x86/include/asm/kvm.h
@@ -21,6 +21,7 @@
 #define __KVM_HAVE_PIT_STATE2
 #define __KVM_HAVE_XEN_HVM
 #define __KVM_HAVE_VCPU_EVENTS
+#define __KVM_HAVE_DEBUGREGS
 
 /* Architectural interrupt line count. */
 #define KVM_NR_INTERRUPTS 256
@@ -289,4 +290,13 @@ struct kvm_vcpu_events {
         __u32 reserved[10];
 };
 
+/* for KVM_GET/SET_DEBUGREGS */
+struct kvm_debugregs {
+        __u64 db[4];
+        __u64 dr6;
+        __u64 dr7;
+        __u64 flags;
+        __u64 reserved[9];
+};
+
 #endif /* _ASM_X86_KVM_H */
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 84ffd95ee198..efeeabd84ecd 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1548,6 +1548,7 @@ int kvm_dev_ioctl_check_extension(long ext)
         case KVM_CAP_HYPERV_VAPIC:
         case KVM_CAP_HYPERV_SPIN:
         case KVM_CAP_PCI_SEGMENT:
+        case KVM_CAP_DEBUGREGS:
         case KVM_CAP_X86_ROBUST_SINGLESTEP:
                 r = 1;
                 break;
@@ -2165,6 +2166,36 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
         return 0;
 }
 
+static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu,
+                                             struct kvm_debugregs *dbgregs)
+{
+        vcpu_load(vcpu);
+
+        memcpy(dbgregs->db, vcpu->arch.db, sizeof(vcpu->arch.db));
+        dbgregs->dr6 = vcpu->arch.dr6;
+        dbgregs->dr7 = vcpu->arch.dr7;
+        dbgregs->flags = 0;
+
+        vcpu_put(vcpu);
+}
+
+static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
+                                            struct kvm_debugregs *dbgregs)
+{
+        if (dbgregs->flags)
+                return -EINVAL;
+
+        vcpu_load(vcpu);
+
+        memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db));
+        vcpu->arch.dr6 = dbgregs->dr6;
+        vcpu->arch.dr7 = dbgregs->dr7;
+
+        vcpu_put(vcpu);
+
+        return 0;
+}
+
 long kvm_arch_vcpu_ioctl(struct file *filp,
                          unsigned int ioctl, unsigned long arg)
 {
@@ -2343,6 +2374,29 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
                 r = kvm_vcpu_ioctl_x86_set_vcpu_events(vcpu, &events);
                 break;
         }
+        case KVM_GET_DEBUGREGS: {
+                struct kvm_debugregs dbgregs;
+
+                kvm_vcpu_ioctl_x86_get_debugregs(vcpu, &dbgregs);
+
+                r = -EFAULT;
+                if (copy_to_user(argp, &dbgregs,
+                                 sizeof(struct kvm_debugregs)))
+                        break;
+                r = 0;
+                break;
+        }
+        case KVM_SET_DEBUGREGS: {
+                struct kvm_debugregs dbgregs;
+
+                r = -EFAULT;
+                if (copy_from_user(&dbgregs, argp,
+                                   sizeof(struct kvm_debugregs)))
+                        break;
+
+                r = kvm_vcpu_ioctl_x86_set_debugregs(vcpu, &dbgregs);
+                break;
+        }
         default:
                 r = -EINVAL;
         }
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 48516a2a0b84..ce2876717a8b 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -503,6 +503,9 @@ struct kvm_ioeventfd {
 #define KVM_CAP_PCI_SEGMENT 47
 #define KVM_CAP_PPC_PAIRED_SINGLES 48
 #define KVM_CAP_INTR_SHADOW 49
+#ifdef __KVM_HAVE_DEBUGREGS
+#define KVM_CAP_DEBUGREGS 50
+#endif
 #define KVM_CAP_X86_ROBUST_SINGLESTEP 51
 
 #ifdef KVM_CAP_IRQ_ROUTING
@@ -690,6 +693,9 @@ struct kvm_clock_data {
 /* Available with KVM_CAP_VCPU_EVENTS */
 #define KVM_GET_VCPU_EVENTS _IOR(KVMIO, 0x9f, struct kvm_vcpu_events)
 #define KVM_SET_VCPU_EVENTS _IOW(KVMIO, 0xa0, struct kvm_vcpu_events)
+/* Available with KVM_CAP_DEBUGREGS */
+#define KVM_GET_DEBUGREGS _IOR(KVMIO, 0xa1, struct kvm_debugregs)
+#define KVM_SET_DEBUGREGS _IOW(KVMIO, 0xa2, struct kvm_debugregs)
 
 #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0)
--
cgit v1.2.3

From 18978768d89f638165646718c50ced19f2a10164 Mon Sep 17 00:00:00 2001
From: Alexander Graf
Date: Wed, 24 Mar 2010 21:48:18 +0100
Subject: KVM: PPC: Allow userspace to unset the IRQ line

Userspace can tell us that it wants to trigger an interrupt. But
so far it can't tell us that it wants to stop triggering one.

So let's interpret the parameter of the ioctl we already have to tell
us whether to raise or lower the interrupt line.
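To illustrate the resulting protocol, the userspace side could look
roughly like the sketch below (assumed here: vcpu_fd is an already
created vcpu file descriptor, and error handling is left out):

    #include <linux/kvm.h>
    #include <sys/ioctl.h>

    /* Raise or lower the external interrupt line of a vcpu. */
    static int kvm_set_irq_line(int vcpu_fd, int raised)
    {
            struct kvm_interrupt irq = {
                    .irq = raised ? KVM_INTERRUPT_SET : KVM_INTERRUPT_UNSET,
            };

            return ioctl(vcpu_fd, KVM_INTERRUPT, &irq);
    }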
Signed-off-by: Alexander Graf

v2 -> v3:

 - Add CAP for unset irq

Signed-off-by: Avi Kivity
---
 arch/powerpc/include/asm/kvm.h     | 3 +++
 arch/powerpc/include/asm/kvm_ppc.h | 2 ++
 arch/powerpc/kvm/book3s.c          | 6 ++++++
 arch/powerpc/kvm/powerpc.c         | 6 +++++-
 include/linux/kvm.h                | 1 +
 5 files changed, 17 insertions(+), 1 deletion(-)
(limited to 'include/linux')

diff --git a/arch/powerpc/include/asm/kvm.h b/arch/powerpc/include/asm/kvm.h
index 19bae31202ce..6c5547d82bbe 100644
--- a/arch/powerpc/include/asm/kvm.h
+++ b/arch/powerpc/include/asm/kvm.h
@@ -84,4 +84,7 @@ struct kvm_guest_debug_arch {
 #define KVM_REG_QPR 0x0040
 #define KVM_REG_FQPR 0x0060
 
+#define KVM_INTERRUPT_SET -1U
+#define KVM_INTERRUPT_UNSET -2U
+
 #endif /* __LINUX_KVM_POWERPC_H */
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index c7fcdd751f14..6a2464e4d6b9 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -92,6 +92,8 @@ extern void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu);
 extern void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu);
 extern void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
                                        struct kvm_interrupt *irq);
+extern void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu,
+                                         struct kvm_interrupt *irq);
 extern int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
                                   unsigned int op, int *advance);
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index ff5a42058257..34e1a342bec8 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -231,6 +231,12 @@ void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
         kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL);
 }
 
+void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu,
+                                  struct kvm_interrupt *irq)
+{
+        kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL);
+}
+
 int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority)
 {
         int deliver = 1;
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 3f8677e9d8f9..0bb6a7e826d1 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -149,6 +149,7 @@ int kvm_dev_ioctl_check_extension(long ext)
         switch (ext) {
         case KVM_CAP_PPC_SEGSTATE:
         case KVM_CAP_PPC_PAIRED_SINGLES:
+        case KVM_CAP_PPC_UNSET_IRQ:
                 r = 1;
                 break;
         case KVM_CAP_COALESCED_MMIO:
@@ -451,7 +452,10 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
 
 int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq)
 {
-        kvmppc_core_queue_external(vcpu, irq);
+        if (irq->irq == KVM_INTERRUPT_UNSET)
+                kvmppc_core_dequeue_external(vcpu, irq);
+        else
+                kvmppc_core_queue_external(vcpu, irq);
 
         if (waitqueue_active(&vcpu->wq)) {
                 wake_up_interruptible(&vcpu->wq);
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index ce2876717a8b..c36d093e9806 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -507,6 +507,7 @@ struct kvm_ioeventfd {
 #define KVM_CAP_DEBUGREGS 50
 #endif
 #define KVM_CAP_X86_ROBUST_SINGLESTEP 51
+#define KVM_CAP_PPC_UNSET_IRQ 53
 
 #ifdef KVM_CAP_IRQ_ROUTING
--
cgit v1.2.3

From 71fbfd5f38f73515f1516a68fbe04dba198b70f0 Mon Sep 17 00:00:00 2001
From: Alexander Graf
Date: Wed, 24 Mar 2010 21:48:29 +0100
Subject: KVM: Add support for enabling capabilities per-vcpu

Sometimes we don't want all capabilities to be available to all our vcpus.
One example is the OSI interface, implemented in the next patch.
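To illustrate, enabling such a capability from userspace could then look
roughly like the sketch below (assumed here: vcpu_fd is an already
created vcpu file descriptor, and KVM_CAP_PPC_OSI is the capability
added two patches later in this series):

    #include <linux/kvm.h>
    #include <string.h>
    #include <sys/ioctl.h>

    static int enable_osi(int vcpu_fd)
    {
            struct kvm_enable_cap cap;

            /* flags and args[] must be zero unless a capability defines them */
            memset(&cap, 0, sizeof(cap));
            cap.cap = KVM_CAP_PPC_OSI;

            return ioctl(vcpu_fd, KVM_ENABLE_CAP, &cap);
    }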
In order to have a generic mechanism for enabling capabilities
individually, this patch introduces a new ioctl that can be used for
this purpose. That way features we don't want in all guests or
userspace configurations can just not be enabled and we're good.

Signed-off-by: Alexander Graf
Signed-off-by: Avi Kivity
---
 Documentation/kvm/api.txt  | 35 +++++++++++++++++++++++++++++++++++
 arch/powerpc/kvm/powerpc.c | 27 +++++++++++++++++++++++++++
 include/linux/kvm.h        | 11 +++++++++++
 3 files changed, 73 insertions(+)
(limited to 'include/linux')

diff --git a/Documentation/kvm/api.txt b/Documentation/kvm/api.txt
index 2cc0120ccdb5..f9724dc8d079 100644
--- a/Documentation/kvm/api.txt
+++ b/Documentation/kvm/api.txt
@@ -813,6 +813,41 @@ This ioctl is required on Intel-based hosts.
 This is needed on Intel hardware
 because of a quirk in the virtualization implementation (see the internals
 documentation when it pops into existence).
 
+4.36 KVM_ENABLE_CAP
+
+Capability: KVM_CAP_ENABLE_CAP
+Architectures: ppc
+Type: vcpu ioctl
+Parameters: struct kvm_enable_cap (in)
+Returns: 0 on success; -1 on error
+
+Not all extensions are enabled by default. Using this ioctl the application
+can enable an extension, making it available to the guest.
+
+On systems that do not support this ioctl, it always fails. On systems that
+do support it, it only works for extensions that are supported for enablement.
+
+To check if a capability can be enabled, the KVM_CHECK_EXTENSION ioctl should
+be used.
+
+struct kvm_enable_cap {
+        /* in */
+        __u32 cap;
+
+The capability that is supposed to get enabled.
+
+        __u32 flags;
+
+A bitfield indicating future enhancements. Has to be 0 for now.
+
+        __u64 args[4];
+
+Arguments for enabling a feature. If a feature needs initial values to
+function properly, this is the place to put them.
+
+        __u8 pad[64];
+};
+
 5. The kvm_run structure
 
 Application code obtains a pointer to the kvm_run structure by
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 0bb6a7e826d1..646bfd256e5d 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -150,6 +150,7 @@ int kvm_dev_ioctl_check_extension(long ext)
         case KVM_CAP_PPC_SEGSTATE:
         case KVM_CAP_PPC_PAIRED_SINGLES:
         case KVM_CAP_PPC_UNSET_IRQ:
+        case KVM_CAP_ENABLE_CAP:
                 r = 1;
                 break;
         case KVM_CAP_COALESCED_MMIO:
@@ -465,6 +466,23 @@ int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq)
         return 0;
 }
 
+static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
+                                     struct kvm_enable_cap *cap)
+{
+        int r;
+
+        if (cap->flags)
+                return -EINVAL;
+
+        switch (cap->cap) {
+        default:
+                r = -EINVAL;
+                break;
+        }
+
+        return r;
+}
+
 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
                                     struct kvm_mp_state *mp_state)
 {
@@ -493,6 +511,15 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
                 r = kvm_vcpu_ioctl_interrupt(vcpu, &irq);
                 break;
         }
+        case KVM_ENABLE_CAP:
+        {
+                struct kvm_enable_cap cap;
+                r = -EFAULT;
+                if (copy_from_user(&cap, argp, sizeof(cap)))
+                        goto out;
+                r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
+                break;
+        }
         default:
                 r = -EINVAL;
         }
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index c36d093e9806..ecb68e433558 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -400,6 +400,15 @@ struct kvm_ioeventfd {
         __u8 pad[36];
 };
 
+/* for KVM_ENABLE_CAP */
+struct kvm_enable_cap {
+        /* in */
+        __u32 cap;
+        __u32 flags;
+        __u64 args[4];
+        __u8 pad[64];
+};
+
 #define KVMIO 0xAE
 
 /*
@@ -508,6 +517,7 @@ struct kvm_ioeventfd {
 #endif
 #define KVM_CAP_X86_ROBUST_SINGLESTEP 51
 #define KVM_CAP_PPC_UNSET_IRQ 53
+#define KVM_CAP_ENABLE_CAP 54
 
 #ifdef KVM_CAP_IRQ_ROUTING
@@ -697,6 +707,7 @@ struct kvm_clock_data {
 /* Available with KVM_CAP_DEBUGREGS */
 #define KVM_GET_DEBUGREGS _IOR(KVMIO, 0xa1, struct kvm_debugregs)
 #define KVM_SET_DEBUGREGS _IOW(KVMIO, 0xa2, struct kvm_debugregs)
+#define KVM_ENABLE_CAP _IOW(KVMIO, 0xa3, struct kvm_enable_cap)
 
 #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0)
--
cgit v1.2.3

From ad0a048b096ac819f28667602285453468a8d8f9 Mon Sep 17 00:00:00 2001
From: Alexander Graf
Date: Wed, 24 Mar 2010 21:48:30 +0100
Subject: KVM: PPC: Add OSI hypercall interface

MOL uses its own hypercall interface to call back into userspace when
the guest wants to do something.

So let's implement that as an exit reason, specify it with a CAP and
only really use it when userspace wants us to.

The only user of it so far is MOL.

Signed-off-by: Alexander Graf
Signed-off-by: Avi Kivity
---
 Documentation/kvm/api.txt             | 19 ++++++++++++++++---
 arch/powerpc/include/asm/kvm_book3s.h |  5 +++++
 arch/powerpc/include/asm/kvm_host.h   |  2 ++
 arch/powerpc/kvm/book3s.c             | 24 ++++++++++++++++++------
 arch/powerpc/kvm/powerpc.c            | 12 ++++++++++++
 include/linux/kvm.h                   |  6 ++++++
 6 files changed, 59 insertions(+), 9 deletions(-)
(limited to 'include/linux')

diff --git a/Documentation/kvm/api.txt b/Documentation/kvm/api.txt
index f9724dc8d079..6f362356e738 100644
--- a/Documentation/kvm/api.txt
+++ b/Documentation/kvm/api.txt
@@ -958,9 +958,9 @@ executed a memory-mapped I/O instruction which could not be satisfied
 by kvm. The 'data' member contains the written data if 'is_write' is
 true, and should be filled by application code otherwise.
 
-NOTE: For KVM_EXIT_IO and KVM_EXIT_MMIO, the corresponding operations
-are complete (and guest state is consistent) only after userspace has
-re-entered the kernel with KVM_RUN. The kernel side will first finish
+NOTE: For KVM_EXIT_IO, KVM_EXIT_MMIO and KVM_EXIT_OSI, the corresponding
+operations are complete (and guest state is consistent) only after userspace
+has re-entered the kernel with KVM_RUN. The kernel side will first finish
 incomplete operations and then check for pending signals. Userspace
 can re-enter the guest with an unmasked signal pending to complete
 pending operations.
@@ -1015,6 +1015,19 @@ s390 specific.
 
 powerpc specific.
 
+                /* KVM_EXIT_OSI */
+                struct {
+                        __u64 gprs[32];
+                } osi;
+
+MOL uses a special hypercall interface it calls 'OSI'. To enable it, we catch
+hypercalls and exit with this exit struct that contains all the guest gprs.
+
+If exit_reason is KVM_EXIT_OSI, then the vcpu has triggered such a hypercall.
+Userspace can now handle the hypercall and when it's done modify the gprs as
+necessary. Upon guest entry all guest GPRs will then be replaced by the values
+in this struct.
+
                 /* Fix the size of the union. */
                 char padding[256];
         };
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index bea76371dbe1..7e243b2cac72 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -148,6 +148,11 @@ static inline ulong dsisr(void)
 
 extern void kvm_return_point(void);
 
+/* Magic register values loaded into r3 and r4 before the 'sc' assembly
+ * instruction for the OSI hypercalls */
+#define OSI_SC_MAGIC_R3 0x113724FA
+#define OSI_SC_MAGIC_R4 0x77810F9B
+
 #define INS_DCBZ 0x7c0007ec
 
 #endif /* __ASM_KVM_BOOK3S_H__ */
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 0ebda67ad6a8..486f1cafd5f7 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -273,6 +273,8 @@ struct kvm_vcpu_arch {
         u8 mmio_sign_extend;
         u8 dcr_needed;
         u8 dcr_is_write;
+        u8 osi_needed;
+        u8 osi_enabled;
 
         u32 cpr0_cfgaddr; /* holds the last set cpr0_cfgaddr */
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index de12202fe1c6..7696d0f547e3 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -872,12 +872,24 @@ program_interrupt:
                 break;
         }
         case BOOK3S_INTERRUPT_SYSCALL:
-#ifdef EXIT_DEBUG
-                printk(KERN_INFO "Syscall Nr %d\n", (int)kvmppc_get_gpr(vcpu, 0));
-#endif
-                vcpu->stat.syscall_exits++;
-                kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
-                r = RESUME_GUEST;
+                // XXX make user settable
+                if (vcpu->arch.osi_enabled &&
+                    (((u32)kvmppc_get_gpr(vcpu, 3)) == OSI_SC_MAGIC_R3) &&
+                    (((u32)kvmppc_get_gpr(vcpu, 4)) == OSI_SC_MAGIC_R4)) {
+                        u64 *gprs = run->osi.gprs;
+                        int i;
+
+                        run->exit_reason = KVM_EXIT_OSI;
+                        for (i = 0; i < 32; i++)
+                                gprs[i] = kvmppc_get_gpr(vcpu, i);
+                        vcpu->arch.osi_needed = 1;
+                        r = RESUME_HOST_NV;
+
+                } else {
+                        vcpu->stat.syscall_exits++;
+                        kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
+                        r = RESUME_GUEST;
+                }
                 break;
         case BOOK3S_INTERRUPT_FP_UNAVAIL:
         case BOOK3S_INTERRUPT_ALTIVEC:
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 646bfd256e5d..9a4dd8146d39 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -151,6 +151,7 @@ int kvm_dev_ioctl_check_extension(long ext)
         case KVM_CAP_PPC_PAIRED_SINGLES:
         case KVM_CAP_PPC_UNSET_IRQ:
         case KVM_CAP_ENABLE_CAP:
+        case KVM_CAP_PPC_OSI:
                 r = 1;
                 break;
         case KVM_CAP_COALESCED_MMIO:
@@ -433,6 +434,13 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
                 if (!vcpu->arch.dcr_is_write)
                         kvmppc_complete_dcr_load(vcpu, run);
                 vcpu->arch.dcr_needed = 0;
+        } else if (vcpu->arch.osi_needed) {
+                u64 *gprs = run->osi.gprs;
+                int i;
+
+                for (i = 0; i < 32; i++)
+                        kvmppc_set_gpr(vcpu, i, gprs[i]);
+                vcpu->arch.osi_needed = 0;
         }
 
         kvmppc_core_deliver_interrupts(vcpu);
@@ -475,6 +483,10 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
                 return -EINVAL;
 
         switch (cap->cap) {
+        case KVM_CAP_PPC_OSI:
+                r = 0;
+                vcpu->arch.osi_enabled = true;
+                break;
         default:
                 r = -EINVAL;
                 break;
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index ecb68e433558..23ea02253900 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -160,6 +160,7 @@ struct kvm_pit_config {
 #define KVM_EXIT_DCR 15
 #define KVM_EXIT_NMI 16
 #define KVM_EXIT_INTERNAL_ERROR 17
+#define KVM_EXIT_OSI 18
 
 /* For KVM_EXIT_INTERNAL_ERROR */
 #define KVM_INTERNAL_ERROR_EMULATION 1
@@ -259,6 +260,10 @@ struct kvm_run {
                         __u32 ndata;
                         __u64 data[16];
                 } internal;
+                /* KVM_EXIT_OSI */
+                struct {
+                        __u64 gprs[32];
+                } osi;
                 /* Fix the size of the union. */
                 char padding[256];
         };
@@ -516,6 +521,7 @@ struct kvm_enable_cap {
 #define KVM_CAP_DEBUGREGS 50
 #endif
 #define KVM_CAP_X86_ROBUST_SINGLESTEP 51
+#define KVM_CAP_PPC_OSI 52
 #define KVM_CAP_PPC_UNSET_IRQ 53
 #define KVM_CAP_ENABLE_CAP 54
--
cgit v1.2.3

From 660c22c425cbe14badfb3b0a0206862577701ab7 Mon Sep 17 00:00:00 2001
From: Takuya Yoshikawa
Date: Tue, 13 Apr 2010 22:47:24 +0900
Subject: KVM: limit the number of pages per memory slot

This patch limits the number of pages per memory slot so that we are
free from extra care about type issues.

Signed-off-by: Takuya Yoshikawa
Signed-off-by: Marcelo Tosatti
---
 include/linux/kvm_host.h |  6 ++++++
 virt/kvm/kvm_main.c      | 11 ++++++-----
 2 files changed, 12 insertions(+), 5 deletions(-)
(limited to 'include/linux')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 169d07758ee5..55830638aeb1 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -105,6 +105,12 @@ struct kvm_vcpu {
         struct kvm_vcpu_arch arch;
 };
 
+/*
+ * Some of the bitops functions do not support too long bitmaps.
+ * This number must be determined not to exceed such limits.
+ */
+#define KVM_MEM_MAX_NR_PAGES ((1UL << 31) - 1)
+
 struct kvm_memory_slot {
         gfn_t base_gfn;
         unsigned long npages;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 55a5d4804499..d6351a34b297 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -557,6 +557,10 @@ int __kvm_set_memory_region(struct kvm *kvm,
         base_gfn = mem->guest_phys_addr >> PAGE_SHIFT;
         npages = mem->memory_size >> PAGE_SHIFT;
 
+        r = -EINVAL;
+        if (npages > KVM_MEM_MAX_NR_PAGES)
+                goto out;
+
         if (!npages)
                 mem->flags &= ~KVM_MEM_LOG_DIRTY_PAGES;
 
@@ -1187,13 +1191,10 @@ void mark_page_dirty(struct kvm *kvm, gfn_t gfn)
         memslot = gfn_to_memslot_unaliased(kvm, gfn);
         if (memslot && memslot->dirty_bitmap) {
                 unsigned long rel_gfn = gfn - memslot->base_gfn;
-                unsigned long *p = memslot->dirty_bitmap +
-                                        rel_gfn / BITS_PER_LONG;
-                int offset = rel_gfn % BITS_PER_LONG;
 
                 /* avoid RMW */
-                if (!generic_test_le_bit(offset, p))
-                        generic___set_le_bit(offset, p);
+                if (!generic_test_le_bit(rel_gfn, memslot->dirty_bitmap))
+                        generic___set_le_bit(rel_gfn, memslot->dirty_bitmap);
         }
 }
--
cgit v1.2.3

From 90d83dc3d49f5101addae962ccc1b4aff66b68d8 Mon Sep 17 00:00:00 2001
From: Lai Jiangshan
Date: Mon, 19 Apr 2010 17:41:23 +0800
Subject: KVM: use the correct RCU API for PROVE_RCU=y

The RCU/SRCU APIs have already changed for proving RCU usage.

I got the following dmesg when PROVE_RCU=y because we used the incorrect
API. This patch converts rcu_dereference() to srcu_dereference() or a
related API.

===================================================
[ INFO: suspicious rcu_dereference_check() usage. ]
---------------------------------------------------
arch/x86/kvm/mmu.c:3020 invoked rcu_dereference_check() without protection!

other info that might help us debug this:

rcu_scheduler_active = 1, debug_locks = 0
2 locks held by qemu-system-x86/8550:
 #0: (&kvm->slots_lock){+.+.+.}, at: [] kvm_set_memory_region+0x29/0x50 [kvm]
 #1: (&(&kvm->mmu_lock)->rlock){+.+...}, at: [] kvm_arch_commit_memory_region+0xa6/0xe2 [kvm]

stack backtrace:
Pid: 8550, comm: qemu-system-x86 Not tainted 2.6.34-rc4-tip-01028-g939eab1 #27
Call Trace:
 [] lockdep_rcu_dereference+0xaa/0xb3
 [] kvm_mmu_calculate_mmu_pages+0x44/0x7d [kvm]
 [] kvm_arch_commit_memory_region+0xb7/0xe2 [kvm]
 [] __kvm_set_memory_region+0x636/0x6e2 [kvm]
 [] kvm_set_memory_region+0x37/0x50 [kvm]
 [] vmx_set_tss_addr+0x46/0x5a [kvm_intel]
 [] kvm_arch_vm_ioctl+0x17a/0xcf8 [kvm]
 [] ? unlock_page+0x27/0x2c
 [] ? __do_fault+0x3a9/0x3e1
 [] kvm_vm_ioctl+0x364/0x38d [kvm]
 [] ? up_read+0x23/0x3d
 [] vfs_ioctl+0x32/0xa6
 [] do_vfs_ioctl+0x495/0x4db
 [] ? fget_light+0xc2/0x241
 [] ? do_sys_open+0x104/0x116
 [] ? retint_swapgs+0xe/0x13
 [] sys_ioctl+0x47/0x6a
 [] system_call_fastpath+0x16/0x1b

Signed-off-by: Lai Jiangshan
Signed-off-by: Avi Kivity
---
 arch/ia64/kvm/kvm-ia64.c |  2 +-
 arch/s390/kvm/kvm-s390.h |  2 +-
 arch/x86/kvm/mmu.c       |  7 ++++---
 arch/x86/kvm/vmx.c       |  2 +-
 arch/x86/kvm/x86.c       |  4 ++--
 arch/x86/kvm/x86.h       |  7 +++++++
 include/linux/kvm_host.h |  7 +++++++
 virt/kvm/iommu.c         |  4 ++--
 virt/kvm/kvm_main.c      | 13 ++++++++-----
 9 files changed, 33 insertions(+), 15 deletions(-)
(limited to 'include/linux')

diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index d7bac1f75af0..d5f4e9161201 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -1381,7 +1381,7 @@ static void kvm_release_vm_pages(struct kvm *kvm)
         int i, j;
         unsigned long base_gfn;
 
-        slots = rcu_dereference(kvm->memslots);
+        slots = kvm_memslots(kvm);
         for (i = 0; i < slots->nmemslots; i++) {
                 memslot = &slots->memslots[i];
                 base_gfn = memslot->base_gfn;
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index 60f09ab3672c..cfa9d1777457 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -72,7 +72,7 @@ static inline void kvm_s390_vcpu_set_mem(struct kvm_vcpu *vcpu)
         struct kvm_memslots *memslots;
 
         idx = srcu_read_lock(&vcpu->kvm->srcu);
-        memslots = rcu_dereference(vcpu->kvm->memslots);
+        memslots = kvm_memslots(vcpu->kvm);
 
         mem = &memslots->memslots[0];
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 7a17db1cdcd6..0682a393ad90 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -787,7 +787,7 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
         int retval = 0;
         struct kvm_memslots *slots;
 
-        slots = rcu_dereference(kvm->memslots);
+        slots = kvm_memslots(kvm);
 
         for (i = 0; i < slots->nmemslots; i++) {
                 struct kvm_memory_slot *memslot = &slots->memslots[i];
@@ -3016,7 +3016,8 @@ unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm)
         unsigned int nr_pages = 0;
         struct kvm_memslots *slots;
 
-        slots = rcu_dereference(kvm->memslots);
+        slots = kvm_memslots(kvm);
+
         for (i = 0; i < slots->nmemslots; i++)
                 nr_pages += slots->memslots[i].npages;
@@ -3292,7 +3293,7 @@ static int count_rmaps(struct kvm_vcpu *vcpu)
         int i, j, k, idx;
 
         idx = srcu_read_lock(&kvm->srcu);
-        slots = rcu_dereference(kvm->memslots);
+        slots = kvm_memslots(kvm);
         for (i = 0; i < KVM_MEMORY_SLOTS; ++i) {
                 struct kvm_memory_slot *m = &slots->memslots[i];
                 struct kvm_rmap_desc *d;
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 0b896ac7e4bb..d0a10b5612e9 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1558,7 +1558,7 @@ static gva_t rmode_tss_base(struct kvm *kvm)
                 struct kvm_memslots *slots;
                 gfn_t base_gfn;
 
-                slots = rcu_dereference(kvm->memslots);
+                slots = kvm_memslots(kvm);
                 base_gfn = kvm->memslots->memslots[0].base_gfn +
                            kvm->memslots->memslots[0].npages - 3;
                 return base_gfn << PAGE_SHIFT;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 58a96e6a234c..638248c96999 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2497,7 +2497,7 @@ gfn_t unalias_gfn_instantiation(struct kvm *kvm, gfn_t gfn)
         struct kvm_mem_alias *alias;
         struct kvm_mem_aliases *aliases;
 
-        aliases = rcu_dereference(kvm->arch.aliases);
+        aliases = kvm_aliases(kvm);
 
         for (i = 0; i < aliases->naliases; ++i) {
                 alias = &aliases->aliases[i];
@@ -2516,7 +2516,7 @@ gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
         struct kvm_mem_alias *alias;
         struct kvm_mem_aliases *aliases;
 
-        aliases = rcu_dereference(kvm->arch.aliases);
+        aliases = kvm_aliases(kvm);
 
         for (i = 0; i < aliases->naliases; ++i) {
                 alias = &aliases->aliases[i];
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index b7a404722d2b..f4b54458285b 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -65,6 +65,13 @@ static inline int is_paging(struct kvm_vcpu *vcpu)
         return kvm_read_cr0_bits(vcpu, X86_CR0_PG);
 }
 
+static inline struct kvm_mem_aliases *kvm_aliases(struct kvm *kvm)
+{
+        return rcu_dereference_check(kvm->arch.aliases,
+                                     srcu_read_lock_held(&kvm->srcu)
+                                     || lockdep_is_held(&kvm->slots_lock));
+}
+
 void kvm_before_handle_nmi(struct kvm_vcpu *vcpu);
 void kvm_after_handle_nmi(struct kvm_vcpu *vcpu);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 55830638aeb1..1ed030bad59e 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -250,6 +250,13 @@ void kvm_exit(void);
 void kvm_get_kvm(struct kvm *kvm);
 void kvm_put_kvm(struct kvm *kvm);
 
+static inline struct kvm_memslots *kvm_memslots(struct kvm *kvm)
+{
+        return rcu_dereference_check(kvm->memslots,
+                                     srcu_read_lock_held(&kvm->srcu)
+                                     || lockdep_is_held(&kvm->slots_lock));
+}
+
 #define HPA_MSB ((sizeof(hpa_t) * 8) - 1)
 #define HPA_ERR_MASK ((hpa_t)1 << HPA_MSB)
 static inline int is_error_hpa(hpa_t hpa) { return hpa >> HPA_MSB; }
diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c
index 80fd3ad3b2de..37ca71ebdba8 100644
--- a/virt/kvm/iommu.c
+++ b/virt/kvm/iommu.c
@@ -78,7 +78,7 @@ static int kvm_iommu_map_memslots(struct kvm *kvm)
         int i, r = 0;
         struct kvm_memslots *slots;
 
-        slots = rcu_dereference(kvm->memslots);
+        slots = kvm_memslots(kvm);
 
         for (i = 0; i < slots->nmemslots; i++) {
                 r = kvm_iommu_map_pages(kvm, &slots->memslots[i]);
@@ -217,7 +217,7 @@ static int kvm_iommu_unmap_memslots(struct kvm *kvm)
         int i;
         struct kvm_memslots *slots;
 
-        slots = rcu_dereference(kvm->memslots);
+        slots = kvm_memslots(kvm);
 
         for (i = 0; i < slots->nmemslots; i++) {
                 kvm_iommu_put_pages(kvm, slots->memslots[i].base_gfn,
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index d6351a34b297..4901ec5061ba 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -834,7 +834,7 @@ EXPORT_SYMBOL_GPL(kvm_is_error_hva);
 struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn)
 {
         int i;
-        struct kvm_memslots *slots = rcu_dereference(kvm->memslots);
+        struct kvm_memslots *slots = kvm_memslots(kvm);
 
         for (i = 0; i < slots->nmemslots; ++i) {
                 struct kvm_memory_slot *memslot = &slots->memslots[i];
@@ -856,7 +856,7 @@ struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn)
 int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn)
 {
         int i;
-        struct kvm_memslots *slots = rcu_dereference(kvm->memslots);
+        struct kvm_memslots *slots = kvm_memslots(kvm);
 
         gfn = unalias_gfn_instantiation(kvm, gfn);
         for (i = 0; i < KVM_MEMORY_SLOTS; ++i) {
@@ -900,7 +900,7 @@ out:
 int memslot_id(struct kvm *kvm, gfn_t gfn)
 {
         int i;
-        struct kvm_memslots *slots = rcu_dereference(kvm->memslots);
+        struct kvm_memslots *slots = kvm_memslots(kvm);
         struct kvm_memory_slot *memslot = NULL;
 
         gfn = unalias_gfn(kvm, gfn);
@@ -1994,7 +1994,9 @@ int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
                      int len, const void *val)
 {
         int i;
-        struct kvm_io_bus *bus = rcu_dereference(kvm->buses[bus_idx]);
+        struct kvm_io_bus *bus;
+
+        bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu);
         for (i = 0; i < bus->dev_count; i++)
                 if (!kvm_iodevice_write(bus->devs[i], addr, len, val))
                         return 0;
@@ -2006,8 +2008,9 @@ int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
                     int len, void *val)
 {
         int i;
-        struct kvm_io_bus *bus = rcu_dereference(kvm->buses[bus_idx]);
+        struct kvm_io_bus *bus;
 
+        bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu);
         for (i = 0; i < bus->dev_count; i++)
                 if (!kvm_iodevice_read(bus->devs[i], addr, len, val))
                         return 0;
--
cgit v1.2.3

From 2a059bf444dd7758ccf48f217cd981570132be85 Mon Sep 17 00:00:00 2001
From: Gui Jianfeng
Date: Fri, 16 Apr 2010 17:19:48 +0800
Subject: KVM: Get rid of dead function gva_to_page()

Nobody uses gva_to_page() anymore, so get rid of it.

Signed-off-by: Gui Jianfeng
Signed-off-by: Avi Kivity
---
 arch/x86/kvm/mmu.c       | 14 --------------
 include/linux/kvm_host.h |  1 -
 2 files changed, 15 deletions(-)
(limited to 'include/linux')

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 7eff794457f3..d2a110e870fa 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1618,20 +1618,6 @@ static void mmu_convert_notrap(struct kvm_mmu_page *sp)
         }
 }
 
-struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva)
-{
-        struct page *page;
-
-        gpa_t gpa = kvm_mmu_gva_to_gpa_read(vcpu, gva, NULL);
-
-        if (gpa == UNMAPPED_GVA)
-                return NULL;
-
-        page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT);
-
-        return page;
-}
-
 /*
  * The function is based on mtrr_type_lookup() in
  * arch/x86/kernel/cpu/mtrr/generic.c
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 1ed030bad59e..ce027d518096 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -260,7 +260,6 @@ static inline struct kvm_memslots *kvm_memslots(struct kvm *kvm)
 #define HPA_MSB ((sizeof(hpa_t) * 8) - 1)
 #define HPA_ERR_MASK ((hpa_t)1 << HPA_MSB)
 static inline int is_error_hpa(hpa_t hpa) { return hpa >> HPA_MSB; }
-struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva);
 
 extern struct page *bad_page;
 extern pfn_t bad_pfn;
--
cgit v1.2.3

From 0ee75bead83da4791e5cbf659806c54d8ee40f12 Mon Sep 17 00:00:00 2001
From: Avi Kivity
Date: Wed, 28 Apr 2010 15:39:01 +0300
Subject: KVM: Let vcpu structure alignment be determined at runtime

vmx and svm vcpus have different contents and therefore may have
different alignment requirements. Let each specify its required
alignment.
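To illustrate the new calling convention, a hypothetical arch module
would now pass its own vcpu container type together with that type's
alignment (my_arch_ops and struct vcpu_mine are made-up names for this
sketch):

    static int __init my_arch_init(void)
    {
            /* The vcpu kmem cache is created with this alignment. */
            return kvm_init(&my_arch_ops, sizeof(struct vcpu_mine),
                            __alignof__(struct vcpu_mine), THIS_MODULE);
    }

Passing 0 for vcpu_align keeps the previous behaviour of using
__alignof__(struct kvm_vcpu).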
Signed-off-by: Avi Kivity
---
 arch/ia64/kvm/vmm.c       | 2 +-
 arch/powerpc/kvm/44x.c    | 2 +-
 arch/powerpc/kvm/book3s.c | 3 ++-
 arch/powerpc/kvm/e500.c   | 2 +-
 arch/s390/kvm/kvm-s390.c  | 2 +-
 arch/x86/kvm/svm.c        | 2 +-
 arch/x86/kvm/vmx.c        | 3 ++-
 include/linux/kvm_host.h  | 2 +-
 virt/kvm/kvm_main.c       | 7 ++++---
 9 files changed, 14 insertions(+), 11 deletions(-)
(limited to 'include/linux')

diff --git a/arch/ia64/kvm/vmm.c b/arch/ia64/kvm/vmm.c
index 7a62f75778c5..f0b9cac82414 100644
--- a/arch/ia64/kvm/vmm.c
+++ b/arch/ia64/kvm/vmm.c
@@ -51,7 +51,7 @@ static int __init kvm_vmm_init(void)
         vmm_fpswa_interface = fpswa_interface;
 
         /*Register vmm data to kvm side*/
-        return kvm_init(&vmm_info, 1024, THIS_MODULE);
+        return kvm_init(&vmm_info, 1024, 0, THIS_MODULE);
 }
 
 static void __exit kvm_vmm_exit(void)
diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c
index 689a57c2ac80..73c0a3f64ed1 100644
--- a/arch/powerpc/kvm/44x.c
+++ b/arch/powerpc/kvm/44x.c
@@ -147,7 +147,7 @@ static int __init kvmppc_44x_init(void)
         if (r)
                 return r;
 
-        return kvm_init(NULL, sizeof(struct kvmppc_vcpu_44x), THIS_MODULE);
+        return kvm_init(NULL, sizeof(struct kvmppc_vcpu_44x), 0, THIS_MODULE);
 }
 
 static void __exit kvmppc_44x_exit(void)
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 28e785fb2cab..11f226ff4468 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -1385,7 +1385,8 @@ int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 
 static int kvmppc_book3s_init(void)
 {
-        return kvm_init(NULL, sizeof(struct kvmppc_vcpu_book3s), THIS_MODULE);
+        return kvm_init(NULL, sizeof(struct kvmppc_vcpu_book3s), 0,
+                        THIS_MODULE);
 }
 
 static void kvmppc_book3s_exit(void)
diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c
index 669a5c5fc7d7..bc2b4004eb26 100644
--- a/arch/powerpc/kvm/e500.c
+++ b/arch/powerpc/kvm/e500.c
@@ -161,7 +161,7 @@ static int __init kvmppc_e500_init(void)
         flush_icache_range(kvmppc_booke_handlers,
                         kvmppc_booke_handlers + max_ivor + kvmppc_handler_len);
 
-        return kvm_init(NULL, sizeof(struct kvmppc_vcpu_e500), THIS_MODULE);
+        return kvm_init(NULL, sizeof(struct kvmppc_vcpu_e500), 0, THIS_MODULE);
 }
 
 static void __init kvmppc_e500_exit(void)
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index ee7c713686ce..8093e6f47f49 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -752,7 +752,7 @@ gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
 static int __init kvm_s390_init(void)
 {
         int ret;
-        ret = kvm_init(NULL, sizeof(struct kvm_vcpu), THIS_MODULE);
+        ret = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
         if (ret)
                 return ret;
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 889f66022e57..2511664ff671 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -3319,7 +3319,7 @@ static struct kvm_x86_ops svm_x86_ops = {
 static int __init svm_init(void)
 {
         return kvm_init(&svm_x86_ops, sizeof(struct vcpu_svm),
-                        THIS_MODULE);
+                        __alignof__(struct vcpu_svm), THIS_MODULE);
 }
 
 static void __exit svm_exit(void)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 875b785228f6..2e8729678600 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -4245,7 +4245,8 @@ static int __init vmx_init(void)
 
         set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */
 
-        r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx), THIS_MODULE);
+        r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx),
+                     __alignof__(struct vcpu_vmx), THIS_MODULE);
         if (r)
                 goto out3;
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index ce027d518096..7cb116afa1cd 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -243,7 +243,7 @@ void kvm_vcpu_uninit(struct kvm_vcpu *vcpu);
 void vcpu_load(struct kvm_vcpu *vcpu);
 void vcpu_put(struct kvm_vcpu *vcpu);
 
-int kvm_init(void *opaque, unsigned int vcpu_size,
+int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
              struct module *module);
 void kvm_exit(void);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 9ab1a77941ef..f032806a212f 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2178,7 +2178,7 @@ static void kvm_sched_out(struct preempt_notifier *pn,
         kvm_arch_vcpu_put(vcpu);
 }
 
-int kvm_init(void *opaque, unsigned int vcpu_size,
+int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
              struct module *module)
 {
         int r;
@@ -2228,8 +2228,9 @@ int kvm_init(void *opaque, unsigned int vcpu_size,
                 goto out_free_4;
 
         /* A kmem cache lets us meet the alignment requirements of fx_save. */
-        kvm_vcpu_cache = kmem_cache_create("kvm_vcpu", vcpu_size,
-                                           __alignof__(struct kvm_vcpu),
+        if (!vcpu_align)
+                vcpu_align = __alignof__(struct kvm_vcpu);
+        kvm_vcpu_cache = kmem_cache_create("kvm_vcpu", vcpu_size, vcpu_align,
                                            0, NULL);
         if (!kvm_vcpu_cache) {
                 r = -ENOMEM;
--
cgit v1.2.3

From cafd66595d92591e4bd25c3904e004fc6f897e2d Mon Sep 17 00:00:00 2001
From: Shane Wang
Date: Thu, 29 Apr 2010 12:09:01 -0400
Subject: KVM: VMX: enable VMXON check with SMX enabled (Intel TXT)

Per the documentation, for the feature control MSR:

  Bit 1 enables VMXON in SMX operation. If the bit is clear, execution
  of VMXON in SMX operation causes a general-protection exception.
  Bit 2 enables VMXON outside SMX operation. If the bit is clear,
  execution of VMXON outside SMX operation causes a general-protection
  exception.

This patch enables this kind of check with SMX for VMXON in KVM.

Signed-off-by: Shane Wang
Signed-off-by: Avi Kivity
---
 arch/x86/include/asm/msr-index.h |  5 +++--
 arch/x86/kernel/tboot.c          |  1 +
 arch/x86/kvm/vmx.c               | 32 +++++++++++++++++++++-----------
 include/linux/tboot.h            |  1 +
 4 files changed, 26 insertions(+), 13 deletions(-)
(limited to 'include/linux')

diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index bc473acfa7f9..f9324851eba0 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -202,8 +202,9 @@
 #define MSR_IA32_EBL_CR_POWERON 0x0000002a
 #define MSR_IA32_FEATURE_CONTROL 0x0000003a
 
-#define FEATURE_CONTROL_LOCKED (1<<0)
-#define FEATURE_CONTROL_VMXON_ENABLED (1<<2)
+#define FEATURE_CONTROL_LOCKED (1<<0)
+#define FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX (1<<1)
+#define FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX (1<<2)
 
 #define MSR_IA32_APICBASE 0x0000001b
 #define MSR_IA32_APICBASE_BSP (1<<8)
diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c
index 86c9f91b48ae..46b827778d16 100644
--- a/arch/x86/kernel/tboot.c
+++ b/arch/x86/kernel/tboot.c
@@ -46,6 +46,7 @@
 
 /* Global pointer to shared data; NULL means no measured launch. */
 struct tboot *tboot __read_mostly;
+EXPORT_SYMBOL(tboot);
 
 /* timeout for APs (in secs) to enter wait-for-SIPI state during shutdown */
 #define AP_WAIT_TIMEOUT 1
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index c4f3955c64e0..d2a47aefdee7 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -27,6 +27,7 @@
 #include
 #include
 #include
+#include
 
 #include "kvm_cache_regs.h"
 #include "x86.h"
@@ -1272,9 +1273,16 @@ static __init int vmx_disabled_by_bios(void)
         u64 msr;
 
         rdmsrl(MSR_IA32_FEATURE_CONTROL, msr);
-        return (msr & (FEATURE_CONTROL_LOCKED |
-                       FEATURE_CONTROL_VMXON_ENABLED))
-            == FEATURE_CONTROL_LOCKED;
+        if (msr & FEATURE_CONTROL_LOCKED) {
+                if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX)
+                        && tboot_enabled())
+                        return 1;
+                if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX)
+                        && !tboot_enabled())
+                        return 1;
+        }
+
+        return 0;
         /* locked but not enabled */
 }
 
@@ -1282,21 +1290,23 @@ static int hardware_enable(void *garbage)
 {
         int cpu = raw_smp_processor_id();
         u64 phys_addr = __pa(per_cpu(vmxarea, cpu));
-        u64 old;
+        u64 old, test_bits;
 
         if (read_cr4() & X86_CR4_VMXE)
                 return -EBUSY;
 
         INIT_LIST_HEAD(&per_cpu(vcpus_on_cpu, cpu));
         rdmsrl(MSR_IA32_FEATURE_CONTROL, old);
-        if ((old & (FEATURE_CONTROL_LOCKED |
-                    FEATURE_CONTROL_VMXON_ENABLED))
-            != (FEATURE_CONTROL_LOCKED |
-                FEATURE_CONTROL_VMXON_ENABLED))
+
+        test_bits = FEATURE_CONTROL_LOCKED;
+        test_bits |= FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
+        if (tboot_enabled())
+                test_bits |= FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX;
+
+        if ((old & test_bits) != test_bits) {
                 /* enable and lock */
-                wrmsrl(MSR_IA32_FEATURE_CONTROL, old |
-                       FEATURE_CONTROL_LOCKED |
-                       FEATURE_CONTROL_VMXON_ENABLED);
+                wrmsrl(MSR_IA32_FEATURE_CONTROL, old | test_bits);
+        }
         write_cr4(read_cr4() | X86_CR4_VMXE); /* FIXME: not cpu hotplug safe */
         asm volatile (ASM_VMX_VMXON_RAX
                       : : "a"(&phys_addr), "m"(phys_addr)
diff --git a/include/linux/tboot.h b/include/linux/tboot.h
index bf2a0c748878..1dba6ee55203 100644
--- a/include/linux/tboot.h
+++ b/include/linux/tboot.h
@@ -150,6 +150,7 @@ extern int tboot_force_iommu(void);
 
 #else
 
+#define tboot_enabled() 0
 #define tboot_probe() do { } while (0)
 #define tboot_shutdown(shutdown_type) do { } while (0)
 #define tboot_sleep(sleep_state, pm1a_control, pm1b_control) \
--
cgit v1.2.3
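As a footnote to the last patch: the three feature-control bits it tests
can also be inspected from userspace through the msr driver. A minimal
sketch, assuming /dev/cpu/0/msr exists and using the MSR number 0x3a
from the patch above:

    #include <fcntl.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
            uint64_t msr;
            int fd = open("/dev/cpu/0/msr", O_RDONLY);

            /* Reading 8 bytes at offset 0x3a returns MSR_IA32_FEATURE_CONTROL. */
            if (fd < 0 || pread(fd, &msr, sizeof(msr), 0x3a) != sizeof(msr))
                    return 1;

            printf("locked=%d vmxon_in_smx=%d vmxon_outside_smx=%d\n",
                   (int)(msr & 1), (int)((msr >> 1) & 1), (int)((msr >> 2) & 1));
            close(fd);

            return 0;
    }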