Hi Avi, Hi Marcelo,
this patch-set introduces a generic notion of guest-mode for VCPUs in
KVM. This is already useful as seen in patch 3/3. Nested-VMX also has a
guest-mode, so it will make sense for this code too.
Regards,
Joerg
As usual:
arch/x86/include/asm/kvm_host.h | 1 +
arch/x86/kvm/kvm_cache_regs.h | 15 +++++++++++++
arch/x86/kvm/svm.c | 44 ++++++++++++++++++--------------------
arch/x86/kvm/x86.c | 14 ++++++++---
Joerg Roedel (3):
KVM: X86: Introduce generic guest-mode representation
KVM: SVM: Make Use of the generic guest-mode functions
KVM: X86: Don't report L2 emulation failures to user-space
4 files changed, 47 insertions(+), 27 deletions(-)
--
This patch introduces a generic representation of guest-mode
fpr a vcpu. This currently only exists in the SVM code.
Having this representation generic will help making the
non-svm code aware of nesting when this is necessary.
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
arch/x86/include/asm/kvm_host.h | 1 +
arch/x86/kvm/kvm_cache_regs.h | 15 +++++++++++++++
2 files changed, 16 insertions(+), 0 deletions(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 54e42c8..d2a66be 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -784,6 +784,7 @@ enum {
#define HF_VINTR_MASK (1 << 2)
#define HF_NMI_MASK (1 << 3)
#define HF_IRET_MASK (1 << 4)
+#define HF_GUEST_MASK (1 << 5) /* VCPU is in guest-mode */
/*
* Hardware virtualization extension instructions may fault if a
diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h
index 975bb45..7e7c52d 100644
--- a/arch/x86/kvm/kvm_cache_regs.h
+++ b/arch/x86/kvm/kvm_cache_regs.h
@@ -84,4 +84,19 @@ static inline u64 kvm_read_edx_eax(struct kvm_vcpu *vcpu)
| ((u64)(kvm_register_read(vcpu, VCPU_REGS_RDX) & -1u) << 32);
}
+static inline void kvm_vcpu_enter_gm(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.hflags |= HF_GUEST_MASK;
+}
+
+static inline void kvm_vcpu_leave_gm(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.hflags &= ~HF_GUEST_MASK;
+}
+
+static inline bool kvm_vcpu_is_gm(struct kvm_vcpu *vcpu)
+{
+ return !!(vcpu->arch.hflags & HF_GUEST_MASK);
+}
+
#endif
--
1.7.1
--
I don't like the name much - the "meat" is just two letters. Please spell it out. I guess we could do is_guest_mode() like we do !! unneeded with bool. Note we need to live migrate this bit, but that's outside the scope of this patchset. -- error compiling committee.c: too many arguments to function --
This patch prevents that emulation failures which result
from emulating an instruction for an L2-Guest results in
being reported to userspace.
Without this patch a malicious L2-Guest would be able to
kill the L1 by triggering a race-condition between an vmexit
and the instruction emulator.
With this patch the L2 will most likely only kill itself in
this situation.
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
arch/x86/kvm/x86.c | 14 ++++++++++----
1 files changed, 10 insertions(+), 4 deletions(-)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 410d2d1..78329dd 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4320,13 +4320,19 @@ EXPORT_SYMBOL_GPL(kvm_inject_realmode_interrupt);
static int handle_emulation_failure(struct kvm_vcpu *vcpu)
{
+ int r = EMULATE_DONE;
+
++vcpu->stat.insn_emulation_fail;
trace_kvm_emulate_insn_failed(vcpu);
- vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
- vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
- vcpu->run->internal.ndata = 0;
+ if (!kvm_vcpu_is_gm(vcpu)) {
+ vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+ vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
+ vcpu->run->internal.ndata = 0;
+ r = EMULATE_FAIL;
+ }
kvm_queue_exception(vcpu, UD_VECTOR);
- return EMULATE_FAIL;
+
+ return r;
}
static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva)
--
1.7.1
--
This patch replaces the is_nested logic in the SVM module
with the generic notion of guest-mode.
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
arch/x86/kvm/svm.c | 44 +++++++++++++++++++++-----------------------
1 files changed, 21 insertions(+), 23 deletions(-)
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 2fd2f4d..3376fca 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -192,11 +192,6 @@ static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu)
return container_of(vcpu, struct vcpu_svm, vcpu);
}
-static inline bool is_nested(struct vcpu_svm *svm)
-{
- return svm->nested.vmcb;
-}
-
static inline void enable_gif(struct vcpu_svm *svm)
{
svm->vcpu.arch.hflags |= HF_GIF_MASK;
@@ -727,7 +722,7 @@ static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
struct vcpu_svm *svm = to_svm(vcpu);
u64 g_tsc_offset = 0;
- if (is_nested(svm)) {
+ if (kvm_vcpu_is_gm(vcpu)) {
g_tsc_offset = svm->vmcb->control.tsc_offset -
svm->nested.hsave->control.tsc_offset;
svm->nested.hsave->control.tsc_offset = offset;
@@ -741,7 +736,7 @@ static void svm_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment)
struct vcpu_svm *svm = to_svm(vcpu);
svm->vmcb->control.tsc_offset += adjustment;
- if (is_nested(svm))
+ if (kvm_vcpu_is_gm(vcpu))
svm->nested.hsave->control.tsc_offset += adjustment;
}
@@ -1209,7 +1204,7 @@ static void update_cr0_intercept(struct vcpu_svm *svm)
if (gcr0 == *hcr0 && svm->vcpu.fpu_active) {
vmcb->control.intercept_cr_read &= ~INTERCEPT_CR0_MASK;
vmcb->control.intercept_cr_write &= ~INTERCEPT_CR0_MASK;
- if (is_nested(svm)) {
+ if (kvm_vcpu_is_gm(&svm->vcpu)) {
struct vmcb *hsave = svm->nested.hsave;
hsave->control.intercept_cr_read &= ~INTERCEPT_CR0_MASK;
@@ -1220,7 +1215,7 @@ static void update_cr0_intercept(struct vcpu_svm *svm)
} else {
svm->vmcb->control.intercept_cr_read |= INTERCEPT_CR0_MASK;
svm->vmcb->control.intercept_cr_write |= ...Looks good, apart from the trivial comments on patch 1. -- error compiling committee.c: too many arguments to function --
Okay, I do a re-spin and fix this. Thanks for your comments. Joerg -- AMD Operating System Research Center Advanced Micro Devices GmbH Einsteinring 24 85609 Dornach General Managers: Alberto Bozzo, Andrew Bowd Registration: Dornach, Landkr. Muenchen; Registerger. Muenchen, HRB Nr. 43632 --
This patch introduces a generic representation of guest-mode
fpr a vcpu. This currently only exists in the SVM code.
Having this representation generic will help making the
non-svm code aware of nesting when this is necessary.
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
arch/x86/include/asm/kvm_host.h | 1 +
arch/x86/kvm/kvm_cache_regs.h | 15 +++++++++++++++
2 files changed, 16 insertions(+), 0 deletions(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 54e42c8..d2a66be 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -784,6 +784,7 @@ enum {
#define HF_VINTR_MASK (1 << 2)
#define HF_NMI_MASK (1 << 3)
#define HF_IRET_MASK (1 << 4)
+#define HF_GUEST_MASK (1 << 5) /* VCPU is in guest-mode */
/*
* Hardware virtualization extension instructions may fault if a
diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h
index 975bb45..95ac3af 100644
--- a/arch/x86/kvm/kvm_cache_regs.h
+++ b/arch/x86/kvm/kvm_cache_regs.h
@@ -84,4 +84,19 @@ static inline u64 kvm_read_edx_eax(struct kvm_vcpu *vcpu)
| ((u64)(kvm_register_read(vcpu, VCPU_REGS_RDX) & -1u) << 32);
}
+static inline void enter_guest_mode(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.hflags |= HF_GUEST_MASK;
+}
+
+static inline void leave_guest_mode(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.hflags &= ~HF_GUEST_MASK;
+}
+
+static inline bool is_guest_mode(struct kvm_vcpu *vcpu)
+{
+ return vcpu->arch.hflags & HF_GUEST_MASK;
+}
+
#endif
--
1.7.1
--
This patch prevents that emulation failures which result
from emulating an instruction for an L2-Guest results in
being reported to userspace.
Without this patch a malicious L2-Guest would be able to
kill the L1 by triggering a race-condition between an vmexit
and the instruction emulator.
With this patch the L2 will most likely only kill itself in
this situation.
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
arch/x86/kvm/x86.c | 14 ++++++++++----
1 files changed, 10 insertions(+), 4 deletions(-)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 410d2d1..4337a8b 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4320,13 +4320,19 @@ EXPORT_SYMBOL_GPL(kvm_inject_realmode_interrupt);
static int handle_emulation_failure(struct kvm_vcpu *vcpu)
{
+ int r = EMULATE_DONE;
+
++vcpu->stat.insn_emulation_fail;
trace_kvm_emulate_insn_failed(vcpu);
- vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
- vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
- vcpu->run->internal.ndata = 0;
+ if (!is_guest_mode(vcpu)) {
+ vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+ vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
+ vcpu->run->internal.ndata = 0;
+ r = EMULATE_FAIL;
+ }
kvm_queue_exception(vcpu, UD_VECTOR);
- return EMULATE_FAIL;
+
+ return r;
}
static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva)
--
1.7.1
--
Hi Avi, Hi Marcelo,
here is the re-spin I promised. The change to V1 are essentially the
renames:
kvm_vcpu_enter_gm -> enter_guest_mode
kvm_vcpu_leave_gm -> leave_guest_mode
kvm_vcpu_is_gm -> is_guest_mode
No other changes are in this patch-set compared to V1.
Regards,
Joerg
arch/x86/include/asm/kvm_host.h | 1 +
arch/x86/kvm/kvm_cache_regs.h | 15 +++++++++++++
arch/x86/kvm/svm.c | 44 ++++++++++++++++++--------------------
arch/x86/kvm/x86.c | 14 ++++++++---
4 files changed, 47 insertions(+), 27 deletions(-)
Joerg Roedel (3):
KVM: X86: Introduce generic guest-mode representation
KVM: SVM: Make Use of the generic guest-mode functions
KVM: X86: Don't report L2 emulation failures to user-space
--
I like this concept, and will be happy to change the nested VMX code to use it as well. One small thing: After the name change, it might not be obvious on first sight that these functions refer to the state of the vcpu, not the state of the actual CPU (which, if you think about it, is never in guest mode while KVM code is running ;-)). I think that a short comment before the definition of these functions might be useful - perhaps saying that they pertain to a hypervisor running in the vcpu (i.e., nested virtualization). Nadav. -- Nadav Har'El | Wednesday, Dec 1 2010, 24 Kislev 5771 nyh@math.technion.ac.il |----------------------------------------- Phone +972-523-790466, ICQ 13349191 |How do you get holy water? Boil the hell http://nadav.harel.org.il |out of it. --
Yes, right. Thats a good thing. I sent a follow-on patch adding the comments. Btw, another idea which came up recently was to concentrate the actuall vmexit emulation at a single point. Every code place which does the exit directly today will be changed to only set a request-bit and the real exit is then done later. Your code might already do this, I havn't checked. In fact the idea is from the neste-VMX patchset for Xen :) This would fit very well in the generic code because it already has request-bit infrastructure. What do you think, can nested VMX also make use of that too? Joerg -- AMD Operating System Research Center Advanced Micro Devices GmbH Einsteinring 24 85609 Dornach General Managers: Alberto Bozzo, Andrew Bowd Registration: Dornach, Landkr. Muenchen; Registerger. Muenchen, HRB Nr. 43632 --
In my current patches, there is single function nested_vmx_vmexit() which emulates the exit (exits from L2 to L1), but it is called in several places, the most significant are of course in vmx_handle_exit (when L1 asked an exit on the given event), and vmx_interrupt_allowed (when we inject an interrupt and L1 asked to exit on interrupts). This area of my code definitely needs Can you please say a few words why you'd want to move this nested-exit request bit to x86.c? Do you want to move some of the exit logic to x86.c - e.g., for the injection logic? Nadav. -- Nadav Har'El | Wednesday, Dec 1 2010, 24 Kislev 5771 nyh@math.technion.ac.il |----------------------------------------- Phone +972-523-790466, ICQ 13349191 |Fame: when your name is in everything but http://nadav.harel.org.il |the phone book. --
I don't want to move the actual exit-code itself into generic code. This code is different between svm and vmx. I think we could implement a call-back in kvm_x86_ops which is called when a vmexit is requested. The benefit is that we have a single and well-defined place where we emulate a vmexit. SVM already as a similar mechanism internally because nested_svm_vmexit may sleep and can't be called from certain places. Another reason is that emulating a vmexit at a wrong plase may have side-effects (for example when called from within the instruction emulator). With a generic request-bit I can remove the SVM internal implementation and nested vmx could use it too. I am certain you will need something Thats another and probably more complex topic. I need a better understanding of (nested-)vmx before we discuss how this can be done. But a vmexit-callback may be helpful there as well. Joerg -- AMD Operating System Research Center Advanced Micro Devices GmbH Einsteinring 24 85609 Dornach General Managers: Alberto Bozzo, Andrew Bowd Registration: Dornach, Landkr. Muenchen; Registerger. Muenchen, HRB Nr. 43632 --
This patch replaces the is_nested logic in the SVM module
with the generic notion of guest-mode.
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
arch/x86/kvm/svm.c | 44 +++++++++++++++++++++-----------------------
1 files changed, 21 insertions(+), 23 deletions(-)
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 2fd2f4d..bff391e 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -192,11 +192,6 @@ static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu)
return container_of(vcpu, struct vcpu_svm, vcpu);
}
-static inline bool is_nested(struct vcpu_svm *svm)
-{
- return svm->nested.vmcb;
-}
-
static inline void enable_gif(struct vcpu_svm *svm)
{
svm->vcpu.arch.hflags |= HF_GIF_MASK;
@@ -727,7 +722,7 @@ static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
struct vcpu_svm *svm = to_svm(vcpu);
u64 g_tsc_offset = 0;
- if (is_nested(svm)) {
+ if (is_guest_mode(vcpu)) {
g_tsc_offset = svm->vmcb->control.tsc_offset -
svm->nested.hsave->control.tsc_offset;
svm->nested.hsave->control.tsc_offset = offset;
@@ -741,7 +736,7 @@ static void svm_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment)
struct vcpu_svm *svm = to_svm(vcpu);
svm->vmcb->control.tsc_offset += adjustment;
- if (is_nested(svm))
+ if (is_guest_mode(vcpu))
svm->nested.hsave->control.tsc_offset += adjustment;
}
@@ -1209,7 +1204,7 @@ static void update_cr0_intercept(struct vcpu_svm *svm)
if (gcr0 == *hcr0 && svm->vcpu.fpu_active) {
vmcb->control.intercept_cr_read &= ~INTERCEPT_CR0_MASK;
vmcb->control.intercept_cr_write &= ~INTERCEPT_CR0_MASK;
- if (is_nested(svm)) {
+ if (is_guest_mode(&svm->vcpu)) {
struct vmcb *hsave = svm->nested.hsave;
hsave->control.intercept_cr_read &= ~INTERCEPT_CR0_MASK;
@@ -1220,7 +1215,7 @@ static void update_cr0_intercept(struct vcpu_svm *svm)
} else {
svm->vmcb->control.intercept_cr_read |= INTERCEPT_CR0_MASK;
svm->vmcb->control.intercept_cr_write |= ...