Re: [PATCH 0/3] KVM: Introduce VCPU-wide notion of guest-mode V2

Previous thread: Webmail Biztonsági figyelmeztetés by irodrigu on Monday, November 29, 2010 - 8:19 am. (1 message)

Next thread: [PATCH] input: misc: add lps001wp_prs driver by mems applications on Monday, November 29, 2010 - 9:43 am. (9 messages)
From: Joerg Roedel
Date: Monday, November 29, 2010 - 8:38 am

Hi Avi, Hi Marcelo,

this patch-set introduces a generic notion of guest-mode for VCPUs in
KVM. This is already useful as seen in patch 3/3. Nested-VMX also has a
guest-mode, so it will make sense for this code too.

Regards,

	Joerg

As usual:

 arch/x86/include/asm/kvm_host.h |    1 +
 arch/x86/kvm/kvm_cache_regs.h   |   15 +++++++++++++
 arch/x86/kvm/svm.c              |   44 ++++++++++++++++++--------------------
 arch/x86/kvm/x86.c              |   14 ++++++++---

Joerg Roedel (3):
      KVM: X86: Introduce generic guest-mode representation
      KVM: SVM: Make Use of the generic guest-mode functions
      KVM: X86: Don't report L2 emulation failures to user-space

 4 files changed, 47 insertions(+), 27 deletions(-)

--

From: Joerg Roedel
Date: Monday, November 29, 2010 - 8:38 am

This patch introduces a generic representation of guest-mode
fpr a vcpu. This currently only exists in the SVM code.
Having this representation generic will help making the
non-svm code aware of nesting when this is necessary.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 arch/x86/include/asm/kvm_host.h |    1 +
 arch/x86/kvm/kvm_cache_regs.h   |   15 +++++++++++++++
 2 files changed, 16 insertions(+), 0 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 54e42c8..d2a66be 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -784,6 +784,7 @@ enum {
 #define HF_VINTR_MASK		(1 << 2)
 #define HF_NMI_MASK		(1 << 3)
 #define HF_IRET_MASK		(1 << 4)
+#define HF_GUEST_MASK		(1 << 5) /* VCPU is in guest-mode */
 
 /*
  * Hardware virtualization extension instructions may fault if a
diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h
index 975bb45..7e7c52d 100644
--- a/arch/x86/kvm/kvm_cache_regs.h
+++ b/arch/x86/kvm/kvm_cache_regs.h
@@ -84,4 +84,19 @@ static inline u64 kvm_read_edx_eax(struct kvm_vcpu *vcpu)
 		| ((u64)(kvm_register_read(vcpu, VCPU_REGS_RDX) & -1u) << 32);
 }
 
+static inline void kvm_vcpu_enter_gm(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.hflags |= HF_GUEST_MASK;
+}
+
+static inline void kvm_vcpu_leave_gm(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.hflags &= ~HF_GUEST_MASK;
+}
+
+static inline bool kvm_vcpu_is_gm(struct kvm_vcpu *vcpu)
+{
+	return !!(vcpu->arch.hflags & HF_GUEST_MASK);
+}
+
 #endif
-- 
1.7.1


--

From: Avi Kivity
Date: Monday, November 29, 2010 - 9:10 am

I don't like the name much - the "meat" is just two letters.  Please 
spell it out.  I guess we could do is_guest_mode() like we do 

!! unneeded with bool.

Note we need to live migrate this bit, but that's outside the scope of 
this patchset.

-- 
error compiling committee.c: too many arguments to function

--

From: Joerg Roedel
Date: Monday, November 29, 2010 - 8:38 am

This patch prevents that emulation failures which result
from emulating an instruction for an L2-Guest results in
being reported to userspace.
Without this patch a malicious L2-Guest would be able to
kill the L1 by triggering a race-condition between an vmexit
and the instruction emulator.
With this patch the L2 will most likely only kill itself in
this situation.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 arch/x86/kvm/x86.c |   14 ++++++++++----
 1 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 410d2d1..78329dd 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4320,13 +4320,19 @@ EXPORT_SYMBOL_GPL(kvm_inject_realmode_interrupt);
 
 static int handle_emulation_failure(struct kvm_vcpu *vcpu)
 {
+	int r = EMULATE_DONE;
+
 	++vcpu->stat.insn_emulation_fail;
 	trace_kvm_emulate_insn_failed(vcpu);
-	vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
-	vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
-	vcpu->run->internal.ndata = 0;
+	if (!kvm_vcpu_is_gm(vcpu)) {
+		vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+		vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
+		vcpu->run->internal.ndata = 0;
+		r = EMULATE_FAIL;
+	}
 	kvm_queue_exception(vcpu, UD_VECTOR);
-	return EMULATE_FAIL;
+
+	return r;
 }
 
 static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva)
-- 
1.7.1


--

From: Joerg Roedel
Date: Monday, November 29, 2010 - 8:38 am

This patch replaces the is_nested logic in the SVM module
with the generic notion of guest-mode.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 arch/x86/kvm/svm.c |   44 +++++++++++++++++++++-----------------------
 1 files changed, 21 insertions(+), 23 deletions(-)

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 2fd2f4d..3376fca 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -192,11 +192,6 @@ static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu)
 	return container_of(vcpu, struct vcpu_svm, vcpu);
 }
 
-static inline bool is_nested(struct vcpu_svm *svm)
-{
-	return svm->nested.vmcb;
-}
-
 static inline void enable_gif(struct vcpu_svm *svm)
 {
 	svm->vcpu.arch.hflags |= HF_GIF_MASK;
@@ -727,7 +722,7 @@ static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
 	struct vcpu_svm *svm = to_svm(vcpu);
 	u64 g_tsc_offset = 0;
 
-	if (is_nested(svm)) {
+	if (kvm_vcpu_is_gm(vcpu)) {
 		g_tsc_offset = svm->vmcb->control.tsc_offset -
 			       svm->nested.hsave->control.tsc_offset;
 		svm->nested.hsave->control.tsc_offset = offset;
@@ -741,7 +736,7 @@ static void svm_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment)
 	struct vcpu_svm *svm = to_svm(vcpu);
 
 	svm->vmcb->control.tsc_offset += adjustment;
-	if (is_nested(svm))
+	if (kvm_vcpu_is_gm(vcpu))
 		svm->nested.hsave->control.tsc_offset += adjustment;
 }
 
@@ -1209,7 +1204,7 @@ static void update_cr0_intercept(struct vcpu_svm *svm)
 	if (gcr0 == *hcr0 && svm->vcpu.fpu_active) {
 		vmcb->control.intercept_cr_read &= ~INTERCEPT_CR0_MASK;
 		vmcb->control.intercept_cr_write &= ~INTERCEPT_CR0_MASK;
-		if (is_nested(svm)) {
+		if (kvm_vcpu_is_gm(&svm->vcpu)) {
 			struct vmcb *hsave = svm->nested.hsave;
 
 			hsave->control.intercept_cr_read  &= ~INTERCEPT_CR0_MASK;
@@ -1220,7 +1215,7 @@ static void update_cr0_intercept(struct vcpu_svm *svm)
 	} else {
 		svm->vmcb->control.intercept_cr_read |= INTERCEPT_CR0_MASK;
 		svm->vmcb->control.intercept_cr_write |= ...
From: Avi Kivity
Date: Monday, November 29, 2010 - 9:10 am

Looks good, apart from the trivial comments on patch 1.

-- 
error compiling committee.c: too many arguments to function

--

From: Roedel, Joerg
Date: Monday, November 29, 2010 - 9:25 am

Okay, I do a re-spin and fix this. Thanks for your comments.

	Joerg

-- 
AMD Operating System Research Center

Advanced Micro Devices GmbH Einsteinring 24 85609 Dornach
General Managers: Alberto Bozzo, Andrew Bowd
Registration: Dornach, Landkr. Muenchen; Registerger. Muenchen, HRB Nr. 43632

--

From: Joerg Roedel
Date: Monday, November 29, 2010 - 9:51 am

This patch introduces a generic representation of guest-mode
fpr a vcpu. This currently only exists in the SVM code.
Having this representation generic will help making the
non-svm code aware of nesting when this is necessary.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 arch/x86/include/asm/kvm_host.h |    1 +
 arch/x86/kvm/kvm_cache_regs.h   |   15 +++++++++++++++
 2 files changed, 16 insertions(+), 0 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 54e42c8..d2a66be 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -784,6 +784,7 @@ enum {
 #define HF_VINTR_MASK		(1 << 2)
 #define HF_NMI_MASK		(1 << 3)
 #define HF_IRET_MASK		(1 << 4)
+#define HF_GUEST_MASK		(1 << 5) /* VCPU is in guest-mode */
 
 /*
  * Hardware virtualization extension instructions may fault if a
diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h
index 975bb45..95ac3af 100644
--- a/arch/x86/kvm/kvm_cache_regs.h
+++ b/arch/x86/kvm/kvm_cache_regs.h
@@ -84,4 +84,19 @@ static inline u64 kvm_read_edx_eax(struct kvm_vcpu *vcpu)
 		| ((u64)(kvm_register_read(vcpu, VCPU_REGS_RDX) & -1u) << 32);
 }
 
+static inline void enter_guest_mode(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.hflags |= HF_GUEST_MASK;
+}
+
+static inline void leave_guest_mode(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.hflags &= ~HF_GUEST_MASK;
+}
+
+static inline bool is_guest_mode(struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.hflags & HF_GUEST_MASK;
+}
+
 #endif
-- 
1.7.1


--

From: Joerg Roedel
Date: Monday, November 29, 2010 - 9:51 am

This patch prevents that emulation failures which result
from emulating an instruction for an L2-Guest results in
being reported to userspace.
Without this patch a malicious L2-Guest would be able to
kill the L1 by triggering a race-condition between an vmexit
and the instruction emulator.
With this patch the L2 will most likely only kill itself in
this situation.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 arch/x86/kvm/x86.c |   14 ++++++++++----
 1 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 410d2d1..4337a8b 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4320,13 +4320,19 @@ EXPORT_SYMBOL_GPL(kvm_inject_realmode_interrupt);
 
 static int handle_emulation_failure(struct kvm_vcpu *vcpu)
 {
+	int r = EMULATE_DONE;
+
 	++vcpu->stat.insn_emulation_fail;
 	trace_kvm_emulate_insn_failed(vcpu);
-	vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
-	vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
-	vcpu->run->internal.ndata = 0;
+	if (!is_guest_mode(vcpu)) {
+		vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+		vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
+		vcpu->run->internal.ndata = 0;
+		r = EMULATE_FAIL;
+	}
 	kvm_queue_exception(vcpu, UD_VECTOR);
-	return EMULATE_FAIL;
+
+	return r;
 }
 
 static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva)
-- 
1.7.1


--

From: Joerg Roedel
Date: Monday, November 29, 2010 - 9:51 am

Hi Avi, Hi Marcelo,

here is the re-spin I promised. The change to V1 are essentially the
renames:

	kvm_vcpu_enter_gm -> enter_guest_mode
	kvm_vcpu_leave_gm -> leave_guest_mode
	kvm_vcpu_is_gm    -> is_guest_mode

No other changes are in this patch-set compared to V1.

Regards,
	Joerg

 arch/x86/include/asm/kvm_host.h |    1 +
 arch/x86/kvm/kvm_cache_regs.h   |   15 +++++++++++++
 arch/x86/kvm/svm.c              |   44 ++++++++++++++++++--------------------
 arch/x86/kvm/x86.c              |   14 ++++++++---
 4 files changed, 47 insertions(+), 27 deletions(-)

Joerg Roedel (3):
      KVM: X86: Introduce generic guest-mode representation
      KVM: SVM: Make Use of the generic guest-mode functions
      KVM: X86: Don't report L2 emulation failures to user-space


--

From: Nadav Har'El
Date: Wednesday, December 1, 2010 - 1:01 am

I like this concept, and will be happy to change the nested VMX code to use
it as well.

One small thing: After the name change, it might not be obvious on first
sight that these functions refer to the state of the vcpu, not the state
of the actual CPU (which, if you think about it, is never in guest mode while
KVM code is running ;-)). I think that a short comment before the definition
of these functions might be useful - perhaps saying that they pertain to a
hypervisor running in the vcpu (i.e., nested virtualization).

Nadav.

-- 
Nadav Har'El                        |   Wednesday, Dec  1 2010, 24 Kislev 5771
nyh@math.technion.ac.il             |-----------------------------------------
Phone +972-523-790466, ICQ 13349191 |How do you get holy water? Boil the hell
http://nadav.harel.org.il           |out of it.
--

From: Roedel, Joerg
Date: Wednesday, December 1, 2010 - 3:03 am

Yes, right. Thats a good thing. I sent a follow-on patch adding the
comments.

Btw, another idea which came up recently was to concentrate the actuall
vmexit emulation at a single point. Every code place which does the exit
directly today will be changed to only set a request-bit and the real
exit is then done later. Your code might already do this, I havn't
checked. In fact the idea is from the neste-VMX patchset for Xen :)
This would fit very well in the generic code because it already has
request-bit infrastructure. What do you think, can nested VMX also make
use of that too?

	Joerg

-- 
AMD Operating System Research Center

Advanced Micro Devices GmbH Einsteinring 24 85609 Dornach
General Managers: Alberto Bozzo, Andrew Bowd
Registration: Dornach, Landkr. Muenchen; Registerger. Muenchen, HRB Nr. 43632

--

From: Nadav Har'El
Date: Wednesday, December 1, 2010 - 4:38 am

In my current patches, there is single function nested_vmx_vmexit() which
emulates the exit (exits from L2 to L1), but it is called in several places,
the most significant are of course in vmx_handle_exit (when L1 asked an exit
on the given event), and vmx_interrupt_allowed (when we inject an interrupt
and L1 asked to exit on interrupts). This area of my code definitely needs

Can you please say a few words why you'd want to move this nested-exit
request bit to x86.c? Do you want to move some of the exit logic to x86.c -
e.g., for the injection logic?

Nadav.

-- 
Nadav Har'El                        |   Wednesday, Dec  1 2010, 24 Kislev 5771
nyh@math.technion.ac.il             |-----------------------------------------
Phone +972-523-790466, ICQ 13349191 |Fame: when your name is in everything but
http://nadav.harel.org.il           |the phone book.
--

From: Roedel, Joerg
Date: Wednesday, December 1, 2010 - 6:20 am

I don't want to move the actual exit-code itself into generic code. This
code is different between svm and vmx. I think we could implement a
call-back in kvm_x86_ops which is called when a vmexit is requested.
The benefit is that we have a single and well-defined place where we
emulate a vmexit.
SVM already as a similar mechanism internally because nested_svm_vmexit
may sleep and can't be called from certain places. Another reason is
that emulating a vmexit at a wrong plase may have side-effects (for
example when called from within the instruction emulator).
With a generic request-bit I can remove the SVM internal implementation
and nested vmx could use it too. I am certain you will need something

Thats another and probably more complex topic. I need a better
understanding of (nested-)vmx before we discuss how this can be done.
But a vmexit-callback may be helpful there as well.

	Joerg

-- 
AMD Operating System Research Center

Advanced Micro Devices GmbH Einsteinring 24 85609 Dornach
General Managers: Alberto Bozzo, Andrew Bowd
Registration: Dornach, Landkr. Muenchen; Registerger. Muenchen, HRB Nr. 43632

--

From: Marcelo Tosatti
Date: Wednesday, December 1, 2010 - 7:42 pm

Applied, thanks.

--

From: Joerg Roedel
Date: Monday, November 29, 2010 - 9:51 am

This patch replaces the is_nested logic in the SVM module
with the generic notion of guest-mode.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 arch/x86/kvm/svm.c |   44 +++++++++++++++++++++-----------------------
 1 files changed, 21 insertions(+), 23 deletions(-)

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 2fd2f4d..bff391e 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -192,11 +192,6 @@ static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu)
 	return container_of(vcpu, struct vcpu_svm, vcpu);
 }
 
-static inline bool is_nested(struct vcpu_svm *svm)
-{
-	return svm->nested.vmcb;
-}
-
 static inline void enable_gif(struct vcpu_svm *svm)
 {
 	svm->vcpu.arch.hflags |= HF_GIF_MASK;
@@ -727,7 +722,7 @@ static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
 	struct vcpu_svm *svm = to_svm(vcpu);
 	u64 g_tsc_offset = 0;
 
-	if (is_nested(svm)) {
+	if (is_guest_mode(vcpu)) {
 		g_tsc_offset = svm->vmcb->control.tsc_offset -
 			       svm->nested.hsave->control.tsc_offset;
 		svm->nested.hsave->control.tsc_offset = offset;
@@ -741,7 +736,7 @@ static void svm_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment)
 	struct vcpu_svm *svm = to_svm(vcpu);
 
 	svm->vmcb->control.tsc_offset += adjustment;
-	if (is_nested(svm))
+	if (is_guest_mode(vcpu))
 		svm->nested.hsave->control.tsc_offset += adjustment;
 }
 
@@ -1209,7 +1204,7 @@ static void update_cr0_intercept(struct vcpu_svm *svm)
 	if (gcr0 == *hcr0 && svm->vcpu.fpu_active) {
 		vmcb->control.intercept_cr_read &= ~INTERCEPT_CR0_MASK;
 		vmcb->control.intercept_cr_write &= ~INTERCEPT_CR0_MASK;
-		if (is_nested(svm)) {
+		if (is_guest_mode(&svm->vcpu)) {
 			struct vmcb *hsave = svm->nested.hsave;
 
 			hsave->control.intercept_cr_read  &= ~INTERCEPT_CR0_MASK;
@@ -1220,7 +1215,7 @@ static void update_cr0_intercept(struct vcpu_svm *svm)
 	} else {
 		svm->vmcb->control.intercept_cr_read |= INTERCEPT_CR0_MASK;
 		svm->vmcb->control.intercept_cr_write |= ...
Previous thread: Webmail Biztonsági figyelmeztetés by irodrigu on Monday, November 29, 2010 - 8:19 am. (1 message)

Next thread: [PATCH] input: misc: add lps001wp_prs driver by mems applications on Monday, November 29, 2010 - 9:43 am. (9 messages)