Hi,
this is the second and reworked version of my nested paging for nested svm
patchset. Changes to the previous version include:
* Renamed mmu.tdp_enabled to mmu.direct_map
* Introduced two helper functions to read physical memory
locations of the current running guest level
* Fixed a couple of bugs were KVM needs to read l2
physical memory and others
This patchset is tested with KVM and Windows 7 XPmode. I also tested in KVM
with Linux and Windows 7 as l2 guests. I also tested different pagesize
combinations and did a stress tests for a couple of hours. All these tests
showed no introduced regressions.
Please review this second version of the patch-set. I appreciate your feedback.
Thanks,
Joerg
Diffstat:
arch/x86/include/asm/kvm_host.h | 26 ++++++
arch/x86/kvm/emulate.c | 22 +++---
arch/x86/kvm/mmu.c | 160 ++++++++++++++++++++++++++++++--------
arch/x86/kvm/mmu.h | 2 +
arch/x86/kvm/paging_tmpl.h | 97 +++++++++++++++++++-----
arch/x86/kvm/svm.c | 120 ++++++++++++++++++++++++-----
arch/x86/kvm/vmx.c | 3 +
arch/x86/kvm/x86.c | 77 +++++++++++++++++--
include/linux/kvm_host.h | 5 +
9 files changed, 423 insertions(+), 89 deletions(-)
Shortlog:
Joerg Roedel (22):
KVM: MMU: Check for root_level instead of long mode
KVM: MMU: Make tdp_enabled a mmu-context parameter
KVM: MMU: Make set_cr3 a function pointer in kvm_mmu
KVM: X86: Introduce a tdp_set_cr3 function
KVM: MMU: Introduce get_cr3 function pointer
KVM: MMU: Introduce inject_page_fault function pointer
KVM: SVM: Implement MMU helper functions for Nested Nested Paging
KVM: MMU: Change init_kvm_softmmu to take a context as parameter
KVM: MMU: Let is_rsvd_bits_set take mmu context instead of vcpu
KVM: MMU: Introduce generic walk_addr function
KVM: MMU: Add infrastructure for two-level page walker
KVM: MMU: ...The walk_addr function checks for !is_long_mode in its 64
bit version. But what is meant here is a check for pae
paging. Change the condition to really check for pae paging
so that it also works with nested nested paging.
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
arch/x86/kvm/paging_tmpl.h | 4 ++--
1 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index d0cc07e..b07cec6 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -128,7 +128,7 @@ walk:
walker->level = vcpu->arch.mmu.root_level;
pte = vcpu->arch.cr3;
#if PTTYPE == 64
- if (!is_long_mode(vcpu)) {
+ if (vcpu->arch.mmu.root_level == PT32E_ROOT_LEVEL) {
pte = kvm_pdptr_read(vcpu, (addr >> 30) & 3);
trace_kvm_mmu_paging_element(pte, walker->level);
if (!is_present_gpte(pte))
@@ -194,7 +194,7 @@ walk:
(PTTYPE == 64 || is_pse(vcpu))) ||
((walker->level == PT_PDPE_LEVEL) &&
is_large_pte(pte) &&
- is_long_mode(vcpu))) {
+ vcpu->arch.mmu.root_level == PT64_ROOT_LEVEL)) {
int lvl = walker->level;
walker->gfn = gpte_to_gfn_lvl(pte, lvl);
--
1.7.0.4
--
This patch implements the reporting of the nested paging feature support to userspace. Signed-off-by: Joerg Roedel <joerg.roedel@amd.com> --- arch/x86/kvm/svm.c | 7 ++++++- 1 files changed, 6 insertions(+), 1 deletions(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 266b1d4..6d6b300 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -3233,7 +3233,12 @@ static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) entry->ebx = 8; /* Lets support 8 ASIDs in case we add proper ASID emulation to nested SVM */ entry->ecx = 0; /* Reserved */ - entry->edx = 0; /* Do not support any additional features */ + entry->edx = 0; /* Per default do not support any + additional features */ + + /* Support NPT for the guest if enabled */ + if (npt_enabled) + entry->edx = SVM_FEATURE_NPT; break; } -- 1.7.0.4 --
This patch introduces the init_kvm_nested_mmu() function
which is used to re-initialize the nested mmu when the l2
guest changes its paging mode.
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
arch/x86/kvm/mmu.c | 55 +++++++++++++++++++++++++++++++++++++++++++++++++++-
1 files changed, 54 insertions(+), 1 deletions(-)
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 8bd40b5..af89e71 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2136,6 +2136,25 @@ static gpa_t translate_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 *error)
return gpa;
}
+static gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 *error)
+{
+ gpa_t t_gpa;
+ u32 access;
+ u32 err;
+
+ BUG_ON(!vcpu->arch.mmu.nested);
+
+ /* NPT walks are treated as user writes */
+ access = PFERR_WRITE_MASK | PFERR_USER_MASK;
+ t_gpa = vcpu->arch.nested_mmu.gva_to_gpa(vcpu, gpa, access, &err);
+ if (t_gpa == UNMAPPED_GVA) {
+ vcpu->arch.fault_address = gpa;
+ vcpu->arch.fault_error_code = err | PFERR_NESTED_MASK;
+ }
+
+ return t_gpa;
+}
+
static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr,
u32 access, u32 *error)
{
@@ -2465,11 +2484,45 @@ static int init_kvm_softmmu(struct kvm_vcpu *vcpu)
return r;
}
+static int init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
+{
+ struct kvm_mmu *g_context = &vcpu->arch.nested_mmu;
+ struct kvm_mmu *h_context = &vcpu->arch.mmu;
+
+ g_context->get_cr3 = get_cr3;
+ g_context->translate_gpa = translate_nested_gpa;
+ g_context->inject_page_fault = kvm_inject_page_fault;
+
+ /*
+ * Note that arch.mmu.gva_to_gpa translates l2_gva to l1_gpa. The
+ * translation of l2_gpa to l1_gpa addresses is done using the
+ * arch.nested_mmu.gva_to_gpa function. Basically the gva_to_gpa
+ * functions between mmu and nested_mmu are swapped.
+ */
+ if (!is_paging(vcpu)) {
+ g_context->root_level = 0;
+ h_context->gva_to_gpa = nonpaging_gva_to_gpa_nested;
+ } else if (is_long_mode(vcpu)) ...This patch introduces two fields in vcpu_arch for x86:
* fault_address
* fault_error_code
This will be used to correctly propagate page faults back
into the guest when we could have either an ordinary page
fault or a nested page fault. In the case of a nested page
fault the fault-address is different from the original
address that should be walked. So we need to keep track
about the real fault-address.
We could also remove the current path of the error_code to
the fault. But this change is too invasive and outside the
scope of this patch set. It will be changed and tested
seperatly.
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
arch/x86/include/asm/kvm_host.h | 3 +++
arch/x86/kvm/paging_tmpl.h | 4 ++++
2 files changed, 7 insertions(+), 0 deletions(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index d9dfc8c..8426870 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -298,6 +298,9 @@ struct kvm_vcpu_arch {
/* Used for two dimensional paging emulation */
struct kvm_mmu nested_mmu;
+ unsigned long fault_address;
+ int fault_error_code;
+
/* only needed in kvm_pv_mmu_op() path, but it's hot so
* put it here to avoid allocation */
struct kvm_pv_mmu_op_buffer mmu_op_buffer;
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 8b27026..df92ce2 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -266,6 +266,10 @@ err:
walker->error_code |= PFERR_FETCH_MASK;
if (rsvd_fault)
walker->error_code |= PFERR_RSVD_MASK;
+
+ vcpu->arch.fault_address = addr;
+ vcpu->arch.fault_error_code = walker->error_code;
+
trace_kvm_mmu_walker_error(walker->error_code);
return 0;
}
--
1.7.0.4
--
Probably a problem on i386. How does npt handle faults when the guest is using pae paging and the host (in our case the guest...) isn't? I see it uses exit_info_2 for the address, which is a u64. So we probably need to upgrade gva_t to a u64. Please send this as a unsigned. Maybe put the two in a struct, easier to pass around. -- error compiling committee.c: too many arguments to function --
This shouldn't be an issue. If we run on 32bit host with nested paging the guest can't have more than 4gb of addressable memory because of the Yeah, makes it easier to read. Joerg --
But the nested guest can use pae paging and generate a #NPF with exit_info_2 > 4GB. So we need to keep the full fault address; if we truncate, the guest might actually resolve the fault and let the nested guest continue. -- error compiling committee.c: too many arguments to function --
This could only be a malicious guest because it can't have memory above 4gb. But a guest could certainly setup its page tables to point there, thats true. So I change it to u64. Joerg --
It doesn't need to be malicious, for example it could set up an mmio PCI BAR outside the 4GB space. -- error compiling committee.c: too many arguments to function --
Are there _any_ regular tests of KVM on i386 hosts? For me this is terribly broken (also after I fixed the issue which gave me a VMEXIT_INVALID at the first vmrun). Joerg --
No, apart from the poor users. I'll try to set something up using nsvm. -- error compiling committee.c: too many arguments to function --
Ok. I will post an initial fix for the VMEXIT_INVALID bug soon. Apart from that I get a lockdep warning when I try to start a guest. The guest actually boots if it is single-vcpu. SMP guests don't even boot through the BIOS for me. Joerg --
Strange. i386 vs x86_64 shouldn't have that much effect! -- error compiling committee.c: too many arguments to function --
This is the lockdep warning I get when I start booting a Linux kernel.
It is with the nested-npt patchset but the warning occurs without it too
(slightly different backtraces then).
[60390.953424] =======================================================
[60390.954324] [ INFO: possible circular locking dependency detected ]
[60390.954324] 2.6.34-rc5 #7
[60390.954324] -------------------------------------------------------
[60390.954324] qemu-system-x86/2506 is trying to acquire lock:
[60390.954324] (&mm->mmap_sem){++++++}, at: [<c10ab0f4>] might_fault+0x4c/0x86
[60390.954324]
[60390.954324] but task is already holding lock:
[60390.954324] (&(&kvm->mmu_lock)->rlock){+.+...}, at: [<f8ec1b50>] spin_lock+0xd/0xf [kvm]
[60390.954324]
[60390.954324] which lock already depends on the new lock.
[60390.954324]
[60390.954324]
[60390.954324] the existing dependency chain (in reverse order) is:
[60390.954324]
[60390.954324] -> #1 (&(&kvm->mmu_lock)->rlock){+.+...}:
[60390.954324] [<c10575ad>] __lock_acquire+0x9fa/0xb6c
[60390.954324] [<c10577b8>] lock_acquire+0x99/0xb8
[60390.954324] [<c15afa2b>] _raw_spin_lock+0x20/0x2f
[60390.954324] [<f8eafe19>] spin_lock+0xd/0xf [kvm]
[60390.954324] [<f8eb104e>] kvm_mmu_notifier_invalidate_range_start+0x2f/0x71 [kvm]
[60390.954324] [<c10bc994>] __mmu_notifier_invalidate_range_start+0x31/0x57
[60390.954324] [<c10b1de3>] mprotect_fixup+0x153/0x3d5
[60390.954324] [<c10b21ca>] sys_mprotect+0x165/0x1db
[60390.954324] [<c10028cc>] sysenter_do_call+0x12/0x32
[60390.954324]
[60390.954324] -> #0 (&mm->mmap_sem){++++++}:
[60390.954324] [<c10574af>] __lock_acquire+0x8fc/0xb6c
[60390.954324] [<c10577b8>] lock_acquire+0x99/0xb8
[60390.954324] [<c10ab111>] might_fault+0x69/0x86
[60390.954324] [<c11d5987>] _copy_from_user+0x36/0x119
[60390.954324] [<f8eafcd9>] copy_from_user+0xd/0xf [kvm]
[60390.954324] [<f8eb0ac0>] kvm_read_guest_page+0x24/0x33 ...Unrelated. This can take the lock and free it. It only shows up because we do memory ops inside the mmu_lock, which is deeply forbidden (anything which touches user memory, including kmalloc(), can trigger Just a silly bug. kvm_pdptr_read() can cause a guest memory read on Ever increasing complexity... -- error compiling committee.c: too many arguments to function --
I guess this was not reported because most svm machines have npt, and this requires npt=0 to trigger. Nonpae paging disables npt, so you were hit. Interestingly, nsvm makes it more likely to appear, since npt on i386+pae will need the pdptrs. -- error compiling committee.c: too many arguments to function --
Hmm, actually it happened on 32 bit with npt enabled. I think this can trigger when mmu_alloc_roots is called for an pae guest because it accidentially tries read the root_gfn from the guest before it figures out that it runs with tdp and omits the gfn read from the guest. I need to touch this for nested-npt and will look into a way improving this. Joerg --
Yes. I had a patchset which moved the 'direct' calculation before, and skipped root_gfn if it was direct, but it was broken. If you like I can resurrect it, but it may interfere with your work. -- error compiling committee.c: too many arguments to function --
This is necessary to implement Nested Nested Paging. As a
side effect this allows some cleanups in the SVM nested
paging code.
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
arch/x86/include/asm/kvm_host.h | 1 +
arch/x86/kvm/mmu.c | 4 +++-
2 files changed, 4 insertions(+), 1 deletions(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 44eda9b..04834b0 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -233,6 +233,7 @@ struct kvm_pio_request {
*/
struct kvm_mmu {
void (*new_cr3)(struct kvm_vcpu *vcpu);
+ void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long root);
int (*page_fault)(struct kvm_vcpu *vcpu, gva_t gva, u32 err);
void (*free)(struct kvm_vcpu *vcpu);
gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva, u32 access,
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 618d25f..6eedcdd 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2386,6 +2386,7 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
context->shadow_root_level = kvm_x86_ops->get_tdp_level();
context->root_hpa = INVALID_PAGE;
vcpu->arch.mmu.direct_map = true;
+ vcpu->arch.mmu.set_cr3 = kvm_x86_ops->set_cr3;
if (!is_paging(vcpu)) {
context->gva_to_gpa = nonpaging_gva_to_gpa;
@@ -2425,6 +2426,7 @@ static int init_kvm_softmmu(struct kvm_vcpu *vcpu)
vcpu->arch.mmu.base_role.cr4_pae = !!is_pae(vcpu);
vcpu->arch.mmu.direct_map = false;
+ vcpu->arch.mmu.set_cr3 = kvm_x86_ops->set_cr3;
return r;
}
@@ -2470,7 +2472,7 @@ int kvm_mmu_load(struct kvm_vcpu *vcpu)
if (r)
goto out;
/* set_cr3() should ensure TLB has been flushed */
- kvm_x86_ops->set_cr3(vcpu, vcpu->arch.mmu.root_hpa);
+ vcpu->arch.mmu.set_cr3(vcpu, vcpu->arch.mmu.root_hpa);
out:
return r;
}
--
1.7.0.4
--
This patch adds code to initialize the Nested Nested Paging
MMU context when the L1 guest executes a VMRUN instruction
and has nested paging enabled in its VMCB.
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
arch/x86/kvm/mmu.c | 1 +
arch/x86/kvm/svm.c | 56 ++++++++++++++++++++++++++++++++++++++++++---------
2 files changed, 47 insertions(+), 10 deletions(-)
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index af89e71..e5dc853 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2569,6 +2569,7 @@ void kvm_mmu_unload(struct kvm_vcpu *vcpu)
{
mmu_free_roots(vcpu);
}
+EXPORT_SYMBOL_GPL(kvm_mmu_unload);
static void mmu_pte_write_zap_pte(struct kvm_vcpu *vcpu,
struct kvm_mmu_page *sp,
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index e31f601..266b1d4 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -94,7 +94,6 @@ struct nested_state {
/* Nested Paging related state */
u64 nested_cr3;
-
};
#define MSRPM_OFFSETS 16
@@ -283,6 +282,15 @@ static inline void flush_guest_tlb(struct kvm_vcpu *vcpu)
force_new_asid(vcpu);
}
+static int get_npt_level(void)
+{
+#ifdef CONFIG_X86_64
+ return PT64_ROOT_LEVEL;
+#else
+ return PT32E_ROOT_LEVEL;
+#endif
+}
+
static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
{
if (!npt_enabled && !(efer & EFER_LMA))
@@ -1523,6 +1531,27 @@ static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu,
nested_svm_vmexit(svm);
}
+static int nested_svm_init_mmu_context(struct kvm_vcpu *vcpu)
+{
+ int r;
+
+ r = kvm_init_shadow_mmu(vcpu, &vcpu->arch.mmu);
+
+ vcpu->arch.mmu.set_cr3 = nested_svm_set_tdp_cr3;
+ vcpu->arch.mmu.get_cr3 = nested_svm_get_tdp_cr3;
+ vcpu->arch.mmu.inject_page_fault = nested_svm_inject_npf_exit;
+ vcpu->arch.mmu.shadow_root_level = get_npt_level();
+ vcpu->arch.nested_mmu.gva_to_gpa = vcpu->arch.mmu.gva_to_gpa;
+ vcpu->arch.mmu.nested = true;
+
+ return r;
+}
+
+static void ...Better not to introduce in the first place (I resisted commenting on it, but now I broke down). -- error compiling committee.c: too many arguments to function --
This patch adds a function which can read from the guests
physical memory or from the guest's guest physical memory.
This will be used in the two-dimensional page table walker.
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
arch/x86/include/asm/kvm_host.h | 3 +++
arch/x86/kvm/x86.c | 24 ++++++++++++++++++++++++
2 files changed, 27 insertions(+), 0 deletions(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 3cbfb51..7851bbc 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -635,6 +635,9 @@ void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr);
void kvm_requeue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code);
void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long cr2,
u32 error_code);
+int kvm_read_guest_page_tdp(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
+ gfn_t gfn, void *data, int offset, int len,
+ u32 *error);
bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl);
int kvm_pic_set_irq(void *opaque, int irq, int level);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 6b2ce1d..558d995 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -356,6 +356,30 @@ bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl)
EXPORT_SYMBOL_GPL(kvm_require_cpl);
/*
+ * This function will be used to read from the physical memory of the currently
+ * running guest. The difference to kvm_read_guest_page ist that this function
+ * can read from guest physical or from the guest's guest physical memory.
+ */
+int kvm_read_guest_page_tdp(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
+ gfn_t gfn, void *data, int offset, int len,
+ u32 *error)
+{
+ gfn_t real_gfn;
+ gpa_t gpa;
+
+ *error = 0;
+ gpa = gfn << PAGE_SHIFT;
+ real_gfn = mmu->translate_gpa(vcpu, gpa, error);
+ if (real_gfn == UNMAPPED_GVA)
+ return -EFAULT;
+
+ real_gfn >>= PAGE_SHIFT;
+
+ return ...Naming: I see 'tdp' as a host property, and this is valid whether tdp is enabled or not. Suggest calling it kvm_read_nested_guest_page(). -- error compiling committee.c: too many arguments to function --
I'll fix the overflow errors and send an updated patch. Thanks, Joerg --
This patch adds the helper functions which will be used in
the mmu context for handling nested nested page faults.
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
arch/x86/kvm/svm.c | 33 +++++++++++++++++++++++++++++++++
1 files changed, 33 insertions(+), 0 deletions(-)
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 4aae4be..e31f601 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -92,6 +92,9 @@ struct nested_state {
u32 intercept_exceptions;
u64 intercept;
+ /* Nested Paging related state */
+ u64 nested_cr3;
+
};
#define MSRPM_OFFSETS 16
@@ -1490,6 +1493,36 @@ static int vmmcall_interception(struct vcpu_svm *svm)
return 1;
}
+static unsigned long nested_svm_get_tdp_cr3(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_svm *svm = to_svm(vcpu);
+
+ return svm->nested.nested_cr3;
+}
+
+static void nested_svm_set_tdp_cr3(struct kvm_vcpu *vcpu,
+ unsigned long root)
+{
+ struct vcpu_svm *svm = to_svm(vcpu);
+
+ svm->vmcb->control.nested_cr3 = root;
+ force_new_asid(vcpu);
+}
+
+static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu,
+ unsigned long addr,
+ u32 error_code)
+{
+ struct vcpu_svm *svm = to_svm(vcpu);
+
+ svm->vmcb->control.exit_code = SVM_EXIT_NPF;
+ svm->vmcb->control.exit_code_hi = 0;
+ svm->vmcb->control.exit_info_1 = error_code;
+ svm->vmcb->control.exit_info_2 = addr;
+
+ nested_svm_vmexit(svm);
+}
+
static int nested_svm_check_permissions(struct vcpu_svm *svm)
{
if (!(svm->vcpu.arch.efer & EFER_SVME)
--
1.7.0.4
--
This patch introduces a mmu-callback to translate gpa
addresses in the walk_addr code. This is later used to
translate l2_gpa addresses into l1_gpa addresses.
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
arch/x86/include/asm/kvm_host.h | 1 +
arch/x86/kvm/mmu.c | 7 +++++++
arch/x86/kvm/paging_tmpl.h | 1 +
include/linux/kvm_host.h | 5 +++++
4 files changed, 14 insertions(+), 0 deletions(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index f1d46e6..ccdbd2f 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -242,6 +242,7 @@ struct kvm_mmu {
void (*free)(struct kvm_vcpu *vcpu);
gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva, u32 access,
u32 *error);
+ gpa_t (*translate_gpa)(struct kvm_vcpu *vcpu, gpa_t gpa, u32 *error);
void (*prefetch_page)(struct kvm_vcpu *vcpu,
struct kvm_mmu_page *page);
int (*sync_page)(struct kvm_vcpu *vcpu,
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 615c78f..4e0bfdb 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2131,6 +2131,11 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu)
spin_unlock(&vcpu->kvm->mmu_lock);
}
+static gpa_t translate_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 *error)
+{
+ return gpa;
+}
+
static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr,
u32 access, u32 *error)
{
@@ -2387,6 +2392,7 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
context->new_cr3 = nonpaging_new_cr3;
context->page_fault = tdp_page_fault;
context->free = nonpaging_free;
+ context->translate_gpa = translate_gpa;
context->prefetch_page = nonpaging_prefetch_page;
context->sync_page = nonpaging_sync_page;
context->invlpg = nonpaging_invlpg;
@@ -2434,6 +2440,7 @@ int kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context)
vcpu->arch.mmu.base_role.cr4_pae = !!is_pae(vcpu);
vcpu->arch.mmu.direct_map = ...This overflows on 32-bit, since gpa_t is u64 and gfn_t is ulong. -- error compiling committee.c: too many arguments to function --
Hm, this is a problem outside of this patchset too (for 32bit hosts). The best solution is probably to convert gfn_t to u64 too. Joerg --
If you cast like
(gfn_t)(gpa >> PAGE_SHIFT)
you avoid the overflow for MAXPHYADDR < 48. However, I agree that
converting gfn_t to u64 is best, the minor performance degradation is in
no way comparable to the corruption that results from a miscast.
--
error compiling committee.c: too many arguments to function
--
Thinking again about it, on 32 bit the physical address width is only 36 bits. So there shouldn't be an overflow, no? Joerg --
It's limited by MAXPHYADDR (at least on Intel) even on 32-bits. -- error compiling committee.c: too many arguments to function --
This patch uses kvm_read_guest_page_tdp to make the
walk_addr_generic functions suitable for two-level page
table walking.
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
arch/x86/kvm/paging_tmpl.h | 34 ++++++++++++++++++++++++----------
1 files changed, 24 insertions(+), 10 deletions(-)
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 7819a6f..8b27026 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -121,8 +121,9 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
gfn_t table_gfn;
unsigned index, pt_access, pte_access;
gpa_t pte_gpa;
+ int offset;
int rsvd_fault = 0;
- u32 error;
+ u32 error = 0;
trace_kvm_mmu_pagetable_walk(addr, write_fault, user_fault,
fetch_fault);
@@ -149,13 +150,16 @@ walk:
index = PT_INDEX(addr, walker->level);
table_gfn = gpte_to_gfn(pte);
- pte_gpa = gfn_to_gpa(table_gfn);
- pte_gpa += index * sizeof(pt_element_t);
+ offset = index * sizeof(pt_element_t);
+ pte_gpa = gfn_to_gpa(table_gfn) + offset;
walker->table_gfn[walker->level - 1] = table_gfn;
walker->pte_gpa[walker->level - 1] = pte_gpa;
- if (kvm_read_guest(vcpu->kvm, pte_gpa, &pte, sizeof(pte)))
- goto not_present;
+ if (kvm_read_guest_page_tdp(vcpu, mmu, table_gfn, &pte, offset,
+ sizeof(pte), &error)) {
+ walker->error_code = error;
+ return 0;
+ }
trace_kvm_mmu_paging_element(pte, walker->level);
@@ -200,15 +204,25 @@ walk:
is_large_pte(pte) &&
mmu->root_level == PT64_ROOT_LEVEL)) {
int lvl = walker->level;
+ gpa_t real_gpa;
+ gfn_t gfn;
- walker->gfn = gpte_to_gfn_lvl(pte, lvl);
- walker->gfn += (addr & PT_LVL_OFFSET_MASK(lvl))
- >> PAGE_SHIFT;
+ gfn = gpte_to_gfn_lvl(pte, lvl);
+ gfn += (addr & PT_LVL_OFFSET_MASK(lvl)) >> PAGE_SHIFT;
if (PTTYPE == 32 &&
walker->level == PT_DIRECTORY_LEVEL &&
is_cpuid_PSE36())
- walker->gfn += ...This patch changes is_rsvd_bits_set() function prototype to
take only a kvm_mmu context instead of a full vcpu.
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
arch/x86/kvm/mmu.c | 4 ++--
arch/x86/kvm/paging_tmpl.h | 3 ++-
2 files changed, 4 insertions(+), 3 deletions(-)
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index ef9d284..615c78f 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2252,12 +2252,12 @@ static void paging_free(struct kvm_vcpu *vcpu)
nonpaging_free(vcpu);
}
-static bool is_rsvd_bits_set(struct kvm_vcpu *vcpu, u64 gpte, int level)
+static bool is_rsvd_bits_set(struct kvm_mmu *mmu, u64 gpte, int level)
{
int bit7;
bit7 = (gpte >> 7) & 1;
- return (gpte & vcpu->arch.mmu.rsvd_bits_mask[bit7][level-1]) != 0;
+ return (gpte & mmu->rsvd_bits_mask[bit7][level-1]) != 0;
}
#define PTTYPE 64
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 802c513..1975ab6 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -158,7 +158,8 @@ walk:
if (!is_present_gpte(pte))
goto not_present;
- rsvd_fault = is_rsvd_bits_set(vcpu, pte, walker->level);
+ rsvd_fault = is_rsvd_bits_set(&vcpu->arch.mmu, pte,
+ walker->level);
if (rsvd_fault)
goto access_error;
--
1.7.0.4
--
We could also set direct_map = true for real mode, would probably simplify some later logic (can be cleaned up later, no need to update now). -- error compiling committee.c: too many arguments to function --
Those !! are unneeded. -- error compiling committee.c: too many arguments to function --
In the emulator there are some functions passing NULL for the error code. In general it doesn't matter much because when we track this information in the vcpu struct these error parameters can be removed. I just let this outside the scope of this patchset and make that cleanup Ok, I remove them. Joerg --
Ok, apart from the overflow on i386 nasties this look really good! I look forward to merging v3. -- error compiling committee.c: too many arguments to function --
