Fourth set of kvm updates scheduled for the 2.6.25 merge window. Please review. Diffstat below is for this batch only. arch/x86/Kconfig | 4 + drivers/kvm/Kconfig | 4 +- drivers/kvm/i8259.c | 1 + drivers/kvm/ioapic.c | 36 ++-- drivers/kvm/iodev.h | 63 +++++ drivers/kvm/irq.h | 21 ++- drivers/kvm/kvm.h | 79 +------ drivers/kvm/kvm_main.c | 129 ++++------- drivers/kvm/lapic.c | 25 ++- drivers/kvm/mmu.c | 482 +++++++++++++++++++++----------------- drivers/kvm/paging_tmpl.h | 202 +++++++--------- drivers/kvm/svm.c | 99 +++----- drivers/kvm/svm.h | 1 + drivers/kvm/types.h | 41 ++++ drivers/kvm/vmx.c | 86 ++----- drivers/kvm/x86.c | 469 ++++++++++++++++++++++++++++++++++--- drivers/kvm/x86.h | 41 +++- drivers/kvm/x86_emulate.c | 568 +++++++++++++++++++++++++-------------------- drivers/kvm/x86_emulate.h | 5 +- include/asm-x86/kvm.h | 21 ++ include/linux/Kbuild | 2 +- include/linux/kvm.h | 6 + kernel/fork.c | 1 + 23 files changed, 1444 insertions(+), 942 deletions(-) --
No longer used.
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
drivers/kvm/kvm.h | 1 -
drivers/kvm/mmu.c | 9 ---------
2 files changed, 0 insertions(+), 10 deletions(-)
diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index eda82cd..31315bc 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -379,7 +379,6 @@ hpa_t gpa_to_hpa(struct kvm *kvm, gpa_t gpa);
#define HPA_MSB ((sizeof(hpa_t) * 8) - 1)
#define HPA_ERR_MASK ((hpa_t)1 << HPA_MSB)
static inline int is_error_hpa(hpa_t hpa) { return hpa >> HPA_MSB; }
-hpa_t gva_to_hpa(struct kvm_vcpu *vcpu, gva_t gva);
struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva);
extern struct page *bad_page;
diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c
index 1965185..6aa0319 100644
--- a/drivers/kvm/mmu.c
+++ b/drivers/kvm/mmu.c
@@ -881,15 +881,6 @@ hpa_t gpa_to_hpa(struct kvm *kvm, gpa_t gpa)
return hpa;
}
-hpa_t gva_to_hpa(struct kvm_vcpu *vcpu, gva_t gva)
-{
- gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, gva);
-
- if (gpa == UNMAPPED_GVA)
- return UNMAPPED_GVA;
- return gpa_to_hpa(vcpu->kvm, gpa);
-}
-
struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva)
{
gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, gva);
--
1.5.3.7
--
Rename the awkwardly named variable.
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
drivers/kvm/mmu.c | 8 ++++----
1 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c
index 86896da..0cb6580 100644
--- a/drivers/kvm/mmu.c
+++ b/drivers/kvm/mmu.c
@@ -430,18 +430,18 @@ static void rmap_remove(struct kvm *kvm, u64 *spte)
struct kvm_rmap_desc *desc;
struct kvm_rmap_desc *prev_desc;
struct kvm_mmu_page *sp;
- struct page *release_page;
+ struct page *page;
unsigned long *rmapp;
int i;
if (!is_rmap_pte(*spte))
return;
sp = page_header(__pa(spte));
- release_page = pfn_to_page((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT);
+ page = pfn_to_page((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT);
if (is_writeble_pte(*spte))
- kvm_release_page_dirty(release_page);
+ kvm_release_page_dirty(page);
else
- kvm_release_page_clean(release_page);
+ kvm_release_page_clean(page);
rmapp = gfn_to_rmap(kvm, sp->gfns[spte - sp->spt]);
if (!*rmapp) {
printk(KERN_ERR "rmap_remove: %p %llx 0->BUG\n", spte, *spte);
--
1.5.3.7
--
Converting last uses along the way.
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
drivers/kvm/kvm.h | 1 -
drivers/kvm/mmu.c | 21 +++------------------
2 files changed, 3 insertions(+), 19 deletions(-)
diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index 31315bc..1fd8158 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -375,7 +375,6 @@ int kvm_init(void *opaque, unsigned int vcpu_size,
struct module *module);
void kvm_exit(void);
-hpa_t gpa_to_hpa(struct kvm *kvm, gpa_t gpa);
#define HPA_MSB ((sizeof(hpa_t) * 8) - 1)
#define HPA_ERR_MASK ((hpa_t)1 << HPA_MSB)
static inline int is_error_hpa(hpa_t hpa) { return hpa >> HPA_MSB; }
diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c
index 6aa0319..9b75b10 100644
--- a/drivers/kvm/mmu.c
+++ b/drivers/kvm/mmu.c
@@ -868,26 +868,13 @@ static void page_header_update_slot(struct kvm *kvm, void *pte, gfn_t gfn)
__set_bit(slot, &page_head->slot_bitmap);
}
-hpa_t gpa_to_hpa(struct kvm *kvm, gpa_t gpa)
-{
- struct page *page;
- hpa_t hpa;
-
- ASSERT((gpa & HPA_ERR_MASK) == 0);
- page = gfn_to_page(kvm, gpa >> PAGE_SHIFT);
- hpa = ((hpa_t)page_to_pfn(page) << PAGE_SHIFT) | (gpa & (PAGE_SIZE-1));
- if (is_error_page(page))
- return hpa | HPA_ERR_MASK;
- return hpa;
-}
-
struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva)
{
gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, gva);
if (gpa == UNMAPPED_GVA)
return NULL;
- return pfn_to_page(gpa_to_hpa(vcpu->kvm, gpa) >> PAGE_SHIFT);
+ return gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT);
}
static void nonpaging_new_cr3(struct kvm_vcpu *vcpu)
@@ -1611,8 +1598,8 @@ static void audit_mappings_page(struct kvm_vcpu *vcpu, u64 page_pte,
audit_mappings_page(vcpu, ent, va, level - 1);
} else {
gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, va);
- hpa_t hpa = gpa_to_hpa(vcpu, gpa);
- struct page *page;
+ struct page *page = gpa_to_page(vcpu, gpa);
+ hpa_t hpa = page_to_phys(page);
if (is_shadow_present_pte(ent)
...These are traditionally named 'page', but even more traditionally, that name
is reserved for variables that point to a 'struct page'. Rename them to 'sp'
(for "shadow page").
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
drivers/kvm/mmu.c | 300 ++++++++++++++++++++++++++---------------------------
1 files changed, 146 insertions(+), 154 deletions(-)
diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c
index 9b75b10..86896da 100644
--- a/drivers/kvm/mmu.c
+++ b/drivers/kvm/mmu.c
@@ -367,7 +367,7 @@ static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn)
*/
static void rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
{
- struct kvm_mmu_page *page;
+ struct kvm_mmu_page *sp;
struct kvm_rmap_desc *desc;
unsigned long *rmapp;
int i;
@@ -375,8 +375,8 @@ static void rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
if (!is_rmap_pte(*spte))
return;
gfn = unalias_gfn(vcpu->kvm, gfn);
- page = page_header(__pa(spte));
- page->gfns[spte - page->spt] = gfn;
+ sp = page_header(__pa(spte));
+ sp->gfns[spte - sp->spt] = gfn;
rmapp = gfn_to_rmap(vcpu->kvm, gfn);
if (!*rmapp) {
rmap_printk("rmap_add: %p %llx 0->1\n", spte, *spte);
@@ -429,20 +429,20 @@ static void rmap_remove(struct kvm *kvm, u64 *spte)
{
struct kvm_rmap_desc *desc;
struct kvm_rmap_desc *prev_desc;
- struct kvm_mmu_page *page;
+ struct kvm_mmu_page *sp;
struct page *release_page;
unsigned long *rmapp;
int i;
if (!is_rmap_pte(*spte))
return;
- page = page_header(__pa(spte));
+ sp = page_header(__pa(spte));
release_page = pfn_to_page((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT);
if (is_writeble_pte(*spte))
kvm_release_page_dirty(release_page);
else
kvm_release_page_clean(release_page);
- rmapp = gfn_to_rmap(kvm, page->gfns[spte - page->spt]);
+ rmapp = gfn_to_rmap(kvm, sp->gfns[spte - sp->spt]);
if (!*rmapp) {
printk(KERN_ERR "rmap_remove: %p %llx 0->BUG\n", spte, *spte);
BUG();
@@ -537,14 +537,13 @@ static int ...From: Zhang Xiantao <xiantao.zhang@intel.com>
Non-x86 archs don't need this mechanism. Move it to arch, and
keep its interface in common.
Signed-off-by: Zhang Xiantao <xiantao.zhang@intel.com>
Acked-by: Carsten Otte <cotte@de.ibm.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
drivers/kvm/kvm_main.c | 14 --------------
drivers/kvm/x86.c | 14 ++++++++++++++
2 files changed, 14 insertions(+), 14 deletions(-)
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index a9e1c77..003a43c 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -408,20 +408,6 @@ int kvm_is_error_hva(unsigned long addr)
}
EXPORT_SYMBOL_GPL(kvm_is_error_hva);
-gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
-{
- int i;
- struct kvm_mem_alias *alias;
-
- for (i = 0; i < kvm->naliases; ++i) {
- alias = &kvm->aliases[i];
- if (gfn >= alias->base_gfn
- && gfn < alias->base_gfn + alias->npages)
- return alias->target_gfn + gfn - alias->base_gfn;
- }
- return gfn;
-}
-
static struct kvm_memory_slot *__gfn_to_memslot(struct kvm *kvm, gfn_t gfn)
{
int i;
diff --git a/drivers/kvm/x86.c b/drivers/kvm/x86.c
index 7237cb2..2be5936 100644
--- a/drivers/kvm/x86.c
+++ b/drivers/kvm/x86.c
@@ -1119,6 +1119,20 @@ static int kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm)
return kvm->n_alloc_mmu_pages;
}
+gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
+{
+ int i;
+ struct kvm_mem_alias *alias;
+
+ for (i = 0; i < kvm->naliases; ++i) {
+ alias = &kvm->aliases[i];
+ if (gfn >= alias->base_gfn
+ && gfn < alias->base_gfn + alias->npages)
+ return alias->target_gfn + gfn - alias->base_gfn;
+ }
+ return gfn;
+}
+
/*
* Set a new alias region. Aliases map a portion of physical memory into
* another portion. This is useful for memory windows, for example the PC
--
1.5.3.7
--
We don't want the meaning of guest userspace changing under our feet.
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
drivers/kvm/kvm.h | 1 +
drivers/kvm/kvm_main.c | 9 +++++++++
kernel/fork.c | 1 +
3 files changed, 11 insertions(+), 0 deletions(-)
diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index 1fd8158..be18620 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -305,6 +305,7 @@ struct kvm_vm_stat {
struct kvm {
struct mutex lock; /* protects everything except vcpus */
+ struct mm_struct *mm; /* userspace tied to this vm */
int naliases;
struct kvm_mem_alias aliases[KVM_ALIAS_SLOTS];
int nmemslots;
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index 411b2bd..a9e1c77 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -165,6 +165,8 @@ static struct kvm *kvm_create_vm(void)
if (IS_ERR(kvm))
goto out;
+ kvm->mm = current->mm;
+ atomic_inc(&kvm->mm->mm_count);
kvm_io_bus_init(&kvm->pio_bus);
mutex_init(&kvm->lock);
kvm_io_bus_init(&kvm->mmio_bus);
@@ -202,12 +204,15 @@ void kvm_free_physmem(struct kvm *kvm)
static void kvm_destroy_vm(struct kvm *kvm)
{
+ struct mm_struct *mm = kvm->mm;
+
spin_lock(&kvm_lock);
list_del(&kvm->vm_list);
spin_unlock(&kvm_lock);
kvm_io_bus_destroy(&kvm->pio_bus);
kvm_io_bus_destroy(&kvm->mmio_bus);
kvm_arch_destroy_vm(kvm);
+ mmdrop(mm);
}
static int kvm_vm_release(struct inode *inode, struct file *filp)
@@ -818,6 +823,8 @@ static long kvm_vcpu_ioctl(struct file *filp,
void __user *argp = (void __user *)arg;
int r;
+ if (vcpu->kvm->mm != current->mm)
+ return -EIO;
switch (ioctl) {
case KVM_RUN:
r = -EINVAL;
@@ -976,6 +983,8 @@ static long kvm_vm_ioctl(struct file *filp,
void __user *argp = (void __user *)arg;
int r;
+ if (kvm->mm != current->mm)
+ return -EIO;
switch (ioctl) {
case KVM_CREATE_VCPU:
r = kvm_vm_ioctl_create_vcpu(kvm, arg);
diff --git a/kernel/fork.c ...Current implementation is to toggle, which is incorrect. Patch ported from
corresponding Xen code.
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
drivers/kvm/x86_emulate.c | 19 ++++++++++++-------
1 files changed, 12 insertions(+), 7 deletions(-)
diff --git a/drivers/kvm/x86_emulate.c b/drivers/kvm/x86_emulate.c
index 9f8d59a..3be506a 100644
--- a/drivers/kvm/x86_emulate.c
+++ b/drivers/kvm/x86_emulate.c
@@ -758,6 +758,7 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
struct decode_cache *c = &ctxt->decode;
int rc = 0;
int mode = ctxt->mode;
+ int def_op_bytes, def_ad_bytes;
/* Shadow copy of register state. Committed on successful emulation. */
@@ -768,34 +769,38 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
switch (mode) {
case X86EMUL_MODE_REAL:
case X86EMUL_MODE_PROT16:
- c->op_bytes = c->ad_bytes = 2;
+ def_op_bytes = def_ad_bytes = 2;
break;
case X86EMUL_MODE_PROT32:
- c->op_bytes = c->ad_bytes = 4;
+ def_op_bytes = def_ad_bytes = 4;
break;
#ifdef CONFIG_X86_64
case X86EMUL_MODE_PROT64:
- c->op_bytes = 4;
- c->ad_bytes = 8;
+ def_op_bytes = 4;
+ def_ad_bytes = 8;
break;
#endif
default:
return -1;
}
+ c->op_bytes = def_op_bytes;
+ c->ad_bytes = def_ad_bytes;
+
/* Legacy prefixes. */
for (;;) {
switch (c->b = insn_fetch(u8, 1, c->eip)) {
case 0x66: /* operand-size override */
- c->op_bytes ^= 6; /* switch between 2/4 bytes */
+ /* switch between 2/4 bytes */
+ c->op_bytes = def_op_bytes ^ 6;
break;
case 0x67: /* address-size override */
if (mode == X86EMUL_MODE_PROT64)
/* switch between 4/8 bytes */
- c->ad_bytes ^= 12;
+ c->ad_bytes = def_ad_bytes ^ 12;
else
/* switch between 2/4 bytes */
- c->ad_bytes ^= 6;
+ c->ad_bytes = def_ad_bytes ^ 6;
break;
case 0x2e: /* CS override */
c->override_base = &ctxt->cs_base;
--
1.5.3.7
--
From: Guillaume Thouvenin <guillaume.thouvenin@ext.bull.net>
cmps and scas instructions accept repeat prefixes F3 and F2. So in
order to emulate those prefixed instructions we need to be able to know
if prefixes are REP/REPE/REPZ or REPNE/REPNZ. Currently kvm doesn't make
this distinction. This patch introduces this distinction.
Signed-off-by: Guillaume Thouvenin <guillaume.thouvenin@ext.bull.net>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
drivers/kvm/x86_emulate.c | 4 +++-
drivers/kvm/x86_emulate.h | 4 ++++
2 files changed, 7 insertions(+), 1 deletions(-)
diff --git a/drivers/kvm/x86_emulate.c b/drivers/kvm/x86_emulate.c
index 6e7f774..9f8d59a 100644
--- a/drivers/kvm/x86_emulate.c
+++ b/drivers/kvm/x86_emulate.c
@@ -824,8 +824,10 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
c->lock_prefix = 1;
break;
case 0xf2: /* REPNE/REPNZ */
+ c->rep_prefix = REPNE_PREFIX;
+ break;
case 0xf3: /* REP/REPE/REPZ */
- c->rep_prefix = 1;
+ c->rep_prefix = REPE_PREFIX;
break;
default:
goto done_prefixes;
diff --git a/drivers/kvm/x86_emulate.h b/drivers/kvm/x86_emulate.h
index 4603b2b..644086e 100644
--- a/drivers/kvm/x86_emulate.h
+++ b/drivers/kvm/x86_emulate.h
@@ -162,6 +162,10 @@ struct x86_emulate_ctxt {
struct decode_cache decode;
};
+/* Repeat String Operation Prefix */
+#define REPE_PREFIX 1
+#define REPNE_PREFIX 2
+
/* Execution mode, passed to the emulator. */
#define X86EMUL_MODE_REAL 0 /* Real mode. */
#define X86EMUL_MODE_PROT16 2 /* 16-bit protected mode. */
--
1.5.3.7
--
mmio was already handled in kvm_arch_vcpu_ioctl_run(), so no need to check again. Signed-off-by: Avi Kivity <avi@qumranet.com> --- drivers/kvm/x86.c | 2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/drivers/kvm/x86.c b/drivers/kvm/x86.c index 2be5936..5e58156 100644 --- a/drivers/kvm/x86.c +++ b/drivers/kvm/x86.c @@ -2344,7 +2344,7 @@ again: if (irqchip_in_kernel(vcpu->kvm)) kvm_x86_ops->inject_pending_irq(vcpu); - else if (!vcpu->mmio_read_completed) + else kvm_x86_ops->inject_pending_vectors(vcpu, kvm_run); vcpu->guest_mode = 1; -- 1.5.3.7 --
From: Izik Eidus <izike@qumranet.com> Mark guest pages as accessed when removed from the shadow page tables for better lru processing. Signed-off-by: Izik Eidus <izike@qumranet.com> Signed-off-by: Avi Kivity <avi@qumranet.com> --- drivers/kvm/mmu.c | 2 ++ 1 files changed, 2 insertions(+), 0 deletions(-) diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c index 0cb6580..9b9d1b6 100644 --- a/drivers/kvm/mmu.c +++ b/drivers/kvm/mmu.c @@ -26,6 +26,7 @@ #include <linux/mm.h> #include <linux/highmem.h> #include <linux/module.h> +#include <linux/swap.h> #include <asm/page.h> #include <asm/cmpxchg.h> @@ -438,6 +439,7 @@ static void rmap_remove(struct kvm *kvm, u64 *spte) return; sp = page_header(__pa(spte)); page = pfn_to_page((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT); + mark_page_accessed(page); if (is_writeble_pte(*spte)) kvm_release_page_dirty(page); else -- 1.5.3.7 --
From: Jerone Young <jyoung5@us.ibm.com>
This patch fixes a small issue where the structures:
kvm_pic_state
kvm_ioapic_state
are defined inside x86-specific code and may or may not
be defined in any way for other architectures. The problem
caused is one cannot compile userspace apps (ex. libkvm)
for other archs since a size cannot be determined for these
structures.
Signed-off-by: Jerone Young <jyoung5@us.ibm.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
include/linux/Kbuild | 2 +-
include/linux/kvm.h | 2 ++
2 files changed, 3 insertions(+), 1 deletions(-)
diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index 9abf5a8..805c2a7 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -98,7 +98,6 @@ header-y += iso_fs.h
header-y += ixjuser.h
header-y += jffs2.h
header-y += keyctl.h
-header-y += kvm.h
header-y += limits.h
header-y += lock_dlm_plock.h
header-y += magic.h
@@ -254,6 +253,7 @@ unifdef-y += kd.h
unifdef-y += kernelcapi.h
unifdef-y += kernel.h
unifdef-y += keyboard.h
+unifdef-y += kvm.h
unifdef-y += llc.h
unifdef-y += loop.h
unifdef-y += lp.h
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index b751552..f0bebd6 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -51,8 +51,10 @@ struct kvm_irqchip {
__u32 pad;
union {
char dummy[512]; /* reserving space */
+#ifdef CONFIG_X86
struct kvm_pic_state pic;
struct kvm_ioapic_state ioapic;
+#endif
} chip;
};
--
1.5.3.7
--
From: Sheng Yang <sheng.yang@intel.com>
Previous patches have removed the dependency on cr2; we can now stop passing
it to the emulator and rename uses to 'memop'.
Signed-off-by: Sheng Yang <sheng.yang@intel.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
drivers/kvm/x86.c | 1 -
drivers/kvm/x86_emulate.c | 18 +++++++++---------
drivers/kvm/x86_emulate.h | 1 -
3 files changed, 9 insertions(+), 11 deletions(-)
diff --git a/drivers/kvm/x86.c b/drivers/kvm/x86.c
index 5e58156..12f1d6f 100644
--- a/drivers/kvm/x86.c
+++ b/drivers/kvm/x86.c
@@ -1702,7 +1702,6 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
vcpu->emulate_ctxt.vcpu = vcpu;
vcpu->emulate_ctxt.eflags = kvm_x86_ops->get_rflags(vcpu);
- vcpu->emulate_ctxt.cr2 = cr2;
vcpu->emulate_ctxt.mode =
(vcpu->emulate_ctxt.eflags & X86_EFLAGS_VM)
? X86EMUL_MODE_REAL : cs_l
diff --git a/drivers/kvm/x86_emulate.c b/drivers/kvm/x86_emulate.c
index 3be506a..22fdf0a 100644
--- a/drivers/kvm/x86_emulate.c
+++ b/drivers/kvm/x86_emulate.c
@@ -1127,13 +1127,13 @@ static inline int emulate_grp45(struct x86_emulate_ctxt *ctxt,
static inline int emulate_grp9(struct x86_emulate_ctxt *ctxt,
struct x86_emulate_ops *ops,
- unsigned long cr2)
+ unsigned long memop)
{
struct decode_cache *c = &ctxt->decode;
u64 old, new;
int rc;
- rc = ops->read_emulated(cr2, &old, 8, ctxt->vcpu);
+ rc = ops->read_emulated(memop, &old, 8, ctxt->vcpu);
if (rc != 0)
return rc;
@@ -1148,7 +1148,7 @@ static inline int emulate_grp9(struct x86_emulate_ctxt *ctxt,
new = ((u64)c->regs[VCPU_REGS_RCX] << 32) |
(u32) c->regs[VCPU_REGS_RBX];
- rc = ops->cmpxchg_emulated(cr2, &old, &new, 8, ctxt->vcpu);
+ rc = ops->cmpxchg_emulated(memop, &old, &new, 8, ctxt->vcpu);
if (rc != 0)
return rc;
ctxt->eflags |= EFLG_ZF;
@@ -1211,7 +1211,7 @@ static inline int writeback(struct x86_emulate_ctxt *ctxt,
int
x86_emulate_insn(struct ...The rep prefix cleanup left two switch () statements next to each other.
Unify them.
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
drivers/kvm/x86_emulate.c | 50 ++++++++++++++++++++------------------------
1 files changed, 23 insertions(+), 27 deletions(-)
diff --git a/drivers/kvm/x86_emulate.c b/drivers/kvm/x86_emulate.c
index 53377f0..499d1ad 100644
--- a/drivers/kvm/x86_emulate.c
+++ b/drivers/kvm/x86_emulate.c
@@ -1541,31 +1541,6 @@ special_insn:
case 0x9d: /* popf */
c->dst.ptr = (unsigned long *) &ctxt->eflags;
goto pop_instruction;
- case 0xc3: /* ret */
- c->dst.ptr = &c->eip;
- goto pop_instruction;
- case 0xf4: /* hlt */
- ctxt->vcpu->halt_request = 1;
- goto done;
- case 0xf5: /* cmc */
- /* complement carry flag from eflags reg */
- ctxt->eflags ^= EFLG_CF;
- c->dst.type = OP_NONE; /* Disable writeback. */
- break;
- case 0xf8: /* clc */
- ctxt->eflags &= ~EFLG_CF;
- c->dst.type = OP_NONE; /* Disable writeback. */
- break;
- case 0xfa: /* cli */
- ctxt->eflags &= ~X86_EFLAGS_IF;
- c->dst.type = OP_NONE; /* Disable writeback. */
- break;
- case 0xfb: /* sti */
- ctxt->eflags |= X86_EFLAGS_IF;
- c->dst.type = OP_NONE; /* Disable writeback. */
- break;
- }
- switch (c->b) {
case 0xa4 ... 0xa5: /* movs */
c->dst.type = OP_MEM;
c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
@@ -1652,6 +1627,9 @@ special_insn:
case 0xae ... 0xaf: /* scas */
DPRINTF("Urk! I don't handle SCAS.\n");
goto cannot_emulate;
+ case 0xc3: /* ret */
+ c->dst.ptr = &c->eip;
+ goto pop_instruction;
case 0xe8: /* call (near) */ {
long int rel;
switch (c->op_bytes) {
@@ -1676,8 +1654,26 @@ special_insn:
JMP_REL(c->src.val);
c->dst.type = OP_NONE; /* Disable writeback. */
break;
-
-
+ case 0xf4: /* hlt */
+ ctxt->vcpu->halt_request = 1;
+ goto done;
+ case 0xf5: /* cmc */
+ /* complement carry flag from eflags reg */
+ ctxt->eflags ^= EFLG_CF;
+ c->dst.type = ...From: Zhang Xiantao <xiantao.zhang@intel.com>
Other archs don't need it.
Signed-off-by: Zhang Xiantao <xiantao.zhang@intel.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
drivers/kvm/kvm_main.c | 29 -----------------------------
drivers/kvm/x86.c | 29 +++++++++++++++++++++++++++++
2 files changed, 29 insertions(+), 29 deletions(-)
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index 003a43c..ef961cc 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -665,23 +665,6 @@ void kvm_resched(struct kvm_vcpu *vcpu)
}
EXPORT_SYMBOL_GPL(kvm_resched);
-static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
- struct kvm_interrupt *irq)
-{
- if (irq->irq < 0 || irq->irq >= 256)
- return -EINVAL;
- if (irqchip_in_kernel(vcpu->kvm))
- return -ENXIO;
- vcpu_load(vcpu);
-
- set_bit(irq->irq, vcpu->irq_pending);
- set_bit(irq->irq / BITS_PER_LONG, &vcpu->irq_summary);
-
- vcpu_put(vcpu);
-
- return 0;
-}
-
static struct page *kvm_vcpu_nopage(struct vm_area_struct *vma,
unsigned long address,
int *type)
@@ -883,18 +866,6 @@ static long kvm_vcpu_ioctl(struct file *filp,
r = 0;
break;
}
- case KVM_INTERRUPT: {
- struct kvm_interrupt irq;
-
- r = -EFAULT;
- if (copy_from_user(&irq, argp, sizeof irq))
- goto out;
- r = kvm_vcpu_ioctl_interrupt(vcpu, &irq);
- if (r)
- goto out;
- r = 0;
- break;
- }
case KVM_DEBUG_GUEST: {
struct kvm_debug_guest dbg;
diff --git a/drivers/kvm/x86.c b/drivers/kvm/x86.c
index 12f1d6f..c9e4b67 100644
--- a/drivers/kvm/x86.c
+++ b/drivers/kvm/x86.c
@@ -1001,6 +1001,23 @@ static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu,
return 0;
}
+static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
+ struct kvm_interrupt *irq)
+{
+ if (irq->irq < 0 || irq->irq >= 256)
+ return -EINVAL;
+ if (irqchip_in_kernel(vcpu->kvm))
+ return -ENXIO;
+ vcpu_load(vcpu);
+
+ set_bit(irq->irq, ...Currently, make headers_check barfs due to <asm/kvm.h>, which <linux/kvm.h> includes, not existing. Rather than add a zillion <asm/kvm.h>s, export kvm.h only if the arch actually supports it. Signed-off-by: Avi Kivity <avi@qumranet.com> --- arch/x86/Kconfig | 4 ++++ drivers/kvm/Kconfig | 4 ++-- include/linux/Kbuild | 2 +- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 80b7ba4..e029a93 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -116,6 +116,10 @@ config ARCH_SUPPORTS_OPROFILE bool default y +config ARCH_SUPPORTS_KVM + bool + default y + config ZONE_DMA32 bool diff --git a/drivers/kvm/Kconfig b/drivers/kvm/Kconfig index 6569206..4086080 100644 --- a/drivers/kvm/Kconfig +++ b/drivers/kvm/Kconfig @@ -3,7 +3,7 @@ # menuconfig VIRTUALIZATION bool "Virtualization" - depends on X86 + depends on ARCH_SUPPORTS_KVM || X86 default y ---help--- Say Y here to get to see options for using your Linux host to run other @@ -16,7 +16,7 @@ if VIRTUALIZATION config KVM tristate "Kernel-based Virtual Machine (KVM) support" - depends on X86 && EXPERIMENTAL + depends on ARCH_SUPPORTS_KVM && EXPERIMENTAL select PREEMPT_NOTIFIERS select ANON_INODES ---help--- diff --git a/include/linux/Kbuild b/include/linux/Kbuild index 805c2a7..d70fa26 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -253,7 +253,7 @@ unifdef-y += kd.h unifdef-y += kernelcapi.h unifdef-y += kernel.h unifdef-y += keyboard.h -unifdef-y += kvm.h +unifdef-$(CONFIG_ARCH_SUPPORTS_KVM) += kvm.h unifdef-y += llc.h unifdef-y += loop.h unifdef-y += lp.h -- 1.5.3.7 --
An approach like the following is preferred: kvm/Kconfig: # config symbols to be selected by archs that implement virtualization/kvm config HAVE_VIRTUALIZATION config HAVE_KVM menuconfig VIRTUALIZATION bool "..." depends on HAVE_VIRTUALIZATION config KVM tristate "..." depends on HAVE_KVM arch/x86/Kconfig: config X86 select HAVE_VIRTUALIZATION select HAVE_KVM I dunno about the additional "HAVE_VIRTUALIZATION" - I added it because I assume virtualization is more than just kvm. The rationale behind this approach is: -> We do not define a new config variable for each arch -> We have a common way to say that an arch supports a feature -> We have a common naming scheme Sam --
I think my ARCH_SUPPORTS_KVM is exactly your HAVE_KVM. I'll update the patch to rename the variable and to have x86 select it instead of defining it (which is clearly better) as you suggest. I'll also apply your HAVE_VIRTUALIZATION suggestion. Thanks for the review, comments, and explanations. -- error compiling committee.c: too many arguments to function --
From: Zhang Xiantao <xiantao.zhang@intel.com>
This patch replaces lapic structure with kvm_vcpu in ioapic.c, making ioapic
independent of the local apic, as required by ia64.
Signed-off-by: Zhang Xiantao <xiantao.zhang@intel.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
drivers/kvm/ioapic.c | 18 ++++++++----------
drivers/kvm/irq.h | 5 +++--
drivers/kvm/lapic.c | 23 ++++++++++++++++++-----
3 files changed, 29 insertions(+), 17 deletions(-)
diff --git a/drivers/kvm/ioapic.c b/drivers/kvm/ioapic.c
index cf1d50b..3629867 100644
--- a/drivers/kvm/ioapic.c
+++ b/drivers/kvm/ioapic.c
@@ -136,7 +136,7 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
}
static void ioapic_inj_irq(struct kvm_ioapic *ioapic,
- struct kvm_lapic *target,
+ struct kvm_vcpu *vcpu,
u8 vector, u8 trig_mode, u8 delivery_mode)
{
ioapic_debug("irq %d trig %d deliv %d\n", vector, trig_mode,
@@ -145,7 +145,7 @@ static void ioapic_inj_irq(struct kvm_ioapic *ioapic,
ASSERT((delivery_mode == dest_Fixed) ||
(delivery_mode == dest_LowestPrio));
- kvm_apic_set_irq(target, vector, trig_mode);
+ kvm_apic_set_irq(vcpu, vector, trig_mode);
}
static u32 ioapic_get_delivery_bitmask(struct kvm_ioapic *ioapic, u8 dest,
@@ -196,7 +196,6 @@ static void ioapic_deliver(struct kvm_ioapic *ioapic, int irq)
u8 vector = ioapic->redirtbl[irq].fields.vector;
u8 trig_mode = ioapic->redirtbl[irq].fields.trig_mode;
u32 deliver_bitmask;
- struct kvm_lapic *target;
struct kvm_vcpu *vcpu;
int vcpu_id;
@@ -212,13 +211,13 @@ static void ioapic_deliver(struct kvm_ioapic *ioapic, int irq)
switch (delivery_mode) {
case dest_LowestPrio:
- target =
- kvm_apic_round_robin(ioapic->kvm, vector, deliver_bitmask);
- if (target != NULL)
- ioapic_inj_irq(ioapic, target, vector,
+ vcpu = kvm_get_lowest_prio_vcpu(ioapic->kvm, vector,
+ deliver_bitmask);
+ if (vcpu != NULL)
+ ioapic_inj_irq(ioapic, vcpu, vector,
...From: Zhang Xiantao <xiantao.zhang@intel.com>
Signed-off-by: Zhang Xiantao <xiantao.zhang@intel.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
drivers/kvm/kvm_main.c | 28 +++++++++++++++-------------
1 files changed, 15 insertions(+), 13 deletions(-)
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index ef961cc..b56ee34 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -1311,7 +1311,7 @@ int kvm_init(void *opaque, unsigned int vcpu_size,
r = kvm_arch_init(opaque);
if (r)
- goto out4;
+ goto out_fail;
bad_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
@@ -1322,29 +1322,29 @@ int kvm_init(void *opaque, unsigned int vcpu_size,
r = kvm_arch_hardware_setup();
if (r < 0)
- goto out;
+ goto out_free_0;
for_each_online_cpu(cpu) {
smp_call_function_single(cpu,
kvm_arch_check_processor_compat,
&r, 0, 1);
if (r < 0)
- goto out_free_0;
+ goto out_free_1;
}
on_each_cpu(hardware_enable, NULL, 0, 1);
r = register_cpu_notifier(&kvm_cpu_notifier);
if (r)
- goto out_free_1;
+ goto out_free_2;
register_reboot_notifier(&kvm_reboot_notifier);
r = sysdev_class_register(&kvm_sysdev_class);
if (r)
- goto out_free_2;
+ goto out_free_3;
r = sysdev_register(&kvm_sysdev);
if (r)
- goto out_free_3;
+ goto out_free_4;
/* A kmem cache lets us meet the alignment requirements of fx_save. */
kvm_vcpu_cache = kmem_cache_create("kvm_vcpu", vcpu_size,
@@ -1352,7 +1352,7 @@ int kvm_init(void *opaque, unsigned int vcpu_size,
0, NULL);
if (!kvm_vcpu_cache) {
r = -ENOMEM;
- goto out_free_4;
+ goto out_free_5;
}
kvm_chardev_ops.owner = module;
@@ -1370,21 +1370,23 @@ int kvm_init(void *opaque, unsigned int vcpu_size,
out_free:
kmem_cache_destroy(kvm_vcpu_cache);
-out_free_4:
+out_free_5:
sysdev_unregister(&kvm_sysdev);
-out_free_3:
+out_free_4:
sysdev_class_unregister(&kvm_sysdev_class);
-out_free_2:
+out_free_3:
...From: Zhang Xiantao <xiantao.zhang@intel.com>
Change
dest_LowestPrio -> IOAPIC_LOWEST_PRIORITY
dest_Fixed -> IOAPIC_FIXED
the original names are x86 specific, while the ioapic code will be reused
for ia64.
Signed-off-by: Zhang Xiantao <xiantao.zhang@intel.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
drivers/kvm/ioapic.c | 13 +++++--------
drivers/kvm/irq.h | 8 ++++++++
2 files changed, 13 insertions(+), 8 deletions(-)
diff --git a/drivers/kvm/ioapic.c b/drivers/kvm/ioapic.c
index 3629867..0feae6f 100644
--- a/drivers/kvm/ioapic.c
+++ b/drivers/kvm/ioapic.c
@@ -36,11 +36,8 @@
#include <linux/hrtimer.h>
#include <linux/io.h>
#include <asm/processor.h>
-#include <asm/msr.h>
#include <asm/page.h>
#include <asm/current.h>
-#include <asm/apicdef.h>
-#include <asm/io_apic.h>
#include "irq.h"
#if 0
#define ioapic_debug(fmt,arg...) printk(KERN_WARNING fmt,##arg)
@@ -142,8 +139,8 @@ static void ioapic_inj_irq(struct kvm_ioapic *ioapic,
ioapic_debug("irq %d trig %d deliv %d\n", vector, trig_mode,
delivery_mode);
- ASSERT((delivery_mode == dest_Fixed) ||
- (delivery_mode == dest_LowestPrio));
+ ASSERT((delivery_mode == IOAPIC_FIXED) ||
+ (delivery_mode == IOAPIC_LOWEST_PRIORITY));
kvm_apic_set_irq(vcpu, vector, trig_mode);
}
@@ -210,7 +207,7 @@ static void ioapic_deliver(struct kvm_ioapic *ioapic, int irq)
}
switch (delivery_mode) {
- case dest_LowestPrio:
+ case IOAPIC_LOWEST_PRIORITY:
vcpu = kvm_get_lowest_prio_vcpu(ioapic->kvm, vector,
deliver_bitmask);
if (vcpu != NULL)
@@ -219,9 +216,9 @@ static void ioapic_deliver(struct kvm_ioapic *ioapic, int irq)
else
ioapic_debug("null lowest prio vcpu: "
"mask=%x vector=%x delivery_mode=%x\n",
- deliver_bitmask, vector, dest_LowestPrio);
+ deliver_bitmask, vector, IOAPIC_LOWEST_PRIORITY);
break;
- case dest_Fixed:
+ case IOAPIC_FIXED:
for (vcpu_id = 0; deliver_bitmask != 0; vcpu_id++) {
if ...From: Hollis Blanchard <hollisb@us.ibm.com>
Signed-off-by: Hollis Blanchard <hollisb@us.ibm.com>
Acked-by: Carsten Otte <cotte@de.ibm.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
drivers/kvm/iodev.h | 63 ++++++++++++++++++++++++++++++++++++++++++++++++
drivers/kvm/irq.h | 1 +
drivers/kvm/kvm.h | 42 --------------------------------
drivers/kvm/kvm_main.c | 1 +
4 files changed, 65 insertions(+), 42 deletions(-)
create mode 100644 drivers/kvm/iodev.h
diff --git a/drivers/kvm/iodev.h b/drivers/kvm/iodev.h
new file mode 100644
index 0000000..eb9e8a7
--- /dev/null
+++ b/drivers/kvm/iodev.h
@@ -0,0 +1,63 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __KVM_IODEV_H__
+#define __KVM_IODEV_H__
+
+#include "types.h"
+
+struct kvm_io_device {
+ void (*read)(struct kvm_io_device *this,
+ gpa_t addr,
+ int len,
+ void *val);
+ void (*write)(struct kvm_io_device *this,
+ gpa_t addr,
+ int len,
+ const void *val);
+ int (*in_range)(struct kvm_io_device *this, gpa_t addr);
+ void (*destructor)(struct kvm_io_device *this);
+
+ void *private;
+};
+
+static inline void kvm_iodevice_read(struct kvm_io_device *dev,
+ gpa_t addr,
+ int len,
+ void *val)
+{
+ dev->read(dev, addr, len, val);
+}
+
+static inline ...From: Hollis Blanchard <hollisb@us.ibm.com>
Signed-off-by: Hollis Blanchard <hollisb@us.ibm.com>
Acked-by: Carsten Otte <cotte@de.ibm.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
drivers/kvm/kvm.h | 21 ++-------------------
drivers/kvm/types.h | 41 +++++++++++++++++++++++++++++++++++++++++
2 files changed, 43 insertions(+), 19 deletions(-)
create mode 100644 drivers/kvm/types.h
diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index be18620..3b0ba5a 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -20,6 +20,8 @@
#include <linux/kvm.h>
#include <linux/kvm_para.h>
+#include "types.h"
+
#define KVM_MAX_VCPUS 4
#define KVM_ALIAS_SLOTS 4
#define KVM_MEMORY_SLOTS 8
@@ -39,25 +41,6 @@
*/
#define KVM_REQ_TLB_FLUSH 0
-/*
- * Address types:
- *
- * gva - guest virtual address
- * gpa - guest physical address
- * gfn - guest frame number
- * hva - host virtual address
- * hpa - host physical address
- * hfn - host frame number
- */
-
-typedef unsigned long gva_t;
-typedef u64 gpa_t;
-typedef unsigned long gfn_t;
-
-typedef unsigned long hva_t;
-typedef u64 hpa_t;
-typedef unsigned long hfn_t;
-
#define NR_PTE_CHAIN_ENTRIES 5
struct kvm_pte_chain {
diff --git a/drivers/kvm/types.h b/drivers/kvm/types.h
new file mode 100644
index 0000000..6ad7623
--- /dev/null
+++ b/drivers/kvm/types.h
@@ -0,0 +1,41 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if ...From: Hollis Blanchard <hollisb@us.ibm.com>
This abstracts the detail of x86 hlt and INIT modes into a function.
Signed-off-by: Hollis Blanchard <hollisb@us.ibm.com>
Acked-by: Carsten Otte <cotte@de.ibm.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
drivers/kvm/kvm_main.c | 3 +--
drivers/kvm/x86.h | 7 +++++++
2 files changed, 8 insertions(+), 2 deletions(-)
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index 7785c90..3d1023c 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -643,8 +643,7 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
*/
while (!kvm_cpu_has_interrupt(vcpu)
&& !signal_pending(current)
- && vcpu->mp_state != VCPU_MP_STATE_RUNNABLE
- && vcpu->mp_state != VCPU_MP_STATE_SIPI_RECEIVED) {
+ && !kvm_arch_vcpu_runnable(vcpu)) {
set_current_state(TASK_INTERRUPTIBLE);
vcpu_put(vcpu);
schedule();
diff --git a/drivers/kvm/x86.h b/drivers/kvm/x86.h
index 4b7acc7..eed7964 100644
--- a/drivers/kvm/x86.h
+++ b/drivers/kvm/x86.h
@@ -471,4 +471,11 @@ static inline u32 get_rdx_init_val(void)
#define TSS_IOPB_SIZE (65536 / 8)
#define TSS_REDIRECTION_SIZE (256 / 8)
#define RMODE_TSS_SIZE (TSS_BASE_SIZE + TSS_REDIRECTION_SIZE + TSS_IOPB_SIZE + 1)
+
+static inline int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
+{
+ return vcpu->mp_state == VCPU_MP_STATE_RUNNABLE
+ || vcpu->mp_state == VCPU_MP_STATE_SIPI_RECEIVED;
+}
+
#endif
--
1.5.3.7
--
From: Hollis Blanchard <hollisb@us.ibm.com>
Signed-off-by: Hollis Blanchard <hollisb@us.ibm.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
drivers/kvm/i8259.c | 1 +
drivers/kvm/irq.h | 7 ++++++-
drivers/kvm/kvm.h | 9 ++++-----
drivers/kvm/kvm_main.c | 3 ---
drivers/kvm/x86.h | 8 ++++++++
5 files changed, 19 insertions(+), 9 deletions(-)
diff --git a/drivers/kvm/i8259.c b/drivers/kvm/i8259.c
index f0dc2ee..b3cad63 100644
--- a/drivers/kvm/i8259.c
+++ b/drivers/kvm/i8259.c
@@ -27,6 +27,7 @@
*/
#include <linux/mm.h>
#include "irq.h"
+#include "kvm.h"
/*
* set irq level. If an edge is detected, then the IRR is set to 1
diff --git a/drivers/kvm/irq.h b/drivers/kvm/irq.h
index 803b9c7..730a87c 100644
--- a/drivers/kvm/irq.h
+++ b/drivers/kvm/irq.h
@@ -22,9 +22,14 @@
#ifndef __IRQ_H
#define __IRQ_H
-#include "kvm.h"
+#include <linux/mm_types.h>
+#include <linux/hrtimer.h>
+#include <asm/kvm.h>
#include "iodev.h"
+struct kvm;
+struct kvm_vcpu;
+
typedef void irq_request_func(void *opaque, int level);
struct kvm_kpic_state {
diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index a1b7d1c..0d3555b 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -288,11 +288,6 @@ static inline int irqchip_in_kernel(struct kvm *kvm)
return pic_irqchip(kvm) != NULL;
}
-struct descriptor_table {
- u16 limit;
- unsigned long base;
-} __attribute__((packed));
-
/* The guest did something we don't support. */
#define pr_unimpl(vcpu, fmt, ...) \
do { \
@@ -457,4 +452,8 @@ struct kvm_stats_debugfs_item {
};
extern struct kvm_stats_debugfs_item debugfs_entries[];
+#if defined(CONFIG_X86)
+#include "x86.h"
+#endif
+
#endif
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index 8d4a06d..7785c90 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -16,8 +16,6 @@
*/
#include "kvm.h"
-#include "x86.h"
-#include "irq.h"
#include ...Signed-off-by: Avi Kivity <avi@qumranet.com>
---
drivers/kvm/mmu.c | 2 +-
drivers/kvm/svm.c | 35 -----------------------------------
drivers/kvm/vmx.c | 32 --------------------------------
drivers/kvm/x86.c | 17 ++++++++++++++++-
drivers/kvm/x86.h | 4 ++--
5 files changed, 19 insertions(+), 71 deletions(-)
diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c
index 9b9d1b6..62a7415 100644
--- a/drivers/kvm/mmu.c
+++ b/drivers/kvm/mmu.c
@@ -1081,7 +1081,7 @@ static void inject_page_fault(struct kvm_vcpu *vcpu,
u64 addr,
u32 err_code)
{
- kvm_x86_ops->inject_page_fault(vcpu, addr, err_code);
+ kvm_inject_page_fault(vcpu, addr, err_code);
}
static void paging_free(struct kvm_vcpu *vcpu)
diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c
index f4c61c8..ce77f15 100644
--- a/drivers/kvm/svm.c
+++ b/drivers/kvm/svm.c
@@ -225,12 +225,6 @@ static void inject_ud(struct kvm_vcpu *vcpu)
UD_VECTOR;
}
-static int is_page_fault(uint32_t info)
-{
- info &= SVM_EVTINJ_VEC_MASK | SVM_EVTINJ_TYPE_MASK | SVM_EVTINJ_VALID;
- return info == (PF_VECTOR | SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_EXEPT);
-}
-
static int is_external_interrupt(u32 info)
{
info &= SVM_EVTINJ_TYPE_MASK | SVM_EVTINJ_VALID;
@@ -1624,34 +1618,6 @@ static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root)
}
}
-static void svm_inject_page_fault(struct kvm_vcpu *vcpu,
- unsigned long addr,
- uint32_t err_code)
-{
- struct vcpu_svm *svm = to_svm(vcpu);
- uint32_t exit_int_info = svm->vmcb->control.exit_int_info;
-
- ++vcpu->stat.pf_guest;
-
- if (is_page_fault(exit_int_info)) {
-
- svm->vmcb->control.event_inj_err = 0;
- svm->vmcb->control.event_inj = SVM_EVTINJ_VALID |
- SVM_EVTINJ_VALID_ERR |
- SVM_EVTINJ_TYPE_EXEPT |
- DF_VECTOR;
- return;
- }
- vcpu->cr2 = addr;
- svm->vmcb->save.cr2 = addr;
- svm->vmcb->control.event_inj = SVM_EVTINJ_VALID |
- SVM_EVTINJ_VALID_ERR |
- SVM_EVTINJ_TYPE_EXEPT ...From: Guillaume Thouvenin <guillaume.thouvenin@ext.bull.net>
Add emulation for the cmps instruction. This lets OpenBSD boot on kvm.
Signed-off-by: Guillaume Thouvenin <guillaume.thouvenin@ext.bull.net>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
drivers/kvm/x86_emulate.c | 58 +++++++++++++++++++++++++++++++++++++++++++-
1 files changed, 56 insertions(+), 2 deletions(-)
diff --git a/drivers/kvm/x86_emulate.c b/drivers/kvm/x86_emulate.c
index 22fdf0a..84e5366 100644
--- a/drivers/kvm/x86_emulate.c
+++ b/drivers/kvm/x86_emulate.c
@@ -1535,10 +1535,31 @@ special_insn:
break;
}
if (c->rep_prefix) {
+ /* All REP prefixes have the same first termination condition */
if (c->regs[VCPU_REGS_RCX] == 0) {
ctxt->vcpu->rip = c->eip;
goto done;
}
+ /* The second termination condition only applies for REPE
+ * and REPNE. Test if the repeat string operation prefix is
+ * REPE/REPZ or REPNE/REPNZ and if it's the case it tests the
+ * corresponding termination condition according to:
+ * - if REPE/REPZ and ZF = 0 then done
+ * - if REPNE/REPNZ and ZF = 1 then done
+ */
+ if ((c->b == 0xa6) || (c->b == 0xa7) ||
+ (c->b == 0xae) || (c->b == 0xaf)) {
+ if ((c->rep_prefix == REPE_PREFIX) &&
+ ((ctxt->eflags & EFLG_ZF) == 0)) {
+ ctxt->vcpu->rip = c->eip;
+ goto done;
+ }
+ if ((c->rep_prefix == REPNE_PREFIX) &&
+ ((ctxt->eflags & EFLG_ZF) == EFLG_ZF)) {
+ ctxt->vcpu->rip = c->eip;
+ goto done;
+ }
+ }
c->regs[VCPU_REGS_RCX]--;
c->eip = ctxt->vcpu->rip;
}
@@ -1564,8 +1585,41 @@ special_insn:
: c->dst.bytes);
break;
case 0xa6 ... 0xa7: /* cmps */
- DPRINTF("Urk! I don't handle CMPS.\n");
- goto cannot_emulate;
+ c->src.type = OP_NONE; /* Disable writeback. */
+ c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
+ c->src.ptr = (unsigned long *)register_address(
+ c->override_base ? *c->override_base :
+ ctxt->ds_base,
+ c->regs[VCPU_REGS_RSI]);
+ if ...Instead of each subarch doing its own thing, add an API for queuing an
injection, and manage failed exception injection centrally (i.e., if
an inject failed due to a shadow page fault, we need to requeue it).
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
drivers/kvm/svm.c | 21 +++++++++++++++++++++
drivers/kvm/vmx.c | 20 ++++++++++++++++++++
drivers/kvm/x86.c | 33 ++++++++++++++++++++++++++++++++-
drivers/kvm/x86.h | 13 +++++++++++++
4 files changed, 86 insertions(+), 1 deletions(-)
diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c
index 04e6b39..f4c61c8 100644
--- a/drivers/kvm/svm.c
+++ b/drivers/kvm/svm.c
@@ -188,6 +188,25 @@ static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
vcpu->shadow_efer = efer;
}
+static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
+ bool has_error_code, u32 error_code)
+{
+ struct vcpu_svm *svm = to_svm(vcpu);
+
+ svm->vmcb->control.event_inj = nr
+ | SVM_EVTINJ_VALID
+ | (has_error_code ? SVM_EVTINJ_VALID_ERR : 0)
+ | SVM_EVTINJ_TYPE_EXEPT;
+ svm->vmcb->control.event_inj_err = error_code;
+}
+
+static bool svm_exception_injected(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_svm *svm = to_svm(vcpu);
+
+ return !(svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_VALID);
+}
+
static void svm_inject_gp(struct kvm_vcpu *vcpu, unsigned error_code)
{
struct vcpu_svm *svm = to_svm(vcpu);
@@ -1712,6 +1731,8 @@ static struct kvm_x86_ops svm_x86_ops = {
.patch_hypercall = svm_patch_hypercall,
.get_irq = svm_get_irq,
.set_irq = svm_set_irq,
+ .queue_exception = svm_queue_exception,
+ .exception_injected = svm_exception_injected,
.inject_pending_irq = svm_intr_assist,
.inject_pending_vectors = do_interrupt_requests,
diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c
index 8e43feb..1ec1c28 100644
--- a/drivers/kvm/vmx.c
+++ b/drivers/kvm/vmx.c
@@ -595,6 +595,24 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
vcpu->interrupt_window_open = 1;
}
+static ...We prepare eflags for the emulated instruction, then clobber it with an 'andl'.
Fix by popping eflags as the last thing in the sequence.
Patch taken from Xen (16143:959b4b92b6bf)
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
drivers/kvm/x86_emulate.c | 30 +++++++++++++++---------------
1 files changed, 15 insertions(+), 15 deletions(-)
diff --git a/drivers/kvm/x86_emulate.c b/drivers/kvm/x86_emulate.c
index 2e259a8..f423b0e 100644
--- a/drivers/kvm/x86_emulate.c
+++ b/drivers/kvm/x86_emulate.c
@@ -256,21 +256,21 @@ static u16 twobyte_table[256] = {
#define EFLAGS_MASK (EFLG_OF|EFLG_SF|EFLG_ZF|EFLG_AF|EFLG_PF|EFLG_CF)
/* Before executing instruction: restore necessary bits in EFLAGS. */
-#define _PRE_EFLAGS(_sav, _msk, _tmp) \
- /* EFLAGS = (_sav & _msk) | (EFLAGS & ~_msk); */ \
- "push %"_sav"; " \
- "movl %"_msk",%"_LO32 _tmp"; " \
- "andl %"_LO32 _tmp",("_STK"); " \
- "pushf; " \
- "notl %"_LO32 _tmp"; " \
- "andl %"_LO32 _tmp",("_STK"); " \
- "pop %"_tmp"; " \
- "orl %"_LO32 _tmp",("_STK"); " \
- "popf; " \
- /* _sav &= ~msk; */ \
- "movl %"_msk",%"_LO32 _tmp"; " \
- "notl %"_LO32 _tmp"; " \
- "andl %"_LO32 _tmp",%"_sav"; "
+#define _PRE_EFLAGS(_sav, _msk, _tmp) \
+ /* EFLAGS = (_sav & _msk) | (EFLAGS & ~_msk); _sav &= ~_msk; */ \
+ "movl %"_sav",%"_LO32 _tmp"; " \
+ "push %"_tmp"; " \
+ "push %"_tmp"; " \
+ "movl %"_msk",%"_LO32 _tmp"; " \
+ "andl %"_LO32 _tmp",("_STK"); " \
+ "pushf; " \
+ "notl %"_LO32 _tmp"; " \
+ "andl %"_LO32 _tmp",("_STK"); " \
+ "andl %"_LO32 _tmp","__stringify(BITS_PER_LONG/4)"("_STK"); " \
+ "pop %"_tmp"; " ...Signed-off-by: Avi Kivity <avi@qumranet.com>
---
drivers/kvm/svm.c | 17 ++---------------
drivers/kvm/vmx.c | 18 ++----------------
drivers/kvm/x86.c | 43 +++++++++++++++++++------------------------
drivers/kvm/x86.h | 7 +++++--
drivers/kvm/x86_emulate.c | 4 ++--
5 files changed, 30 insertions(+), 59 deletions(-)
diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c
index ce77f15..b896614 100644
--- a/drivers/kvm/svm.c
+++ b/drivers/kvm/svm.c
@@ -207,17 +207,6 @@ static bool svm_exception_injected(struct kvm_vcpu *vcpu)
return !(svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_VALID);
}
-static void svm_inject_gp(struct kvm_vcpu *vcpu, unsigned error_code)
-{
- struct vcpu_svm *svm = to_svm(vcpu);
-
- svm->vmcb->control.event_inj = SVM_EVTINJ_VALID |
- SVM_EVTINJ_VALID_ERR |
- SVM_EVTINJ_TYPE_EXEPT |
- GP_VECTOR;
- svm->vmcb->control.event_inj_err = error_code;
-}
-
static void inject_ud(struct kvm_vcpu *vcpu)
{
to_svm(vcpu)->vmcb->control.event_inj = SVM_EVTINJ_VALID |
@@ -1115,7 +1104,7 @@ static int rdmsr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
u64 data;
if (svm_get_msr(&svm->vcpu, ecx, &data))
- svm_inject_gp(&svm->vcpu, 0);
+ kvm_inject_gp(&svm->vcpu, 0);
else {
svm->vmcb->save.rax = data & 0xffffffff;
svm->vcpu.regs[VCPU_REGS_RDX] = data >> 32;
@@ -1176,7 +1165,7 @@ static int wrmsr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
| ((u64)(svm->vcpu.regs[VCPU_REGS_RDX] & -1u) << 32);
svm->next_rip = svm->vmcb->save.rip + 2;
if (svm_set_msr(&svm->vcpu, ecx, data))
- svm_inject_gp(&svm->vcpu, 0);
+ kvm_inject_gp(&svm->vcpu, 0);
else
skip_emulated_instruction(&svm->vcpu);
return 1;
@@ -1688,8 +1677,6 @@ static struct kvm_x86_ops svm_x86_ops = {
.tlb_flush = svm_flush_tlb,
- .inject_gp = svm_inject_gp,
-
.run = svm_vcpu_run,
.handle_exit = handle_exit,
.skip_emulated_instruction = ...Stack instructions are always 64-bit in 64-bit mode; many of the
emulated stack instructions did not take that into account. Fix by
adding a 'Stack' bitflag and setting the operand size appropriately
during the decode stage (except for 'push r/m', which is in a group
with a few other instructions, so it gets its own treatment).
This fixes random crashes on Vista x64.
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
drivers/kvm/x86_emulate.c | 27 +++++++++++++++------------
1 files changed, 15 insertions(+), 12 deletions(-)
diff --git a/drivers/kvm/x86_emulate.c b/drivers/kvm/x86_emulate.c
index f423b0e..0a6ab06 100644
--- a/drivers/kvm/x86_emulate.c
+++ b/drivers/kvm/x86_emulate.c
@@ -65,6 +65,7 @@
#define BitOp (1<<8)
#define MemAbs (1<<9) /* Memory operand is absolute displacement */
#define String (1<<10) /* String instruction (rep capable) */
+#define Stack (1<<11) /* Stack instruction (push/pop) */
static u16 opcode_table[256] = {
/* 0x00 - 0x07 */
@@ -104,14 +105,16 @@ static u16 opcode_table[256] = {
/* 0x48 - 0x4F */
DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg,
/* 0x50 - 0x57 */
- SrcReg, SrcReg, SrcReg, SrcReg, SrcReg, SrcReg, SrcReg, SrcReg,
+ SrcReg | Stack, SrcReg | Stack, SrcReg | Stack, SrcReg | Stack,
+ SrcReg | Stack, SrcReg | Stack, SrcReg | Stack, SrcReg | Stack,
/* 0x58 - 0x5F */
- DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg,
+ DstReg | Stack, DstReg | Stack, DstReg | Stack, DstReg | Stack,
+ DstReg | Stack, DstReg | Stack, DstReg | Stack, DstReg | Stack,
/* 0x60 - 0x67 */
0, 0, 0, DstReg | SrcMem32 | ModRM | Mov /* movsxd (x86/64) */ ,
0, 0, 0, 0,
/* 0x68 - 0x6F */
- 0, 0, ImplicitOps|Mov, 0,
+ 0, 0, ImplicitOps | Mov | Stack, 0,
SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, /* insb, insw/insd */
SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, /* outsb, outsw/outsd */
/* 0x70 - 0x77 */
@@ -128,9 +131,10 @@ static ...From: Joerg Roedel <jroedel@lemmy.amd.com> This patch adds code to emulate the access to the cr8 register to the x86 instruction emulator in kvm. This is needed on svm, where there is no hardware decode for control register access. Signed-off-by: Joerg Roedel <joerg.roedel@amd.com> Signed-off-by: Markus Rechberger <markus.rechberger@amd.com> Signed-off-by: Avi Kivity <avi@qumranet.com> --- drivers/kvm/x86.c | 5 +++++ 1 files changed, 5 insertions(+), 0 deletions(-) diff --git a/drivers/kvm/x86.c b/drivers/kvm/x86.c index 6deb052..9db4e32 100644 --- a/drivers/kvm/x86.c +++ b/drivers/kvm/x86.c @@ -2236,6 +2236,8 @@ unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr) return vcpu->cr3; case 4: return vcpu->cr4; + case 8: + return get_cr8(vcpu); default: vcpu_printf(vcpu, "%s: unexpected cr %u\n", __FUNCTION__, cr); return 0; @@ -2259,6 +2261,9 @@ void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val, case 4: set_cr4(vcpu, mk_cr_64(vcpu->cr4, val)); break; + case 8: + set_cr8(vcpu, val & 0xfUL); + break; default: vcpu_printf(vcpu, "%s: unexpected cr %u\n", __FUNCTION__, cr); } -- 1.5.3.7 --
With apic in userspace, we must exit to userspace after a cr8 write in order to update the tpr. But if the apic is in the kernel, the exit is unnecessary. Noticed by Joerg Roedel. Signed-off-by: Avi Kivity <avi@qumranet.com> --- drivers/kvm/vmx.c | 2 ++ 1 files changed, 2 insertions(+), 0 deletions(-) diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index aa6bf2b..548e3a5 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -1973,6 +1973,8 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) vcpu_load_rsp_rip(vcpu); set_cr8(vcpu, vcpu->regs[reg]); skip_emulated_instruction(vcpu); + if (irqchip_in_kernel(vcpu->kvm)) + return 1; kvm_run->exit_reason = KVM_EXIT_SET_TPR; return 0; }; -- 1.5.3.7 --
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
drivers/kvm/svm.c | 12 ++----------
drivers/kvm/vmx.c | 11 +----------
2 files changed, 3 insertions(+), 20 deletions(-)
diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c
index b896614..8b1cc60 100644
--- a/drivers/kvm/svm.c
+++ b/drivers/kvm/svm.c
@@ -207,13 +207,6 @@ static bool svm_exception_injected(struct kvm_vcpu *vcpu)
return !(svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_VALID);
}
-static void inject_ud(struct kvm_vcpu *vcpu)
-{
- to_svm(vcpu)->vmcb->control.event_inj = SVM_EVTINJ_VALID |
- SVM_EVTINJ_TYPE_EXEPT |
- UD_VECTOR;
-}
-
static int is_external_interrupt(u32 info)
{
info &= SVM_EVTINJ_TYPE_MASK | SVM_EVTINJ_VALID;
@@ -948,8 +941,7 @@ static int ud_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
er = emulate_instruction(&svm->vcpu, kvm_run, 0, 0, 0);
if (er != EMULATE_DONE)
- inject_ud(&svm->vcpu);
-
+ kvm_queue_exception(&svm->vcpu, UD_VECTOR);
return 1;
}
@@ -1027,7 +1019,7 @@ static int vmmcall_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
static int invalid_op_interception(struct vcpu_svm *svm,
struct kvm_run *kvm_run)
{
- inject_ud(&svm->vcpu);
+ kvm_queue_exception(&svm->vcpu, UD_VECTOR);
return 1;
}
diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c
index 92660db..aa6bf2b 100644
--- a/drivers/kvm/vmx.c
+++ b/drivers/kvm/vmx.c
@@ -613,14 +613,6 @@ static bool vmx_exception_injected(struct kvm_vcpu *vcpu)
return !(vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK);
}
-static void vmx_inject_ud(struct kvm_vcpu *vcpu)
-{
- vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
- UD_VECTOR |
- INTR_TYPE_EXCEPTION |
- INTR_INFO_VALID_MASK);
-}
-
/*
* Swap MSR entry in host/guest MSR entry array.
*/
@@ -1866,8 +1858,7 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
if (is_invalid_opcode(intr_info)) {
er = emulate_instruction(vcpu, kvm_run, 0, 0, ...From: Marcelo Tosatti <marcelo@kvack.org> Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com> Signed-off-by: Avi Kivity <avi@qumranet.com> --- drivers/kvm/paging_tmpl.h | 3 --- 1 files changed, 0 insertions(+), 3 deletions(-) diff --git a/drivers/kvm/paging_tmpl.h b/drivers/kvm/paging_tmpl.h index b24bc7c..3c40a59 100644 --- a/drivers/kvm/paging_tmpl.h +++ b/drivers/kvm/paging_tmpl.h @@ -300,7 +300,6 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, hpa_t shadow_addr; int level; u64 *shadow_ent; - u64 *prev_shadow_ent = NULL; if (!is_present_pte(walker->pte)) return NULL; @@ -326,7 +325,6 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, if (level == PT_PAGE_TABLE_LEVEL) break; shadow_addr = *shadow_ent & PT64_BASE_ADDR_MASK; - prev_shadow_ent = shadow_ent; continue; } @@ -355,7 +353,6 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, shadow_pte = shadow_addr | PT_PRESENT_MASK | PT_ACCESSED_MASK | PT_WRITABLE_MASK | PT_USER_MASK; *shadow_ent = shadow_pte; - prev_shadow_ent = shadow_ent; } FNAME(set_pte)(vcpu, walker->pte, shadow_ent, -- 1.5.3.7 --
While the page table walker correctly generates a guest page fault if a guest tries to execute a non-executable page, the shadow code does not mark it non-executable. This means that if a guest accesses an nx page first with a read access, then subsequent code fetch accesses will succeed. Fix by setting the nx bit on shadow ptes. Signed-off-by: Avi Kivity <avi@qumranet.com> --- drivers/kvm/paging_tmpl.h | 2 ++ 1 files changed, 2 insertions(+), 0 deletions(-) diff --git a/drivers/kvm/paging_tmpl.h b/drivers/kvm/paging_tmpl.h index 7688cbf..59ba752 100644 --- a/drivers/kvm/paging_tmpl.h +++ b/drivers/kvm/paging_tmpl.h @@ -255,6 +255,8 @@ static void FNAME(set_pte)(struct kvm_vcpu *vcpu, pt_element_t gpte, spte |= gpte & PT64_NX_MASK; if (!dirty) pte_access &= ~ACC_WRITE_MASK; + if (!(pte_access & ACC_EXEC_MASK)) + spte |= PT64_NX_MASK; page = gfn_to_page(vcpu->kvm, gfn); -- 1.5.3.7 --
Unused.
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
drivers/kvm/paging_tmpl.h | 7 +++----
1 files changed, 3 insertions(+), 4 deletions(-)
diff --git a/drivers/kvm/paging_tmpl.h b/drivers/kvm/paging_tmpl.h
index ca8d515..2e5a80a 100644
--- a/drivers/kvm/paging_tmpl.h
+++ b/drivers/kvm/paging_tmpl.h
@@ -238,8 +238,7 @@ err:
static void FNAME(set_pte)(struct kvm_vcpu *vcpu, u64 *shadow_pte,
unsigned pt_access, unsigned pte_access,
int user_fault, int write_fault, int dirty,
- int *ptwrite, struct guest_walker *walker,
- gfn_t gfn)
+ int *ptwrite, gfn_t gfn)
{
u64 spte;
int was_rmapped = is_rmap_pte(*shadow_pte);
@@ -337,7 +336,7 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
pgprintk("%s: gpte %llx spte %p\n", __FUNCTION__, (u64)gpte, spte);
pte_access = page->role.access & FNAME(gpte_access)(vcpu, gpte);
FNAME(set_pte)(vcpu, spte, page->role.access, pte_access, 0, 0,
- gpte & PT_DIRTY_MASK, NULL, NULL, gpte_to_gfn(gpte));
+ gpte & PT_DIRTY_MASK, NULL, gpte_to_gfn(gpte));
}
/*
@@ -402,7 +401,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
FNAME(set_pte)(vcpu, shadow_ent, access, walker->pte_access & access,
user_fault, write_fault, walker->pte & PT_DIRTY_MASK,
- ptwrite, walker, walker->gfn);
+ ptwrite, walker->gfn);
return shadow_ent;
}
--
1.5.3.7
--
As set_pte() no longer references either a gpte or the guest walker, we can
move it out of paging mode dependent code (which compiles twice and is
generally nasty).
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
drivers/kvm/mmu.c | 83 ++++++++++++++++++++++++++++++++++++++++
drivers/kvm/paging_tmpl.h | 93 ++------------------------------------------
2 files changed, 88 insertions(+), 88 deletions(-)
diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c
index cace1e4..a91e05b 100644
--- a/drivers/kvm/mmu.c
+++ b/drivers/kvm/mmu.c
@@ -879,6 +879,89 @@ struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva)
return gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT);
}
+static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
+ unsigned pt_access, unsigned pte_access,
+ int user_fault, int write_fault, int dirty,
+ int *ptwrite, gfn_t gfn)
+{
+ u64 spte;
+ int was_rmapped = is_rmap_pte(*shadow_pte);
+ struct page *page;
+
+ pgprintk("%s: spte %llx gpte %llx access %x write_fault %d"
+ " user_fault %d gfn %lx\n",
+ __FUNCTION__, *shadow_pte, (u64)gpte, pt_access,
+ write_fault, user_fault, gfn);
+
+ /*
+ * We don't set the accessed bit, since we sometimes want to see
+ * whether the guest actually used the pte (in order to detect
+ * demand paging).
+ */
+ spte = PT_PRESENT_MASK | PT_DIRTY_MASK;
+ if (!dirty)
+ pte_access &= ~ACC_WRITE_MASK;
+ if (!(pte_access & ACC_EXEC_MASK))
+ spte |= PT64_NX_MASK;
+
+ page = gfn_to_page(vcpu->kvm, gfn);
+
+ spte |= PT_PRESENT_MASK;
+ if (pte_access & ACC_USER_MASK)
+ spte |= PT_USER_MASK;
+
+ if (is_error_page(page)) {
+ set_shadow_pte(shadow_pte,
+ shadow_trap_nonpresent_pte | PT_SHADOW_IO_MARK);
+ kvm_release_page_clean(page);
+ return;
+ }
+
+ spte |= page_to_phys(page);
+
+ if ((pte_access & ACC_WRITE_MASK)
+ || (write_fault && !is_write_protection(vcpu) && !user_fault)) {
+ struct kvm_mmu_page *shadow;
+
+ spte |= PT_WRITABLE_MASK;
+ if ...In addition to removing some duplicated code, this also handles the unlikely
case of real-mode code updating a guest page table. This can happen when
one vcpu (in real mode) touches a second vcpu's (in protected mode) page
tables, or if a vcpu switches to real mode, touches page tables, and switches
back.
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
drivers/kvm/mmu.c | 41 ++++++++++-------------------------------
1 files changed, 10 insertions(+), 31 deletions(-)
diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c
index b4dd726..ba71e8d 100644
--- a/drivers/kvm/mmu.c
+++ b/drivers/kvm/mmu.c
@@ -966,40 +966,23 @@ static void nonpaging_new_cr3(struct kvm_vcpu *vcpu)
{
}
-static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, struct page *page)
+static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
{
int level = PT32E_ROOT_LEVEL;
hpa_t table_addr = vcpu->mmu.root_hpa;
+ int pt_write = 0;
for (; ; level--) {
u32 index = PT64_INDEX(v, level);
u64 *table;
- u64 pte;
ASSERT(VALID_PAGE(table_addr));
table = __va(table_addr);
if (level == 1) {
- int was_rmapped;
-
- pte = table[index];
- was_rmapped = is_rmap_pte(pte);
- if (is_shadow_present_pte(pte) && is_writeble_pte(pte)) {
- kvm_release_page_clean(page);
- return 0;
- }
- mark_page_dirty(vcpu->kvm, v >> PAGE_SHIFT);
- page_header_update_slot(vcpu->kvm, table,
- v >> PAGE_SHIFT);
- table[index] = page_to_phys(page)
- | PT_PRESENT_MASK | PT_WRITABLE_MASK
- | PT_USER_MASK;
- if (!was_rmapped)
- rmap_add(vcpu, &table[index], v >> PAGE_SHIFT);
- else
- kvm_release_page_clean(page);
-
- return 0;
+ mmu_set_spte(vcpu, &table[index], ACC_ALL, ACC_ALL,
+ 0, write, 1, &pt_write, gfn);
+ return pt_write || is_io_pte(table[index]);
}
if (table[index] == shadow_trap_nonpresent_pte) {
@@ -1013,7 +996,6 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, struct page *page)
...From: Joerg Roedel <joerg.roedel@amd.com> This patch fixes a compile error of the LAPIC code with APIC debugging enabled. Signed-off-by: Joerg Roedel <joerg.roedel@amd.com> Signed-off-by: Markus Rechberger <markus.rechberger@amd.com> Signed-off-by: Avi Kivity <avi@qumranet.com> --- drivers/kvm/lapic.c | 2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/drivers/kvm/lapic.c b/drivers/kvm/lapic.c index 5efa6c0..466c37f 100644 --- a/drivers/kvm/lapic.c +++ b/drivers/kvm/lapic.c @@ -829,7 +829,7 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value) /* with FSB delivery interrupt, we can restart APIC functionality */ apic_debug("apic base msr is 0x%016" PRIx64 ", and base address is " - "0x%lx.\n", apic->apic_base, apic->base_address); + "0x%lx.\n", apic->vcpu->apic_base, apic->base_address); } -- 1.5.3.7 --
From: Marcelo Tosatti <marcelo@kvack.org>
There is a race where VCPU0 is shadowing a pagetable entry while VCPU1
is updating it, which results in a stale shadow copy.
Fix that by comparing the contents of the cached guest pte with the
current guest pte after write-protecting the guest pagetable.
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
drivers/kvm/mmu.c | 12 ++++++++----
drivers/kvm/paging_tmpl.h | 29 +++++++++++++++++++++--------
2 files changed, 29 insertions(+), 12 deletions(-)
diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c
index ba71e8d..92ac0d1 100644
--- a/drivers/kvm/mmu.c
+++ b/drivers/kvm/mmu.c
@@ -681,7 +681,8 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
unsigned level,
int metaphysical,
unsigned access,
- u64 *parent_pte)
+ u64 *parent_pte,
+ bool *new_page)
{
union kvm_mmu_page_role role;
unsigned index;
@@ -720,6 +721,8 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
vcpu->mmu.prefetch_page(vcpu, sp);
if (!metaphysical)
rmap_write_protect(vcpu->kvm, gfn);
+ if (new_page)
+ *new_page = 1;
return sp;
}
@@ -993,7 +996,8 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
>> PAGE_SHIFT;
new_table = kvm_mmu_get_page(vcpu, pseudo_gfn,
v, level - 1,
- 1, ACC_ALL, &table[index]);
+ 1, ACC_ALL, &table[index],
+ NULL);
if (!new_table) {
pgprintk("nonpaging_map: ENOMEM\n");
return -ENOMEM;
@@ -1059,7 +1063,7 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
ASSERT(!VALID_PAGE(root));
sp = kvm_mmu_get_page(vcpu, root_gfn, 0,
- PT64_ROOT_LEVEL, 0, ACC_ALL, NULL);
+ PT64_ROOT_LEVEL, 0, ACC_ALL, NULL, NULL);
root = __pa(sp->spt);
++sp->root_count;
vcpu->mmu.root_hpa = root;
@@ -1080,7 +1084,7 @@ static void ...From: Joerg Roedel <joerg.roedel@amd.com>
With this patch KVM on SVM will exit to userspace if the guest writes to CR8
and the in-kernel APIC is disabled.
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
Signed-off-by: Markus Rechberger <markus.rechberger@amd.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
drivers/kvm/svm.c | 11 ++++++++++-
1 files changed, 10 insertions(+), 1 deletions(-)
diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c
index 677b525..9f8564a 100644
--- a/drivers/kvm/svm.c
+++ b/drivers/kvm/svm.c
@@ -1048,6 +1048,15 @@ static int emulate_on_interception(struct vcpu_svm *svm,
return 1;
}
+static int cr8_write_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+{
+ emulate_instruction(&svm->vcpu, NULL, 0, 0, 0);
+ if (irqchip_in_kernel(svm->vcpu.kvm))
+ return 1;
+ kvm_run->exit_reason = KVM_EXIT_SET_TPR;
+ return 0;
+}
+
static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data)
{
struct vcpu_svm *svm = to_svm(vcpu);
@@ -1202,7 +1211,7 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm,
[SVM_EXIT_WRITE_CR0] = emulate_on_interception,
[SVM_EXIT_WRITE_CR3] = emulate_on_interception,
[SVM_EXIT_WRITE_CR4] = emulate_on_interception,
- [SVM_EXIT_WRITE_CR8] = emulate_on_interception,
+ [SVM_EXIT_WRITE_CR8] = cr8_write_interception,
[SVM_EXIT_READ_DR0] = emulate_on_interception,
[SVM_EXIT_READ_DR1] = emulate_on_interception,
[SVM_EXIT_READ_DR2] = emulate_on_interception,
--
1.5.3.7
--
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
drivers/kvm/mmu.c | 4 ++--
1 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c
index a91e05b..b4dd726 100644
--- a/drivers/kvm/mmu.c
+++ b/drivers/kvm/mmu.c
@@ -888,9 +888,9 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
int was_rmapped = is_rmap_pte(*shadow_pte);
struct page *page;
- pgprintk("%s: spte %llx gpte %llx access %x write_fault %d"
+ pgprintk("%s: spte %llx access %x write_fault %d"
" user_fault %d gfn %lx\n",
- __FUNCTION__, *shadow_pte, (u64)gpte, pt_access,
+ __FUNCTION__, *shadow_pte, pt_access,
write_fault, user_fault, gfn);
/*
--
1.5.3.7
--
We already set it according to cumulative access permissions.
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
drivers/kvm/paging_tmpl.h | 1 -
1 files changed, 0 insertions(+), 1 deletions(-)
diff --git a/drivers/kvm/paging_tmpl.h b/drivers/kvm/paging_tmpl.h
index 211fef8..179ce9e 100644
--- a/drivers/kvm/paging_tmpl.h
+++ b/drivers/kvm/paging_tmpl.h
@@ -258,7 +258,6 @@ static void FNAME(set_pte)(struct kvm_vcpu *vcpu, pt_element_t gpte,
* demand paging).
*/
spte = PT_PRESENT_MASK | PT_DIRTY_MASK;
- spte |= gpte & PT64_NX_MASK;
if (!dirty)
pte_access &= ~ACC_WRITE_MASK;
if (!(pte_access & ACC_EXEC_MASK))
--
1.5.3.7
--
When we emulate a guest pte write, we fail to apply the correct inherited
permissions from the parent ptes. Now that we store inherited permissions
in the shadow page, we can use that to update the pte permissions correctly.
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
drivers/kvm/kvm.h | 4 ++--
drivers/kvm/mmu.c | 4 ++--
drivers/kvm/paging_tmpl.h | 4 +++-
3 files changed, 7 insertions(+), 5 deletions(-)
diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index 0d3555b..ceefb44 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -55,7 +55,7 @@ struct kvm_pte_chain {
* bits 4:7 - page table level for this shadow (1-4)
* bits 8:9 - page table quadrant for 2-level guests
* bit 16 - "metaphysical" - gfn is not a real page (huge page/real mode)
- * bits 17:19 - "access" - the user, writable, and nx bits of a huge page pde
+ * bits 17:19 - common access permissions for all ptes in this shadow page
*/
union kvm_mmu_page_role {
unsigned word;
@@ -65,7 +65,7 @@ union kvm_mmu_page_role {
unsigned quadrant : 2;
unsigned pad_for_nice_hex_output : 6;
unsigned metaphysical : 1;
- unsigned hugepage_access : 3;
+ unsigned access : 3;
};
};
diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c
index f8a2137..cace1e4 100644
--- a/drivers/kvm/mmu.c
+++ b/drivers/kvm/mmu.c
@@ -680,7 +680,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
gva_t gaddr,
unsigned level,
int metaphysical,
- unsigned hugepage_access,
+ unsigned access,
u64 *parent_pte)
{
union kvm_mmu_page_role role;
@@ -694,7 +694,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
role.glevels = vcpu->mmu.root_level;
role.level = level;
role.metaphysical = metaphysical;
- role.hugepage_access = hugepage_access;
+ role.access = access;
if (vcpu->mmu.root_level <= PT32_ROOT_LEVEL) {
quadrant = gaddr >> (PAGE_SHIFT + ...
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
drivers/kvm/paging_tmpl.h | 20 +++++++++++++-------
1 files changed, 13 insertions(+), 7 deletions(-)
diff --git a/drivers/kvm/paging_tmpl.h b/drivers/kvm/paging_tmpl.h
index 59ba752..1fc4f9b 100644
--- a/drivers/kvm/paging_tmpl.h
+++ b/drivers/kvm/paging_tmpl.h
@@ -102,6 +102,18 @@ static bool FNAME(cmpxchg_gpte)(struct kvm *kvm,
return (ret != orig_pte);
}
+static unsigned FNAME(gpte_access)(struct kvm_vcpu *vcpu, pt_element_t gpte)
+{
+ unsigned access;
+
+ access = (gpte & (PT_WRITABLE_MASK | PT_USER_MASK)) | ACC_EXEC_MASK;
+#if PTTYPE == 64
+ if (is_nx(vcpu))
+ access &= ~(gpte >> PT64_NX_SHIFT);
+#endif
+ return access;
+}
+
/*
* Fetch a guest pte for a guest virtual address
*/
@@ -166,13 +178,7 @@ walk:
pte |= PT_ACCESSED_MASK;
}
- pte_access = pte & (PT_WRITABLE_MASK | PT_USER_MASK);
- pte_access |= ACC_EXEC_MASK;
-#if PTTYPE == 64
- if (is_nx(vcpu))
- pte_access &= ~(pte >> PT64_NX_SHIFT);
-#endif
- pte_access &= pt_access;
+ pte_access = pt_access & FNAME(gpte_access)(vcpu, pte);
if (walker->level == PT_PAGE_TABLE_LEVEL) {
walker->gfn = gpte_to_gfn(pte);
--
1.5.3.7
--
The nx bit is awkwardly placed at bit position 63; furthermore it
has a reversed meaning compared to the other bits, which means we can't use
a bitwise AND to calculate compounded access masks.
So, we simplify things by creating a new 3-bit exec/write/user access word,
and doing all calculations in that.
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
drivers/kvm/mmu.c | 14 +++++++---
drivers/kvm/paging_tmpl.h | 58 +++++++++++++++++++++++++-------------------
2 files changed, 43 insertions(+), 29 deletions(-)
diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c
index 62a7415..f8a2137 100644
--- a/drivers/kvm/mmu.c
+++ b/drivers/kvm/mmu.c
@@ -85,7 +85,8 @@ static int dbg = 1;
#define PT_PAGE_SIZE_MASK (1ULL << 7)
#define PT_PAT_MASK (1ULL << 7)
#define PT_GLOBAL_MASK (1ULL << 8)
-#define PT64_NX_MASK (1ULL << 63)
+#define PT64_NX_SHIFT 63
+#define PT64_NX_MASK (1ULL << PT64_NX_SHIFT)
#define PT_PAT_SHIFT 7
#define PT_DIR_PAT_SHIFT 12
@@ -153,6 +154,11 @@ static int dbg = 1;
#define RMAP_EXT 4
+#define ACC_EXEC_MASK 1
+#define ACC_WRITE_MASK PT_WRITABLE_MASK
+#define ACC_USER_MASK PT_USER_MASK
+#define ACC_ALL (ACC_EXEC_MASK | ACC_WRITE_MASK | ACC_USER_MASK)
+
struct kvm_rmap_desc {
u64 *shadow_ptes[RMAP_EXT];
struct kvm_rmap_desc *more;
@@ -921,7 +927,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, struct page *page)
>> PAGE_SHIFT;
new_table = kvm_mmu_get_page(vcpu, pseudo_gfn,
v, level - 1,
- 1, 3, &table[index]);
+ 1, ACC_ALL, &table[index]);
if (!new_table) {
pgprintk("nonpaging_map: ENOMEM\n");
kvm_release_page_clean(page);
@@ -988,7 +994,7 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
ASSERT(!VALID_PAGE(root));
sp = kvm_mmu_get_page(vcpu, root_gfn, 0,
- PT64_ROOT_LEVEL, 0, 0, NULL);
+ PT64_ROOT_LEVEL, 0, ACC_ALL, NULL);
root = __pa(sp->spt);
++sp->root_count;
vcpu->mmu.root_hpa = ...
From: Marcelo Tosatti <marcelo@kvack.org>
In preparation for multi-threaded guest pte walking, use cmpxchg()
when updating guest ptes. This guarantees that the assignment of the
dirty bit can't be lost if two CPUs are faulting on the same address
simultaneously.
[avi: fix kunmap_atomic() parameters]
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
drivers/kvm/paging_tmpl.h | 36 ++++++++++++++++++++++++++++++++++--
1 files changed, 34 insertions(+), 2 deletions(-)
diff --git a/drivers/kvm/paging_tmpl.h b/drivers/kvm/paging_tmpl.h
index 3c40a59..8086f82 100644
--- a/drivers/kvm/paging_tmpl.h
+++ b/drivers/kvm/paging_tmpl.h
@@ -34,7 +34,9 @@
#define PT_LEVEL_BITS PT64_LEVEL_BITS
#ifdef CONFIG_X86_64
#define PT_MAX_FULL_LEVELS 4
+ #define CMPXCHG cmpxchg
#else
+ #define CMPXCHG cmpxchg64
#define PT_MAX_FULL_LEVELS 2
#endif
#elif PTTYPE == 32
@@ -48,6 +50,7 @@
#define PT_LEVEL_MASK(level) PT32_LEVEL_MASK(level)
#define PT_LEVEL_BITS PT32_LEVEL_BITS
#define PT_MAX_FULL_LEVELS 2
+ #define CMPXCHG cmpxchg
#else
#error Invalid PTTYPE value
#endif
@@ -78,6 +81,26 @@ static gfn_t gpte_to_gfn_pde(pt_element_t gpte)
return (gpte & PT_DIR_BASE_ADDR_MASK) >> PAGE_SHIFT;
}
+static bool FNAME(cmpxchg_gpte)(struct kvm *kvm,
+ gfn_t table_gfn, unsigned index,
+ pt_element_t orig_pte, pt_element_t new_pte)
+{
+ pt_element_t ret;
+ pt_element_t *table;
+ struct page *page;
+
+ page = gfn_to_page(kvm, table_gfn);
+ table = kmap_atomic(page, KM_USER0);
+
+ ret = CMPXCHG(&table[index], orig_pte, new_pte);
+
+ kunmap_atomic(table, KM_USER0);
+
+ kvm_release_page_dirty(page);
+
+ return (ret != orig_pte);
+}
+
/*
* Fetch a guest pte for a guest virtual address
*/
@@ -91,6 +114,7 @@ static int FNAME(walk_addr)(struct guest_walker *walker,
gpa_t pte_gpa;
pgprintk("%s: addr %lx\n", __FUNCTION__, addr);
+walk:
walker->level = vcpu->mmu.root_level;
pte = ...
From: npiggin@suse.de <npiggin@suse.de>
Signed-off-by: Nick Piggin <npiggin@suse.de>
Cc: kvm-devel@lists.sourceforge.net
Cc: avi@qumranet.com
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
drivers/kvm/kvm_main.c | 42 +++++++++++++++---------------------------
1 files changed, 15 insertions(+), 27 deletions(-)
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index 3d1023c..cfb27ef 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -662,30 +662,24 @@ void kvm_resched(struct kvm_vcpu *vcpu)
}
EXPORT_SYMBOL_GPL(kvm_resched);
-static struct page *kvm_vcpu_nopage(struct vm_area_struct *vma,
- unsigned long address,
- int *type)
+static int kvm_vcpu_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
struct kvm_vcpu *vcpu = vma->vm_file->private_data;
- unsigned long pgoff;
struct page *page;
- pgoff = ((address - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
- if (pgoff == 0)
+ if (vmf->pgoff == 0)
page = virt_to_page(vcpu->run);
- else if (pgoff == KVM_PIO_PAGE_OFFSET)
+ else if (vmf->pgoff == KVM_PIO_PAGE_OFFSET)
page = virt_to_page(vcpu->pio_data);
else
- return NOPAGE_SIGBUS;
+ return VM_FAULT_SIGBUS;
get_page(page);
- if (type != NULL)
- *type = VM_FAULT_MINOR;
-
- return page;
+ vmf->page = page;
+ return 0;
}
static struct vm_operations_struct kvm_vcpu_vm_ops = {
- .nopage = kvm_vcpu_nopage,
+ .fault = kvm_vcpu_fault,
};
static int kvm_vcpu_mmap(struct file *file, struct vm_area_struct *vma)
@@ -976,31 +970,25 @@ out:
return r;
}
-static struct page *kvm_vm_nopage(struct vm_area_struct *vma,
- unsigned long address,
- int *type)
+static int kvm_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
struct kvm *kvm = vma->vm_file->private_data;
- unsigned long pgoff;
struct page *page;
- pgoff = ((address - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
- if (!kvm_is_visible_gfn(kvm, ...
Unify the special instruction switch with the regular instruction switch,
and the two-byte special instruction switch with the regular two-byte
instruction switch. That makes it much easier to find an instruction or
the place where an instruction needs to be added.
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
drivers/kvm/x86_emulate.c | 342 ++++++++++++++++++++++-----------------------
1 files changed, 168 insertions(+), 174 deletions(-)
diff --git a/drivers/kvm/x86_emulate.c b/drivers/kvm/x86_emulate.c
index 499d1ad..3e3eba7 100644
--- a/drivers/kvm/x86_emulate.c
+++ b/drivers/kvm/x86_emulate.c
@@ -1294,6 +1294,8 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
}
c->dst.orig_val = c->dst.val;
+special_insn:
+
if (c->twobyte)
goto twobyte_insn;
@@ -1378,6 +1380,52 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
goto cannot_emulate;
c->dst.val = (s32) c->src.val;
break;
+ case 0x6a: /* push imm8 */
+ c->src.val = 0L;
+ c->src.val = insn_fetch(s8, 1, c->eip);
+ emulate_push(ctxt);
+ break;
+ case 0x6c: /* insb */
+ case 0x6d: /* insw/insd */
+ if (kvm_emulate_pio_string(ctxt->vcpu, NULL,
+ 1,
+ (c->d & ByteOp) ? 1 : c->op_bytes,
+ c->rep_prefix ?
+ address_mask(c->regs[VCPU_REGS_RCX]) : 1,
+ (ctxt->eflags & EFLG_DF),
+ register_address(ctxt->es_base,
+ c->regs[VCPU_REGS_RDI]),
+ c->rep_prefix,
+ c->regs[VCPU_REGS_RDX]) == 0) {
+ c->eip = saved_eip;
+ return -1;
+ }
+ return 0;
+ case 0x6e: /* outsb */
+ case 0x6f: /* outsw/outsd */
+ if (kvm_emulate_pio_string(ctxt->vcpu, NULL,
+ 0,
+ (c->d & ByteOp) ? 1 : c->op_bytes,
+ c->rep_prefix ?
+ address_mask(c->regs[VCPU_REGS_RCX]) : 1,
+ (ctxt->eflags & EFLG_DF),
+ register_address(c->override_base ?
+ *c->override_base :
+ ctxt->ds_base,
+ c->regs[VCPU_REGS_RSI]),
+ c->rep_prefix,
+ c->regs[VCPU_REGS_RDX]) == 0) {
+ c->eip = ...
From: Carlo Marcelo Arenas Belon <carenas@sajinet.com.pe>
This patch removes the KVM-specific defines for MSR_EFER that were being used
in the svm support file and migrates all references to use the ones from the
kernel headers instead, which are used everywhere else and have the same
values.
Signed-off-by: Carlo Marcelo Arenas Belon <carenas@sajinet.com.pe>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
drivers/kvm/svm.c | 17 +++++++----------
1 files changed, 7 insertions(+), 10 deletions(-)
diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c
index 928fb35..04e6b39 100644
--- a/drivers/kvm/svm.c
+++ b/drivers/kvm/svm.c
@@ -42,9 +42,6 @@ MODULE_LICENSE("GPL");
#define SEG_TYPE_LDT 2
#define SEG_TYPE_BUSY_TSS16 3
-#define KVM_EFER_LMA (1 << 10)
-#define KVM_EFER_LME (1 << 8)
-
#define SVM_FEATURE_NPT (1 << 0)
#define SVM_FEATURE_LBRV (1 << 1)
#define SVM_DEATURE_SVML (1 << 2)
@@ -184,8 +181,8 @@ static inline void flush_guest_tlb(struct kvm_vcpu *vcpu)
static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
{
- if (!(efer & KVM_EFER_LMA))
- efer &= ~KVM_EFER_LME;
+ if (!(efer & EFER_LMA))
+ efer &= ~EFER_LME;
to_svm(vcpu)->vmcb->save.efer = efer | MSR_EFER_SVME_MASK;
vcpu->shadow_efer = efer;
@@ -777,15 +774,15 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
struct vcpu_svm *svm = to_svm(vcpu);
#ifdef CONFIG_X86_64
- if (vcpu->shadow_efer & KVM_EFER_LME) {
+ if (vcpu->shadow_efer & EFER_LME) {
if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) {
- vcpu->shadow_efer |= KVM_EFER_LMA;
- svm->vmcb->save.efer |= KVM_EFER_LMA | KVM_EFER_LME;
+ vcpu->shadow_efer |= EFER_LMA;
+ svm->vmcb->save.efer |= EFER_LMA | EFER_LME;
}
if (is_paging(vcpu) && !(cr0 & X86_CR0_PG)) {
- vcpu->shadow_efer &= ~KVM_EFER_LMA;
- svm->vmcb->save.efer &= ~(KVM_EFER_LMA | KVM_EFER_LME);
+ vcpu->shadow_efer &= ~EFER_LMA;
+ svm->vmcb->save.efer &= ~(EFER_LMA | EFER_LME);
}
}
#endif
--
1.5.3.7
--
