These thirty-five patches comprise the second batch of the updates I have queued for 2.6.26. Please review. arch/x86/Kconfig | 8 + arch/x86/kernel/Makefile | 1 + arch/x86/kernel/crash.c | 3 +- arch/x86/kernel/kvm.c | 247 ++++++++++++++ arch/x86/kernel/kvmclock.c | 27 ++ arch/x86/kernel/reboot.c | 13 +- arch/x86/kernel/setup_32.c | 1 + arch/x86/kernel/setup_64.c | 2 + arch/x86/kvm/Kconfig | 2 +- arch/x86/kvm/Makefile | 3 +- arch/x86/kvm/i8254.c | 600 ++++++++++++++++++++++++++++++++++ arch/x86/kvm/i8254.h | 62 ++++ arch/x86/kvm/irq.c | 3 + arch/x86/kvm/lapic.c | 8 +- arch/x86/kvm/mmu.c | 426 ++++++++++++++++++++++--- arch/x86/kvm/paging_tmpl.h | 46 ++- arch/x86/kvm/segment_descriptor.h | 29 -- arch/x86/kvm/svm.c | 41 ++- arch/x86/kvm/svm.h | 3 + arch/x86/kvm/tss.h | 59 ++++ arch/x86/kvm/vmx.c | 70 +++- arch/x86/kvm/x86.c | 647 +++++++++++++++++++++++++++++++++---- arch/x86/kvm/x86_emulate.c | 4 +- include/asm-x86/kvm.h | 21 ++ include/asm-x86/kvm_host.h | 54 +++- include/asm-x86/kvm_para.h | 32 ++- include/asm-x86/reboot.h | 2 + include/linux/kvm.h | 7 + include/linux/kvm_host.h | 21 +- include/linux/kvm_para.h | 11 +- virt/kvm/kvm_main.c | 78 ++++- 31 files changed, 2302 insertions(+), 229 deletions(-) --
From: Amit Shah <amit.shah@qumranet.com>
Signed-off-by: Amit Shah <amit.shah@qumranet.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
arch/x86/kvm/x86.c | 2 ++
include/asm-x86/kvm_host.h | 1 +
2 files changed, 3 insertions(+), 0 deletions(-)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index a063f44..15bba5d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -72,6 +72,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ "irq_window", VCPU_STAT(irq_window_exits) },
{ "halt_exits", VCPU_STAT(halt_exits) },
{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
+ { "hypercalls", VCPU_STAT(hypercalls) },
{ "request_irq", VCPU_STAT(request_irq_exits) },
{ "irq_exits", VCPU_STAT(irq_exits) },
{ "host_state_reload", VCPU_STAT(host_state_reload) },
@@ -2405,6 +2406,7 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
}
vcpu->arch.regs[VCPU_REGS_RAX] = ret;
kvm_x86_ops->decache_regs(vcpu);
+ ++vcpu->stat.hypercalls;
return 0;
}
EXPORT_SYMBOL_GPL(kvm_emulate_hypercall);
diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
index 0c429c8..4f5a71a 100644
--- a/include/asm-x86/kvm_host.h
+++ b/include/asm-x86/kvm_host.h
@@ -327,6 +327,7 @@ struct kvm_vcpu_stat {
u32 fpu_reload;
u32 insn_emulation;
u32 insn_emulation_fail;
+ u32 hypercalls;
};
struct descriptor_table {
--
1.5.4.5
--
From: Harvey Harrison <harvey.harrison@gmail.com>
Fixes sparse warning as well.
arch/x86/kvm/svm.c:69:15: warning: symbol 'iopm_base' was not declared. Should it be static?
Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
arch/x86/kvm/svm.c | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 7d73e93..ff6e5c8 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -66,7 +66,7 @@ static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu)
return container_of(vcpu, struct vcpu_svm, vcpu);
}
-unsigned long iopm_base;
+static unsigned long iopm_base;
struct kvm_ldttss_desc {
u16 limit0;
--
1.5.4.5
--
From: Harvey Harrison <harvey.harrison@gmail.com>
Nesting __emulate_2op_nobyte inside__emulate_2op produces many shadowed
variable warnings on the internal variable _tmp used by both macros.
Change the outer macro to use __tmp.
Avoids a sparse warning like the following at every call site of __emulate_2op
arch/x86/kvm/x86_emulate.c:1091:3: warning: symbol '_tmp' shadows an earlier one
arch/x86/kvm/x86_emulate.c:1091:3: originally declared here
[18 more warnings suppressed]
Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
arch/x86/kvm/x86_emulate.c | 4 ++--
1 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index cacdcf5..f59ed93 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -371,7 +371,7 @@ static u16 group2_table[] = {
#define __emulate_2op(_op,_src,_dst,_eflags,_bx,_by,_wx,_wy,_lx,_ly,_qx,_qy) \
do { \
- unsigned long _tmp; \
+ unsigned long __tmp; \
switch ((_dst).bytes) { \
case 1: \
__asm__ __volatile__ ( \
@@ -379,7 +379,7 @@ static u16 group2_table[] = {
_op"b %"_bx"3,%1; " \
_POST_EFLAGS("0", "4", "2") \
: "=m" (_eflags), "=m" ((_dst).val), \
- "=&r" (_tmp) \
+ "=&r" (__tmp) \
: _by ((_src).val), "i" (EFLAGS_MASK)); \
break; \
default: \
--
1.5.4.5
--
The x86 desc_struct unification allows us to remove segment_descriptor.h.
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
arch/x86/kvm/segment_descriptor.h | 29 -----------------------------
arch/x86/kvm/vmx.c | 3 +--
arch/x86/kvm/x86.c | 15 +++++++--------
3 files changed, 8 insertions(+), 39 deletions(-)
delete mode 100644 arch/x86/kvm/segment_descriptor.h
diff --git a/arch/x86/kvm/segment_descriptor.h b/arch/x86/kvm/segment_descriptor.h
deleted file mode 100644
index 56fc4c8..0000000
--- a/arch/x86/kvm/segment_descriptor.h
+++ /dev/null
@@ -1,29 +0,0 @@
-#ifndef __SEGMENT_DESCRIPTOR_H
-#define __SEGMENT_DESCRIPTOR_H
-
-struct segment_descriptor {
- u16 limit_low;
- u16 base_low;
- u8 base_mid;
- u8 type : 4;
- u8 system : 1;
- u8 dpl : 2;
- u8 present : 1;
- u8 limit_high : 4;
- u8 avl : 1;
- u8 long_mode : 1;
- u8 default_op : 1;
- u8 granularity : 1;
- u8 base_high;
-} __attribute__((packed));
-
-#ifdef CONFIG_X86_64
-/* LDT or TSS descriptor in the GDT. 16 bytes. */
-struct segment_descriptor_64 {
- struct segment_descriptor s;
- u32 base_higher;
- u32 pad_zero;
-};
-
-#endif
-#endif
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 2d5ccec..f46ad03 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -17,7 +17,6 @@
#include "irq.h"
#include "vmx.h"
-#include "segment_descriptor.h"
#include "mmu.h"
#include <linux/kvm_host.h>
@@ -388,7 +387,7 @@ static void reload_tss(void)
* VT restores TR but not its size. Useless.
*/
struct descriptor_table gdt;
- struct segment_descriptor *descs;
+ struct desc_struct *descs;
get_gdt(&gdt);
descs = (void *)gdt.base;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index b7c32f6..a063f44 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -15,7 +15,6 @@
*/
#include <linux/kvm_host.h>
-#include "segment_descriptor.h"
#include "irq.h"
#include "mmu.h"
@@ -29,6 +28,7 @@
...From: Alexander Graf <alex@csgraf.de>
Darwin relies on this and ceases to work without.
Signed-off-by: Alexander Graf <alex@csgraf.de>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
arch/x86/kvm/x86.c | 8 +++++++-
1 files changed, 7 insertions(+), 1 deletions(-)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index cf6261e..0dd038e 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -426,6 +426,7 @@ static u32 msrs_to_save[] = {
MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
#endif
MSR_IA32_TIME_STAMP_COUNTER, MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
+ MSR_IA32_PERF_STATUS,
};
static unsigned num_msrs_to_save;
@@ -653,7 +654,6 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
case MSR_IA32_MC0_MISC+12:
case MSR_IA32_MC0_MISC+16:
case MSR_IA32_UCODE_REV:
- case MSR_IA32_PERF_STATUS:
case MSR_IA32_EBL_CR_POWERON:
/* MTRR registers */
case 0xfe:
@@ -669,6 +669,12 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
case MSR_IA32_MISC_ENABLE:
data = vcpu->arch.ia32_misc_enable_msr;
break;
+ case MSR_IA32_PERF_STATUS:
+ /* TSC increment by tick */
+ data = 1000ULL;
+ /* CPU multiplier */
+ data |= (((uint64_t)4ULL) << 40);
+ break;
case MSR_EFER:
data = vcpu->arch.shadow_efer;
break;
--
1.5.4.5
--
From: Harvey Harrison <harvey.harrison@gmail.com>
In two case statements, use the ever popular 'i' instead of index:
arch/x86/kvm/x86.c:1063:7: warning: symbol 'index' shadows an earlier one
arch/x86/kvm/x86.c:1000:9: originally declared here
arch/x86/kvm/x86.c:1079:7: warning: symbol 'index' shadows an earlier one
arch/x86/kvm/x86.c:1000:9: originally declared here
Make it static.
arch/x86/kvm/x86.c:1945:24: warning: symbol 'emulate_ops' was not declared. Should it be static?
Drop the return statements.
arch/x86/kvm/x86.c:2878:2: warning: returning void-valued expression
arch/x86/kvm/x86.c:2944:2: warning: returning void-valued expression
Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
arch/x86/kvm/x86.c | 26 +++++++++++++-------------
1 files changed, 13 insertions(+), 13 deletions(-)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 15bba5d..cf6261e 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1083,32 +1083,32 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
}
/* function 4 and 0xb have additional index. */
case 4: {
- int index, cache_type;
+ int i, cache_type;
entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
/* read more entries until cache_type is zero */
- for (index = 1; *nent < maxnent; ++index) {
- cache_type = entry[index - 1].eax & 0x1f;
+ for (i = 1; *nent < maxnent; ++i) {
+ cache_type = entry[i - 1].eax & 0x1f;
if (!cache_type)
break;
- do_cpuid_1_ent(&entry[index], function, index);
- entry[index].flags |=
+ do_cpuid_1_ent(&entry[i], function, i);
+ entry[i].flags |=
KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
++*nent;
}
break;
}
case 0xb: {
- int index, level_type;
+ int i, level_type;
entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
/* read more entries until level_type is zero */
- for (index = 1; *nent < maxnent; ++index) {
- level_type = entry[index - ...From: Xiantao Zhang <xiantao.zhang@intel.com>
Since the size of kvm_regs is too big to allocate from kernel stack on ia64,
use kzalloc to allocate it.
Signed-off-by: Xiantao Zhang <xiantao.zhang@intel.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
virt/kvm/kvm_main.c | 33 ++++++++++++++++++++++-----------
1 files changed, 22 insertions(+), 11 deletions(-)
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 0dabf58..30bf832 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -849,28 +849,39 @@ static long kvm_vcpu_ioctl(struct file *filp,
r = kvm_arch_vcpu_ioctl_run(vcpu, vcpu->run);
break;
case KVM_GET_REGS: {
- struct kvm_regs kvm_regs;
+ struct kvm_regs *kvm_regs;
- memset(&kvm_regs, 0, sizeof kvm_regs);
- r = kvm_arch_vcpu_ioctl_get_regs(vcpu, &kvm_regs);
- if (r)
+ r = -ENOMEM;
+ kvm_regs = kzalloc(sizeof(struct kvm_regs), GFP_KERNEL);
+ if (!kvm_regs)
goto out;
+ r = kvm_arch_vcpu_ioctl_get_regs(vcpu, kvm_regs);
+ if (r)
+ goto out_free1;
r = -EFAULT;
- if (copy_to_user(argp, &kvm_regs, sizeof kvm_regs))
- goto out;
+ if (copy_to_user(argp, kvm_regs, sizeof(struct kvm_regs)))
+ goto out_free1;
r = 0;
+out_free1:
+ kfree(kvm_regs);
break;
}
case KVM_SET_REGS: {
- struct kvm_regs kvm_regs;
+ struct kvm_regs *kvm_regs;
- r = -EFAULT;
- if (copy_from_user(&kvm_regs, argp, sizeof kvm_regs))
+ r = -ENOMEM;
+ kvm_regs = kzalloc(sizeof(struct kvm_regs), GFP_KERNEL);
+ if (!kvm_regs)
goto out;
- r = kvm_arch_vcpu_ioctl_set_regs(vcpu, &kvm_regs);
+ r = -EFAULT;
+ if (copy_from_user(kvm_regs, argp, sizeof(struct kvm_regs)))
+ goto out_free2;
+ r = kvm_arch_vcpu_ioctl_set_regs(vcpu, kvm_regs);
if (r)
- goto out;
+ goto out_free2;
r = 0;
+out_free2:
+ kfree(kvm_regs);
break;
}
case KVM_GET_SREGS: {
--
1.5.4.5
--
From: Marcelo Tosatti <mtosatti@redhat.com>
Mark zapped root pagetables as invalid and ignore such pages during lookup.
This is a problem with the cr3-target feature, where a zapped root table fools
the faulting code into creating a read-only mapping. The result is a lockup
if the instruction can't be emulated.
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Anthony Liguori <aliguori@us.ibm.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
arch/x86/kvm/mmu.c | 12 ++++++++++--
arch/x86/kvm/x86.c | 12 ++++++++++++
include/asm-x86/kvm_host.h | 1 +
include/linux/kvm_host.h | 2 ++
virt/kvm/kvm_main.c | 23 +++++++++++++++++++++++
5 files changed, 48 insertions(+), 2 deletions(-)
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index f7541fe..103d008 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -667,7 +667,8 @@ static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn)
index = kvm_page_table_hashfn(gfn);
bucket = &kvm->arch.mmu_page_hash[index];
hlist_for_each_entry(sp, node, bucket, hash_link)
- if (sp->gfn == gfn && !sp->role.metaphysical) {
+ if (sp->gfn == gfn && !sp->role.metaphysical
+ && !sp->role.invalid) {
pgprintk("%s: found role %x\n",
__FUNCTION__, sp->role.word);
return sp;
@@ -792,8 +793,11 @@ static void kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp)
if (!sp->root_count) {
hlist_del(&sp->hash_link);
kvm_mmu_free_page(kvm, sp);
- } else
+ } else {
list_move(&sp->link, &kvm->arch.active_mmu_pages);
+ sp->role.invalid = 1;
+ kvm_reload_remote_mmus(kvm);
+ }
kvm_mmu_reset_last_pte_updated(kvm);
}
@@ -1073,6 +1077,8 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu)
sp = page_header(root);
--sp->root_count;
+ if (!sp->root_count && sp->role.invalid)
+ kvm_mmu_zap_page(vcpu->kvm, sp);
vcpu->arch.mmu.root_hpa = INVALID_PAGE;
spin_unlock(&vcpu->kvm->mmu_lock);
return;
@@ -1085,6 ...Most Intel hosts have a stable tsc, and playing with the offset only
reduces accuracy. By limiting tsc offset adjustment only to forward updates,
we effectively disable tsc offset adjustment on these hosts.
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
arch/x86/kvm/vmx.c | 9 ++++++---
1 files changed, 6 insertions(+), 3 deletions(-)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 7ef710a..fb0389d 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -519,7 +519,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
u64 phys_addr = __pa(vmx->vmcs);
- u64 tsc_this, delta;
+ u64 tsc_this, delta, new_offset;
if (vcpu->cpu != cpu) {
vcpu_clear(vmx);
@@ -559,8 +559,11 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
* Make sure the time stamp counter is monotonous.
*/
rdtscll(tsc_this);
- delta = vcpu->arch.host_tsc - tsc_this;
- vmcs_write64(TSC_OFFSET, vmcs_read64(TSC_OFFSET) + delta);
+ if (tsc_this < vcpu->arch.host_tsc) {
+ delta = vcpu->arch.host_tsc - tsc_this;
+ new_offset = vmcs_read64(TSC_OFFSET) + delta;
+ vmcs_write64(TSC_OFFSET, new_offset);
+ }
}
}
--
1.5.4.5
--
From: Joerg Roedel <joerg.roedel@amd.com>
In the current inject_page_fault path KVM only checks if there is another PF
pending and injects a DF then. But it has to check for a pending DF too to
detect a shutdown condition in the VCPU. If this is not detected the VCPU goes
to a PF -> DF -> PF loop when it should triple fault. This patch detects this
condition and handles it with an KVM_SHUTDOWN exit to userspace.
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
arch/x86/kvm/x86.c | 20 +++++++++++++++-----
include/linux/kvm_host.h | 1 +
2 files changed, 16 insertions(+), 5 deletions(-)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index dbcff38..491eda3 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -155,11 +155,16 @@ void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long addr,
u32 error_code)
{
++vcpu->stat.pf_guest;
- if (vcpu->arch.exception.pending && vcpu->arch.exception.nr == PF_VECTOR) {
- printk(KERN_DEBUG "kvm: inject_page_fault:"
- " double fault 0x%lx\n", addr);
- vcpu->arch.exception.nr = DF_VECTOR;
- vcpu->arch.exception.error_code = 0;
+ if (vcpu->arch.exception.pending) {
+ if (vcpu->arch.exception.nr == PF_VECTOR) {
+ printk(KERN_DEBUG "kvm: inject_page_fault:"
+ " double fault 0x%lx\n", addr);
+ vcpu->arch.exception.nr = DF_VECTOR;
+ vcpu->arch.exception.error_code = 0;
+ } else if (vcpu->arch.exception.nr == DF_VECTOR) {
+ /* triple fault -> shutdown */
+ set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests);
+ }
return;
}
vcpu->arch.cr2 = addr;
@@ -2676,6 +2681,11 @@ again:
r = 0;
goto out;
}
+ if (test_and_clear_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests)) {
+ kvm_run->exit_reason = KVM_EXIT_SHUTDOWN;
+ r = 0;
+ goto out;
+ }
}
kvm_inject_pending_timer_irqs(vcpu);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 9750bb3..958e003 100644
--- ...Names like 'set_cr3()' look dangerously close to affecting the host.
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
arch/x86/kvm/vmx.c | 14 ++++++------
arch/x86/kvm/x86.c | 46 ++++++++++++++++++++++----------------------
include/asm-x86/kvm_host.h | 12 +++++-----
3 files changed, 36 insertions(+), 36 deletions(-)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index f46ad03..5034503 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1683,7 +1683,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
vmx->vcpu.arch.rmode.active = 0;
vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val();
- set_cr8(&vmx->vcpu, 0);
+ kvm_set_cr8(&vmx->vcpu, 0);
msr = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
if (vmx->vcpu.vcpu_id == 0)
msr |= MSR_IA32_APICBASE_BSP;
@@ -2026,22 +2026,22 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
switch (cr) {
case 0:
vcpu_load_rsp_rip(vcpu);
- set_cr0(vcpu, vcpu->arch.regs[reg]);
+ kvm_set_cr0(vcpu, vcpu->arch.regs[reg]);
skip_emulated_instruction(vcpu);
return 1;
case 3:
vcpu_load_rsp_rip(vcpu);
- set_cr3(vcpu, vcpu->arch.regs[reg]);
+ kvm_set_cr3(vcpu, vcpu->arch.regs[reg]);
skip_emulated_instruction(vcpu);
return 1;
case 4:
vcpu_load_rsp_rip(vcpu);
- set_cr4(vcpu, vcpu->arch.regs[reg]);
+ kvm_set_cr4(vcpu, vcpu->arch.regs[reg]);
skip_emulated_instruction(vcpu);
return 1;
case 8:
vcpu_load_rsp_rip(vcpu);
- set_cr8(vcpu, vcpu->arch.regs[reg]);
+ kvm_set_cr8(vcpu, vcpu->arch.regs[reg]);
skip_emulated_instruction(vcpu);
if (irqchip_in_kernel(vcpu->kvm))
return 1;
@@ -2067,14 +2067,14 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
return 1;
case 8:
vcpu_load_rsp_rip(vcpu);
- vcpu->arch.regs[reg] = get_cr8(vcpu);
+ vcpu->arch.regs[reg] = kvm_get_cr8(vcpu);
vcpu_put_rsp_rip(vcpu);
skip_emulated_instruction(vcpu);
return 1;
...From: Sheng Yang <sheng.yang@intel.com>
Separate the reset part and prepare for reset support.
Signed-off-by: Sheng Yang <sheng.yang@intel.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
arch/x86/kvm/i8254.c | 30 +++++++++++++++++++-----------
arch/x86/kvm/i8254.h | 1 +
2 files changed, 20 insertions(+), 11 deletions(-)
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 7776f50..06a241a 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -476,12 +476,28 @@ static int speaker_in_range(struct kvm_io_device *this, gpa_t addr)
return (addr == KVM_SPEAKER_BASE_ADDRESS);
}
-struct kvm_pit *kvm_create_pit(struct kvm *kvm)
+void kvm_pit_reset(struct kvm_pit *pit)
{
int i;
+ struct kvm_kpit_channel_state *c;
+
+ mutex_lock(&pit->pit_state.lock);
+ for (i = 0; i < 3; i++) {
+ c = &pit->pit_state.channels[i];
+ c->mode = 0xff;
+ c->gate = (i != 2);
+ pit_load_count(pit->kvm, i, 0);
+ }
+ mutex_unlock(&pit->pit_state.lock);
+
+ atomic_set(&pit->pit_state.pit_timer.pending, 0);
+ pit->pit_state.inject_pending = 1;
+}
+
+struct kvm_pit *kvm_create_pit(struct kvm *kvm)
+{
struct kvm_pit *pit;
struct kvm_kpit_state *pit_state;
- struct kvm_kpit_channel_state *c;
pit = kzalloc(sizeof(struct kvm_pit), GFP_KERNEL);
if (!pit)
@@ -510,17 +526,9 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm)
pit_state->pit = pit;
hrtimer_init(&pit_state->pit_timer.timer,
CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
- atomic_set(&pit_state->pit_timer.pending, 0);
- for (i = 0; i < 3; i++) {
- c = &pit_state->channels[i];
- c->mode = 0xff;
- c->gate = (i != 2);
- pit_load_count(kvm, i, 0);
- }
-
mutex_unlock(&pit->pit_state.lock);
- pit->pit_state.inject_pending = 1;
+ kvm_pit_reset(pit);
return pit;
}
diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h
index 586bbf0..e63ef38 100644
--- a/arch/x86/kvm/i8254.h
+++ b/arch/x86/kvm/i8254.h
@@ -57,5 +57,6 @@ void kvm_pit_timer_intr_post(struct ...From: Harvey Harrison <harvey.harrison@gmail.com> __FUNCTION__ is gcc-specific, use __func__ Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com> Signed-off-by: Avi Kivity <avi@qumranet.com> --- arch/x86/kvm/lapic.c | 8 ++++---- arch/x86/kvm/mmu.c | 35 +++++++++++++++++------------------ arch/x86/kvm/paging_tmpl.h | 14 +++++++------- arch/x86/kvm/svm.c | 14 +++++++------- arch/x86/kvm/vmx.c | 6 +++--- arch/x86/kvm/x86.c | 12 ++++++------ 6 files changed, 44 insertions(+), 45 deletions(-) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 68a6b15..31280df 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -658,7 +658,7 @@ static void start_apic_timer(struct kvm_lapic *apic) apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016" PRIx64 ", " "timer initial count 0x%x, period %lldns, " - "expire @ 0x%016" PRIx64 ".\n", __FUNCTION__, + "expire @ 0x%016" PRIx64 ".\n", __func__, APIC_BUS_CYCLE_NS, ktime_to_ns(now), apic_get_reg(apic, APIC_TMICT), apic->timer.period, @@ -691,7 +691,7 @@ static void apic_mmio_write(struct kvm_io_device *this, /* too common printing */ if (offset != APIC_EOI) apic_debug("%s: offset 0x%x with length 0x%x, and value is " - "0x%x\n", __FUNCTION__, offset, len, val); + "0x%x\n", __func__, offset, len, val); offset &= 0xff0; @@ -869,7 +869,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu) struct kvm_lapic *apic; int i; - apic_debug("%s\n", __FUNCTION__); + apic_debug("%s\n", __func__); ASSERT(vcpu); apic = vcpu->arch.apic; @@ -907,7 +907,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu) apic_update_ppr(apic); apic_debug(KERN_INFO "%s: vcpu=%p, id=%d, base_msr=" - "0x%016" PRIx64 ", base_address=0x%0lx.\n", __FUNCTION__, + "0x%016" PRIx64 ", base_address=0x%0lx.\n", __func__, vcpu, kvm_apic_id(apic), vcpu->arch.apic_base, ...
From: Marcelo Tosatti <mtosatti@redhat.com> Add basic KVM paravirt support. Avoid vm-exits on IO delays. Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com> Signed-off-by: Avi Kivity <avi@qumranet.com> --- arch/x86/Kconfig | 8 ++++++ arch/x86/kernel/Makefile | 1 + arch/x86/kernel/kvm.c | 52 ++++++++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/setup_32.c | 1 + arch/x86/kernel/setup_64.c | 2 + include/linux/kvm_para.h | 6 +++++ 6 files changed, 70 insertions(+), 0 deletions(-) create mode 100644 arch/x86/kernel/kvm.c diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index e59ea05..75d2700 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -381,6 +381,14 @@ config KVM_CLOCK provides the guest with timing infrastructure such as time of day, and system time +config KVM_GUEST + bool "KVM Guest support" + select PARAVIRT + depends on !(X86_VISWS || X86_VOYAGER) + help + This option enables various optimizations for running under the KVM + hypervisor. + source "arch/x86/lguest/Kconfig" config PARAVIRT diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index a3379a3..1cc9d42 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -77,6 +77,7 @@ obj-$(CONFIG_DEBUG_RODATA_TEST) += test_rodata.o obj-$(CONFIG_DEBUG_NX_TEST) += test_nx.o obj-$(CONFIG_VMI) += vmi_32.o vmiclock_32.o +obj-$(CONFIG_KVM_GUEST) += kvm.o obj-$(CONFIG_KVM_CLOCK) += kvmclock.o obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c new file mode 100644 index 0000000..a8e36da --- /dev/null +++ b/arch/x86/kernel/kvm.c @@ -0,0 +1,52 @@ +/* + * KVM paravirt_ops implementation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) ...
From: Marcelo Tosatti <marcelo@kvack.org>
Create large pages mappings if the guest PTE's are marked as such and
the underlying memory is hugetlbfs backed. If the largepage contains
write-protected pages, a large pte is not used.
Gives a consistent 2% improvement for data copies on ram mounted
filesystem, without NPT/EPT.
Anthony measures a 4% improvement on 4-way kernbench, with NPT.
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
arch/x86/kvm/mmu.c | 222 +++++++++++++++++++++++++++++++++++++++-----
arch/x86/kvm/paging_tmpl.h | 32 +++++-
arch/x86/kvm/x86.c | 1 +
include/asm-x86/kvm_host.h | 9 ++
include/linux/kvm_host.h | 5 +
virt/kvm/kvm_main.c | 22 ++++-
6 files changed, 259 insertions(+), 32 deletions(-)
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 103d008..1932a3a 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -27,6 +27,7 @@
#include <linux/highmem.h>
#include <linux/module.h>
#include <linux/swap.h>
+#include <linux/hugetlb.h>
#include <asm/page.h>
#include <asm/cmpxchg.h>
@@ -211,6 +212,11 @@ static int is_shadow_present_pte(u64 pte)
&& pte != shadow_notrap_nonpresent_pte;
}
+static int is_large_pte(u64 pte)
+{
+ return pte & PT_PAGE_SIZE_MASK;
+}
+
static int is_writeble_pte(unsigned long pte)
{
return pte & PT_WRITABLE_MASK;
@@ -350,16 +356,100 @@ static void mmu_free_rmap_desc(struct kvm_rmap_desc *rd)
}
/*
+ * Return the pointer to the largepage write count for a given
+ * gfn, handling slots that are not large page aligned.
+ */
+static int *slot_largepage_idx(gfn_t gfn, struct kvm_memory_slot *slot)
+{
+ unsigned long idx;
+
+ idx = (gfn / KVM_PAGES_PER_HPAGE) -
+ (slot->base_gfn / KVM_PAGES_PER_HPAGE);
+ return &slot->lpage_info[idx].write_count;
+}
+
+static void account_shadowed(struct kvm *kvm, gfn_t gfn)
+{
+ int *write_count;
+
+ write_count = slot_largepage_idx(gfn, ...From: Marcelo Tosatti <mtosatti@redhat.com>
Add basic KVM paravirt support. Avoid vm-exits on IO delays.
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
arch/x86/kvm/x86.c | 1 +
include/asm-x86/kvm_para.h | 3 ++-
include/linux/kvm.h | 1 +
3 files changed, 4 insertions(+), 1 deletions(-)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 621a8e3..1b9e695 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -820,6 +820,7 @@ int kvm_dev_ioctl_check_extension(long ext)
case KVM_CAP_EXT_CPUID:
case KVM_CAP_CLOCKSOURCE:
case KVM_CAP_PIT:
+ case KVM_CAP_NOP_IO_DELAY:
r = 1;
break;
case KVM_CAP_VAPIC:
diff --git a/include/asm-x86/kvm_para.h b/include/asm-x86/kvm_para.h
index 5ab7d3d..ed5df3a 100644
--- a/include/asm-x86/kvm_para.h
+++ b/include/asm-x86/kvm_para.h
@@ -10,7 +10,8 @@
* paravirtualization, the appropriate feature bit should be checked.
*/
#define KVM_CPUID_FEATURES 0x40000001
-#define KVM_FEATURE_CLOCKSOURCE 0
+#define KVM_FEATURE_CLOCKSOURCE 0
+#define KVM_FEATURE_NOP_IO_DELAY 1
#define MSR_KVM_WALL_CLOCK 0x11
#define MSR_KVM_SYSTEM_TIME 0x12
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index a2f3274..76f0947 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -237,6 +237,7 @@ struct kvm_vapic_addr {
#define KVM_CAP_NR_VCPUS 9 /* returns max vcpus per vm */
#define KVM_CAP_NR_MEMSLOTS 10 /* returns max memory slots per vm */
#define KVM_CAP_PIT 11
+#define KVM_CAP_NOP_IO_DELAY 12
/*
* ioctls for VM fds
--
1.5.4.5
--
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
arch/x86/kvm/x86.c | 21 ++++++++++++++-------
include/asm-x86/kvm_host.h | 3 +++
2 files changed, 17 insertions(+), 7 deletions(-)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 1b9e695..03ba402 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1840,22 +1840,29 @@ mmio:
return X86EMUL_UNHANDLEABLE;
}
-static int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
- const void *val, int bytes)
+int __emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
+ const void *val, int bytes)
{
int ret;
- down_read(&vcpu->kvm->slots_lock);
ret = kvm_write_guest(vcpu->kvm, gpa, val, bytes);
- if (ret < 0) {
- up_read(&vcpu->kvm->slots_lock);
+ if (ret < 0)
return 0;
- }
kvm_mmu_pte_write(vcpu, gpa, val, bytes);
- up_read(&vcpu->kvm->slots_lock);
return 1;
}
+static int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
+ const void *val, int bytes)
+{
+ int ret;
+
+ down_read(&vcpu->kvm->slots_lock);
+ ret =__emulator_write_phys(vcpu, gpa, val, bytes);
+ up_read(&vcpu->kvm->slots_lock);
+ return ret;
+}
+
static int emulator_write_emulated_onepage(unsigned long addr,
const void *val,
unsigned int bytes,
diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
index 12932bb..c8e51f8 100644
--- a/include/asm-x86/kvm_host.h
+++ b/include/asm-x86/kvm_host.h
@@ -431,6 +431,9 @@ void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages);
int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3);
+int __emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
+ const void *val, int bytes);
+
enum emulation_result {
EMULATE_DONE, /* no further processing */
EMULATE_DO_MMIO, /* kvm_run filled with mmio request */
--
1.5.4.5
--
From: Glauber Costa <gcosta@redhat.com>
it will allow external users to call it. It is mainly
useful for routines that will override its machine_ops
field for its own special purposes, but want to call the
normal shutdown routine after they're done
Signed-off-by: Glauber Costa <gcosta@redhat.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
arch/x86/kernel/reboot.c | 2 +-
include/asm-x86/reboot.h | 1 +
2 files changed, 2 insertions(+), 1 deletions(-)
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 708d6f8..1481d85 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -400,7 +400,7 @@ static void native_machine_emergency_restart(void)
}
}
-static void native_machine_shutdown(void)
+void native_machine_shutdown(void)
{
/* Stop the cpus and apics */
#ifdef CONFIG_SMP
diff --git a/include/asm-x86/reboot.h b/include/asm-x86/reboot.h
index ff9b546..c5e8722 100644
--- a/include/asm-x86/reboot.h
+++ b/include/asm-x86/reboot.h
@@ -17,5 +17,6 @@ extern struct machine_ops machine_ops;
void machine_real_restart(unsigned char *code, int length);
void native_machine_crash_shutdown(struct pt_regs *regs);
+void native_machine_shutdown(void);
#endif /* _ASM_REBOOT_H */
--
1.5.4.5
--
From: Marcelo Tosatti <mtosatti@redhat.com>
Hypercall based pte updates are faster than faults, and also allow use
of the lazy MMU mode to batch operations.
Don't report the feature if two dimensional paging is enabled.
[avi:
- guest/host split
- fix 32-bit truncation issues
- adjust to mmu_op]
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
arch/x86/kernel/kvm.c | 137 +++++++++++++++++++++++++++++++++++++++++++++++++
1 files changed, 137 insertions(+), 0 deletions(-)
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index a8e36da..1bb6e97 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -25,6 +25,7 @@
#include <linux/kvm_para.h>
#include <linux/cpu.h>
#include <linux/mm.h>
+#include <linux/highmem.h>
/*
* No need for any "IO delay" on KVM
@@ -33,6 +34,122 @@ static void kvm_io_delay(void)
{
}
+static void kvm_mmu_op(void *buffer, unsigned len)
+{
+ int r;
+ unsigned long a1, a2;
+
+ do {
+ a1 = __pa(buffer);
+ a2 = 0; /* on i386 __pa() always returns <4G */
+ r = kvm_hypercall3(KVM_HC_MMU_OP, len, a1, a2);
+ buffer += r;
+ len -= r;
+ } while (len);
+}
+
+static void kvm_mmu_write(void *dest, u64 val)
+{
+ __u64 pte_phys;
+ struct kvm_mmu_op_write_pte wpte;
+
+#ifdef CONFIG_HIGHPTE
+ struct page *page;
+ unsigned long dst = (unsigned long) dest;
+
+ page = kmap_atomic_to_page(dest);
+ pte_phys = page_to_pfn(page);
+ pte_phys <<= PAGE_SHIFT;
+ pte_phys += (dst & ~(PAGE_MASK));
+#else
+ pte_phys = (unsigned long)__pa(dest);
+#endif
+ wpte.header.op = KVM_MMU_OP_WRITE_PTE;
+ wpte.pte_val = val;
+ wpte.pte_phys = pte_phys;
+
+ kvm_mmu_op(&wpte, sizeof wpte);
+}
+
+/*
+ * We only need to hook operations that are MMU writes. We hook these so that
+ * we can use lazy MMU mode to batch these operations. We could probably
+ * improve the performance of the host code if we used some of the information
+ * here to simplify processing ...From: Christian Borntraeger <borntraeger@de.ibm.com>
include/linux/kvm.h defines struct kvm_dirty_log to
[...]
union {
void __user *dirty_bitmap; /* one bit per page */
__u64 padding;
};
__user requires compiler.h to compile. Currently, this works on x86
only coincidentally due to other include files. This patch makes
kvm.h compile in all cases.
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
include/linux/kvm.h | 1 +
1 files changed, 1 insertions(+), 0 deletions(-)
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index c1b502a..3bd3828 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -8,6 +8,7 @@
*/
#include <asm/types.h>
+#include <linux/compiler.h>
#include <linux/ioctl.h>
#include <asm/kvm.h>
--
1.5.4.5
--
From: Izik Eidus <izike@qumranet.com>
When mmu_set_spte() checks if a page related to spte should be release as
dirty or clean, it check if the shadow pte was writeble, but in case
rmap_write_protect() is called called it is possible for shadow ptes that were
writeble to become readonly and therefor mmu_set_spte will release the pages
as clean.
This patch fix this issue by marking the page as dirty inside
rmap_write_protect().
Signed-off-by: Izik Eidus <izike@qumranet.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
arch/x86/kvm/mmu.c | 8 ++++++++
1 files changed, 8 insertions(+), 0 deletions(-)
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index a5872b3..dd4b95b 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -626,6 +626,14 @@ static void rmap_write_protect(struct kvm *kvm, u64 gfn)
}
spte = rmap_next(kvm, rmapp, spte);
}
+ if (write_protected) {
+ struct page *page;
+
+ spte = rmap_next(kvm, rmapp, NULL);
+ page = pfn_to_page((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT);
+ SetPageDirty(page);
+ }
+
/* check for huge page mappings */
rmapp = gfn_to_rmap(kvm, gfn, 1);
spte = rmap_next(kvm, rmapp, NULL);
--
1.5.4.5
--
Useful for debugging.
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
arch/x86/kvm/vmx.c | 8 ++++++--
1 files changed, 6 insertions(+), 2 deletions(-)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index fb0389d..0155931 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -39,6 +39,9 @@ module_param(bypass_guest_pf, bool, 0);
static int enable_vpid = 1;
module_param(enable_vpid, bool, 0);
+static int flexpriority_enabled = 1;
+module_param(flexpriority_enabled, bool, 0);
+
struct vmcs {
u32 revision_id;
u32 abort;
@@ -200,8 +203,9 @@ static inline int cpu_has_secondary_exec_ctrls(void)
static inline bool cpu_has_vmx_virtualize_apic_accesses(void)
{
- return (vmcs_config.cpu_based_2nd_exec_ctrl &
- SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES);
+ return flexpriority_enabled
+ && (vmcs_config.cpu_based_2nd_exec_ctrl &
+ SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES);
}
static inline int vm_need_virtualize_apic_accesses(struct kvm *kvm)
--
1.5.4.5
--
From: Izik Eidus <izike@qumranet.com>
Signed-off-by: Izik Eidus <izike@qumranet.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
arch/x86/kvm/svm.c | 8 ++++++++
arch/x86/kvm/vmx.c | 15 +++++++++++++++
include/asm-x86/kvm_host.h | 1 +
3 files changed, 24 insertions(+), 0 deletions(-)
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 51741f9..c1c1b97 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -792,6 +792,13 @@ static void svm_get_segment(struct kvm_vcpu *vcpu,
var->unusable = !var->present;
}
+static int svm_get_cpl(struct kvm_vcpu *vcpu)
+{
+ struct vmcb_save_area *save = &to_svm(vcpu)->vmcb->save;
+
+ return save->cpl;
+}
+
static void svm_get_idt(struct kvm_vcpu *vcpu, struct descriptor_table *dt)
{
struct vcpu_svm *svm = to_svm(vcpu);
@@ -1822,6 +1829,7 @@ static struct kvm_x86_ops svm_x86_ops = {
.get_segment_base = svm_get_segment_base,
.get_segment = svm_get_segment,
.set_segment = svm_set_segment,
+ .get_cpl = svm_get_cpl,
.get_cs_db_l_bits = kvm_get_cs_db_l_bits,
.decache_cr4_guest_bits = svm_decache_cr4_guest_bits,
.set_cr0 = svm_set_cr0,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 0155931..9b56032 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1395,6 +1395,20 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu,
var->unusable = (ar >> 16) & 1;
}
+static int vmx_get_cpl(struct kvm_vcpu *vcpu)
+{
+ struct kvm_segment kvm_seg;
+
+ if (!(vcpu->arch.cr0 & X86_CR0_PE)) /* if real mode */
+ return 0;
+
+ if (vmx_get_rflags(vcpu) & X86_EFLAGS_VM) /* if virtual 8086 */
+ return 3;
+
+ vmx_get_segment(vcpu, &kvm_seg, VCPU_SREG_CS);
+ return kvm_seg.selector & 3;
+}
+
static u32 vmx_segment_access_rights(struct kvm_segment *var)
{
u32 ar;
@@ -2665,6 +2679,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
.get_segment_base = vmx_get_segment_base,
.get_segment = vmx_get_segment,
.set_segment = vmx_set_segment,
+ .get_cpl = ...From: Izik Eidus <izike@qumranet.com>
This emulates the x86 hardware task switch mechanism in software, as it is
unsupported by either vmx or svm. It allows operating systems which use it,
like freedos, to run as kvm guests.
Signed-off-by: Izik Eidus <izike@qumranet.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
arch/x86/kvm/svm.c | 15 ++-
arch/x86/kvm/svm.h | 3 +
arch/x86/kvm/tss.h | 59 +++++++
arch/x86/kvm/vmx.c | 15 ++
arch/x86/kvm/x86.c | 408 ++++++++++++++++++++++++++++++++++++++++++++
include/asm-x86/kvm_host.h | 9 +
6 files changed, 506 insertions(+), 3 deletions(-)
create mode 100644 arch/x86/kvm/tss.h
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index c1c1b97..ad27346 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1112,9 +1112,18 @@ static int invalid_op_interception(struct vcpu_svm *svm,
static int task_switch_interception(struct vcpu_svm *svm,
struct kvm_run *kvm_run)
{
- pr_unimpl(&svm->vcpu, "%s: task switch is unsupported\n", __func__);
- kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
- return 0;
+ u16 tss_selector;
+
+ tss_selector = (u16)svm->vmcb->control.exit_info_1;
+ if (svm->vmcb->control.exit_info_2 &
+ (1ULL << SVM_EXITINFOSHIFT_TS_REASON_IRET))
+ return kvm_task_switch(&svm->vcpu, tss_selector,
+ TASK_SWITCH_IRET);
+ if (svm->vmcb->control.exit_info_2 &
+ (1ULL << SVM_EXITINFOSHIFT_TS_REASON_JMP))
+ return kvm_task_switch(&svm->vcpu, tss_selector,
+ TASK_SWITCH_JMP);
+ return kvm_task_switch(&svm->vcpu, tss_selector, TASK_SWITCH_CALL);
}
static int cpuid_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
diff --git a/arch/x86/kvm/svm.h b/arch/x86/kvm/svm.h
index 5fd5049..1b8afa7 100644
--- a/arch/x86/kvm/svm.h
+++ b/arch/x86/kvm/svm.h
@@ -238,6 +238,9 @@ struct __attribute__ ((__packed__)) vmcb {
#define SVM_EXITINTINFO_VALID SVM_EVTINJ_VALID
#define SVM_EXITINTINFO_VALID_ERR ...Long overdue. Signed-off-by: Avi Kivity <avi@qumranet.com> --- arch/x86/kvm/Kconfig | 2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index 41962e7..76c70ab 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -19,7 +19,7 @@ if VIRTUALIZATION config KVM tristate "Kernel-based Virtual Machine (KVM) support" - depends on HAVE_KVM && EXPERIMENTAL + depends on HAVE_KVM select PREEMPT_NOTIFIERS select ANON_INODES ---help--- -- 1.5.4.5 --
Encapsulate the pte mask'n'shift in a function.
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
arch/x86/kvm/mmu.c | 17 ++++++++++++-----
1 files changed, 12 insertions(+), 5 deletions(-)
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index dd4b95b..6fc3421 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -240,6 +240,13 @@ static int is_rmap_pte(u64 pte)
return is_shadow_present_pte(pte);
}
+static struct page *spte_to_page(u64 pte)
+{
+ hfn_t hfn = (pte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT;
+
+ return pfn_to_page(hfn);
+}
+
static gfn_t pse36_gfn_delta(u32 gpte)
{
int shift = 32 - PT32_DIR_PSE36_SHIFT - PAGE_SHIFT;
@@ -541,7 +548,7 @@ static void rmap_remove(struct kvm *kvm, u64 *spte)
if (!is_rmap_pte(*spte))
return;
sp = page_header(__pa(spte));
- page = pfn_to_page((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT);
+ page = spte_to_page(*spte);
mark_page_accessed(page);
if (is_writeble_pte(*spte))
kvm_release_page_dirty(page);
@@ -630,7 +637,7 @@ static void rmap_write_protect(struct kvm *kvm, u64 gfn)
struct page *page;
spte = rmap_next(kvm, rmapp, NULL);
- page = pfn_to_page((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT);
+ page = spte_to_page(*spte);
SetPageDirty(page);
}
@@ -1033,7 +1040,6 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
u64 spte;
int was_rmapped = 0;
int was_writeble = is_writeble_pte(*shadow_pte);
- hfn_t host_pfn = (*shadow_pte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT;
pgprintk("%s: spte %llx access %x write_fault %d"
" user_fault %d gfn %lx\n",
@@ -1051,9 +1057,10 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
child = page_header(pte & PT64_BASE_ADDR_MASK);
mmu_page_remove_parent_pte(child, shadow_pte);
- } else if (host_pfn != page_to_pfn(page)) {
+ } else if (page != spte_to_page(*shadow_pte)) {
pgprintk("hfn old %lx new %lx\n",
- host_pfn, page_to_pfn(page));
+ ...If we populate a shadow pte due to a fault (and not speculatively due to a
pte write) then we can set the accessed bit on it, as we know it will be
set immediately on the next guest instruction. This saves a read-modify-write
operation.
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
arch/x86/kvm/mmu.c | 8 +++++---
arch/x86/kvm/paging_tmpl.h | 4 ++--
2 files changed, 7 insertions(+), 5 deletions(-)
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 072e942..a5872b3 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1020,7 +1020,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
unsigned pt_access, unsigned pte_access,
int user_fault, int write_fault, int dirty,
int *ptwrite, int largepage, gfn_t gfn,
- struct page *page)
+ struct page *page, bool speculative)
{
u64 spte;
int was_rmapped = 0;
@@ -1061,6 +1061,8 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
* demand paging).
*/
spte = PT_PRESENT_MASK | PT_DIRTY_MASK;
+ if (!speculative)
+ pte_access |= PT_ACCESSED_MASK;
if (!dirty)
pte_access &= ~ACC_WRITE_MASK;
if (!(pte_access & ACC_EXEC_MASK))
@@ -1148,13 +1150,13 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
if (level == 1) {
mmu_set_spte(vcpu, &table[index], ACC_ALL, ACC_ALL,
- 0, write, 1, &pt_write, 0, gfn, page);
+ 0, write, 1, &pt_write, 0, gfn, page, false);
return pt_write;
}
if (largepage && level == 2) {
mmu_set_spte(vcpu, &table[index], ACC_ALL, ACC_ALL,
- 0, write, 1, &pt_write, 1, gfn, page);
+ 0, write, 1, &pt_write, 1, gfn, page, false);
return pt_write;
}
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 57abbd0..e9ae5db 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -266,7 +266,7 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
...From: Glauber Costa <gcosta@redhat.com>
This patch writes 0 (actually, what really matters is that the
LSB is cleared) to the system time msr before shutting down
the machine for kexec.
Without it, we can have a random memory location being written
when the guest comes back
It overrides the functions shutdown, used in the path of kernel_kexec() (sys.c)
and crash_shutdown, used in the path of crash_kexec() (kexec.c)
Signed-off-by: Glauber Costa <gcosta@redhat.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
arch/x86/kernel/kvmclock.c | 27 +++++++++++++++++++++++++++
1 files changed, 27 insertions(+), 0 deletions(-)
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index b999f5e..ddee040 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -22,6 +22,7 @@
#include <asm/msr.h>
#include <asm/apic.h>
#include <linux/percpu.h>
+#include <asm/reboot.h>
#define KVM_SCALE 22
@@ -143,6 +144,28 @@ static void kvm_setup_secondary_clock(void)
setup_secondary_APIC_clock();
}
+/*
+ * After the clock is registered, the host will keep writing to the
+ * registered memory location. If the guest happens to shutdown, this memory
+ * won't be valid. In cases like kexec, in which you install a new kernel, this
+ * means a random memory location will be kept being written. So before any
+ * kind of shutdown from our side, we unregister the clock by writting anything
+ * that does not have the 'enable' bit set in the msr
+ */
+#ifdef CONFIG_KEXEC
+static void kvm_crash_shutdown(struct pt_regs *regs)
+{
+ native_write_msr_safe(MSR_KVM_SYSTEM_TIME, 0, 0);
+ native_machine_crash_shutdown(regs);
+}
+#endif
+
+static void kvm_shutdown(void)
+{
+ native_write_msr_safe(MSR_KVM_SYSTEM_TIME, 0, 0);
+ native_machine_shutdown();
+}
+
void __init kvmclock_init(void)
{
if (!kvm_para_available())
@@ -155,6 +178,10 @@ void __init kvmclock_init(void)
pv_time_ops.set_wallclock = kvm_set_wallclock;
...Signed-off-by: Avi Kivity <avi@qumranet.com>
---
include/asm-x86/kvm_host.h | 13 +++++++++++++
include/linux/kvm_host.h | 13 -------------
2 files changed, 13 insertions(+), 13 deletions(-)
diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
index 52e276c..2773f91 100644
--- a/include/asm-x86/kvm_host.h
+++ b/include/asm-x86/kvm_host.h
@@ -20,6 +20,13 @@
#include <asm/desc.h>
+#define KVM_MAX_VCPUS 16
+#define KVM_MEMORY_SLOTS 32
+/* memory slots that does not exposed to userspace */
+#define KVM_PRIVATE_MEM_SLOTS 4
+
+#define KVM_PIO_PAGE_OFFSET 1
+
#define CR3_PAE_RESERVED_BITS ((X86_CR3_PWT | X86_CR3_PCD) - 1)
#define CR3_NONPAE_RESERVED_BITS ((PAGE_SIZE-1) & ~(X86_CR3_PWT | X86_CR3_PCD))
#define CR3_L_MODE_RESERVED_BITS (CR3_NONPAE_RESERVED_BITS|0xFFFFFF0000000000ULL)
@@ -113,6 +120,12 @@ enum {
#define KVM_NR_MEM_OBJS 40
+struct kvm_guest_debug {
+ int enabled;
+ unsigned long bp[4];
+ int singlestep;
+};
+
/*
* We don't want allocation failures within the mmu code, so we preallocate
* enough memory for a single page fault in a cache.
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 958e003..f4e1436 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -24,13 +24,6 @@
#include <asm/kvm_host.h>
-#define KVM_MAX_VCPUS 16
-#define KVM_MEMORY_SLOTS 32
-/* memory slots that does not exposed to userspace */
-#define KVM_PRIVATE_MEM_SLOTS 4
-
-#define KVM_PIO_PAGE_OFFSET 1
-
/*
* vcpu->requests bit members
*/
@@ -43,12 +36,6 @@
struct kvm_vcpu;
extern struct kmem_cache *kvm_vcpu_cache;
-struct kvm_guest_debug {
- int enabled;
- unsigned long bp[4];
- int singlestep;
-};
-
/*
* It would be nice to use something smarter than a linear search, TBD...
* Thankfully we dont expect many devices to register (famous last words :),
--
1.5.4.5
--
From: Glauber Costa <gcosta@redhat.com>
This patch a llows machine_crash_shutdown to
be replaced, just like any of the other functions
in machine_ops
Signed-off-by: Glauber Costa <gcosta@redhat.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
arch/x86/kernel/crash.c | 3 ++-
arch/x86/kernel/reboot.c | 11 ++++++++++-
include/asm-x86/reboot.h | 1 +
3 files changed, 13 insertions(+), 2 deletions(-)
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index 9a5fa0a..d262306 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -25,6 +25,7 @@
#include <asm/hpet.h>
#include <linux/kdebug.h>
#include <asm/smp.h>
+#include <asm/reboot.h>
#ifdef CONFIG_X86_32
#include <mach_ipi.h>
@@ -121,7 +122,7 @@ static void nmi_shootdown_cpus(void)
}
#endif
-void machine_crash_shutdown(struct pt_regs *regs)
+void native_machine_crash_shutdown(struct pt_regs *regs)
{
/* This function is only called after the system
* has panicked or is otherwise in a critical state.
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 484c4a8..708d6f8 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -471,7 +471,10 @@ struct machine_ops machine_ops = {
.shutdown = native_machine_shutdown,
.emergency_restart = native_machine_emergency_restart,
.restart = native_machine_restart,
- .halt = native_machine_halt
+ .halt = native_machine_halt,
+#ifdef CONFIG_KEXEC
+ .crash_shutdown = native_machine_crash_shutdown,
+#endif
};
void machine_power_off(void)
@@ -499,3 +502,9 @@ void machine_halt(void)
machine_ops.halt();
}
+#ifdef CONFIG_KEXEC
+void machine_crash_shutdown(struct pt_regs *regs)
+{
+ machine_ops.crash_shutdown(regs);
+}
+#endif
diff --git a/include/asm-x86/reboot.h b/include/asm-x86/reboot.h
index e9e3ffc..ff9b546 100644
--- a/include/asm-x86/reboot.h
+++ b/include/asm-x86/reboot.h
@@ -16,5 +16,6 @@ struct machine_ops
extern struct machine_ops machine_ops;
...From: Marcelo Tosatti <mtosatti@redhat.com>
Hypercall based pte updates are faster than faults, and also allow use
of the lazy MMU mode to batch operations.
Don't report the feature if two dimensional paging is enabled.
[avi:
- one mmu_op hypercall instead of one per op
- allow 64-bit gpa on hypercall
- don't pass host errors (-ENOMEM) to guest]
[akpm: warning fix on i386]
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
arch/x86/kvm/mmu.c | 136 +++++++++++++++++++++++++++++++++++++++++++-
arch/x86/kvm/x86.c | 18 ++++++-
include/asm-x86/kvm_host.h | 4 +
include/asm-x86/kvm_para.h | 29 +++++++++
include/linux/kvm.h | 1 +
include/linux/kvm_para.h | 5 +-
6 files changed, 190 insertions(+), 3 deletions(-)
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 414405b..072e942 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -28,6 +28,7 @@
#include <linux/module.h>
#include <linux/swap.h>
#include <linux/hugetlb.h>
+#include <linux/compiler.h>
#include <asm/page.h>
#include <asm/cmpxchg.h>
@@ -40,7 +41,7 @@
* 2. while doing 1. it walks guest-physical to host-physical
* If the hardware supports that we don't need to do shadow paging.
*/
-static bool tdp_enabled = false;
+bool tdp_enabled = false;
#undef MMU_DEBUG
@@ -167,6 +168,13 @@ static int dbg = 1;
#define ACC_USER_MASK PT_USER_MASK
#define ACC_ALL (ACC_EXEC_MASK | ACC_WRITE_MASK | ACC_USER_MASK)
+struct kvm_pv_mmu_op_buffer {
+ void *ptr;
+ unsigned len;
+ unsigned processed;
+ char buf[512] __aligned(sizeof(long));
+};
+
struct kvm_rmap_desc {
u64 *shadow_ptes[RMAP_EXT];
struct kvm_rmap_desc *more;
@@ -2003,6 +2011,132 @@ unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm)
return nr_mmu_pages;
}
+static void *pv_mmu_peek_buffer(struct kvm_pv_mmu_op_buffer ...From: Marcelo Tosatti <mtosatti@redhat.com>
Batch pte updates and tlb flushes in lazy MMU mode.
[avi:
- adjust to mmu_op
- helper for getting para_state without debug warnings]
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
arch/x86/kernel/kvm.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++-
1 files changed, 60 insertions(+), 2 deletions(-)
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 1bb6e97..d9121f9 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -26,6 +26,22 @@
#include <linux/cpu.h>
#include <linux/mm.h>
#include <linux/highmem.h>
+#include <linux/hardirq.h>
+
+#define MMU_QUEUE_SIZE 1024
+
+struct kvm_para_state {
+ u8 mmu_queue[MMU_QUEUE_SIZE];
+ int mmu_queue_len;
+ enum paravirt_lazy_mode mode;
+};
+
+static DEFINE_PER_CPU(struct kvm_para_state, para_state);
+
+static struct kvm_para_state *kvm_para_state(void)
+{
+ return &per_cpu(para_state, raw_smp_processor_id());
+}
/*
* No need for any "IO delay" on KVM
@@ -48,6 +64,28 @@ static void kvm_mmu_op(void *buffer, unsigned len)
} while (len);
}
+static void mmu_queue_flush(struct kvm_para_state *state)
+{
+ if (state->mmu_queue_len) {
+ kvm_mmu_op(state->mmu_queue, state->mmu_queue_len);
+ state->mmu_queue_len = 0;
+ }
+}
+
+static void kvm_deferred_mmu_op(void *buffer, int len)
+{
+ struct kvm_para_state *state = kvm_para_state();
+
+ if (state->mode != PARAVIRT_LAZY_MMU) {
+ kvm_mmu_op(buffer, len);
+ return;
+ }
+ if (state->mmu_queue_len + len > sizeof state->mmu_queue)
+ mmu_queue_flush(state);
+ memcpy(state->mmu_queue + state->mmu_queue_len, buffer, len);
+ state->mmu_queue_len += len;
+}
+
static void kvm_mmu_write(void *dest, u64 val)
{
__u64 pte_phys;
@@ -68,7 +106,7 @@ static void kvm_mmu_write(void *dest, u64 val)
wpte.pte_val = val;
wpte.pte_phys = pte_phys;
- kvm_mmu_op(&wpte, sizeof wpte);
+ kvm_deferred_mmu_op(&wpte, sizeof ...From: Sheng Yang <sheng.yang@intel.com>
Signed-off-by: Sheng Yang <sheng.yang@intel.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
arch/x86/kvm/i8254.c | 7 +++++++
arch/x86/kvm/i8254.h | 1 +
arch/x86/kvm/x86.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++++
include/asm-x86/kvm.h | 21 +++++++++++++++++++++
include/linux/kvm.h | 2 ++
5 files changed, 79 insertions(+), 0 deletions(-)
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 1031901..7776f50 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -286,6 +286,13 @@ static void pit_load_count(struct kvm *kvm, int channel, u32 val)
}
}
+void kvm_pit_load_count(struct kvm *kvm, int channel, u32 val)
+{
+ mutex_lock(&kvm->arch.vpit->pit_state.lock);
+ pit_load_count(kvm, channel, val);
+ mutex_unlock(&kvm->arch.vpit->pit_state.lock);
+}
+
static void pit_ioport_write(struct kvm_io_device *this,
gpa_t addr, int len, const void *data)
{
diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h
index 38184d5..586bbf0 100644
--- a/arch/x86/kvm/i8254.h
+++ b/arch/x86/kvm/i8254.h
@@ -54,6 +54,7 @@ struct kvm_pit {
void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu);
void kvm_pit_timer_intr_post(struct kvm_vcpu *vcpu, int vec);
+void kvm_pit_load_count(struct kvm *kvm, int channel, u32 val);
struct kvm_pit *kvm_create_pit(struct kvm *kvm);
void kvm_free_pit(struct kvm *kvm);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index c33a457..621a8e3 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1504,6 +1504,23 @@ static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
return r;
}
+static int kvm_vm_ioctl_get_pit(struct kvm *kvm, struct kvm_pit_state *ps)
+{
+ int r = 0;
+
+ memcpy(ps, &kvm->arch.vpit->pit_state, sizeof(struct kvm_pit_state));
+ return r;
+}
+
+static int kvm_vm_ioctl_set_pit(struct kvm *kvm, struct kvm_pit_state *ps)
+{
+ int r = ...From: Sheng Yang <sheng.yang@intel.com> The patch moved PIT from userspace to kernel, and increase the timer accuracy greatly. Signed-off-by: Sheng Yang <sheng.yang@intel.com> Signed-off-by: Avi Kivity <avi@qumranet.com> --- arch/x86/kvm/Makefile | 3 +- arch/x86/kvm/i8254.c | 585 ++++++++++++++++++++++++++++++++++++++++++++ arch/x86/kvm/i8254.h | 60 +++++ arch/x86/kvm/irq.c | 3 + arch/x86/kvm/x86.c | 9 + include/asm-x86/kvm_host.h | 1 + include/linux/kvm.h | 2 + 7 files changed, 662 insertions(+), 1 deletions(-) create mode 100644 arch/x86/kvm/i8254.c create mode 100644 arch/x86/kvm/i8254.h diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile index ffdd0b3..4d0c22e 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile @@ -6,7 +6,8 @@ common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o) EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm -kvm-objs := $(common-objs) x86.o mmu.o x86_emulate.o i8259.o irq.o lapic.o +kvm-objs := $(common-objs) x86.o mmu.o x86_emulate.o i8259.o irq.o lapic.o \ + i8254.o obj-$(CONFIG_KVM) += kvm.o kvm-intel-objs = vmx.o obj-$(CONFIG_KVM_INTEL) += kvm-intel.o diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c new file mode 100644 index 0000000..1031901 --- /dev/null +++ b/arch/x86/kvm/i8254.c @@ -0,0 +1,585 @@ +/* + * 8253/8254 interval timer emulation + * + * Copyright (c) 2003-2004 Fabrice Bellard + * Copyright (c) 2006 Intel Corporation + * Copyright (c) 2007 Keir Fraser, XenSource Inc + * Copyright (c) 2008 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * ...
The desc_struct changes left an unnecessary #ifdef; remove it. Signed-off-by: Avi Kivity <avi@qumranet.com> --- arch/x86/kvm/svm.c | 4 ---- 1 files changed, 0 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index b2c667f..51741f9 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -290,11 +290,7 @@ static void svm_hardware_enable(void *garbage) struct svm_cpu_data *svm_data; uint64_t efer; -#ifdef CONFIG_X86_64 - struct desc_ptr gdt_descr; -#else struct desc_ptr gdt_descr; -#endif struct desc_struct *gdt; int me = raw_smp_processor_id(); -- 1.5.4.5 --
