Hi, This small series provides some more fixes towards the goal to have the PARAVIRT selectable for x86_64. After that, just some more small steps are needed. The first fix is not even specific for PARAVIRT, and it's actually preventing the whole tree from booting. --
lookup_address() receives two parameters, but efi_64.c call
is passing only one. It's actually preventing the tree from compiling
Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
---
arch/x86/kernel/efi_64.c | 3 ++-
1 files changed, 2 insertions(+), 1 deletions(-)
diff --git a/arch/x86/kernel/efi_64.c b/arch/x86/kernel/efi_64.c
index f420819..1f8bbd9 100644
--- a/arch/x86/kernel/efi_64.c
+++ b/arch/x86/kernel/efi_64.c
@@ -45,9 +45,10 @@ static void __init early_mapping_set_exec(unsigned long start,
int executable)
{
pte_t *kpte;
+ int level;
while (start < end) {
- kpte = lookup_address((unsigned long)__va(start));
+ kpte = lookup_address((unsigned long)__va(start), &level);
BUG_ON(!kpte);
if (executable)
set_pte(kpte, pte_mkexec(*kpte));
--
1.4.4.2
--
We use a __stringify construction at paravirt_patch_64.c. It's better practice to include the stringify header directly Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com> --- arch/x86/kernel/paravirt_patch_64.c | 1 + 1 files changed, 1 insertions(+), 0 deletions(-) diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c index cbfc4f3..7d904e1 100644 --- a/arch/x86/kernel/paravirt_patch_64.c +++ b/arch/x86/kernel/paravirt_patch_64.c @@ -1,5 +1,6 @@ #include <asm/paravirt.h> #include <asm/asm-offsets.h> +#include <linux/stringify.h> DEF_NATIVE(pv_irq_ops, irq_disable, "cli"); DEF_NATIVE(pv_irq_ops, irq_enable, "sti"); -- 1.4.4.2 --
x86_64 lacks a native_init_IRQ() function, so we turn the arch's
init_IRQ() function into a native construct
Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
---
arch/x86/kernel/i8259_64.c | 4 +++-
include/asm-x86/hw_irq_64.h | 1 +
2 files changed, 4 insertions(+), 1 deletions(-)
diff --git a/arch/x86/kernel/i8259_64.c b/arch/x86/kernel/i8259_64.c
index cba13e0..738517e 100644
--- a/arch/x86/kernel/i8259_64.c
+++ b/arch/x86/kernel/i8259_64.c
@@ -456,7 +456,9 @@ void __init init_ISA_irqs (void)
}
}
-void __init init_IRQ(void)
+void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ")));
+
+void __init native_init_IRQ(void)
{
int i;
diff --git a/include/asm-x86/hw_irq_64.h b/include/asm-x86/hw_irq_64.h
index a346159..312a58d 100644
--- a/include/asm-x86/hw_irq_64.h
+++ b/include/asm-x86/hw_irq_64.h
@@ -141,6 +141,7 @@ extern void print_IO_APIC(void);
extern int IO_APIC_get_PCI_irq_vector(int bus, int slot, int fn);
extern void send_IPI(int dest, int vector);
extern void setup_ioapic_dest(void);
+extern void native_init_IRQ(void);
extern unsigned long io_apic_irqs;
--
1.4.4.2
--
With PARAVIRT, we actually have arch_{dup,exit}_mmap functions,
so we can't include the generic header
Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
---
include/asm-x86/mmu_context_64.h | 2 ++
1 files changed, 2 insertions(+), 0 deletions(-)
diff --git a/include/asm-x86/mmu_context_64.h b/include/asm-x86/mmu_context_64.h
index 7e2aa23..ad6dc82 100644
--- a/include/asm-x86/mmu_context_64.h
+++ b/include/asm-x86/mmu_context_64.h
@@ -7,7 +7,9 @@
#include <asm/pda.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
+#ifndef CONFIG_PARAVIRT
#include <asm-generic/mm_hooks.h>
+#endif
/*
* possibly do the LDT unload here?
--
1.4.4.2
--
This patch adds room for read and write_cr8 functions back in
pv_cpu_ops struct
Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
---
include/asm-x86/paravirt.h | 15 +++++++++++++++
1 files changed, 15 insertions(+), 0 deletions(-)
diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h
index 7ff25c2..3e7ca42 100644
--- a/include/asm-x86/paravirt.h
+++ b/include/asm-x86/paravirt.h
@@ -101,6 +101,11 @@ struct pv_cpu_ops {
unsigned long (*read_cr4)(void);
void (*write_cr4)(unsigned long);
+#ifdef CONFIG_X86_64
+ unsigned long (*read_cr8)(void);
+ void (*write_cr8)(unsigned long);
+#endif
+
/* Segment descriptor handling */
void (*load_tr_desc)(void);
void (*load_gdt)(const struct desc_ptr *);
@@ -614,6 +619,16 @@ static inline void write_cr4(unsigned long x)
PVOP_VCALL1(pv_cpu_ops.write_cr4, x);
}
+static inline unsigned long read_cr8(void)
+{
+ return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr8);
+}
+
+static inline void write_cr8(unsigned long x)
+{
+ PVOP_VCALL1(pv_cpu_ops.write_cr8, x);
+}
+
static inline void raw_safe_halt(void)
{
PVOP_VCALL0(pv_irq_ops.safe_halt);
--
1.4.4.2
--
Since the cr8 manipulation functions ended up staying in the tree,
they can't be defined just when PARAVIRT is off: In this patch,
those functions are defined for the PARAVIRT case too.
Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
---
include/asm-x86/system.h | 30 ++++++++++++++++--------------
1 files changed, 16 insertions(+), 14 deletions(-)
diff --git a/include/asm-x86/system.h b/include/asm-x86/system.h
index 692c28c..a33c3f4 100644
--- a/include/asm-x86/system.h
+++ b/include/asm-x86/system.h
@@ -236,6 +236,20 @@ static inline void native_write_cr4(unsigned long val)
asm volatile("mov %0,%%cr4": :"r" (val), "m" (__force_order));
}
+#ifdef CONFIG_X86_64
+static inline unsigned long native_read_cr8(void)
+{
+ unsigned long cr8;
+ asm volatile("movq %%cr8,%0" : "=r" (cr8));
+ return cr8;
+}
+
+static inline void native_write_cr8(unsigned long val)
+{
+ asm volatile("movq %0,%%cr8" :: "r" (val) : "memory");
+}
+#endif
+
static inline void native_wbinvd(void)
{
asm volatile("wbinvd": : :"memory");
@@ -253,21 +267,9 @@ static inline void native_wbinvd(void)
#define read_cr4_safe() (native_read_cr4_safe())
#define write_cr4(x) (native_write_cr4(x))
#define wbinvd() (native_wbinvd())
-
#ifdef CONFIG_X86_64
-
-static inline unsigned long read_cr8(void)
-{
- unsigned long cr8;
- asm volatile("movq %%cr8,%0" : "=r" (cr8));
- return cr8;
-}
-
-static inline void write_cr8(unsigned long val)
-{
- asm volatile("movq %0,%%cr8" :: "r" (val) : "memory");
-}
-
+#define read_cr8() (native_read_cr8())
+#define write_cr8(x) (native_write_cr8(x))
#endif
/* Clear the 'TS' bit */
--
1.4.4.2
--
This patch fills in the read and write cr8 fields with their
native version
Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
---
arch/x86/kernel/paravirt.c | 4 ++++
1 files changed, 4 insertions(+), 0 deletions(-)
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index c20b4f8..c67d331 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -319,6 +319,10 @@ struct pv_cpu_ops pv_cpu_ops = {
.read_cr4 = native_read_cr4,
.read_cr4_safe = native_read_cr4_safe,
.write_cr4 = native_write_cr4,
+#ifdef CONFIG_X86_64
+ .read_cr8 = native_read_cr8,
+ .write_cr8 = native_write_cr8,
+#endif
.wbinvd = native_wbinvd,
.read_msr = native_read_msr_safe,
.write_msr = native_write_msr_safe,
--
1.4.4.2
--
This patch adds the constant PARAVIRT needs in asm_offsets_64.c Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com> --- arch/x86/kernel/asm-offsets_64.c | 14 ++++++++++++++ 1 files changed, 14 insertions(+), 0 deletions(-) diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index 2b32719..494e1e0 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -61,6 +61,20 @@ int main(void) ENTRY(data_offset); BLANK(); #undef ENTRY +#ifdef CONFIG_PARAVIRT + BLANK(); + OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled); + OFFSET(PARAVIRT_PATCH_pv_cpu_ops, paravirt_patch_template, pv_cpu_ops); + OFFSET(PARAVIRT_PATCH_pv_irq_ops, paravirt_patch_template, pv_irq_ops); + OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable); + OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable); + OFFSET(PV_CPU_iret, pv_cpu_ops, iret); + OFFSET(PV_CPU_irq_enable_syscall_ret, pv_cpu_ops, irq_enable_syscall_ret); + OFFSET(PV_CPU_swapgs, pv_cpu_ops, swapgs); + OFFSET(PV_MMU_read_cr2, pv_mmu_ops, read_cr2); +#endif + + #ifdef CONFIG_IA32_EMULATION #define ENTRY(entry) DEFINE(IA32_SIGCONTEXT_ ## entry, offsetof(struct sigcontext_ia32, entry)) ENTRY(ax); -- 1.4.4.2 --
This patch adds the __parainstructions section to vmlinux.lds.S.
It's needed for the patching system.
Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
---
arch/x86/kernel/vmlinux_64.lds.S | 8 ++++++++
1 files changed, 8 insertions(+), 0 deletions(-)
diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S
index 5ae8aa8..5e0300f 100644
--- a/arch/x86/kernel/vmlinux_64.lds.S
+++ b/arch/x86/kernel/vmlinux_64.lds.S
@@ -175,6 +175,14 @@ SECTIONS
}
__con_initcall_end = .;
SECURITY_INIT
+
+ . = ALIGN(8);
+ .parainstructions : AT(ADDR(.parainstructions) - LOAD_OFFSET) {
+ __parainstructions = .;
+ *(.parainstructions)
+ __parainstructions_end = .;
+ }
+
. = ALIGN(8);
__alt_instructions = .;
.altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) {
--
1.4.4.2
--
__pmd, pmd_val and set_pud are used before they are defined (as static)
We move them a little up in the file, so it doesn't happen.
Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
---
include/asm-x86/paravirt.h | 84 ++++++++++++++++++++++----------------------
1 files changed, 42 insertions(+), 42 deletions(-)
diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h
index 3e7ca42..12caaf1 100644
--- a/include/asm-x86/paravirt.h
+++ b/include/asm-x86/paravirt.h
@@ -1023,6 +1023,48 @@ static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
PVOP_VCALL2(pv_mmu_ops.set_pmd, pmdp, val);
}
+#if PAGETABLE_LEVELS >= 3
+static inline pmd_t __pmd(pmdval_t val)
+{
+ pmdval_t ret;
+
+ if (sizeof(pmdval_t) > sizeof(long))
+ ret = PVOP_CALL2(pmdval_t, pv_mmu_ops.make_pmd,
+ val, (u64)val >> 32);
+ else
+ ret = PVOP_CALL1(pmdval_t, pv_mmu_ops.make_pmd,
+ val);
+
+ return (pmd_t) { ret };
+}
+
+static inline pmdval_t pmd_val(pmd_t pmd)
+{
+ pmdval_t ret;
+
+ if (sizeof(pmdval_t) > sizeof(long))
+ ret = PVOP_CALL2(pmdval_t, pv_mmu_ops.pmd_val,
+ pmd.pmd, (u64)pmd.pmd >> 32);
+ else
+ ret = PVOP_CALL1(pmdval_t, pv_mmu_ops.pmd_val,
+ pmd.pmd);
+
+ return ret;
+}
+
+static inline void set_pud(pud_t *pudp, pud_t pud)
+{
+ pudval_t val = native_pud_val(pud);
+
+ if (sizeof(pudval_t) > sizeof(long))
+ PVOP_VCALL3(pv_mmu_ops.set_pud, pudp,
+ val, (u64)val >> 32);
+ else
+ PVOP_VCALL2(pv_mmu_ops.set_pud, pudp,
+ val);
+}
+#endif /* PAGETABLE_LEVELS >= 3 */
+
#ifdef CONFIG_X86_PAE
/* Special-case pte-setting operations for PAE, which can't update a
64-bit pte atomically */
@@ -1073,48 +1115,6 @@ static inline void pmd_clear(pmd_t *pmdp)
}
#endif /* CONFIG_X86_PAE */
-#if PAGETABLE_LEVELS >= 3
-static inline pmd_t __pmd(pmdval_t val)
-{
- pmdval_t ret;
-
- if (sizeof(pmdval_t) > sizeof(long))
- ret = PVOP_CALL2(pmdval_t, pv_mmu_ops.make_pmd,
- val, (u64)val >> 32);
- else
- ret = ...Hm, in my original patches I put the #ifdef CONFIG_X86_PAE below the PAGETABLE_LEVELS section. Does that work? Or is that an equivalent transform? --
Are we going to see this for other archs than just x86? If so then please add this to include/asm-generic/vmlinux.lds.h The altinstructions right below is antoehr candidate.. Sam --
In theory other architectures could use a similar mechanism, but for now
its x86 specific.
J
--
Good catch, I know I don't test with CONFIG_EFI=y --
Ah that came probably from the CPA patchset which added the parameter. Sorry for that. -Andi --
but in exchange you broke all of 32-bit with CONFIG_PARAVIRT=y. Which means you did not even build-test it on 32-bit, let alone boot test it... Ingo --
Why are we rushing so much to do 64-bit paravirt that we are breaking working configurations? If the developement is going to be this chaotic, it should be done and tested out of tree until it can stabilize. I do not like having to continuously retest and review the x86 branch because the paravirt-ops are constantly in flux and the 32-bit code keeps breaking. We won't be doing 64-bit paravirt-ops for exactly this reason - is there a serious justification from the performance angle on modern 64-bit hardware? If not, why justify the complexity and hackery to Linux? Zach --
what you see is a open feedback cycle conducted on lkml. People send
patches for arch/x86, and we tell them if it breaks something. The bug
was found before i pushed out the x86.git devel tree (and the fix is
below - but this shouldnt matter to you because the bug never hit a
public x86.git tree).
Ingo
Index: linux/include/asm-x86/paravirt.h
===================================================================
--- linux.orig/include/asm-x86/paravirt.h
+++ linux/include/asm-x86/paravirt.h
@@ -619,6 +619,7 @@ static inline void write_cr4(unsigned lo
PVOP_VCALL1(pv_cpu_ops.write_cr4, x);
}
+#ifdef CONFIG_X86_64
static inline unsigned long read_cr8(void)
{
return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr8);
@@ -628,6 +629,7 @@ static inline void write_cr8(unsigned lo
{
PVOP_VCALL1(pv_cpu_ops.write_cr8, x);
}
+#endif
static inline void raw_safe_halt(void)
{
--
x86.git is out of the mainline tree, and it seems to be working fairly
smoothly. I've come to appreciate the "lots of small patches with quick
Most of the activity is pure unification, with paravirt being part of
that. It doesn't help that it increases the CONFIG_ combinatorial
A big part of the rationale is to unify 32 and 64 bit, so that paravirt
isn't a gratuitous difference between the two. Also, 32 and 64 bit Xen
have almost identical interface requirements, so the work is making
64-bit Xen progress (and lguest64, of course).
J
--
And the following allows PARAVIRT kernels to boot on x86_64.
-----
Fill in missing pagetable manipulation entries in pv_mmu_ops
for PAGETABLE_LEVELS >= 3.
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Index: linux-2.6-x86/arch/x86/kernel/paravirt.c
===================================================================
--- linux-2.6-x86.orig/arch/x86/kernel/paravirt.c
+++ linux-2.6-x86/arch/x86/kernel/paravirt.c
@@ -396,16 +396,25 @@ struct pv_mmu_ops pv_mmu_ops = {
.kmap_atomic_pte = kmap_atomic,
#endif
+#if PAGETABLE_LEVELS >= 3
#ifdef CONFIG_X86_PAE
.set_pte_atomic = native_set_pte_atomic,
.set_pte_present = native_set_pte_present,
- .set_pud = native_set_pud,
.pte_clear = native_pte_clear,
.pmd_clear = native_pmd_clear,
-
+#endif
+ .set_pud = native_set_pud,
+ .set_pgd = native_set_pgd,
+ .pgd_clear = native_pgd_clear,
.pmd_val = native_pmd_val,
.make_pmd = native_make_pmd,
+
+#if PAGETABLE_LEVELS == 4
+ .pud_val = native_pud_val,
+ .make_pud = native_make_pud,
+ .pud_clear = native_pud_clear,
#endif
+#endif /* PAGETABLE_LEVELS >= 3 */
.pte_val = native_pte_val,
.pgd_val = native_pgd_val,
--
Current x86.git doesn't have .set_pgd or .pgd_clear fields on pv_mmu_ops. Those fields, and the implementations of set_pgd(), __pud() and pud_val() were missing on the last series from Glauber. I will send the missing On the patches I will send, pud_clear() and pgd_clear() aren't present on pv_mmu_ops and are implemented using set_pud() and set_pgd(). -- Eduardo --
Actually, I changed my mind on that. pud_clear needs special handling
with 32-bit PAE, so I'd prefer to keep it a separate operation.
J
--
thanks Marcelo - picked this up and the other changes as well. I guess the only thing missing at the moment is the proper Kconfig changes to allow the building of a 64-bit PARAVIRT kernel? Ingo --
For normal hardware yes. But I still have a vsmp patch in the queue. --
