From 9d02b43dee0d7fb18dfb13a00915550b1a3daa9f Mon Sep 17 00:00:00 2001 From: Olaf Hering Date: Thu, 1 Nov 2012 22:02:30 +0100 Subject: [PATCH 01/15] xen PVonHVM: use E820_Reserved area for shared_info This is a respin of 00e37bdb0113a98408de42db85be002f21dbffd3 ("xen PVonHVM: move shared_info to MMIO before kexec"). Currently kexec in a PVonHVM guest fails with a triple fault because the new kernel overwrites the shared info page. The exact failure depends on the size of the kernel image. This patch moves the pfn from RAM into an E820 reserved memory area. The pfn containing the shared_info is located somewhere in RAM. This will cause trouble if the current kernel is doing a kexec boot into a new kernel. The new kernel (and its startup code) can not know where the pfn is, so it can not reserve the page. The hypervisor will continue to update the pfn, and as a result memory corruption occours in the new kernel. The toolstack marks the memory area FC000000-FFFFFFFF as reserved in the E820 map. Within that range newer toolstacks (4.3+) will keep 1MB starting from FE700000 as reserved for guest use. Older Xen4 toolstacks will usually not allocate areas up to FE700000, so FE700000 is expected to work also with older toolstacks. In Xen3 there is no reserved area at a fixed location. If the guest is started on such old hosts the shared_info page will be placed in RAM. As a result kexec can not be used. Signed-off-by: Olaf Hering Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/enlighten.c | 81 +++++++++++++++++++++++++++------------- arch/x86/xen/suspend.c | 2 +- arch/x86/xen/xen-ops.h | 2 +- 3 files changed, 58 insertions(+), 27 deletions(-) diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 586d83812b6..a90c3bb58be 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1495,51 +1495,72 @@ asmlinkage void __init xen_start_kernel(void) #endif } -void __ref xen_hvm_init_shared_info(void) -{ - int cpu; - struct xen_add_to_physmap xatp; - static struct shared_info *shared_info_page = 0; +#ifdef CONFIG_XEN_PVHVM +#define HVM_SHARED_INFO_ADDR 0xFE700000UL +static struct shared_info *xen_hvm_shared_info; +static unsigned long xen_hvm_sip_phys; +static int xen_major, xen_minor; + +static void xen_hvm_connect_shared_info(unsigned long pfn) +{ + struct xen_add_to_physmap xatp; - if (!shared_info_page) - shared_info_page = (struct shared_info *) - extend_brk(PAGE_SIZE, PAGE_SIZE); xatp.domid = DOMID_SELF; xatp.idx = 0; xatp.space = XENMAPSPACE_shared_info; - xatp.gpfn = __pa(shared_info_page) >> PAGE_SHIFT; + xatp.gpfn = pfn; if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp)) BUG(); - HYPERVISOR_shared_info = (struct shared_info *)shared_info_page; +} +static void __init xen_hvm_set_shared_info(struct shared_info *sip) +{ + int cpu; + + HYPERVISOR_shared_info = sip; /* xen_vcpu is a pointer to the vcpu_info struct in the shared_info * page, we use it in the event channel upcall and in some pvclock * related functions. We don't need the vcpu_info placement * optimizations because we don't use any pv_mmu or pv_irq op on - * HVM. - * When xen_hvm_init_shared_info is run at boot time only vcpu 0 is - * online but xen_hvm_init_shared_info is run at resume time too and - * in that case multiple vcpus might be online. */ - for_each_online_cpu(cpu) { + * HVM. */ + for_each_online_cpu(cpu) per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu]; - } } -#ifdef CONFIG_XEN_PVHVM +/* Reconnect the shared_info pfn to a (new) mfn */ +void xen_hvm_resume_shared_info(void) +{ + xen_hvm_connect_shared_info(xen_hvm_sip_phys >> PAGE_SHIFT); +} + +/* Xen tools prior to Xen 4 do not provide a E820_Reserved area for guest usage. + * On these old tools the shared info page will be placed in E820_Ram. + * Xen 4 provides a E820_Reserved area at 0xFC000000, and this code expects + * that nothing is mapped up to HVM_SHARED_INFO_ADDR. + * Xen 4.3+ provides an explicit 1MB area at HVM_SHARED_INFO_ADDR which is used + * here for the shared info page. */ +static void __init xen_hvm_init_shared_info(void) +{ + if (xen_major < 4) { + xen_hvm_shared_info = extend_brk(PAGE_SIZE, PAGE_SIZE); + xen_hvm_sip_phys = __pa(xen_hvm_shared_info); + } else { + xen_hvm_sip_phys = HVM_SHARED_INFO_ADDR; + set_fixmap(FIX_PARAVIRT_BOOTMAP, xen_hvm_sip_phys); + xen_hvm_shared_info = + (struct shared_info *)fix_to_virt(FIX_PARAVIRT_BOOTMAP); + } + xen_hvm_connect_shared_info(xen_hvm_sip_phys >> PAGE_SHIFT); + xen_hvm_set_shared_info(xen_hvm_shared_info); +} + static void __init init_hvm_pv_info(void) { - int major, minor; uint32_t eax, ebx, ecx, edx, pages, msr, base; u64 pfn; base = xen_cpuid_base(); - cpuid(base + 1, &eax, &ebx, &ecx, &edx); - - major = eax >> 16; - minor = eax & 0xffff; - printk(KERN_INFO "Xen version %d.%d.\n", major, minor); - cpuid(base + 2, &pages, &msr, &ecx, &edx); pfn = __pa(hypercall_page); @@ -1590,12 +1611,22 @@ static void __init xen_hvm_guest_init(void) static bool __init xen_hvm_platform(void) { + uint32_t eax, ebx, ecx, edx, base; + if (xen_pv_domain()) return false; - if (!xen_cpuid_base()) + base = xen_cpuid_base(); + if (!base) return false; + cpuid(base + 1, &eax, &ebx, &ecx, &edx); + + xen_major = eax >> 16; + xen_minor = eax & 0xffff; + + printk(KERN_INFO "Xen version %d.%d.\n", xen_major, xen_minor); + return true; } diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c index 45329c8c226..ae8a00c39de 100644 --- a/arch/x86/xen/suspend.c +++ b/arch/x86/xen/suspend.c @@ -30,7 +30,7 @@ void xen_arch_hvm_post_suspend(int suspend_cancelled) { #ifdef CONFIG_XEN_PVHVM int cpu; - xen_hvm_init_shared_info(); + xen_hvm_resume_shared_info(); xen_callback_vector(); xen_unplug_emulated_devices(); if (xen_feature(XENFEAT_hvm_safe_pvclock)) { diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index a95b41744ad..d2e73d19d36 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -40,7 +40,7 @@ void xen_enable_syscall(void); void xen_vcpu_restore(void); void xen_callback_vector(void); -void xen_hvm_init_shared_info(void); +void xen_hvm_resume_shared_info(void); void xen_unplug_emulated_devices(void); void __init xen_build_dynamic_phys_to_machine(void); From 5b71fbdc64225b7a86944f4f0de80e59071187c7 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Fri, 2 Nov 2012 14:36:38 +0000 Subject: [PATCH 02/15] xen-pciback: simplify and tighten parsing of device IDs Now that at least one of the conformance problems of the kernel's sscanf() was addressed (commit da99075c1d368315e1508b6143226c0d27b621e0), we can improve the parsing done in xen-pciback both in terms of code readability and correctness (in particular properly rejecting input strings not well formed). Signed-off-by: Jan Beulich Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/xen-pciback/pci_stub.c | 83 ++++++++++++++---------------- 1 file changed, 39 insertions(+), 44 deletions(-) diff --git a/drivers/xen/xen-pciback/pci_stub.c b/drivers/xen/xen-pciback/pci_stub.c index 961d664e2d2..1a92739f431 100644 --- a/drivers/xen/xen-pciback/pci_stub.c +++ b/drivers/xen/xen-pciback/pci_stub.c @@ -897,42 +897,35 @@ static struct pci_driver xen_pcibk_pci_driver = { static inline int str_to_slot(const char *buf, int *domain, int *bus, int *slot, int *func) { - int err; - char wc = '*'; + int parsed = 0; - err = sscanf(buf, " %x:%x:%x.%x", domain, bus, slot, func); - switch (err) { + switch (sscanf(buf, " %x:%x:%x.%x %n", domain, bus, slot, func, + &parsed)) { case 3: *func = -1; - err = sscanf(buf, " %x:%x:%x.%c", domain, bus, slot, &wc); + sscanf(buf, " %x:%x:%x.* %n", domain, bus, slot, &parsed); break; case 2: *slot = *func = -1; - err = sscanf(buf, " %x:%x:*.%c", domain, bus, &wc); - if (err >= 2) - ++err; + sscanf(buf, " %x:%x:*.* %n", domain, bus, &parsed); break; } - if (err == 4 && wc == '*') + if (parsed && !buf[parsed]) return 0; - else if (err < 0) - return -EINVAL; /* try again without domain */ *domain = 0; - wc = '*'; - err = sscanf(buf, " %x:%x.%x", bus, slot, func); - switch (err) { + switch (sscanf(buf, " %x:%x.%x %n", bus, slot, func, &parsed)) { case 2: *func = -1; - err = sscanf(buf, " %x:%x.%c", bus, slot, &wc); + sscanf(buf, " %x:%x.* %n", bus, slot, &parsed); break; case 1: *slot = *func = -1; - err = sscanf(buf, " %x:*.%c", bus, &wc) + 1; + sscanf(buf, " %x:*.* %n", bus, &parsed); break; } - if (err == 3 && wc == '*') + if (parsed && !buf[parsed]) return 0; return -EINVAL; @@ -941,13 +934,20 @@ static inline int str_to_slot(const char *buf, int *domain, int *bus, static inline int str_to_quirk(const char *buf, int *domain, int *bus, int *slot, int *func, int *reg, int *size, int *mask) { - int err; + int parsed = 0; - err = - sscanf(buf, " %04x:%02x:%02x.%d-%08x:%1x:%08x", domain, bus, slot, - func, reg, size, mask); - if (err == 7) + sscanf(buf, " %4x:%2x:%2x.%d-%8x:%1x:%8x %n", domain, bus, slot, func, + reg, size, mask, &parsed); + if (parsed && !buf[parsed]) return 0; + + /* try again without domain */ + *domain = 0; + sscanf(buf, " %2x:%2x.%d-%8x:%1x:%8x %n", bus, slot, func, reg, size, + mask, &parsed); + if (parsed && !buf[parsed]) + return 0; + return -EINVAL; } @@ -1339,8 +1339,6 @@ static int __init pcistub_init(void) if (pci_devs_to_hide && *pci_devs_to_hide) { do { - char wc = '*'; - parsed = 0; err = sscanf(pci_devs_to_hide + pos, @@ -1349,51 +1347,48 @@ static int __init pcistub_init(void) switch (err) { case 3: func = -1; - err = sscanf(pci_devs_to_hide + pos, - " (%x:%x:%x.%c) %n", - &domain, &bus, &slot, &wc, - &parsed); + sscanf(pci_devs_to_hide + pos, + " (%x:%x:%x.*) %n", + &domain, &bus, &slot, &parsed); break; case 2: slot = func = -1; - err = sscanf(pci_devs_to_hide + pos, - " (%x:%x:*.%c) %n", - &domain, &bus, &wc, &parsed) + 1; + sscanf(pci_devs_to_hide + pos, + " (%x:%x:*.*) %n", + &domain, &bus, &parsed); break; } - if (err != 4 || wc != '*') { + if (!parsed) { domain = 0; - wc = '*'; err = sscanf(pci_devs_to_hide + pos, " (%x:%x.%x) %n", &bus, &slot, &func, &parsed); switch (err) { case 2: func = -1; - err = sscanf(pci_devs_to_hide + pos, - " (%x:%x.%c) %n", - &bus, &slot, &wc, - &parsed); + sscanf(pci_devs_to_hide + pos, + " (%x:%x.*) %n", + &bus, &slot, &parsed); break; case 1: slot = func = -1; - err = sscanf(pci_devs_to_hide + pos, - " (%x:*.%c) %n", - &bus, &wc, &parsed) + 1; + sscanf(pci_devs_to_hide + pos, + " (%x:*.*) %n", + &bus, &parsed); break; } - if (err != 3 || wc != '*') - goto parse_error; } + if (parsed <= 0) + goto parse_error; + err = pcistub_device_id_add(domain, bus, slot, func); if (err) goto out; - /* if parsed<=0, we've reached the end of the string */ pos += parsed; - } while (parsed > 0 && pci_devs_to_hide[pos]); + } while (pci_devs_to_hide[pos]); } /* If we're the first PCI Device Driver to register, we're the From b3e40b72bb24237b0aee9f6ba2e9f88dd4ff3c0a Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Fri, 2 Nov 2012 14:37:13 +0000 Subject: [PATCH 03/15] xen-pciback: reject out of range inputs This add checks for out of range numbers (including in cases where the folding of slot and function into a single value could yield false matches). It also removes the bogus field width restrictions in str_to_quirk() - nowhere else in the driver this is being done, and hence this function could reject input the equivalent of which would be happily accepted in other places (in particular, "0x" prefixes causing the effective width of the actual number to be either zero or less than what would be required to cover the full range of valid values). Note that for the moment this second part is cosmetic only, as the kernel's sscanf() currently ignores the field widths, but a patch to overcome this is on its way. Signed-off-by: Jan Beulich Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/xen-pciback/pci_stub.c | 39 +++++++++++++++++++++--------- 1 file changed, 27 insertions(+), 12 deletions(-) diff --git a/drivers/xen/xen-pciback/pci_stub.c b/drivers/xen/xen-pciback/pci_stub.c index 1a92739f431..129e1674f4a 100644 --- a/drivers/xen/xen-pciback/pci_stub.c +++ b/drivers/xen/xen-pciback/pci_stub.c @@ -142,7 +142,8 @@ static struct pcistub_device *pcistub_device_find(int domain, int bus, if (psdev->dev != NULL && domain == pci_domain_nr(psdev->dev->bus) && bus == psdev->dev->bus->number - && PCI_DEVFN(slot, func) == psdev->dev->devfn) { + && slot == PCI_SLOT(psdev->dev->devfn) + && func == PCI_FUNC(psdev->dev->devfn)) { pcistub_device_get(psdev); goto out; } @@ -191,7 +192,8 @@ struct pci_dev *pcistub_get_pci_dev_by_slot(struct xen_pcibk_device *pdev, if (psdev->dev != NULL && domain == pci_domain_nr(psdev->dev->bus) && bus == psdev->dev->bus->number - && PCI_DEVFN(slot, func) == psdev->dev->devfn) { + && slot == PCI_SLOT(psdev->dev->devfn) + && func == PCI_FUNC(psdev->dev->devfn)) { found_dev = pcistub_device_get_pci_dev(pdev, psdev); break; } @@ -936,14 +938,14 @@ static inline int str_to_quirk(const char *buf, int *domain, int *bus, int { int parsed = 0; - sscanf(buf, " %4x:%2x:%2x.%d-%8x:%1x:%8x %n", domain, bus, slot, func, + sscanf(buf, " %x:%x:%x.%x-%x:%x:%x %n", domain, bus, slot, func, reg, size, mask, &parsed); if (parsed && !buf[parsed]) return 0; /* try again without domain */ *domain = 0; - sscanf(buf, " %2x:%2x.%d-%8x:%1x:%8x %n", bus, slot, func, reg, size, + sscanf(buf, " %x:%x.%x-%x:%x:%x %n", bus, slot, func, reg, size, mask, &parsed); if (parsed && !buf[parsed]) return 0; @@ -955,7 +957,7 @@ static int pcistub_device_id_add(int domain, int bus, int slot, int func) { struct pcistub_device_id *pci_dev_id; unsigned long flags; - int rc = 0; + int rc = 0, devfn = PCI_DEVFN(slot, func); if (slot < 0) { for (slot = 0; !rc && slot < 32; ++slot) @@ -969,13 +971,24 @@ static int pcistub_device_id_add(int domain, int bus, int slot, int func) return rc; } + if (( +#if !defined(MODULE) /* pci_domains_supported is not being exported */ \ + || !defined(CONFIG_PCI_DOMAINS) + !pci_domains_supported ? domain : +#endif + domain < 0 || domain > 0xffff) + || bus < 0 || bus > 0xff + || PCI_SLOT(devfn) != slot + || PCI_FUNC(devfn) != func) + return -EINVAL; + pci_dev_id = kmalloc(sizeof(*pci_dev_id), GFP_KERNEL); if (!pci_dev_id) return -ENOMEM; pci_dev_id->domain = domain; pci_dev_id->bus = bus; - pci_dev_id->devfn = PCI_DEVFN(slot, func); + pci_dev_id->devfn = devfn; pr_debug(DRV_NAME ": wants to seize %04x:%02x:%02x.%d\n", domain, bus, slot, func); @@ -1016,14 +1029,18 @@ static int pcistub_device_id_remove(int domain, int bus, int slot, int func) return err; } -static int pcistub_reg_add(int domain, int bus, int slot, int func, int reg, - int size, int mask) +static int pcistub_reg_add(int domain, int bus, int slot, int func, + unsigned int reg, unsigned int size, + unsigned int mask) { int err = 0; struct pcistub_device *psdev; struct pci_dev *dev; struct config_field *field; + if (reg > 0xfff || (size < 4 && (mask >> (size * 8)))) + return -EINVAL; + psdev = pcistub_device_find(domain, bus, slot, func); if (!psdev) { err = -ENODEV; @@ -1254,13 +1271,11 @@ static ssize_t permissive_add(struct device_driver *drv, const char *buf, int err; struct pcistub_device *psdev; struct xen_pcibk_dev_data *dev_data; + err = str_to_slot(buf, &domain, &bus, &slot, &func); if (err) goto out; - if (slot < 0 || func < 0) { - err = -EINVAL; - goto out; - } + psdev = pcistub_device_find(domain, bus, slot, func); if (!psdev) { err = -ENODEV; From 92e3229dcdc80ff0b6304f14c578d76e7e10e226 Mon Sep 17 00:00:00 2001 From: "Liu, Jinsong" Date: Thu, 8 Nov 2012 05:41:13 +0000 Subject: [PATCH 04/15] xen/acpi: ACPI PAD driver PAD is acpi Processor Aggregator Device which provides a control point that enables the platform to perform specific processor configuration and control that applies to all processors in the platform. This patch is to implement Xen acpi pad logic. When running under Xen virt platform, native pad driver would not work. Instead Xen pad driver, a self-contained and thin logic level, would take over acpi pad logic. When acpi pad notify OSPM, xen pad logic intercept and parse _PUR object to get the expected idle cpu number, and then hypercall to hypervisor. Xen hypervisor would then do the rest work, say, core parking, to idle specific number of cpus on its own policy. Signed-off-by: Jan Beulich Signed-off-by: Liu Jinsong Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/Makefile | 3 +- drivers/xen/xen-acpi-pad.c | 181 +++++++++++++++++++++++++++++++ include/xen/interface/platform.h | 17 +++ include/xen/interface/version.h | 15 +++ 4 files changed, 215 insertions(+), 1 deletion(-) create mode 100644 drivers/xen/xen-acpi-pad.c diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile index 0e863703545..3c397170f39 100644 --- a/drivers/xen/Makefile +++ b/drivers/xen/Makefile @@ -10,7 +10,8 @@ CFLAGS_features.o := $(nostackp) dom0-$(CONFIG_PCI) += pci.o dom0-$(CONFIG_USB_SUPPORT) += dbgp.o -dom0-$(CONFIG_ACPI) += acpi.o +dom0-$(CONFIG_ACPI) += acpi.o $(xen-pad-y) +xen-pad-$(CONFIG_X86) += xen-acpi-pad.o dom0-$(CONFIG_X86) += pcpu.o obj-$(CONFIG_XEN_DOM0) += $(dom0-y) obj-$(CONFIG_BLOCK) += biomerge.o diff --git a/drivers/xen/xen-acpi-pad.c b/drivers/xen/xen-acpi-pad.c new file mode 100644 index 00000000000..f23ecf38008 --- /dev/null +++ b/drivers/xen/xen-acpi-pad.c @@ -0,0 +1,181 @@ +/* + * xen-acpi-pad.c - Xen pad interface + * + * Copyright (c) 2012, Intel Corporation. + * Author: Liu, Jinsong + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#include +#include +#include +#include +#include +#include + +#define ACPI_PROCESSOR_AGGREGATOR_CLASS "acpi_pad" +#define ACPI_PROCESSOR_AGGREGATOR_DEVICE_NAME "Processor Aggregator" +#define ACPI_PROCESSOR_AGGREGATOR_NOTIFY 0x80 +static DEFINE_MUTEX(xen_cpu_lock); + +static int xen_acpi_pad_idle_cpus(unsigned int idle_nums) +{ + struct xen_platform_op op; + + op.cmd = XENPF_core_parking; + op.u.core_parking.type = XEN_CORE_PARKING_SET; + op.u.core_parking.idle_nums = idle_nums; + + return HYPERVISOR_dom0_op(&op); +} + +static int xen_acpi_pad_idle_cpus_num(void) +{ + struct xen_platform_op op; + + op.cmd = XENPF_core_parking; + op.u.core_parking.type = XEN_CORE_PARKING_GET; + + return HYPERVISOR_dom0_op(&op) + ?: op.u.core_parking.idle_nums; +} + +/* + * Query firmware how many CPUs should be idle + * return -1 on failure + */ +static int acpi_pad_pur(acpi_handle handle) +{ + struct acpi_buffer buffer = {ACPI_ALLOCATE_BUFFER, NULL}; + union acpi_object *package; + int num = -1; + + if (ACPI_FAILURE(acpi_evaluate_object(handle, "_PUR", NULL, &buffer))) + return num; + + if (!buffer.length || !buffer.pointer) + return num; + + package = buffer.pointer; + + if (package->type == ACPI_TYPE_PACKAGE && + package->package.count == 2 && + package->package.elements[0].integer.value == 1) /* rev 1 */ + num = package->package.elements[1].integer.value; + + kfree(buffer.pointer); + return num; +} + +/* Notify firmware how many CPUs are idle */ +static void acpi_pad_ost(acpi_handle handle, int stat, + uint32_t idle_nums) +{ + union acpi_object params[3] = { + {.type = ACPI_TYPE_INTEGER,}, + {.type = ACPI_TYPE_INTEGER,}, + {.type = ACPI_TYPE_BUFFER,}, + }; + struct acpi_object_list arg_list = {3, params}; + + params[0].integer.value = ACPI_PROCESSOR_AGGREGATOR_NOTIFY; + params[1].integer.value = stat; + params[2].buffer.length = 4; + params[2].buffer.pointer = (void *)&idle_nums; + acpi_evaluate_object(handle, "_OST", &arg_list, NULL); +} + +static void acpi_pad_handle_notify(acpi_handle handle) +{ + int idle_nums; + + mutex_lock(&xen_cpu_lock); + idle_nums = acpi_pad_pur(handle); + if (idle_nums < 0) { + mutex_unlock(&xen_cpu_lock); + return; + } + + idle_nums = xen_acpi_pad_idle_cpus(idle_nums) + ?: xen_acpi_pad_idle_cpus_num(); + if (idle_nums >= 0) + acpi_pad_ost(handle, 0, idle_nums); + mutex_unlock(&xen_cpu_lock); +} + +static void acpi_pad_notify(acpi_handle handle, u32 event, + void *data) +{ + switch (event) { + case ACPI_PROCESSOR_AGGREGATOR_NOTIFY: + acpi_pad_handle_notify(handle); + break; + default: + pr_warn("Unsupported event [0x%x]\n", event); + break; + } +} + +static int acpi_pad_add(struct acpi_device *device) +{ + acpi_status status; + + strcpy(acpi_device_name(device), ACPI_PROCESSOR_AGGREGATOR_DEVICE_NAME); + strcpy(acpi_device_class(device), ACPI_PROCESSOR_AGGREGATOR_CLASS); + + status = acpi_install_notify_handler(device->handle, + ACPI_DEVICE_NOTIFY, acpi_pad_notify, device); + if (ACPI_FAILURE(status)) + return -ENODEV; + + return 0; +} + +static int acpi_pad_remove(struct acpi_device *device, + int type) +{ + mutex_lock(&xen_cpu_lock); + xen_acpi_pad_idle_cpus(0); + mutex_unlock(&xen_cpu_lock); + + acpi_remove_notify_handler(device->handle, + ACPI_DEVICE_NOTIFY, acpi_pad_notify); + return 0; +} + +static const struct acpi_device_id pad_device_ids[] = { + {"ACPI000C", 0}, + {"", 0}, +}; + +static struct acpi_driver acpi_pad_driver = { + .name = "processor_aggregator", + .class = ACPI_PROCESSOR_AGGREGATOR_CLASS, + .ids = pad_device_ids, + .ops = { + .add = acpi_pad_add, + .remove = acpi_pad_remove, + }, +}; + +static int __init xen_acpi_pad_init(void) +{ + /* Only DOM0 is responsible for Xen acpi pad */ + if (!xen_initial_domain()) + return -ENODEV; + + /* Only Xen4.2 or later support Xen acpi pad */ + if (!xen_running_on_version_or_later(4, 2)) + return -ENODEV; + + return acpi_bus_register_driver(&acpi_pad_driver); +} +subsys_initcall(xen_acpi_pad_init); diff --git a/include/xen/interface/platform.h b/include/xen/interface/platform.h index 4755b5fac9c..5e36932ab40 100644 --- a/include/xen/interface/platform.h +++ b/include/xen/interface/platform.h @@ -324,6 +324,22 @@ struct xenpf_cpu_ol { }; DEFINE_GUEST_HANDLE_STRUCT(xenpf_cpu_ol); +/* + * CMD 58 and 59 are reserved for cpu hotadd and memory hotadd, + * which are already occupied at Xen hypervisor side. + */ +#define XENPF_core_parking 60 +struct xenpf_core_parking { + /* IN variables */ +#define XEN_CORE_PARKING_SET 1 +#define XEN_CORE_PARKING_GET 2 + uint32_t type; + /* IN variables: set cpu nums expected to be idled */ + /* OUT variables: get cpu nums actually be idled */ + uint32_t idle_nums; +}; +DEFINE_GUEST_HANDLE_STRUCT(xenpf_core_parking); + struct xen_platform_op { uint32_t cmd; uint32_t interface_version; /* XENPF_INTERFACE_VERSION */ @@ -341,6 +357,7 @@ struct xen_platform_op { struct xenpf_set_processor_pminfo set_pminfo; struct xenpf_pcpuinfo pcpu_info; struct xenpf_cpu_ol cpu_ol; + struct xenpf_core_parking core_parking; uint8_t pad[128]; } u; }; diff --git a/include/xen/interface/version.h b/include/xen/interface/version.h index 7ff6498679a..96d8d3deea6 100644 --- a/include/xen/interface/version.h +++ b/include/xen/interface/version.h @@ -63,4 +63,19 @@ struct xen_feature_info { /* arg == xen_domain_handle_t. */ #define XENVER_guest_handle 8 +/* Check if running on Xen version (major, minor) or later */ +static inline bool +xen_running_on_version_or_later(unsigned int major, unsigned int minor) +{ + unsigned int version; + + if (!xen_domain()) + return false; + + version = HYPERVISOR_xen_version(XENVER_version, NULL); + if ((((version >> 16) == major) && ((version & 0xffff) >= minor)) || + ((version >> 16) > major)) + return true; + return false; +} #endif /* __XEN_PUBLIC_VERSION_H__ */ From e3aa4e61b57da7574fdd1b4c9ca9bdee06a4d23e Mon Sep 17 00:00:00 2001 From: "Liu, Jinsong" Date: Thu, 8 Nov 2012 05:44:28 +0000 Subject: [PATCH 05/15] xen/acpi: revert pad config check in xen_check_mwait With Xen acpi pad logic added into kernel, we can now revert xen mwait related patch df88b2d96e36d9a9e325bfcd12eb45671cbbc937 ("xen/enlighten: Disable MWAIT_LEAF so that acpi-pad won't be loaded. "). The reason is, when running under newer Xen platform, Xen pad driver would be early loaded, so native pad driver would fail to be loaded, and hence no mwait/monitor #UD risk again. Another point is, only Xen4.2 or later support Xen acpi pad, so we won't expose mwait cpuid capability when running under older Xen platform. Signed-off-by: Liu, Jinsong Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/enlighten.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index a90c3bb58be..eb0edff5499 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -287,8 +287,7 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx, static bool __init xen_check_mwait(void) { -#if defined(CONFIG_ACPI) && !defined(CONFIG_ACPI_PROCESSOR_AGGREGATOR) && \ - !defined(CONFIG_ACPI_PROCESSOR_AGGREGATOR_MODULE) +#ifdef CONFIG_ACPI struct xen_platform_op op = { .cmd = XENPF_set_processor_pminfo, .u.set_pminfo.id = -1, @@ -309,6 +308,13 @@ static bool __init xen_check_mwait(void) if (!xen_initial_domain()) return false; + /* + * When running under platform earlier than Xen4.2, do not expose + * mwait, to avoid the risk of loading native acpi pad driver + */ + if (!xen_running_on_version_or_later(4, 2)) + return false; + ax = 1; cx = 0; From 0dfa5b5d09a882a33f3aa433ad0b16898a3589ad Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Mon, 26 Nov 2012 19:57:04 -0500 Subject: [PATCH 06/15] xen/acpi: Fix compile error by missing decleration for xen_domain. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 92e3229dcdc80ff0b6304f14c578d76e7e10e226 ("xen/acpi: ACPI PAD driver") adds a new function but forgets to use the right header. Without it, we get: In file included from drivers/xen/features.c:15:0: include/xen/interface/version.h: In function ‘xen_running_on_version_or_later’: include/xen/interface/version.h:72:2: error: implicit declaration of function ‘xen_domain’ [-Werror=implicit-function-declaration] Signed-off-by: Konrad Rzeszutek Wilk --- include/xen/interface/version.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/xen/interface/version.h b/include/xen/interface/version.h index 96d8d3deea6..53553f04649 100644 --- a/include/xen/interface/version.h +++ b/include/xen/interface/version.h @@ -63,6 +63,9 @@ struct xen_feature_info { /* arg == xen_domain_handle_t. */ #define XENVER_guest_handle 8 +/* Declares the xen_domain() macros. */ +#include + /* Check if running on Xen version (major, minor) or later */ static inline bool xen_running_on_version_or_later(unsigned int major, unsigned int minor) From 5af19e475fdc046a68be0c09cd53417ce73b8dcf Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Mon, 19 Nov 2012 16:52:30 +0530 Subject: [PATCH 07/15] xen/xenbus: Remove duplicate inclusion of asm/xen/hypervisor.h asm/xen/hypervisor.h was included twice. Signed-off-by: Sachin Kamat Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/xenbus/xenbus_xs.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/xen/xenbus/xenbus_xs.c b/drivers/xen/xenbus/xenbus_xs.c index acedeabe589..88e677b0de7 100644 --- a/drivers/xen/xenbus/xenbus_xs.c +++ b/drivers/xen/xenbus/xenbus_xs.c @@ -48,7 +48,6 @@ #include #include #include "xenbus_comms.h" -#include struct xs_stored_msg { struct list_head list; From 394b40f62d7ae18a1c48c13fc483b8193f8c3a98 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Tue, 27 Nov 2012 11:39:40 -0500 Subject: [PATCH 08/15] xen/acpi: Move the xen_running_on_version_or_later function. As on ia64 builds we get: include/xen/interface/version.h: In function 'xen_running_on_version_or_later': include/xen/interface/version.h:76: error: implicit declaration of function 'HYPERVISOR_xen_version' We can later on make this function exportable if there are modules using part of it. For right now the only two users are built-in. Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/enlighten.c | 15 +++++++++++++++ drivers/xen/xen-acpi-pad.c | 1 + include/xen/interface/version.h | 18 ------------------ include/xen/xen-ops.h | 1 + 4 files changed, 17 insertions(+), 18 deletions(-) diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index eb0edff5499..3325cd9f779 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -223,6 +223,21 @@ static void __init xen_banner(void) version >> 16, version & 0xffff, extra.extraversion, xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : ""); } +/* Check if running on Xen version (major, minor) or later */ +bool +xen_running_on_version_or_later(unsigned int major, unsigned int minor) +{ + unsigned int version; + + if (!xen_domain()) + return false; + + version = HYPERVISOR_xen_version(XENVER_version, NULL); + if ((((version >> 16) == major) && ((version & 0xffff) >= minor)) || + ((version >> 16) > major)) + return true; + return false; +} #define CPUID_THERM_POWER_LEAF 6 #define APERFMPERF_PRESENT 0 diff --git a/drivers/xen/xen-acpi-pad.c b/drivers/xen/xen-acpi-pad.c index f23ecf38008..da39191e727 100644 --- a/drivers/xen/xen-acpi-pad.c +++ b/drivers/xen/xen-acpi-pad.c @@ -20,6 +20,7 @@ #include #include #include +#include #define ACPI_PROCESSOR_AGGREGATOR_CLASS "acpi_pad" #define ACPI_PROCESSOR_AGGREGATOR_DEVICE_NAME "Processor Aggregator" diff --git a/include/xen/interface/version.h b/include/xen/interface/version.h index 53553f04649..7ff6498679a 100644 --- a/include/xen/interface/version.h +++ b/include/xen/interface/version.h @@ -63,22 +63,4 @@ struct xen_feature_info { /* arg == xen_domain_handle_t. */ #define XENVER_guest_handle 8 -/* Declares the xen_domain() macros. */ -#include - -/* Check if running on Xen version (major, minor) or later */ -static inline bool -xen_running_on_version_or_later(unsigned int major, unsigned int minor) -{ - unsigned int version; - - if (!xen_domain()) - return false; - - version = HYPERVISOR_xen_version(XENVER_version, NULL); - if ((((version >> 16) == major) && ((version & 0xffff) >= minor)) || - ((version >> 16) > major)) - return true; - return false; -} #endif /* __XEN_PUBLIC_VERSION_H__ */ diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h index 6a198e46ab6..6170abd53d0 100644 --- a/include/xen/xen-ops.h +++ b/include/xen/xen-ops.h @@ -29,4 +29,5 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma, unsigned long mfn, int nr, pgprot_t prot, unsigned domid); +bool xen_running_on_version_or_later(unsigned int major, unsigned int minor); #endif /* INCLUDE_XEN_OPS_H */ From 9a032e393a8bc888a9b0c898cbdb9db2cee7b536 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Wed, 17 Oct 2012 13:37:49 -0700 Subject: [PATCH 09/15] xen: add pages parameter to xen_remap_domain_mfn_range Also introduce xen_unmap_domain_mfn_range. These are the parts of Mukesh's "xen/pvh: Implement MMU changes for PVH" which are also needed as a baseline for ARM privcmd support. The original patch was: Signed-off-by: Mukesh Rathor Signed-off-by: Konrad Rzeszutek Wilk This derivative is also: Signed-off-by: Ian Campbell --- arch/x86/xen/mmu.c | 15 ++++++++++++++- drivers/xen/privcmd.c | 5 +++-- include/xen/xen-ops.h | 5 ++++- 3 files changed, 21 insertions(+), 4 deletions(-) diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 6226c99729b..0f6386a5b43 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -2479,7 +2479,9 @@ static int remap_area_mfn_pte_fn(pte_t *ptep, pgtable_t token, int xen_remap_domain_mfn_range(struct vm_area_struct *vma, unsigned long addr, unsigned long mfn, int nr, - pgprot_t prot, unsigned domid) + pgprot_t prot, unsigned domid, + struct page **pages) + { struct remap_data rmd; struct mmu_update mmu_update[REMAP_BATCH_SIZE]; @@ -2523,3 +2525,14 @@ out: return err; } EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_range); + +/* Returns: 0 success */ +int xen_unmap_domain_mfn_range(struct vm_area_struct *vma, + int numpgs, struct page **pages) +{ + if (!pages || !xen_feature(XENFEAT_auto_translated_physmap)) + return 0; + + return -EINVAL; +} +EXPORT_SYMBOL_GPL(xen_unmap_domain_mfn_range); diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c index 8adb9cc267f..b612267a8cb 100644 --- a/drivers/xen/privcmd.c +++ b/drivers/xen/privcmd.c @@ -178,7 +178,7 @@ static int mmap_mfn_range(void *data, void *state) msg->va & PAGE_MASK, msg->mfn, msg->npages, vma->vm_page_prot, - st->domain); + st->domain, NULL); if (rc < 0) return rc; @@ -267,7 +267,8 @@ static int mmap_batch_fn(void *data, void *state) int ret; ret = xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1, - st->vma->vm_page_prot, st->domain); + st->vma->vm_page_prot, st->domain, + NULL); /* Store error code for second pass. */ *(st->err++) = ret; diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h index 6a198e46ab6..990b43e441e 100644 --- a/include/xen/xen-ops.h +++ b/include/xen/xen-ops.h @@ -27,6 +27,9 @@ struct vm_area_struct; int xen_remap_domain_mfn_range(struct vm_area_struct *vma, unsigned long addr, unsigned long mfn, int nr, - pgprot_t prot, unsigned domid); + pgprot_t prot, unsigned domid, + struct page **pages); +int xen_unmap_domain_mfn_range(struct vm_area_struct *vma, + int numpgs, struct page **pages); #endif /* INCLUDE_XEN_OPS_H */ From d71f513985c22f1050295d1a7e4327cf9fb060da Mon Sep 17 00:00:00 2001 From: Mukesh Rathor Date: Wed, 17 Oct 2012 17:11:21 -0700 Subject: [PATCH 10/15] xen: privcmd: support autotranslated physmap guests. PVH and ARM only support the batch interface. To map a foreign page to a process, the PFN must be allocated and the autotranslated path uses ballooning for that purpose. The returned PFN is then mapped to the foreign page. xen_unmap_domain_mfn_range() is introduced to unmap these pages via the privcmd close call. Acked-by: Stefano Stabellini Signed-off-by: Mukesh Rathor [v1: Fix up privcmd_close] Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Ian Campbell [v2: used for ARM too] --- drivers/xen/privcmd.c | 69 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 67 insertions(+), 2 deletions(-) diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c index b612267a8cb..b9d08987a5a 100644 --- a/drivers/xen/privcmd.c +++ b/drivers/xen/privcmd.c @@ -33,11 +33,14 @@ #include #include #include +#include #include "privcmd.h" MODULE_LICENSE("GPL"); +#define PRIV_VMA_LOCKED ((void *)1) + #ifndef HAVE_ARCH_PRIVCMD_MMAP static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma); #endif @@ -199,6 +202,10 @@ static long privcmd_ioctl_mmap(void __user *udata) if (!xen_initial_domain()) return -EPERM; + /* We only support privcmd_ioctl_mmap_batch for auto translated. */ + if (xen_feature(XENFEAT_auto_translated_physmap)) + return -ENOSYS; + if (copy_from_user(&mmapcmd, udata, sizeof(mmapcmd))) return -EFAULT; @@ -246,6 +253,7 @@ struct mmap_batch_state { domid_t domain; unsigned long va; struct vm_area_struct *vma; + int index; /* A tristate: * 0 for no errors * 1 if at least one error has happened (and no @@ -260,15 +268,24 @@ struct mmap_batch_state { xen_pfn_t __user *user_mfn; }; +/* auto translated dom0 note: if domU being created is PV, then mfn is + * mfn(addr on bus). If it's auto xlated, then mfn is pfn (input to HAP). + */ static int mmap_batch_fn(void *data, void *state) { xen_pfn_t *mfnp = data; struct mmap_batch_state *st = state; + struct vm_area_struct *vma = st->vma; + struct page **pages = vma->vm_private_data; + struct page *cur_page = NULL; int ret; + if (xen_feature(XENFEAT_auto_translated_physmap)) + cur_page = pages[st->index++]; + ret = xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1, st->vma->vm_page_prot, st->domain, - NULL); + &cur_page); /* Store error code for second pass. */ *(st->err++) = ret; @@ -304,6 +321,32 @@ static int mmap_return_errors_v1(void *data, void *state) return __put_user(*mfnp, st->user_mfn++); } +/* Allocate pfns that are then mapped with gmfns from foreign domid. Update + * the vma with the page info to use later. + * Returns: 0 if success, otherwise -errno + */ +static int alloc_empty_pages(struct vm_area_struct *vma, int numpgs) +{ + int rc; + struct page **pages; + + pages = kcalloc(numpgs, sizeof(pages[0]), GFP_KERNEL); + if (pages == NULL) + return -ENOMEM; + + rc = alloc_xenballooned_pages(numpgs, pages, 0); + if (rc != 0) { + pr_warn("%s Could not alloc %d pfns rc:%d\n", __func__, + numpgs, rc); + kfree(pages); + return -ENOMEM; + } + BUG_ON(vma->vm_private_data != PRIV_VMA_LOCKED); + vma->vm_private_data = pages; + + return 0; +} + static struct vm_operations_struct privcmd_vm_ops; static long privcmd_ioctl_mmap_batch(void __user *udata, int version) @@ -371,10 +414,18 @@ static long privcmd_ioctl_mmap_batch(void __user *udata, int version) up_write(&mm->mmap_sem); goto out; } + if (xen_feature(XENFEAT_auto_translated_physmap)) { + ret = alloc_empty_pages(vma, m.num); + if (ret < 0) { + up_write(&mm->mmap_sem); + goto out; + } + } state.domain = m.dom; state.vma = vma; state.va = m.addr; + state.index = 0; state.global_error = 0; state.err = err_array; @@ -439,6 +490,19 @@ static long privcmd_ioctl(struct file *file, return ret; } +static void privcmd_close(struct vm_area_struct *vma) +{ + struct page **pages = vma->vm_private_data; + int numpgs = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; + + if (!xen_feature(XENFEAT_auto_translated_physmap || !numpgs || !pages)) + return; + + xen_unmap_domain_mfn_range(vma, numpgs, pages); + free_xenballooned_pages(numpgs, pages); + kfree(pages); +} + static int privcmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf) { printk(KERN_DEBUG "privcmd_fault: vma=%p %lx-%lx, pgoff=%lx, uv=%p\n", @@ -449,6 +513,7 @@ static int privcmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf) } static struct vm_operations_struct privcmd_vm_ops = { + .close = privcmd_close, .fault = privcmd_fault }; @@ -466,7 +531,7 @@ static int privcmd_mmap(struct file *file, struct vm_area_struct *vma) static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma) { - return (xchg(&vma->vm_private_data, (void *)1) == NULL); + return !cmpxchg(&vma->vm_private_data, NULL, PRIV_VMA_LOCKED); } const struct file_operations xen_privcmd_fops = { From c2374bf57e7861039bb129ead538515502ef7860 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Wed, 3 Oct 2012 12:17:50 +0100 Subject: [PATCH 11/15] xen: balloon: allow PVMMU interfaces to be compiled out The ARM platform has no concept of PVMMU and therefor no HYPERVISOR_update_va_mapping et al. Allow this code to be compiled out when not required. In some similar situations (e.g. P2M) we have defined dummy functions to avoid this, however I think we can/should draw the line at dummying out actual hypercalls. Signed-off-by: Ian Campbell Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/Kconfig | 1 + drivers/xen/Kconfig | 3 +++ drivers/xen/balloon.c | 5 ++++- 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig index fdce49c7aff..c31ee77e1ec 100644 --- a/arch/x86/xen/Kconfig +++ b/arch/x86/xen/Kconfig @@ -6,6 +6,7 @@ config XEN bool "Xen guest support" select PARAVIRT select PARAVIRT_CLOCK + select XEN_HAVE_PVMMU depends on X86_64 || (X86_32 && X86_PAE && !X86_VISWS) depends on X86_CMPXCHG && X86_TSC help diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig index 126d8ce591c..cabfa97f467 100644 --- a/drivers/xen/Kconfig +++ b/drivers/xen/Kconfig @@ -206,4 +206,7 @@ config XEN_MCE_LOG Allow kernel fetching MCE error from Xen platform and converting it into Linux mcelog format for mcelog tools +config XEN_HAVE_PVMMU + bool + endmenu diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index d6886d90ccf..a56776dbe09 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -359,6 +359,7 @@ static enum bp_state increase_reservation(unsigned long nr_pages) set_phys_to_machine(pfn, frame_list[i]); +#ifdef CONFIG_XEN_HAVE_PVMMU /* Link back into the page tables if not highmem. */ if (xen_pv_domain() && !PageHighMem(page)) { int ret; @@ -368,6 +369,7 @@ static enum bp_state increase_reservation(unsigned long nr_pages) 0); BUG_ON(ret); } +#endif /* Relinquish the page back to the allocator. */ ClearPageReserved(page); @@ -416,13 +418,14 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp) scrub_page(page); +#ifdef CONFIG_XEN_HAVE_PVMMU if (xen_pv_domain() && !PageHighMem(page)) { ret = HYPERVISOR_update_va_mapping( (unsigned long)__va(pfn << PAGE_SHIFT), __pte_ma(0), 0); BUG_ON(ret); } - +#endif } /* Ensure that ballooned highmem pages don't have kmaps. */ From c61ba7291b81ed7fd8c1dba1b45d05cae3f150f7 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Wed, 3 Oct 2012 12:28:26 +0100 Subject: [PATCH 12/15] xen: arm: enable balloon driver The code is now in a state where can just enable it. Drop the *_xenballloned_pages duplicates since these are now supplied by the balloon code. Signed-off-by: Ian Campbell Acked-by: Stefano Stabellini Signed-off-by: Konrad Rzeszutek Wilk --- arch/arm/xen/enlighten.c | 23 +++++------------------ drivers/xen/Makefile | 4 ++-- 2 files changed, 7 insertions(+), 20 deletions(-) diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c index 59bcb96ac36..ba5cc134a7d 100644 --- a/arch/arm/xen/enlighten.c +++ b/arch/arm/xen/enlighten.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -29,6 +30,10 @@ struct shared_info *HYPERVISOR_shared_info = (void *)&xen_dummy_shared_info; DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu); +/* These are unused until we support booting "pre-ballooned" */ +unsigned long xen_released_pages; +struct xen_memory_region xen_extra_mem[XEN_EXTRA_MEM_MAX_REGIONS] __initdata; + /* TODO: to be removed */ __read_mostly int xen_have_vector_callback; EXPORT_SYMBOL_GPL(xen_have_vector_callback); @@ -148,21 +153,3 @@ static int __init xen_init_events(void) return 0; } postcore_initcall(xen_init_events); - -/* XXX: only until balloon is properly working */ -int alloc_xenballooned_pages(int nr_pages, struct page **pages, bool highmem) -{ - *pages = alloc_pages(highmem ? GFP_HIGHUSER : GFP_KERNEL, - get_order(nr_pages)); - if (*pages == NULL) - return -ENOMEM; - return 0; -} -EXPORT_SYMBOL_GPL(alloc_xenballooned_pages); - -void free_xenballooned_pages(int nr_pages, struct page **pages) -{ - kfree(*pages); - *pages = NULL; -} -EXPORT_SYMBOL_GPL(free_xenballooned_pages); diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile index 0e863703545..909bb56e3ad 100644 --- a/drivers/xen/Makefile +++ b/drivers/xen/Makefile @@ -1,8 +1,8 @@ ifneq ($(CONFIG_ARM),y) -obj-y += manage.o balloon.o +obj-y += manage.o obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o endif -obj-y += grant-table.o features.o events.o +obj-y += grant-table.o features.o events.o balloon.o obj-y += xenbus/ nostackp := $(call cc-option, -fno-stack-protector) From 7892f6928d0cd9ef9200a193183c2033b3143dab Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Tue, 16 Oct 2012 17:19:15 +0100 Subject: [PATCH 13/15] xen: correctly use xen_pfn_t in remap_domain_mfn_range. For Xen on ARM a PFN is 64 bits so we need to use the appropriate type here. Signed-off-by: Ian Campbell Acked-by: Stefano Stabellini Signed-off-by: Konrad Rzeszutek Wilk [v2: include the necessary header, Reported-by: Fengguang Wu ] --- arch/x86/xen/mmu.c | 2 +- include/xen/xen-ops.h | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 0f6386a5b43..fd8393f5506 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -2478,7 +2478,7 @@ static int remap_area_mfn_pte_fn(pte_t *ptep, pgtable_t token, int xen_remap_domain_mfn_range(struct vm_area_struct *vma, unsigned long addr, - unsigned long mfn, int nr, + xen_pfn_t mfn, int nr, pgprot_t prot, unsigned domid, struct page **pages) diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h index 990b43e441e..a50e7bee891 100644 --- a/include/xen/xen-ops.h +++ b/include/xen/xen-ops.h @@ -2,6 +2,7 @@ #define INCLUDE_XEN_OPS_H #include +#include DECLARE_PER_CPU(struct vcpu_info *, xen_vcpu); @@ -26,7 +27,7 @@ void xen_destroy_contiguous_region(unsigned long vstart, unsigned int order); struct vm_area_struct; int xen_remap_domain_mfn_range(struct vm_area_struct *vma, unsigned long addr, - unsigned long mfn, int nr, + xen_pfn_t mfn, int nr, pgprot_t prot, unsigned domid, struct page **pages); int xen_unmap_domain_mfn_range(struct vm_area_struct *vma, From f832da068b0aadb15f747f6427b6bf945f525ba4 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Wed, 3 Oct 2012 16:37:09 +0100 Subject: [PATCH 14/15] xen: arm: implement remap interfaces needed for privcmd mappings. We use XENMEM_add_to_physmap_range which is the preferred interface for foreign mappings. Acked-by: Mukesh Rathor Acked-by: Stefano Stabellini Signed-off-by: Ian Campbell Signed-off-by: Konrad Rzeszutek Wilk --- arch/arm/include/asm/xen/interface.h | 1 + arch/arm/xen/enlighten.c | 100 ++++++++++++++++++++++++++- arch/x86/include/asm/xen/interface.h | 1 + include/xen/interface/memory.h | 44 +++++++++++- 4 files changed, 141 insertions(+), 5 deletions(-) diff --git a/arch/arm/include/asm/xen/interface.h b/arch/arm/include/asm/xen/interface.h index 5000397134b..1151188bcd8 100644 --- a/arch/arm/include/asm/xen/interface.h +++ b/arch/arm/include/asm/xen/interface.h @@ -49,6 +49,7 @@ DEFINE_GUEST_HANDLE(void); DEFINE_GUEST_HANDLE(uint64_t); DEFINE_GUEST_HANDLE(uint32_t); DEFINE_GUEST_HANDLE(xen_pfn_t); +DEFINE_GUEST_HANDLE(xen_ulong_t); /* Maximum number of virtual CPUs in multi-processor guests. */ #define MAX_VIRT_CPUS 1 diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c index ba5cc134a7d..f28fc1ac876 100644 --- a/arch/arm/xen/enlighten.c +++ b/arch/arm/xen/enlighten.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -18,6 +19,8 @@ #include #include +#include + struct start_info _xen_start_info; struct start_info *xen_start_info = &_xen_start_info; EXPORT_SYMBOL_GPL(xen_start_info); @@ -43,15 +46,106 @@ EXPORT_SYMBOL_GPL(xen_platform_pci_unplug); static __read_mostly int xen_events_irq = -1; +/* map fgmfn of domid to lpfn in the current domain */ +static int map_foreign_page(unsigned long lpfn, unsigned long fgmfn, + unsigned int domid) +{ + int rc; + struct xen_add_to_physmap_range xatp = { + .domid = DOMID_SELF, + .foreign_domid = domid, + .size = 1, + .space = XENMAPSPACE_gmfn_foreign, + }; + xen_ulong_t idx = fgmfn; + xen_pfn_t gpfn = lpfn; + + set_xen_guest_handle(xatp.idxs, &idx); + set_xen_guest_handle(xatp.gpfns, &gpfn); + + rc = HYPERVISOR_memory_op(XENMEM_add_to_physmap_range, &xatp); + if (rc) { + pr_warn("Failed to map pfn to mfn rc:%d pfn:%lx mfn:%lx\n", + rc, lpfn, fgmfn); + return 1; + } + return 0; +} + +struct remap_data { + xen_pfn_t fgmfn; /* foreign domain's gmfn */ + pgprot_t prot; + domid_t domid; + struct vm_area_struct *vma; + int index; + struct page **pages; + struct xen_remap_mfn_info *info; +}; + +static int remap_pte_fn(pte_t *ptep, pgtable_t token, unsigned long addr, + void *data) +{ + struct remap_data *info = data; + struct page *page = info->pages[info->index++]; + unsigned long pfn = page_to_pfn(page); + pte_t pte = pfn_pte(pfn, info->prot); + + if (map_foreign_page(pfn, info->fgmfn, info->domid)) + return -EFAULT; + set_pte_at(info->vma->vm_mm, addr, ptep, pte); + + return 0; +} + int xen_remap_domain_mfn_range(struct vm_area_struct *vma, unsigned long addr, - unsigned long mfn, int nr, - pgprot_t prot, unsigned domid) + xen_pfn_t mfn, int nr, + pgprot_t prot, unsigned domid, + struct page **pages) { - return -ENOSYS; + int err; + struct remap_data data; + + /* TBD: Batching, current sole caller only does page at a time */ + if (nr > 1) + return -EINVAL; + + data.fgmfn = mfn; + data.prot = prot; + data.domid = domid; + data.vma = vma; + data.index = 0; + data.pages = pages; + err = apply_to_page_range(vma->vm_mm, addr, nr << PAGE_SHIFT, + remap_pte_fn, &data); + return err; } EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_range); +int xen_unmap_domain_mfn_range(struct vm_area_struct *vma, + int nr, struct page **pages) +{ + int i; + + for (i = 0; i < nr; i++) { + struct xen_remove_from_physmap xrp; + unsigned long rc, pfn; + + pfn = page_to_pfn(pages[i]); + + xrp.domid = DOMID_SELF; + xrp.gpfn = pfn; + rc = HYPERVISOR_memory_op(XENMEM_remove_from_physmap, &xrp); + if (rc) { + pr_warn("Failed to unmap pfn:%lx rc:%ld\n", + pfn, rc); + return rc; + } + } + return 0; +} +EXPORT_SYMBOL_GPL(xen_unmap_domain_mfn_range); + /* * see Documentation/devicetree/bindings/arm/xen.txt for the * documentation of the Xen Device Tree format. diff --git a/arch/x86/include/asm/xen/interface.h b/arch/x86/include/asm/xen/interface.h index 54d52ff1304..fd9cb7695b5 100644 --- a/arch/x86/include/asm/xen/interface.h +++ b/arch/x86/include/asm/xen/interface.h @@ -63,6 +63,7 @@ DEFINE_GUEST_HANDLE(void); DEFINE_GUEST_HANDLE(uint64_t); DEFINE_GUEST_HANDLE(uint32_t); DEFINE_GUEST_HANDLE(xen_pfn_t); +DEFINE_GUEST_HANDLE(xen_ulong_t); #endif #ifndef HYPERVISOR_VIRT_START diff --git a/include/xen/interface/memory.h b/include/xen/interface/memory.h index 90712e2072d..b40a4315cb8 100644 --- a/include/xen/interface/memory.h +++ b/include/xen/interface/memory.h @@ -153,6 +153,14 @@ struct xen_machphys_mapping { }; DEFINE_GUEST_HANDLE_STRUCT(xen_machphys_mapping_t); +#define XENMAPSPACE_shared_info 0 /* shared info page */ +#define XENMAPSPACE_grant_table 1 /* grant table page */ +#define XENMAPSPACE_gmfn 2 /* GMFN */ +#define XENMAPSPACE_gmfn_range 3 /* GMFN range, XENMEM_add_to_physmap only. */ +#define XENMAPSPACE_gmfn_foreign 4 /* GMFN from another dom, + * XENMEM_add_to_physmap_range only. + */ + /* * Sets the GPFN at which a particular page appears in the specified guest's * pseudophysical address space. @@ -167,8 +175,6 @@ struct xen_add_to_physmap { uint16_t size; /* Source mapping space. */ -#define XENMAPSPACE_shared_info 0 /* shared info page */ -#define XENMAPSPACE_grant_table 1 /* grant table page */ unsigned int space; /* Index into source mapping space. */ @@ -182,6 +188,24 @@ DEFINE_GUEST_HANDLE_STRUCT(xen_add_to_physmap); /*** REMOVED ***/ /*#define XENMEM_translate_gpfn_list 8*/ +#define XENMEM_add_to_physmap_range 23 +struct xen_add_to_physmap_range { + /* Which domain to change the mapping for. */ + domid_t domid; + uint16_t space; /* => enum phys_map_space */ + + /* Number of pages to go through */ + uint16_t size; + domid_t foreign_domid; /* IFF gmfn_foreign */ + + /* Indexes into space being mapped. */ + GUEST_HANDLE(xen_ulong_t) idxs; + + /* GPFN in domid where the source mapping page should appear. */ + GUEST_HANDLE(xen_pfn_t) gpfns; +}; +DEFINE_GUEST_HANDLE_STRUCT(xen_add_to_physmap_range); + /* * Returns the pseudo-physical memory map as it was when the domain * was started (specified by XENMEM_set_memory_map). @@ -217,4 +241,20 @@ DEFINE_GUEST_HANDLE_STRUCT(xen_memory_map); * during a driver critical region. */ extern spinlock_t xen_reservation_lock; + +/* + * Unmaps the page appearing at a particular GPFN from the specified guest's + * pseudophysical address space. + * arg == addr of xen_remove_from_physmap_t. + */ +#define XENMEM_remove_from_physmap 15 +struct xen_remove_from_physmap { + /* Which domain to change the mapping for. */ + domid_t domid; + + /* GPFN of the current mapping of the page. */ + xen_pfn_t gpfn; +}; +DEFINE_GUEST_HANDLE_STRUCT(xen_remove_from_physmap); + #endif /* __XEN_PUBLIC_MEMORY_H__ */ From a7be94ac8d69c037d08f0fd94b45a593f1d45176 Mon Sep 17 00:00:00 2001 From: Olaf Hering Date: Thu, 29 Nov 2012 15:32:26 +0100 Subject: [PATCH 15/15] xen/PVonHVM: fix compile warning in init_hvm_pv_info After merging the xen-two tree, today's linux-next build (x86_64 allmodconfig) produced this warning: arch/x86/xen/enlighten.c: In function 'init_hvm_pv_info': arch/x86/xen/enlighten.c:1617:16: warning: unused variable 'ebx' [-Wunused-variable] arch/x86/xen/enlighten.c:1617:11: warning: unused variable 'eax' [-Wunused-variable] Introduced by commit 9d02b43dee0d ("xen PVonHVM: use E820_Reserved area for shared_info"). Signed-off-by: Olaf Hering Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/enlighten.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 3325cd9f779..3aeaa933b52 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1578,7 +1578,7 @@ static void __init xen_hvm_init_shared_info(void) static void __init init_hvm_pv_info(void) { - uint32_t eax, ebx, ecx, edx, pages, msr, base; + uint32_t ecx, edx, pages, msr, base; u64 pfn; base = xen_cpuid_base();