diff --git a/arch/arm/include/asm/xen/interface.h b/arch/arm/include/asm/xen/interface.h
index 5000397134b..1151188bcd8 100644
--- a/arch/arm/include/asm/xen/interface.h
+++ b/arch/arm/include/asm/xen/interface.h
@@ -49,6 +49,7 @@ DEFINE_GUEST_HANDLE(void);
 DEFINE_GUEST_HANDLE(uint64_t);
 DEFINE_GUEST_HANDLE(uint32_t);
 DEFINE_GUEST_HANDLE(xen_pfn_t);
+DEFINE_GUEST_HANDLE(xen_ulong_t);
 
 /* Maximum number of virtual CPUs in multi-processor guests. */
 #define MAX_VIRT_CPUS 1
diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c
index f5760927544..7a32976fa2a 100644
--- a/arch/arm/xen/enlighten.c
+++ b/arch/arm/xen/enlighten.c
@@ -8,6 +8,8 @@
 #include
 #include
 #include
+#include
+#include
 #include
 #include
 #include
@@ -17,6 +19,8 @@
 #include
 #include
 
+#include
+
 struct start_info _xen_start_info;
 struct start_info *xen_start_info = &_xen_start_info;
 EXPORT_SYMBOL_GPL(xen_start_info);
@@ -29,6 +33,10 @@ struct shared_info *HYPERVISOR_shared_info = (void *)&xen_dummy_shared_info;
 
 DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu);
 
+/* These are unused until we support booting "pre-ballooned" */
+unsigned long xen_released_pages;
+struct xen_memory_region xen_extra_mem[XEN_EXTRA_MEM_MAX_REGIONS] __initdata;
+
 /* TODO: to be removed */
 __read_mostly int xen_have_vector_callback;
 EXPORT_SYMBOL_GPL(xen_have_vector_callback);
@@ -38,15 +46,106 @@ EXPORT_SYMBOL_GPL(xen_platform_pci_unplug);
 
 static __read_mostly int xen_events_irq = -1;
 
+/* map fgmfn of domid to lpfn in the current domain */
+static int map_foreign_page(unsigned long lpfn, unsigned long fgmfn,
+			    unsigned int domid)
+{
+	int rc;
+	struct xen_add_to_physmap_range xatp = {
+		.domid = DOMID_SELF,
+		.foreign_domid = domid,
+		.size = 1,
+		.space = XENMAPSPACE_gmfn_foreign,
+	};
+	xen_ulong_t idx = fgmfn;
+	xen_pfn_t gpfn = lpfn;
+
+	set_xen_guest_handle(xatp.idxs, &idx);
+	set_xen_guest_handle(xatp.gpfns, &gpfn);
+
+	rc = HYPERVISOR_memory_op(XENMEM_add_to_physmap_range, &xatp);
+	if (rc) {
+		pr_warn("Failed to map pfn to mfn rc:%d pfn:%lx mfn:%lx\n",
+			rc, lpfn, fgmfn);
+		return 1;
+	}
+	return 0;
+}
+
+struct remap_data {
+	xen_pfn_t fgmfn; /* foreign domain's gmfn */
+	pgprot_t prot;
+	domid_t domid;
+	struct vm_area_struct *vma;
+	int index;
+	struct page **pages;
+	struct xen_remap_mfn_info *info;
+};
+
+static int remap_pte_fn(pte_t *ptep, pgtable_t token, unsigned long addr,
+			void *data)
+{
+	struct remap_data *info = data;
+	struct page *page = info->pages[info->index++];
+	unsigned long pfn = page_to_pfn(page);
+	pte_t pte = pfn_pte(pfn, info->prot);
+
+	if (map_foreign_page(pfn, info->fgmfn, info->domid))
+		return -EFAULT;
+	set_pte_at(info->vma->vm_mm, addr, ptep, pte);
+
+	return 0;
+}
+
 int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
 			       unsigned long addr,
-			       unsigned long mfn, int nr,
-			       pgprot_t prot, unsigned domid)
+			       xen_pfn_t mfn, int nr,
+			       pgprot_t prot, unsigned domid,
+			       struct page **pages)
 {
-	return -ENOSYS;
+	int err;
+	struct remap_data data;
+
+	/* TBD: Batching, current sole caller only does page at a time */
+	if (nr > 1)
+		return -EINVAL;
+
+	data.fgmfn = mfn;
+	data.prot = prot;
+	data.domid = domid;
+	data.vma = vma;
+	data.index = 0;
+	data.pages = pages;
+	err = apply_to_page_range(vma->vm_mm, addr, nr << PAGE_SHIFT,
+				  remap_pte_fn, &data);
+	return err;
 }
 EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_range);
 
+int xen_unmap_domain_mfn_range(struct vm_area_struct *vma,
+			       int nr, struct page **pages)
+{
+	int i;
+
+	for (i = 0; i < nr; i++) {
+		struct xen_remove_from_physmap xrp;
+		unsigned long rc, pfn;
+
+		pfn = page_to_pfn(pages[i]);
+
+		xrp.domid = DOMID_SELF;
+		xrp.gpfn = pfn;
+		rc = HYPERVISOR_memory_op(XENMEM_remove_from_physmap, &xrp);
+		if (rc) {
+			pr_warn("Failed to unmap pfn:%lx rc:%ld\n",
+				pfn, rc);
+			return rc;
+		}
+	}
+	return 0;
+}
+EXPORT_SYMBOL_GPL(xen_unmap_domain_mfn_range);
+
 /*
  * see Documentation/devicetree/bindings/arm/xen.txt for the
  * documentation of the Xen Device Tree format.
@@ -149,24 +248,6 @@ static int __init xen_init_events(void)
 }
 postcore_initcall(xen_init_events);
 
-/* XXX: only until balloon is properly working */
-int alloc_xenballooned_pages(int nr_pages, struct page **pages, bool highmem)
-{
-	*pages = alloc_pages(highmem ? GFP_HIGHUSER : GFP_KERNEL,
-			get_order(nr_pages));
-	if (*pages == NULL)
-		return -ENOMEM;
-	return 0;
-}
-EXPORT_SYMBOL_GPL(alloc_xenballooned_pages);
-
-void free_xenballooned_pages(int nr_pages, struct page **pages)
-{
-	kfree(*pages);
-	*pages = NULL;
-}
-EXPORT_SYMBOL_GPL(free_xenballooned_pages);
-
 /* In the hypervisor.S file. */
 EXPORT_SYMBOL_GPL(HYPERVISOR_event_channel_op);
 EXPORT_SYMBOL_GPL(HYPERVISOR_grant_table_op);
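For context, this is how a caller is expected to consume the new ARM mapping path: back each mapped foreign gmfn with a locally ballooned page, hand that page to xen_remap_domain_mfn_range(), and give everything back on teardown. The sketch below is illustrative only (the demo_ helper is not part of the patch; error handling is reduced to the essentials):

/*
 * Illustrative only: map one foreign gmfn from 'domid' into 'vma' at 'addr'
 * using a ballooned-out local page as backing, then tear it down again.
 */
static int demo_map_one_foreign_gmfn(struct vm_area_struct *vma,
				     unsigned long addr, xen_pfn_t gmfn,
				     domid_t domid)
{
	struct page *page;
	int rc;

	rc = alloc_xenballooned_pages(1, &page, false);
	if (rc)
		return rc;

	/* One page at a time: the ARM implementation rejects nr > 1. */
	rc = xen_remap_domain_mfn_range(vma, addr, gmfn, 1,
					vma->vm_page_prot, domid, &page);
	if (rc) {
		free_xenballooned_pages(1, &page);
		return rc;
	}

	/* ... use the mapping ... */

	xen_unmap_domain_mfn_range(vma, 1, &page);
	free_xenballooned_pages(1, &page);
	return 0;
}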
diff --git a/arch/x86/include/asm/xen/interface.h b/arch/x86/include/asm/xen/interface.h
index 54d52ff1304..fd9cb7695b5 100644
--- a/arch/x86/include/asm/xen/interface.h
+++ b/arch/x86/include/asm/xen/interface.h
@@ -63,6 +63,7 @@ DEFINE_GUEST_HANDLE(void);
 DEFINE_GUEST_HANDLE(uint64_t);
 DEFINE_GUEST_HANDLE(uint32_t);
 DEFINE_GUEST_HANDLE(xen_pfn_t);
+DEFINE_GUEST_HANDLE(xen_ulong_t);
 #endif
 
 #ifndef HYPERVISOR_VIRT_START
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
index 9a6775c9ddc..131dacd2748 100644
--- a/arch/x86/xen/Kconfig
+++ b/arch/x86/xen/Kconfig
@@ -6,6 +6,7 @@ config XEN
 	bool "Xen guest support"
 	select PARAVIRT
 	select PARAVIRT_CLOCK
+	select XEN_HAVE_PVMMU
 	depends on X86_64 || (X86_32 && X86_PAE && !X86_VISWS)
 	depends on X86_TSC
 	help
" (preserve-AD)" : ""); } +/* Check if running on Xen version (major, minor) or later */ +bool +xen_running_on_version_or_later(unsigned int major, unsigned int minor) +{ + unsigned int version; + + if (!xen_domain()) + return false; + + version = HYPERVISOR_xen_version(XENVER_version, NULL); + if ((((version >> 16) == major) && ((version & 0xffff) >= minor)) || + ((version >> 16) > major)) + return true; + return false; +} #define CPUID_THERM_POWER_LEAF 6 #define APERFMPERF_PRESENT 0 @@ -287,8 +302,7 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx, static bool __init xen_check_mwait(void) { -#if defined(CONFIG_ACPI) && !defined(CONFIG_ACPI_PROCESSOR_AGGREGATOR) && \ - !defined(CONFIG_ACPI_PROCESSOR_AGGREGATOR_MODULE) +#ifdef CONFIG_ACPI struct xen_platform_op op = { .cmd = XENPF_set_processor_pminfo, .u.set_pminfo.id = -1, @@ -309,6 +323,13 @@ static bool __init xen_check_mwait(void) if (!xen_initial_domain()) return false; + /* + * When running under platform earlier than Xen4.2, do not expose + * mwait, to avoid the risk of loading native acpi pad driver + */ + if (!xen_running_on_version_or_later(4, 2)) + return false; + ax = 1; cx = 0; @@ -1495,51 +1516,72 @@ asmlinkage void __init xen_start_kernel(void) #endif } -void __ref xen_hvm_init_shared_info(void) -{ - int cpu; - struct xen_add_to_physmap xatp; - static struct shared_info *shared_info_page = 0; +#ifdef CONFIG_XEN_PVHVM +#define HVM_SHARED_INFO_ADDR 0xFE700000UL +static struct shared_info *xen_hvm_shared_info; +static unsigned long xen_hvm_sip_phys; +static int xen_major, xen_minor; + +static void xen_hvm_connect_shared_info(unsigned long pfn) +{ + struct xen_add_to_physmap xatp; - if (!shared_info_page) - shared_info_page = (struct shared_info *) - extend_brk(PAGE_SIZE, PAGE_SIZE); xatp.domid = DOMID_SELF; xatp.idx = 0; xatp.space = XENMAPSPACE_shared_info; - xatp.gpfn = __pa(shared_info_page) >> PAGE_SHIFT; + xatp.gpfn = pfn; if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp)) BUG(); - HYPERVISOR_shared_info = (struct shared_info *)shared_info_page; +} +static void __init xen_hvm_set_shared_info(struct shared_info *sip) +{ + int cpu; + + HYPERVISOR_shared_info = sip; /* xen_vcpu is a pointer to the vcpu_info struct in the shared_info * page, we use it in the event channel upcall and in some pvclock * related functions. We don't need the vcpu_info placement * optimizations because we don't use any pv_mmu or pv_irq op on - * HVM. - * When xen_hvm_init_shared_info is run at boot time only vcpu 0 is - * online but xen_hvm_init_shared_info is run at resume time too and - * in that case multiple vcpus might be online. */ - for_each_online_cpu(cpu) { + * HVM. */ + for_each_online_cpu(cpu) per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu]; - } } -#ifdef CONFIG_XEN_PVHVM +/* Reconnect the shared_info pfn to a (new) mfn */ +void xen_hvm_resume_shared_info(void) +{ + xen_hvm_connect_shared_info(xen_hvm_sip_phys >> PAGE_SHIFT); +} + +/* Xen tools prior to Xen 4 do not provide a E820_Reserved area for guest usage. + * On these old tools the shared info page will be placed in E820_Ram. + * Xen 4 provides a E820_Reserved area at 0xFC000000, and this code expects + * that nothing is mapped up to HVM_SHARED_INFO_ADDR. + * Xen 4.3+ provides an explicit 1MB area at HVM_SHARED_INFO_ADDR which is used + * here for the shared info page. 
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index dcf5f2dd91e..01de35c7722 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -2497,8 +2497,10 @@ static int remap_area_mfn_pte_fn(pte_t *ptep, pgtable_t token,
 
 int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
 			       unsigned long addr,
-			       unsigned long mfn, int nr,
-			       pgprot_t prot, unsigned domid)
+			       xen_pfn_t mfn, int nr,
+			       pgprot_t prot, unsigned domid,
+			       struct page **pages)
+
 {
 	struct remap_data rmd;
 	struct mmu_update mmu_update[REMAP_BATCH_SIZE];
@@ -2542,3 +2544,14 @@ out:
 	return err;
 }
 EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_range);
+
+/* Returns: 0 success */
+int xen_unmap_domain_mfn_range(struct vm_area_struct *vma,
+			       int numpgs, struct page **pages)
+{
+	if (!pages || !xen_feature(XENFEAT_auto_translated_physmap))
+		return 0;
+
+	return -EINVAL;
+}
+EXPORT_SYMBOL_GPL(xen_unmap_domain_mfn_range);
diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c
index 45329c8c226..ae8a00c39de 100644
--- a/arch/x86/xen/suspend.c
+++ b/arch/x86/xen/suspend.c
@@ -30,7 +30,7 @@ void xen_arch_hvm_post_suspend(int suspend_cancelled)
 {
 #ifdef CONFIG_XEN_PVHVM
 	int cpu;
-	xen_hvm_init_shared_info();
+	xen_hvm_resume_shared_info();
 	xen_callback_vector();
 	xen_unplug_emulated_devices();
 	if (xen_feature(XENFEAT_hvm_safe_pvclock)) {
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index a95b41744ad..d2e73d19d36 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -40,7 +40,7 @@ void xen_enable_syscall(void);
 void xen_vcpu_restore(void);
 
 void xen_callback_vector(void);
-void xen_hvm_init_shared_info(void);
+void xen_hvm_resume_shared_info(void);
 void xen_unplug_emulated_devices(void);
 
 void __init xen_build_dynamic_phys_to_machine(void);
diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
index 126d8ce591c..cabfa97f467 100644
--- a/drivers/xen/Kconfig
+++ b/drivers/xen/Kconfig
@@ -206,4 +206,7 @@ config XEN_MCE_LOG
 	  Allow kernel fetching MCE error from Xen platform and converting it
 	  into Linux mcelog format for mcelog tools
 
+config XEN_HAVE_PVMMU
+       bool
+
 endmenu
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
index 74354708c6c..fb213cf81a7 100644
--- a/drivers/xen/Makefile
+++ b/drivers/xen/Makefile
@@ -1,9 +1,9 @@
 ifneq ($(CONFIG_ARM),y)
-obj-y	+= manage.o balloon.o
+obj-y	+= manage.o
 obj-$(CONFIG_HOTPLUG_CPU)	+= cpu_hotplug.o
 endif
 obj-$(CONFIG_X86)		+= fallback.o
-obj-y	+= grant-table.o features.o events.o
+obj-y	+= grant-table.o features.o events.o balloon.o
 obj-y	+= xenbus/
 
 nostackp := $(call cc-option, -fno-stack-protector)
@@ -11,7 +11,8 @@ CFLAGS_features.o			:= $(nostackp)
 
 dom0-$(CONFIG_PCI) += pci.o
 dom0-$(CONFIG_USB_SUPPORT) += dbgp.o
-dom0-$(CONFIG_ACPI) += acpi.o
+dom0-$(CONFIG_ACPI) += acpi.o $(xen-pad-y)
+xen-pad-$(CONFIG_X86) += xen-acpi-pad.o
 dom0-$(CONFIG_X86) += pcpu.o
 obj-$(CONFIG_XEN_DOM0)		+= $(dom0-y)
 obj-$(CONFIG_BLOCK)		+= biomerge.o
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index d6886d90ccf..a56776dbe09 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -359,6 +359,7 @@ static enum bp_state increase_reservation(unsigned long nr_pages)
 
 		set_phys_to_machine(pfn, frame_list[i]);
 
+#ifdef CONFIG_XEN_HAVE_PVMMU
 		/* Link back into the page tables if not highmem. */
 		if (xen_pv_domain() && !PageHighMem(page)) {
 			int ret;
@@ -368,6 +369,7 @@ static enum bp_state increase_reservation(unsigned long nr_pages)
 				0);
 			BUG_ON(ret);
 		}
+#endif
 
 		/* Relinquish the page back to the allocator. */
 		ClearPageReserved(page);
@@ -416,13 +418,14 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp)
 
 		scrub_page(page);
 
+#ifdef CONFIG_XEN_HAVE_PVMMU
 		if (xen_pv_domain() && !PageHighMem(page)) {
 			ret = HYPERVISOR_update_va_mapping(
 				(unsigned long)__va(pfn << PAGE_SHIFT),
 				__pte_ma(0), 0);
 			BUG_ON(ret);
 		}
-
+#endif
 	}
 
 	/* Ensure that ballooned highmem pages don't have kmaps. */
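With balloon.o now built on every Xen architecture, the CONFIG_XEN_HAVE_PVMMU guards mark the only parts of the driver that are specific to classic PV MMU guests. Conceptually, the compile-time switch encodes the run-time distinction sketched below (illustrative only, not code from the patch; the actual driver uses the #ifdef rather than this check):

/* Illustrative only: the decision the new #ifdef blocks capture. */
static bool demo_needs_pv_pte_fixup(struct page *page)
{
	/* Auto-translated guests (ARM, PVHVM) never patch PTEs by hypercall. */
	if (xen_feature(XENFEAT_auto_translated_physmap))
		return false;

	/* Classic PV guests must fix the kernel mapping of lowmem pages. */
	return xen_pv_domain() && !PageHighMem(page);
}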
diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c
index 71f5c459b08..0bbbccbb1f1 100644
--- a/drivers/xen/privcmd.c
+++ b/drivers/xen/privcmd.c
@@ -33,11 +33,14 @@
 #include
 #include
 #include
+#include
 
 #include "privcmd.h"
 
 MODULE_LICENSE("GPL");
 
+#define PRIV_VMA_LOCKED ((void *)1)
+
 #ifndef HAVE_ARCH_PRIVCMD_MMAP
 static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma);
 #endif
@@ -178,7 +181,7 @@ static int mmap_mfn_range(void *data, void *state)
 					msg->va & PAGE_MASK,
 					msg->mfn, msg->npages,
 					vma->vm_page_prot,
-					st->domain);
+					st->domain, NULL);
 	if (rc < 0)
 		return rc;
 
@@ -199,6 +202,10 @@ static long privcmd_ioctl_mmap(void __user *udata)
 	if (!xen_initial_domain())
 		return -EPERM;
 
+	/* We only support privcmd_ioctl_mmap_batch for auto translated. */
+	if (xen_feature(XENFEAT_auto_translated_physmap))
+		return -ENOSYS;
+
 	if (copy_from_user(&mmapcmd, udata, sizeof(mmapcmd)))
 		return -EFAULT;
 
@@ -246,6 +253,7 @@ struct mmap_batch_state {
 	domid_t domain;
 	unsigned long va;
 	struct vm_area_struct *vma;
+	int index;
 	/* A tristate:
 	 *      0 for no errors
 	 *      1 if at least one error has happened (and no
@@ -260,14 +268,24 @@ struct mmap_batch_state {
 	xen_pfn_t __user *user_mfn;
 };
 
+/* auto translated dom0 note: if domU being created is PV, then mfn is
+ * mfn(addr on bus). If it's auto xlated, then mfn is pfn (input to HAP).
+ */
 static int mmap_batch_fn(void *data, void *state)
 {
 	xen_pfn_t *mfnp = data;
 	struct mmap_batch_state *st = state;
+	struct vm_area_struct *vma = st->vma;
+	struct page **pages = vma->vm_private_data;
+	struct page *cur_page = NULL;
 	int ret;
 
+	if (xen_feature(XENFEAT_auto_translated_physmap))
+		cur_page = pages[st->index++];
+
 	ret = xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
-					 st->vma->vm_page_prot, st->domain);
+					 st->vma->vm_page_prot, st->domain,
+					 &cur_page);
 
 	/* Store error code for second pass. */
 	*(st->err++) = ret;
@@ -303,6 +321,32 @@ static int mmap_return_errors_v1(void *data, void *state)
 	return __put_user(*mfnp, st->user_mfn++);
 }
 
+/* Allocate pfns that are then mapped with gmfns from foreign domid. Update
+ * the vma with the page info to use later.
+ * Returns: 0 if success, otherwise -errno
+ */
+static int alloc_empty_pages(struct vm_area_struct *vma, int numpgs)
+{
+	int rc;
+	struct page **pages;
+
+	pages = kcalloc(numpgs, sizeof(pages[0]), GFP_KERNEL);
+	if (pages == NULL)
+		return -ENOMEM;
+
+	rc = alloc_xenballooned_pages(numpgs, pages, 0);
+	if (rc != 0) {
+		pr_warn("%s Could not alloc %d pfns rc:%d\n", __func__,
+			numpgs, rc);
+		kfree(pages);
+		return -ENOMEM;
+	}
+	BUG_ON(vma->vm_private_data != PRIV_VMA_LOCKED);
+	vma->vm_private_data = pages;
+
+	return 0;
+}
+
 static struct vm_operations_struct privcmd_vm_ops;
 
 static long privcmd_ioctl_mmap_batch(void __user *udata, int version)
@@ -370,10 +414,18 @@ static long privcmd_ioctl_mmap_batch(void __user *udata, int version)
 		ret = -EINVAL;
 		goto out;
 	}
+	if (xen_feature(XENFEAT_auto_translated_physmap)) {
+		ret = alloc_empty_pages(vma, m.num);
+		if (ret < 0) {
+			up_write(&mm->mmap_sem);
+			goto out;
+		}
+	}
 
 	state.domain        = m.dom;
 	state.vma           = vma;
 	state.va            = m.addr;
+	state.index         = 0;
 	state.global_error  = 0;
 	state.err           = err_array;
 
@@ -442,6 +494,19 @@ static long privcmd_ioctl(struct file *file,
 	return ret;
 }
 
+static void privcmd_close(struct vm_area_struct *vma)
+{
+	struct page **pages = vma->vm_private_data;
+	int numpgs = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+
+	if (!xen_feature(XENFEAT_auto_translated_physmap) || !numpgs || !pages)
+		return;
+
+	xen_unmap_domain_mfn_range(vma, numpgs, pages);
+	free_xenballooned_pages(numpgs, pages);
+	kfree(pages);
+}
+
 static int privcmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
 	printk(KERN_DEBUG "privcmd_fault: vma=%p %lx-%lx, pgoff=%lx, uv=%p\n",
@@ -452,6 +517,7 @@ static int privcmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 }
 
 static struct vm_operations_struct privcmd_vm_ops = {
+	.close = privcmd_close,
 	.fault = privcmd_fault
 };
 
@@ -469,7 +535,7 @@ static int privcmd_mmap(struct file *file, struct vm_area_struct *vma)
 
 static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma)
 {
-	return (xchg(&vma->vm_private_data, (void *)1) == NULL);
+	return !cmpxchg(&vma->vm_private_data, NULL, PRIV_VMA_LOCKED);
 }
 
 const struct file_operations xen_privcmd_fops = {
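Pulled together, the auto-translated path through IOCTL_PRIVCMD_MMAPBATCH now behaves roughly as sketched below (illustrative only, names and error handling simplified): ballooned pages are attached to the VMA up front, each foreign gmfn is mapped into one of those pages, and the VMA's .close handler unmaps and returns everything.

/*
 * Illustrative only: the life cycle privcmd implements for an
 * auto-translated (ARM / PVHVM) toolstack domain.
 */
static int demo_mmapbatch_autoxlate(struct vm_area_struct *vma,
				    xen_pfn_t *gmfns, int num, domid_t dom)
{
	struct page **pages;
	unsigned long va = vma->vm_start;
	int i, rc;

	rc = alloc_empty_pages(vma, num);	/* balloons out 'num' local pfns */
	if (rc)
		return rc;
	pages = vma->vm_private_data;

	for (i = 0; i < num; i++, va += PAGE_SIZE) {
		/* One page per call: the ARM backend rejects nr > 1. */
		rc = xen_remap_domain_mfn_range(vma, va, gmfns[i], 1,
						vma->vm_page_prot, dom,
						&pages[i]);
		if (rc)
			break;
	}

	/* privcmd_close() later unmaps and hands the pages back to the balloon. */
	return rc;
}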
diff --git a/drivers/xen/xen-acpi-pad.c b/drivers/xen/xen-acpi-pad.c
new file mode 100644
index 00000000000..da39191e727
--- /dev/null
+++ b/drivers/xen/xen-acpi-pad.c
@@ -0,0 +1,182 @@
+/*
+ * xen-acpi-pad.c - Xen pad interface
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ *    Author: Liu, Jinsong
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#define ACPI_PROCESSOR_AGGREGATOR_CLASS	"acpi_pad"
+#define ACPI_PROCESSOR_AGGREGATOR_DEVICE_NAME	"Processor Aggregator"
+#define ACPI_PROCESSOR_AGGREGATOR_NOTIFY	0x80
+static DEFINE_MUTEX(xen_cpu_lock);
+
+static int xen_acpi_pad_idle_cpus(unsigned int idle_nums)
+{
+	struct xen_platform_op op;
+
+	op.cmd = XENPF_core_parking;
+	op.u.core_parking.type = XEN_CORE_PARKING_SET;
+	op.u.core_parking.idle_nums = idle_nums;
+
+	return HYPERVISOR_dom0_op(&op);
+}
+
+static int xen_acpi_pad_idle_cpus_num(void)
+{
+	struct xen_platform_op op;
+
+	op.cmd = XENPF_core_parking;
+	op.u.core_parking.type = XEN_CORE_PARKING_GET;
+
+	return HYPERVISOR_dom0_op(&op)
+	       ?: op.u.core_parking.idle_nums;
+}
+
+/*
+ * Query firmware how many CPUs should be idle
+ * return -1 on failure
+ */
+static int acpi_pad_pur(acpi_handle handle)
+{
+	struct acpi_buffer buffer = {ACPI_ALLOCATE_BUFFER, NULL};
+	union acpi_object *package;
+	int num = -1;
+
+	if (ACPI_FAILURE(acpi_evaluate_object(handle, "_PUR", NULL, &buffer)))
+		return num;
+
+	if (!buffer.length || !buffer.pointer)
+		return num;
+
+	package = buffer.pointer;
+
+	if (package->type == ACPI_TYPE_PACKAGE &&
+	    package->package.count == 2 &&
+	    package->package.elements[0].integer.value == 1) /* rev 1 */
+		num = package->package.elements[1].integer.value;
+
+	kfree(buffer.pointer);
+	return num;
+}
+
+/* Notify firmware how many CPUs are idle */
+static void acpi_pad_ost(acpi_handle handle, int stat,
+	uint32_t idle_nums)
+{
+	union acpi_object params[3] = {
+		{.type = ACPI_TYPE_INTEGER,},
+		{.type = ACPI_TYPE_INTEGER,},
+		{.type = ACPI_TYPE_BUFFER,},
+	};
+	struct acpi_object_list arg_list = {3, params};
+
+	params[0].integer.value = ACPI_PROCESSOR_AGGREGATOR_NOTIFY;
+	params[1].integer.value = stat;
+	params[2].buffer.length = 4;
+	params[2].buffer.pointer = (void *)&idle_nums;
+	acpi_evaluate_object(handle, "_OST", &arg_list, NULL);
+}
+
+static void acpi_pad_handle_notify(acpi_handle handle)
+{
+	int idle_nums;
+
+	mutex_lock(&xen_cpu_lock);
+	idle_nums = acpi_pad_pur(handle);
+	if (idle_nums < 0) {
+		mutex_unlock(&xen_cpu_lock);
+		return;
+	}
+
+	idle_nums = xen_acpi_pad_idle_cpus(idle_nums)
+		    ?: xen_acpi_pad_idle_cpus_num();
+	if (idle_nums >= 0)
+		acpi_pad_ost(handle, 0, idle_nums);
+	mutex_unlock(&xen_cpu_lock);
+}
+
+static void acpi_pad_notify(acpi_handle handle, u32 event,
+	void *data)
+{
+	switch (event) {
+	case ACPI_PROCESSOR_AGGREGATOR_NOTIFY:
+		acpi_pad_handle_notify(handle);
+		break;
+	default:
+		pr_warn("Unsupported event [0x%x]\n", event);
+		break;
+	}
+}
+
+static int acpi_pad_add(struct acpi_device *device)
+{
+	acpi_status status;
+
+	strcpy(acpi_device_name(device), ACPI_PROCESSOR_AGGREGATOR_DEVICE_NAME);
+	strcpy(acpi_device_class(device), ACPI_PROCESSOR_AGGREGATOR_CLASS);
+
+	status = acpi_install_notify_handler(device->handle,
+		ACPI_DEVICE_NOTIFY, acpi_pad_notify, device);
+	if (ACPI_FAILURE(status))
+		return -ENODEV;
+
+	return 0;
+}
+
+static int acpi_pad_remove(struct acpi_device *device,
+	int type)
+{
+	mutex_lock(&xen_cpu_lock);
+	xen_acpi_pad_idle_cpus(0);
+	mutex_unlock(&xen_cpu_lock);
+
+	acpi_remove_notify_handler(device->handle,
+		ACPI_DEVICE_NOTIFY, acpi_pad_notify);
+	return 0;
+}
+
+static const struct acpi_device_id pad_device_ids[] = {
+	{"ACPI000C", 0},
+	{"", 0},
+};
+
+static struct acpi_driver acpi_pad_driver = {
+	.name = "processor_aggregator",
+	.class = ACPI_PROCESSOR_AGGREGATOR_CLASS,
+	.ids = pad_device_ids,
+	.ops = {
+		.add = acpi_pad_add,
+		.remove = acpi_pad_remove,
+	},
+};
+
+static int __init xen_acpi_pad_init(void)
+{
+	/* Only DOM0 is responsible for Xen acpi pad */
+	if (!xen_initial_domain())
+		return -ENODEV;
+
+	/* Only Xen 4.2 or later supports Xen acpi pad */
+	if (!xen_running_on_version_or_later(4, 2))
+		return -ENODEV;
+
+	return acpi_bus_register_driver(&acpi_pad_driver);
+}
+subsys_initcall(xen_acpi_pad_init);
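The two platform-op wrappers above are the whole kernel-side interface to core parking. Outside the ACPI notify path, a hypothetical dom0 caller would drive it the same way, using the XENPF_core_parking structures added to include/xen/interface/platform.h later in this patch (sketch only, not part of the driver):

/*
 * Illustrative only: ask Xen to park 'n' CPUs, then read back how many
 * were actually idled. Mirrors xen_acpi_pad_idle_cpus{,_num}() above.
 */
static int demo_park_cpus(unsigned int n)
{
	struct xen_platform_op op;
	int rc;

	op.cmd = XENPF_core_parking;
	op.u.core_parking.type = XEN_CORE_PARKING_SET;
	op.u.core_parking.idle_nums = n;
	rc = HYPERVISOR_dom0_op(&op);
	if (rc)
		return rc;

	op.cmd = XENPF_core_parking;
	op.u.core_parking.type = XEN_CORE_PARKING_GET;
	rc = HYPERVISOR_dom0_op(&op);
	return rc ?: op.u.core_parking.idle_nums;
}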
diff --git a/drivers/xen/xen-pciback/pci_stub.c b/drivers/xen/xen-pciback/pci_stub.c
index 68dcc59cd28..cd50d251998 100644
--- a/drivers/xen/xen-pciback/pci_stub.c
+++ b/drivers/xen/xen-pciback/pci_stub.c
@@ -142,7 +142,8 @@ static struct pcistub_device *pcistub_device_find(int domain, int bus,
 		if (psdev->dev != NULL
 		    && domain == pci_domain_nr(psdev->dev->bus)
 		    && bus == psdev->dev->bus->number
-		    && PCI_DEVFN(slot, func) == psdev->dev->devfn) {
+		    && slot == PCI_SLOT(psdev->dev->devfn)
+		    && func == PCI_FUNC(psdev->dev->devfn)) {
 			pcistub_device_get(psdev);
 			goto out;
 		}
@@ -191,7 +192,8 @@ struct pci_dev *pcistub_get_pci_dev_by_slot(struct xen_pcibk_device *pdev,
 		if (psdev->dev != NULL
 		    && domain == pci_domain_nr(psdev->dev->bus)
 		    && bus == psdev->dev->bus->number
-		    && PCI_DEVFN(slot, func) == psdev->dev->devfn) {
+		    && slot == PCI_SLOT(psdev->dev->devfn)
+		    && func == PCI_FUNC(psdev->dev->devfn)) {
 			found_dev = pcistub_device_get_pci_dev(pdev, psdev);
 			break;
 		}
@@ -897,42 +899,35 @@ static struct pci_driver xen_pcibk_pci_driver = {
 static inline int str_to_slot(const char *buf, int *domain, int *bus,
 			      int *slot, int *func)
 {
-	int err;
-	char wc = '*';
+	int parsed = 0;
 
-	err = sscanf(buf, " %x:%x:%x.%x", domain, bus, slot, func);
-	switch (err) {
+	switch (sscanf(buf, " %x:%x:%x.%x %n", domain, bus, slot, func,
+		       &parsed)) {
 	case 3:
 		*func = -1;
-		err = sscanf(buf, " %x:%x:%x.%c", domain, bus, slot, &wc);
+		sscanf(buf, " %x:%x:%x.* %n", domain, bus, slot, &parsed);
 		break;
 	case 2:
 		*slot = *func = -1;
-		err = sscanf(buf, " %x:%x:*.%c", domain, bus, &wc);
-		if (err >= 2)
-			++err;
+		sscanf(buf, " %x:%x:*.* %n", domain, bus, &parsed);
 		break;
 	}
-	if (err == 4 && wc == '*')
+	if (parsed && !buf[parsed])
 		return 0;
-	else if (err < 0)
-		return -EINVAL;
 
 	/* try again without domain */
 	*domain = 0;
-	wc = '*';
-	err = sscanf(buf, " %x:%x.%x", bus, slot, func);
-	switch (err) {
+	switch (sscanf(buf, " %x:%x.%x %n", bus, slot, func, &parsed)) {
 	case 2:
 		*func = -1;
-		err = sscanf(buf, " %x:%x.%c", bus, slot, &wc);
+		sscanf(buf, " %x:%x.* %n", bus, slot, &parsed);
 		break;
 	case 1:
 		*slot = *func = -1;
-		err = sscanf(buf, " %x:*.%c", bus, &wc) + 1;
+		sscanf(buf, " %x:*.* %n", bus, &parsed);
 		break;
 	}
-	if (err == 3 && wc == '*')
+	if (parsed && !buf[parsed])
 		return 0;
 
 	return -EINVAL;
@@ -941,13 +936,20 @@ static inline int str_to_slot(const char *buf, int *domain, int *bus,
 static inline int str_to_quirk(const char *buf, int *domain, int *bus, int
 			       *slot, int *func, int *reg, int *size, int *mask)
 {
-	int err;
+	int parsed = 0;
 
-	err =
-	    sscanf(buf, " %04x:%02x:%02x.%d-%08x:%1x:%08x", domain, bus, slot,
-		   func, reg, size, mask);
-	if (err == 7)
+	sscanf(buf, " %x:%x:%x.%x-%x:%x:%x %n", domain, bus, slot, func,
+	       reg, size, mask, &parsed);
+	if (parsed && !buf[parsed])
 		return 0;
+
+	/* try again without domain */
+	*domain = 0;
+	sscanf(buf, " %x:%x.%x-%x:%x:%x %n", bus, slot, func, reg, size,
+	       mask, &parsed);
+	if (parsed && !buf[parsed])
+		return 0;
+
 	return -EINVAL;
 }
 
@@ -955,7 +957,7 @@ static int pcistub_device_id_add(int domain, int bus, int slot, int func)
 {
 	struct pcistub_device_id *pci_dev_id;
 	unsigned long flags;
-	int rc = 0;
+	int rc = 0, devfn = PCI_DEVFN(slot, func);
 
 	if (slot < 0) {
 		for (slot = 0; !rc && slot < 32; ++slot)
@@ -969,13 +971,24 @@ static int pcistub_device_id_add(int domain, int bus, int slot, int func)
 		return rc;
 	}
 
+	if ((
+#if !defined(MODULE) /* pci_domains_supported is not being exported */ \
+    || !defined(CONFIG_PCI_DOMAINS)
+	     !pci_domains_supported ? domain :
+#endif
+	     domain < 0 || domain > 0xffff)
+	    || bus < 0 || bus > 0xff
+	    || PCI_SLOT(devfn) != slot
+	    || PCI_FUNC(devfn) != func)
+		return -EINVAL;
+
 	pci_dev_id = kmalloc(sizeof(*pci_dev_id), GFP_KERNEL);
 	if (!pci_dev_id)
 		return -ENOMEM;
 
 	pci_dev_id->domain = domain;
 	pci_dev_id->bus = bus;
-	pci_dev_id->devfn = PCI_DEVFN(slot, func);
+	pci_dev_id->devfn = devfn;
 
 	pr_debug(DRV_NAME ": wants to seize %04x:%02x:%02x.%d\n",
 		 domain, bus, slot, func);
@@ -1016,14 +1029,18 @@ static int pcistub_device_id_remove(int domain, int bus, int slot, int func)
 	return err;
 }
 
-static int pcistub_reg_add(int domain, int bus, int slot, int func, int reg,
-			   int size, int mask)
+static int pcistub_reg_add(int domain, int bus, int slot, int func,
+			   unsigned int reg, unsigned int size,
+			   unsigned int mask)
 {
 	int err = 0;
 	struct pcistub_device *psdev;
 	struct pci_dev *dev;
 	struct config_field *field;
 
+	if (reg > 0xfff || (size < 4 && (mask >> (size * 8))))
+		return -EINVAL;
+
 	psdev = pcistub_device_find(domain, bus, slot, func);
 	if (!psdev) {
 		err = -ENODEV;
@@ -1254,13 +1271,11 @@ static ssize_t permissive_add(struct device_driver *drv, const char *buf,
 	int err;
 	struct pcistub_device *psdev;
 	struct xen_pcibk_dev_data *dev_data;
+
 	err = str_to_slot(buf, &domain, &bus, &slot, &func);
 	if (err)
 		goto out;
-	if (slot < 0 || func < 0) {
-		err = -EINVAL;
-		goto out;
-	}
+
 	psdev = pcistub_device_find(domain, bus, slot, func);
 	if (!psdev) {
 		err = -ENODEV;
@@ -1339,8 +1354,6 @@ static int __init pcistub_init(void)
 
 	if (pci_devs_to_hide && *pci_devs_to_hide) {
 		do {
-			char wc = '*';
-
 			parsed = 0;
 
 			err = sscanf(pci_devs_to_hide + pos,
@@ -1349,51 +1362,48 @@ static int __init pcistub_init(void)
 			switch (err) {
 			case 3:
 				func = -1;
-				err = sscanf(pci_devs_to_hide + pos,
-					     " (%x:%x:%x.%c) %n",
-					     &domain, &bus, &slot, &wc,
-					     &parsed);
+				sscanf(pci_devs_to_hide + pos,
+				       " (%x:%x:%x.*) %n",
+				       &domain, &bus, &slot, &parsed);
 				break;
 			case 2:
 				slot = func = -1;
-				err = sscanf(pci_devs_to_hide + pos,
-					     " (%x:%x:*.%c) %n",
-					     &domain, &bus, &wc, &parsed) + 1;
+				sscanf(pci_devs_to_hide + pos,
+				       " (%x:%x:*.*) %n",
+				       &domain, &bus, &parsed);
 				break;
 			}
 
-			if (err != 4 || wc != '*') {
+			if (!parsed) {
 				domain = 0;
-				wc = '*';
 				err = sscanf(pci_devs_to_hide + pos,
 					     " (%x:%x.%x) %n",
 					     &bus, &slot, &func, &parsed);
 				switch (err) {
 				case 2:
 					func = -1;
-					err = sscanf(pci_devs_to_hide + pos,
-						     " (%x:%x.%c) %n",
-						     &bus, &slot, &wc,
-						     &parsed);
+					sscanf(pci_devs_to_hide + pos,
+					       " (%x:%x.*) %n",
+					       &bus, &slot, &parsed);
 					break;
 				case 1:
 					slot = func = -1;
-					err = sscanf(pci_devs_to_hide + pos,
-						     " (%x:*.%c) %n",
-						     &bus, &wc, &parsed) + 1;
+					sscanf(pci_devs_to_hide + pos,
+					       " (%x:*.*) %n",
+					       &bus, &parsed);
 					break;
 				}
-				if (err != 3 || wc != '*')
-					goto parse_error;
 			}
 
+			if (parsed <= 0)
+				goto parse_error;
+
 			err = pcistub_device_id_add(domain, bus, slot, func);
 			if (err)
 				goto out;
 
-			/* if parsed<=0, we've reached the end of the string */
 			pos += parsed;
-		} while (parsed > 0 && pci_devs_to_hide[pos]);
+		} while (pci_devs_to_hide[pos]);
 	}
 
 	/* If we're the first PCI Device Driver to register, we're the
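The rewritten parsers all rely on the same idiom: let sscanf() record how far it got via %n, then accept the input only if the match reached the end of the string. A stand-alone illustration (userspace, not from the patch) of why this rejects trailing garbage that the old wildcard-character checks let through:

#include <stdio.h>

/* Illustrative only: accept hex "bus:slot.func" with nothing trailing. */
static int demo_parse_bdf(const char *buf, int *bus, int *slot, int *func)
{
	int parsed = 0;

	sscanf(buf, " %x:%x.%x %n", bus, slot, func, &parsed);
	return (parsed && !buf[parsed]) ? 0 : -1;
}

int main(void)
{
	int b, s, f;

	printf("%d\n", demo_parse_bdf("03:00.1", &b, &s, &f));     /* 0: accepted */
	printf("%d\n", demo_parse_bdf("03:00.1junk", &b, &s, &f)); /* -1: trailing garbage */
	return 0;
}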
diff --git a/drivers/xen/xenbus/xenbus_xs.c b/drivers/xen/xenbus/xenbus_xs.c
index acedeabe589..88e677b0de7 100644
--- a/drivers/xen/xenbus/xenbus_xs.c
+++ b/drivers/xen/xenbus/xenbus_xs.c
@@ -48,7 +48,6 @@
 #include
 #include
 #include
 #include "xenbus_comms.h"
-#include
 
 struct xs_stored_msg {
 	struct list_head list;
diff --git a/include/xen/interface/memory.h b/include/xen/interface/memory.h
index 90712e2072d..b40a4315cb8 100644
--- a/include/xen/interface/memory.h
+++ b/include/xen/interface/memory.h
@@ -153,6 +153,14 @@ struct xen_machphys_mapping {
 };
 DEFINE_GUEST_HANDLE_STRUCT(xen_machphys_mapping_t);
 
+#define XENMAPSPACE_shared_info  0 /* shared info page */
+#define XENMAPSPACE_grant_table  1 /* grant table page */
+#define XENMAPSPACE_gmfn         2 /* GMFN */
+#define XENMAPSPACE_gmfn_range   3 /* GMFN range, XENMEM_add_to_physmap only. */
+#define XENMAPSPACE_gmfn_foreign 4 /* GMFN from another dom,
+				    * XENMEM_add_to_physmap_range only.
+				    */
+
 /*
  * Sets the GPFN at which a particular page appears in the specified guest's
  * pseudophysical address space.
@@ -167,8 +175,6 @@ struct xen_add_to_physmap {
 	uint16_t size;
 
 	/* Source mapping space. */
-#define XENMAPSPACE_shared_info 0 /* shared info page */
-#define XENMAPSPACE_grant_table 1 /* grant table page */
 	unsigned int space;
 
 	/* Index into source mapping space. */
@@ -182,6 +188,24 @@ DEFINE_GUEST_HANDLE_STRUCT(xen_add_to_physmap);
 /*** REMOVED ***/
 /*#define XENMEM_translate_gpfn_list  8*/
 
+#define XENMEM_add_to_physmap_range 23
+struct xen_add_to_physmap_range {
+	/* Which domain to change the mapping for. */
+	domid_t domid;
+	uint16_t space; /* => enum phys_map_space */
+
+	/* Number of pages to go through */
+	uint16_t size;
+	domid_t foreign_domid; /* IFF gmfn_foreign */
+
+	/* Indexes into space being mapped. */
+	GUEST_HANDLE(xen_ulong_t) idxs;
+
+	/* GPFN in domid where the source mapping page should appear. */
+	GUEST_HANDLE(xen_pfn_t) gpfns;
+};
+DEFINE_GUEST_HANDLE_STRUCT(xen_add_to_physmap_range);
+
 /*
  * Returns the pseudo-physical memory map as it was when the domain
  * was started (specified by XENMEM_set_memory_map).
@@ -217,4 +241,20 @@ DEFINE_GUEST_HANDLE_STRUCT(xen_memory_map);
  * during a driver critical region.
  */
 extern spinlock_t xen_reservation_lock;
+
+/*
+ * Unmaps the page appearing at a particular GPFN from the specified guest's
+ * pseudophysical address space.
+ * arg == addr of xen_remove_from_physmap_t.
+ */
+#define XENMEM_remove_from_physmap	15
+struct xen_remove_from_physmap {
+	/* Which domain to change the mapping for. */
+	domid_t domid;
+
+	/* GPFN of the current mapping of the page. */
+	xen_pfn_t gpfn;
+};
+DEFINE_GUEST_HANDLE_STRUCT(xen_remove_from_physmap);
+
 #endif /* __XEN_PUBLIC_MEMORY_H__ */
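XENMEM_add_to_physmap_range is defined as a batch operation: idxs and gpfns are parallel arrays of 'size' entries. The ARM helper earlier in this patch only ever passes size = 1, but a batched call would look roughly like this (hypothetical helper, same structures, sketch only):

/*
 * Illustrative only: map 'count' foreign gmfns (idx[]) at the given local
 * gpfn[] slots in a single hypercall instead of one page per call.
 */
static int demo_map_foreign_batch(domid_t foreign_domid, unsigned int count,
				  xen_ulong_t *idx, xen_pfn_t *gpfn)
{
	struct xen_add_to_physmap_range xatp = {
		.domid = DOMID_SELF,
		.foreign_domid = foreign_domid,
		.size = count,
		.space = XENMAPSPACE_gmfn_foreign,
	};

	set_xen_guest_handle(xatp.idxs, idx);
	set_xen_guest_handle(xatp.gpfns, gpfn);

	return HYPERVISOR_memory_op(XENMEM_add_to_physmap_range, &xatp);
}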
diff --git a/include/xen/interface/platform.h b/include/xen/interface/platform.h
index 4755b5fac9c..5e36932ab40 100644
--- a/include/xen/interface/platform.h
+++ b/include/xen/interface/platform.h
@@ -324,6 +324,22 @@ struct xenpf_cpu_ol {
 };
 DEFINE_GUEST_HANDLE_STRUCT(xenpf_cpu_ol);
 
+/*
+ * CMD 58 and 59 are reserved for cpu hotadd and memory hotadd,
+ * which are already occupied at Xen hypervisor side.
+ */
+#define XENPF_core_parking	60
+struct xenpf_core_parking {
+	/* IN variables */
+#define XEN_CORE_PARKING_SET	1
+#define XEN_CORE_PARKING_GET	2
+	uint32_t type;
+	/* IN variables: set cpu nums expected to be idled */
+	/* OUT variables: get cpu nums actually be idled */
+	uint32_t idle_nums;
+};
+DEFINE_GUEST_HANDLE_STRUCT(xenpf_core_parking);
+
 struct xen_platform_op {
 	uint32_t cmd;
 	uint32_t interface_version; /* XENPF_INTERFACE_VERSION */
@@ -341,6 +357,7 @@ struct xen_platform_op {
 		struct xenpf_set_processor_pminfo set_pminfo;
 		struct xenpf_pcpuinfo      pcpu_info;
 		struct xenpf_cpu_ol        cpu_ol;
+		struct xenpf_core_parking  core_parking;
 		uint8_t                    pad[128];
 	} u;
 };
diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h
index 6a198e46ab6..d6fe062cad6 100644
--- a/include/xen/xen-ops.h
+++ b/include/xen/xen-ops.h
@@ -2,6 +2,7 @@
 #define INCLUDE_XEN_OPS_H
 
 #include
+#include
 
 DECLARE_PER_CPU(struct vcpu_info *, xen_vcpu);
 
@@ -26,7 +27,11 @@ void xen_destroy_contiguous_region(unsigned long vstart, unsigned int order);
 struct vm_area_struct;
 int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
 			       unsigned long addr,
-			       unsigned long mfn, int nr,
-			       pgprot_t prot, unsigned domid);
+			       xen_pfn_t mfn, int nr,
+			       pgprot_t prot, unsigned domid,
+			       struct page **pages);
+int xen_unmap_domain_mfn_range(struct vm_area_struct *vma,
+			       int numpgs, struct page **pages);
 
+bool xen_running_on_version_or_later(unsigned int major, unsigned int minor);
 #endif /* INCLUDE_XEN_OPS_H */