dect
/
linux-2.6
Archived
13
0
Fork 0
This repository has been archived on 2022-02-17. You can view files and clone it, but cannot push or open issues or pull requests.
linux-2.6/arch/x86/include/asm/pgtable_types.h

359 lines
12 KiB
C
Raw Normal View History

#ifndef _ASM_X86_PGTABLE_DEFS_H
#define _ASM_X86_PGTABLE_DEFS_H
#include <linux/const.h>
#include <asm/page_types.h>
#define FIRST_USER_ADDRESS 0
#define _PAGE_BIT_PRESENT 0 /* is present */
#define _PAGE_BIT_RW 1 /* writeable */
#define _PAGE_BIT_USER 2 /* userspace addressable */
#define _PAGE_BIT_PWT 3 /* page write through */
#define _PAGE_BIT_PCD 4 /* page cache disabled */
#define _PAGE_BIT_ACCESSED 5 /* was accessed (raised by CPU) */
#define _PAGE_BIT_DIRTY 6 /* was written to (raised by CPU) */
#define _PAGE_BIT_PSE 7 /* 4 MB (or 2MB) page */
#define _PAGE_BIT_PAT 7 /* on 4KB pages */
#define _PAGE_BIT_GLOBAL 8 /* Global TLB entry PPro+ */
#define _PAGE_BIT_UNUSED1 9 /* available for programmer */
#define _PAGE_BIT_IOMAP 10 /* flag used to indicate IO mapping */
#define _PAGE_BIT_HIDDEN 11 /* hidden by kmemcheck */
#define _PAGE_BIT_PAT_LARGE 12 /* On 2MB or 1GB pages */
#define _PAGE_BIT_SPECIAL _PAGE_BIT_UNUSED1
#define _PAGE_BIT_CPA_TEST _PAGE_BIT_UNUSED1
#define _PAGE_BIT_SPLITTING _PAGE_BIT_UNUSED1 /* only valid on a PSE pmd */
#define _PAGE_BIT_NX 63 /* No execute: only valid after cpuid check */
/* If _PAGE_BIT_PRESENT is clear, we use these: */
/* - if the user mapped it with PROT_NONE; pte_present gives true */
#define _PAGE_BIT_PROTNONE _PAGE_BIT_GLOBAL
/* - set: nonlinear file mapping, saved PTE; unset:swap */
#define _PAGE_BIT_FILE _PAGE_BIT_DIRTY
#define _PAGE_PRESENT (_AT(pteval_t, 1) << _PAGE_BIT_PRESENT)
#define _PAGE_RW (_AT(pteval_t, 1) << _PAGE_BIT_RW)
#define _PAGE_USER (_AT(pteval_t, 1) << _PAGE_BIT_USER)
#define _PAGE_PWT (_AT(pteval_t, 1) << _PAGE_BIT_PWT)
#define _PAGE_PCD (_AT(pteval_t, 1) << _PAGE_BIT_PCD)
#define _PAGE_ACCESSED (_AT(pteval_t, 1) << _PAGE_BIT_ACCESSED)
#define _PAGE_DIRTY (_AT(pteval_t, 1) << _PAGE_BIT_DIRTY)
#define _PAGE_PSE (_AT(pteval_t, 1) << _PAGE_BIT_PSE)
#define _PAGE_GLOBAL (_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL)
#define _PAGE_UNUSED1 (_AT(pteval_t, 1) << _PAGE_BIT_UNUSED1)
#define _PAGE_IOMAP (_AT(pteval_t, 1) << _PAGE_BIT_IOMAP)
#define _PAGE_PAT (_AT(pteval_t, 1) << _PAGE_BIT_PAT)
#define _PAGE_PAT_LARGE (_AT(pteval_t, 1) << _PAGE_BIT_PAT_LARGE)
#define _PAGE_SPECIAL (_AT(pteval_t, 1) << _PAGE_BIT_SPECIAL)
#define _PAGE_CPA_TEST (_AT(pteval_t, 1) << _PAGE_BIT_CPA_TEST)
#define _PAGE_SPLITTING (_AT(pteval_t, 1) << _PAGE_BIT_SPLITTING)
#define __HAVE_ARCH_PTE_SPECIAL
#ifdef CONFIG_KMEMCHECK
#define _PAGE_HIDDEN (_AT(pteval_t, 1) << _PAGE_BIT_HIDDEN)
#else
#define _PAGE_HIDDEN (_AT(pteval_t, 0))
#endif
#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
#define _PAGE_NX (_AT(pteval_t, 1) << _PAGE_BIT_NX)
#else
#define _PAGE_NX (_AT(pteval_t, 0))
#endif
#define _PAGE_FILE (_AT(pteval_t, 1) << _PAGE_BIT_FILE)
#define _PAGE_PROTNONE (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE)
mm: numa: define _PAGE_NUMA The objective of _PAGE_NUMA is to be able to trigger NUMA hinting page faults to identify the per NUMA node working set of the thread at runtime. Arming the NUMA hinting page fault mechanism works similarly to setting up a mprotect(PROT_NONE) virtual range: the present bit is cleared at the same time that _PAGE_NUMA is set, so when the fault triggers we can identify it as a NUMA hinting page fault. _PAGE_NUMA on x86 shares the same bit number of _PAGE_PROTNONE (but it could also use a different bitflag, it's up to the architecture to decide). It would be confusing to call the "NUMA hinting page faults" as "do_prot_none faults". They're different events and _PAGE_NUMA doesn't alter the semantics of mprotect(PROT_NONE) in any way. Sharing the same bitflag with _PAGE_PROTNONE in fact complicates things: it requires us to ensure the code paths executed by _PAGE_PROTNONE remains mutually exclusive to the code paths executed by _PAGE_NUMA at all times, to avoid _PAGE_NUMA and _PAGE_PROTNONE to step into each other toes. Because we want to be able to set this bitflag in any established pte or pmd (while clearing the present bit at the same time) without losing information, this bitflag must never be set when the pte and pmd are present, so the bitflag picked for _PAGE_NUMA usage, must not be used by the swap entry format. Signed-off-by: Andrea Arcangeli <aarcange@redhat.com> Signed-off-by: Mel Gorman <mgorman@suse.de> Reviewed-by: Rik van Riel <riel@redhat.com>
2012-10-03 23:50:46 +00:00
/*
* _PAGE_NUMA indicates that this page will trigger a numa hinting
* minor page fault to gather numa placement statistics (see
* pte_numa()). The bit picked (8) is within the range between
* _PAGE_FILE (6) and _PAGE_PROTNONE (8) bits. Therefore, it doesn't
* require changes to the swp entry format because that bit is always
* zero when the pte is not present.
*
* The bit picked must be always zero when the pmd is present and not
* present, so that we don't lose information when we set it while
* atomically clearing the present bit.
*
* Because we shared the same bit (8) with _PAGE_PROTNONE this can be
* interpreted as _PAGE_NUMA only in places that _PAGE_PROTNONE
* couldn't reach, like handle_mm_fault() (see access_error in
* arch/x86/mm/fault.c, the vma protection must not be PROT_NONE for
* handle_mm_fault() to be invoked).
*/
#define _PAGE_NUMA _PAGE_PROTNONE
#define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \
_PAGE_ACCESSED | _PAGE_DIRTY)
#define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | \
_PAGE_DIRTY)
/* Set of bits not changed in pte_modify */
#define _PAGE_CHG_MASK (PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT | \
_PAGE_SPECIAL | _PAGE_ACCESSED | _PAGE_DIRTY)
#define _HPAGE_CHG_MASK (_PAGE_CHG_MASK | _PAGE_PSE)
#define _PAGE_CACHE_MASK (_PAGE_PCD | _PAGE_PWT)
#define _PAGE_CACHE_WB (0)
#define _PAGE_CACHE_WC (_PAGE_PWT)
#define _PAGE_CACHE_UC_MINUS (_PAGE_PCD)
#define _PAGE_CACHE_UC (_PAGE_PCD | _PAGE_PWT)
#define PAGE_NONE __pgprot(_PAGE_PROTNONE | _PAGE_ACCESSED)
#define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \
_PAGE_ACCESSED | _PAGE_NX)
#define PAGE_SHARED_EXEC __pgprot(_PAGE_PRESENT | _PAGE_RW | \
_PAGE_USER | _PAGE_ACCESSED)
#define PAGE_COPY_NOEXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | \
_PAGE_ACCESSED | _PAGE_NX)
#define PAGE_COPY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | \
_PAGE_ACCESSED)
#define PAGE_COPY PAGE_COPY_NOEXEC
#define PAGE_READONLY __pgprot(_PAGE_PRESENT | _PAGE_USER | \
_PAGE_ACCESSED | _PAGE_NX)
#define PAGE_READONLY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | \
_PAGE_ACCESSED)
#define __PAGE_KERNEL_EXEC \
(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_GLOBAL)
#define __PAGE_KERNEL (__PAGE_KERNEL_EXEC | _PAGE_NX)
#define __PAGE_KERNEL_RO (__PAGE_KERNEL & ~_PAGE_RW)
#define __PAGE_KERNEL_RX (__PAGE_KERNEL_EXEC & ~_PAGE_RW)
#define __PAGE_KERNEL_EXEC_NOCACHE (__PAGE_KERNEL_EXEC | _PAGE_PCD | _PAGE_PWT)
#define __PAGE_KERNEL_WC (__PAGE_KERNEL | _PAGE_CACHE_WC)
#define __PAGE_KERNEL_NOCACHE (__PAGE_KERNEL | _PAGE_PCD | _PAGE_PWT)
#define __PAGE_KERNEL_UC_MINUS (__PAGE_KERNEL | _PAGE_PCD)
#define __PAGE_KERNEL_VSYSCALL (__PAGE_KERNEL_RX | _PAGE_USER)
#define __PAGE_KERNEL_VVAR (__PAGE_KERNEL_RO | _PAGE_USER)
#define __PAGE_KERNEL_VVAR_NOCACHE (__PAGE_KERNEL_VVAR | _PAGE_PCD | _PAGE_PWT)
#define __PAGE_KERNEL_LARGE (__PAGE_KERNEL | _PAGE_PSE)
#define __PAGE_KERNEL_LARGE_NOCACHE (__PAGE_KERNEL | _PAGE_CACHE_UC | _PAGE_PSE)
#define __PAGE_KERNEL_LARGE_EXEC (__PAGE_KERNEL_EXEC | _PAGE_PSE)
#define __PAGE_KERNEL_IO (__PAGE_KERNEL | _PAGE_IOMAP)
#define __PAGE_KERNEL_IO_NOCACHE (__PAGE_KERNEL_NOCACHE | _PAGE_IOMAP)
#define __PAGE_KERNEL_IO_UC_MINUS (__PAGE_KERNEL_UC_MINUS | _PAGE_IOMAP)
#define __PAGE_KERNEL_IO_WC (__PAGE_KERNEL_WC | _PAGE_IOMAP)
#define PAGE_KERNEL __pgprot(__PAGE_KERNEL)
#define PAGE_KERNEL_RO __pgprot(__PAGE_KERNEL_RO)
#define PAGE_KERNEL_EXEC __pgprot(__PAGE_KERNEL_EXEC)
#define PAGE_KERNEL_RX __pgprot(__PAGE_KERNEL_RX)
#define PAGE_KERNEL_WC __pgprot(__PAGE_KERNEL_WC)
#define PAGE_KERNEL_NOCACHE __pgprot(__PAGE_KERNEL_NOCACHE)
#define PAGE_KERNEL_UC_MINUS __pgprot(__PAGE_KERNEL_UC_MINUS)
#define PAGE_KERNEL_EXEC_NOCACHE __pgprot(__PAGE_KERNEL_EXEC_NOCACHE)
#define PAGE_KERNEL_LARGE __pgprot(__PAGE_KERNEL_LARGE)
#define PAGE_KERNEL_LARGE_NOCACHE __pgprot(__PAGE_KERNEL_LARGE_NOCACHE)
#define PAGE_KERNEL_LARGE_EXEC __pgprot(__PAGE_KERNEL_LARGE_EXEC)
#define PAGE_KERNEL_VSYSCALL __pgprot(__PAGE_KERNEL_VSYSCALL)
#define PAGE_KERNEL_VVAR __pgprot(__PAGE_KERNEL_VVAR)
#define PAGE_KERNEL_VVAR_NOCACHE __pgprot(__PAGE_KERNEL_VVAR_NOCACHE)
#define PAGE_KERNEL_IO __pgprot(__PAGE_KERNEL_IO)
#define PAGE_KERNEL_IO_NOCACHE __pgprot(__PAGE_KERNEL_IO_NOCACHE)
#define PAGE_KERNEL_IO_UC_MINUS __pgprot(__PAGE_KERNEL_IO_UC_MINUS)
#define PAGE_KERNEL_IO_WC __pgprot(__PAGE_KERNEL_IO_WC)
/* xwr */
#define __P000 PAGE_NONE
#define __P001 PAGE_READONLY
#define __P010 PAGE_COPY
#define __P011 PAGE_COPY
#define __P100 PAGE_READONLY_EXEC
#define __P101 PAGE_READONLY_EXEC
#define __P110 PAGE_COPY_EXEC
#define __P111 PAGE_COPY_EXEC
#define __S000 PAGE_NONE
#define __S001 PAGE_READONLY
#define __S010 PAGE_SHARED
#define __S011 PAGE_SHARED
#define __S100 PAGE_READONLY_EXEC
#define __S101 PAGE_READONLY_EXEC
#define __S110 PAGE_SHARED_EXEC
#define __S111 PAGE_SHARED_EXEC
/*
* early identity mapping pte attrib macros.
*/
#ifdef CONFIG_X86_64
#define __PAGE_KERNEL_IDENT_LARGE_EXEC __PAGE_KERNEL_LARGE_EXEC
#else
/*
* For PDE_IDENT_ATTR include USER bit. As the PDE and PTE protection
* bits are combined, this will alow user to access the high address mapped
* VDSO in the presence of CONFIG_COMPAT_VDSO
*/
#define PTE_IDENT_ATTR 0x003 /* PRESENT+RW */
#define PDE_IDENT_ATTR 0x067 /* PRESENT+RW+USER+DIRTY+ACCESSED */
#define PGD_IDENT_ATTR 0x001 /* PRESENT (no other attributes) */
#endif
#ifdef CONFIG_X86_32
# include <asm/pgtable_32_types.h>
#else
# include <asm/pgtable_64_types.h>
#endif
#ifndef __ASSEMBLY__
#include <linux/types.h>
/* PTE_PFN_MASK extracts the PFN from a (pte|pmd|pud|pgd)val_t */
#define PTE_PFN_MASK ((pteval_t)PHYSICAL_PAGE_MASK)
/* PTE_FLAGS_MASK extracts the flags from a (pte|pmd|pud|pgd)val_t */
#define PTE_FLAGS_MASK (~PTE_PFN_MASK)
typedef struct pgprot { pgprotval_t pgprot; } pgprot_t;
typedef struct { pgdval_t pgd; } pgd_t;
static inline pgd_t native_make_pgd(pgdval_t val)
{
return (pgd_t) { val };
}
static inline pgdval_t native_pgd_val(pgd_t pgd)
{
return pgd.pgd;
}
static inline pgdval_t pgd_flags(pgd_t pgd)
{
return native_pgd_val(pgd) & PTE_FLAGS_MASK;
}
#if PAGETABLE_LEVELS > 3
typedef struct { pudval_t pud; } pud_t;
static inline pud_t native_make_pud(pmdval_t val)
{
return (pud_t) { val };
}
static inline pudval_t native_pud_val(pud_t pud)
{
return pud.pud;
}
#else
#include <asm-generic/pgtable-nopud.h>
static inline pudval_t native_pud_val(pud_t pud)
{
return native_pgd_val(pud.pgd);
}
#endif
#if PAGETABLE_LEVELS > 2
typedef struct { pmdval_t pmd; } pmd_t;
static inline pmd_t native_make_pmd(pmdval_t val)
{
return (pmd_t) { val };
}
static inline pmdval_t native_pmd_val(pmd_t pmd)
{
return pmd.pmd;
}
#else
#include <asm-generic/pgtable-nopmd.h>
static inline pmdval_t native_pmd_val(pmd_t pmd)
{
return native_pgd_val(pmd.pud.pgd);
}
#endif
static inline pudval_t pud_flags(pud_t pud)
{
return native_pud_val(pud) & PTE_FLAGS_MASK;
}
static inline pmdval_t pmd_flags(pmd_t pmd)
{
return native_pmd_val(pmd) & PTE_FLAGS_MASK;
}
static inline pte_t native_make_pte(pteval_t val)
{
return (pte_t) { .pte = val };
}
static inline pteval_t native_pte_val(pte_t pte)
{
return pte.pte;
}
static inline pteval_t pte_flags(pte_t pte)
{
return native_pte_val(pte) & PTE_FLAGS_MASK;
}
#define pgprot_val(x) ((x).pgprot)
#define __pgprot(x) ((pgprot_t) { (x) } )
typedef struct page *pgtable_t;
extern pteval_t __supported_pte_mask;
extern void set_nx(void);
extern int nx_enabled;
#define pgprot_writecombine pgprot_writecombine
extern pgprot_t pgprot_writecombine(pgprot_t prot);
/* Indicate that x86 has its own track and untrack pfn vma functions */
#define __HAVE_PFNMAP_TRACKING
#define __HAVE_PHYS_MEM_ACCESS_PROT
struct file;
pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
unsigned long size, pgprot_t vma_prot);
int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
unsigned long size, pgprot_t *vma_prot);
/* Install a pte for a particular vaddr in kernel space. */
void set_pte_vaddr(unsigned long vaddr, pte_t pte);
x86,xen: introduce x86_init.mapping.pagetable_reserve Introduce a new x86_init hook called pagetable_reserve that at the end of init_memory_mapping is used to reserve a range of memory addresses for the kernel pagetable pages we used and free the other ones. On native it just calls memblock_x86_reserve_range while on xen it also takes care of setting the spare memory previously allocated for kernel pagetable pages from RO to RW, so that it can be used for other purposes. A detailed explanation of the reason why this hook is needed follows. As a consequence of the commit: commit 4b239f458c229de044d6905c2b0f9fe16ed9e01e Author: Yinghai Lu <yinghai@kernel.org> Date: Fri Dec 17 16:58:28 2010 -0800 x86-64, mm: Put early page table high at some point init_memory_mapping is going to reach the pagetable pages area and map those pages too (mapping them as normal memory that falls in the range of addresses passed to init_memory_mapping as argument). Some of those pages are already pagetable pages (they are in the range pgt_buf_start-pgt_buf_end) therefore they are going to be mapped RO and everything is fine. Some of these pages are not pagetable pages yet (they fall in the range pgt_buf_end-pgt_buf_top; for example the page at pgt_buf_end) so they are going to be mapped RW. When these pages become pagetable pages and are hooked into the pagetable, xen will find that the guest has already a RW mapping of them somewhere and fail the operation. The reason Xen requires pagetables to be RO is that the hypervisor needs to verify that the pagetables are valid before using them. The validation operations are called "pinning" (more details in arch/x86/xen/mmu.c). In order to fix the issue we mark all the pages in the entire range pgt_buf_start-pgt_buf_top as RO, however when the pagetable allocation is completed only the range pgt_buf_start-pgt_buf_end is reserved by init_memory_mapping. Hence the kernel is going to crash as soon as one of the pages in the range pgt_buf_end-pgt_buf_top is reused (b/c those ranges are RO). For this reason we need a hook to reserve the kernel pagetable pages we used and free the other ones so that they can be reused for other purposes. On native it just means calling memblock_x86_reserve_range, on Xen it also means marking RW the pagetable pages that we allocated before but that haven't been used before. Another way to fix this is without using the hook is by adding a 'if (xen_pv_domain)' in the 'init_memory_mapping' code and calling the Xen counterpart, but that is just nasty. Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com> Acked-by: Yinghai Lu <yinghai@kernel.org> Acked-by: H. Peter Anvin <hpa@zytor.com> Cc: Ingo Molnar <mingo@elte.hu> Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
2011-04-14 14:49:41 +00:00
extern void native_pagetable_reserve(u64 start, u64 end);
#ifdef CONFIG_X86_32
extern void native_pagetable_init(void);
#else
#define native_pagetable_init paging_init
#endif
struct seq_file;
extern void arch_report_meminfo(struct seq_file *m);
enum {
PG_LEVEL_NONE,
PG_LEVEL_4K,
PG_LEVEL_2M,
PG_LEVEL_1G,
PG_LEVEL_NUM
};
#ifdef CONFIG_PROC_FS
extern void update_page_count(int level, unsigned long pages);
#else
static inline void update_page_count(int level, unsigned long pages) { }
#endif
/*
* Helper function that returns the kernel pagetable entry controlling
* the virtual address 'address'. NULL means no pagetable entry present.
* NOTE: the return type is pte_t but if the pmd is PSE then we return it
* as a pte too.
*/
extern pte_t *lookup_address(unsigned long address, unsigned int *level);
#endif /* !__ASSEMBLY__ */
#endif /* _ASM_X86_PGTABLE_DEFS_H */