diff --git a/arch/alpha/include/asm/mman.h b/arch/alpha/include/asm/mman.h index cbeb3616a28..0086b472bc2 100644 --- a/arch/alpha/include/asm/mman.h +++ b/arch/alpha/include/asm/mman.h @@ -63,4 +63,15 @@ /* compatibility flags */ #define MAP_FILE 0 +/* + * When MAP_HUGETLB is set bits [26:31] encode the log2 of the huge page size. + * This gives us 6 bits, which is enough until someone invents 128 bit address + * spaces. + * + * Assume these are all power of twos. + * When 0 use the default page size. + */ +#define MAP_HUGE_SHIFT 26 +#define MAP_HUGE_MASK 0x3f + #endif /* __ALPHA_MMAN_H__ */ diff --git a/arch/mips/include/uapi/asm/mman.h b/arch/mips/include/uapi/asm/mman.h index 46d3da0d4b9..9a936ac9a94 100644 --- a/arch/mips/include/uapi/asm/mman.h +++ b/arch/mips/include/uapi/asm/mman.h @@ -87,4 +87,15 @@ /* compatibility flags */ #define MAP_FILE 0 +/* + * When MAP_HUGETLB is set bits [26:31] encode the log2 of the huge page size. + * This gives us 6 bits, which is enough until someone invents 128 bit address + * spaces. + * + * Assume these are all power of twos. + * When 0 use the default page size. + */ +#define MAP_HUGE_SHIFT 26 +#define MAP_HUGE_MASK 0x3f + #endif /* _ASM_MMAN_H */ diff --git a/arch/parisc/include/uapi/asm/mman.h b/arch/parisc/include/uapi/asm/mman.h index 12219ebce86..294d251ca7b 100644 --- a/arch/parisc/include/uapi/asm/mman.h +++ b/arch/parisc/include/uapi/asm/mman.h @@ -70,4 +70,15 @@ #define MAP_FILE 0 #define MAP_VARIABLE 0 +/* + * When MAP_HUGETLB is set bits [26:31] encode the log2 of the huge page size. + * This gives us 6 bits, which is enough until someone invents 128 bit address + * spaces. + * + * Assume these are all power of twos. + * When 0 use the default page size. + */ +#define MAP_HUGE_SHIFT 26 +#define MAP_HUGE_MASK 0x3f + #endif /* __PARISC_MMAN_H__ */ diff --git a/arch/x86/include/asm/mman.h b/arch/x86/include/asm/mman.h index 593e51d4643..513b05f15bb 100644 --- a/arch/x86/include/asm/mman.h +++ b/arch/x86/include/asm/mman.h @@ -3,6 +3,9 @@ #define MAP_32BIT 0x40 /* only give out 32bit addresses */ +#define MAP_HUGE_2MB (21 << MAP_HUGE_SHIFT) +#define MAP_HUGE_1GB (30 << MAP_HUGE_SHIFT) + #include #endif /* _ASM_X86_MMAN_H */ diff --git a/arch/xtensa/include/uapi/asm/mman.h b/arch/xtensa/include/uapi/asm/mman.h index 25bc6c1309c..00eed6786d7 100644 --- a/arch/xtensa/include/uapi/asm/mman.h +++ b/arch/xtensa/include/uapi/asm/mman.h @@ -93,4 +93,15 @@ /* compatibility flags */ #define MAP_FILE 0 +/* + * When MAP_HUGETLB is set bits [26:31] encode the log2 of the huge page size. + * This gives us 6 bits, which is enough until someone invents 128 bit address + * spaces. + * + * Assume these are all power of twos. + * When 0 use the default page size. + */ +#define MAP_HUGE_SHIFT 26 +#define MAP_HUGE_MASK 0x3f + #endif /* _XTENSA_MMAN_H */ diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index c5bc355d824..21b8a487523 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -923,7 +923,7 @@ static struct file_system_type hugetlbfs_fs_type = { .kill_sb = kill_litter_super, }; -static struct vfsmount *hugetlbfs_vfsmount; +static struct vfsmount *hugetlbfs_vfsmount[HUGE_MAX_HSTATE]; static int can_do_hugetlb_shm(void) { @@ -932,9 +932,22 @@ static int can_do_hugetlb_shm(void) return capable(CAP_IPC_LOCK) || in_group_p(shm_group); } +static int get_hstate_idx(int page_size_log) +{ + struct hstate *h; + + if (!page_size_log) + return default_hstate_idx; + h = size_to_hstate(1 << page_size_log); + if (!h) + return -1; + return h - hstates; +} + struct file *hugetlb_file_setup(const char *name, unsigned long addr, size_t size, vm_flags_t acctflag, - struct user_struct **user, int creat_flags) + struct user_struct **user, + int creat_flags, int page_size_log) { int error = -ENOMEM; struct file *file; @@ -944,9 +957,14 @@ struct file *hugetlb_file_setup(const char *name, unsigned long addr, struct qstr quick_string; struct hstate *hstate; unsigned long num_pages; + int hstate_idx; + + hstate_idx = get_hstate_idx(page_size_log); + if (hstate_idx < 0) + return ERR_PTR(-ENODEV); *user = NULL; - if (!hugetlbfs_vfsmount) + if (!hugetlbfs_vfsmount[hstate_idx]) return ERR_PTR(-ENOENT); if (creat_flags == HUGETLB_SHMFS_INODE && !can_do_hugetlb_shm()) { @@ -963,7 +981,7 @@ struct file *hugetlb_file_setup(const char *name, unsigned long addr, } } - root = hugetlbfs_vfsmount->mnt_root; + root = hugetlbfs_vfsmount[hstate_idx]->mnt_root; quick_string.name = name; quick_string.len = strlen(quick_string.name); quick_string.hash = 0; @@ -971,7 +989,7 @@ struct file *hugetlb_file_setup(const char *name, unsigned long addr, if (!path.dentry) goto out_shm_unlock; - path.mnt = mntget(hugetlbfs_vfsmount); + path.mnt = mntget(hugetlbfs_vfsmount[hstate_idx]); error = -ENOSPC; inode = hugetlbfs_get_inode(root->d_sb, NULL, S_IFREG | S_IRWXUGO, 0); if (!inode) @@ -1011,8 +1029,9 @@ out_shm_unlock: static int __init init_hugetlbfs_fs(void) { + struct hstate *h; int error; - struct vfsmount *vfsmount; + int i; error = bdi_init(&hugetlbfs_backing_dev_info); if (error) @@ -1029,14 +1048,26 @@ static int __init init_hugetlbfs_fs(void) if (error) goto out; - vfsmount = kern_mount(&hugetlbfs_fs_type); + i = 0; + for_each_hstate(h) { + char buf[50]; + unsigned ps_kb = 1U << (h->order + PAGE_SHIFT - 10); - if (!IS_ERR(vfsmount)) { - hugetlbfs_vfsmount = vfsmount; - return 0; + snprintf(buf, sizeof(buf), "pagesize=%uK", ps_kb); + hugetlbfs_vfsmount[i] = kern_mount_data(&hugetlbfs_fs_type, + buf); + + if (IS_ERR(hugetlbfs_vfsmount[i])) { + pr_err("hugetlb: Cannot mount internal hugetlbfs for " + "page size %uK", ps_kb); + error = PTR_ERR(hugetlbfs_vfsmount[i]); + hugetlbfs_vfsmount[i] = NULL; + } + i++; } - - error = PTR_ERR(vfsmount); + /* Non default hstates are optional */ + if (!IS_ERR_OR_NULL(hugetlbfs_vfsmount[default_hstate_idx])) + return 0; out: kmem_cache_destroy(hugetlbfs_inode_cachep); @@ -1047,13 +1078,19 @@ static int __init init_hugetlbfs_fs(void) static void __exit exit_hugetlbfs_fs(void) { + struct hstate *h; + int i; + + /* * Make sure all delayed rcu free inodes are flushed before we * destroy cache. */ rcu_barrier(); kmem_cache_destroy(hugetlbfs_inode_cachep); - kern_unmount(hugetlbfs_vfsmount); + i = 0; + for_each_hstate(h) + kern_unmount(hugetlbfs_vfsmount[i++]); unregister_filesystem(&hugetlbfs_fs_type); bdi_destroy(&hugetlbfs_backing_dev_info); } diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 225164842ab..3e7fa1acf09 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -183,7 +183,8 @@ extern const struct file_operations hugetlbfs_file_operations; extern const struct vm_operations_struct hugetlb_vm_ops; struct file *hugetlb_file_setup(const char *name, unsigned long addr, size_t size, vm_flags_t acct, - struct user_struct **user, int creat_flags); + struct user_struct **user, int creat_flags, + int page_size_log); static inline int is_file_hugepages(struct file *file) { @@ -195,12 +196,14 @@ static inline int is_file_hugepages(struct file *file) return 0; } + #else /* !CONFIG_HUGETLBFS */ #define is_file_hugepages(file) 0 static inline struct file * hugetlb_file_setup(const char *name, unsigned long addr, size_t size, - vm_flags_t acctflag, struct user_struct **user, int creat_flags) + vm_flags_t acctflag, struct user_struct **user, int creat_flags, + int page_size_log) { return ERR_PTR(-ENOSYS); } diff --git a/include/linux/shm.h b/include/linux/shm.h index bcf8a6a3ec0..429c1995d75 100644 --- a/include/linux/shm.h +++ b/include/linux/shm.h @@ -29,6 +29,21 @@ struct shmid_kernel /* private to the kernel */ #define SHM_HUGETLB 04000 /* segment will use huge TLB pages */ #define SHM_NORESERVE 010000 /* don't check for reservations */ +/* Bits [26:31] are reserved */ + +/* + * When SHM_HUGETLB is set bits [26:31] encode the log2 of the huge page size. + * This gives us 6 bits, which is enough until someone invents 128 bit address + * spaces. + * + * Assume these are all power of twos. + * When 0 use the default page size. + */ +#define SHM_HUGE_SHIFT 26 +#define SHM_HUGE_MASK 0x3f +#define SHM_HUGE_2MB (21 << SHM_HUGE_SHIFT) +#define SHM_HUGE_1GB (30 << SHM_HUGE_SHIFT) + #ifdef CONFIG_SYSVIPC long do_shmat(int shmid, char __user *shmaddr, int shmflg, unsigned long *addr, unsigned long shmlba); diff --git a/include/uapi/asm-generic/mman-common.h b/include/uapi/asm-generic/mman-common.h index d030d2c2647..4164529a94f 100644 --- a/include/uapi/asm-generic/mman-common.h +++ b/include/uapi/asm-generic/mman-common.h @@ -55,4 +55,15 @@ /* compatibility flags */ #define MAP_FILE 0 +/* + * When MAP_HUGETLB is set bits [26:31] encode the log2 of the huge page size. + * This gives us 6 bits, which is enough until someone invents 128 bit address + * spaces. + * + * Assume these are all power of twos. + * When 0 use the default page size. + */ +#define MAP_HUGE_SHIFT 26 +#define MAP_HUGE_MASK 0x3f + #endif /* __ASM_GENERIC_MMAN_COMMON_H */ diff --git a/include/uapi/asm-generic/mman.h b/include/uapi/asm-generic/mman.h index 32c8bd6a196..e9fe6fd2a07 100644 --- a/include/uapi/asm-generic/mman.h +++ b/include/uapi/asm-generic/mman.h @@ -13,6 +13,8 @@ #define MAP_STACK 0x20000 /* give out an address that is best suited for process/thread stacks */ #define MAP_HUGETLB 0x40000 /* create a huge page mapping */ +/* Bits [26:31] are reserved, see mman-common.h for MAP_HUGETLB usage */ + #define MCL_CURRENT 1 /* lock all current mappings */ #define MCL_FUTURE 2 /* lock all future mappings */ diff --git a/ipc/shm.c b/ipc/shm.c index dff40c9f73c..4fa6d8fee73 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -495,7 +495,8 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params) if (shmflg & SHM_NORESERVE) acctflag = VM_NORESERVE; file = hugetlb_file_setup(name, 0, size, acctflag, - &shp->mlock_user, HUGETLB_SHMFS_INODE); + &shp->mlock_user, HUGETLB_SHMFS_INODE, + (shmflg >> SHM_HUGE_SHIFT) & SHM_HUGE_MASK); } else { /* * Do not allow no accounting for OVERCOMMIT_NEVER, even diff --git a/mm/mmap.c b/mm/mmap.c index 9a796c41e7d..ebf19031c5e 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1153,8 +1153,9 @@ SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len, * memory so no accounting is necessary */ file = hugetlb_file_setup(HUGETLB_ANON_FILE, addr, len, - VM_NORESERVE, &user, - HUGETLB_ANONHUGE_INODE); + VM_NORESERVE, + &user, HUGETLB_ANONHUGE_INODE, + (flags >> MAP_HUGE_SHIFT) & MAP_HUGE_MASK); if (IS_ERR(file)) return PTR_ERR(file); }