diff options
author | Patrick McHardy <kaber@trash.net> | 2013-03-31 18:10:34 +0200 |
---|---|---|
committer | Patrick McHardy <kaber@trash.net> | 2013-03-31 18:10:34 +0200 |
commit | 70711d223510ba1773cfe1d7770a56141c815ff8 (patch) | |
tree | 4a71f38a3a554ddecaa31b7d8c6bc49b7d1705b4 /fs/proc | |
parent | d53b4ed072d9779cdf53582c46436dec06d0961f (diff) | |
parent | 19f949f52599ba7c3f67a5897ac6be14bfcb1200 (diff) |
Merge tag 'v3.8' of /home/kaber/src/repos/linux
Linux 3.8
Signed-off-by: Patrick McHardy <kaber@trash.net>
Conflicts:
include/linux/Kbuild
include/linux/netlink.h
Diffstat (limited to 'fs/proc')
-rw-r--r-- | fs/proc/Makefile | 3 | ||||
-rw-r--r-- | fs/proc/array.c | 27 | ||||
-rw-r--r-- | fs/proc/base.c | 769 | ||||
-rw-r--r-- | fs/proc/fd.c | 369 | ||||
-rw-r--r-- | fs/proc/fd.h | 14 | ||||
-rw-r--r-- | fs/proc/generic.c | 65 | ||||
-rw-r--r-- | fs/proc/inode.c | 7 | ||||
-rw-r--r-- | fs/proc/internal.h | 59 | ||||
-rw-r--r-- | fs/proc/kcore.c | 2 | ||||
-rw-r--r-- | fs/proc/namespaces.c | 189 | ||||
-rw-r--r-- | fs/proc/page.c | 8 | ||||
-rw-r--r-- | fs/proc/proc_devtree.c | 11 | ||||
-rw-r--r-- | fs/proc/proc_net.c | 2 | ||||
-rw-r--r-- | fs/proc/proc_sysctl.c | 35 | ||||
-rw-r--r-- | fs/proc/root.c | 29 | ||||
-rw-r--r-- | fs/proc/self.c | 59 | ||||
-rw-r--r-- | fs/proc/stat.c | 14 | ||||
-rw-r--r-- | fs/proc/task_mmu.c | 116 |
18 files changed, 991 insertions, 787 deletions
diff --git a/fs/proc/Makefile b/fs/proc/Makefile index c1c72933592..981b0560193 100644 --- a/fs/proc/Makefile +++ b/fs/proc/Makefile @@ -8,7 +8,7 @@ proc-y := nommu.o task_nommu.o proc-$(CONFIG_MMU) := mmu.o task_mmu.o proc-y += inode.o root.o base.o generic.o array.o \ - proc_tty.o + proc_tty.o fd.o proc-y += cmdline.o proc-y += consoles.o proc-y += cpuinfo.o @@ -21,6 +21,7 @@ proc-y += uptime.o proc-y += version.o proc-y += softirqs.o proc-y += namespaces.o +proc-y += self.o proc-$(CONFIG_PROC_SYSCTL) += proc_sysctl.o proc-$(CONFIG_NET) += proc_net.o proc-$(CONFIG_PROC_KCORE) += kcore.o diff --git a/fs/proc/array.c b/fs/proc/array.c index c1c207c36ca..6a91e6ffbcb 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -162,7 +162,7 @@ static inline const char *get_task_state(struct task_struct *tsk) static inline void task_state(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *p) { - struct user_namespace *user_ns = current_user_ns(); + struct user_namespace *user_ns = seq_user_ns(m); struct group_info *group_info; int g; struct fdtable *fdt = NULL; @@ -212,7 +212,7 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns, group_info = cred->group_info; task_unlock(p); - for (g = 0; g < min(group_info->ngroups, NGROUPS_SMALL); g++) + for (g = 0; g < group_info->ngroups; g++) seq_printf(m, "%d ", from_kgid_munged(user_ns, GROUP_AT(group_info, g))); put_cred(cred); @@ -220,7 +220,7 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns, seq_putc(m, '\n'); } -static void render_sigset_t(struct seq_file *m, const char *header, +void render_sigset_t(struct seq_file *m, const char *header, sigset_t *set) { int i; @@ -308,6 +308,10 @@ static void render_cap_t(struct seq_file *m, const char *header, seq_putc(m, '\n'); } +/* Remove non-existent capabilities */ +#define NORM_CAPS(v) (v.cap[CAP_TO_INDEX(CAP_LAST_CAP)] &= \ + CAP_TO_MASK(CAP_LAST_CAP + 1) - 1) + static inline void task_cap(struct seq_file *m, struct task_struct *p) { const struct cred *cred; @@ -321,12 +325,24 @@ static inline void task_cap(struct seq_file *m, struct task_struct *p) cap_bset = cred->cap_bset; rcu_read_unlock(); + NORM_CAPS(cap_inheritable); + NORM_CAPS(cap_permitted); + NORM_CAPS(cap_effective); + NORM_CAPS(cap_bset); + render_cap_t(m, "CapInh:\t", &cap_inheritable); render_cap_t(m, "CapPrm:\t", &cap_permitted); render_cap_t(m, "CapEff:\t", &cap_effective); render_cap_t(m, "CapBnd:\t", &cap_bset); } +static inline void task_seccomp(struct seq_file *m, struct task_struct *p) +{ +#ifdef CONFIG_SECCOMP + seq_printf(m, "Seccomp:\t%d\n", p->seccomp.mode); +#endif +} + static inline void task_context_switch_counts(struct seq_file *m, struct task_struct *p) { @@ -360,6 +376,7 @@ int proc_pid_status(struct seq_file *m, struct pid_namespace *ns, } task_sig(m, task); task_cap(m, task); + task_seccomp(m, task); task_cpus_allowed(m, task); cpuset_task_status_allowed(m, task); task_context_switch_counts(m, task); @@ -438,7 +455,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, min_flt += sig->min_flt; maj_flt += sig->maj_flt; - thread_group_times(task, &utime, &stime); + thread_group_cputime_adjusted(task, &utime, &stime); gtime += sig->gtime; } @@ -454,7 +471,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, if (!whole) { min_flt = task->min_flt; maj_flt = task->maj_flt; - task_times(task, &utime, &stime); + task_cputime_adjusted(task, &utime, &stime); gtime = task->gtime; } diff --git a/fs/proc/base.c b/fs/proc/base.c index 437195f204e..9b43ff77a51 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -90,6 +90,7 @@ #endif #include <trace/events/oom.h> #include "internal.h" +#include "fd.h" /* NOTE: * Implementing inode permission operations in /proc is almost @@ -136,8 +137,6 @@ struct pid_entry { NULL, &proc_single_file_operations, \ { .proc_show = show } ) -static int proc_fd_permission(struct inode *inode, int mask); - /* * Count the number of hardlinks for the pid_entry table, excluding the . * and .. links. @@ -543,13 +542,6 @@ int proc_setattr(struct dentry *dentry, struct iattr *attr) if (error) return error; - if ((attr->ia_valid & ATTR_SIZE) && - attr->ia_size != i_size_read(inode)) { - error = vmtruncate(inode, attr->ia_size); - if (error) - return error; - } - setattr_copy(inode, attr); mark_inode_dirty(inode); return 0; @@ -695,8 +687,6 @@ static int __mem_open(struct inode *inode, struct file *file, unsigned int mode) mmput(mm); } - /* OK to pass negative loff_t, we can catch out-of-range */ - file->f_mode |= FMODE_UNSIGNED_OFFSET; file->private_data = mm; return 0; @@ -704,7 +694,12 @@ static int __mem_open(struct inode *inode, struct file *file, unsigned int mode) static int mem_open(struct inode *inode, struct file *file) { - return __mem_open(inode, file, PTRACE_MODE_ATTACH); + int ret = __mem_open(inode, file, PTRACE_MODE_ATTACH); + + /* OK to pass negative loff_t, we can catch out-of-range */ + file->f_mode |= FMODE_UNSIGNED_OFFSET; + + return ret; } static ssize_t mem_rw(struct file *file, char __user *buf, @@ -827,15 +822,16 @@ static ssize_t environ_read(struct file *file, char __user *buf, if (!atomic_inc_not_zero(&mm->mm_users)) goto free; while (count > 0) { - int this_len, retval, max_len; - - this_len = mm->env_end - (mm->env_start + src); + size_t this_len, max_len; + int retval; - if (this_len <= 0) + if (src >= (mm->env_end - mm->env_start)) break; - max_len = (count > PAGE_SIZE) ? PAGE_SIZE : count; - this_len = (this_len > max_len) ? max_len : this_len; + this_len = mm->env_end - (mm->env_start + src); + + max_len = min_t(size_t, PAGE_SIZE, count); + this_len = min(max_len, this_len); retval = access_remote_vm(mm, (mm->env_start + src), page, this_len, 0); @@ -870,36 +866,36 @@ static const struct file_operations proc_environ_operations = { .release = mem_release, }; -static ssize_t oom_adjust_read(struct file *file, char __user *buf, - size_t count, loff_t *ppos) +static ssize_t oom_adj_read(struct file *file, char __user *buf, size_t count, + loff_t *ppos) { struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode); char buffer[PROC_NUMBUF]; + int oom_adj = OOM_ADJUST_MIN; size_t len; - int oom_adjust = OOM_DISABLE; unsigned long flags; if (!task) return -ESRCH; - if (lock_task_sighand(task, &flags)) { - oom_adjust = task->signal->oom_adj; + if (task->signal->oom_score_adj == OOM_SCORE_ADJ_MAX) + oom_adj = OOM_ADJUST_MAX; + else + oom_adj = (task->signal->oom_score_adj * -OOM_DISABLE) / + OOM_SCORE_ADJ_MAX; unlock_task_sighand(task, &flags); } - put_task_struct(task); - - len = snprintf(buffer, sizeof(buffer), "%i\n", oom_adjust); - + len = snprintf(buffer, sizeof(buffer), "%d\n", oom_adj); return simple_read_from_buffer(buf, count, ppos, buffer, len); } -static ssize_t oom_adjust_write(struct file *file, const char __user *buf, - size_t count, loff_t *ppos) +static ssize_t oom_adj_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) { struct task_struct *task; char buffer[PROC_NUMBUF]; - int oom_adjust; + int oom_adj; unsigned long flags; int err; @@ -911,11 +907,11 @@ static ssize_t oom_adjust_write(struct file *file, const char __user *buf, goto out; } - err = kstrtoint(strstrip(buffer), 0, &oom_adjust); + err = kstrtoint(strstrip(buffer), 0, &oom_adj); if (err) goto out; - if ((oom_adjust < OOM_ADJUST_MIN || oom_adjust > OOM_ADJUST_MAX) && - oom_adjust != OOM_DISABLE) { + if ((oom_adj < OOM_ADJUST_MIN || oom_adj > OOM_ADJUST_MAX) && + oom_adj != OOM_DISABLE) { err = -EINVAL; goto out; } @@ -937,28 +933,30 @@ static ssize_t oom_adjust_write(struct file *file, const char __user *buf, goto err_task_lock; } - if (oom_adjust < task->signal->oom_adj && !capable(CAP_SYS_RESOURCE)) { + /* + * Scale /proc/pid/oom_score_adj appropriately ensuring that a maximum + * value is always attainable. + */ + if (oom_adj == OOM_ADJUST_MAX) + oom_adj = OOM_SCORE_ADJ_MAX; + else + oom_adj = (oom_adj * OOM_SCORE_ADJ_MAX) / -OOM_DISABLE; + + if (oom_adj < task->signal->oom_score_adj && + !capable(CAP_SYS_RESOURCE)) { err = -EACCES; goto err_sighand; } /* - * Warn that /proc/pid/oom_adj is deprecated, see - * Documentation/feature-removal-schedule.txt. + * /proc/pid/oom_adj is provided for legacy purposes, ask users to use + * /proc/pid/oom_score_adj instead. */ printk_once(KERN_WARNING "%s (%d): /proc/%d/oom_adj is deprecated, please use /proc/%d/oom_score_adj instead.\n", current->comm, task_pid_nr(current), task_pid_nr(task), task_pid_nr(task)); - task->signal->oom_adj = oom_adjust; - /* - * Scale /proc/pid/oom_score_adj appropriately ensuring that a maximum - * value is always attainable. - */ - if (task->signal->oom_adj == OOM_ADJUST_MAX) - task->signal->oom_score_adj = OOM_SCORE_ADJ_MAX; - else - task->signal->oom_score_adj = (oom_adjust * OOM_SCORE_ADJ_MAX) / - -OOM_DISABLE; + + task->signal->oom_score_adj = oom_adj; trace_oom_score_adj_update(task); err_sighand: unlock_task_sighand(task, &flags); @@ -969,9 +967,9 @@ out: return err < 0 ? err : count; } -static const struct file_operations proc_oom_adjust_operations = { - .read = oom_adjust_read, - .write = oom_adjust_write, +static const struct file_operations proc_oom_adj_operations = { + .read = oom_adj_read, + .write = oom_adj_write, .llseek = generic_file_llseek, }; @@ -980,7 +978,7 @@ static ssize_t oom_score_adj_read(struct file *file, char __user *buf, { struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode); char buffer[PROC_NUMBUF]; - int oom_score_adj = OOM_SCORE_ADJ_MIN; + short oom_score_adj = OOM_SCORE_ADJ_MIN; unsigned long flags; size_t len; @@ -991,7 +989,7 @@ static ssize_t oom_score_adj_read(struct file *file, char __user *buf, unlock_task_sighand(task, &flags); } put_task_struct(task); - len = snprintf(buffer, sizeof(buffer), "%d\n", oom_score_adj); + len = snprintf(buffer, sizeof(buffer), "%hd\n", oom_score_adj); return simple_read_from_buffer(buf, count, ppos, buffer, len); } @@ -1038,25 +1036,17 @@ static ssize_t oom_score_adj_write(struct file *file, const char __user *buf, goto err_task_lock; } - if (oom_score_adj < task->signal->oom_score_adj_min && + if ((short)oom_score_adj < task->signal->oom_score_adj_min && !capable(CAP_SYS_RESOURCE)) { err = -EACCES; goto err_sighand; } - task->signal->oom_score_adj = oom_score_adj; + task->signal->oom_score_adj = (short)oom_score_adj; if (has_capability_noaudit(current, CAP_SYS_RESOURCE)) - task->signal->oom_score_adj_min = oom_score_adj; + task->signal->oom_score_adj_min = (short)oom_score_adj; trace_oom_score_adj_update(task); - /* - * Scale /proc/pid/oom_adj appropriately ensuring that OOM_DISABLE is - * always attainable. - */ - if (task->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) - task->signal->oom_adj = OOM_DISABLE; - else - task->signal->oom_adj = (oom_score_adj * OOM_ADJUST_MAX) / - OOM_SCORE_ADJ_MAX; + err_sighand: unlock_task_sighand(task, &flags); err_task_lock: @@ -1085,7 +1075,8 @@ static ssize_t proc_loginuid_read(struct file * file, char __user * buf, if (!task) return -ESRCH; length = scnprintf(tmpbuf, TMPBUFLEN, "%u", - audit_get_loginuid(task)); + from_kuid(file->f_cred->user_ns, + audit_get_loginuid(task))); put_task_struct(task); return simple_read_from_buffer(buf, count, ppos, tmpbuf, length); } @@ -1097,6 +1088,7 @@ static ssize_t proc_loginuid_write(struct file * file, const char __user * buf, char *page, *tmp; ssize_t length; uid_t loginuid; + kuid_t kloginuid; rcu_read_lock(); if (current != pid_task(proc_pid(inode), PIDTYPE_PID)) { @@ -1126,7 +1118,13 @@ static ssize_t proc_loginuid_write(struct file * file, const char __user * buf, goto out_free_page; } - length = audit_set_loginuid(loginuid); + kloginuid = make_kuid(file->f_cred->user_ns, loginuid); + if (!uid_valid(kloginuid)) { + length = -EINVAL; + goto out_free_page; + } + + length = audit_set_loginuid(kloginuid); if (likely(length == 0)) length = count; @@ -1427,16 +1425,19 @@ static int proc_exe_link(struct dentry *dentry, struct path *exe_path) static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd) { struct inode *inode = dentry->d_inode; + struct path path; int error = -EACCES; - /* We don't need a base pointer in the /proc filesystem */ - path_put(&nd->path); - /* Are we allowed to snoop on the tasks file descriptors? */ if (!proc_fd_access_allowed(inode)) goto out; - error = PROC_I(inode)->op.proc_get_link(dentry, &nd->path); + error = PROC_I(inode)->op.proc_get_link(dentry, &path); + if (error) + goto out; + + nd_jump_link(nd, &path); + return NULL; out: return ERR_PTR(error); } @@ -1485,7 +1486,7 @@ out: return error; } -static const struct inode_operations proc_pid_link_inode_operations = { +const struct inode_operations proc_pid_link_inode_operations = { .readlink = proc_pid_readlink, .follow_link = proc_pid_follow_link, .setattr = proc_setattr, @@ -1494,21 +1495,6 @@ static const struct inode_operations proc_pid_link_inode_operations = { /* building an inode */ -static int task_dumpable(struct task_struct *task) -{ - int dumpable = 0; - struct mm_struct *mm; - - task_lock(task); - mm = task->mm; - if (mm) - dumpable = get_dumpable(mm); - task_unlock(task); - if(dumpable == 1) - return 1; - return 0; -} - struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task) { struct inode * inode; @@ -1601,13 +1587,13 @@ int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) * made this apply to all per process world readable and executable * directories. */ -int pid_revalidate(struct dentry *dentry, struct nameidata *nd) +int pid_revalidate(struct dentry *dentry, unsigned int flags) { struct inode *inode; struct task_struct *task; const struct cred *cred; - if (nd && nd->flags & LOOKUP_RCU) + if (flags & LOOKUP_RCU) return -ECHILD; inode = dentry->d_inode; @@ -1634,15 +1620,6 @@ int pid_revalidate(struct dentry *dentry, struct nameidata *nd) return 0; } -static int pid_delete_dentry(const struct dentry * dentry) -{ - /* Is the task we represent dead? - * If so, then don't put the dentry on the lru list, - * kill it immediately. - */ - return !proc_pid(dentry->d_inode)->tasks[PIDTYPE_PID].first; -} - const struct dentry_operations pid_dentry_operations = { .d_revalidate = pid_revalidate, @@ -1705,289 +1682,6 @@ end_instantiate: return filldir(dirent, name, len, filp->f_pos, ino, type); } -static unsigned name_to_int(struct dentry *dentry) -{ - const char *name = dentry->d_name.name; - int len = dentry->d_name.len; - unsigned n = 0; - - if (len > 1 && *name == '0') - goto out; - while (len-- > 0) { - unsigned c = *name++ - '0'; - if (c > 9) - goto out; - if (n >= (~0U-9)/10) - goto out; - n *= 10; - n += c; - } - return n; -out: - return ~0U; -} - -#define PROC_FDINFO_MAX 64 - -static int proc_fd_info(struct inode *inode, struct path *path, char *info) -{ - struct task_struct *task = get_proc_task(inode); - struct files_struct *files = NULL; - struct file *file; - int fd = proc_fd(inode); - - if (task) { - files = get_files_struct(task); - put_task_struct(task); - } - if (files) { - /* - * We are not taking a ref to the file structure, so we must - * hold ->file_lock. - */ - spin_lock(&files->file_lock); - file = fcheck_files(files, fd); - if (file) { - unsigned int f_flags; - struct fdtable *fdt; - - fdt = files_fdtable(files); - f_flags = file->f_flags & ~O_CLOEXEC; - if (close_on_exec(fd, fdt)) - f_flags |= O_CLOEXEC; - - if (path) { - *path = file->f_path; - path_get(&file->f_path); - } - if (info) - snprintf(info, PROC_FDINFO_MAX, - "pos:\t%lli\n" - "flags:\t0%o\n", - (long long) file->f_pos, - f_flags); - spin_unlock(&files->file_lock); - put_files_struct(files); - return 0; - } - spin_unlock(&files->file_lock); - put_files_struct(files); - } - return -ENOENT; -} - -static int proc_fd_link(struct dentry *dentry, struct path *path) -{ - return proc_fd_info(dentry->d_inode, path, NULL); -} - -static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd) -{ - struct inode *inode; - struct task_struct *task; - int fd; - struct files_struct *files; - const struct cred *cred; - - if (nd && nd->flags & LOOKUP_RCU) - return -ECHILD; - - inode = dentry->d_inode; - task = get_proc_task(inode); - fd = proc_fd(inode); - - if (task) { - files = get_files_struct(task); - if (files) { - struct file *file; - rcu_read_lock(); - file = fcheck_files(files, fd); - if (file) { - unsigned f_mode = file->f_mode; - - rcu_read_unlock(); - put_files_struct(files); - - if (task_dumpable(task)) { - rcu_read_lock(); - cred = __task_cred(task); - inode->i_uid = cred->euid; - inode->i_gid = cred->egid; - rcu_read_unlock(); - } else { - inode->i_uid = GLOBAL_ROOT_UID; - inode->i_gid = GLOBAL_ROOT_GID; - } - - if (S_ISLNK(inode->i_mode)) { - unsigned i_mode = S_IFLNK; - if (f_mode & FMODE_READ) - i_mode |= S_IRUSR | S_IXUSR; - if (f_mode & FMODE_WRITE) - i_mode |= S_IWUSR | S_IXUSR; - inode->i_mode = i_mode; - } - - security_task_to_inode(task, inode); - put_task_struct(task); - return 1; - } - rcu_read_unlock(); - put_files_struct(files); - } - put_task_struct(task); - } - d_drop(dentry); - return 0; -} - -static const struct dentry_operations tid_fd_dentry_operations = -{ - .d_revalidate = tid_fd_revalidate, - .d_delete = pid_delete_dentry, -}; - -static struct dentry *proc_fd_instantiate(struct inode *dir, - struct dentry *dentry, struct task_struct *task, const void *ptr) -{ - unsigned fd = (unsigned long)ptr; - struct inode *inode; - struct proc_inode *ei; - struct dentry *error = ERR_PTR(-ENOENT); - - inode = proc_pid_make_inode(dir->i_sb, task); - if (!inode) - goto out; - ei = PROC_I(inode); - ei->fd = fd; - - inode->i_mode = S_IFLNK; - inode->i_op = &proc_pid_link_inode_operations; - inode->i_size = 64; - ei->op.proc_get_link = proc_fd_link; - d_set_d_op(dentry, &tid_fd_dentry_operations); - d_add(dentry, inode); - /* Close the race of the process dying before we return the dentry */ - if (tid_fd_revalidate(dentry, NULL)) - error = NULL; - - out: - return error; -} - -static struct dentry *proc_lookupfd_common(struct inode *dir, - struct dentry *dentry, - instantiate_t instantiate) -{ - struct task_struct *task = get_proc_task(dir); - unsigned fd = name_to_int(dentry); - struct dentry *result = ERR_PTR(-ENOENT); - - if (!task) - goto out_no_task; - if (fd == ~0U) - goto out; - - result = instantiate(dir, dentry, task, (void *)(unsigned long)fd); -out: - put_task_struct(task); -out_no_task: - return result; -} - -static int proc_readfd_common(struct file * filp, void * dirent, - filldir_t filldir, instantiate_t instantiate) -{ - struct dentry *dentry = filp->f_path.dentry; - struct inode *inode = dentry->d_inode; - struct task_struct *p = get_proc_task(inode); - unsigned int fd, ino; - int retval; - struct files_struct * files; - - retval = -ENOENT; - if (!p) - goto out_no_task; - retval = 0; - - fd = filp->f_pos; - switch (fd) { - case 0: - if (filldir(dirent, ".", 1, 0, inode->i_ino, DT_DIR) < 0) - goto out; - filp->f_pos++; - case 1: - ino = parent_ino(dentry); - if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0) - goto out; - filp->f_pos++; - default: - files = get_files_struct(p); - if (!files) - goto out; - rcu_read_lock(); - for (fd = filp->f_pos-2; - fd < files_fdtable(files)->max_fds; - fd++, filp->f_pos++) { - char name[PROC_NUMBUF]; - int len; - int rv; - - if (!fcheck_files(files, fd)) - continue; - rcu_read_unlock(); - - len = snprintf(name, sizeof(name), "%d", fd); - rv = proc_fill_cache(filp, dirent, filldir, - name, len, instantiate, p, - (void *)(unsigned long)fd); - if (rv < 0) - goto out_fd_loop; - rcu_read_lock(); - } - rcu_read_unlock(); -out_fd_loop: - put_files_struct(files); - } -out: - put_task_struct(p); -out_no_task: - return retval; -} - -static struct dentry *proc_lookupfd(struct inode *dir, struct dentry *dentry, - struct nameidata *nd) -{ - return proc_lookupfd_common(dir, dentry, proc_fd_instantiate); -} - -static int proc_readfd(struct file *filp, void *dirent, filldir_t filldir) -{ - return proc_readfd_common(filp, dirent, filldir, proc_fd_instantiate); -} - -static ssize_t proc_fdinfo_read(struct file *file, char __user *buf, - size_t len, loff_t *ppos) -{ - char tmp[PROC_FDINFO_MAX]; - int err = proc_fd_info(file->f_path.dentry->d_inode, NULL, tmp); - if (!err) - err = simple_read_from_buffer(buf, len, ppos, tmp, strlen(tmp)); - return err; -} - -static const struct file_operations proc_fdinfo_file_operations = { - .open = nonseekable_open, - .read = proc_fdinfo_read, - .llseek = no_llseek, -}; - -static const struct file_operations proc_fd_operations = { - .read = generic_read_dir, - .readdir = proc_readfd, - .llseek = default_llseek, -}; - #ifdef CONFIG_CHECKPOINT_RESTORE /* @@ -2003,7 +1697,7 @@ static int dname_to_vma_addr(struct dentry *dentry, return 0; } -static int map_files_d_revalidate(struct dentry *dentry, struct nameidata *nd) +static int map_files_d_revalidate(struct dentry *dentry, unsigned int flags) { unsigned long vm_start, vm_end; bool exact_vma_exists = false; @@ -2013,7 +1707,7 @@ static int map_files_d_revalidate(struct dentry *dentry, struct nameidata *nd) struct inode *inode; int status = 0; - if (nd && nd->flags & LOOKUP_RCU) + if (flags & LOOKUP_RCU) return -ECHILD; if (!capable(CAP_SYS_ADMIN)) { @@ -2106,7 +1800,7 @@ out: } struct map_files_info { - struct file *file; + fmode_t mode; unsigned long len; unsigned char name[4*sizeof(long)+2]; /* max: %lx-%lx\0 */ }; @@ -2115,13 +1809,10 @@ static struct dentry * proc_map_files_instantiate(struct inode *dir, struct dentry *dentry, struct task_struct *task, const void *ptr) { - const struct file *file = ptr; + fmode_t mode = (fmode_t)(unsigned long)ptr; struct proc_inode *ei; struct inode *inode; - if (!file) - return ERR_PTR(-ENOENT); - inode = proc_pid_make_inode(dir->i_sb, task); if (!inode) return ERR_PTR(-ENOENT); @@ -2133,9 +1824,9 @@ proc_map_files_instantiate(struct inode *dir, struct dentry *dentry, inode->i_size = 64; inode->i_mode = S_IFLNK; - if (file->f_mode & FMODE_READ) + if (mode & FMODE_READ) inode->i_mode |= S_IRUSR; - if (file->f_mode & FMODE_WRITE) + if (mode & FMODE_WRITE) inode->i_mode |= S_IWUSR; d_set_d_op(dentry, &tid_map_files_dentry_operations); @@ -2145,7 +1836,7 @@ proc_map_files_instantiate(struct inode *dir, struct dentry *dentry, } static struct dentry *proc_map_files_lookup(struct inode *dir, - struct dentry *dentry, struct nameidata *nd) + struct dentry *dentry, unsigned int flags) { unsigned long vm_start, vm_end; struct vm_area_struct *vma; @@ -2179,7 +1870,9 @@ static struct dentry *proc_map_files_lookup(struct inode *dir, if (!vma) goto out_no_vma; - result = proc_map_files_instantiate(dir, dentry, task, vma->vm_file); + if (vma->vm_file) + result = proc_map_files_instantiate(dir, dentry, task, + (void *)(unsigned long)vma->vm_file->f_mode); out_no_vma: up_read(&mm->mmap_sem); @@ -2280,8 +1973,7 @@ proc_map_files_readdir(struct file *filp, void *dirent, filldir_t filldir) if (++pos <= filp->f_pos) continue; - get_file(vma->vm_file); - info.file = vma->vm_file; + info.mode = vma->vm_file->f_mode; info.len = snprintf(info.name, sizeof(info.name), "%lx-%lx", vma->vm_start, vma->vm_end); @@ -2296,19 +1988,11 @@ proc_map_files_readdir(struct file *filp, void *dirent, filldir_t filldir) ret = proc_fill_cache(filp, dirent, filldir, p->name, p->len, proc_map_files_instantiate, - task, p->file); + task, + (void *)(unsigned long)p->mode); if (ret) break; filp->f_pos++; - fput(p->file); - } - for (; i < nr_files; i++) { - /* - * In case of error don't forget - * to put rest of file refs. - */ - p = flex_array_get(fa, i); - fput(p->file); } if (fa) flex_array_free(fa); @@ -2330,82 +2014,6 @@ static const struct file_operations proc_map_files_operations = { #endif /* CONFIG_CHECKPOINT_RESTORE */ -/* - * /proc/pid/fd needs a special permission handler so that a process can still - * access /proc/self/fd after it has executed a setuid(). - */ -static int proc_fd_permission(struct inode *inode, int mask) -{ - int rv = generic_permission(inode, mask); - if (rv == 0) - return 0; - if (task_pid(current) == proc_pid(inode)) - rv = 0; - return rv; -} - -/* - * proc directories can do almost nothing.. - */ -static const struct inode_operations proc_fd_inode_operations = { - .lookup = proc_lookupfd, - .permission = proc_fd_permission, - .setattr = proc_setattr, -}; - -static struct dentry *proc_fdinfo_instantiate(struct inode *dir, - struct dentry *dentry, struct task_struct *task, const void *ptr) -{ - unsigned fd = (unsigned long)ptr; - struct inode *inode; - struct proc_inode *ei; - struct dentry *error = ERR_PTR(-ENOENT); - - inode = proc_pid_make_inode(dir->i_sb, task); - if (!inode) - goto out; - ei = PROC_I(inode); - ei->fd = fd; - inode->i_mode = S_IFREG | S_IRUSR; - inode->i_fop = &proc_fdinfo_file_operations; - d_set_d_op(dentry, &tid_fd_dentry_operations); - d_add(dentry, inode); - /* Close the race of the process dying before we return the dentry */ - if (tid_fd_revalidate(dentry, NULL)) - error = NULL; - - out: - return error; -} - -static struct dentry *proc_lookupfdinfo(struct inode *dir, - struct dentry *dentry, - struct nameidata *nd) -{ - return proc_lookupfd_common(dir, dentry, proc_fdinfo_instantiate); -} - -static int proc_readfdinfo(struct file *filp, void *dirent, filldir_t filldir) -{ - return proc_readfd_common(filp, dirent, filldir, - proc_fdinfo_instantiate); -} - -static const struct file_operations proc_fdinfo_operations = { - .read = generic_read_dir, - .readdir = proc_readfdinfo, - .llseek = default_llseek, -}; - -/* - * proc directories can do almost nothing.. - */ -static const struct inode_operations proc_fdinfo_inode_operations = { - .lookup = proc_lookupfdinfo, - .setattr = proc_setattr, -}; - - static struct dentry *proc_pident_instantiate(struct inode *dir, struct dentry *dentry, struct task_struct *task, const void *ptr) { @@ -2430,7 +2038,7 @@ static struct dentry *proc_pident_instantiate(struct inode *dir, d_set_d_op(dentry, &pid_dentry_operations); d_add(dentry, inode); /* Close the race of the process dying before we return the dentry */ - if (pid_revalidate(dentry, NULL)) + if (pid_revalidate(dentry, 0)) error = NULL; out: return error; @@ -2630,7 +2238,7 @@ static const struct file_operations proc_attr_dir_operations = { }; static struct dentry *proc_attr_dir_lookup(struct inode *dir, - struct dentry *dentry, struct nameidata *nd) + struct dentry *dentry, unsigned int flags) { return proc_pident_lookup(dir, dentry, attr_dir_stuff, ARRAY_SIZE(attr_dir_stuff)); @@ -2730,145 +2338,6 @@ static const struct file_operations proc_coredump_filter_operations = { }; #endif -/* - * /proc/self: - */ -static int proc_self_readlink(struct dentry *dentry, char __user *buffer, - int buflen) -{ - struct pid_namespace *ns = dentry->d_sb->s_fs_info; - pid_t tgid = task_tgid_nr_ns(current, ns); - char tmp[PROC_NUMBUF]; - if (!tgid) - return -ENOENT; - sprintf(tmp, "%d", tgid); - return vfs_readlink(dentry,buffer,buflen,tmp); -} - -static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd) -{ - struct pid_namespace *ns = dentry->d_sb->s_fs_info; - pid_t tgid = task_tgid_nr_ns(current, ns); - char *name = ERR_PTR(-ENOENT); - if (tgid) { - name = __getname(); - if (!name) - name = ERR_PTR(-ENOMEM); - else - sprintf(name, "%d", tgid); - } - nd_set_link(nd, name); - return NULL; -} - -static void proc_self_put_link(struct dentry *dentry, struct nameidata *nd, - void *cookie) -{ - char *s = nd_get_link(nd); - if (!IS_ERR(s)) - __putname(s); -} - -static const struct inode_operations proc_self_inode_operations = { - .readlink = proc_self_readlink, - .follow_link = proc_self_follow_link, - .put_link = proc_self_put_link, -}; - -/* - * proc base - * - * These are the directory entries in the root directory of /proc - * that properly belong to the /proc filesystem, as they describe - * describe something that is process related. - */ -static const struct pid_entry proc_base_stuff[] = { - NOD("self", S_IFLNK|S_IRWXUGO, - &proc_self_inode_operations, NULL, {}), -}; - -static struct dentry *proc_base_instantiate(struct inode *dir, - struct dentry *dentry, struct task_struct *task, const void *ptr) -{ - const struct pid_entry *p = ptr; - struct inode *inode; - struct proc_inode *ei; - struct dentry *error; - - /* Allocate the inode */ - error = ERR_PTR(-ENOMEM); - inode = new_inode(dir->i_sb); - if (!inode) - goto out; - - /* Initialize the inode */ - ei = PROC_I(inode); - inode->i_ino = get_next_ino(); - inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; - - /* - * grab the reference to the task. - */ - ei->pid = get_task_pid(task, PIDTYPE_PID); - if (!ei->pid) - goto out_iput; - - inode->i_mode = p->mode; - if (S_ISDIR(inode->i_mode)) - set_nlink(inode, 2); - if (S_ISLNK(inode->i_mode)) - inode->i_size = 64; - if (p->iop) - inode->i_op = p->iop; - if (p->fop) - inode->i_fop = p->fop; - ei->op = p->op; - d_add(dentry, inode); - error = NULL; -out: - return error; -out_iput: - iput(inode); - goto out; -} - -static struct dentry *proc_base_lookup(struct inode *dir, struct dentry *dentry) -{ - struct dentry *error; - struct task_struct *task = get_proc_task(dir); - const struct pid_entry *p, *last; - - error = ERR_PTR(-ENOENT); - - if (!task) - goto out_no_task; - - /* Lookup the directory entry */ - last = &proc_base_stuff[ARRAY_SIZE(proc_base_stuff) - 1]; - for (p = proc_base_stuff; p <= last; p++) { - if (p->len != dentry->d_name.len) - continue; - if (!memcmp(dentry->d_name.name, p->name, p->len)) - break; - } - if (p > last) - goto out; - - error = proc_base_instantiate(dir, dentry, task, p); - -out: - put_task_struct(task); -out_no_task: - return error; -} - -static int proc_base_fill_cache(struct file *filp, void *dirent, - filldir_t filldir, struct task_struct *task, const struct pid_entry *p) -{ - return proc_fill_cache(filp, dirent, filldir, p->name, p->len, - proc_base_instantiate, task, p); -} - #ifdef CONFIG_TASK_IO_ACCOUNTING static int do_io_accounting(struct task_struct *task, char *buffer, int whole) { @@ -2976,6 +2445,11 @@ static int proc_gid_map_open(struct inode *inode, struct file *file) return proc_id_map_open(inode, file, &proc_gid_seq_operations); } +static int proc_projid_map_open(struct inode *inode, struct file *file) +{ + return proc_id_map_open(inode, file, &proc_projid_seq_operations); +} + static const struct file_operations proc_uid_map_operations = { .open = proc_uid_map_open, .write = proc_uid_map_write, @@ -2991,6 +2465,14 @@ static const struct file_operations proc_gid_map_operations = { .llseek = seq_lseek, .release = proc_id_map_release, }; + +static const struct file_operations proc_projid_map_operations = { + .open = proc_projid_map_open, + .write = proc_projid_map_write, + .read = seq_read, + .llseek = seq_lseek, + .release = proc_id_map_release, +}; #endif /* CONFIG_USER_NS */ static int proc_pid_personality(struct seq_file *m, struct pid_namespace *ns, @@ -3077,7 +2559,7 @@ static const struct pid_entry tgid_base_stuff[] = { REG("cgroup", S_IRUGO, proc_cgroup_operations), #endif INF("oom_score", S_IRUGO, proc_oom_score), - REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adjust_operations), + REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations), REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations), #ifdef CONFIG_AUDITSYSCALL REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations), @@ -3098,6 +2580,7 @@ static const struct pid_entry tgid_base_stuff[] = { #ifdef CONFIG_USER_NS REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations), REG("gid_map", S_IRUGO|S_IWUSR, proc_gid_map_operations), + REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations), #endif }; @@ -3114,7 +2597,8 @@ static const struct file_operations proc_tgid_base_operations = { .llseek = default_llseek, }; -static struct dentry *proc_tgid_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd){ +static struct dentry *proc_tgid_base_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) +{ return proc_pident_lookup(dir, dentry, tgid_base_stuff, ARRAY_SIZE(tgid_base_stuff)); } @@ -3208,10 +2692,6 @@ void proc_flush_task(struct task_struct *task) proc_flush_task_mnt(upid->ns->proc_mnt, upid->nr, tgid->numbers[i].nr); } - - upid = &pid->numbers[pid->level]; - if (upid->nr == 1) - pid_ns_release_proc(upid->ns); } static struct dentry *proc_pid_instantiate(struct inode *dir, @@ -3237,23 +2717,19 @@ static struct dentry *proc_pid_instantiate(struct inode *dir, d_add(dentry, inode); /* Close the race of the process dying before we return the dentry */ - if (pid_revalidate(dentry, NULL)) + if (pid_revalidate(dentry, 0)) error = NULL; out: return error; } -struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) +struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags) { - struct dentry *result; + struct dentry *result = NULL; struct task_struct *task; unsigned tgid; struct pid_namespace *ns; - result = proc_base_lookup(dir, dentry); - if (!IS_ERR(result) || PTR_ERR(result) != -ENOENT) - goto out; - tgid = name_to_int(dentry); if (tgid == ~0U) goto out; @@ -3316,7 +2792,7 @@ retry: return iter; } -#define TGID_OFFSET (FIRST_PROCESS_ENTRY + ARRAY_SIZE(proc_base_stuff)) +#define TGID_OFFSET (FIRST_PROCESS_ENTRY) static int proc_pid_fill_cache(struct file *filp, void *dirent, filldir_t filldir, struct tgid_iter iter) @@ -3336,25 +2812,12 @@ static int fake_filldir(void *buf, const char *name, int namelen, /* for the /proc/ directory itself, after non-process stuff has been done */ int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir) { - unsigned int nr; - struct task_struct *reaper; struct tgid_iter iter; struct pid_namespace *ns; filldir_t __filldir; if (filp->f_pos >= PID_MAX_LIMIT + TGID_OFFSET) - goto out_no_task; - nr = filp->f_pos - FIRST_PROCESS_ENTRY; - - reaper = get_proc_task(filp->f_path.dentry->d_inode); - if (!reaper) - goto out_no_task; - - for (; nr < ARRAY_SIZE(proc_base_stuff); filp->f_pos++, nr++) { - const struct pid_entry *p = &proc_base_stuff[nr]; - if (proc_base_fill_cache(filp, dirent, filldir, reaper, p) < 0) - goto out; - } + goto out; ns = filp->f_dentry->d_sb->s_fs_info; iter.task = NULL; @@ -3375,8 +2838,6 @@ int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir) } filp->f_pos = PID_MAX_LIMIT + TGID_OFFSET; out: - put_task_struct(reaper); -out_no_task: return 0; } @@ -3442,7 +2903,7 @@ static const struct pid_entry tid_base_stuff[] = { REG("cgroup", S_IRUGO, proc_cgroup_operations), #endif INF("oom_score", S_IRUGO, proc_oom_score), - REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adjust_operations), + REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations), REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations), #ifdef CONFIG_AUDITSYSCALL REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations), @@ -3460,6 +2921,7 @@ static const struct pid_entry tid_base_stuff[] = { #ifdef CONFIG_USER_NS REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations), REG("gid_map", S_IRUGO|S_IWUSR, proc_gid_map_operations), + REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations), #endif }; @@ -3470,7 +2932,8 @@ static int proc_tid_base_readdir(struct file * filp, tid_base_stuff,ARRAY_SIZE(tid_base_stuff)); } -static struct dentry *proc_tid_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd){ +static struct dentry *proc_tid_base_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) +{ return proc_pident_lookup(dir, dentry, tid_base_stuff, ARRAY_SIZE(tid_base_stuff)); } @@ -3508,13 +2971,13 @@ static struct dentry *proc_task_instantiate(struct inode *dir, d_add(dentry, inode); /* Close the race of the process dying before we return the dentry */ - if (pid_revalidate(dentry, NULL)) + if (pid_revalidate(dentry, 0)) error = NULL; out: return error; } -static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) +static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags) { struct dentry *result = ERR_PTR(-ENOENT); struct task_struct *task; diff --git a/fs/proc/fd.c b/fs/proc/fd.c new file mode 100644 index 00000000000..d7a4a28ef63 --- /dev/null +++ b/fs/proc/fd.c @@ -0,0 +1,369 @@ +#include <linux/sched.h> +#include <linux/errno.h> +#include <linux/dcache.h> +#include <linux/path.h> +#include <linux/fdtable.h> +#include <linux/namei.h> +#include <linux/pid.h> +#include <linux/security.h> +#include <linux/file.h> +#include <linux/seq_file.h> + +#include <linux/proc_fs.h> + +#include "internal.h" +#include "fd.h" + +static int seq_show(struct seq_file *m, void *v) +{ + struct files_struct *files = NULL; + int f_flags = 0, ret = -ENOENT; + struct file *file = NULL; + struct task_struct *task; + + task = get_proc_task(m->private); + if (!task) + return -ENOENT; + + files = get_files_struct(task); + put_task_struct(task); + + if (files) { + int fd = proc_fd(m->private); + + spin_lock(&files->file_lock); + file = fcheck_files(files, fd); + if (file) { + struct fdtable *fdt = files_fdtable(files); + + f_flags = file->f_flags; + if (close_on_exec(fd, fdt)) + f_flags |= O_CLOEXEC; + + get_file(file); + ret = 0; + } + spin_unlock(&files->file_lock); + put_files_struct(files); + } + + if (!ret) { + seq_printf(m, "pos:\t%lli\nflags:\t0%o\n", + (long long)file->f_pos, f_flags); + if (file->f_op->show_fdinfo) + ret = file->f_op->show_fdinfo(m, file); + fput(file); + } + + return ret; +} + +static int seq_fdinfo_open(struct inode *inode, struct file *file) +{ + return single_open(file, seq_show, inode); +} + +static const struct file_operations proc_fdinfo_file_operations = { + .open = seq_fdinfo_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int tid_fd_revalidate(struct dentry *dentry, unsigned int flags) +{ + struct files_struct *files; + struct task_struct *task; + const struct cred *cred; + struct inode *inode; + int fd; + + if (flags & LOOKUP_RCU) + return -ECHILD; + + inode = dentry->d_inode; + task = get_proc_task(inode); + fd = proc_fd(inode); + + if (task) { + files = get_files_struct(task); + if (files) { + struct file *file; + + rcu_read_lock(); + file = fcheck_files(files, fd); + if (file) { + unsigned f_mode = file->f_mode; + + rcu_read_unlock(); + put_files_struct(files); + + if (task_dumpable(task)) { + rcu_read_lock(); + cred = __task_cred(task); + inode->i_uid = cred->euid; + inode->i_gid = cred->egid; + rcu_read_unlock(); + } else { + inode->i_uid = GLOBAL_ROOT_UID; + inode->i_gid = GLOBAL_ROOT_GID; + } + + if (S_ISLNK(inode->i_mode)) { + unsigned i_mode = S_IFLNK; + if (f_mode & FMODE_READ) + i_mode |= S_IRUSR | S_IXUSR; + if (f_mode & FMODE_WRITE) + i_mode |= S_IWUSR | S_IXUSR; + inode->i_mode = i_mode; + } + + security_task_to_inode(task, inode); + put_task_struct(task); + return 1; + } + rcu_read_unlock(); + put_files_struct(files); + } + put_task_struct(task); + } + + d_drop(dentry); + return 0; +} + +static const struct dentry_operations tid_fd_dentry_operations = { + .d_revalidate = tid_fd_revalidate, + .d_delete = pid_delete_dentry, +}; + +static int proc_fd_link(struct dentry *dentry, struct path *path) +{ + struct files_struct *files = NULL; + struct task_struct *task; + int ret = -ENOENT; + + task = get_proc_task(dentry->d_inode); + if (task) { + files = get_files_struct(task); + put_task_struct(task); + } + + if (files) { + int fd = proc_fd(dentry->d_inode); + struct file *fd_file; + + spin_lock(&files->file_lock); + fd_file = fcheck_files(files, fd); + if (fd_file) { + *path = fd_file->f_path; + path_get(&fd_file->f_path); + ret = 0; + } + spin_unlock(&files->file_lock); + put_files_struct(files); + } + + return ret; +} + +static struct dentry * +proc_fd_instantiate(struct inode *dir, struct dentry *dentry, + struct task_struct *task, const void *ptr) +{ + struct dentry *error = ERR_PTR(-ENOENT); + unsigned fd = (unsigned long)ptr; + struct proc_inode *ei; + struct inode *inode; + + inode = proc_pid_make_inode(dir->i_sb, task); + if (!inode) + goto out; + + ei = PROC_I(inode); + ei->fd = fd; + + inode->i_mode = S_IFLNK; + inode->i_op = &proc_pid_link_inode_operations; + inode->i_size = 64; + + ei->op.proc_get_link = proc_fd_link; + + d_set_d_op(dentry, &tid_fd_dentry_operations); + d_add(dentry, inode); + + /* Close the race of the process dying before we return the dentry */ + if (tid_fd_revalidate(dentry, 0)) + error = NULL; + out: + return error; +} + +static struct dentry *proc_lookupfd_common(struct inode *dir, + struct dentry *dentry, + instantiate_t instantiate) +{ + struct task_struct *task = get_proc_task(dir); + struct dentry *result = ERR_PTR(-ENOENT); + unsigned fd = name_to_int(dentry); + + if (!task) + goto out_no_task; + if (fd == ~0U) + goto out; + + result = instantiate(dir, dentry, task, (void *)(unsigned long)fd); +out: + put_task_struct(task); +out_no_task: + return result; +} + +static int proc_readfd_common(struct file * filp, void * dirent, + filldir_t filldir, instantiate_t instantiate) +{ + struct dentry *dentry = filp->f_path.dentry; + struct inode *inode = dentry->d_inode; + struct task_struct *p = get_proc_task(inode); + struct files_struct *files; + unsigned int fd, ino; + int retval; + + retval = -ENOENT; + if (!p) + goto out_no_task; + retval = 0; + + fd = filp->f_pos; + switch (fd) { + case 0: + if (filldir(dirent, ".", 1, 0, inode->i_ino, DT_DIR) < 0) + goto out; + filp->f_pos++; + case 1: + ino = parent_ino(dentry); + if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0) + goto out; + filp->f_pos++; + default: + files = get_files_struct(p); + if (!files) + goto out; + rcu_read_lock(); + for (fd = filp->f_pos - 2; + fd < files_fdtable(files)->max_fds; + fd++, filp->f_pos++) { + char name[PROC_NUMBUF]; + int len; + int rv; + + if (!fcheck_files(files, fd)) + continue; + rcu_read_unlock(); + + len = snprintf(name, sizeof(name), "%d", fd); + rv = proc_fill_cache(filp, dirent, filldir, + name, len, instantiate, p, + (void *)(unsigned long)fd); + if (rv < 0) + goto out_fd_loop; + rcu_read_lock(); + } + rcu_read_unlock(); +out_fd_loop: + put_files_struct(files); + } +out: + put_task_struct(p); +out_no_task: + return retval; +} + +static int proc_readfd(struct file *filp, void *dirent, filldir_t filldir) +{ + return proc_readfd_common(filp, dirent, filldir, proc_fd_instantiate); +} + +const struct file_operations proc_fd_operations = { + .read = generic_read_dir, + .readdir = proc_readfd, + .llseek = default_llseek, +}; + +static struct dentry *proc_lookupfd(struct inode *dir, struct dentry *dentry, + unsigned int flags) +{ + return proc_lookupfd_common(dir, dentry, proc_fd_instantiate); +} + +/* + * /proc/pid/fd needs a special permission handler so that a process can still + * access /proc/self/fd after it has executed a setuid(). + */ +int proc_fd_permission(struct inode *inode, int mask) +{ + int rv = generic_permission(inode, mask); + if (rv == 0) + return 0; + if (task_pid(current) == proc_pid(inode)) + rv = 0; + return rv; +} + +const struct inode_operations proc_fd_inode_operations = { + .lookup = proc_lookupfd, + .permission = proc_fd_permission, + .setattr = proc_setattr, +}; + +static struct dentry * +proc_fdinfo_instantiate(struct inode *dir, struct dentry *dentry, + struct task_struct *task, const void *ptr) +{ + struct dentry *error = ERR_PTR(-ENOENT); + unsigned fd = (unsigned long)ptr; + struct proc_inode *ei; + struct inode *inode; + + inode = proc_pid_make_inode(dir->i_sb, task); + if (!inode) + goto out; + + ei = PROC_I(inode); + ei->fd = fd; + + inode->i_mode = S_IFREG | S_IRUSR; + inode->i_fop = &proc_fdinfo_file_operations; + + d_set_d_op(dentry, &tid_fd_dentry_operations); + d_add(dentry, inode); + + /* Close the race of the process dying before we return the dentry */ + if (tid_fd_revalidate(dentry, 0)) + error = NULL; + out: + return error; +} + +static struct dentry * +proc_lookupfdinfo(struct inode *dir, struct dentry *dentry, unsigned int flags) +{ + return proc_lookupfd_common(dir, dentry, proc_fdinfo_instantiate); +} + +static int proc_readfdinfo(struct file *filp, void *dirent, filldir_t filldir) +{ + return proc_readfd_common(filp, dirent, filldir, + proc_fdinfo_instantiate); +} + +const struct inode_operations proc_fdinfo_inode_operations = { + .lookup = proc_lookupfdinfo, + .setattr = proc_setattr, +}; + +const struct file_operations proc_fdinfo_operations = { + .read = generic_read_dir, + .readdir = proc_readfdinfo, + .llseek = default_llseek, +}; diff --git a/fs/proc/fd.h b/fs/proc/fd.h new file mode 100644 index 00000000000..cbb1d47deda --- /dev/null +++ b/fs/proc/fd.h @@ -0,0 +1,14 @@ +#ifndef __PROCFS_FD_H__ +#define __PROCFS_FD_H__ + +#include <linux/fs.h> + +extern const struct file_operations proc_fd_operations; +extern const struct inode_operations proc_fd_inode_operations; + +extern const struct file_operations proc_fdinfo_operations; +extern const struct inode_operations proc_fdinfo_inode_operations; + +extern int proc_fd_permission(struct inode *inode, int mask); + +#endif /* __PROCFS_FD_H__ */ diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 2edf34f2eb6..76ddae83daa 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -261,16 +261,9 @@ static int proc_notify_change(struct dentry *dentry, struct iattr *iattr) if (error) return error; - if ((iattr->ia_valid & ATTR_SIZE) && - iattr->ia_size != i_size_read(inode)) { - error = vmtruncate(inode, iattr->ia_size); - if (error) - return error; - } - setattr_copy(inode, iattr); mark_inode_dirty(inode); - + de->uid = inode->i_uid; de->gid = inode->i_gid; de->mode = inode->i_mode; @@ -350,37 +343,39 @@ static DEFINE_SPINLOCK(proc_inum_lock); /* protects the above */ * Return an inode number between PROC_DYNAMIC_FIRST and * 0xffffffff, or zero on failure. */ -static unsigned int get_inode_number(void) +int proc_alloc_inum(unsigned int *inum) { unsigned int i; int error; retry: - if (ida_pre_get(&proc_inum_ida, GFP_KERNEL) == 0) - return 0; + if (!ida_pre_get(&proc_inum_ida, GFP_KERNEL)) + return -ENOMEM; - spin_lock(&proc_inum_lock); + spin_lock_irq(&proc_inum_lock); error = ida_get_new(&proc_inum_ida, &i); - spin_unlock(&proc_inum_lock); + spin_unlock_irq(&proc_inum_lock); if (error == -EAGAIN) goto retry; else if (error) - return 0; + return error; if (i > UINT_MAX - PROC_DYNAMIC_FIRST) { - spin_lock(&proc_inum_lock); + spin_lock_irq(&proc_inum_lock); ida_remove(&proc_inum_ida, i); - spin_unlock(&proc_inum_lock); - return 0; + spin_unlock_irq(&proc_inum_lock); + return -ENOSPC; } - return PROC_DYNAMIC_FIRST + i; + *inum = PROC_DYNAMIC_FIRST + i; + return 0; } -static void release_inode_number(unsigned int inum) +void proc_free_inum(unsigned int inum) { - spin_lock(&proc_inum_lock); + unsigned long flags; + spin_lock_irqsave(&proc_inum_lock, flags); ida_remove(&proc_inum_ida, inum - PROC_DYNAMIC_FIRST); - spin_unlock(&proc_inum_lock); + spin_unlock_irqrestore(&proc_inum_lock, flags); } static void *proc_follow_link(struct dentry *dentry, struct nameidata *nd) @@ -427,7 +422,7 @@ struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *dir, if (!memcmp(dentry->d_name.name, de->name, de->namelen)) { pde_get(de); spin_unlock(&proc_subdir_lock); - error = -EINVAL; + error = -ENOMEM; inode = proc_get_inode(dir->i_sb, de); goto out_unlock; } @@ -446,7 +441,7 @@ out_unlock: } struct dentry *proc_lookup(struct inode *dir, struct dentry *dentry, - struct nameidata *nd) + unsigned int flags) { return proc_lookup_de(PDE(dir), dir, dentry); } @@ -554,13 +549,12 @@ static const struct inode_operations proc_dir_inode_operations = { static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp) { - unsigned int i; struct proc_dir_entry *tmp; + int ret; - i = get_inode_number(); - if (i == 0) - return -EAGAIN; - dp->low_ino = i; + ret = proc_alloc_inum(&dp->low_ino); + if (ret) + return ret; if (S_ISDIR(dp->mode)) { if (dp->proc_iops == NULL) { @@ -605,7 +599,8 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent, unsigned int len; /* make sure name is valid */ - if (!name || !strlen(name)) goto out; + if (!name || !strlen(name)) + goto out; if (xlate_proc_name(name, parent, &fn) != 0) goto out; @@ -616,20 +611,18 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent, len = strlen(fn); - ent = kmalloc(sizeof(struct proc_dir_entry) + len + 1, GFP_KERNEL); - if (!ent) goto out; + ent = kzalloc(sizeof(struct proc_dir_entry) + len + 1, GFP_KERNEL); + if (!ent) + goto out; - memset(ent, 0, sizeof(struct proc_dir_entry)); memcpy(ent->name, fn, len + 1); ent->namelen = len; ent->mode = mode; ent->nlink = nlink; atomic_set(&ent->count, 1); - ent->pde_users = 0; spin_lock_init(&ent->pde_unload_lock); - ent->pde_unload_completion = NULL; INIT_LIST_HEAD(&ent->pde_openers); - out: +out: return ent; } @@ -765,7 +758,7 @@ EXPORT_SYMBOL(proc_create_data); static void free_proc_entry(struct proc_dir_entry *de) { - release_inode_number(de->low_ino); + proc_free_inum(de->low_ino); if (S_ISLNK(de->mode)) kfree(de->data); diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 7ac817b64a7..439ae688650 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -31,6 +31,7 @@ static void proc_evict_inode(struct inode *inode) struct proc_dir_entry *de; struct ctl_table_header *head; const struct proc_ns_operations *ns_ops; + void *ns; truncate_inode_pages(&inode->i_data, 0); clear_inode(inode); @@ -49,8 +50,9 @@ static void proc_evict_inode(struct inode *inode) } /* Release any associated namespace */ ns_ops = PROC_I(inode)->ns_ops; - if (ns_ops && ns_ops->put) - ns_ops->put(PROC_I(inode)->ns); + ns = PROC_I(inode)->ns; + if (ns_ops && ns) + ns_ops->put(ns); } static struct kmem_cache * proc_inode_cachep; @@ -450,7 +452,6 @@ struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de) return NULL; if (inode->i_state & I_NEW) { inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; - PROC_I(inode)->fd = 0; PROC_I(inode)->pde = de; if (de->mode) { diff --git a/fs/proc/internal.h b/fs/proc/internal.h index eca4aca5b6e..252544c0520 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -9,10 +9,13 @@ * 2 of the License, or (at your option) any later version. */ +#include <linux/sched.h> #include <linux/proc_fs.h> struct ctl_table_header; +struct mempolicy; extern struct proc_dir_entry proc_root; +extern void proc_self_init(void); #ifdef CONFIG_PROC_SYSCTL extern int proc_sys_init(void); extern void sysctl_head_put(struct ctl_table_header *head); @@ -65,6 +68,7 @@ extern const struct file_operations proc_clear_refs_operations; extern const struct file_operations proc_pagemap_operations; extern const struct file_operations proc_net_operations; extern const struct inode_operations proc_net_inode_operations; +extern const struct inode_operations proc_pid_link_inode_operations; struct proc_maps_private { struct pid *pid; @@ -72,6 +76,9 @@ struct proc_maps_private { #ifdef CONFIG_MMU struct vm_area_struct *tail_vma; #endif +#ifdef CONFIG_NUMA + struct mempolicy *task_mempolicy; +#endif }; void proc_init_inodecache(void); @@ -91,6 +98,52 @@ static inline int proc_fd(struct inode *inode) return PROC_I(inode)->fd; } +static inline int task_dumpable(struct task_struct *task) +{ + int dumpable = 0; + struct mm_struct *mm; + + task_lock(task); + mm = task->mm; + if (mm) + dumpable = get_dumpable(mm); + task_unlock(task); + if (dumpable == SUID_DUMPABLE_ENABLED) + return 1; + return 0; +} + +static inline int pid_delete_dentry(const struct dentry * dentry) +{ + /* Is the task we represent dead? + * If so, then don't put the dentry on the lru list, + * kill it immediately. + */ + return !proc_pid(dentry->d_inode)->tasks[PIDTYPE_PID].first; +} + +static inline unsigned name_to_int(struct dentry *dentry) +{ + const char *name = dentry->d_name.name; + int len = dentry->d_name.len; + unsigned n = 0; + + if (len > 1 && *name == '0') + goto out; + while (len-- > 0) { + unsigned c = *name++ - '0'; + if (c > 9) + goto out; + if (n >= (~0U-9)/10) + goto out; + n *= 10; + n += c; + } + return n; +out: + return ~0U; +} + struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *ino, struct dentry *dentry); int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent, @@ -106,7 +159,7 @@ void pde_users_dec(struct proc_dir_entry *pde); extern spinlock_t proc_subdir_lock; -struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *); +struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, unsigned int); int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir); unsigned long task_vsize(struct mm_struct *); unsigned long task_statm(struct mm_struct *, @@ -132,7 +185,7 @@ int proc_remount(struct super_block *sb, int *flags, char *data); * of the /proc/<pid> subdirectories. */ int proc_readdir(struct file *, void *, filldir_t); -struct dentry *proc_lookup(struct inode *, struct dentry *, struct nameidata *); +struct dentry *proc_lookup(struct inode *, struct dentry *, unsigned int); @@ -142,7 +195,7 @@ typedef struct dentry *instantiate_t(struct inode *, struct dentry *, int proc_fill_cache(struct file *filp, void *dirent, filldir_t filldir, const char *name, int len, instantiate_t instantiate, struct task_struct *task, const void *ptr); -int pid_revalidate(struct dentry *dentry, struct nameidata *nd); +int pid_revalidate(struct dentry *dentry, unsigned int flags); struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task); extern const struct dentry_operations pid_dentry_operations; int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat); diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index 86c67eee439..e96d4f18ca3 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c @@ -249,7 +249,7 @@ static int kcore_update_ram(void) /* Not inialized....update now */ /* find out "max pfn" */ end_pfn = 0; - for_each_node_state(nid, N_HIGH_MEMORY) { + for_each_node_state(nid, N_MEMORY) { unsigned long node_end; node_end = NODE_DATA(nid)->node_start_pfn + NODE_DATA(nid)->node_spanned_pages; diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c index 0d9e23a39e4..b7a47196c8c 100644 --- a/fs/proc/namespaces.c +++ b/fs/proc/namespaces.c @@ -11,6 +11,7 @@ #include <net/net_namespace.h> #include <linux/ipc_namespace.h> #include <linux/pid_namespace.h> +#include <linux/user_namespace.h> #include "internal.h" @@ -24,12 +25,168 @@ static const struct proc_ns_operations *ns_entries[] = { #ifdef CONFIG_IPC_NS &ipcns_operations, #endif +#ifdef CONFIG_PID_NS + &pidns_operations, +#endif +#ifdef CONFIG_USER_NS + &userns_operations, +#endif + &mntns_operations, }; static const struct file_operations ns_file_operations = { .llseek = no_llseek, }; +static const struct inode_operations ns_inode_operations = { + .setattr = proc_setattr, +}; + +static int ns_delete_dentry(const struct dentry *dentry) +{ + /* Don't cache namespace inodes when not in use */ + return 1; +} + +static char *ns_dname(struct dentry *dentry, char *buffer, int buflen) +{ + struct inode *inode = dentry->d_inode; + const struct proc_ns_operations *ns_ops = PROC_I(inode)->ns_ops; + + return dynamic_dname(dentry, buffer, buflen, "%s:[%lu]", + ns_ops->name, inode->i_ino); +} + +const struct dentry_operations ns_dentry_operations = +{ + .d_delete = ns_delete_dentry, + .d_dname = ns_dname, +}; + +static struct dentry *proc_ns_get_dentry(struct super_block *sb, + struct task_struct *task, const struct proc_ns_operations *ns_ops) +{ + struct dentry *dentry, *result; + struct inode *inode; + struct proc_inode *ei; + struct qstr qname = { .name = "", }; + void *ns; + + ns = ns_ops->get(task); + if (!ns) + return ERR_PTR(-ENOENT); + + dentry = d_alloc_pseudo(sb, &qname); + if (!dentry) { + ns_ops->put(ns); + return ERR_PTR(-ENOMEM); + } + + inode = iget_locked(sb, ns_ops->inum(ns)); + if (!inode) { + dput(dentry); + ns_ops->put(ns); + return ERR_PTR(-ENOMEM); + } + + ei = PROC_I(inode); + if (inode->i_state & I_NEW) { + inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; + inode->i_op = &ns_inode_operations; + inode->i_mode = S_IFREG | S_IRUGO; + inode->i_fop = &ns_file_operations; + ei->ns_ops = ns_ops; + ei->ns = ns; + unlock_new_inode(inode); + } else { + ns_ops->put(ns); + } + + d_set_d_op(dentry, &ns_dentry_operations); + result = d_instantiate_unique(dentry, inode); + if (result) { + dput(dentry); + dentry = result; + } + + return dentry; +} + +static void *proc_ns_follow_link(struct dentry *dentry, struct nameidata *nd) +{ + struct inode *inode = dentry->d_inode; + struct super_block *sb = inode->i_sb; + struct proc_inode *ei = PROC_I(inode); + struct task_struct *task; + struct dentry *ns_dentry; + void *error = ERR_PTR(-EACCES); + + task = get_proc_task(inode); + if (!task) + goto out; + + if (!ptrace_may_access(task, PTRACE_MODE_READ)) + goto out_put_task; + + ns_dentry = proc_ns_get_dentry(sb, task, ei->ns_ops); + if (IS_ERR(ns_dentry)) { + error = ERR_CAST(ns_dentry); + goto out_put_task; + } + + dput(nd->path.dentry); + nd->path.dentry = ns_dentry; + error = NULL; + +out_put_task: + put_task_struct(task); +out: + return error; +} + +static int proc_ns_readlink(struct dentry *dentry, char __user *buffer, int buflen) +{ + struct inode *inode = dentry->d_inode; + struct proc_inode *ei = PROC_I(inode); + const struct proc_ns_operations *ns_ops = ei->ns_ops; + struct task_struct *task; + void *ns; + char name[50]; + int len = -EACCES; + + task = get_proc_task(inode); + if (!task) + goto out; + + if (!ptrace_may_access(task, PTRACE_MODE_READ)) + goto out_put_task; + + len = -ENOENT; + ns = ns_ops->get(task); + if (!ns) + goto out_put_task; + + snprintf(name, sizeof(name), "%s:[%u]", ns_ops->name, ns_ops->inum(ns)); + len = strlen(name); + + if (len > buflen) + len = buflen; + if (copy_to_user(buffer, name, len)) + len = -EFAULT; + + ns_ops->put(ns); +out_put_task: + put_task_struct(task); +out: + return len; +} + +static const struct inode_operations proc_ns_link_inode_operations = { + .readlink = proc_ns_readlink, + .follow_link = proc_ns_follow_link, + .setattr = proc_setattr, +}; + static struct dentry *proc_ns_instantiate(struct inode *dir, struct dentry *dentry, struct task_struct *task, const void *ptr) { @@ -37,32 +194,23 @@ static struct dentry *proc_ns_instantiate(struct inode *dir, struct inode *inode; struct proc_inode *ei; struct dentry *error = ERR_PTR(-ENOENT); - void *ns; inode = proc_pid_make_inode(dir->i_sb, task); if (!inode) goto out; - ns = ns_ops->get(task); - if (!ns) - goto out_iput; - ei = PROC_I(inode); - inode->i_mode = S_IFREG|S_IRUSR; - inode->i_fop = &ns_file_operations; - ei->ns_ops = ns_ops; - ei->ns = ns; + inode->i_mode = S_IFLNK|S_IRWXUGO; + inode->i_op = &proc_ns_link_inode_operations; + ei->ns_ops = ns_ops; d_set_d_op(dentry, &pid_dentry_operations); d_add(dentry, inode); /* Close the race of the process dying before we return the dentry */ - if (pid_revalidate(dentry, NULL)) + if (pid_revalidate(dentry, 0)) error = NULL; out: return error; -out_iput: - iput(inode); - goto out; } static int proc_ns_fill_cache(struct file *filp, void *dirent, @@ -89,10 +237,6 @@ static int proc_ns_dir_readdir(struct file *filp, void *dirent, if (!task) goto out_no_task; - ret = -EPERM; - if (!ptrace_may_access(task, PTRACE_MODE_READ)) - goto out; - ret = 0; i = filp->f_pos; switch (i) { @@ -140,7 +284,7 @@ const struct file_operations proc_ns_dir_operations = { }; static struct dentry *proc_ns_dir_lookup(struct inode *dir, - struct dentry *dentry, struct nameidata *nd) + struct dentry *dentry, unsigned int flags) { struct dentry *error; struct task_struct *task = get_proc_task(dir); @@ -152,10 +296,6 @@ static struct dentry *proc_ns_dir_lookup(struct inode *dir, if (!task) goto out_no_task; - error = ERR_PTR(-EPERM); - if (!ptrace_may_access(task, PTRACE_MODE_READ)) - goto out; - last = &ns_entries[ARRAY_SIZE(ns_entries)]; for (entry = ns_entries; entry < last; entry++) { if (strlen((*entry)->name) != len) @@ -163,7 +303,6 @@ static struct dentry *proc_ns_dir_lookup(struct inode *dir, if (!memcmp(dentry->d_name.name, (*entry)->name, len)) break; } - error = ERR_PTR(-ENOENT); if (entry == last) goto out; @@ -198,3 +337,7 @@ out_invalid: return ERR_PTR(-EINVAL); } +bool proc_ns_inode(struct inode *inode) +{ + return inode->i_fop == &ns_file_operations; +} diff --git a/fs/proc/page.c b/fs/proc/page.c index 7fcd0d60a96..b8730d9ebae 100644 --- a/fs/proc/page.c +++ b/fs/proc/page.c @@ -115,7 +115,13 @@ u64 stable_page_flags(struct page *page) u |= 1 << KPF_COMPOUND_TAIL; if (PageHuge(page)) u |= 1 << KPF_HUGE; - else if (PageTransCompound(page)) + /* + * PageTransCompound can be true for non-huge compound pages (slab + * pages or pages allocated by drivers with __GFP_COMP) because it + * just checks PG_head/PG_tail, so we need to check PageLRU to make + * sure a given page is a thp, not a non-huge compound page. + */ + else if (PageTransCompound(page) && PageLRU(compound_trans_head(page))) u |= 1 << KPF_THP; /* diff --git a/fs/proc/proc_devtree.c b/fs/proc/proc_devtree.c index 927cbd115e5..de20ec480fa 100644 --- a/fs/proc/proc_devtree.c +++ b/fs/proc/proc_devtree.c @@ -101,6 +101,11 @@ void proc_device_tree_update_prop(struct proc_dir_entry *pde, { struct proc_dir_entry *ent; + if (!oldprop) { + proc_device_tree_add_prop(pde, newprop); + return; + } + for (ent = pde->subdir; ent != NULL; ent = ent->next) if (ent->data == oldprop) break; @@ -190,11 +195,7 @@ void proc_device_tree_add_node(struct device_node *np, set_node_proc_entry(np, de); for (child = NULL; (child = of_get_next_child(np, child));) { /* Use everything after the last slash, or the full name */ - p = strrchr(child->full_name, '/'); - if (!p) - p = child->full_name; - else - ++p; + p = kbasename(child->full_name); if (duplicate_name(de, p)) p = fixup_name(np, de, p); diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c index 06e1cc17caf..fe72cd073de 100644 --- a/fs/proc/proc_net.c +++ b/fs/proc/proc_net.c @@ -119,7 +119,7 @@ static struct net *get_proc_task_net(struct inode *dir) } static struct dentry *proc_tgid_net_lookup(struct inode *dir, - struct dentry *dentry, struct nameidata *nd) + struct dentry *dentry, unsigned int flags) { struct dentry *de; struct net *net; diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 3476bca8f7a..1827d88ad58 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -142,6 +142,7 @@ static int insert_entry(struct ctl_table_header *head, struct ctl_table *entry) } rb_link_node(node, parent, p); + rb_insert_color(node, &head->parent->root); return 0; } @@ -168,10 +169,8 @@ static void init_header(struct ctl_table_header *head, head->node = node; if (node) { struct ctl_table *entry; - for (entry = table; entry->procname; entry++, node++) { - rb_init_node(&node->node); + for (entry = table; entry->procname; entry++, node++) node->header = head; - } } } @@ -266,8 +265,7 @@ void sysctl_head_put(struct ctl_table_header *head) static struct ctl_table_header *sysctl_head_grab(struct ctl_table_header *head) { - if (!head) - BUG(); + BUG_ON(!head); spin_lock(&sysctl_lock); if (!use_table(head)) head = ERR_PTR(-ENOENT); @@ -380,12 +378,13 @@ static int test_perm(int mode, int op) return -EACCES; } -static int sysctl_perm(struct ctl_table_root *root, struct ctl_table *table, int op) +static int sysctl_perm(struct ctl_table_header *head, struct ctl_table *table, int op) { + struct ctl_table_root *root = head->root; int mode; if (root->permissions) - mode = root->permissions(root, current->nsproxy, table); + mode = root->permissions(head, table); else mode = table->mode; @@ -433,7 +432,7 @@ static struct ctl_table_header *grab_header(struct inode *inode) } static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry, - struct nameidata *nd) + unsigned int flags) { struct ctl_table_header *head = grab_header(dir); struct ctl_table_header *h = NULL; @@ -462,9 +461,6 @@ static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry, err = ERR_PTR(-ENOMEM); inode = proc_sys_make_inode(dir->i_sb, h ? h : head, p); - if (h) - sysctl_head_finish(h); - if (!inode) goto out; @@ -473,6 +469,8 @@ static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry, d_add(dentry, inode); out: + if (h) + sysctl_head_finish(h); sysctl_head_finish(head); return err; } @@ -494,7 +492,7 @@ static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf, * and won't be until we finish. */ error = -EPERM; - if (sysctl_perm(head->root, table, write ? MAY_WRITE : MAY_READ)) + if (sysctl_perm(head, table, write ? MAY_WRITE : MAY_READ)) goto out; /* if that can happen at all, it should be -EINVAL, not -EISDIR */ @@ -720,7 +718,7 @@ static int proc_sys_permission(struct inode *inode, int mask) if (!table) /* global root - r-xr-xr-x */ error = mask & MAY_WRITE ? -EACCES : 0; else /* Use the permissions on the sysctl table entry */ - error = sysctl_perm(head->root, table, mask & ~MAY_NOT_BLOCK); + error = sysctl_perm(head, table, mask & ~MAY_NOT_BLOCK); sysctl_head_finish(head); return error; @@ -738,13 +736,6 @@ static int proc_sys_setattr(struct dentry *dentry, struct iattr *attr) if (error) return error; - if ((attr->ia_valid & ATTR_SIZE) && - attr->ia_size != i_size_read(inode)) { - error = vmtruncate(inode, attr->ia_size); - if (error) - return error; - } - setattr_copy(inode, attr); mark_inode_dirty(inode); return 0; @@ -794,9 +785,9 @@ static const struct inode_operations proc_sys_dir_operations = { .getattr = proc_sys_getattr, }; -static int proc_sys_revalidate(struct dentry *dentry, struct nameidata *nd) +static int proc_sys_revalidate(struct dentry *dentry, unsigned int flags) { - if (nd->flags & LOOKUP_RCU) + if (flags & LOOKUP_RCU) return -ECHILD; return !PROC_I(dentry->d_inode)->sysctl->unregistering; } diff --git a/fs/proc/root.c b/fs/proc/root.c index 7c30fce037c..c6e9fac26ba 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -61,7 +61,7 @@ static int proc_parse_options(char *options, struct pid_namespace *pid) if (!*p) continue; - args[0].to = args[0].from = 0; + args[0].to = args[0].from = NULL; token = match_token(p, tokens, args); switch (token) { case Opt_gid: @@ -100,18 +100,17 @@ static struct dentry *proc_mount(struct file_system_type *fs_type, int err; struct super_block *sb; struct pid_namespace *ns; - struct proc_inode *ei; char *options; if (flags & MS_KERNMOUNT) { ns = (struct pid_namespace *)data; options = NULL; } else { - ns = current->nsproxy->pid_ns; + ns = task_active_pid_ns(current); options = data; } - sb = sget(fs_type, proc_test_super, proc_set_super, ns); + sb = sget(fs_type, proc_test_super, proc_set_super, flags, ns); if (IS_ERR(sb)) return ERR_CAST(sb); @@ -121,7 +120,6 @@ static struct dentry *proc_mount(struct file_system_type *fs_type, } if (!sb->s_root) { - sb->s_flags = flags; err = proc_fill_super(sb); if (err) { deactivate_locked_super(sb); @@ -131,13 +129,6 @@ static struct dentry *proc_mount(struct file_system_type *fs_type, sb->s_flags |= MS_ACTIVE; } - ei = PROC_I(sb->s_root->d_inode); - if (!ei->pid) { - rcu_read_lock(); - ei->pid = get_pid(find_pid_ns(1, ns)); - rcu_read_unlock(); - } - return dget(sb->s_root); } @@ -154,6 +145,7 @@ static struct file_system_type proc_fs_type = { .name = "proc", .mount = proc_mount, .kill_sb = proc_kill_sb, + .fs_flags = FS_USERNS_MOUNT, }; void __init proc_root_init(void) @@ -164,12 +156,8 @@ void __init proc_root_init(void) err = register_filesystem(&proc_fs_type); if (err) return; - err = pid_ns_prepare_proc(&init_pid_ns); - if (err) { - unregister_filesystem(&proc_fs_type); - return; - } + proc_self_init(); proc_symlink("mounts", NULL, "self/mounts"); proc_net_init(); @@ -200,13 +188,12 @@ static int proc_root_getattr(struct vfsmount *mnt, struct dentry *dentry, struct return 0; } -static struct dentry *proc_root_lookup(struct inode * dir, struct dentry * dentry, struct nameidata *nd) +static struct dentry *proc_root_lookup(struct inode * dir, struct dentry * dentry, unsigned int flags) { - if (!proc_lookup(dir, dentry, nd)) { + if (!proc_lookup(dir, dentry, flags)) return NULL; - } - return proc_pid_lookup(dir, dentry, nd); + return proc_pid_lookup(dir, dentry, flags); } static int proc_root_readdir(struct file * filp, diff --git a/fs/proc/self.c b/fs/proc/self.c new file mode 100644 index 00000000000..aa5cc3bff14 --- /dev/null +++ b/fs/proc/self.c @@ -0,0 +1,59 @@ +#include <linux/proc_fs.h> +#include <linux/sched.h> +#include <linux/namei.h> + +/* + * /proc/self: + */ +static int proc_self_readlink(struct dentry *dentry, char __user *buffer, + int buflen) +{ + struct pid_namespace *ns = dentry->d_sb->s_fs_info; + pid_t tgid = task_tgid_nr_ns(current, ns); + char tmp[PROC_NUMBUF]; + if (!tgid) + return -ENOENT; + sprintf(tmp, "%d", tgid); + return vfs_readlink(dentry,buffer,buflen,tmp); +} + +static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd) +{ + struct pid_namespace *ns = dentry->d_sb->s_fs_info; + pid_t tgid = task_tgid_nr_ns(current, ns); + char *name = ERR_PTR(-ENOENT); + if (tgid) { + /* 11 for max length of signed int in decimal + NULL term */ + name = kmalloc(12, GFP_KERNEL); + if (!name) + name = ERR_PTR(-ENOMEM); + else + sprintf(name, "%d", tgid); + } + nd_set_link(nd, name); + return NULL; +} + +static void proc_self_put_link(struct dentry *dentry, struct nameidata *nd, + void *cookie) +{ + char *s = nd_get_link(nd); + if (!IS_ERR(s)) + kfree(s); +} + +static const struct inode_operations proc_self_inode_operations = { + .readlink = proc_self_readlink, + .follow_link = proc_self_follow_link, + .put_link = proc_self_put_link, +}; + +void __init proc_self_init(void) +{ + struct proc_dir_entry *proc_self_symlink; + mode_t mode; + + mode = S_IFLNK | S_IRWXUGO; + proc_self_symlink = proc_create("self", mode, NULL, NULL ); + proc_self_symlink->proc_iops = &proc_self_inode_operations; +} diff --git a/fs/proc/stat.c b/fs/proc/stat.c index 64c3b317236..e296572c73e 100644 --- a/fs/proc/stat.c +++ b/fs/proc/stat.c @@ -45,10 +45,13 @@ static cputime64_t get_iowait_time(int cpu) static u64 get_idle_time(int cpu) { - u64 idle, idle_time = get_cpu_idle_time_us(cpu, NULL); + u64 idle, idle_time = -1ULL; + + if (cpu_online(cpu)) + idle_time = get_cpu_idle_time_us(cpu, NULL); if (idle_time == -1ULL) - /* !NO_HZ so we can rely on cpustat.idle */ + /* !NO_HZ or cpu offline so we can rely on cpustat.idle */ idle = kcpustat_cpu(cpu).cpustat[CPUTIME_IDLE]; else idle = usecs_to_cputime64(idle_time); @@ -58,10 +61,13 @@ static u64 get_idle_time(int cpu) static u64 get_iowait_time(int cpu) { - u64 iowait, iowait_time = get_cpu_iowait_time_us(cpu, NULL); + u64 iowait, iowait_time = -1ULL; + + if (cpu_online(cpu)) + iowait_time = get_cpu_iowait_time_us(cpu, NULL); if (iowait_time == -1ULL) - /* !NO_HZ so we can rely on cpustat.iowait */ + /* !NO_HZ or cpu offline so we can rely on cpustat.iowait */ iowait = kcpustat_cpu(cpu).cpustat[CPUTIME_IOWAIT]; else iowait = usecs_to_cputime64(iowait_time); diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 4540b8f76f1..ca5ce7f9f80 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -54,7 +54,7 @@ void task_mem(struct seq_file *m, struct mm_struct *mm) "VmPTE:\t%8lu kB\n" "VmSwap:\t%8lu kB\n", hiwater_vm << (PAGE_SHIFT-10), - (total_vm - mm->reserved_vm) << (PAGE_SHIFT-10), + total_vm << (PAGE_SHIFT-10), mm->locked_vm << (PAGE_SHIFT-10), mm->pinned_vm << (PAGE_SHIFT-10), hiwater_rss << (PAGE_SHIFT-10), @@ -90,10 +90,55 @@ static void pad_len_spaces(struct seq_file *m, int len) seq_printf(m, "%*c", len, ' '); } +#ifdef CONFIG_NUMA +/* + * These functions are for numa_maps but called in generic **maps seq_file + * ->start(), ->stop() ops. + * + * numa_maps scans all vmas under mmap_sem and checks their mempolicy. + * Each mempolicy object is controlled by reference counting. The problem here + * is how to avoid accessing dead mempolicy object. + * + * Because we're holding mmap_sem while reading seq_file, it's safe to access + * each vma's mempolicy, no vma objects will never drop refs to mempolicy. + * + * A task's mempolicy (task->mempolicy) has different behavior. task->mempolicy + * is set and replaced under mmap_sem but unrefed and cleared under task_lock(). + * So, without task_lock(), we cannot trust get_vma_policy() because we cannot + * gurantee the task never exits under us. But taking task_lock() around + * get_vma_plicy() causes lock order problem. + * + * To access task->mempolicy without lock, we hold a reference count of an + * object pointed by task->mempolicy and remember it. This will guarantee + * that task->mempolicy points to an alive object or NULL in numa_maps accesses. + */ +static void hold_task_mempolicy(struct proc_maps_private *priv) +{ + struct task_struct *task = priv->task; + + task_lock(task); + priv->task_mempolicy = task->mempolicy; + mpol_get(priv->task_mempolicy); + task_unlock(task); +} +static void release_task_mempolicy(struct proc_maps_private *priv) +{ + mpol_put(priv->task_mempolicy); +} +#else +static void hold_task_mempolicy(struct proc_maps_private *priv) +{ +} +static void release_task_mempolicy(struct proc_maps_private *priv) +{ +} +#endif + static void vma_stop(struct proc_maps_private *priv, struct vm_area_struct *vma) { if (vma && vma != priv->tail_vma) { struct mm_struct *mm = vma->vm_mm; + release_task_mempolicy(priv); up_read(&mm->mmap_sem); mmput(mm); } @@ -132,7 +177,7 @@ static void *m_start(struct seq_file *m, loff_t *pos) tail_vma = get_gate_vma(priv->task->mm); priv->tail_vma = tail_vma; - + hold_task_mempolicy(priv); /* Start with last addr hint */ vma = find_vma(mm, last_addr); if (last_addr && vma) { @@ -159,6 +204,7 @@ out: if (vma) return vma; + release_task_mempolicy(priv); /* End of vmas has been reached */ m->version = (tail_vma != NULL)? 0: -1UL; up_read(&mm->mmap_sem); @@ -480,6 +526,57 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, return 0; } +static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma) +{ + /* + * Don't forget to update Documentation/ on changes. + */ + static const char mnemonics[BITS_PER_LONG][2] = { + /* + * In case if we meet a flag we don't know about. + */ + [0 ... (BITS_PER_LONG-1)] = "??", + + [ilog2(VM_READ)] = "rd", + [ilog2(VM_WRITE)] = "wr", + [ilog2(VM_EXEC)] = "ex", + [ilog2(VM_SHARED)] = "sh", + [ilog2(VM_MAYREAD)] = "mr", + [ilog2(VM_MAYWRITE)] = "mw", + [ilog2(VM_MAYEXEC)] = "me", + [ilog2(VM_MAYSHARE)] = "ms", + [ilog2(VM_GROWSDOWN)] = "gd", + [ilog2(VM_PFNMAP)] = "pf", + [ilog2(VM_DENYWRITE)] = "dw", + [ilog2(VM_LOCKED)] = "lo", + [ilog2(VM_IO)] = "io", + [ilog2(VM_SEQ_READ)] = "sr", + [ilog2(VM_RAND_READ)] = "rr", + [ilog2(VM_DONTCOPY)] = "dc", + [ilog2(VM_DONTEXPAND)] = "de", + [ilog2(VM_ACCOUNT)] = "ac", + [ilog2(VM_NORESERVE)] = "nr", + [ilog2(VM_HUGETLB)] = "ht", + [ilog2(VM_NONLINEAR)] = "nl", + [ilog2(VM_ARCH_1)] = "ar", + [ilog2(VM_DONTDUMP)] = "dd", + [ilog2(VM_MIXEDMAP)] = "mm", + [ilog2(VM_HUGEPAGE)] = "hg", + [ilog2(VM_NOHUGEPAGE)] = "nh", + [ilog2(VM_MERGEABLE)] = "mg", + }; + size_t i; + + seq_puts(m, "VmFlags: "); + for (i = 0; i < BITS_PER_LONG; i++) { + if (vma->vm_flags & (1UL << i)) { + seq_printf(m, "%c%c ", + mnemonics[i][0], mnemonics[i][1]); + } + } + seq_putc(m, '\n'); +} + static int show_smap(struct seq_file *m, void *v, int is_pid) { struct proc_maps_private *priv = m->private; @@ -535,6 +632,8 @@ static int show_smap(struct seq_file *m, void *v, int is_pid) seq_printf(m, "Nonlinear: %8lu kB\n", mss.nonlinear >> 10); + show_smap_vma_flags(m, vma); + if (m->count < m->size) /* vma is copied successfully */ m->version = (vma != get_gate_vma(task->mm)) ? vma->vm_start : 0; @@ -597,7 +696,7 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr, spinlock_t *ptl; struct page *page; - split_huge_page_pmd(walk->mm, pmd); + split_huge_page_pmd(vma, addr, pmd); if (pmd_trans_unstable(pmd)) return 0; @@ -1080,7 +1179,7 @@ static struct page *can_gather_numa_stats(pte_t pte, struct vm_area_struct *vma, return NULL; nid = page_to_nid(page); - if (!node_isset(nid, node_states[N_HIGH_MEMORY])) + if (!node_isset(nid, node_states[N_MEMORY])) return NULL; return page; @@ -1158,6 +1257,7 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid) struct vm_area_struct *vma = v; struct numa_maps *md = &numa_priv->md; struct file *file = vma->vm_file; + struct task_struct *task = proc_priv->task; struct mm_struct *mm = vma->vm_mm; struct mm_walk walk = {}; struct mempolicy *pol; @@ -1177,8 +1277,8 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid) walk.private = md; walk.mm = mm; - pol = get_vma_policy(proc_priv->task, vma, vma->vm_start); - mpol_to_str(buffer, sizeof(buffer), pol, 0); + pol = get_vma_policy(task, vma, vma->vm_start); + mpol_to_str(buffer, sizeof(buffer), pol); mpol_cond_put(pol); seq_printf(m, "%08lx %s", vma->vm_start, buffer); @@ -1189,7 +1289,7 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid) } else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) { seq_printf(m, " heap"); } else { - pid_t tid = vm_is_stack(proc_priv->task, vma, is_pid); + pid_t tid = vm_is_stack(task, vma, is_pid); if (tid != 0) { /* * Thread stack in /proc/PID/task/TID/maps or @@ -1232,7 +1332,7 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid) if (md->writeback) seq_printf(m, " writeback=%lu", md->writeback); - for_each_node_state(n, N_HIGH_MEMORY) + for_each_node_state(n, N_MEMORY) if (md->node[n]) seq_printf(m, " N%d=%lu", n, md->node[n]); out: |