aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2012-01-10 16:42:48 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2012-01-10 16:42:48 -0800
commit40ba587923ae67090d9f141c1d3c951be5c1420e (patch)
tree342a72fc0ee13a0d2496ef970b64dfeadf1355d2 /fs
parent54c2c5761febcca46c8037d3a81612991e6c209a (diff)
parent6b550f9495947fc279d12c38feaf98500e8d0646 (diff)
Merge branch 'akpm' (aka "Andrew's patch-bomb")
Andrew elucidates: - First installmeant of MM. We have a HUGE number of MM patches this time. It's crazy. - MAINTAINERS updates - backlight updates - leds - checkpatch updates - misc ELF stuff - rtc updates - reiserfs - procfs - some misc other bits * akpm: (124 commits) user namespace: make signal.c respect user namespaces workqueue: make alloc_workqueue() take printf fmt and args for name procfs: add hidepid= and gid= mount options procfs: parse mount options procfs: introduce the /proc/<pid>/map_files/ directory procfs: make proc_get_link to use dentry instead of inode signal: add block_sigmask() for adding sigmask to current->blocked sparc: make SA_NOMASK a synonym of SA_NODEFER reiserfs: don't lock root inode searching reiserfs: don't lock journal_init() reiserfs: delay reiserfs lock until journal initialization reiserfs: delete comments referring to the BKL drivers/rtc/interface.c: fix alarm rollover when day or month is out-of-range drivers/rtc/rtc-twl.c: add DT support for RTC inside twl4030/twl6030 drivers/rtc/: remove redundant spi driver bus initialization drivers/rtc/rtc-jz4740.c: make jz4740_rtc_driver static drivers/rtc/rtc-mc13xxx.c: make mc13xxx_rtc_idtable static rtc: convert drivers/rtc/* to use module_platform_driver() drivers/rtc/rtc-wm831x.c: convert to devm_kzalloc() drivers/rtc/rtc-wm831x.c: remove unused period IRQ handler ...
Diffstat (limited to 'fs')
-rw-r--r--fs/Kconfig.binfmt3
-rw-r--r--fs/binfmt_elf.c2
-rw-r--r--fs/btrfs/file.c2
-rw-r--r--fs/exec.c4
-rw-r--r--fs/inode.c2
-rw-r--r--fs/proc/base.c447
-rw-r--r--fs/proc/inode.c18
-rw-r--r--fs/proc/internal.h1
-rw-r--r--fs/proc/root.c70
-rw-r--r--fs/reiserfs/bitmap.c3
-rw-r--r--fs/reiserfs/journal.c64
-rw-r--r--fs/reiserfs/super.c54
12 files changed, 587 insertions, 83 deletions
diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt
index 79e2ca7973b..e95d1b64082 100644
--- a/fs/Kconfig.binfmt
+++ b/fs/Kconfig.binfmt
@@ -27,6 +27,9 @@ config COMPAT_BINFMT_ELF
bool
depends on COMPAT && BINFMT_ELF
+config ARCH_BINFMT_ELF_RANDOMIZE_PIE
+ bool
+
config BINFMT_ELF_FDPIC
bool "Kernel support for FDPIC ELF binaries"
default y
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 21ac5ee4b43..bcb884e2d61 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -794,7 +794,7 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
* default mmap base, as well as whatever program they
* might try to exec. This is because the brk will
* follow the loader, and is not movable. */
-#if defined(CONFIG_X86) || defined(CONFIG_ARM)
+#ifdef CONFIG_ARCH_BINFMT_ELF_RANDOMIZE_PIE
/* Memory randomization might have been switched off
* in runtime via sysctl.
* If that is the case, retain the original non-zero
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 97fbe939c05..20375e6691c 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1081,7 +1081,7 @@ static noinline int prepare_pages(struct btrfs_root *root, struct file *file,
again:
for (i = 0; i < num_pages; i++) {
pages[i] = find_or_create_page(inode->i_mapping, index + i,
- mask);
+ mask | __GFP_WRITE);
if (!pages[i]) {
faili = i - 1;
err = -ENOMEM;
diff --git a/fs/exec.c b/fs/exec.c
index 3f64b9f26e7..aeb135c7ff5 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -59,6 +59,8 @@
#include <asm/uaccess.h>
#include <asm/mmu_context.h>
#include <asm/tlb.h>
+
+#include <trace/events/task.h>
#include "internal.h"
int core_uses_pid;
@@ -1054,6 +1056,8 @@ void set_task_comm(struct task_struct *tsk, char *buf)
{
task_lock(tsk);
+ trace_task_rename(tsk, buf);
+
/*
* Threads may access current->comm without holding
* the task lock, so write the string carefully.
diff --git a/fs/inode.c b/fs/inode.c
index 87535753ab0..4fa4f0916af 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -776,6 +776,8 @@ void prune_icache_sb(struct super_block *sb, int nr_to_scan)
else
__count_vm_events(PGINODESTEAL, reap);
spin_unlock(&sb->s_inode_lru_lock);
+ if (current->reclaim_state)
+ current->reclaim_state->reclaimed_slab += reap;
dispose_list(&freeable);
}
diff --git a/fs/proc/base.c b/fs/proc/base.c
index a1dddda999f..8173dfd89cb 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -83,9 +83,11 @@
#include <linux/pid_namespace.h>
#include <linux/fs_struct.h>
#include <linux/slab.h>
+#include <linux/flex_array.h>
#ifdef CONFIG_HARDWALL
#include <asm/hardwall.h>
#endif
+#include <trace/events/oom.h>
#include "internal.h"
/* NOTE:
@@ -133,6 +135,8 @@ struct pid_entry {
NULL, &proc_single_file_operations, \
{ .proc_show = show } )
+static int proc_fd_permission(struct inode *inode, int mask);
+
/*
* Count the number of hardlinks for the pid_entry table, excluding the .
* and .. links.
@@ -165,9 +169,9 @@ static int get_task_root(struct task_struct *task, struct path *root)
return result;
}
-static int proc_cwd_link(struct inode *inode, struct path *path)
+static int proc_cwd_link(struct dentry *dentry, struct path *path)
{
- struct task_struct *task = get_proc_task(inode);
+ struct task_struct *task = get_proc_task(dentry->d_inode);
int result = -ENOENT;
if (task) {
@@ -182,9 +186,9 @@ static int proc_cwd_link(struct inode *inode, struct path *path)
return result;
}
-static int proc_root_link(struct inode *inode, struct path *path)
+static int proc_root_link(struct dentry *dentry, struct path *path)
{
- struct task_struct *task = get_proc_task(inode);
+ struct task_struct *task = get_proc_task(dentry->d_inode);
int result = -ENOENT;
if (task) {
@@ -627,6 +631,50 @@ int proc_setattr(struct dentry *dentry, struct iattr *attr)
return 0;
}
+/*
+ * May current process learn task's sched/cmdline info (for hide_pid_min=1)
+ * or euid/egid (for hide_pid_min=2)?
+ */
+static bool has_pid_permissions(struct pid_namespace *pid,
+ struct task_struct *task,
+ int hide_pid_min)
+{
+ if (pid->hide_pid < hide_pid_min)
+ return true;
+ if (in_group_p(pid->pid_gid))
+ return true;
+ return ptrace_may_access(task, PTRACE_MODE_READ);
+}
+
+
+static int proc_pid_permission(struct inode *inode, int mask)
+{
+ struct pid_namespace *pid = inode->i_sb->s_fs_info;
+ struct task_struct *task;
+ bool has_perms;
+
+ task = get_proc_task(inode);
+ has_perms = has_pid_permissions(pid, task, 1);
+ put_task_struct(task);
+
+ if (!has_perms) {
+ if (pid->hide_pid == 2) {
+ /*
+ * Let's make getdents(), stat(), and open()
+ * consistent with each other. If a process
+ * may not stat() a file, it shouldn't be seen
+ * in procfs at all.
+ */
+ return -ENOENT;
+ }
+
+ return -EPERM;
+ }
+ return generic_permission(inode, mask);
+}
+
+
+
static const struct inode_operations proc_def_inode_operations = {
.setattr = proc_setattr,
};
@@ -1010,6 +1058,7 @@ static ssize_t oom_adjust_write(struct file *file, const char __user *buf,
else
task->signal->oom_score_adj = (oom_adjust * OOM_SCORE_ADJ_MAX) /
-OOM_DISABLE;
+ trace_oom_score_adj_update(task);
err_sighand:
unlock_task_sighand(task, &flags);
err_task_lock:
@@ -1097,6 +1146,7 @@ static ssize_t oom_score_adj_write(struct file *file, const char __user *buf,
task->signal->oom_score_adj = oom_score_adj;
if (has_capability_noaudit(current, CAP_SYS_RESOURCE))
task->signal->oom_score_adj_min = oom_score_adj;
+ trace_oom_score_adj_update(task);
/*
* Scale /proc/pid/oom_adj appropriately ensuring that OOM_DISABLE is
* always attainable.
@@ -1453,13 +1503,13 @@ static const struct file_operations proc_pid_set_comm_operations = {
.release = single_release,
};
-static int proc_exe_link(struct inode *inode, struct path *exe_path)
+static int proc_exe_link(struct dentry *dentry, struct path *exe_path)
{
struct task_struct *task;
struct mm_struct *mm;
struct file *exe_file;
- task = get_proc_task(inode);
+ task = get_proc_task(dentry->d_inode);
if (!task)
return -ENOENT;
mm = get_task_mm(task);
@@ -1489,7 +1539,7 @@ static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd)
if (!proc_fd_access_allowed(inode))
goto out;
- error = PROC_I(inode)->op.proc_get_link(inode, &nd->path);
+ error = PROC_I(inode)->op.proc_get_link(dentry, &nd->path);
out:
return ERR_PTR(error);
}
@@ -1528,7 +1578,7 @@ static int proc_pid_readlink(struct dentry * dentry, char __user * buffer, int b
if (!proc_fd_access_allowed(inode))
goto out;
- error = PROC_I(inode)->op.proc_get_link(inode, &path);
+ error = PROC_I(inode)->op.proc_get_link(dentry, &path);
if (error)
goto out;
@@ -1609,6 +1659,7 @@ int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
struct inode *inode = dentry->d_inode;
struct task_struct *task;
const struct cred *cred;
+ struct pid_namespace *pid = dentry->d_sb->s_fs_info;
generic_fillattr(inode, stat);
@@ -1617,6 +1668,14 @@ int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
stat->gid = 0;
task = pid_task(proc_pid(inode), PIDTYPE_PID);
if (task) {
+ if (!has_pid_permissions(pid, task, 2)) {
+ rcu_read_unlock();
+ /*
+ * This doesn't prevent learning whether PID exists,
+ * it only makes getattr() consistent with readdir().
+ */
+ return -ENOENT;
+ }
if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) ||
task_dumpable(task)) {
cred = __task_cred(task);
@@ -1820,9 +1879,9 @@ static int proc_fd_info(struct inode *inode, struct path *path, char *info)
return -ENOENT;
}
-static int proc_fd_link(struct inode *inode, struct path *path)
+static int proc_fd_link(struct dentry *dentry, struct path *path)
{
- return proc_fd_info(inode, path, NULL);
+ return proc_fd_info(dentry->d_inode, path, NULL);
}
static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd)
@@ -2043,6 +2102,355 @@ static const struct file_operations proc_fd_operations = {
.llseek = default_llseek,
};
+#ifdef CONFIG_CHECKPOINT_RESTORE
+
+/*
+ * dname_to_vma_addr - maps a dentry name into two unsigned longs
+ * which represent vma start and end addresses.
+ */
+static int dname_to_vma_addr(struct dentry *dentry,
+ unsigned long *start, unsigned long *end)
+{
+ if (sscanf(dentry->d_name.name, "%lx-%lx", start, end) != 2)
+ return -EINVAL;
+
+ return 0;
+}
+
+static int map_files_d_revalidate(struct dentry *dentry, struct nameidata *nd)
+{
+ unsigned long vm_start, vm_end;
+ bool exact_vma_exists = false;
+ struct mm_struct *mm = NULL;
+ struct task_struct *task;
+ const struct cred *cred;
+ struct inode *inode;
+ int status = 0;
+
+ if (nd && nd->flags & LOOKUP_RCU)
+ return -ECHILD;
+
+ if (!capable(CAP_SYS_ADMIN)) {
+ status = -EACCES;
+ goto out_notask;
+ }
+
+ inode = dentry->d_inode;
+ task = get_proc_task(inode);
+ if (!task)
+ goto out_notask;
+
+ if (!ptrace_may_access(task, PTRACE_MODE_READ))
+ goto out;
+
+ mm = get_task_mm(task);
+ if (!mm)
+ goto out;
+
+ if (!dname_to_vma_addr(dentry, &vm_start, &vm_end)) {
+ down_read(&mm->mmap_sem);
+ exact_vma_exists = !!find_exact_vma(mm, vm_start, vm_end);
+ up_read(&mm->mmap_sem);
+ }
+
+ mmput(mm);
+
+ if (exact_vma_exists) {
+ if (task_dumpable(task)) {
+ rcu_read_lock();
+ cred = __task_cred(task);
+ inode->i_uid = cred->euid;
+ inode->i_gid = cred->egid;
+ rcu_read_unlock();
+ } else {
+ inode->i_uid = 0;
+ inode->i_gid = 0;
+ }
+ security_task_to_inode(task, inode);
+ status = 1;
+ }
+
+out:
+ put_task_struct(task);
+
+out_notask:
+ if (status <= 0)
+ d_drop(dentry);
+
+ return status;
+}
+
+static const struct dentry_operations tid_map_files_dentry_operations = {
+ .d_revalidate = map_files_d_revalidate,
+ .d_delete = pid_delete_dentry,
+};
+
+static int proc_map_files_get_link(struct dentry *dentry, struct path *path)
+{
+ unsigned long vm_start, vm_end;
+ struct vm_area_struct *vma;
+ struct task_struct *task;
+ struct mm_struct *mm;
+ int rc;
+
+ rc = -ENOENT;
+ task = get_proc_task(dentry->d_inode);
+ if (!task)
+ goto out;
+
+ mm = get_task_mm(task);
+ put_task_struct(task);
+ if (!mm)
+ goto out;
+
+ rc = dname_to_vma_addr(dentry, &vm_start, &vm_end);
+ if (rc)
+ goto out_mmput;
+
+ down_read(&mm->mmap_sem);
+ vma = find_exact_vma(mm, vm_start, vm_end);
+ if (vma && vma->vm_file) {
+ *path = vma->vm_file->f_path;
+ path_get(path);
+ rc = 0;
+ }
+ up_read(&mm->mmap_sem);
+
+out_mmput:
+ mmput(mm);
+out:
+ return rc;
+}
+
+struct map_files_info {
+ struct file *file;
+ unsigned long len;
+ unsigned char name[4*sizeof(long)+2]; /* max: %lx-%lx\0 */
+};
+
+static struct dentry *
+proc_map_files_instantiate(struct inode *dir, struct dentry *dentry,
+ struct task_struct *task, const void *ptr)
+{
+ const struct file *file = ptr;
+ struct proc_inode *ei;
+ struct inode *inode;
+
+ if (!file)
+ return ERR_PTR(-ENOENT);
+
+ inode = proc_pid_make_inode(dir->i_sb, task);
+ if (!inode)
+ return ERR_PTR(-ENOENT);
+
+ ei = PROC_I(inode);
+ ei->op.proc_get_link = proc_map_files_get_link;
+
+ inode->i_op = &proc_pid_link_inode_operations;
+ inode->i_size = 64;
+ inode->i_mode = S_IFLNK;
+
+ if (file->f_mode & FMODE_READ)
+ inode->i_mode |= S_IRUSR;
+ if (file->f_mode & FMODE_WRITE)
+ inode->i_mode |= S_IWUSR;
+
+ d_set_d_op(dentry, &tid_map_files_dentry_operations);
+ d_add(dentry, inode);
+
+ return NULL;
+}
+
+static struct dentry *proc_map_files_lookup(struct inode *dir,
+ struct dentry *dentry, struct nameidata *nd)
+{
+ unsigned long vm_start, vm_end;
+ struct vm_area_struct *vma;
+ struct task_struct *task;
+ struct dentry *result;
+ struct mm_struct *mm;
+
+ result = ERR_PTR(-EACCES);
+ if (!capable(CAP_SYS_ADMIN))
+ goto out;
+
+ result = ERR_PTR(-ENOENT);
+ task = get_proc_task(dir);
+ if (!task)
+ goto out;
+
+ result = ERR_PTR(-EACCES);
+ if (lock_trace(task))
+ goto out_put_task;
+
+ result = ERR_PTR(-ENOENT);
+ if (dname_to_vma_addr(dentry, &vm_start, &vm_end))
+ goto out_unlock;
+
+ mm = get_task_mm(task);
+ if (!mm)
+ goto out_unlock;
+
+ down_read(&mm->mmap_sem);
+ vma = find_exact_vma(mm, vm_start, vm_end);
+ if (!vma)
+ goto out_no_vma;
+
+ result = proc_map_files_instantiate(dir, dentry, task, vma->vm_file);
+
+out_no_vma:
+ up_read(&mm->mmap_sem);
+ mmput(mm);
+out_unlock:
+ unlock_trace(task);
+out_put_task:
+ put_task_struct(task);
+out:
+ return result;
+}
+
+static const struct inode_operations proc_map_files_inode_operations = {
+ .lookup = proc_map_files_lookup,
+ .permission = proc_fd_permission,
+ .setattr = proc_setattr,
+};
+
+static int
+proc_map_files_readdir(struct file *filp, void *dirent, filldir_t filldir)
+{
+ struct dentry *dentry = filp->f_path.dentry;
+ struct inode *inode = dentry->d_inode;
+ struct vm_area_struct *vma;
+ struct task_struct *task;
+ struct mm_struct *mm;
+ ino_t ino;
+ int ret;
+
+ ret = -EACCES;
+ if (!capable(CAP_SYS_ADMIN))
+ goto out;
+
+ ret = -ENOENT;
+ task = get_proc_task(inode);
+ if (!task)
+ goto out;
+
+ ret = -EACCES;
+ if (lock_trace(task))
+ goto out_put_task;
+
+ ret = 0;
+ switch (filp->f_pos) {
+ case 0:
+ ino = inode->i_ino;
+ if (filldir(dirent, ".", 1, 0, ino, DT_DIR) < 0)
+ goto out_unlock;
+ filp->f_pos++;
+ case 1:
+ ino = parent_ino(dentry);
+ if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0)
+ goto out_unlock;
+ filp->f_pos++;
+ default:
+ {
+ unsigned long nr_files, pos, i;
+ struct flex_array *fa = NULL;
+ struct map_files_info info;
+ struct map_files_info *p;
+
+ mm = get_task_mm(task);
+ if (!mm)
+ goto out_unlock;
+ down_read(&mm->mmap_sem);
+
+ nr_files = 0;
+
+ /*
+ * We need two passes here:
+ *
+ * 1) Collect vmas of mapped files with mmap_sem taken
+ * 2) Release mmap_sem and instantiate entries
+ *
+ * otherwise we get lockdep complained, since filldir()
+ * routine might require mmap_sem taken in might_fault().
+ */
+
+ for (vma = mm->mmap, pos = 2; vma; vma = vma->vm_next) {
+ if (vma->vm_file && ++pos > filp->f_pos)
+ nr_files++;
+ }
+
+ if (nr_files) {
+ fa = flex_array_alloc(sizeof(info), nr_files,
+ GFP_KERNEL);
+ if (!fa || flex_array_prealloc(fa, 0, nr_files,
+ GFP_KERNEL)) {
+ ret = -ENOMEM;
+ if (fa)
+ flex_array_free(fa);
+ up_read(&mm->mmap_sem);
+ mmput(mm);
+ goto out_unlock;
+ }
+ for (i = 0, vma = mm->mmap, pos = 2; vma;
+ vma = vma->vm_next) {
+ if (!vma->vm_file)
+ continue;
+ if (++pos <= filp->f_pos)
+ continue;
+
+ get_file(vma->vm_file);
+ info.file = vma->vm_file;
+ info.len = snprintf(info.name,
+ sizeof(info.name), "%lx-%lx",
+ vma->vm_start, vma->vm_end);
+ if (flex_array_put(fa, i++, &info, GFP_KERNEL))
+ BUG();
+ }
+ }
+ up_read(&mm->mmap_sem);
+
+ for (i = 0; i < nr_files; i++) {
+ p = flex_array_get(fa, i);
+ ret = proc_fill_cache(filp, dirent, filldir,
+ p->name, p->len,
+ proc_map_files_instantiate,
+ task, p->file);
+ if (ret)
+ break;
+ filp->f_pos++;
+ fput(p->file);
+ }
+ for (; i < nr_files; i++) {
+ /*
+ * In case of error don't forget
+ * to put rest of file refs.
+ */
+ p = flex_array_get(fa, i);
+ fput(p->file);
+ }
+ if (fa)
+ flex_array_free(fa);
+ mmput(mm);
+ }
+ }
+
+out_unlock:
+ unlock_trace(task);
+out_put_task:
+ put_task_struct(task);
+out:
+ return ret;
+}
+
+static const struct file_operations proc_map_files_operations = {
+ .read = generic_read_dir,
+ .readdir = proc_map_files_readdir,
+ .llseek = default_llseek,
+};
+
+#endif /* CONFIG_CHECKPOINT_RESTORE */
+
/*
* /proc/pid/fd needs a special permission handler so that a process can still
* access /proc/self/fd after it has executed a setuid().
@@ -2658,6 +3066,9 @@ static const struct inode_operations proc_task_inode_operations;
static const struct pid_entry tgid_base_stuff[] = {
DIR("task", S_IRUGO|S_IXUGO, proc_task_inode_operations, proc_task_operations),
DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations),
+#ifdef CONFIG_CHECKPOINT_RESTORE
+ DIR("map_files", S_IRUSR|S_IXUSR, proc_map_files_inode_operations, proc_map_files_operations),
+#endif
DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations),
DIR("ns", S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations),
#ifdef CONFIG_NET
@@ -2761,6 +3172,7 @@ static const struct inode_operations proc_tgid_base_inode_operations = {
.lookup = proc_tgid_base_lookup,
.getattr = pid_getattr,
.setattr = proc_setattr,
+ .permission = proc_pid_permission,
};
static void proc_flush_task_mnt(struct vfsmount *mnt, pid_t pid, pid_t tgid)
@@ -2964,6 +3376,12 @@ static int proc_pid_fill_cache(struct file *filp, void *dirent, filldir_t filldi
proc_pid_instantiate, iter.task, NULL);
}
+static int fake_filldir(void *buf, const char *name, int namelen,
+ loff_t offset, u64 ino, unsigned d_type)
+{
+ return 0;
+}
+
/* for the /proc/ directory itself, after non-process stuff has been done */
int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir)
{
@@ -2971,6 +3389,7 @@ int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir)
struct task_struct *reaper;
struct tgid_iter iter;
struct pid_namespace *ns;
+ filldir_t __filldir;
if (filp->f_pos >= PID_MAX_LIMIT + TGID_OFFSET)
goto out_no_task;
@@ -2992,8 +3411,13 @@ int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir)
for (iter = next_tgid(ns, iter);
iter.task;
iter.tgid += 1, iter = next_tgid(ns, iter)) {
+ if (has_pid_permissions(ns, iter.task, 2))
+ __filldir = filldir;
+ else
+ __filldir = fake_filldir;
+
filp->f_pos = iter.tgid + TGID_OFFSET;
- if (proc_pid_fill_cache(filp, dirent, filldir, iter) < 0) {
+ if (proc_pid_fill_cache(filp, dirent, __filldir, iter) < 0) {
put_task_struct(iter.task);
goto out;
}
@@ -3328,6 +3752,7 @@ static const struct inode_operations proc_task_inode_operations = {
.lookup = proc_task_lookup,
.getattr = proc_task_getattr,
.setattr = proc_setattr,
+ .permission = proc_pid_permission,
};
static const struct file_operations proc_task_operations = {
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 51a176622b8..84fd3235a59 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -7,6 +7,7 @@
#include <linux/time.h>
#include <linux/proc_fs.h>
#include <linux/kernel.h>
+#include <linux/pid_namespace.h>
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/stat.h>
@@ -17,7 +18,9 @@
#include <linux/init.h>
#include <linux/module.h>
#include <linux/sysctl.h>
+#include <linux/seq_file.h>
#include <linux/slab.h>
+#include <linux/mount.h>
#include <asm/system.h>
#include <asm/uaccess.h>
@@ -101,12 +104,27 @@ void __init proc_init_inodecache(void)
init_once);
}
+static int proc_show_options(struct seq_file *seq, struct dentry *root)
+{
+ struct super_block *sb = root->d_sb;
+ struct pid_namespace *pid = sb->s_fs_info;
+
+ if (pid->pid_gid)
+ seq_printf(seq, ",gid=%lu", (unsigned long)pid->pid_gid);
+ if (pid->hide_pid != 0)
+ seq_printf(seq, ",hidepid=%u", pid->hide_pid);
+
+ return 0;
+}
+
static const struct super_operations proc_sops = {
.alloc_inode = proc_alloc_inode,
.destroy_inode = proc_destroy_inode,
.drop_inode = generic_delete_inode,
.evict_inode = proc_evict_inode,
.statfs = simple_statfs,
+ .remount_fs = proc_remount,
+ .show_options = proc_show_options,
};
static void __pde_users_dec(struct proc_dir_entry *pde)
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 7838e5cfec1..292577531ad 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -117,6 +117,7 @@ void pde_put(struct proc_dir_entry *pde);
int proc_fill_super(struct super_block *);
struct inode *proc_get_inode(struct super_block *, struct proc_dir_entry *);
+int proc_remount(struct super_block *sb, int *flags, char *data);
/*
* These are generic /proc routines that use the internal
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 03102d97818..46a15d8a29c 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -18,6 +18,7 @@
#include <linux/bitops.h>
#include <linux/mount.h>
#include <linux/pid_namespace.h>
+#include <linux/parser.h>
#include "internal.h"
@@ -36,6 +37,63 @@ static int proc_set_super(struct super_block *sb, void *data)
return err;
}
+enum {
+ Opt_gid, Opt_hidepid, Opt_err,
+};
+
+static const match_table_t tokens = {
+ {Opt_hidepid, "hidepid=%u"},
+ {Opt_gid, "gid=%u"},
+ {Opt_err, NULL},
+};
+
+static int proc_parse_options(char *options, struct pid_namespace *pid)
+{
+ char *p;
+ substring_t args[MAX_OPT_ARGS];
+ int option;
+
+ if (!options)
+ return 1;
+
+ while ((p = strsep(&options, ",")) != NULL) {
+ int token;
+ if (!*p)
+ continue;
+
+ args[0].to = args[0].from = 0;
+ token = match_token(p, tokens, args);
+ switch (token) {
+ case Opt_gid:
+ if (match_int(&args[0], &option))
+ return 0;
+ pid->pid_gid = option;
+ break;
+ case Opt_hidepid:
+ if (match_int(&args[0], &option))
+ return 0;
+ if (option < 0 || option > 2) {
+ pr_err("proc: hidepid value must be between 0 and 2.\n");
+ return 0;
+ }
+ pid->hide_pid = option;
+ break;
+ default:
+ pr_err("proc: unrecognized mount option \"%s\" "
+ "or missing value\n", p);
+ return 0;
+ }
+ }
+
+ return 1;
+}
+
+int proc_remount(struct super_block *sb, int *flags, char *data)
+{
+ struct pid_namespace *pid = sb->s_fs_info;
+ return !proc_parse_options(data, pid);
+}
+
static struct dentry *proc_mount(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data)
{
@@ -43,11 +101,15 @@ static struct dentry *proc_mount(struct file_system_type *fs_type,
struct super_block *sb;
struct pid_namespace *ns;
struct proc_inode *ei;
+ char *options;
- if (flags & MS_KERNMOUNT)
+ if (flags & MS_KERNMOUNT) {
ns = (struct pid_namespace *)data;
- else
+ options = NULL;
+ } else {
ns = current->nsproxy->pid_ns;
+ options = data;
+ }
sb = sget(fs_type, proc_test_super, proc_set_super, ns);
if (IS_ERR(sb))
@@ -55,6 +117,10 @@ static struct dentry *proc_mount(struct file_system_type *fs_type,
if (!sb->s_root) {
sb->s_flags = flags;
+ if (!proc_parse_options(options, ns)) {
+ deactivate_locked_super(sb);
+ return ERR_PTR(-EINVAL);
+ }
err = proc_fill_super(sb);
if (err) {
deactivate_locked_super(sb);
diff --git a/fs/reiserfs/bitmap.c b/fs/reiserfs/bitmap.c
index a945cd26522..70de42f09f1 100644
--- a/fs/reiserfs/bitmap.c
+++ b/fs/reiserfs/bitmap.c
@@ -1364,10 +1364,7 @@ int reiserfs_init_bitmap_cache(struct super_block *sb)
struct reiserfs_bitmap_info *bitmap;
unsigned int bmap_nr = reiserfs_bmap_count(sb);
- /* Avoid lock recursion in fault case */
- reiserfs_write_unlock(sb);
bitmap = vmalloc(sizeof(*bitmap) * bmap_nr);
- reiserfs_write_lock(sb);
if (bitmap == NULL)
return -ENOMEM;
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index eb711060a6f..c3cf54fd4de 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -2678,16 +2678,10 @@ int journal_init(struct super_block *sb, const char *j_dev_name,
char b[BDEVNAME_SIZE];
int ret;
- /*
- * Unlock here to avoid various RECLAIM-FS-ON <-> IN-RECLAIM-FS
- * dependency inversion warnings.
- */
- reiserfs_write_unlock(sb);
journal = SB_JOURNAL(sb) = vzalloc(sizeof(struct reiserfs_journal));
if (!journal) {
reiserfs_warning(sb, "journal-1256",
"unable to get memory for journal structure");
- reiserfs_write_lock(sb);
return 1;
}
INIT_LIST_HEAD(&journal->j_bitmap_nodes);
@@ -2695,10 +2689,8 @@ int journal_init(struct super_block *sb, const char *j_dev_name,
INIT_LIST_HEAD(&journal->j_working_list);
INIT_LIST_HEAD(&journal->j_journal_list);
journal->j_persistent_trans = 0;
- ret = reiserfs_allocate_list_bitmaps(sb, journal->j_list_bitmap,
- reiserfs_bmap_count(sb));
- reiserfs_write_lock(sb);
- if (ret)
+ if (reiserfs_allocate_list_bitmaps(sb, journal->j_list_bitmap,
+ reiserfs_bmap_count(sb)))
goto free_and_return;
allocate_bitmap_nodes(sb);
@@ -2727,27 +2719,11 @@ int journal_init(struct super_block *sb, const char *j_dev_name,
goto free_and_return;
}
- /*
- * We need to unlock here to avoid creating the following
- * dependency:
- * reiserfs_lock -> sysfs_mutex
- * Because the reiserfs mmap path creates the following dependency:
- * mm->mmap -> reiserfs_lock, hence we have
- * mm->mmap -> reiserfs_lock ->sysfs_mutex
- * This would ends up in a circular dependency with sysfs readdir path
- * which does sysfs_mutex -> mm->mmap_sem
- * This is fine because the reiserfs lock is useless in mount path,
- * at least until we call journal_begin. We keep it for paranoid
- * reasons.
- */
- reiserfs_write_unlock(sb);
if (journal_init_dev(sb, journal, j_dev_name) != 0) {
- reiserfs_write_lock(sb);
reiserfs_warning(sb, "sh-462",
"unable to initialize jornal device");
goto free_and_return;
}
- reiserfs_write_lock(sb);
rs = SB_DISK_SUPER_BLOCK(sb);
@@ -2829,9 +2805,7 @@ int journal_init(struct super_block *sb, const char *j_dev_name,
journal->j_mount_id = 10;
journal->j_state = 0;
atomic_set(&(journal->j_jlock), 0);
- reiserfs_write_unlock(sb);
journal->j_cnode_free_list = allocate_cnodes(num_cnodes);
- reiserfs_write_lock(sb);
journal->j_cnode_free_orig = journal->j_cnode_free_list;
journal->j_cnode_free = journal->j_cnode_free_list ? num_cnodes : 0;
journal->j_cnode_used = 0;
@@ -2848,24 +2822,37 @@ int journal_init(struct super_block *sb, const char *j_dev_name,
init_journal_hash(sb);
jl = journal->j_current_jl;
+
+ /*
+ * get_list_bitmap() may call flush_commit_list() which
+ * requires the lock. Calling flush_commit_list() shouldn't happen
+ * this early but I like to be paranoid.
+ */
+ reiserfs_write_lock(sb);
jl->j_list_bitmap = get_list_bitmap(sb, jl);
+ reiserfs_write_unlock(sb);
if (!jl->j_list_bitmap) {
reiserfs_warning(sb, "journal-2005",
"get_list_bitmap failed for journal list 0");
goto free_and_return;
}
- if (journal_read(sb) < 0) {
+
+ /*
+ * Journal_read needs to be inspected in order to push down
+ * the lock further inside (or even remove it).
+ */
+ reiserfs_write_lock(sb);
+ ret = journal_read(sb);
+ reiserfs_write_unlock(sb);
+ if (ret < 0) {
reiserfs_warning(sb, "reiserfs-2006",
"Replay Failure, unable to mount");
goto free_and_return;
}
reiserfs_mounted_fs_count++;
- if (reiserfs_mounted_fs_count <= 1) {
- reiserfs_write_unlock(sb);
+ if (reiserfs_mounted_fs_count <= 1)
commit_wq = alloc_workqueue("reiserfs", WQ_MEM_RECLAIM, 0);
- reiserfs_write_lock(sb);
- }
INIT_DELAYED_WORK(&journal->j_work, flush_async_commits);
journal->j_work_sb = sb;
@@ -2896,14 +2883,13 @@ int journal_transaction_should_end(struct reiserfs_transaction_handle *th,
journal->j_cnode_free < (journal->j_trans_max * 3)) {
return 1;
}
- /* protected by the BKL here */
+
journal->j_len_alloc += new_alloc;
th->t_blocks_allocated += new_alloc ;
return 0;
}
-/* this must be called inside a transaction, and requires the
-** kernel_lock to be held
+/* this must be called inside a transaction
*/
void reiserfs_block_writes(struct reiserfs_transaction_handle *th)
{
@@ -2914,8 +2900,7 @@ void reiserfs_block_writes(struct reiserfs_transaction_handle *th)
return;
}
-/* this must be called without a transaction started, and does not
-** require BKL
+/* this must be called without a transaction started
*/
void reiserfs_allow_writes(struct super_block *s)
{
@@ -2924,8 +2909,7 @@ void reiserfs_allow_writes(struct super_block *s)
wake_up(&journal->j_join_wait);
}
-/* this must be called without a transaction started, and does not
-** require BKL
+/* this must be called without a transaction started
*/
void reiserfs_wait_on_write_block(struct super_block *s)
{
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 1d42e707d5f..e12d8b97cd4 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -1519,9 +1519,7 @@ static int read_super_block(struct super_block *s, int offset)
static int reread_meta_blocks(struct super_block *s)
{
ll_rw_block(READ, 1, &(SB_BUFFER_WITH_SB(s)));
- reiserfs_write_unlock(s);
wait_on_buffer(SB_BUFFER_WITH_SB(s));
- reiserfs_write_lock(s);
if (!buffer_uptodate(SB_BUFFER_WITH_SB(s))) {
reiserfs_warning(s, "reiserfs-2504", "error reading the super");
return 1;
@@ -1746,22 +1744,11 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
mutex_init(&REISERFS_SB(s)->lock);
REISERFS_SB(s)->lock_depth = -1;
- /*
- * This function is called with the bkl, which also was the old
- * locking used here.
- * do_journal_begin() will soon check if we hold the lock (ie: was the
- * bkl). This is likely because do_journal_begin() has several another
- * callers because at this time, it doesn't seem to be necessary to
- * protect against anything.
- * Anyway, let's be conservative and lock for now.
- */
- reiserfs_write_lock(s);
-
jdev_name = NULL;
if (reiserfs_parse_options
(s, (char *)data, &(sbi->s_mount_opt), &blocks, &jdev_name,
&commit_max_age, qf_names, &qfmt) == 0) {
- goto error;
+ goto error_unlocked;
}
if (jdev_name && jdev_name[0]) {
REISERFS_SB(s)->s_jdev = kstrdup(jdev_name, GFP_KERNEL);
@@ -1777,7 +1764,7 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
if (blocks) {
SWARN(silent, s, "jmacd-7", "resize option for remount only");
- goto error;
+ goto error_unlocked;
}
/* try old format (undistributed bitmap, super block in 8-th 1k block of a device) */
@@ -1787,7 +1774,7 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
else if (read_super_block(s, REISERFS_DISK_OFFSET_IN_BYTES)) {
SWARN(silent, s, "sh-2021", "can not find reiserfs on %s",
reiserfs_bdevname(s));
- goto error;
+ goto error_unlocked;
}
rs = SB_DISK_SUPER_BLOCK(s);
@@ -1803,7 +1790,7 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
"or increase size of your LVM partition");
SWARN(silent, s, "", "Or may be you forgot to "
"reboot after fdisk when it told you to");
- goto error;
+ goto error_unlocked;
}
sbi->s_mount_state = SB_REISERFS_STATE(s);
@@ -1811,8 +1798,9 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
if ((errval = reiserfs_init_bitmap_cache(s))) {
SWARN(silent, s, "jmacd-8", "unable to read bitmap");
- goto error;
+ goto error_unlocked;
}
+
errval = -EINVAL;
#ifdef CONFIG_REISERFS_CHECK
SWARN(silent, s, "", "CONFIG_REISERFS_CHECK is set ON");
@@ -1835,24 +1823,26 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
if (reiserfs_barrier_flush(s)) {
printk("reiserfs: using flush barriers\n");
}
+
// set_device_ro(s->s_dev, 1) ;
if (journal_init(s, jdev_name, old_format, commit_max_age)) {
SWARN(silent, s, "sh-2022",
"unable to initialize journal space");
- goto error;
+ goto error_unlocked;
} else {
jinit_done = 1; /* once this is set, journal_release must be called
** if we error out of the mount
*/
}
+
if (reread_meta_blocks(s)) {
SWARN(silent, s, "jmacd-9",
"unable to reread meta blocks after journal init");
- goto error;
+ goto error_unlocked;
}
if (replay_only(s))
- goto error;
+ goto error_unlocked;
if (bdev_read_only(s->s_bdev) && !(s->s_flags & MS_RDONLY)) {
SWARN(silent, s, "clm-7000",
@@ -1866,9 +1856,19 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
reiserfs_init_locked_inode, (void *)(&args));
if (!root_inode) {
SWARN(silent, s, "jmacd-10", "get root inode failed");
- goto error;
+ goto error_unlocked;
}
+ /*
+ * This path assumed to be called with the BKL in the old times.
+ * Now we have inherited the big reiserfs lock from it and many
+ * reiserfs helpers called in the mount path and elsewhere require
+ * this lock to be held even if it's not always necessary. Let's be
+ * conservative and hold it early. The window can be reduced after
+ * careful review of the code.
+ */
+ reiserfs_write_lock(s);
+
if (root_inode->i_state & I_NEW) {
reiserfs_read_locked_inode(root_inode, &args);
unlock_new_inode(root_inode);
@@ -1995,12 +1995,16 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
return (0);
error:
- if (jinit_done) { /* kill the commit thread, free journal ram */
+ reiserfs_write_unlock(s);
+
+error_unlocked:
+ /* kill the commit thread, free journal ram */
+ if (jinit_done) {
+ reiserfs_write_lock(s);
journal_release_error(NULL, s);
+ reiserfs_write_unlock(s);
}
- reiserfs_write_unlock(s);
-
reiserfs_free_bitmap_cache(s);
if (SB_BUFFER_WITH_SB(s))
brelse(SB_BUFFER_WITH_SB(s));