From 07543f5c75cee744b791cf7716c69571486fe753 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Thu, 18 Oct 2007 23:40:08 -0700 Subject: pid namespaces: make proc have multiple superblocks - one for each namespace Each pid namespace have to be visible through its own proc mount. Thus we need to have per-namespace proc trees with their own superblocks. We cannot easily show different pid namespace via one global proc tree, since each pid refers to different tasks in different namespaces. E.g. pid 1 refers to the init task in the initial namespace and to some other task when seeing from another namespace. Moreover - pid, exisintg in one namespace may not exist in the other. This approach has one move advantage is that the tasks from the init namespace can see what tasks live in another namespace by reading entries from another proc tree. Signed-off-by: Pavel Emelyanov Cc: Oleg Nesterov Cc: Sukadev Bhattiprolu Cc: Paul Menage Cc: "Eric W. Biederman" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/root.c | 67 ++++++++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 63 insertions(+), 4 deletions(-) (limited to 'fs/proc/root.c') diff --git a/fs/proc/root.c b/fs/proc/root.c index cf3046638b0..94e9d734384 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -18,32 +18,90 @@ #include #include #include +#include #include "internal.h" struct proc_dir_entry *proc_bus, *proc_root_fs, *proc_root_driver; +static int proc_test_super(struct super_block *sb, void *data) +{ + return sb->s_fs_info == data; +} + +static int proc_set_super(struct super_block *sb, void *data) +{ + struct pid_namespace *ns; + + ns = (struct pid_namespace *)data; + sb->s_fs_info = get_pid_ns(ns); + return set_anon_super(sb, NULL); +} + static int proc_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, struct vfsmount *mnt) { + int err; + struct super_block *sb; + struct pid_namespace *ns; + struct proc_inode *ei; + if (proc_mnt) { /* Seed the root directory with a pid so it doesn't need * to be special in base.c. I would do this earlier but * the only task alive when /proc is mounted the first time * is the init_task and it doesn't have any pids. */ - struct proc_inode *ei; ei = PROC_I(proc_mnt->mnt_sb->s_root->d_inode); if (!ei->pid) ei->pid = find_get_pid(1); } - return get_sb_single(fs_type, flags, data, proc_fill_super, mnt); + + if (flags & MS_KERNMOUNT) + ns = (struct pid_namespace *)data; + else + ns = current->nsproxy->pid_ns; + + sb = sget(fs_type, proc_test_super, proc_set_super, ns); + if (IS_ERR(sb)) + return PTR_ERR(sb); + + if (!sb->s_root) { + sb->s_flags = flags; + err = proc_fill_super(sb); + if (err) { + up_write(&sb->s_umount); + deactivate_super(sb); + return err; + } + + ei = PROC_I(sb->s_root->d_inode); + if (!ei->pid) { + rcu_read_lock(); + ei->pid = get_pid(find_pid_ns(1, ns)); + rcu_read_unlock(); + } + + sb->s_flags |= MS_ACTIVE; + ns->proc_mnt = mnt; + } + + return simple_set_mnt(mnt, sb); +} + +static void proc_kill_sb(struct super_block *sb) +{ + struct pid_namespace *ns; + + ns = (struct pid_namespace *)sb->s_fs_info; + kill_anon_super(sb); + put_pid_ns(ns); } static struct file_system_type proc_fs_type = { .name = "proc", .get_sb = proc_get_sb, - .kill_sb = kill_anon_super, + .kill_sb = proc_kill_sb, }; void __init proc_root_init(void) @@ -54,12 +112,13 @@ void __init proc_root_init(void) err = register_filesystem(&proc_fs_type); if (err) return; - proc_mnt = kern_mount(&proc_fs_type); + proc_mnt = kern_mount_data(&proc_fs_type, &init_pid_ns); err = PTR_ERR(proc_mnt); if (IS_ERR(proc_mnt)) { unregister_filesystem(&proc_fs_type); return; } + proc_misc_init(); proc_net_init(); -- cgit v1.2.3