From 36885d7b1121c779e4060d45472fe53a5b21e09f Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Fri, 10 Jun 2011 02:36:05 -0300 Subject: sysctl: remove impossible condition check Remove checks for conditions that will never happen. If procname is NULL the loop would already have bailed out, so there's no need to check it again. At the same time this also compacts the function find_in_table() by refactoring it to be easier to read. Signed-off-by: Lucas De Marchi Reviewed-by: Jesper Juhl Signed-off-by: Eric W. Biederman --- fs/proc/proc_sysctl.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) (limited to 'fs/proc/proc_sysctl.c') diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index a6b62173d4c..d82f4a8b4b8 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -59,17 +59,11 @@ out: static struct ctl_table *find_in_table(struct ctl_table *p, struct qstr *name) { - int len; for ( ; p->procname; p++) { - - if (!p->procname) - continue; - - len = strlen(p->procname); - if (len != name->len) + if (strlen(p->procname) != name->len) continue; - if (memcmp(p->procname, name->name, len) != 0) + if (memcmp(p->procname, name->name, name->len) != 0) continue; /* I have a match */ @@ -266,10 +260,6 @@ static int scan(struct ctl_table_header *head, ctl_table *table, for (; table->procname; table++, (*pos)++) { int res; - /* Can't do anything without a proc name */ - if (!table->procname) - continue; - if (*pos < file->f_pos) continue; -- cgit v1.2.3 From de4e83bd6b5e16d491ec068cd22801d5d063b07a Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 6 Jan 2012 03:34:20 -0800 Subject: sysctl: Register the base sysctl table like any other sysctl table. Simplify the code by treating the base sysctl table like any other sysctl table and register it with register_sysctl_table. To ensure this table is registered early enough to avoid problems call sysctl_init from proc_sys_init.
Rename sysctl_net.c:sysctl_init() to net_sysctl_init() to avoid name conflicts now that kernel/sysctl.c:sysctl_init() is no longer static. Signed-off-by: Eric W. Biederman --- fs/proc/proc_sysctl.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/proc/proc_sysctl.c') diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index d82f4a8b4b8..9d29d28af57 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -468,5 +468,6 @@ int __init proc_sys_init(void) proc_sys_root->proc_iops = &proc_sys_dir_operations; proc_sys_root->proc_fops = &proc_sys_dir_file_operations; proc_sys_root->nlink = 0; - return 0; + + return sysctl_init(); } -- cgit v1.2.3 From 1f87f0b52b1d6581168cb80f86746bc4df918d01 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 6 Jan 2012 04:07:15 -0800 Subject: sysctl: Move the implementation into fs/proc/proc_sysctl.c Move the core sysctl code from kernel/sysctl.c and kernel/sysctl_check.c into fs/proc/proc_sysctl.c. Currently sysctl maintenance is hampered by the sysctl implementation being split across 3 files with artificial layering between them. Consolidate the entire sysctl implementation into 1 file so that it is easier to see what is going on and hopefully allowing for simpler maintenance. For functions that are now only used in fs/proc/proc_sysctl.c remove their declarations from sysctl.h and make them static in fs/proc/proc_sysctl.c Signed-off-by: Eric W. 
Biederman --- fs/proc/proc_sysctl.c | 622 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 622 insertions(+) (limited to 'fs/proc/proc_sysctl.c') diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 9d29d28af57..06e6f10ee8e 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -7,6 +7,7 @@ #include #include #include +#include #include "internal.h" static const struct dentry_operations proc_sys_dentry_operations; @@ -24,6 +25,209 @@ void proc_sys_poll_notify(struct ctl_table_poll *poll) wake_up_interruptible(&poll->wait); } +static struct ctl_table root_table[1]; +static struct ctl_table_root sysctl_table_root; +static struct ctl_table_header root_table_header = { + {{.count = 1, + .ctl_table = root_table, + .ctl_entry = LIST_HEAD_INIT(sysctl_table_root.default_set.list),}}, + .root = &sysctl_table_root, + .set = &sysctl_table_root.default_set, +}; +static struct ctl_table_root sysctl_table_root = { + .root_list = LIST_HEAD_INIT(sysctl_table_root.root_list), + .default_set.list = LIST_HEAD_INIT(root_table_header.ctl_entry), +}; + +static DEFINE_SPINLOCK(sysctl_lock); + +/* called under sysctl_lock */ +static int use_table(struct ctl_table_header *p) +{ + if (unlikely(p->unregistering)) + return 0; + p->used++; + return 1; +} + +/* called under sysctl_lock */ +static void unuse_table(struct ctl_table_header *p) +{ + if (!--p->used) + if (unlikely(p->unregistering)) + complete(p->unregistering); +} + +/* called under sysctl_lock, will reacquire if has to wait */ +static void start_unregistering(struct ctl_table_header *p) +{ + /* + * if p->used is 0, nobody will ever touch that entry again; + * we'll eliminate all paths to it before dropping sysctl_lock + */ + if (unlikely(p->used)) { + struct completion wait; + init_completion(&wait); + p->unregistering = &wait; + spin_unlock(&sysctl_lock); + wait_for_completion(&wait); + spin_lock(&sysctl_lock); + } else { + /* anything non-NULL; we'll never dereference it */ + 
p->unregistering = ERR_PTR(-EINVAL); + } + /* + * do not remove from the list until nobody holds it; walking the + * list in do_sysctl() relies on that. + */ + list_del_init(&p->ctl_entry); +} + +static void sysctl_head_get(struct ctl_table_header *head) +{ + spin_lock(&sysctl_lock); + head->count++; + spin_unlock(&sysctl_lock); +} + +void sysctl_head_put(struct ctl_table_header *head) +{ + spin_lock(&sysctl_lock); + if (!--head->count) + kfree_rcu(head, rcu); + spin_unlock(&sysctl_lock); +} + +static struct ctl_table_header *sysctl_head_grab(struct ctl_table_header *head) +{ + if (!head) + BUG(); + spin_lock(&sysctl_lock); + if (!use_table(head)) + head = ERR_PTR(-ENOENT); + spin_unlock(&sysctl_lock); + return head; +} + +static void sysctl_head_finish(struct ctl_table_header *head) +{ + if (!head) + return; + spin_lock(&sysctl_lock); + unuse_table(head); + spin_unlock(&sysctl_lock); +} + +static struct ctl_table_set * +lookup_header_set(struct ctl_table_root *root, struct nsproxy *namespaces) +{ + struct ctl_table_set *set = &root->default_set; + if (root->lookup) + set = root->lookup(root, namespaces); + return set; +} + +static struct list_head * +lookup_header_list(struct ctl_table_root *root, struct nsproxy *namespaces) +{ + struct ctl_table_set *set = lookup_header_set(root, namespaces); + return &set->list; +} + +static struct ctl_table_header *__sysctl_head_next(struct nsproxy *namespaces, + struct ctl_table_header *prev) +{ + struct ctl_table_root *root; + struct list_head *header_list; + struct ctl_table_header *head; + struct list_head *tmp; + + spin_lock(&sysctl_lock); + if (prev) { + head = prev; + tmp = &prev->ctl_entry; + unuse_table(prev); + goto next; + } + tmp = &root_table_header.ctl_entry; + for (;;) { + head = list_entry(tmp, struct ctl_table_header, ctl_entry); + + if (!use_table(head)) + goto next; + spin_unlock(&sysctl_lock); + return head; + next: + root = head->root; + tmp = tmp->next; + header_list = lookup_header_list(root, namespaces); 
+ if (tmp != header_list) + continue; + + do { + root = list_entry(root->root_list.next, + struct ctl_table_root, root_list); + if (root == &sysctl_table_root) + goto out; + header_list = lookup_header_list(root, namespaces); + } while (list_empty(header_list)); + tmp = header_list->next; + } +out: + spin_unlock(&sysctl_lock); + return NULL; +} + +static struct ctl_table_header *sysctl_head_next(struct ctl_table_header *prev) +{ + return __sysctl_head_next(current->nsproxy, prev); +} + +void register_sysctl_root(struct ctl_table_root *root) +{ + spin_lock(&sysctl_lock); + list_add_tail(&root->root_list, &sysctl_table_root.root_list); + spin_unlock(&sysctl_lock); +} + +/* + * sysctl_perm does NOT grant the superuser all rights automatically, because + * some sysctl variables are readonly even to root. + */ + +static int test_perm(int mode, int op) +{ + if (!current_euid()) + mode >>= 6; + else if (in_egroup_p(0)) + mode >>= 3; + if ((op & ~mode & (MAY_READ|MAY_WRITE|MAY_EXEC)) == 0) + return 0; + return -EACCES; +} + +static int sysctl_perm(struct ctl_table_root *root, struct ctl_table *table, int op) +{ + int mode; + + if (root->permissions) + mode = root->permissions(root, current->nsproxy, table); + else + mode = table->mode; + + return test_perm(mode, op); +} + +static void sysctl_set_parent(struct ctl_table *parent, struct ctl_table *table) +{ + for (; table->procname; table++) { + table->parent = parent; + if (table->child) + sysctl_set_parent(table, table->child); + } +} + + static struct inode *proc_sys_make_inode(struct super_block *sb, struct ctl_table_header *head, struct ctl_table *table) { @@ -435,6 +639,21 @@ static int proc_sys_delete(const struct dentry *dentry) return !!PROC_I(dentry->d_inode)->sysctl->unregistering; } +static int sysctl_is_seen(struct ctl_table_header *p) +{ + struct ctl_table_set *set = p->set; + int res; + spin_lock(&sysctl_lock); + if (p->unregistering) + res = 0; + else if (!set->is_seen) + res = 1; + else + res = 
set->is_seen(set); + spin_unlock(&sysctl_lock); + return res; +} + static int proc_sys_compare(const struct dentry *parent, const struct inode *pinode, const struct dentry *dentry, const struct inode *inode, @@ -460,6 +679,409 @@ static const struct dentry_operations proc_sys_dentry_operations = { .d_compare = proc_sys_compare, }; +static struct ctl_table *is_branch_in(struct ctl_table *branch, + struct ctl_table *table) +{ + struct ctl_table *p; + const char *s = branch->procname; + + /* branch should have named subdirectory as its first element */ + if (!s || !branch->child) + return NULL; + + /* ... and nothing else */ + if (branch[1].procname) + return NULL; + + /* table should contain subdirectory with the same name */ + for (p = table; p->procname; p++) { + if (!p->child) + continue; + if (p->procname && strcmp(p->procname, s) == 0) + return p; + } + return NULL; +} + +/* see if attaching q to p would be an improvement */ +static void try_attach(struct ctl_table_header *p, struct ctl_table_header *q) +{ + struct ctl_table *to = p->ctl_table, *by = q->ctl_table; + struct ctl_table *next; + int is_better = 0; + int not_in_parent = !p->attached_by; + + while ((next = is_branch_in(by, to)) != NULL) { + if (by == q->attached_by) + is_better = 1; + if (to == p->attached_by) + not_in_parent = 1; + by = by->child; + to = next->child; + } + + if (is_better && not_in_parent) { + q->attached_by = by; + q->attached_to = to; + q->parent = p; + } +} + +#ifdef CONFIG_SYSCTL_SYSCALL_CHECK +static int sysctl_depth(struct ctl_table *table) +{ + struct ctl_table *tmp; + int depth; + + depth = 0; + for (tmp = table; tmp->parent; tmp = tmp->parent) + depth++; + + return depth; +} + +static struct ctl_table *sysctl_parent(struct ctl_table *table, int n) +{ + int i; + + for (i = 0; table && i < n; i++) + table = table->parent; + + return table; +} + + +static void sysctl_print_path(struct ctl_table *table) +{ + struct ctl_table *tmp; + int depth, i; + depth = sysctl_depth(table); + 
if (table->procname) { + for (i = depth; i >= 0; i--) { + tmp = sysctl_parent(table, i); + printk("/%s", tmp->procname?tmp->procname:""); + } + } + printk(" "); +} + +static struct ctl_table *sysctl_check_lookup(struct nsproxy *namespaces, + struct ctl_table *table) +{ + struct ctl_table_header *head; + struct ctl_table *ref, *test; + int depth, cur_depth; + + depth = sysctl_depth(table); + + for (head = __sysctl_head_next(namespaces, NULL); head; + head = __sysctl_head_next(namespaces, head)) { + cur_depth = depth; + ref = head->ctl_table; +repeat: + test = sysctl_parent(table, cur_depth); + for (; ref->procname; ref++) { + int match = 0; + if (cur_depth && !ref->child) + continue; + + if (test->procname && ref->procname && + (strcmp(test->procname, ref->procname) == 0)) + match++; + + if (match) { + if (cur_depth != 0) { + cur_depth--; + ref = ref->child; + goto repeat; + } + goto out; + } + } + } + ref = NULL; +out: + sysctl_head_finish(head); + return ref; +} + +static void set_fail(const char **fail, struct ctl_table *table, const char *str) +{ + if (*fail) { + printk(KERN_ERR "sysctl table check failed: "); + sysctl_print_path(table); + printk(" %s\n", *fail); + dump_stack(); + } + *fail = str; +} + +static void sysctl_check_leaf(struct nsproxy *namespaces, + struct ctl_table *table, const char **fail) +{ + struct ctl_table *ref; + + ref = sysctl_check_lookup(namespaces, table); + if (ref && (ref != table)) + set_fail(fail, table, "Sysctl already exists"); +} + +static int sysctl_check_table(struct nsproxy *namespaces, struct ctl_table *table) +{ + int error = 0; + for (; table->procname; table++) { + const char *fail = NULL; + + if (table->parent) { + if (!table->parent->procname) + set_fail(&fail, table, "Parent without procname"); + } + if (table->child) { + if (table->data) + set_fail(&fail, table, "Directory with data?"); + if (table->maxlen) + set_fail(&fail, table, "Directory with maxlen?"); + if ((table->mode & (S_IRUGO|S_IXUGO)) != table->mode) + 
set_fail(&fail, table, "Writable sysctl directory"); + if (table->proc_handler) + set_fail(&fail, table, "Directory with proc_handler"); + if (table->extra1) + set_fail(&fail, table, "Directory with extra1"); + if (table->extra2) + set_fail(&fail, table, "Directory with extra2"); + } else { + if ((table->proc_handler == proc_dostring) || + (table->proc_handler == proc_dointvec) || + (table->proc_handler == proc_dointvec_minmax) || + (table->proc_handler == proc_dointvec_jiffies) || + (table->proc_handler == proc_dointvec_userhz_jiffies) || + (table->proc_handler == proc_dointvec_ms_jiffies) || + (table->proc_handler == proc_doulongvec_minmax) || + (table->proc_handler == proc_doulongvec_ms_jiffies_minmax)) { + if (!table->data) + set_fail(&fail, table, "No data"); + if (!table->maxlen) + set_fail(&fail, table, "No maxlen"); + } +#ifdef CONFIG_PROC_SYSCTL + if (!table->proc_handler) + set_fail(&fail, table, "No proc_handler"); +#endif + sysctl_check_leaf(namespaces, table, &fail); + } + if (table->mode > 0777) + set_fail(&fail, table, "bogus .mode"); + if (fail) { + set_fail(&fail, table, NULL); + error = -EINVAL; + } + if (table->child) + error |= sysctl_check_table(namespaces, table->child); + } + return error; +} +#endif /* CONFIG_SYSCTL_SYSCALL_CHECK */ + +/** + * __register_sysctl_paths - register a sysctl hierarchy + * @root: List of sysctl headers to register on + * @namespaces: Data to compute which lists of sysctl entries are visible + * @path: The path to the directory the sysctl table is in. + * @table: the top-level table structure + * + * Register a sysctl table hierarchy. @table should be a filled in ctl_table + * array. A completely 0 filled entry terminates the table. + * + * The members of the &struct ctl_table structure are used as follows: + * + * procname - the name of the sysctl file under /proc/sys. 
Set to %NULL to not + * enter a sysctl file + * + * data - a pointer to data for use by proc_handler + * + * maxlen - the maximum size in bytes of the data + * + * mode - the file permissions for the /proc/sys file, and for sysctl(2) + * + * child - a pointer to the child sysctl table if this entry is a directory, or + * %NULL. + * + * proc_handler - the text handler routine (described below) + * + * de - for internal use by the sysctl routines + * + * extra1, extra2 - extra pointers usable by the proc handler routines + * + * Leaf nodes in the sysctl tree will be represented by a single file + * under /proc; non-leaf nodes will be represented by directories. + * + * sysctl(2) can automatically manage read and write requests through + * the sysctl table. The data and maxlen fields of the ctl_table + * struct enable minimal validation of the values being written to be + * performed, and the mode field allows minimal authentication. + * + * There must be a proc_handler routine for any terminal nodes + * mirrored under /proc/sys (non-terminals are handled by a built-in + * directory handler). Several default handlers are available to + * cover common cases - + * + * proc_dostring(), proc_dointvec(), proc_dointvec_jiffies(), + * proc_dointvec_userhz_jiffies(), proc_dointvec_minmax(), + * proc_doulongvec_ms_jiffies_minmax(), proc_doulongvec_minmax() + * + * It is the handler's job to read the input buffer from user memory + * and process it. The handler should return 0 on success. + * + * This routine returns %NULL on a failure to register, and a pointer + * to the table header on success. 
+ */ +struct ctl_table_header *__register_sysctl_paths( + struct ctl_table_root *root, + struct nsproxy *namespaces, + const struct ctl_path *path, struct ctl_table *table) +{ + struct ctl_table_header *header; + struct ctl_table *new, **prevp; + unsigned int n, npath; + struct ctl_table_set *set; + + /* Count the path components */ + for (npath = 0; path[npath].procname; ++npath) + ; + + /* + * For each path component, allocate a 2-element ctl_table array. + * The first array element will be filled with the sysctl entry + * for this, the second will be the sentinel (procname == 0). + * + * We allocate everything in one go so that we don't have to + * worry about freeing additional memory in unregister_sysctl_table. + */ + header = kzalloc(sizeof(struct ctl_table_header) + + (2 * npath * sizeof(struct ctl_table)), GFP_KERNEL); + if (!header) + return NULL; + + new = (struct ctl_table *) (header + 1); + + /* Now connect the dots */ + prevp = &header->ctl_table; + for (n = 0; n < npath; ++n, ++path) { + /* Copy the procname */ + new->procname = path->procname; + new->mode = 0555; + + *prevp = new; + prevp = &new->child; + + new += 2; + } + *prevp = table; + header->ctl_table_arg = table; + + INIT_LIST_HEAD(&header->ctl_entry); + header->used = 0; + header->unregistering = NULL; + header->root = root; + sysctl_set_parent(NULL, header->ctl_table); + header->count = 1; +#ifdef CONFIG_SYSCTL_SYSCALL_CHECK + if (sysctl_check_table(namespaces, header->ctl_table)) { + kfree(header); + return NULL; + } +#endif + spin_lock(&sysctl_lock); + header->set = lookup_header_set(root, namespaces); + header->attached_by = header->ctl_table; + header->attached_to = root_table; + header->parent = &root_table_header; + for (set = header->set; set; set = set->parent) { + struct ctl_table_header *p; + list_for_each_entry(p, &set->list, ctl_entry) { + if (p->unregistering) + continue; + try_attach(p, header); + } + } + header->parent->count++; + list_add_tail(&header->ctl_entry, 
&header->set->list); + spin_unlock(&sysctl_lock); + + return header; +} + +/** + * register_sysctl_table_path - register a sysctl table hierarchy + * @path: The path to the directory the sysctl table is in. + * @table: the top-level table structure + * + * Register a sysctl table hierarchy. @table should be a filled in ctl_table + * array. A completely 0 filled entry terminates the table. + * + * See __register_sysctl_paths for more details. + */ +struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path, + struct ctl_table *table) +{ + return __register_sysctl_paths(&sysctl_table_root, current->nsproxy, + path, table); +} +EXPORT_SYMBOL(register_sysctl_paths); + +/** + * register_sysctl_table - register a sysctl table hierarchy + * @table: the top-level table structure + * + * Register a sysctl table hierarchy. @table should be a filled in ctl_table + * array. A completely 0 filled entry terminates the table. + * + * See register_sysctl_paths for more details. + */ +struct ctl_table_header *register_sysctl_table(struct ctl_table *table) +{ + static const struct ctl_path null_path[] = { {} }; + + return register_sysctl_paths(null_path, table); +} +EXPORT_SYMBOL(register_sysctl_table); + +/** + * unregister_sysctl_table - unregister a sysctl table hierarchy + * @header: the header returned from register_sysctl_table + * + * Unregisters the sysctl table and all children. proc entries may not + * actually be removed until they are no longer used by anyone. 
+ */ +void unregister_sysctl_table(struct ctl_table_header * header) +{ + might_sleep(); + + if (header == NULL) + return; + + spin_lock(&sysctl_lock); + start_unregistering(header); + if (!--header->parent->count) { + WARN_ON(1); + kfree_rcu(header->parent, rcu); + } + if (!--header->count) + kfree_rcu(header, rcu); + spin_unlock(&sysctl_lock); +} +EXPORT_SYMBOL(unregister_sysctl_table); + +void setup_sysctl_set(struct ctl_table_set *p, + struct ctl_table_set *parent, + int (*is_seen)(struct ctl_table_set *)) +{ + INIT_LIST_HEAD(&p->list); + p->parent = parent ? parent : &sysctl_table_root.default_set; + p->is_seen = is_seen; +} + + int __init proc_sys_init(void) { struct proc_dir_entry *proc_sys_root; -- cgit v1.2.3 From a15e20982e2fbb06e85da584a0f150784042c17d Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 8 Jan 2012 00:16:29 -0800 Subject: sysctl: Make the directories have nlink == 1 I goofed when I made sysctl directories have nlink == 0. nlink == 0 means the directory has been deleted. nlink == 1 means a directory does not count subdirectories. Use the default nlink == 1 for sysctl directories. Signed-off-by: Eric W. Biederman --- fs/proc/proc_sysctl.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs/proc/proc_sysctl.c') diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 06e6f10ee8e..f6aa75111b4 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -253,7 +253,6 @@ static struct inode *proc_sys_make_inode(struct super_block *sb, inode->i_fop = &proc_sys_file_operations; } else { inode->i_mode |= S_IFDIR; - clear_nlink(inode); inode->i_op = &proc_sys_dir_operations; inode->i_fop = &proc_sys_dir_file_operations; } -- cgit v1.2.3 From 97324cd804b7b9fb6044e114329335db79810425 Mon Sep 17 00:00:00 2001 From: "Eric W.
Biederman" Date: Mon, 9 Jan 2012 22:19:13 -0800 Subject: sysctl: Implement retire_sysctl_set This adds a small helper retire_sysctl_set to remove the intimate knowledge about how a sysctl_set is implemented from net/sysctl_net.c. Signed-off-by: Eric W. Biederman --- fs/proc/proc_sysctl.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs/proc/proc_sysctl.c') diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index f6aa75111b4..9d8223cd365 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -1080,6 +1080,10 @@ void setup_sysctl_set(struct ctl_table_set *p, p->is_seen = is_seen; } +void retire_sysctl_set(struct ctl_table_set *set) +{ + WARN_ON(!list_empty(&set->list)); +} int __init proc_sys_init(void) { -- cgit v1.2.3 From bd295b56cfae85f2dd6c2b03951480c91e6d08f3 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 22 Jan 2012 21:10:21 -0800 Subject: sysctl: Remove the unnecessary sysctl_set parent concept. In sysctl_net register the two networking roots in the proper order. In register_sysctl walk the sysctl sets in the reverse order of the sysctl roots. Remove parent from ctl_table_set and setup_sysctl_set as it is no longer needed. Signed-off-by: Eric W.
Biederman --- fs/proc/proc_sysctl.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'fs/proc/proc_sysctl.c') diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 9d8223cd365..86d32a318e2 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -995,13 +995,20 @@ struct ctl_table_header *__register_sysctl_paths( header->attached_by = header->ctl_table; header->attached_to = root_table; header->parent = &root_table_header; - for (set = header->set; set; set = set->parent) { + set = header->set; + root = header->root; + for (;;) { struct ctl_table_header *p; list_for_each_entry(p, &set->list, ctl_entry) { if (p->unregistering) continue; try_attach(p, header); } + if (root == &sysctl_table_root) + break; + root = list_entry(root->root_list.prev, + struct ctl_table_root, root_list); + set = lookup_header_set(root, namespaces); } header->parent->count++; list_add_tail(&header->ctl_entry, &header->set->list); @@ -1072,11 +1079,9 @@ void unregister_sysctl_table(struct ctl_table_header * header) EXPORT_SYMBOL(unregister_sysctl_table); void setup_sysctl_set(struct ctl_table_set *p, - struct ctl_table_set *parent, int (*is_seen)(struct ctl_table_set *)) { INIT_LIST_HEAD(&p->list); - p->parent = parent ? parent : &sysctl_table_root.default_set; p->is_seen = is_seen; } -- cgit v1.2.3 From f05e53a7fbb28c951c0c8cf3963fa8019ae1d4d3 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sat, 21 Jan 2012 10:03:13 -0800 Subject: sysctl: Create local copies of directory names used in paths Creating local copies of directory names is a good idea for two reasons. - The dynamic names used by callers must be copied into new strings by the callers today to ensure the strings do not change between register and unregister of the sysctl table. - Sysctl directories have a potentially different lifetime than the time between register and unregister of any particular sysctl table. Signed-off-by: Eric W. 
Biederman --- fs/proc/proc_sysctl.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'fs/proc/proc_sysctl.c') diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 86d32a318e2..bcf60fb8dce 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -943,10 +943,12 @@ struct ctl_table_header *__register_sysctl_paths( struct ctl_table *new, **prevp; unsigned int n, npath; struct ctl_table_set *set; + size_t path_bytes = 0; + char *new_name; /* Count the path components */ for (npath = 0; path[npath].procname; ++npath) - ; + path_bytes += strlen(path[npath].procname) + 1; /* * For each path component, allocate a 2-element ctl_table array. @@ -956,24 +958,27 @@ struct ctl_table_header *__register_sysctl_paths( * We allocate everything in one go so that we don't have to * worry about freeing additional memory in unregister_sysctl_table. */ - header = kzalloc(sizeof(struct ctl_table_header) + + header = kzalloc(sizeof(struct ctl_table_header) + path_bytes + (2 * npath * sizeof(struct ctl_table)), GFP_KERNEL); if (!header) return NULL; new = (struct ctl_table *) (header + 1); + new_name = (char *)(new + (2 * npath)); /* Now connect the dots */ prevp = &header->ctl_table; for (n = 0; n < npath; ++n, ++path) { /* Copy the procname */ - new->procname = path->procname; + strcpy(new_name, path->procname); + new->procname = new_name; new->mode = 0555; *prevp = new; prevp = &new->child; new += 2; + new_name += strlen(new_name) + 1; } *prevp = table; header->ctl_table_arg = table; -- cgit v1.2.3 From 6e9d5164153ad6539edd31e7afb02a3e79124cad Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sat, 21 Jan 2012 10:26:26 -0800 Subject: sysctl: Add support for register sysctl tables with a normal cstring path. Make __register_sysctl_table the core sysctl registration operation and make it take a char * string as path. 
Now that binary paths have been banished into the realm of backwards compatibility in kernel/binary_sysctl.c where they can be safely ignored there is no longer a need to use struct ctl_path to represent path names when registering ctl_tables. Start the transition to using normal char * strings to represent pathnames when registering sysctl tables. Normal strings are easier to deal with both in the internal sysctl implementation and for programmers registering sysctl tables. __register_sysctl_paths is turned into a backwards compatibility wrapper that converts a ctl_path array into a normal char * string. Signed-off-by: Eric W. Biederman --- fs/proc/proc_sysctl.c | 94 +++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 84 insertions(+), 10 deletions(-) (limited to 'fs/proc/proc_sysctl.c') diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index bcf60fb8dce..5704ff0e889 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -882,7 +882,7 @@ static int sysctl_check_table(struct nsproxy *namespaces, struct ctl_table *tabl #endif /* CONFIG_SYSCTL_SYSCALL_CHECK */ /** - * __register_sysctl_paths - register a sysctl hierarchy + * __register_sysctl_table - register a sysctl table * @root: List of sysctl headers to register on * @namespaces: Data to compute which lists of sysctl entries are visible * @path: The path to the directory the sysctl table is in. @@ -934,21 +934,34 @@ static int sysctl_check_table(struct nsproxy *namespaces, struct ctl_table *tabl * This routine returns %NULL on a failure to register, and a pointer * to the table header on success.
*/ -struct ctl_table_header *__register_sysctl_paths( +struct ctl_table_header *__register_sysctl_table( struct ctl_table_root *root, struct nsproxy *namespaces, - const struct ctl_path *path, struct ctl_table *table) + const char *path, struct ctl_table *table) { struct ctl_table_header *header; struct ctl_table *new, **prevp; - unsigned int n, npath; + const char *name, *nextname; + unsigned int npath = 0; struct ctl_table_set *set; size_t path_bytes = 0; char *new_name; /* Count the path components */ - for (npath = 0; path[npath].procname; ++npath) - path_bytes += strlen(path[npath].procname) + 1; + for (name = path; name; name = nextname) { + int namelen; + nextname = strchr(name, '/'); + if (nextname) { + namelen = nextname - name; + nextname++; + } else { + namelen = strlen(name); + } + if (namelen == 0) + continue; + path_bytes += namelen + 1; + npath++; + } /* * For each path component, allocate a 2-element ctl_table array. @@ -968,9 +981,20 @@ struct ctl_table_header *__register_sysctl_paths( /* Now connect the dots */ prevp = &header->ctl_table; - for (n = 0; n < npath; ++n, ++path) { - /* Copy the procname */ - strcpy(new_name, path->procname); + for (name = path; name; name = nextname) { + int namelen; + nextname = strchr(name, '/'); + if (nextname) { + namelen = nextname - name; + nextname++; + } else { + namelen = strlen(name); + } + if (namelen == 0) + continue; + memcpy(new_name, name, namelen); + new_name[namelen] = '\0'; + new->procname = new_name; new->mode = 0555; @@ -978,7 +1002,7 @@ struct ctl_table_header *__register_sysctl_paths( prevp = &new->child; new += 2; - new_name += strlen(new_name) + 1; + new_name += namelen + 1; } *prevp = table; header->ctl_table_arg = table; @@ -1022,6 +1046,56 @@ struct ctl_table_header *__register_sysctl_paths( return header; } +static char *append_path(const char *path, char *pos, const char *name) +{ + int namelen; + namelen = strlen(name); + if (((pos - path) + namelen + 2) >= PATH_MAX) + return NULL; + 
memcpy(pos, name, namelen); + pos[namelen] = '/'; + pos[namelen + 1] = '\0'; + pos += namelen + 1; + return pos; +} + +/** + * __register_sysctl_paths - register a sysctl table hierarchy + * @root: List of sysctl headers to register on + * @namespaces: Data to compute which lists of sysctl entries are visible + * @path: The path to the directory the sysctl table is in. + * @table: the top-level table structure + * + * Register a sysctl table hierarchy. @table should be a filled in ctl_table + * array. A completely 0 filled entry terminates the table. + * + * See __register_sysctl_table for more details. + */ +struct ctl_table_header *__register_sysctl_paths( + struct ctl_table_root *root, + struct nsproxy *namespaces, + const struct ctl_path *path, struct ctl_table *table) +{ + struct ctl_table_header *header = NULL; + const struct ctl_path *component; + char *new_path, *pos; + + pos = new_path = kmalloc(PATH_MAX, GFP_KERNEL); + if (!new_path) + return NULL; + + pos[0] = '\0'; + for (component = path; component->procname; component++) { + pos = append_path(new_path, pos, component->procname); + if (!pos) + goto out; + } + header = __register_sysctl_table(root, namespaces, new_path, table); +out: + kfree(new_path); + return header; +} + /** * register_sysctl_table_path - register a sysctl table hierarchy * @path: The path to the directory the sysctl table is in. -- cgit v1.2.3 From ec6a52668d0bbc6d648e978c327150254bf1ce7f Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sat, 21 Jan 2012 12:35:23 -0800 Subject: sysctl: Add ctl_table chains into cstring paths For any component of table passed to __register_sysctl_paths that actually serves as a path, add that to the cstring path that is passed to __register_sysctl_table. The result is that for most calls to __register_sysctl_paths we only pass a table to __register_sysctl_table that contains no child directories. Signed-off-by: Eric W. 
Biederman --- fs/proc/proc_sysctl.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'fs/proc/proc_sysctl.c') diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 5704ff0e889..9b91deeeb56 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -1076,6 +1076,7 @@ struct ctl_table_header *__register_sysctl_paths( struct nsproxy *namespaces, const struct ctl_path *path, struct ctl_table *table) { + struct ctl_table *ctl_table_arg = table; struct ctl_table_header *header = NULL; const struct ctl_path *component; char *new_path, *pos; @@ -1090,7 +1091,15 @@ struct ctl_table_header *__register_sysctl_paths( if (!pos) goto out; } + while (table->procname && table->child && !table[1].procname) { + pos = append_path(new_path, pos, table->procname); + if (!pos) + goto out; + table = table->child; + } header = __register_sysctl_table(root, namespaces, new_path, table); + if (header) + header->ctl_table_arg = ctl_table_arg; out: kfree(new_path); return header; -- cgit v1.2.3 From f728019bb72e655680c02ad1829323054a8e875f Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 22 Jan 2012 18:22:05 -0800 Subject: sysctl: register only tables of sysctl files Split the registration of a complex ctl_table array which may have arbitrary numbers of directories (->child != NULL) and tables of files into a series of simpler registrations that only register tables of files. Graphically: register('dir', { + file-a + file-b + subdir1 + file-c + subdir2 + file-d + file-e }) is transformed into: wrapper->subheaders[0] = register('dir', {file1-a, file1-b}) wrapper->subheaders[1] = register('dir/subdir1', {file-c}) wrapper->subheaders[2] = register('dir/subdir2', {file-d, file-e}) return wrapper This guarantees that __register_sysctl_table will only see a simple ctl_table array with all entries having (->child == NULL). Care was taken to pass the original simple ctl_table arrays to __register_sysctl_table whenever possible. 
This change is derived from a similar patch written by Lucian Grijincu. Inspired-by: Lucian Adrian Grijincu Signed-off-by: Eric W. Biederman --- fs/proc/proc_sysctl.c | 165 ++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 147 insertions(+), 18 deletions(-) (limited to 'fs/proc/proc_sysctl.c') diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 9b91deeeb56..6bab2ae9e39 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -882,7 +882,7 @@ static int sysctl_check_table(struct nsproxy *namespaces, struct ctl_table *tabl #endif /* CONFIG_SYSCTL_SYSCALL_CHECK */ /** - * __register_sysctl_table - register a sysctl table + * __register_sysctl_table - register a leaf sysctl table * @root: List of sysctl headers to register on * @namespaces: Data to compute which lists of sysctl entries are visible * @path: The path to the directory the sysctl table is in. @@ -900,29 +900,19 @@ static int sysctl_check_table(struct nsproxy *namespaces, struct ctl_table *tabl * * maxlen - the maximum size in bytes of the data * - * mode - the file permissions for the /proc/sys file, and for sysctl(2) + * mode - the file permissions for the /proc/sys file * - * child - a pointer to the child sysctl table if this entry is a directory, or - * %NULL. + * child - must be %NULL. * * proc_handler - the text handler routine (described below) * - * de - for internal use by the sysctl routines - * * extra1, extra2 - extra pointers usable by the proc handler routines * * Leaf nodes in the sysctl tree will be represented by a single file * under /proc; non-leaf nodes will be represented by directories. * - * sysctl(2) can automatically manage read and write requests through - * the sysctl table. The data and maxlen fields of the ctl_table - * struct enable minimal validation of the values being written to be - * performed, and the mode field allows minimal authentication.
- * - * There must be a proc_handler routine for any terminal nodes - * mirrored under /proc/sys (non-terminals are handled by a built-in - * directory handler). Several default handlers are available to - * cover common cases - + * There must be a proc_handler routine for any terminal nodes. + * Several default handlers are available to cover common cases - * * proc_dostring(), proc_dointvec(), proc_dointvec_jiffies(), * proc_dointvec_userhz_jiffies(), proc_dointvec_minmax(), @@ -1059,6 +1049,100 @@ static char *append_path(const char *path, char *pos, const char *name) return pos; } +static int count_subheaders(struct ctl_table *table) +{ + int has_files = 0; + int nr_subheaders = 0; + struct ctl_table *entry; + + /* special case: no directory and empty directory */ + if (!table || !table->procname) + return 1; + + for (entry = table; entry->procname; entry++) { + if (entry->child) + nr_subheaders += count_subheaders(entry->child); + else + has_files = 1; + } + return nr_subheaders + has_files; +} + +static int register_leaf_sysctl_tables(const char *path, char *pos, + struct ctl_table_header ***subheader, + struct ctl_table_root *root, struct nsproxy *namespaces, + struct ctl_table *table) +{ + struct ctl_table *ctl_table_arg = NULL; + struct ctl_table *entry, *files; + int nr_files = 0; + int nr_dirs = 0; + int err = -ENOMEM; + + for (entry = table; entry->procname; entry++) { + if (entry->child) + nr_dirs++; + else + nr_files++; + } + + files = table; + /* If there are mixed files and directories we need a new table */ + if (nr_dirs && nr_files) { + struct ctl_table *new; + files = kzalloc(sizeof(struct ctl_table) * (nr_files + 1), + GFP_KERNEL); + if (!files) + goto out; + + ctl_table_arg = files; + for (new = files, entry = table; entry->procname; entry++) { + if (entry->child) + continue; + *new = *entry; + new++; + } + } + + /* Register everything except a directory full of subdirectories */ + if (nr_files || !nr_dirs) { + struct ctl_table_header *header; 
+ header = __register_sysctl_table(root, namespaces, path, files); + if (!header) { + kfree(ctl_table_arg); + goto out; + } + + /* Remember if we need to free the file table */ + header->ctl_table_arg = ctl_table_arg; + **subheader = header; + (*subheader)++; + } + + /* Recurse into the subdirectories. */ + for (entry = table; entry->procname; entry++) { + char *child_pos; + + if (!entry->child) + continue; + + err = -ENAMETOOLONG; + child_pos = append_path(path, pos, entry->procname); + if (!child_pos) + goto out; + + err = register_leaf_sysctl_tables(path, child_pos, subheader, + root, namespaces, entry->child); + pos[0] = '\0'; + if (err) + goto out; + } + err = 0; +out: + /* On failure our caller will unregister all registered subheaders */ + return err; +} + /** * __register_sysctl_paths - register a sysctl table hierarchy * @root: List of sysctl headers to register on @@ -1077,7 +1161,8 @@ struct ctl_table_header *__register_sysctl_paths( const struct ctl_path *path, struct ctl_table *table) { struct ctl_table *ctl_table_arg = table; - struct ctl_table_header *header = NULL; + int nr_subheaders = count_subheaders(table); + struct ctl_table_header *header = NULL, **subheaders, **subheader; const struct ctl_path *component; char *new_path, *pos; @@ -1097,12 +1182,39 @@ struct ctl_table_header *__register_sysctl_paths( goto out; table = table->child; } - header = __register_sysctl_table(root, namespaces, new_path, table); - if (header) + if (nr_subheaders == 1) { + header = __register_sysctl_table(root, namespaces, new_path, table); + if (header) + header->ctl_table_arg = ctl_table_arg; + } else { + header = kzalloc(sizeof(*header) + + sizeof(*subheaders)*nr_subheaders, GFP_KERNEL); + if (!header) + goto out; + + subheaders = (struct ctl_table_header **) (header + 1); + subheader = subheaders; header->ctl_table_arg = ctl_table_arg; + + if (register_leaf_sysctl_tables(new_path, pos, &subheader, + root, namespaces, table)) + goto err_register_leaves; + } + out: 
kfree(new_path); return header; + +err_register_leaves: + while (subheader > subheaders) { + struct ctl_table_header *subh = *(--subheader); + struct ctl_table *table = subh->ctl_table_arg; + unregister_sysctl_table(subh); + kfree(table); + } + kfree(header); + header = NULL; + goto out; } /** @@ -1149,11 +1261,28 @@ EXPORT_SYMBOL(register_sysctl_table); */ void unregister_sysctl_table(struct ctl_table_header * header) { + int nr_subheaders; might_sleep(); if (header == NULL) return; + nr_subheaders = count_subheaders(header->ctl_table_arg); + if (unlikely(nr_subheaders > 1)) { + struct ctl_table_header **subheaders; + int i; + + subheaders = (struct ctl_table_header **)(header + 1); + for (i = nr_subheaders -1; i >= 0; i--) { + struct ctl_table_header *subh = subheaders[i]; + struct ctl_table *table = subh->ctl_table_arg; + unregister_sysctl_table(subh); + kfree(table); + } + kfree(header); + return; + } + spin_lock(&sysctl_lock); start_unregistering(header); if (!--header->parent->count) { -- cgit v1.2.3 From 7c60c48f58a78195acc1f71c9a9d01958c02ab89 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sat, 21 Jan 2012 13:34:05 -0800 Subject: sysctl: Improve the sysctl sanity checks - Stop validating subdirectories now that we only register leaf tables - Cleanup and improve the duplicate filename check. * Run the duplicate filename check under the sysctl_lock to guarantee we never add duplicate names. * Reduce the duplicate filename check to nearly O(M*N) where M is the number of entries in the table we are registering and N is the number of entries in the directory before we got there. - Move the duplicate filename check into its own function and call it directly from __register_sysctl_table - Kill the config option as the sanity checks are now cheap enough the config option is unnecessary. The original reason for the config option was because we had a huge table used to verify the proc filename to binary sysctl mapping.
That table has now evolved into the binary_sysctl translation layer and is no longer part of the sysctl_check code. - Tighten up the permission checks. Guaranteeing that files only have read or write permissions. - Removed redundant check for parents having a procname as now everything has a procname. - Generalize the backtrace logic so that we print a backtrace from any failure of __register_sysctl_table that was not caused by a memory allocation failure. The backtrace allows us to track down who erroneously registered a sysctl table. Benchmark before (CONFIG_SYSCTL_CHECK=y): make-dummies 0 999 -> 12s rmmod dummy -> 0.08s Benchmark before (CONFIG_SYSCTL_CHECK=n): make-dummies 0 999 -> 0.7s rmmod dummy -> 0.06s make-dummies 0 99999 -> 1m13s rmmod dummy -> 0.38s Benchmark after: make-dummies 0 999 -> 0.65s rmmod dummy -> 0.055s make-dummies 0 9999 -> 1m10s rmmod dummy -> 0.39s The sysctl sanity checks now impose no measurable cost. Signed-off-by: Eric W. Biederman --- fs/proc/proc_sysctl.c | 222 +++++++++++++++++++------------------------------- 1 file changed, 86 insertions(+), 136 deletions(-) (limited to 'fs/proc/proc_sysctl.c') diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 6bab2ae9e39..a492ff60e07 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -726,160 +726,106 @@ static void try_attach(struct ctl_table_header *p, struct ctl_table_header *q) } } -#ifdef CONFIG_SYSCTL_SYSCALL_CHECK -static int sysctl_depth(struct ctl_table *table) +static int sysctl_check_table_dups(const char *path, struct ctl_table *old, + struct ctl_table *table) { - struct ctl_table *tmp; - int depth; - - depth = 0; - for (tmp = table; tmp->parent; tmp = tmp->parent) - depth++; + struct ctl_table *entry, *test; + int error = 0; - return depth; + for (entry = old; entry->procname; entry++) { + for (test = table; test->procname; test++) { + if (strcmp(entry->procname, test->procname) == 0) { + printk(KERN_ERR "sysctl duplicate entry: %s/%s\n", + path,
test->procname); + error = -EEXIST; + } + } + } + return error; } -static struct ctl_table *sysctl_parent(struct ctl_table *table, int n) +static int sysctl_check_dups(struct nsproxy *namespaces, + struct ctl_table_header *header, + const char *path, struct ctl_table *table) { - int i; + struct ctl_table_root *root; + struct ctl_table_set *set; + struct ctl_table_header *dir_head, *head; + struct ctl_table *dir_table; + int error = 0; - for (i = 0; table && i < n; i++) - table = table->parent; + /* No dups if we are the only member of our directory */ + if (header->attached_by != table) + return 0; - return table; -} + dir_head = header->parent; + dir_table = header->attached_to; + error = sysctl_check_table_dups(path, dir_table, table); -static void sysctl_print_path(struct ctl_table *table) -{ - struct ctl_table *tmp; - int depth, i; - depth = sysctl_depth(table); - if (table->procname) { - for (i = depth; i >= 0; i--) { - tmp = sysctl_parent(table, i); - printk("/%s", tmp->procname?tmp->procname:""); - } - } - printk(" "); -} + root = &sysctl_table_root; + do { + set = lookup_header_set(root, namespaces); -static struct ctl_table *sysctl_check_lookup(struct nsproxy *namespaces, - struct ctl_table *table) -{ - struct ctl_table_header *head; - struct ctl_table *ref, *test; - int depth, cur_depth; - - depth = sysctl_depth(table); - - for (head = __sysctl_head_next(namespaces, NULL); head; - head = __sysctl_head_next(namespaces, head)) { - cur_depth = depth; - ref = head->ctl_table; -repeat: - test = sysctl_parent(table, cur_depth); - for (; ref->procname; ref++) { - int match = 0; - if (cur_depth && !ref->child) + list_for_each_entry(head, &set->list, ctl_entry) { + if (head->unregistering) continue; - - if (test->procname && ref->procname && - (strcmp(test->procname, ref->procname) == 0)) - match++; - - if (match) { - if (cur_depth != 0) { - cur_depth--; - ref = ref->child; - goto repeat; - } - goto out; - } + if (head->attached_to != dir_table) + continue; + 
error = sysctl_check_table_dups(path, head->attached_by, + table); } - } - ref = NULL; -out: - sysctl_head_finish(head); - return ref; + root = list_entry(root->root_list.next, + struct ctl_table_root, root_list); + } while (root != &sysctl_table_root); + return error; } -static void set_fail(const char **fail, struct ctl_table *table, const char *str) +static int sysctl_err(const char *path, struct ctl_table *table, char *fmt, ...) { - if (*fail) { - printk(KERN_ERR "sysctl table check failed: "); - sysctl_print_path(table); - printk(" %s\n", *fail); - dump_stack(); - } - *fail = str; -} + struct va_format vaf; + va_list args; -static void sysctl_check_leaf(struct nsproxy *namespaces, - struct ctl_table *table, const char **fail) -{ - struct ctl_table *ref; + va_start(args, fmt); + vaf.fmt = fmt; + vaf.va = &args; + + printk(KERN_ERR "sysctl table check failed: %s/%s %pV\n", + path, table->procname, &vaf); - ref = sysctl_check_lookup(namespaces, table); - if (ref && (ref != table)) - set_fail(fail, table, "Sysctl already exists"); + va_end(args); + return -EINVAL; } -static int sysctl_check_table(struct nsproxy *namespaces, struct ctl_table *table) +static int sysctl_check_table(const char *path, struct ctl_table *table) { - int error = 0; + int err = 0; for (; table->procname; table++) { - const char *fail = NULL; - - if (table->parent) { - if (!table->parent->procname) - set_fail(&fail, table, "Parent without procname"); - } - if (table->child) { - if (table->data) - set_fail(&fail, table, "Directory with data?"); - if (table->maxlen) - set_fail(&fail, table, "Directory with maxlen?"); - if ((table->mode & (S_IRUGO|S_IXUGO)) != table->mode) - set_fail(&fail, table, "Writable sysctl directory"); - if (table->proc_handler) - set_fail(&fail, table, "Directory with proc_handler"); - if (table->extra1) - set_fail(&fail, table, "Directory with extra1"); - if (table->extra2) - set_fail(&fail, table, "Directory with extra2"); - } else { - if ((table->proc_handler == 
proc_dostring) || - (table->proc_handler == proc_dointvec) || - (table->proc_handler == proc_dointvec_minmax) || - (table->proc_handler == proc_dointvec_jiffies) || - (table->proc_handler == proc_dointvec_userhz_jiffies) || - (table->proc_handler == proc_dointvec_ms_jiffies) || - (table->proc_handler == proc_doulongvec_minmax) || - (table->proc_handler == proc_doulongvec_ms_jiffies_minmax)) { - if (!table->data) - set_fail(&fail, table, "No data"); - if (!table->maxlen) - set_fail(&fail, table, "No maxlen"); - } -#ifdef CONFIG_PROC_SYSCTL - if (!table->proc_handler) - set_fail(&fail, table, "No proc_handler"); -#endif - sysctl_check_leaf(namespaces, table, &fail); - } - if (table->mode > 0777) - set_fail(&fail, table, "bogus .mode"); - if (fail) { - set_fail(&fail, table, NULL); - error = -EINVAL; - } if (table->child) - error |= sysctl_check_table(namespaces, table->child); + err = sysctl_err(path, table, "Not a file"); + + if ((table->proc_handler == proc_dostring) || + (table->proc_handler == proc_dointvec) || + (table->proc_handler == proc_dointvec_minmax) || + (table->proc_handler == proc_dointvec_jiffies) || + (table->proc_handler == proc_dointvec_userhz_jiffies) || + (table->proc_handler == proc_dointvec_ms_jiffies) || + (table->proc_handler == proc_doulongvec_minmax) || + (table->proc_handler == proc_doulongvec_ms_jiffies_minmax)) { + if (!table->data) + err = sysctl_err(path, table, "No data"); + if (!table->maxlen) + err = sysctl_err(path, table, "No maxlen"); + } + if (!table->proc_handler) + err = sysctl_err(path, table, "No proc_handler"); + + if ((table->mode & (S_IRUGO|S_IWUGO)) != table->mode) + err = sysctl_err(path, table, "bogus .mode 0%o", + table->mode); } - return error; + return err; } -#endif /* CONFIG_SYSCTL_SYSCALL_CHECK */ /** * __register_sysctl_table - register a leaf sysctl table @@ -1003,12 +949,8 @@ struct ctl_table_header *__register_sysctl_table( header->root = root; sysctl_set_parent(NULL, header->ctl_table); header->count = 1; 
-#ifdef CONFIG_SYSCTL_SYSCALL_CHECK - if (sysctl_check_table(namespaces, header->ctl_table)) { - kfree(header); - return NULL; - } -#endif + if (sysctl_check_table(path, table)) + goto fail; spin_lock(&sysctl_lock); header->set = lookup_header_set(root, namespaces); header->attached_by = header->ctl_table; @@ -1029,11 +971,19 @@ struct ctl_table_header *__register_sysctl_table( struct ctl_table_root, root_list); set = lookup_header_set(root, namespaces); } + if (sysctl_check_dups(namespaces, header, path, table)) + goto fail_locked; header->parent->count++; list_add_tail(&header->ctl_entry, &header->set->list); spin_unlock(&sysctl_lock); return header; +fail_locked: + spin_unlock(&sysctl_lock); +fail: + kfree(header); + dump_stack(); + return NULL; } static char *append_path(const char *path, char *pos, const char *name) -- cgit v1.2.3 From 8d6ecfcc014332fd2fe933f64194160f0e3a6696 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 6 Jan 2012 11:55:30 -0800 Subject: sysctl: Remove the now unused ctl_table parent field. While useful at one time for selinux and the sysctl sanity checks those users no longer use the parent field and we can safely remove it. Inspired-by: Lucian Adrian Grijincu Signed-off-by: Eric W. 
Biederman --- fs/proc/proc_sysctl.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) (limited to 'fs/proc/proc_sysctl.c') diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index a492ff60e07..e573f9b4f22 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -218,16 +218,6 @@ static int sysctl_perm(struct ctl_table_root *root, struct ctl_table *table, int return test_perm(mode, op); } -static void sysctl_set_parent(struct ctl_table *parent, struct ctl_table *table) -{ - for (; table->procname; table++) { - table->parent = parent; - if (table->child) - sysctl_set_parent(table, table->child); - } -} - - static struct inode *proc_sys_make_inode(struct super_block *sb, struct ctl_table_header *head, struct ctl_table *table) { @@ -947,10 +937,10 @@ struct ctl_table_header *__register_sysctl_table( header->used = 0; header->unregistering = NULL; header->root = root; - sysctl_set_parent(NULL, header->ctl_table); header->count = 1; if (sysctl_check_table(path, table)) goto fail; + spin_lock(&sysctl_lock); header->set = lookup_header_set(root, namespaces); header->attached_by = header->ctl_table; -- cgit v1.2.3 From 3cc3e04636d603778d921854b84ae7bd34a349a2 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sat, 7 Jan 2012 06:57:47 -0800 Subject: sysctl: A more obvious version of grab_header. Instead of relying on sysctl_head_next(NULL) to magically return the right header for the root directory, explicitly transform NULL into the root directory's header. Signed-off-by: Eric W.
Biederman --- fs/proc/proc_sysctl.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'fs/proc/proc_sysctl.c') diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index e573f9b4f22..15444850b3e 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -267,10 +267,10 @@ static struct ctl_table *find_in_table(struct ctl_table *p, struct qstr *name) static struct ctl_table_header *grab_header(struct inode *inode) { - if (PROC_I(inode)->sysctl) - return sysctl_head_grab(PROC_I(inode)->sysctl); - else - return sysctl_head_next(NULL); + struct ctl_table_header *head = PROC_I(inode)->sysctl; + if (!head) + head = &root_table_header; + return sysctl_head_grab(head); } static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry, -- cgit v1.2.3 From 938aaa4f9249aa1519fd0db07fc72125de2df338 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 9 Jan 2012 17:24:30 -0800 Subject: sysctl: Initial support for auto-unregistering sysctl tables. Add nreg to ctl_table_header. When nreg drops to 0 the ctl_table_header will be unregistered. Factor out drop_sysctl_table from unregister_sysctl_table, and add the logic for decrementing nreg. Signed-off-by: Eric W. 
Biederman --- fs/proc/proc_sysctl.c | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) (limited to 'fs/proc/proc_sysctl.c') diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 15444850b3e..13faa48c467 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -29,8 +29,9 @@ static struct ctl_table root_table[1]; static struct ctl_table_root sysctl_table_root; static struct ctl_table_header root_table_header = { {{.count = 1, - .ctl_table = root_table, - .ctl_entry = LIST_HEAD_INIT(sysctl_table_root.default_set.list),}}, + .nreg = 1, + .ctl_table = root_table, + .ctl_entry = LIST_HEAD_INIT(sysctl_table_root.default_set.list),}}, .root = &sysctl_table_root, .set = &sysctl_table_root.default_set, }; @@ -938,6 +939,7 @@ struct ctl_table_header *__register_sysctl_table( header->unregistering = NULL; header->root = root; header->count = 1; + header->nreg = 1; if (sysctl_check_table(path, table)) goto fail; @@ -1192,6 +1194,20 @@ struct ctl_table_header *register_sysctl_table(struct ctl_table *table) } EXPORT_SYMBOL(register_sysctl_table); +static void drop_sysctl_table(struct ctl_table_header *header) +{ + if (--header->nreg) + return; + + start_unregistering(header); + if (!--header->parent->count) { + WARN_ON(1); + kfree_rcu(header->parent, rcu); + } + if (!--header->count) + kfree_rcu(header, rcu); +} + /** * unregister_sysctl_table - unregister a sysctl table hierarchy * @header: the header returned from register_sysctl_table @@ -1224,13 +1240,7 @@ void unregister_sysctl_table(struct ctl_table_header * header) } spin_lock(&sysctl_lock); - start_unregistering(header); - if (!--header->parent->count) { - WARN_ON(1); - kfree_rcu(header->parent, rcu); - } - if (!--header->count) - kfree_rcu(header, rcu); + drop_sysctl_table(header); spin_unlock(&sysctl_lock); } EXPORT_SYMBOL(unregister_sysctl_table); -- cgit v1.2.3 From e0d045290a8454ecd7f63c78c10d412f35d6ef94 Mon Sep 17 00:00:00 2001 From: "Eric W. 
Biederman" Date: Mon, 9 Jan 2012 22:36:41 -0800 Subject: sysctl: Factor out init_header from __register_sysctl_paths Factor out a routine to initialize the ctl_table_header. Signed-off-by: Eric W. Biederman --- fs/proc/proc_sysctl.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) (limited to 'fs/proc/proc_sysctl.c') diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 13faa48c467..49799259b0f 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -42,6 +42,21 @@ static struct ctl_table_root sysctl_table_root = { static DEFINE_SPINLOCK(sysctl_lock); +static void init_header(struct ctl_table_header *head, + struct ctl_table_root *root, struct ctl_table_set *set, + struct ctl_table *table) +{ + head->ctl_table_arg = table; + INIT_LIST_HEAD(&head->ctl_entry); + head->used = 0; + head->count = 1; + head->nreg = 1; + head->unregistering = NULL; + head->root = root; + head->set = set; + head->parent = NULL; +} + /* called under sysctl_lock */ static int use_table(struct ctl_table_header *p) { @@ -932,14 +947,8 @@ struct ctl_table_header *__register_sysctl_table( new_name += namelen + 1; } *prevp = table; - header->ctl_table_arg = table; - - INIT_LIST_HEAD(&header->ctl_entry); - header->used = 0; - header->unregistering = NULL; - header->root = root; - header->count = 1; - header->nreg = 1; + + init_header(header, root, NULL, table); if (sysctl_check_table(path, table)) goto fail; -- cgit v1.2.3 From 8425d6aaf0704b98480131ed339c208ffce12e44 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 9 Jan 2012 17:35:01 -0800 Subject: sysctl: Factor out insert_header and erase_header Signed-off-by: Eric W.
Biederman --- fs/proc/proc_sysctl.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'fs/proc/proc_sysctl.c') diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 49799259b0f..7e96a2681b6 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -57,6 +57,17 @@ static void init_header(struct ctl_table_header *head, head->parent = NULL; } +static void erase_header(struct ctl_table_header *head) +{ + list_del_init(&head->ctl_entry); +} + +static void insert_header(struct ctl_table_header *header) +{ + header->parent->count++; + list_add_tail(&header->ctl_entry, &header->set->list); +} + /* called under sysctl_lock */ static int use_table(struct ctl_table_header *p) { @@ -96,7 +107,7 @@ static void start_unregistering(struct ctl_table_header *p) * do not remove from the list until nobody holds it; walking the * list in do_sysctl() relies on that. */ - list_del_init(&p->ctl_entry); + erase_header(p); } static void sysctl_head_get(struct ctl_table_header *head) @@ -974,8 +985,7 @@ struct ctl_table_header *__register_sysctl_table( } if (sysctl_check_dups(namespaces, header, path, table)) goto fail_locked; - header->parent->count++; - list_add_tail(&header->ctl_entry, &header->set->list); + insert_header(header); spin_unlock(&sysctl_lock); return header; -- cgit v1.2.3 From a194558e8698621a9ce7f2c6a720123e644af131 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sat, 21 Jan 2012 17:51:48 -0800 Subject: sysctl: Normalize the root_table data structure. Every other directory has a .child member and we look at the .child for our entries. Do the same for the root_table. Signed-off-by: Eric W. 
Biederman --- fs/proc/proc_sysctl.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'fs/proc/proc_sysctl.c') diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 7e96a2681b6..88d1b06cc5c 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -25,7 +25,14 @@ void proc_sys_poll_notify(struct ctl_table_poll *poll) wake_up_interruptible(&poll->wait); } -static struct ctl_table root_table[1]; +static struct ctl_table root_table[] = { + { + .procname = "", + .mode = S_IRUGO|S_IXUGO, + .child = &root_table[1], + }, + { } +}; static struct ctl_table_root sysctl_table_root; static struct ctl_table_header root_table_header = { {{.count = 1, @@ -319,7 +326,7 @@ static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry, goto out; } - table = table ? table->child : head->ctl_table; + table = table ? table->child : &head->ctl_table[1]; p = find_in_table(table, name); if (!p) { @@ -510,7 +517,7 @@ static int proc_sys_readdir(struct file *filp, void *dirent, filldir_t filldir) goto out; } - table = table ? table->child : head->ctl_table; + table = table ? table->child : &head->ctl_table[1]; ret = 0; /* Avoid a switch here: arm builds fail with missing __cmpdi2 */ @@ -966,7 +973,7 @@ struct ctl_table_header *__register_sysctl_table( spin_lock(&sysctl_lock); header->set = lookup_header_set(root, namespaces); header->attached_by = header->ctl_table; - header->attached_to = root_table; + header->attached_to = &root_table[1]; header->parent = &root_table_header; set = header->set; root = header->root; -- cgit v1.2.3 From 076c3eed2c31773200b082568957fd8852ae93d7 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 9 Jan 2012 21:42:02 -0800 Subject: sysctl: Rewrite proc_sys_lookup introducing find_entry and lookup_entry. Replace the helpers that proc_sys_lookup uses with helpers that work in terms of an entire sysctl directory. 
This is worse for sysctl_lock hold times but it is much better for code clarity and the code cleanups to come. find_in_table is no longer needed so it is removed. find_entry a general helper to find entries in a directory is added. lookup_entry is a simple wrapper around find_entry that takes the sysctl_lock increases the use count if an entry is found and drops the sysctl_lock. Signed-off-by: Eric W. Biederman --- fs/proc/proc_sysctl.c | 102 +++++++++++++++++++++++++++++++++++++------------- 1 file changed, 76 insertions(+), 26 deletions(-) (limited to 'fs/proc/proc_sysctl.c') diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 88d1b06cc5c..3b63f298ce2 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -49,6 +49,55 @@ static struct ctl_table_root sysctl_table_root = { static DEFINE_SPINLOCK(sysctl_lock); +static int namecmp(const char *name1, int len1, const char *name2, int len2) +{ + int minlen; + int cmp; + + minlen = len1; + if (minlen > len2) + minlen = len2; + + cmp = memcmp(name1, name2, minlen); + if (cmp == 0) + cmp = len1 - len2; + return cmp; +} + +static struct ctl_table *find_entry(struct ctl_table_header **phead, + struct ctl_table_set *set, + struct ctl_table_header *dir_head, struct ctl_table *dir, + const char *name, int namelen) +{ + struct ctl_table_header *head; + struct ctl_table *entry; + + if (dir_head->set == set) { + for (entry = dir; entry->procname; entry++) { + const char *procname = entry->procname; + if (namecmp(procname, strlen(procname), name, namelen) == 0) { + *phead = dir_head; + return entry; + } + } + } + + list_for_each_entry(head, &set->list, ctl_entry) { + if (head->unregistering) + continue; + if (head->attached_to != dir) + continue; + for (entry = head->attached_by; entry->procname; entry++) { + const char *procname = entry->procname; + if (namecmp(procname, strlen(procname), name, namelen) == 0) { + *phead = head; + return entry; + } + } + } + return NULL; +} + static void init_header(struct 
ctl_table_header *head, struct ctl_table_root *root, struct ctl_table_set *set, struct ctl_table *table) @@ -168,6 +217,32 @@ lookup_header_list(struct ctl_table_root *root, struct nsproxy *namespaces) return &set->list; } +static struct ctl_table *lookup_entry(struct ctl_table_header **phead, + struct ctl_table_header *dir_head, + struct ctl_table *dir, + const char *name, int namelen) +{ + struct ctl_table_header *head; + struct ctl_table *entry; + struct ctl_table_root *root; + struct ctl_table_set *set; + + spin_lock(&sysctl_lock); + root = &sysctl_table_root; + do { + set = lookup_header_set(root, current->nsproxy); + entry = find_entry(&head, set, dir_head, dir, name, namelen); + if (entry && use_table(head)) + *phead = head; + else + entry = NULL; + root = list_entry(root->root_list.next, + struct ctl_table_root, root_list); + } while (!entry && root != &sysctl_table_root); + spin_unlock(&sysctl_lock); + return entry; +} + static struct ctl_table_header *__sysctl_head_next(struct nsproxy *namespaces, struct ctl_table_header *prev) { @@ -284,21 +359,6 @@ out: return inode; } -static struct ctl_table *find_in_table(struct ctl_table *p, struct qstr *name) -{ - for ( ; p->procname; p++) { - if (strlen(p->procname) != name->len) - continue; - - if (memcmp(p->procname, name->name, name->len) != 0) - continue; - - /* I have a match */ - return p; - } - return NULL; -} - static struct ctl_table_header *grab_header(struct inode *inode) { struct ctl_table_header *head = PROC_I(inode)->sysctl; @@ -328,17 +388,7 @@ static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry, table = table ? 
table->child : &head->ctl_table[1]; - p = find_in_table(table, name); - if (!p) { - for (h = sysctl_head_next(NULL); h; h = sysctl_head_next(h)) { - if (h->attached_to != table) - continue; - p = find_in_table(h->attached_by, name); - if (p) - break; - } - } - + p = lookup_entry(&h, head, table, name->name, name->len); if (!p) goto out; -- cgit v1.2.3 From 6a75ce167c53b41f15088d3c2c7e51c89dc8798a Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 18 Jan 2012 03:15:51 -0800 Subject: sysctl: Rewrite proc_sys_readdir in terms of first_entry and next_entry Replace sysctl_head_next with first_entry and next_entry. These new iterators operate at the level of sysctl table entries and filter out any sysctl tables that should not be shown. Utilizing two specialized functions instead of a single function removes conditionals for handling awkward special cases that only come up at the beginning of iteration, making the iterators easier to read and understand. Signed-off-by: Eric W. Biederman --- fs/proc/proc_sysctl.c | 98 ++++++++++++++++++++++++++++++++------------------- 1 file changed, 62 insertions(+), 36 deletions(-) (limited to 'fs/proc/proc_sysctl.c') diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 3b63f298ce2..d9c3ae6afe4 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -243,31 +243,25 @@ static struct ctl_table *lookup_entry(struct ctl_table_header **phead, return entry; } -static struct ctl_table_header *__sysctl_head_next(struct nsproxy *namespaces, - struct ctl_table_header *prev) +static struct ctl_table_header *next_usable_entry(struct ctl_table *dir, + struct ctl_table_root *root, struct list_head *tmp) { - struct ctl_table_root *root; + struct nsproxy *namespaces = current->nsproxy; struct list_head *header_list; struct ctl_table_header *head; - struct list_head *tmp; - spin_lock(&sysctl_lock); - if (prev) { - head = prev; - tmp = &prev->ctl_entry; - unuse_table(prev); - goto next; - } - tmp = 
&root_table_header.ctl_entry; + goto next; for (;;) { head = list_entry(tmp, struct ctl_table_header, ctl_entry); + root = head->root; - if (!use_table(head)) + if (head->attached_to != dir || + !head->attached_by->procname || + !use_table(head)) goto next; - spin_unlock(&sysctl_lock); + return head; next: - root = head->root; tmp = tmp->next; header_list = lookup_header_list(root, namespaces); if (tmp != header_list) @@ -283,13 +277,53 @@ static struct ctl_table_header *__sysctl_head_next(struct nsproxy *namespaces, tmp = header_list->next; } out: - spin_unlock(&sysctl_lock); return NULL; } -static struct ctl_table_header *sysctl_head_next(struct ctl_table_header *prev) +static void first_entry( + struct ctl_table_header *dir_head, struct ctl_table *dir, + struct ctl_table_header **phead, struct ctl_table **pentry) { - return __sysctl_head_next(current->nsproxy, prev); + struct ctl_table_header *head = dir_head; + struct ctl_table *entry = dir; + + spin_lock(&sysctl_lock); + if (entry->procname) { + use_table(head); + } else { + head = next_usable_entry(dir, &sysctl_table_root, + &sysctl_table_root.default_set.list); + if (head) + entry = head->attached_by; + } + spin_unlock(&sysctl_lock); + *phead = head; + *pentry = entry; +} + +static void next_entry(struct ctl_table *dir, + struct ctl_table_header **phead, struct ctl_table **pentry) +{ + struct ctl_table_header *head = *phead; + struct ctl_table *entry = *pentry; + + entry++; + if (!entry->procname) { + struct ctl_table_root *root = head->root; + struct list_head *tmp = &head->ctl_entry; + if (head->attached_to != dir) { + root = &sysctl_table_root; + tmp = &sysctl_table_root.default_set.list; + } + spin_lock(&sysctl_lock); + unuse_table(head); + head = next_usable_entry(dir, root, tmp); + spin_unlock(&sysctl_lock); + if (head) + entry = head->attached_by; + } + *phead = head; + *pentry = entry; } void register_sysctl_root(struct ctl_table_root *root) @@ -533,20 +567,17 @@ static int scan(struct 
ctl_table_header *head, ctl_table *table, unsigned long *pos, struct file *file, void *dirent, filldir_t filldir) { + int res; - for (; table->procname; table++, (*pos)++) { - int res; + if ((*pos)++ < file->f_pos) + return 0; - if (*pos < file->f_pos) - continue; + res = proc_sys_fill_cache(file, dirent, filldir, head, table); - res = proc_sys_fill_cache(file, dirent, filldir, head, table); - if (res) - return res; + if (res == 0) + file->f_pos = *pos; - file->f_pos = *pos + 1; - } - return 0; + return res; } static int proc_sys_readdir(struct file *filp, void *dirent, filldir_t filldir) @@ -556,6 +587,7 @@ static int proc_sys_readdir(struct file *filp, void *dirent, filldir_t filldir) struct ctl_table_header *head = grab_header(inode); struct ctl_table *table = PROC_I(inode)->sysctl_entry; struct ctl_table_header *h = NULL; + struct ctl_table *entry; unsigned long pos; int ret = -EINVAL; @@ -585,14 +617,8 @@ static int proc_sys_readdir(struct file *filp, void *dirent, filldir_t filldir) } pos = 2; - ret = scan(head, table, &pos, filp, dirent, filldir); - if (ret) - goto out; - - for (h = sysctl_head_next(NULL); h; h = sysctl_head_next(h)) { - if (h->attached_to != table) - continue; - ret = scan(h, h->attached_by, &pos, filp, dirent, filldir); + for (first_entry(head, table, &h, &entry); h; next_entry(table, &h, &entry)) { + ret = scan(h, entry, &pos, filp, dirent, filldir); if (ret) { sysctl_head_finish(h); break; -- cgit v1.2.3 From 9eb47c26f09e27506d343ef52e634b2a50ee21ef Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 22 Jan 2012 21:26:00 -0800 Subject: sysctl: Add a root pointer to ctl_table_set Add a ctl_table_root pointer to ctl_table set so it is easy to go from a ctl_table_set to a ctl_table_root. Signed-off-by: Eric W. 
Biederman --- fs/proc/proc_sysctl.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs/proc/proc_sysctl.c') diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index d9c3ae6afe4..65c13dddcea 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -45,6 +45,7 @@ static struct ctl_table_header root_table_header = { static struct ctl_table_root sysctl_table_root = { .root_list = LIST_HEAD_INIT(sysctl_table_root.root_list), .default_set.list = LIST_HEAD_INIT(root_table_header.ctl_entry), + .default_set.root = &sysctl_table_root, }; static DEFINE_SPINLOCK(sysctl_lock); @@ -1348,9 +1349,11 @@ void unregister_sysctl_table(struct ctl_table_header * header) EXPORT_SYMBOL(unregister_sysctl_table); void setup_sysctl_set(struct ctl_table_set *p, + struct ctl_table_root *root, int (*is_seen)(struct ctl_table_set *)) { INIT_LIST_HEAD(&p->list); + p->root = root; p->is_seen = is_seen; } -- cgit v1.2.3 From 7ec66d06362da7684a4948c4c2bf1f8546425df4 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 29 Dec 2011 08:24:29 -0800 Subject: sysctl: Stop requiring explicit management of sysctl directories Simplify the code and the sysctl semantics by autogenerating sysctl directories when a sysctl table is registered that needs the directories and autodeleting the directories when there are no more sysctl tables registered that need them. Autogenerating directories keeps sysctl tables from depending on each other, removing all of the arcane register/unregister ordering constraints and makes it impossible to get the order wrong when registering and unregistering sysctl tables. Autogenerating directories yields one unique entity that dentries can point to, retaining the current effective use of the dcache. Add struct ctl_dir as the type of these new autogenerated directories. The attached_by and attached_to fields in ctl_table_header are removed as they are no longer needed. The child field in ctl_table is no longer needed by the core of the sysctl code.
ctl_table.child can be removed once all of the existing users have been updated. Benchmark before: make-dummies 0 999 -> 0.7s rmmod dummy -> 0.07s make-dummies 0 9999 -> 1m10s rmmod dummy -> 0.4s Benchmark after: make-dummies 0 999 -> 0.44s rmmod dummy -> 0.065s make-dummies 0 9999 -> 1m36s rmmod dummy -> 0.4s Signed-off-by: Eric W. Biederman --- fs/proc/proc_sysctl.c | 342 +++++++++++++++++++++----------------------------- 1 file changed, 143 insertions(+), 199 deletions(-) (limited to 'fs/proc/proc_sysctl.c') diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 65c13dddcea..3c0767d5a55 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -28,28 +28,31 @@ void proc_sys_poll_notify(struct ctl_table_poll *poll) static struct ctl_table root_table[] = { { .procname = "", - .mode = S_IRUGO|S_IXUGO, - .child = &root_table[1], + .mode = S_IFDIR|S_IRUGO|S_IXUGO, }, { } }; static struct ctl_table_root sysctl_table_root; -static struct ctl_table_header root_table_header = { - {{.count = 1, - .nreg = 1, - .ctl_table = root_table, - .ctl_entry = LIST_HEAD_INIT(sysctl_table_root.default_set.list),}}, - .root = &sysctl_table_root, - .set = &sysctl_table_root.default_set, +static struct ctl_dir sysctl_root_dir = { + .header = { + {{.count = 1, + .nreg = 1, + .ctl_table = root_table, + .ctl_entry = LIST_HEAD_INIT(sysctl_table_root.default_set.list),}}, + .root = &sysctl_table_root, + .set = &sysctl_table_root.default_set, + }, }; static struct ctl_table_root sysctl_table_root = { .root_list = LIST_HEAD_INIT(sysctl_table_root.root_list), - .default_set.list = LIST_HEAD_INIT(root_table_header.ctl_entry), + .default_set.list = LIST_HEAD_INIT(sysctl_root_dir.header.ctl_entry), .default_set.root = &sysctl_table_root, }; static DEFINE_SPINLOCK(sysctl_lock); +static void drop_sysctl_table(struct ctl_table_header *header); + static int namecmp(const char *name1, int len1, const char *name2, int len2) { int minlen; @@ -66,29 +69,18 @@ static int namecmp(const char 
*name1, int len1, const char *name2, int len2) } static struct ctl_table *find_entry(struct ctl_table_header **phead, - struct ctl_table_set *set, - struct ctl_table_header *dir_head, struct ctl_table *dir, + struct ctl_table_set *set, struct ctl_dir *dir, const char *name, int namelen) { struct ctl_table_header *head; struct ctl_table *entry; - if (dir_head->set == set) { - for (entry = dir; entry->procname; entry++) { - const char *procname = entry->procname; - if (namecmp(procname, strlen(procname), name, namelen) == 0) { - *phead = dir_head; - return entry; - } - } - } - list_for_each_entry(head, &set->list, ctl_entry) { if (head->unregistering) continue; - if (head->attached_to != dir) + if (head->parent != dir) continue; - for (entry = head->attached_by; entry->procname; entry++) { + for (entry = head->ctl_table; entry->procname; entry++) { const char *procname = entry->procname; if (namecmp(procname, strlen(procname), name, namelen) == 0) { *phead = head; @@ -103,6 +95,7 @@ static void init_header(struct ctl_table_header *head, struct ctl_table_root *root, struct ctl_table_set *set, struct ctl_table *table) { + head->ctl_table = table; head->ctl_table_arg = table; INIT_LIST_HEAD(&head->ctl_entry); head->used = 0; @@ -119,9 +112,10 @@ static void erase_header(struct ctl_table_header *head) list_del_init(&head->ctl_entry); } -static void insert_header(struct ctl_table_header *header) +static void insert_header(struct ctl_dir *dir, struct ctl_table_header *header) { - header->parent->count++; + header->parent = dir; + header->parent->header.nreg++; list_add_tail(&header->ctl_entry, &header->set->list); } @@ -219,8 +213,7 @@ lookup_header_list(struct ctl_table_root *root, struct nsproxy *namespaces) } static struct ctl_table *lookup_entry(struct ctl_table_header **phead, - struct ctl_table_header *dir_head, - struct ctl_table *dir, + struct ctl_dir *dir, const char *name, int namelen) { struct ctl_table_header *head; @@ -232,7 +225,7 @@ static struct ctl_table 
*lookup_entry(struct ctl_table_header **phead, root = &sysctl_table_root; do { set = lookup_header_set(root, current->nsproxy); - entry = find_entry(&head, set, dir_head, dir, name, namelen); + entry = find_entry(&head, set, dir, name, namelen); if (entry && use_table(head)) *phead = head; else @@ -244,7 +237,7 @@ static struct ctl_table *lookup_entry(struct ctl_table_header **phead, return entry; } -static struct ctl_table_header *next_usable_entry(struct ctl_table *dir, +static struct ctl_table_header *next_usable_entry(struct ctl_dir *dir, struct ctl_table_root *root, struct list_head *tmp) { struct nsproxy *namespaces = current->nsproxy; @@ -256,8 +249,8 @@ static struct ctl_table_header *next_usable_entry(struct ctl_table *dir, head = list_entry(tmp, struct ctl_table_header, ctl_entry); root = head->root; - if (head->attached_to != dir || - !head->attached_by->procname || + if (head->parent != dir || + !head->ctl_table->procname || !use_table(head)) goto next; @@ -281,47 +274,35 @@ out: return NULL; } -static void first_entry( - struct ctl_table_header *dir_head, struct ctl_table *dir, +static void first_entry(struct ctl_dir *dir, struct ctl_table_header **phead, struct ctl_table **pentry) { - struct ctl_table_header *head = dir_head; - struct ctl_table *entry = dir; + struct ctl_table_header *head; + struct ctl_table *entry = NULL; spin_lock(&sysctl_lock); - if (entry->procname) { - use_table(head); - } else { - head = next_usable_entry(dir, &sysctl_table_root, - &sysctl_table_root.default_set.list); - if (head) - entry = head->attached_by; - } + head = next_usable_entry(dir, &sysctl_table_root, + &sysctl_table_root.default_set.list); spin_unlock(&sysctl_lock); + if (head) + entry = head->ctl_table; *phead = head; *pentry = entry; } -static void next_entry(struct ctl_table *dir, - struct ctl_table_header **phead, struct ctl_table **pentry) +static void next_entry(struct ctl_table_header **phead, struct ctl_table **pentry) { struct ctl_table_header *head = 
*phead; struct ctl_table *entry = *pentry; entry++; if (!entry->procname) { - struct ctl_table_root *root = head->root; - struct list_head *tmp = &head->ctl_entry; - if (head->attached_to != dir) { - root = &sysctl_table_root; - tmp = &sysctl_table_root.default_set.list; - } spin_lock(&sysctl_lock); unuse_table(head); - head = next_usable_entry(dir, root, tmp); + head = next_usable_entry(head->parent, head->root, &head->ctl_entry); spin_unlock(&sysctl_lock); if (head) - entry = head->attached_by; + entry = head->ctl_table; } *phead = head; *pentry = entry; @@ -381,7 +362,7 @@ static struct inode *proc_sys_make_inode(struct super_block *sb, inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; inode->i_mode = table->mode; - if (!table->child) { + if (!S_ISDIR(table->mode)) { inode->i_mode |= S_IFREG; inode->i_op = &proc_sys_inode_operations; inode->i_fop = &proc_sys_file_operations; @@ -398,7 +379,7 @@ static struct ctl_table_header *grab_header(struct inode *inode) { struct ctl_table_header *head = PROC_I(inode)->sysctl; if (!head) - head = &root_table_header; + head = &sysctl_root_dir.header; return sysctl_head_grab(head); } @@ -406,24 +387,19 @@ static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) { struct ctl_table_header *head = grab_header(dir); - struct ctl_table *table = PROC_I(dir)->sysctl_entry; struct ctl_table_header *h = NULL; struct qstr *name = &dentry->d_name; struct ctl_table *p; struct inode *inode; struct dentry *err = ERR_PTR(-ENOENT); + struct ctl_dir *ctl_dir; if (IS_ERR(head)) return ERR_CAST(head); - if (table && !table->child) { - WARN_ON(1); - goto out; - } + ctl_dir = container_of(head, struct ctl_dir, header); - table = table ? 
table->child : &head->ctl_table[1]; - - p = lookup_entry(&h, head, table, name->name, name->len); + p = lookup_entry(&h, ctl_dir, name->name, name->len); if (!p) goto out; @@ -586,21 +562,16 @@ static int proc_sys_readdir(struct file *filp, void *dirent, filldir_t filldir) struct dentry *dentry = filp->f_path.dentry; struct inode *inode = dentry->d_inode; struct ctl_table_header *head = grab_header(inode); - struct ctl_table *table = PROC_I(inode)->sysctl_entry; struct ctl_table_header *h = NULL; struct ctl_table *entry; + struct ctl_dir *ctl_dir; unsigned long pos; int ret = -EINVAL; if (IS_ERR(head)) return PTR_ERR(head); - if (table && !table->child) { - WARN_ON(1); - goto out; - } - - table = table ? table->child : &head->ctl_table[1]; + ctl_dir = container_of(head, struct ctl_dir, header); ret = 0; /* Avoid a switch here: arm builds fail with missing __cmpdi2 */ @@ -618,7 +589,7 @@ static int proc_sys_readdir(struct file *filp, void *dirent, filldir_t filldir) } pos = 2; - for (first_entry(head, table, &h, &entry); h; next_entry(table, &h, &entry)) { + for (first_entry(ctl_dir, &h, &entry); h; next_entry(&h, &entry)) { ret = scan(h, entry, &pos, filp, dirent, filldir); if (ret) { sysctl_head_finish(h); @@ -779,52 +750,86 @@ static const struct dentry_operations proc_sys_dentry_operations = { .d_compare = proc_sys_compare, }; -static struct ctl_table *is_branch_in(struct ctl_table *branch, - struct ctl_table *table) +static struct ctl_dir *find_subdir(struct ctl_table_set *set, struct ctl_dir *dir, + const char *name, int namelen) { - struct ctl_table *p; - const char *s = branch->procname; + struct ctl_table_header *head; + struct ctl_table *entry; - /* branch should have named subdirectory as its first element */ - if (!s || !branch->child) - return NULL; + entry = find_entry(&head, set, dir, name, namelen); + if (!entry) + return ERR_PTR(-ENOENT); + if (S_ISDIR(entry->mode)) + return container_of(head, struct ctl_dir, header); + return ERR_PTR(-ENOTDIR); +} 
+ +static struct ctl_dir *new_dir(struct ctl_table_set *set, + const char *name, int namelen) +{ + struct ctl_table *table; + struct ctl_dir *new; + char *new_name; - /* ... and nothing else */ - if (branch[1].procname) + new = kzalloc(sizeof(*new) + sizeof(struct ctl_table)*2 + + namelen + 1, GFP_KERNEL); + if (!new) return NULL; - /* table should contain subdirectory with the same name */ - for (p = table; p->procname; p++) { - if (!p->child) - continue; - if (p->procname && strcmp(p->procname, s) == 0) - return p; - } - return NULL; + table = (struct ctl_table *)(new + 1); + new_name = (char *)(table + 2); + memcpy(new_name, name, namelen); + new_name[namelen] = '\0'; + table[0].procname = new_name; + table[0].mode = S_IFDIR|S_IRUGO|S_IXUGO; + init_header(&new->header, set->root, set, table); + + return new; } -/* see if attaching q to p would be an improvement */ -static void try_attach(struct ctl_table_header *p, struct ctl_table_header *q) +static struct ctl_dir *get_subdir(struct ctl_table_set *set, + struct ctl_dir *dir, const char *name, int namelen) { - struct ctl_table *to = p->ctl_table, *by = q->ctl_table; - struct ctl_table *next; - int is_better = 0; - int not_in_parent = !p->attached_by; - - while ((next = is_branch_in(by, to)) != NULL) { - if (by == q->attached_by) - is_better = 1; - if (to == p->attached_by) - not_in_parent = 1; - by = by->child; - to = next->child; - } + struct ctl_dir *subdir, *new = NULL; - if (is_better && not_in_parent) { - q->attached_by = by; - q->attached_to = to; - q->parent = p; + spin_lock(&sysctl_lock); + subdir = find_subdir(dir->header.set, dir, name, namelen); + if (!IS_ERR(subdir)) + goto found; + if ((PTR_ERR(subdir) == -ENOENT) && set != dir->header.set) + subdir = find_subdir(set, dir, name, namelen); + if (!IS_ERR(subdir)) + goto found; + if (PTR_ERR(subdir) != -ENOENT) + goto failed; + + spin_unlock(&sysctl_lock); + new = new_dir(set, name, namelen); + spin_lock(&sysctl_lock); + subdir = ERR_PTR(-ENOMEM); + if 
(!new) + goto failed; + + subdir = find_subdir(set, dir, name, namelen); + if (!IS_ERR(subdir)) + goto found; + if (PTR_ERR(subdir) != -ENOENT) + goto failed; + + insert_header(dir, &new->header); + subdir = new; +found: + subdir->header.nreg++; +failed: + if (unlikely(IS_ERR(subdir))) { + printk(KERN_ERR "sysctl could not get directory: %*.*s %ld\n", + namelen, namelen, name, PTR_ERR(subdir)); } + drop_sysctl_table(&dir->header); + if (new) + drop_sysctl_table(&new->header); + spin_unlock(&sysctl_lock); + return subdir; } static int sysctl_check_table_dups(const char *path, struct ctl_table *old, @@ -846,24 +851,14 @@ static int sysctl_check_table_dups(const char *path, struct ctl_table *old, } static int sysctl_check_dups(struct nsproxy *namespaces, - struct ctl_table_header *header, + struct ctl_dir *dir, const char *path, struct ctl_table *table) { struct ctl_table_root *root; struct ctl_table_set *set; - struct ctl_table_header *dir_head, *head; - struct ctl_table *dir_table; + struct ctl_table_header *head; int error = 0; - /* No dups if we are the only member of our directory */ - if (header->attached_by != table) - return 0; - - dir_head = header->parent; - dir_table = header->attached_to; - - error = sysctl_check_table_dups(path, dir_table, table); - root = &sysctl_table_root; do { set = lookup_header_set(root, namespaces); @@ -871,9 +866,9 @@ static int sysctl_check_dups(struct nsproxy *namespaces, list_for_each_entry(head, &set->list, ctl_entry) { if (head->unregistering) continue; - if (head->attached_to != dir_table) + if (head->parent != dir) continue; - error = sysctl_check_table_dups(path, head->attached_by, + error = sysctl_check_table_dups(path, head->ctl_table, table); } root = list_entry(root->root_list.next, @@ -977,47 +972,25 @@ struct ctl_table_header *__register_sysctl_table( const char *path, struct ctl_table *table) { struct ctl_table_header *header; - struct ctl_table *new, **prevp; const char *name, *nextname; - unsigned int npath = 0; 
struct ctl_table_set *set; - size_t path_bytes = 0; - char *new_name; - - /* Count the path components */ - for (name = path; name; name = nextname) { - int namelen; - nextname = strchr(name, '/'); - if (nextname) { - namelen = nextname - name; - nextname++; - } else { - namelen = strlen(name); - } - if (namelen == 0) - continue; - path_bytes += namelen + 1; - npath++; - } + struct ctl_dir *dir; - /* - * For each path component, allocate a 2-element ctl_table array. - * The first array element will be filled with the sysctl entry - * for this, the second will be the sentinel (procname == 0). - * - * We allocate everything in one go so that we don't have to - * worry about freeing additional memory in unregister_sysctl_table. - */ - header = kzalloc(sizeof(struct ctl_table_header) + path_bytes + - (2 * npath * sizeof(struct ctl_table)), GFP_KERNEL); + header = kzalloc(sizeof(struct ctl_table_header), GFP_KERNEL); if (!header) return NULL; - new = (struct ctl_table *) (header + 1); - new_name = (char *)(new + (2 * npath)); + init_header(header, root, NULL, table); + if (sysctl_check_table(path, table)) + goto fail; + + spin_lock(&sysctl_lock); + header->set = set = lookup_header_set(root, namespaces); + dir = &sysctl_root_dir; + dir->header.nreg++; + spin_unlock(&sysctl_lock); - /* Now connect the dots */ - prevp = &header->ctl_table; + /* Find the directory for the ctl_table */ for (name = path; name; name = nextname) { int namelen; nextname = strchr(name, '/'); @@ -1029,51 +1002,21 @@ struct ctl_table_header *__register_sysctl_table( } if (namelen == 0) continue; - memcpy(new_name, name, namelen); - new_name[namelen] = '\0'; - - new->procname = new_name; - new->mode = 0555; - - *prevp = new; - prevp = &new->child; - new += 2; - new_name += namelen + 1; + dir = get_subdir(set, dir, name, namelen); + if (IS_ERR(dir)) + goto fail; } - *prevp = table; - - init_header(header, root, NULL, table); - if (sysctl_check_table(path, table)) - goto fail; - 
spin_lock(&sysctl_lock); - header->set = lookup_header_set(root, namespaces); - header->attached_by = header->ctl_table; - header->attached_to = &root_table[1]; - header->parent = &root_table_header; - set = header->set; - root = header->root; - for (;;) { - struct ctl_table_header *p; - list_for_each_entry(p, &set->list, ctl_entry) { - if (p->unregistering) - continue; - try_attach(p, header); - } - if (root == &sysctl_table_root) - break; - root = list_entry(root->root_list.prev, - struct ctl_table_root, root_list); - set = lookup_header_set(root, namespaces); - } - if (sysctl_check_dups(namespaces, header, path, table)) - goto fail_locked; - insert_header(header); + if (sysctl_check_dups(namespaces, dir, path, table)) + goto fail_put_dir_locked; + insert_header(dir, header); + drop_sysctl_table(&dir->header); spin_unlock(&sysctl_lock); return header; -fail_locked: +fail_put_dir_locked: + drop_sysctl_table(&dir->header); spin_unlock(&sysctl_lock); fail: kfree(header); @@ -1299,16 +1242,17 @@ EXPORT_SYMBOL(register_sysctl_table); static void drop_sysctl_table(struct ctl_table_header *header) { + struct ctl_dir *parent = header->parent; + if (--header->nreg) return; start_unregistering(header); - if (!--header->parent->count) { - WARN_ON(1); - kfree_rcu(header->parent, rcu); - } if (!--header->count) kfree_rcu(header, rcu); + + if (parent) + drop_sysctl_table(&parent->header); } /** -- cgit v1.2.3 From 6980128fe1b834c92a85e556ca8198030f0d8d01 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sat, 21 Jan 2012 20:09:45 -0800 Subject: sysctl: Add sysctl_print_dir and use it in get_subdir When there are errors it is very nice to know the full sysctl path. Add a simple function that computes the sysctl path and prints it out. Signed-off-by: Eric W. 
Biederman --- fs/proc/proc_sysctl.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'fs/proc/proc_sysctl.c') diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 3c0767d5a55..a78556514a8 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -53,6 +53,13 @@ static DEFINE_SPINLOCK(sysctl_lock); static void drop_sysctl_table(struct ctl_table_header *header); +static void sysctl_print_dir(struct ctl_dir *dir) +{ + if (dir->header.parent) + sysctl_print_dir(dir->header.parent); + printk(KERN_CONT "%s/", dir->header.ctl_table[0].procname); +} + static int namecmp(const char *name1, int len1, const char *name2, int len2) { int minlen; @@ -822,7 +829,9 @@ found: subdir->header.nreg++; failed: if (unlikely(IS_ERR(subdir))) { - printk(KERN_ERR "sysctl could not get directory: %*.*s %ld\n", + printk(KERN_ERR "sysctl could not get directory: "); + sysctl_print_dir(dir); + printk(KERN_CONT "/%*.*s %ld\n", namelen, namelen, name, PTR_ERR(subdir)); } drop_sysctl_table(&dir->header); -- cgit v1.2.3 From 0e47c99d7fe25e0f3907d9f3401079169d904891 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sat, 7 Jan 2012 23:24:30 -0800 Subject: sysctl: Replace root_list with links between sysctl_table_sets. Piecing together directories by looking first in one directory tree, then in another directory tree and finally in a third directory tree makes it hard to verify that some directory entries are not multiply defined and makes it hard to create efficient implementations of the sysctl filesystem. Replace the sysctl-wide list of roots with autogenerated links from the core sysctl directory tree to the other sysctl directory trees. This simplifies sysctl directory reading and lookups as now only entries in a single sysctl directory tree need to be considered.
Benchmark before: make-dummies 0 999 -> 0.44s rmmod dummy -> 0.065s make-dummies 0 9999 -> 1m36s rmmod dummy -> 0.4s Benchmark after: make-dummies 0 999 -> 0.63s rmmod dummy -> 0.12s make-dummies 0 9999 -> 2m35s rmmod dummy -> 18s The slowdown is caused by the lookups used in insert_headers and put_links to see if we need to add links or remove links. Signed-off-by: Eric W. Biederman --- fs/proc/proc_sysctl.c | 397 +++++++++++++++++++++++++++++++++++++------------- 1 file changed, 295 insertions(+), 102 deletions(-) (limited to 'fs/proc/proc_sysctl.c') diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index a78556514a8..ec54a57c469 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -32,26 +32,26 @@ static struct ctl_table root_table[] = { }, { } }; -static struct ctl_table_root sysctl_table_root; -static struct ctl_dir sysctl_root_dir = { - .header = { +static struct ctl_table_root sysctl_table_root = { + .default_set.list = LIST_HEAD_INIT(sysctl_table_root.default_set.dir.header.ctl_entry), + .default_set.dir.header = { {{.count = 1, .nreg = 1, .ctl_table = root_table, .ctl_entry = LIST_HEAD_INIT(sysctl_table_root.default_set.list),}}, + .ctl_table_arg = root_table, .root = &sysctl_table_root, .set = &sysctl_table_root.default_set, }, }; -static struct ctl_table_root sysctl_table_root = { - .root_list = LIST_HEAD_INIT(sysctl_table_root.root_list), - .default_set.list = LIST_HEAD_INIT(sysctl_root_dir.header.ctl_entry), - .default_set.root = &sysctl_table_root, -}; static DEFINE_SPINLOCK(sysctl_lock); static void drop_sysctl_table(struct ctl_table_header *header); +static int sysctl_follow_link(struct ctl_table_header **phead, + struct ctl_table **pentry, struct nsproxy *namespaces); +static int insert_links(struct ctl_table_header *head); +static void put_links(struct ctl_table_header *header); static void sysctl_print_dir(struct ctl_dir *dir) { @@ -76,9 +76,9 @@ static int namecmp(const char *name1, int len1, const char *name2, int len2) } 
static struct ctl_table *find_entry(struct ctl_table_header **phead, - struct ctl_table_set *set, struct ctl_dir *dir, - const char *name, int namelen) + struct ctl_dir *dir, const char *name, int namelen) { + struct ctl_table_set *set = dir->header.set; struct ctl_table_header *head; struct ctl_table *entry; @@ -119,11 +119,21 @@ static void erase_header(struct ctl_table_header *head) list_del_init(&head->ctl_entry); } -static void insert_header(struct ctl_dir *dir, struct ctl_table_header *header) +static int insert_header(struct ctl_dir *dir, struct ctl_table_header *header) { + int err; + + dir->header.nreg++; header->parent = dir; - header->parent->header.nreg++; + err = insert_links(header); + if (err) + goto fail_links; list_add_tail(&header->ctl_entry, &header->set->list); + return 0; +fail_links: + header->parent = NULL; + drop_sysctl_table(&dir->header); + return err; } /* called under sysctl_lock */ @@ -212,72 +222,39 @@ lookup_header_set(struct ctl_table_root *root, struct nsproxy *namespaces) return set; } -static struct list_head * -lookup_header_list(struct ctl_table_root *root, struct nsproxy *namespaces) -{ - struct ctl_table_set *set = lookup_header_set(root, namespaces); - return &set->list; -} - static struct ctl_table *lookup_entry(struct ctl_table_header **phead, struct ctl_dir *dir, const char *name, int namelen) { struct ctl_table_header *head; struct ctl_table *entry; - struct ctl_table_root *root; - struct ctl_table_set *set; spin_lock(&sysctl_lock); - root = &sysctl_table_root; - do { - set = lookup_header_set(root, current->nsproxy); - entry = find_entry(&head, set, dir, name, namelen); - if (entry && use_table(head)) - *phead = head; - else - entry = NULL; - root = list_entry(root->root_list.next, - struct ctl_table_root, root_list); - } while (!entry && root != &sysctl_table_root); + entry = find_entry(&head, dir, name, namelen); + if (entry && use_table(head)) + *phead = head; + else + entry = NULL; spin_unlock(&sysctl_lock); return 
entry; } static struct ctl_table_header *next_usable_entry(struct ctl_dir *dir, - struct ctl_table_root *root, struct list_head *tmp) + struct list_head *tmp) { - struct nsproxy *namespaces = current->nsproxy; - struct list_head *header_list; + struct ctl_table_set *set = dir->header.set; struct ctl_table_header *head; - goto next; - for (;;) { + for (tmp = tmp->next; tmp != &set->list; tmp = tmp->next) { head = list_entry(tmp, struct ctl_table_header, ctl_entry); - root = head->root; if (head->parent != dir || !head->ctl_table->procname || !use_table(head)) - goto next; - - return head; - next: - tmp = tmp->next; - header_list = lookup_header_list(root, namespaces); - if (tmp != header_list) continue; - do { - root = list_entry(root->root_list.next, - struct ctl_table_root, root_list); - if (root == &sysctl_table_root) - goto out; - header_list = lookup_header_list(root, namespaces); - } while (list_empty(header_list)); - tmp = header_list->next; + return head; } -out: return NULL; } @@ -288,8 +265,7 @@ static void first_entry(struct ctl_dir *dir, struct ctl_table *entry = NULL; spin_lock(&sysctl_lock); - head = next_usable_entry(dir, &sysctl_table_root, - &sysctl_table_root.default_set.list); + head = next_usable_entry(dir, &dir->header.set->list); spin_unlock(&sysctl_lock); if (head) entry = head->ctl_table; @@ -306,7 +282,7 @@ static void next_entry(struct ctl_table_header **phead, struct ctl_table **pentr if (!entry->procname) { spin_lock(&sysctl_lock); unuse_table(head); - head = next_usable_entry(head->parent, head->root, &head->ctl_entry); + head = next_usable_entry(head->parent, &head->ctl_entry); spin_unlock(&sysctl_lock); if (head) entry = head->ctl_table; @@ -317,9 +293,6 @@ static void next_entry(struct ctl_table_header **phead, struct ctl_table **pentr void register_sysctl_root(struct ctl_table_root *root) { - spin_lock(&sysctl_lock); - list_add_tail(&root->root_list, &sysctl_table_root.root_list); - spin_unlock(&sysctl_lock); } /* @@ -386,7 +359,7 @@ 
static struct ctl_table_header *grab_header(struct inode *inode) { struct ctl_table_header *head = PROC_I(inode)->sysctl; if (!head) - head = &sysctl_root_dir.header; + head = &sysctl_table_root.default_set.dir.header; return sysctl_head_grab(head); } @@ -400,6 +373,7 @@ static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry, struct inode *inode; struct dentry *err = ERR_PTR(-ENOENT); struct ctl_dir *ctl_dir; + int ret; if (IS_ERR(head)) return ERR_CAST(head); @@ -410,6 +384,11 @@ static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry, if (!p) goto out; + ret = sysctl_follow_link(&h, &p, current->nsproxy); + err = ERR_PTR(ret); + if (ret) + goto out; + err = ERR_PTR(-ENOMEM); inode = proc_sys_make_inode(dir->i_sb, h ? h : head, p); if (h) @@ -547,6 +526,25 @@ static int proc_sys_fill_cache(struct file *filp, void *dirent, return !!filldir(dirent, qname.name, qname.len, filp->f_pos, ino, type); } +static int proc_sys_link_fill_cache(struct file *filp, void *dirent, + filldir_t filldir, + struct ctl_table_header *head, + struct ctl_table *table) +{ + int err, ret = 0; + head = sysctl_head_grab(head); + + /* It is not an error if we can not follow the link ignore it */ + err = sysctl_follow_link(&head, &table, current->nsproxy); + if (err) + goto out; + + ret = proc_sys_fill_cache(filp, dirent, filldir, head, table); +out: + sysctl_head_finish(head); + return ret; +} + static int scan(struct ctl_table_header *head, ctl_table *table, unsigned long *pos, struct file *file, void *dirent, filldir_t filldir) @@ -556,7 +554,10 @@ static int scan(struct ctl_table_header *head, ctl_table *table, if ((*pos)++ < file->f_pos) return 0; - res = proc_sys_fill_cache(file, dirent, filldir, head, table); + if (unlikely(S_ISLNK(table->mode))) + res = proc_sys_link_fill_cache(file, dirent, filldir, head, table); + else + res = proc_sys_fill_cache(file, dirent, filldir, head, table); if (res == 0) file->f_pos = *pos; @@ -757,13 +758,13 @@ 
static const struct dentry_operations proc_sys_dentry_operations = { .d_compare = proc_sys_compare, }; -static struct ctl_dir *find_subdir(struct ctl_table_set *set, struct ctl_dir *dir, - const char *name, int namelen) +static struct ctl_dir *find_subdir(struct ctl_dir *dir, + const char *name, int namelen) { struct ctl_table_header *head; struct ctl_table *entry; - entry = find_entry(&head, set, dir, name, namelen); + entry = find_entry(&head, dir, name, namelen); if (!entry) return ERR_PTR(-ENOENT); if (S_ISDIR(entry->mode)) @@ -772,7 +773,7 @@ static struct ctl_dir *find_subdir(struct ctl_table_set *set, struct ctl_dir *di } static struct ctl_dir *new_dir(struct ctl_table_set *set, - const char *name, int namelen) + const char *name, int namelen) { struct ctl_table *table; struct ctl_dir *new; @@ -789,22 +790,19 @@ static struct ctl_dir *new_dir(struct ctl_table_set *set, new_name[namelen] = '\0'; table[0].procname = new_name; table[0].mode = S_IFDIR|S_IRUGO|S_IXUGO; - init_header(&new->header, set->root, set, table); + init_header(&new->header, set->dir.header.root, set, table); return new; } -static struct ctl_dir *get_subdir(struct ctl_table_set *set, - struct ctl_dir *dir, const char *name, int namelen) +static struct ctl_dir *get_subdir(struct ctl_dir *dir, + const char *name, int namelen) { + struct ctl_table_set *set = dir->header.set; struct ctl_dir *subdir, *new = NULL; spin_lock(&sysctl_lock); - subdir = find_subdir(dir->header.set, dir, name, namelen); - if (!IS_ERR(subdir)) - goto found; - if ((PTR_ERR(subdir) == -ENOENT) && set != dir->header.set) - subdir = find_subdir(set, dir, name, namelen); + subdir = find_subdir(dir, name, namelen); if (!IS_ERR(subdir)) goto found; if (PTR_ERR(subdir) != -ENOENT) @@ -817,13 +815,14 @@ static struct ctl_dir *get_subdir(struct ctl_table_set *set, if (!new) goto failed; - subdir = find_subdir(set, dir, name, namelen); + subdir = find_subdir(dir, name, namelen); if (!IS_ERR(subdir)) goto found; if 
(PTR_ERR(subdir) != -ENOENT) goto failed; - insert_header(dir, &new->header); + if (insert_header(dir, &new->header)) + goto failed; subdir = new; found: subdir->header.nreg++; @@ -841,6 +840,57 @@ failed: return subdir; } +static struct ctl_dir *xlate_dir(struct ctl_table_set *set, struct ctl_dir *dir) +{ + struct ctl_dir *parent; + const char *procname; + if (!dir->header.parent) + return &set->dir; + parent = xlate_dir(set, dir->header.parent); + if (IS_ERR(parent)) + return parent; + procname = dir->header.ctl_table[0].procname; + return find_subdir(parent, procname, strlen(procname)); +} + +static int sysctl_follow_link(struct ctl_table_header **phead, + struct ctl_table **pentry, struct nsproxy *namespaces) +{ + struct ctl_table_header *head; + struct ctl_table_root *root; + struct ctl_table_set *set; + struct ctl_table *entry; + struct ctl_dir *dir; + int ret; + + /* Get out quickly if not a link */ + if (!S_ISLNK((*pentry)->mode)) + return 0; + + ret = 0; + spin_lock(&sysctl_lock); + root = (*pentry)->data; + set = lookup_header_set(root, namespaces); + dir = xlate_dir(set, (*phead)->parent); + if (IS_ERR(dir)) + ret = PTR_ERR(dir); + else { + const char *procname = (*pentry)->procname; + head = NULL; + entry = find_entry(&head, dir, procname, strlen(procname)); + ret = -ENOENT; + if (entry && use_table(head)) { + unuse_table(*phead); + *phead = head; + *pentry = entry; + ret = 0; + } + } + + spin_unlock(&sysctl_lock); + return ret; +} + static int sysctl_check_table_dups(const char *path, struct ctl_table *old, struct ctl_table *table) { @@ -859,30 +909,21 @@ static int sysctl_check_table_dups(const char *path, struct ctl_table *old, return error; } -static int sysctl_check_dups(struct nsproxy *namespaces, - struct ctl_dir *dir, +static int sysctl_check_dups(struct ctl_dir *dir, const char *path, struct ctl_table *table) { - struct ctl_table_root *root; struct ctl_table_set *set; struct ctl_table_header *head; int error = 0; - root = &sysctl_table_root; - 
do { - set = lookup_header_set(root, namespaces); - - list_for_each_entry(head, &set->list, ctl_entry) { - if (head->unregistering) - continue; - if (head->parent != dir) - continue; - error = sysctl_check_table_dups(path, head->ctl_table, - table); - } - root = list_entry(root->root_list.next, - struct ctl_table_root, root_list); - } while (root != &sysctl_table_root); + set = dir->header.set; + list_for_each_entry(head, &set->list, ctl_entry) { + if (head->unregistering) + continue; + if (head->parent != dir) + continue; + error = sysctl_check_table_dups(path, head->ctl_table, table); + } return error; } @@ -932,6 +973,115 @@ static int sysctl_check_table(const char *path, struct ctl_table *table) return err; } +static struct ctl_table_header *new_links(struct ctl_dir *dir, struct ctl_table *table, + struct ctl_table_root *link_root) +{ + struct ctl_table *link_table, *entry, *link; + struct ctl_table_header *links; + char *link_name; + int nr_entries, name_bytes; + + name_bytes = 0; + nr_entries = 0; + for (entry = table; entry->procname; entry++) { + nr_entries++; + name_bytes += strlen(entry->procname) + 1; + } + + links = kzalloc(sizeof(struct ctl_table_header) + + sizeof(struct ctl_table)*(nr_entries + 1) + + name_bytes, + GFP_KERNEL); + + if (!links) + return NULL; + + link_table = (struct ctl_table *)(links + 1); + link_name = (char *)&link_table[nr_entries + 1]; + + for (link = link_table, entry = table; entry->procname; link++, entry++) { + int len = strlen(entry->procname) + 1; + memcpy(link_name, entry->procname, len); + link->procname = link_name; + link->mode = S_IFLNK|S_IRWXUGO; + link->data = link_root; + link_name += len; + } + init_header(links, dir->header.root, dir->header.set, link_table); + links->nreg = nr_entries; + + return links; +} + +static bool get_links(struct ctl_dir *dir, + struct ctl_table *table, struct ctl_table_root *link_root) +{ + struct ctl_table_header *head; + struct ctl_table *entry, *link; + + /* Are there links available 
for every entry in table? */ + for (entry = table; entry->procname; entry++) { + const char *procname = entry->procname; + link = find_entry(&head, dir, procname, strlen(procname)); + if (!link) + return false; + if (S_ISDIR(link->mode) && S_ISDIR(entry->mode)) + continue; + if (S_ISLNK(link->mode) && (link->data == link_root)) + continue; + return false; + } + + /* The checks passed. Increase the registration count on the links */ + for (entry = table; entry->procname; entry++) { + const char *procname = entry->procname; + link = find_entry(&head, dir, procname, strlen(procname)); + head->nreg++; + } + return true; +} + +static int insert_links(struct ctl_table_header *head) +{ + struct ctl_table_set *root_set = &sysctl_table_root.default_set; + struct ctl_dir *core_parent = NULL; + struct ctl_table_header *links; + int err; + + if (head->set == root_set) + return 0; + + core_parent = xlate_dir(root_set, head->parent); + if (IS_ERR(core_parent)) + return 0; + + if (get_links(core_parent, head->ctl_table, head->root)) + return 0; + + core_parent->header.nreg++; + spin_unlock(&sysctl_lock); + + links = new_links(core_parent, head->ctl_table, head->root); + + spin_lock(&sysctl_lock); + err = -ENOMEM; + if (!links) + goto out; + + err = 0; + if (get_links(core_parent, head->ctl_table, head->root)) { + kfree(links); + goto out; + } + + err = insert_header(core_parent, links); + if (err) + kfree(links); +out: + drop_sysctl_table(&core_parent->header); + return err; +} + /** * __register_sysctl_table - register a leaf sysctl table * @root: List of sysctl headers to register on @@ -980,6 +1130,7 @@ struct ctl_table_header *__register_sysctl_table( struct nsproxy *namespaces, const char *path, struct ctl_table *table) { + struct ctl_table_header *links = NULL; struct ctl_table_header *header; const char *name, *nextname; struct ctl_table_set *set; @@ -995,7 +1146,7 @@ struct ctl_table_header *__register_sysctl_table( spin_lock(&sysctl_lock); header->set = set = 
lookup_header_set(root, namespaces); - dir = &sysctl_root_dir; + dir = &set->dir; dir->header.nreg++; spin_unlock(&sysctl_lock); @@ -1012,22 +1163,28 @@ struct ctl_table_header *__register_sysctl_table( if (namelen == 0) continue; - dir = get_subdir(set, dir, name, namelen); + dir = get_subdir(dir, name, namelen); if (IS_ERR(dir)) goto fail; } + spin_lock(&sysctl_lock); - if (sysctl_check_dups(namespaces, dir, path, table)) + if (sysctl_check_dups(dir, path, table)) + goto fail_put_dir_locked; + + if (insert_header(dir, header)) goto fail_put_dir_locked; - insert_header(dir, header); + drop_sysctl_table(&dir->header); spin_unlock(&sysctl_lock); return header; + fail_put_dir_locked: drop_sysctl_table(&dir->header); spin_unlock(&sysctl_lock); fail: + kfree(links); kfree(header); dump_stack(); return NULL; @@ -1249,6 +1406,40 @@ struct ctl_table_header *register_sysctl_table(struct ctl_table *table) } EXPORT_SYMBOL(register_sysctl_table); +static void put_links(struct ctl_table_header *header) +{ + struct ctl_table_set *root_set = &sysctl_table_root.default_set; + struct ctl_table_root *root = header->root; + struct ctl_dir *parent = header->parent; + struct ctl_dir *core_parent; + struct ctl_table *entry; + + if (header->set == root_set) + return; + + core_parent = xlate_dir(root_set, parent); + if (IS_ERR(core_parent)) + return; + + for (entry = header->ctl_table; entry->procname; entry++) { + struct ctl_table_header *link_head; + struct ctl_table *link; + const char *name = entry->procname; + + link = find_entry(&link_head, core_parent, name, strlen(name)); + if (link && + ((S_ISDIR(link->mode) && S_ISDIR(entry->mode)) || + (S_ISLNK(link->mode) && (link->data == root)))) { + drop_sysctl_table(link_head); + } + else { + printk(KERN_ERR "sysctl link missing during unregister: "); + sysctl_print_dir(parent); + printk(KERN_CONT "/%s\n", name); + } + } +} + static void drop_sysctl_table(struct ctl_table_header *header) { struct ctl_dir *parent = header->parent; @@ 
-1256,6 +1447,7 @@ static void drop_sysctl_table(struct ctl_table_header *header) if (--header->nreg) return; + put_links(header); start_unregistering(header); if (!--header->count) kfree_rcu(header, rcu); @@ -1301,13 +1493,14 @@ void unregister_sysctl_table(struct ctl_table_header * header) } EXPORT_SYMBOL(unregister_sysctl_table); -void setup_sysctl_set(struct ctl_table_set *p, +void setup_sysctl_set(struct ctl_table_set *set, struct ctl_table_root *root, int (*is_seen)(struct ctl_table_set *)) { - INIT_LIST_HEAD(&p->list); - p->root = root; - p->is_seen = is_seen; + memset(set, sizeof(*set), 0); + INIT_LIST_HEAD(&set->list); + set->is_seen = is_seen; + init_header(&set->dir.header, root, set, root_table); } void retire_sysctl_set(struct ctl_table_set *set) -- cgit v1.2.3 From 60a47a2e823cbe6b609346bffff61a00c0c76470 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 8 Jan 2012 00:02:37 -0800 Subject: sysctl: Modify __register_sysctl_paths to take a set instead of a root and an nsproxy An nsproxy argument here has always been awkward and now the nsproxy argument is completely unnecessary, so remove it, replacing it with the set we want the registered tables to show up in. Signed-off-by: Eric W. Biederman --- fs/proc/proc_sysctl.c | 30 ++++++++++++------------------ 1 file changed, 12 insertions(+), 18 deletions(-) (limited to 'fs/proc/proc_sysctl.c') diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index ec54a57c469..e0d3e7e59cb 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -1084,8 +1084,7 @@ out: /** * __register_sysctl_table - register a leaf sysctl table - * @root: List of sysctl headers to register on - * @namespaces: Data to compute which lists of sysctl entries are visible + * @set: Sysctl tree to register on * @path: The path to the directory the sysctl table is in. * @table: the top-level table structure * @@ -1126,26 +1125,24 @@ out: * to the table header on success.
*/ struct ctl_table_header *__register_sysctl_table( - struct ctl_table_root *root, - struct nsproxy *namespaces, + struct ctl_table_set *set, const char *path, struct ctl_table *table) { + struct ctl_table_root *root = set->dir.header.root; struct ctl_table_header *links = NULL; struct ctl_table_header *header; const char *name, *nextname; - struct ctl_table_set *set; struct ctl_dir *dir; header = kzalloc(sizeof(struct ctl_table_header), GFP_KERNEL); if (!header) return NULL; - init_header(header, root, NULL, table); + init_header(header, root, set, table); if (sysctl_check_table(path, table)) goto fail; spin_lock(&sysctl_lock); - header->set = set = lookup_header_set(root, namespaces); dir = &set->dir; dir->header.nreg++; spin_unlock(&sysctl_lock); @@ -1223,8 +1220,7 @@ static int count_subheaders(struct ctl_table *table) } static int register_leaf_sysctl_tables(const char *path, char *pos, - struct ctl_table_header ***subheader, - struct ctl_table_root *root, struct nsproxy *namespaces, + struct ctl_table_header ***subheader, struct ctl_table_set *set, struct ctl_table *table) { struct ctl_table *ctl_table_arg = NULL; @@ -1261,7 +1257,7 @@ static int register_leaf_sysctl_tables(const char *path, char *pos, /* Register everything except a directory full of subdirectories */ if (nr_files || !nr_dirs) { struct ctl_table_header *header; - header = __register_sysctl_table(root, namespaces, path, files); + header = __register_sysctl_table(set, path, files); if (!header) { kfree(ctl_table_arg); goto out; @@ -1286,7 +1282,7 @@ static int register_leaf_sysctl_tables(const char *path, char *pos, goto out; err = register_leaf_sysctl_tables(path, child_pos, subheader, - root, namespaces, entry->child); + set, entry->child); pos[0] = '\0'; if (err) goto out; @@ -1299,8 +1295,7 @@ out: /** * __register_sysctl_paths - register a sysctl table hierarchy - * @root: List of sysctl headers to register on - * @namespaces: Data to compute which lists of sysctl entries are visible + * 
@set: Sysctl tree to register on * @path: The path to the directory the sysctl table is in. * @table: the top-level table structure * @@ -1310,8 +1305,7 @@ out: * See __register_sysctl_table for more details. */ struct ctl_table_header *__register_sysctl_paths( - struct ctl_table_root *root, - struct nsproxy *namespaces, + struct ctl_table_set *set, const struct ctl_path *path, struct ctl_table *table) { struct ctl_table *ctl_table_arg = table; @@ -1337,7 +1331,7 @@ struct ctl_table_header *__register_sysctl_paths( table = table->child; } if (nr_subheaders == 1) { - header = __register_sysctl_table(root, namespaces, new_path, table); + header = __register_sysctl_table(set, new_path, table); if (header) header->ctl_table_arg = ctl_table_arg; } else { @@ -1351,7 +1345,7 @@ struct ctl_table_header *__register_sysctl_paths( header->ctl_table_arg = ctl_table_arg; if (register_leaf_sysctl_tables(new_path, pos, &subheader, - root, namespaces, table)) + set, table)) goto err_register_leaves; } @@ -1384,7 +1378,7 @@ err_register_leaves: struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path, struct ctl_table *table) { - return __register_sysctl_paths(&sysctl_table_root, current->nsproxy, + return __register_sysctl_paths(&sysctl_table_root.default_set, path, table); } EXPORT_SYMBOL(register_sysctl_paths); -- cgit v1.2.3 From e54012cede6749528899f66a72312522a179d427 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 18 Jan 2012 22:57:15 -0800 Subject: sysctl: Move sysctl_check_dups into insert_header Simplify the callers of insert_header by removing explicit calls to check for duplicates and instead have insert_header do the work. This makes the code slightly more maintainable by enabling changes to data structures where the insertion of new entries without duplicate suppression is not possible. 
There is not always a convenient path string where insert_header is called so modify sysctl_check_dups to use sysctl_print_dir when printing the full path when a duplicate is discovered. Signed-off-by: Eric W. Biederman --- fs/proc/proc_sysctl.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) (limited to 'fs/proc/proc_sysctl.c') diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index e0d3e7e59cb..160d5781638 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -52,6 +52,7 @@ static int sysctl_follow_link(struct ctl_table_header **phead, struct ctl_table **pentry, struct nsproxy *namespaces); static int insert_links(struct ctl_table_header *head); static void put_links(struct ctl_table_header *header); +static int sysctl_check_dups(struct ctl_dir *dir, struct ctl_table *table); static void sysctl_print_dir(struct ctl_dir *dir) { @@ -123,6 +124,10 @@ static int insert_header(struct ctl_dir *dir, struct ctl_table_header *header) { int err; + err = sysctl_check_dups(dir, header->ctl_table); + if (err) + return err; + dir->header.nreg++; header->parent = dir; err = insert_links(header); @@ -891,7 +896,7 @@ static int sysctl_follow_link(struct ctl_table_header **phead, return ret; } -static int sysctl_check_table_dups(const char *path, struct ctl_table *old, +static int sysctl_check_table_dups(struct ctl_dir *dir, struct ctl_table *old, struct ctl_table *table) { struct ctl_table *entry, *test; @@ -900,8 +905,9 @@ static int sysctl_check_table_dups(const char *path, struct ctl_table *old, for (entry = old; entry->procname; entry++) { for (test = table; test->procname; test++) { if (strcmp(entry->procname, test->procname) == 0) { - printk(KERN_ERR "sysctl duplicate entry: %s/%s\n", - path, test->procname); + printk(KERN_ERR "sysctl duplicate entry: "); + sysctl_print_dir(dir); + printk(KERN_CONT "/%s\n", test->procname); error = -EEXIST; } } @@ -909,8 +915,7 @@ static int sysctl_check_table_dups(const char *path, struct 
ctl_table *old, return error; } -static int sysctl_check_dups(struct ctl_dir *dir, - const char *path, struct ctl_table *table) +static int sysctl_check_dups(struct ctl_dir *dir, struct ctl_table *table) { struct ctl_table_set *set; struct ctl_table_header *head; @@ -922,7 +927,7 @@ static int sysctl_check_dups(struct ctl_dir *dir, continue; if (head->parent != dir) continue; - error = sysctl_check_table_dups(path, head->ctl_table, table); + error = sysctl_check_table_dups(dir, head->ctl_table, table); } return error; } @@ -1166,9 +1171,6 @@ struct ctl_table_header *__register_sysctl_table( } spin_lock(&sysctl_lock); - if (sysctl_check_dups(dir, path, table)) - goto fail_put_dir_locked; - if (insert_header(dir, header)) goto fail_put_dir_locked; -- cgit v1.2.3 From 9e3d47df35abd6430fed04fb40a76c7358b1e815 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sat, 7 Jan 2012 23:45:12 -0800 Subject: sysctl: Make the header lists per directory. Slightly enhance efficiency and clarity of the code by making the header list per directory instead of per set. Benchmark before: make-dummies 0 999 -> 0.63s rmmod dummy -> 0.12s make-dummies 0 9999 -> 2m35s rmmod dummy -> 18s Benchmark after: make-dummies 0 999 -> 0.32s rmmod dummy -> 0.12s make-dummies 0 9999 -> 1m17s rmmod dummy -> 17s Signed-off-by: Eric W. 
Biederman --- fs/proc/proc_sysctl.c | 28 +++++++++++----------------- 1 file changed, 11 insertions(+), 17 deletions(-) (limited to 'fs/proc/proc_sysctl.c') diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 160d5781638..e971ccccac4 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -33,12 +33,12 @@ static struct ctl_table root_table[] = { { } }; static struct ctl_table_root sysctl_table_root = { - .default_set.list = LIST_HEAD_INIT(sysctl_table_root.default_set.dir.header.ctl_entry), + .default_set.dir.list = LIST_HEAD_INIT(sysctl_table_root.default_set.dir.list), .default_set.dir.header = { {{.count = 1, .nreg = 1, .ctl_table = root_table, - .ctl_entry = LIST_HEAD_INIT(sysctl_table_root.default_set.list),}}, + .ctl_entry = LIST_HEAD_INIT(sysctl_table_root.default_set.dir.header.ctl_entry),}}, .ctl_table_arg = root_table, .root = &sysctl_table_root, .set = &sysctl_table_root.default_set, @@ -79,15 +79,12 @@ static int namecmp(const char *name1, int len1, const char *name2, int len2) static struct ctl_table *find_entry(struct ctl_table_header **phead, struct ctl_dir *dir, const char *name, int namelen) { - struct ctl_table_set *set = dir->header.set; struct ctl_table_header *head; struct ctl_table *entry; - list_for_each_entry(head, &set->list, ctl_entry) { + list_for_each_entry(head, &dir->list, ctl_entry) { if (head->unregistering) continue; - if (head->parent != dir) - continue; for (entry = head->ctl_table; entry->procname; entry++) { const char *procname = entry->procname; if (namecmp(procname, strlen(procname), name, namelen) == 0) { @@ -133,7 +130,7 @@ static int insert_header(struct ctl_dir *dir, struct ctl_table_header *header) err = insert_links(header); if (err) goto fail_links; - list_add_tail(&header->ctl_entry, &header->set->list); + list_add_tail(&header->ctl_entry, &header->parent->list); return 0; fail_links: header->parent = NULL; @@ -247,14 +244,12 @@ static struct ctl_table *lookup_entry(struct ctl_table_header 
**phead, static struct ctl_table_header *next_usable_entry(struct ctl_dir *dir, struct list_head *tmp) { - struct ctl_table_set *set = dir->header.set; struct ctl_table_header *head; - for (tmp = tmp->next; tmp != &set->list; tmp = tmp->next) { + for (tmp = tmp->next; tmp != &dir->list; tmp = tmp->next) { head = list_entry(tmp, struct ctl_table_header, ctl_entry); - if (head->parent != dir || - !head->ctl_table->procname || + if (!head->ctl_table->procname || !use_table(head)) continue; @@ -270,7 +265,7 @@ static void first_entry(struct ctl_dir *dir, struct ctl_table *entry = NULL; spin_lock(&sysctl_lock); - head = next_usable_entry(dir, &dir->header.set->list); + head = next_usable_entry(dir, &dir->list); spin_unlock(&sysctl_lock); if (head) entry = head->ctl_table; @@ -793,6 +788,7 @@ static struct ctl_dir *new_dir(struct ctl_table_set *set, new_name = (char *)(table + 2); memcpy(new_name, name, namelen); new_name[namelen] = '\0'; + INIT_LIST_HEAD(&new->list); table[0].procname = new_name; table[0].mode = S_IFDIR|S_IRUGO|S_IXUGO; init_header(&new->header, set->dir.header.root, set, table); @@ -917,12 +913,10 @@ static int sysctl_check_table_dups(struct ctl_dir *dir, struct ctl_table *old, static int sysctl_check_dups(struct ctl_dir *dir, struct ctl_table *table) { - struct ctl_table_set *set; struct ctl_table_header *head; int error = 0; - set = dir->header.set; - list_for_each_entry(head, &set->list, ctl_entry) { + list_for_each_entry(head, &dir->list, ctl_entry) { if (head->unregistering) continue; if (head->parent != dir) @@ -1494,14 +1488,14 @@ void setup_sysctl_set(struct ctl_table_set *set, int (*is_seen)(struct ctl_table_set *)) { memset(set, sizeof(*set), 0); - INIT_LIST_HEAD(&set->list); set->is_seen = is_seen; + INIT_LIST_HEAD(&set->dir.list); init_header(&set->dir.header, root, set, root_table); } void retire_sysctl_set(struct ctl_table_set *set) { - WARN_ON(!list_empty(&set->list)); + WARN_ON(!list_empty(&set->dir.list)); } int __init 
proc_sys_init(void) -- cgit v1.2.3 From ac13ac6f4c6c0504d2c927862216f4e422a2c0b5 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 9 Jan 2012 17:24:30 -0800 Subject: sysctl: Index sysctl directories with rbtrees. One of the most important jobs of sysctl is to export network stack tunables. Several of those tunables are per network device. In several instances people are running with 1000+ network devices in their network stacks, which makes the simple per-directory linked list in sysctl a scaling bottleneck. Replace O(N^2) sysctl insertion and lookup times with O(NlogN) by using an rbtree to index the sysctl directories. Benchmark before: make-dummies 0 999 -> 0.32s rmmod dummy -> 0.12s make-dummies 0 9999 -> 1m17s rmmod dummy -> 17s Benchmark after: make-dummies 0 999 -> 0.074s rmmod dummy -> 0.070s make-dummies 0 9999 -> 3.4s rmmod dummy -> 0.44s Benchmark after (without dev_snmp6): make-dummies 0 9999 -> 0.75s rmmod dummy -> 0.44s make-dummies 0 99999 -> 11s rmmod dummy -> 4.3s At 10,000 dummy devices the bottleneck becomes the time to add and remove the files under /proc/sys/net/dev_snmp6. I have commented out the code that adds and removes files under /proc/sys/net/dev_snmp6 and taken measurements of creating and destroying 100,000 dummies to verify the sysctl continues to scale. Signed-off-by: Eric W.
Biederman --- fs/proc/proc_sysctl.c | 224 ++++++++++++++++++++++++++++++-------------------- 1 file changed, 134 insertions(+), 90 deletions(-) (limited to 'fs/proc/proc_sysctl.c') diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index e971ccccac4..05c393a5c53 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -33,12 +33,10 @@ static struct ctl_table root_table[] = { { } }; static struct ctl_table_root sysctl_table_root = { - .default_set.dir.list = LIST_HEAD_INIT(sysctl_table_root.default_set.dir.list), .default_set.dir.header = { {{.count = 1, .nreg = 1, - .ctl_table = root_table, - .ctl_entry = LIST_HEAD_INIT(sysctl_table_root.default_set.dir.header.ctl_entry),}}, + .ctl_table = root_table }}, .ctl_table_arg = root_table, .root = &sysctl_table_root, .set = &sysctl_table_root.default_set, @@ -52,7 +50,6 @@ static int sysctl_follow_link(struct ctl_table_header **phead, struct ctl_table **pentry, struct nsproxy *namespaces); static int insert_links(struct ctl_table_header *head); static void put_links(struct ctl_table_header *header); -static int sysctl_check_dups(struct ctl_dir *dir, struct ctl_table *table); static void sysctl_print_dir(struct ctl_dir *dir) { @@ -81,28 +78,83 @@ static struct ctl_table *find_entry(struct ctl_table_header **phead, { struct ctl_table_header *head; struct ctl_table *entry; + struct rb_node *node = dir->root.rb_node; - list_for_each_entry(head, &dir->list, ctl_entry) { - if (head->unregistering) - continue; - for (entry = head->ctl_table; entry->procname; entry++) { - const char *procname = entry->procname; - if (namecmp(procname, strlen(procname), name, namelen) == 0) { - *phead = head; - return entry; - } + while (node) + { + struct ctl_node *ctl_node; + const char *procname; + int cmp; + + ctl_node = rb_entry(node, struct ctl_node, node); + head = ctl_node->header; + entry = &head->ctl_table[ctl_node - head->node]; + procname = entry->procname; + + cmp = namecmp(name, namelen, procname, strlen(procname)); 
+ if (cmp < 0) + node = node->rb_left; + else if (cmp > 0) + node = node->rb_right; + else { + *phead = head; + return entry; } } return NULL; } +static int insert_entry(struct ctl_table_header *head, struct ctl_table *entry) +{ + struct rb_node *node = &head->node[entry - head->ctl_table].node; + struct rb_node **p = &head->parent->root.rb_node; + struct rb_node *parent = NULL; + const char *name = entry->procname; + int namelen = strlen(name); + + while (*p) { + struct ctl_table_header *parent_head; + struct ctl_table *parent_entry; + struct ctl_node *parent_node; + const char *parent_name; + int cmp; + + parent = *p; + parent_node = rb_entry(parent, struct ctl_node, node); + parent_head = parent_node->header; + parent_entry = &parent_head->ctl_table[parent_node - parent_head->node]; + parent_name = parent_entry->procname; + + cmp = namecmp(name, namelen, parent_name, strlen(parent_name)); + if (cmp < 0) + p = &(*p)->rb_left; + else if (cmp > 0) + p = &(*p)->rb_right; + else { + printk(KERN_ERR "sysctl duplicate entry: "); + sysctl_print_dir(head->parent); + printk(KERN_CONT "/%s\n", entry->procname); + return -EEXIST; + } + } + + rb_link_node(node, parent, p); + return 0; +} + +static void erase_entry(struct ctl_table_header *head, struct ctl_table *entry) +{ + struct rb_node *node = &head->node[entry - head->ctl_table].node; + + rb_erase(node, &head->parent->root); +} + static void init_header(struct ctl_table_header *head, struct ctl_table_root *root, struct ctl_table_set *set, - struct ctl_table *table) + struct ctl_node *node, struct ctl_table *table) { head->ctl_table = table; head->ctl_table_arg = table; - INIT_LIST_HEAD(&head->ctl_entry); head->used = 0; head->count = 1; head->nreg = 1; @@ -110,28 +162,42 @@ static void init_header(struct ctl_table_header *head, head->root = root; head->set = set; head->parent = NULL; + head->node = node; + if (node) { + struct ctl_table *entry; + for (entry = table; entry->procname; entry++, node++) { + 
rb_init_node(&node->node); + node->header = head; + } + } } static void erase_header(struct ctl_table_header *head) { - list_del_init(&head->ctl_entry); + struct ctl_table *entry; + for (entry = head->ctl_table; entry->procname; entry++) + erase_entry(head, entry); } static int insert_header(struct ctl_dir *dir, struct ctl_table_header *header) { + struct ctl_table *entry; int err; - err = sysctl_check_dups(dir, header->ctl_table); - if (err) - return err; - dir->header.nreg++; header->parent = dir; err = insert_links(header); if (err) goto fail_links; - list_add_tail(&header->ctl_entry, &header->parent->list); + for (entry = header->ctl_table; entry->procname; entry++) { + err = insert_entry(header, entry); + if (err) + goto fail; + } return 0; +fail: + erase_header(header); + put_links(header); fail_links: header->parent = NULL; drop_sysctl_table(&dir->header); @@ -241,19 +307,14 @@ static struct ctl_table *lookup_entry(struct ctl_table_header **phead, return entry; } -static struct ctl_table_header *next_usable_entry(struct ctl_dir *dir, - struct list_head *tmp) +static struct ctl_node *first_usable_entry(struct rb_node *node) { - struct ctl_table_header *head; - - for (tmp = tmp->next; tmp != &dir->list; tmp = tmp->next) { - head = list_entry(tmp, struct ctl_table_header, ctl_entry); + struct ctl_node *ctl_node; - if (!head->ctl_table->procname || - !use_table(head)) - continue; - - return head; + for (;node; node = rb_next(node)) { + ctl_node = rb_entry(node, struct ctl_node, node); + if (use_table(ctl_node->header)) + return ctl_node; } return NULL; } @@ -261,14 +322,17 @@ static struct ctl_table_header *next_usable_entry(struct ctl_dir *dir, static void first_entry(struct ctl_dir *dir, struct ctl_table_header **phead, struct ctl_table **pentry) { - struct ctl_table_header *head; + struct ctl_table_header *head = NULL; struct ctl_table *entry = NULL; + struct ctl_node *ctl_node; spin_lock(&sysctl_lock); - head = next_usable_entry(dir, &dir->list); + ctl_node 
= first_usable_entry(rb_first(&dir->root)); spin_unlock(&sysctl_lock); - if (head) - entry = head->ctl_table; + if (ctl_node) { + head = ctl_node->header; + entry = &head->ctl_table[ctl_node - head->node]; + } *phead = head; *pentry = entry; } @@ -277,15 +341,17 @@ static void next_entry(struct ctl_table_header **phead, struct ctl_table **pentr { struct ctl_table_header *head = *phead; struct ctl_table *entry = *pentry; + struct ctl_node *ctl_node = &head->node[entry - head->ctl_table]; - entry++; - if (!entry->procname) { - spin_lock(&sysctl_lock); - unuse_table(head); - head = next_usable_entry(head->parent, &head->ctl_entry); - spin_unlock(&sysctl_lock); - if (head) - entry = head->ctl_table; + spin_lock(&sysctl_lock); + unuse_table(head); + + ctl_node = first_usable_entry(rb_next(&ctl_node->node)); + spin_unlock(&sysctl_lock); + head = NULL; + if (ctl_node) { + head = ctl_node->header; + entry = &head->ctl_table[ctl_node - head->node]; } *phead = head; *pentry = entry; @@ -777,21 +843,23 @@ static struct ctl_dir *new_dir(struct ctl_table_set *set, { struct ctl_table *table; struct ctl_dir *new; + struct ctl_node *node; char *new_name; - new = kzalloc(sizeof(*new) + sizeof(struct ctl_table)*2 + - namelen + 1, GFP_KERNEL); + new = kzalloc(sizeof(*new) + sizeof(struct ctl_node) + + sizeof(struct ctl_table)*2 + namelen + 1, + GFP_KERNEL); if (!new) return NULL; - table = (struct ctl_table *)(new + 1); + node = (struct ctl_node *)(new + 1); + table = (struct ctl_table *)(node + 1); new_name = (char *)(table + 2); memcpy(new_name, name, namelen); new_name[namelen] = '\0'; - INIT_LIST_HEAD(&new->list); table[0].procname = new_name; table[0].mode = S_IFDIR|S_IRUGO|S_IXUGO; - init_header(&new->header, set->dir.header.root, set, table); + init_header(&new->header, set->dir.header.root, set, node, table); return new; } @@ -892,40 +960,6 @@ static int sysctl_follow_link(struct ctl_table_header **phead, return ret; } -static int sysctl_check_table_dups(struct ctl_dir *dir, 
struct ctl_table *old, - struct ctl_table *table) -{ - struct ctl_table *entry, *test; - int error = 0; - - for (entry = old; entry->procname; entry++) { - for (test = table; test->procname; test++) { - if (strcmp(entry->procname, test->procname) == 0) { - printk(KERN_ERR "sysctl duplicate entry: "); - sysctl_print_dir(dir); - printk(KERN_CONT "/%s\n", test->procname); - error = -EEXIST; - } - } - } - return error; -} - -static int sysctl_check_dups(struct ctl_dir *dir, struct ctl_table *table) -{ - struct ctl_table_header *head; - int error = 0; - - list_for_each_entry(head, &dir->list, ctl_entry) { - if (head->unregistering) - continue; - if (head->parent != dir) - continue; - error = sysctl_check_table_dups(dir, head->ctl_table, table); - } - return error; -} - static int sysctl_err(const char *path, struct ctl_table *table, char *fmt, ...) { struct va_format vaf; @@ -977,6 +1011,7 @@ static struct ctl_table_header *new_links(struct ctl_dir *dir, struct ctl_table { struct ctl_table *link_table, *entry, *link; struct ctl_table_header *links; + struct ctl_node *node; char *link_name; int nr_entries, name_bytes; @@ -988,6 +1023,7 @@ static struct ctl_table_header *new_links(struct ctl_dir *dir, struct ctl_table } links = kzalloc(sizeof(struct ctl_table_header) + + sizeof(struct ctl_node)*nr_entries + sizeof(struct ctl_table)*(nr_entries + 1) + name_bytes, GFP_KERNEL); @@ -995,7 +1031,8 @@ static struct ctl_table_header *new_links(struct ctl_dir *dir, struct ctl_table if (!links) return NULL; - link_table = (struct ctl_table *)(links + 1); + node = (struct ctl_node *)(links + 1); + link_table = (struct ctl_table *)(node + nr_entries); link_name = (char *)&link_table[nr_entries + 1]; for (link = link_table, entry = table; entry->procname; link++, entry++) { @@ -1006,7 +1043,7 @@ static struct ctl_table_header *new_links(struct ctl_dir *dir, struct ctl_table link->data = link_root; link_name += len; } - init_header(links, dir->header.root, dir->header.set, 
link_table); + init_header(links, dir->header.root, dir->header.set, node, link_table); links->nreg = nr_entries; return links; @@ -1132,12 +1169,20 @@ struct ctl_table_header *__register_sysctl_table( struct ctl_table_header *header; const char *name, *nextname; struct ctl_dir *dir; + struct ctl_table *entry; + struct ctl_node *node; + int nr_entries = 0; + + for (entry = table; entry->procname; entry++) + nr_entries++; - header = kzalloc(sizeof(struct ctl_table_header), GFP_KERNEL); + header = kzalloc(sizeof(struct ctl_table_header) + + sizeof(struct ctl_node)*nr_entries, GFP_KERNEL); if (!header) return NULL; - init_header(header, root, set, table); + node = (struct ctl_node *)(header + 1); + init_header(header, root, set, node, table); if (sysctl_check_table(path, table)) goto fail; @@ -1489,13 +1534,12 @@ void setup_sysctl_set(struct ctl_table_set *set, { memset(set, sizeof(*set), 0); set->is_seen = is_seen; - INIT_LIST_HEAD(&set->dir.list); - init_header(&set->dir.header, root, set, root_table); + init_header(&set->dir.header, root, set, NULL, root_table); } void retire_sysctl_set(struct ctl_table_set *set) { - WARN_ON(!list_empty(&set->dir.list)); + WARN_ON(!RB_EMPTY_ROOT(&set->dir.root)); } int __init proc_sys_init(void) -- cgit v1.2.3 From fea478d4101a4285aa25c5bafaaf4cec35026fe0 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 20 Jan 2012 21:47:03 -0800 Subject: sysctl: Add register_sysctl for normal sysctl users The plan is to convert all callers of register_sysctl_table and register_sysctl_paths to register_sysctl. The interface to register_sysctl is enough nicer that this should make the callers a bit more readable. Additionally after the conversion the 230 lines of backwards compatibility can be removed. Signed-off-by: Eric W. 
Biederman --- fs/proc/proc_sysctl.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'fs/proc/proc_sysctl.c') diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 05c393a5c53..8dc7f0e46e7 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -1228,6 +1228,23 @@ fail: return NULL; } +/** + * register_sysctl - register a sysctl table + * @path: The path to the directory the sysctl table is in. + * @table: the table structure + * + * Register a sysctl table. @table should be a filled in ctl_table + * array. A completely 0 filled entry terminates the table. + * + * See __register_sysctl_table for more details. + */ +struct ctl_table_header *register_sysctl(const char *path, struct ctl_table *table) +{ + return __register_sysctl_table(&sysctl_table_root.default_set, + path, table); +} +EXPORT_SYMBOL(register_sysctl); + static char *append_path(const char *path, char *pos, const char *name) { int namelen; -- cgit v1.2.3 From 47981787092aecb87dc3cb2d478455dcfb77516a Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 30 Jan 2012 16:39:59 +0300 Subject: sysctl: remove an unused variable "links" is never used, so we can remove it. Signed-off-by: Dan Carpenter Signed-off-by: Eric W. 
Biederman --- fs/proc/proc_sysctl.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'fs/proc/proc_sysctl.c') diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 8dc7f0e46e7..1b1f5b8f4e0 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -1165,7 +1165,6 @@ struct ctl_table_header *__register_sysctl_table( const char *path, struct ctl_table *table) { struct ctl_table_root *root = set->dir.header.root; - struct ctl_table_header *links = NULL; struct ctl_table_header *header; const char *name, *nextname; struct ctl_dir *dir; @@ -1222,7 +1221,6 @@ fail_put_dir_locked: drop_sysctl_table(&dir->header); spin_unlock(&sysctl_lock); fail: - kfree(links); kfree(header); dump_stack(); return NULL; -- cgit v1.2.3 From 1347440db6f76ec5ae0af8d8558387f571a5e1dd Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 30 Jan 2012 16:40:29 +0300 Subject: sysctl: fix memset parameters in setup_sysctl_set() The current code is a nop. Signed-off-by: Dan Carpenter Signed-off-by: Eric W. Biederman --- fs/proc/proc_sysctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/proc/proc_sysctl.c') diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 1b1f5b8f4e0..27e265ba1af 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -1547,7 +1547,7 @@ void setup_sysctl_set(struct ctl_table_set *set, struct ctl_table_root *root, int (*is_seen)(struct ctl_table_set *)) { - memset(set, sizeof(*set), 0); + memset(set, 0, sizeof(*set)); set->is_seen = is_seen; init_header(&set->dir.header, root, set, NULL, root_table); } -- cgit v1.2.3 From 51f72f4a0f92e4abde33a8bca0fac9667575d035 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 30 Jan 2012 20:09:33 -0800 Subject: sysctl: An easier to read version of find_subdir Suggested-by: Lucian Adrian Grijincu Signed-off-by: Eric W. 
Biederman --- fs/proc/proc_sysctl.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs/proc/proc_sysctl.c') diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 27e265ba1af..ebe8b3076db 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -833,9 +833,9 @@ static struct ctl_dir *find_subdir(struct ctl_dir *dir, entry = find_entry(&head, dir, name, namelen); if (!entry) return ERR_PTR(-ENOENT); - if (S_ISDIR(entry->mode)) - return container_of(head, struct ctl_dir, header); - return ERR_PTR(-ENOTDIR); + if (!S_ISDIR(entry->mode)) + return ERR_PTR(-ENOTDIR); + return container_of(head, struct ctl_dir, header); } static struct ctl_dir *new_dir(struct ctl_table_set *set, -- cgit v1.2.3 From 0eb97f38d2bfaea289b44c5140a7b04e7b369bad Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 30 Jan 2012 20:37:51 -0800 Subject: sysctl: Correct error return from get_subdir When insert_header fails ensure we return the proper error value from get_subdir. In practice nothing cares, but there is no need to be sloppy. Reported-by: Lucian Adrian Grijincu Signed-off-by: Eric W. 
Biederman --- fs/proc/proc_sysctl.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'fs/proc/proc_sysctl.c') diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index ebe8b3076db..722ec116208 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -869,6 +869,7 @@ static struct ctl_dir *get_subdir(struct ctl_dir *dir, { struct ctl_table_set *set = dir->header.set; struct ctl_dir *subdir, *new = NULL; + int err; spin_lock(&sysctl_lock); subdir = find_subdir(dir, name, namelen); @@ -890,7 +891,9 @@ static struct ctl_dir *get_subdir(struct ctl_dir *dir, if (PTR_ERR(subdir) != -ENOENT) goto failed; - if (insert_header(dir, &new->header)) + err = insert_header(dir, &new->header); + subdir = ERR_PTR(err); + if (err) goto failed; subdir = new; found: -- cgit v1.2.3 From 60f126d93b210ae708e2a5bb4a3be2121831f2a0 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 30 Jan 2012 21:23:52 -0800 Subject: sysctl: Comments to make the code clearer. Document get_subdir and that find_subdir always takes a reference. Suggested-by: Lucian Adrian Grijincu Signed-off-by: Eric W. Biederman --- fs/proc/proc_sysctl.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'fs/proc/proc_sysctl.c') diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 722ec116208..e5601dc2408 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -73,6 +73,7 @@ static int namecmp(const char *name1, int len1, const char *name2, int len2) return cmp; } +/* Called under sysctl_lock */ static struct ctl_table *find_entry(struct ctl_table_header **phead, struct ctl_dir *dir, const char *name, int namelen) { @@ -864,6 +865,18 @@ static struct ctl_dir *new_dir(struct ctl_table_set *set, return new; } +/** + * get_subdir - find or create a subdir with the specified name. 
+ * @dir: Directory to create the subdirectory in + * @name: The name of the subdirectory to find or create + * @namelen: The length of name + * + * Takes a directory with an elevated reference count so we know that + * if we drop the lock the directory will not go away. Upon success + * the reference is moved from @dir to the returned subdirectory. + * Upon error an error code is returned and the reference on @dir is + * simply dropped. + */ static struct ctl_dir *get_subdir(struct ctl_dir *dir, const char *name, int namelen) { @@ -885,12 +898,14 @@ static struct ctl_dir *get_subdir(struct ctl_dir *dir, if (!new) goto failed; + /* Was the subdir added while we dropped the lock? */ subdir = find_subdir(dir, name, namelen); if (!IS_ERR(subdir)) goto found; if (PTR_ERR(subdir) != -ENOENT) goto failed; + /* Nope. Use the our freshly made directory entry. */ err = insert_header(dir, &new->header); subdir = ERR_PTR(err); if (err) @@ -1190,6 +1205,7 @@ struct ctl_table_header *__register_sysctl_table( spin_lock(&sysctl_lock); dir = &set->dir; + /* Reference moved down the diretory tree get_subdir */ dir->header.nreg++; spin_unlock(&sysctl_lock); -- cgit v1.2.3 From 4e75732035d7e97e001bdf6e3149d3967c0221de Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 30 Jan 2012 21:24:59 -0800 Subject: sysctl: Don't call sysctl_follow_link unless we are a link. There are no functional changes. Just code motion to make it clear that we don't follow a link between sysctl roots unless the directory entry actually is a link. Suggested-by: Lucian Adrian Grijincu Signed-off-by: Eric W. 
Biederman --- fs/proc/proc_sysctl.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'fs/proc/proc_sysctl.c') diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index e5601dc2408..a7708b7c957 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -451,10 +451,12 @@ static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry, if (!p) goto out; - ret = sysctl_follow_link(&h, &p, current->nsproxy); - err = ERR_PTR(ret); - if (ret) - goto out; + if (S_ISLNK(p->mode)) { + ret = sysctl_follow_link(&h, &p, current->nsproxy); + err = ERR_PTR(ret); + if (ret) + goto out; + } err = ERR_PTR(-ENOMEM); inode = proc_sys_make_inode(dir->i_sb, h ? h : head, p); @@ -601,10 +603,12 @@ static int proc_sys_link_fill_cache(struct file *filp, void *dirent, int err, ret = 0; head = sysctl_head_grab(head); - /* It is not an error if we can not follow the link ignore it */ - err = sysctl_follow_link(&head, &table, current->nsproxy); - if (err) - goto out; + if (S_ISLNK(table->mode)) { + /* It is not an error if we can not follow the link ignore it */ + err = sysctl_follow_link(&head, &table, current->nsproxy); + if (err) + goto out; + } ret = proc_sys_fill_cache(filp, dirent, filldir, head, table); out: @@ -950,10 +954,6 @@ static int sysctl_follow_link(struct ctl_table_header **phead, struct ctl_dir *dir; int ret; - /* Get out quickly if not a link */ - if (!S_ISLNK((*pentry)->mode)) - return 0; - ret = 0; spin_lock(&sysctl_lock); root = (*pentry)->data; -- cgit v1.2.3 From 4e474a00d7ff746ed177ddae14fa8b2d4bad7a00 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 22 Mar 2012 14:42:22 -0700 Subject: sysctl: protect poll() in entries that may go away Protect code accessing ctl_table by grabbing the header with grab_header() and after releasing with sysctl_head_finish(). 
This is needed if poll() is called in entries created by modules: currently only hostname and domainname support poll(), but this bug may be triggered when/if modules use it and if user called poll() in a file that doesn't support it. Dave Jones reported the following when using a syscall fuzzer while hibernating/resuming: RIP: 0010:[] [] proc_sys_poll+0x4e/0x90 RAX: 0000000000000145 RBX: ffff88020cab6940 RCX: 0000000000000000 RDX: ffffffff81233df0 RSI: 6b6b6b6b6b6b6b6b RDI: ffff88020cab6940 [ ... ] Code: 00 48 89 fb 48 89 f1 48 8b 40 30 4c 8b 60 e8 b8 45 01 00 00 49 83 7c 24 28 00 74 2e 49 8b 74 24 30 48 85 f6 74 24 48 85 c9 75 32 <8b> 16 b8 45 01 00 00 48 63 d2 49 39 d5 74 10 8b 06 48 98 48 89 If an entry goes away while we are polling() it, ctl_table may not exist anymore. Reported-by: Dave Jones Signed-off-by: Lucas De Marchi Cc: Al Viro Cc: Linus Torvalds Cc: Alexey Dobriyan Cc: stable@vger.kernel.org Signed-off-by: Andrew Morton Signed-off-by: Eric W. Biederman --- fs/proc/proc_sysctl.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) (limited to 'fs/proc/proc_sysctl.c') diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index a7708b7c957..47b474b572c 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -525,20 +525,32 @@ static ssize_t proc_sys_write(struct file *filp, const char __user *buf, static int proc_sys_open(struct inode *inode, struct file *filp) { + struct ctl_table_header *head = grab_header(inode); struct ctl_table *table = PROC_I(inode)->sysctl_entry; + /* sysctl was unregistered */ + if (IS_ERR(head)) + return PTR_ERR(head); + if (table->poll) filp->private_data = proc_sys_poll_event(table->poll); + sysctl_head_finish(head); + return 0; } static unsigned int proc_sys_poll(struct file *filp, poll_table *wait) { struct inode *inode = filp->f_path.dentry->d_inode; + struct ctl_table_header *head = grab_header(inode); struct ctl_table *table = PROC_I(inode)->sysctl_entry; - unsigned long event = (unsigned 
long)filp->private_data; unsigned int ret = DEFAULT_POLLMASK; + unsigned long event; + + /* sysctl was unregistered */ + if (IS_ERR(head)) + return POLLERR | POLLHUP; if (!table->proc_handler) goto out; @@ -546,6 +558,7 @@ static unsigned int proc_sys_poll(struct file *filp, poll_table *wait) if (!table->poll) goto out; + event = (unsigned long)filp->private_data; poll_wait(filp, &table->poll->wait, wait); if (event != atomic_read(&table->poll->event)) { @@ -554,6 +567,8 @@ static unsigned int proc_sys_poll(struct file *filp, poll_table *wait) } out: + sysctl_head_finish(head); + return ret; } -- cgit v1.2.3