From faa65f07d21e7d37190c91fdcf9f940d733ae3cc Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 3 Dec 2012 06:05:29 -0500 Subject: cifs: simplify id_to_sid and sid_to_id mapping code The cifs.idmap handling code currently causes the kernel to cache the data from userspace twice. It first looks in a rbtree to see if there is a matching entry for the given id. If there isn't then it calls request_key which then checks its cache and then calls out to userland if it doesn't have one. If the userland program establishes a mapping and downcalls with that info, it then gets cached in the keyring and in this rbtree. Aside from the double memory usage and the performance penalty in doing all of these extra copies, there are some nasty bugs in here too. The code declares four rbtrees and spinlocks to protect them, but only seems to use two of them. The upshot is that the same tree is used to hold (eg) uid:sid and sid:uid mappings. The comparitors aren't equipped to deal with that. I think we'd be best off to remove a layer of caching in this code. If this was originally done for performance reasons, then that really seems like a premature optimization. This patch does that -- it removes the rbtrees and the locks that protect them and simply has the code do a request_key call on each call into sid_to_id and id_to_sid. This greatly simplifies this code and should roughly halve the memory utilization from using the idmapping code. Reviewed-by: Shirish Pargaonkar Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/cifsacl.c | 535 ++++++++++-------------------------------------------- 1 file changed, 94 insertions(+), 441 deletions(-) (limited to 'fs/cifs/cifsacl.c') diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c index 42b3fe981a0..f4508ee4e80 100644 --- a/fs/cifs/cifsacl.c +++ b/fs/cifs/cifsacl.c @@ -44,128 +44,6 @@ static const struct cifs_sid sid_user = {1, 2 , {0, 0, 0, 0, 0, 5}, {} }; static const struct cred *root_cred; -static void -shrink_idmap_tree(struct rb_root *root, int nr_to_scan, int *nr_rem, - int *nr_del) -{ - struct rb_node *node; - struct rb_node *tmp; - struct cifs_sid_id *psidid; - - node = rb_first(root); - while (node) { - tmp = node; - node = rb_next(tmp); - psidid = rb_entry(tmp, struct cifs_sid_id, rbnode); - if (nr_to_scan == 0 || *nr_del == nr_to_scan) - ++(*nr_rem); - else { - if (time_after(jiffies, psidid->time + SID_MAP_EXPIRE) - && psidid->refcount == 0) { - rb_erase(tmp, root); - ++(*nr_del); - } else - ++(*nr_rem); - } - } -} - -/* - * Run idmap cache shrinker. - */ -static int -cifs_idmap_shrinker(struct shrinker *shrink, struct shrink_control *sc) -{ - int nr_to_scan = sc->nr_to_scan; - int nr_del = 0; - int nr_rem = 0; - struct rb_root *root; - - root = &uidtree; - spin_lock(&siduidlock); - shrink_idmap_tree(root, nr_to_scan, &nr_rem, &nr_del); - spin_unlock(&siduidlock); - - root = &gidtree; - spin_lock(&sidgidlock); - shrink_idmap_tree(root, nr_to_scan, &nr_rem, &nr_del); - spin_unlock(&sidgidlock); - - root = &siduidtree; - spin_lock(&uidsidlock); - shrink_idmap_tree(root, nr_to_scan, &nr_rem, &nr_del); - spin_unlock(&uidsidlock); - - root = &sidgidtree; - spin_lock(&gidsidlock); - shrink_idmap_tree(root, nr_to_scan, &nr_rem, &nr_del); - spin_unlock(&gidsidlock); - - return nr_rem; -} - -static void -sid_rb_insert(struct rb_root *root, unsigned long cid, - struct cifs_sid_id **psidid, char *typestr) -{ - char *strptr; - struct rb_node *node = root->rb_node; - struct rb_node *parent = NULL; - struct rb_node **linkto = &(root->rb_node); - struct cifs_sid_id *lsidid; - - while (node) { - lsidid = rb_entry(node, struct cifs_sid_id, rbnode); - parent = node; - if (cid > lsidid->id) { - linkto = &(node->rb_left); - node = node->rb_left; - } - if (cid < lsidid->id) { - linkto = &(node->rb_right); - node = node->rb_right; - } - } - - (*psidid)->id = cid; - (*psidid)->time = jiffies - (SID_MAP_RETRY + 1); - (*psidid)->refcount = 0; - - sprintf((*psidid)->sidstr, "%s", typestr); - strptr = (*psidid)->sidstr + strlen((*psidid)->sidstr); - sprintf(strptr, "%ld", cid); - - clear_bit(SID_ID_PENDING, &(*psidid)->state); - clear_bit(SID_ID_MAPPED, &(*psidid)->state); - - rb_link_node(&(*psidid)->rbnode, parent, linkto); - rb_insert_color(&(*psidid)->rbnode, root); -} - -static struct cifs_sid_id * -sid_rb_search(struct rb_root *root, unsigned long cid) -{ - struct rb_node *node = root->rb_node; - struct cifs_sid_id *lsidid; - - while (node) { - lsidid = rb_entry(node, struct cifs_sid_id, rbnode); - if (cid > lsidid->id) - node = node->rb_left; - else if (cid < lsidid->id) - node = node->rb_right; - else /* node found */ - return lsidid; - } - - return NULL; -} - -static struct shrinker cifs_shrinker = { - .shrink = cifs_idmap_shrinker, - .seeks = DEFAULT_SEEKS, -}; - static int cifs_idmap_key_instantiate(struct key *key, struct key_preparsed_payload *prep) { @@ -195,30 +73,39 @@ static struct key_type cifs_idmap_key_type = { .match = user_match, }; -static void -sid_to_str(struct cifs_sid *sidptr, char *sidstr) +static char * +sid_to_key_str(struct cifs_sid *sidptr, unsigned int type) { - int i; + int i, len; unsigned int saval; - char *strptr; + char *sidstr, *strptr; - strptr = sidstr; + /* 3 bytes for prefix */ + sidstr = kmalloc(3 + SID_STRING_BASE_SIZE + + (SID_STRING_SUBAUTH_SIZE * sidptr->num_subauth), + GFP_KERNEL); + if (!sidstr) + return sidstr; - sprintf(strptr, "S-%hhu", sidptr->revision); - strptr = sidstr + strlen(sidstr); + strptr = sidstr; + len = sprintf(strptr, "%cs:S-%hhu", type == SIDOWNER ? 'o' : 'g', + sidptr->revision); + strptr += len; for (i = 0; i < NUM_AUTHS; ++i) { if (sidptr->authority[i]) { - sprintf(strptr, "-%hhu", sidptr->authority[i]); - strptr = sidstr + strlen(sidstr); + len = sprintf(strptr, "-%hhu", sidptr->authority[i]); + strptr += len; } } for (i = 0; i < sidptr->num_subauth; ++i) { saval = le32_to_cpu(sidptr->sub_auth[i]); - sprintf(strptr, "-%u", saval); - strptr = sidstr + strlen(sidstr); + len = sprintf(strptr, "-%u", saval); + strptr += len; } + + return sidstr; } /* @@ -284,184 +171,38 @@ cifs_copy_sid(struct cifs_sid *dst, const struct cifs_sid *src) dst->sub_auth[i] = src->sub_auth[i]; } -static void -id_rb_insert(struct rb_root *root, struct cifs_sid *sidptr, - struct cifs_sid_id **psidid, char *typestr) -{ - int rc; - char *strptr; - struct rb_node *node = root->rb_node; - struct rb_node *parent = NULL; - struct rb_node **linkto = &(root->rb_node); - struct cifs_sid_id *lsidid; - - while (node) { - lsidid = rb_entry(node, struct cifs_sid_id, rbnode); - parent = node; - rc = compare_sids(sidptr, &((lsidid)->sid)); - if (rc > 0) { - linkto = &(node->rb_left); - node = node->rb_left; - } else if (rc < 0) { - linkto = &(node->rb_right); - node = node->rb_right; - } - } - - cifs_copy_sid(&(*psidid)->sid, sidptr); - (*psidid)->time = jiffies - (SID_MAP_RETRY + 1); - (*psidid)->refcount = 0; - - sprintf((*psidid)->sidstr, "%s", typestr); - strptr = (*psidid)->sidstr + strlen((*psidid)->sidstr); - sid_to_str(&(*psidid)->sid, strptr); - - clear_bit(SID_ID_PENDING, &(*psidid)->state); - clear_bit(SID_ID_MAPPED, &(*psidid)->state); - - rb_link_node(&(*psidid)->rbnode, parent, linkto); - rb_insert_color(&(*psidid)->rbnode, root); -} - -static struct cifs_sid_id * -id_rb_search(struct rb_root *root, struct cifs_sid *sidptr) -{ - int rc; - struct rb_node *node = root->rb_node; - struct cifs_sid_id *lsidid; - - while (node) { - lsidid = rb_entry(node, struct cifs_sid_id, rbnode); - rc = compare_sids(sidptr, &((lsidid)->sid)); - if (rc > 0) { - node = node->rb_left; - } else if (rc < 0) { - node = node->rb_right; - } else /* node found */ - return lsidid; - } - - return NULL; -} - -static int -sidid_pending_wait(void *unused) -{ - schedule(); - return signal_pending(current) ? -ERESTARTSYS : 0; -} - static int -id_to_sid(unsigned long cid, uint sidtype, struct cifs_sid *ssid) +id_to_sid(unsigned int cid, uint sidtype, struct cifs_sid *ssid) { - int rc = 0; + int rc; struct key *sidkey; + char desc[3 + 10 + 1]; /* 3 byte prefix + 10 bytes for value + NULL */ const struct cred *saved_cred; - struct cifs_sid *lsid; - struct cifs_sid_id *psidid, *npsidid; - struct rb_root *cidtree; - spinlock_t *cidlock; - - if (sidtype == SIDOWNER) { - cidlock = &siduidlock; - cidtree = &uidtree; - } else if (sidtype == SIDGROUP) { - cidlock = &sidgidlock; - cidtree = &gidtree; - } else - return -EINVAL; - - spin_lock(cidlock); - psidid = sid_rb_search(cidtree, cid); - - if (!psidid) { /* node does not exist, allocate one & attempt adding */ - spin_unlock(cidlock); - npsidid = kzalloc(sizeof(struct cifs_sid_id), GFP_KERNEL); - if (!npsidid) - return -ENOMEM; - - npsidid->sidstr = kmalloc(SID_STRING_MAX, GFP_KERNEL); - if (!npsidid->sidstr) { - kfree(npsidid); - return -ENOMEM; - } - - spin_lock(cidlock); - psidid = sid_rb_search(cidtree, cid); - if (psidid) { /* node happened to get inserted meanwhile */ - ++psidid->refcount; - spin_unlock(cidlock); - kfree(npsidid->sidstr); - kfree(npsidid); - } else { - psidid = npsidid; - sid_rb_insert(cidtree, cid, &psidid, - sidtype == SIDOWNER ? "oi:" : "gi:"); - ++psidid->refcount; - spin_unlock(cidlock); - } - } else { - ++psidid->refcount; - spin_unlock(cidlock); - } - /* - * If we are here, it is safe to access psidid and its fields - * since a reference was taken earlier while holding the spinlock. - * A reference on the node is put without holding the spinlock - * and it is OK to do so in this case, shrinker will not erase - * this node until all references are put and we do not access - * any fields of the node after a reference is put . - */ - if (test_bit(SID_ID_MAPPED, &psidid->state)) { - cifs_copy_sid(ssid, &psidid->sid); - psidid->time = jiffies; /* update ts for accessing */ - goto id_sid_out; - } + rc = snprintf(desc, sizeof(desc), "%ci:%u", + sidtype == SIDOWNER ? 'o' : 'g', cid); + if (rc >= sizeof(desc)) + return -EINVAL; - if (time_after(psidid->time + SID_MAP_RETRY, jiffies)) { + rc = 0; + saved_cred = override_creds(root_cred); + sidkey = request_key(&cifs_idmap_key_type, desc, ""); + if (IS_ERR(sidkey)) { rc = -EINVAL; - goto id_sid_out; - } - - if (!test_and_set_bit(SID_ID_PENDING, &psidid->state)) { - saved_cred = override_creds(root_cred); - sidkey = request_key(&cifs_idmap_key_type, psidid->sidstr, ""); - if (IS_ERR(sidkey)) { - rc = -EINVAL; - cFYI(1, "%s: Can't map and id to a SID", __func__); - } else if (sidkey->datalen < CIFS_SID_BASE_SIZE) { - rc = -EIO; - cFYI(1, "%s: Downcall contained malformed key " - "(datalen=%hu)", __func__, sidkey->datalen); - } else { - lsid = (struct cifs_sid *)sidkey->payload.data; - cifs_copy_sid(&psidid->sid, lsid); - cifs_copy_sid(ssid, &psidid->sid); - set_bit(SID_ID_MAPPED, &psidid->state); - key_put(sidkey); - kfree(psidid->sidstr); - } - psidid->time = jiffies; /* update ts for accessing */ - revert_creds(saved_cred); - clear_bit(SID_ID_PENDING, &psidid->state); - wake_up_bit(&psidid->state, SID_ID_PENDING); - } else { - rc = wait_on_bit(&psidid->state, SID_ID_PENDING, - sidid_pending_wait, TASK_INTERRUPTIBLE); - if (rc) { - cFYI(1, "%s: sidid_pending_wait interrupted %d", - __func__, rc); - --psidid->refcount; - return rc; - } - if (test_bit(SID_ID_MAPPED, &psidid->state)) - cifs_copy_sid(ssid, &psidid->sid); - else - rc = -EINVAL; - } -id_sid_out: - --psidid->refcount; + cFYI(1, "%s: Can't map %cid %u to a SID", __func__, + sidtype == SIDOWNER ? 'u' : 'g', cid); + goto out_revert_creds; + } else if (sidkey->datalen < CIFS_SID_BASE_SIZE) { + rc = -EIO; + cFYI(1, "%s: Downcall contained malformed key " + "(datalen=%hu)", __func__, sidkey->datalen); + goto out_key_put; + } + cifs_copy_sid(ssid, (struct cifs_sid *)sidkey->payload.data); +out_key_put: + key_put(sidkey); +out_revert_creds: + revert_creds(saved_cred); return rc; } @@ -470,111 +211,66 @@ sid_to_id(struct cifs_sb_info *cifs_sb, struct cifs_sid *psid, struct cifs_fattr *fattr, uint sidtype) { int rc; - unsigned long cid; - struct key *idkey; + struct key *sidkey; + char *sidstr; const struct cred *saved_cred; - struct cifs_sid_id *psidid, *npsidid; - struct rb_root *cidtree; - spinlock_t *cidlock; - - if (sidtype == SIDOWNER) { - cid = cifs_sb->mnt_uid; /* default uid, in case upcall fails */ - cidlock = &siduidlock; - cidtree = &uidtree; - } else if (sidtype == SIDGROUP) { - cid = cifs_sb->mnt_gid; /* default gid, in case upcall fails */ - cidlock = &sidgidlock; - cidtree = &gidtree; - } else - return -ENOENT; - - spin_lock(cidlock); - psidid = id_rb_search(cidtree, psid); - - if (!psidid) { /* node does not exist, allocate one & attempt adding */ - spin_unlock(cidlock); - npsidid = kzalloc(sizeof(struct cifs_sid_id), GFP_KERNEL); - if (!npsidid) - return -ENOMEM; - - npsidid->sidstr = kmalloc(SID_STRING_MAX, GFP_KERNEL); - if (!npsidid->sidstr) { - kfree(npsidid); - return -ENOMEM; - } - - spin_lock(cidlock); - psidid = id_rb_search(cidtree, psid); - if (psidid) { /* node happened to get inserted meanwhile */ - ++psidid->refcount; - spin_unlock(cidlock); - kfree(npsidid->sidstr); - kfree(npsidid); - } else { - psidid = npsidid; - id_rb_insert(cidtree, psid, &psidid, - sidtype == SIDOWNER ? "os:" : "gs:"); - ++psidid->refcount; - spin_unlock(cidlock); - } - } else { - ++psidid->refcount; - spin_unlock(cidlock); - } + uid_t fuid = cifs_sb->mnt_uid; + gid_t fgid = cifs_sb->mnt_gid; /* - * If we are here, it is safe to access psidid and its fields - * since a reference was taken earlier while holding the spinlock. - * A reference on the node is put without holding the spinlock - * and it is OK to do so in this case, shrinker will not erase - * this node until all references are put and we do not access - * any fields of the node after a reference is put . + * If we have too many subauthorities, then something is really wrong. + * Just return an error. */ - if (test_bit(SID_ID_MAPPED, &psidid->state)) { - cid = psidid->id; - psidid->time = jiffies; /* update ts for accessing */ - goto sid_to_id_out; + if (unlikely(psid->num_subauth > SID_MAX_SUB_AUTHORITIES)) { + cFYI(1, "%s: %u subauthorities is too many!", __func__, + psid->num_subauth); + return -EIO; } - if (time_after(psidid->time + SID_MAP_RETRY, jiffies)) - goto sid_to_id_out; - - if (!test_and_set_bit(SID_ID_PENDING, &psidid->state)) { - saved_cred = override_creds(root_cred); - idkey = request_key(&cifs_idmap_key_type, psidid->sidstr, ""); - if (IS_ERR(idkey)) - cFYI(1, "%s: Can't map SID to an id", __func__); - else { - cid = *(unsigned long *)idkey->payload.value; - psidid->id = cid; - set_bit(SID_ID_MAPPED, &psidid->state); - key_put(idkey); - kfree(psidid->sidstr); - } - revert_creds(saved_cred); - psidid->time = jiffies; /* update ts for accessing */ - clear_bit(SID_ID_PENDING, &psidid->state); - wake_up_bit(&psidid->state, SID_ID_PENDING); - } else { - rc = wait_on_bit(&psidid->state, SID_ID_PENDING, - sidid_pending_wait, TASK_INTERRUPTIBLE); - if (rc) { - cFYI(1, "%s: sidid_pending_wait interrupted %d", - __func__, rc); - --psidid->refcount; /* decremented without spinlock */ - return rc; - } - if (test_bit(SID_ID_MAPPED, &psidid->state)) - cid = psidid->id; + sidstr = sid_to_key_str(psid, sidtype); + if (!sidstr) + return -ENOMEM; + + saved_cred = override_creds(root_cred); + sidkey = request_key(&cifs_idmap_key_type, sidstr, ""); + if (IS_ERR(sidkey)) { + rc = -EINVAL; + cFYI(1, "%s: Can't map SID %s to a %cid", __func__, sidstr, + sidtype == SIDOWNER ? 'u' : 'g'); + goto out_revert_creds; + } + + /* + * FIXME: Here we assume that uid_t and gid_t are same size. It's + * probably a safe assumption but might be better to check based on + * sidtype. + */ + if (sidkey->datalen < sizeof(uid_t)) { + rc = -EIO; + cFYI(1, "%s: Downcall contained malformed key " + "(datalen=%hu)", __func__, sidkey->datalen); + goto out_key_put; } -sid_to_id_out: - --psidid->refcount; /* decremented without spinlock */ if (sidtype == SIDOWNER) - fattr->cf_uid = cid; + fuid = *(uid_t *)sidkey->payload.value; else - fattr->cf_gid = cid; + fgid = *(gid_t *)sidkey->payload.value; +out_key_put: + key_put(sidkey); +out_revert_creds: + revert_creds(saved_cred); + kfree(sidstr); + + /* + * Note that we return 0 here unconditionally. If the mapping + * fails then we just fall back to using the mnt_uid/mnt_gid. + */ + if (sidtype == SIDOWNER) + fattr->cf_uid = fuid; + else + fattr->cf_gid = fgid; return 0; } @@ -621,17 +317,6 @@ init_cifs_idmap(void) cred->jit_keyring = KEY_REQKEY_DEFL_THREAD_KEYRING; root_cred = cred; - spin_lock_init(&siduidlock); - uidtree = RB_ROOT; - spin_lock_init(&sidgidlock); - gidtree = RB_ROOT; - - spin_lock_init(&uidsidlock); - siduidtree = RB_ROOT; - spin_lock_init(&gidsidlock); - sidgidtree = RB_ROOT; - register_shrinker(&cifs_shrinker); - cFYI(1, "cifs idmap keyring: %d", key_serial(keyring)); return 0; @@ -648,41 +333,9 @@ exit_cifs_idmap(void) key_revoke(root_cred->thread_keyring); unregister_key_type(&cifs_idmap_key_type); put_cred(root_cred); - unregister_shrinker(&cifs_shrinker); cFYI(1, "Unregistered %s key type", cifs_idmap_key_type.name); } -void -cifs_destroy_idmaptrees(void) -{ - struct rb_root *root; - struct rb_node *node; - - root = &uidtree; - spin_lock(&siduidlock); - while ((node = rb_first(root))) - rb_erase(node, root); - spin_unlock(&siduidlock); - - root = &gidtree; - spin_lock(&sidgidlock); - while ((node = rb_first(root))) - rb_erase(node, root); - spin_unlock(&sidgidlock); - - root = &siduidtree; - spin_lock(&uidsidlock); - while ((node = rb_first(root))) - rb_erase(node, root); - spin_unlock(&uidsidlock); - - root = &sidgidtree; - spin_lock(&gidsidlock); - while ((node = rb_first(root))) - rb_erase(node, root); - spin_unlock(&gidsidlock); -} - /* copy ntsd, owner sid, and group sid from a security descriptor to another */ static void copy_sec_desc(const struct cifs_ntsd *pntsd, struct cifs_ntsd *pnntsd, __u32 sidsoffset) -- cgit v1.2.3