From 6d7b5aaed7d887b34f29f900244cdbd17a86637c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 10 Jun 2012 04:15:17 -0400 Subject: namei.c: let follow_link() do put_link() on failure no need for kludgy "set cookie to ERR_PTR(...) because we failed before we did actual ->follow_link() and want to suppress put_link()", no pointless check in put_link() itself. Callers checked if follow_link() has failed anyway; might as well break out of their loops if that happened, without bothering to call put_link() first. [AV: folded fixes from hch] Signed-off-by: Al Viro --- fs/namei.c | 74 ++++++++++++++++++++++++++++++++++---------------------------- 1 file changed, 41 insertions(+), 33 deletions(-) (limited to 'fs/namei.c') diff --git a/fs/namei.c b/fs/namei.c index 7d694194024..6135a14d5a8 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -605,7 +605,7 @@ static inline void path_to_nameidata(const struct path *path, static inline void put_link(struct nameidata *nd, struct path *link, void *cookie) { struct inode *inode = link->dentry->d_inode; - if (!IS_ERR(cookie) && inode->i_op->put_link) + if (inode->i_op->put_link) inode->i_op->put_link(link->dentry, nd, cookie); path_put(link); } @@ -613,19 +613,19 @@ static inline void put_link(struct nameidata *nd, struct path *link, void *cooki static __always_inline int follow_link(struct path *link, struct nameidata *nd, void **p) { - int error; struct dentry *dentry = link->dentry; + int error; + char *s; BUG_ON(nd->flags & LOOKUP_RCU); if (link->mnt == nd->path.mnt) mntget(link->mnt); - if (unlikely(current->total_link_count >= 40)) { - *p = ERR_PTR(-ELOOP); /* no ->put_link(), please */ - path_put(&nd->path); - return -ELOOP; - } + error = -ELOOP; + if (unlikely(current->total_link_count >= 40)) + goto out_put_nd_path; + cond_resched(); current->total_link_count++; @@ -633,30 +633,37 @@ follow_link(struct path *link, struct nameidata *nd, void **p) nd_set_link(nd, NULL); error = security_inode_follow_link(link->dentry, nd); - if (error) { - *p = ERR_PTR(error); /* no ->put_link(), please */ - path_put(&nd->path); - return error; - } + if (error) + goto out_put_nd_path; nd->last_type = LAST_BIND; *p = dentry->d_inode->i_op->follow_link(dentry, nd); error = PTR_ERR(*p); - if (!IS_ERR(*p)) { - char *s = nd_get_link(nd); - error = 0; - if (s) - error = __vfs_follow_link(nd, s); - else if (nd->last_type == LAST_BIND) { - nd->flags |= LOOKUP_JUMPED; - nd->inode = nd->path.dentry->d_inode; - if (nd->inode->i_op->follow_link) { - /* stepped on a _really_ weird one */ - path_put(&nd->path); - error = -ELOOP; - } + if (IS_ERR(*p)) + goto out_put_link; + + error = 0; + s = nd_get_link(nd); + if (s) { + error = __vfs_follow_link(nd, s); + } else if (nd->last_type == LAST_BIND) { + nd->flags |= LOOKUP_JUMPED; + nd->inode = nd->path.dentry->d_inode; + if (nd->inode->i_op->follow_link) { + /* stepped on a _really_ weird one */ + path_put(&nd->path); + error = -ELOOP; } } + if (unlikely(error)) + put_link(nd, link, *p); + + return error; + +out_put_nd_path: + path_put(&nd->path); +out_put_link: + path_put(link); return error; } @@ -1383,9 +1390,10 @@ static inline int nested_symlink(struct path *path, struct nameidata *nd) void *cookie; res = follow_link(&link, nd, &cookie); - if (!res) - res = walk_component(nd, path, &nd->last, - nd->last_type, LOOKUP_FOLLOW); + if (res) + break; + res = walk_component(nd, path, &nd->last, + nd->last_type, LOOKUP_FOLLOW); put_link(nd, &link, cookie); } while (res > 0); @@ -1777,8 +1785,9 @@ static int path_lookupat(int dfd, const char *name, struct path link = path; nd->flags |= LOOKUP_PARENT; err = follow_link(&link, nd, &cookie); - if (!err) - err = lookup_last(nd, &path); + if (err) + break; + err = lookup_last(nd, &path); put_link(nd, &link, cookie); } } @@ -2475,9 +2484,8 @@ static struct file *path_openat(int dfd, const char *pathname, nd->flags &= ~(LOOKUP_OPEN|LOOKUP_CREATE|LOOKUP_EXCL); error = follow_link(&link, nd, &cookie); if (unlikely(error)) - filp = ERR_PTR(error); - else - filp = do_last(nd, &path, op, pathname); + goto out_filp; + filp = do_last(nd, &path, op, pathname); put_link(nd, &link, cookie); } out: -- cgit v1.2.3 From 37d7fffc9cafe75ded8a840fa30ba625f99ed7ae Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 5 Jun 2012 15:10:12 +0200 Subject: vfs: do_last(): inline lookup_slow() Copy lookup_slow() into do_last(). Signed-off-by: Miklos Szeredi Signed-off-by: Al Viro --- fs/namei.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) (limited to 'fs/namei.c') diff --git a/fs/namei.c b/fs/namei.c index 6135a14d5a8..68742e3cb98 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2254,9 +2254,22 @@ static struct file *do_last(struct nameidata *nd, struct path *path, if (error < 0) goto exit; - error = lookup_slow(nd, &nd->last, path); - if (error < 0) + BUG_ON(nd->inode != dir->d_inode); + + mutex_lock(&dir->d_inode->i_mutex); + dentry = __lookup_hash(&nd->last, dir, nd); + mutex_unlock(&dir->d_inode->i_mutex); + error = PTR_ERR(dentry); + if (IS_ERR(dentry)) goto exit; + path->mnt = nd->path.mnt; + path->dentry = dentry; + error = follow_managed(path, nd->flags); + if (unlikely(error < 0)) + goto exit_dput; + + if (error) + nd->flags |= LOOKUP_JUMPED; inode = path->dentry->d_inode; } -- cgit v1.2.3 From b6183df7b294997a748eeb9991daa126986ead12 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 5 Jun 2012 15:10:13 +0200 Subject: vfs: do_last(): separate O_CREAT specific code Check O_CREAT on the slow lookup paths where necessary. This allows the rest to be shared with plain open. Signed-off-by: Miklos Szeredi Signed-off-by: Al Viro --- fs/namei.c | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) (limited to 'fs/namei.c') diff --git a/fs/namei.c b/fs/namei.c index 68742e3cb98..12ed29712b4 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2274,22 +2274,23 @@ static struct file *do_last(struct nameidata *nd, struct path *path, inode = path->dentry->d_inode; } goto finish_lookup; - } - - /* create side of things */ - /* - * This will *only* deal with leaving RCU mode - LOOKUP_JUMPED has been - * cleared when we got to the last component we are about to look up - */ - error = complete_walk(nd); - if (error) - return ERR_PTR(error); + } else { + /* create side of things */ + /* + * This will *only* deal with leaving RCU mode - LOOKUP_JUMPED + * has been cleared when we got to the last component we are + * about to look up + */ + error = complete_walk(nd); + if (error) + return ERR_PTR(error); - audit_inode(pathname, dir); - error = -EISDIR; - /* trailing slashes? */ - if (nd->last.name[nd->last.len]) - goto exit; + audit_inode(pathname, dir); + error = -EISDIR; + /* trailing slashes? */ + if (nd->last.name[nd->last.len]) + goto exit; + } retry_lookup: mutex_lock(&dir->d_inode->i_mutex); @@ -2305,7 +2306,7 @@ retry_lookup: path->mnt = nd->path.mnt; /* Negative dentry, just create the file */ - if (!dentry->d_inode) { + if (!dentry->d_inode && (open_flag & O_CREAT)) { umode_t mode = op->mode; if (!IS_POSIXACL(dir->d_inode)) mode &= ~current_umask(); -- cgit v1.2.3 From 7157486541bffc0dfec912e21ae639b029dae3d3 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 5 Jun 2012 15:10:14 +0200 Subject: vfs: do_last(): common slow lookup Make the slow lookup part of O_CREAT and non-O_CREAT opens common. This allows atomic_open to be hooked into the slow lookup part. Signed-off-by: Miklos Szeredi Signed-off-by: Al Viro --- fs/namei.c | 27 +++++---------------------- 1 file changed, 5 insertions(+), 22 deletions(-) (limited to 'fs/namei.c') diff --git a/fs/namei.c b/fs/namei.c index 12ed29712b4..285e62e925f 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2250,30 +2250,13 @@ static struct file *do_last(struct nameidata *nd, struct path *path, symlink_ok = 1; /* we _can_ be in RCU mode here */ error = lookup_fast(nd, &nd->last, path, &inode); - if (unlikely(error)) { - if (error < 0) - goto exit; - - BUG_ON(nd->inode != dir->d_inode); - - mutex_lock(&dir->d_inode->i_mutex); - dentry = __lookup_hash(&nd->last, dir, nd); - mutex_unlock(&dir->d_inode->i_mutex); - error = PTR_ERR(dentry); - if (IS_ERR(dentry)) - goto exit; - path->mnt = nd->path.mnt; - path->dentry = dentry; - error = follow_managed(path, nd->flags); - if (unlikely(error < 0)) - goto exit_dput; + if (likely(!error)) + goto finish_lookup; - if (error) - nd->flags |= LOOKUP_JUMPED; + if (error < 0) + goto exit; - inode = path->dentry->d_inode; - } - goto finish_lookup; + BUG_ON(nd->inode != dir->d_inode); } else { /* create side of things */ /* -- cgit v1.2.3 From d58ffd35c1e595df2cf8ac4803f178c8be95ca7a Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 5 Jun 2012 15:10:15 +0200 Subject: vfs: add lookup_open() Split out lookup + maybe create from do_last(). This is the part under i_mutex protection. The function is called lookup_open() and returns a filp even though the open part is not used yet. Signed-off-by: Miklos Szeredi Signed-off-by: Al Viro --- fs/namei.c | 99 ++++++++++++++++++++++++++++++++++++++------------------------ 1 file changed, 61 insertions(+), 38 deletions(-) (limited to 'fs/namei.c') diff --git a/fs/namei.c b/fs/namei.c index 285e62e925f..fad7117dbb2 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2196,6 +2196,60 @@ static inline int open_to_namei_flags(int flag) return flag; } +/* + * Lookup, maybe create and open the last component + * + * Must be called with i_mutex held on parent. + * + * Returns open file or NULL on success, error otherwise. NULL means no open + * was performed, only lookup. + */ +static struct file *lookup_open(struct nameidata *nd, struct path *path, + const struct open_flags *op, + int *want_write, bool *created) +{ + struct dentry *dir = nd->path.dentry; + struct dentry *dentry; + int error; + + *created = false; + dentry = lookup_hash(nd); + if (IS_ERR(dentry)) + return ERR_CAST(dentry); + + /* Negative dentry, just create the file */ + if (!dentry->d_inode && (op->open_flag & O_CREAT)) { + umode_t mode = op->mode; + if (!IS_POSIXACL(dir->d_inode)) + mode &= ~current_umask(); + /* + * This write is needed to ensure that a + * rw->ro transition does not occur between + * the time when the file is created and when + * a permanent write count is taken through + * the 'struct file' in nameidata_to_filp(). + */ + error = mnt_want_write(nd->path.mnt); + if (error) + goto out_dput; + *want_write = 1; + *created = true; + error = security_path_mknod(&nd->path, dentry, mode, 0); + if (error) + goto out_dput; + error = vfs_create(dir->d_inode, dentry, mode, nd); + if (error) + goto out_dput; + } + path->dentry = dentry; + path->mnt = nd->path.mnt; + return NULL; + +out_dput: + dput(dentry); + return ERR_PTR(error); +} + /* * Handle the last step of open() */ @@ -2203,13 +2257,13 @@ static struct file *do_last(struct nameidata *nd, struct path *path, const struct open_flags *op, const char *pathname) { struct dentry *dir = nd->path.dentry; - struct dentry *dentry; int open_flag = op->open_flag; int will_truncate = open_flag & O_TRUNC; int want_write = 0; int acc_mode = op->acc_mode; struct file *filp; struct inode *inode; + bool created; int symlink_ok = 0; struct path save_parent = { .dentry = NULL, .mnt = NULL }; bool retried = false; @@ -2277,53 +2331,24 @@ static struct file *do_last(struct nameidata *nd, struct path *path, retry_lookup: mutex_lock(&dir->d_inode->i_mutex); + filp = lookup_open(nd, path, op, &want_write, &created); + mutex_unlock(&dir->d_inode->i_mutex); - dentry = lookup_hash(nd); - error = PTR_ERR(dentry); - if (IS_ERR(dentry)) { - mutex_unlock(&dir->d_inode->i_mutex); - goto exit; - } - - path->dentry = dentry; - path->mnt = nd->path.mnt; + if (IS_ERR(filp)) + goto out; - /* Negative dentry, just create the file */ - if (!dentry->d_inode && (open_flag & O_CREAT)) { - umode_t mode = op->mode; - if (!IS_POSIXACL(dir->d_inode)) - mode &= ~current_umask(); - /* - * This write is needed to ensure that a - * rw->ro transition does not occur between - * the time when the file is created and when - * a permanent write count is taken through - * the 'struct file' in nameidata_to_filp(). - */ - error = mnt_want_write(nd->path.mnt); - if (error) - goto exit_mutex_unlock; - want_write = 1; + if (created) { /* Don't check for write permission, don't truncate */ open_flag &= ~O_TRUNC; will_truncate = 0; acc_mode = MAY_OPEN; - error = security_path_mknod(&nd->path, dentry, mode, 0); - if (error) - goto exit_mutex_unlock; - error = vfs_create(dir->d_inode, dentry, mode, nd); - if (error) - goto exit_mutex_unlock; - mutex_unlock(&dir->d_inode->i_mutex); - dput(nd->path.dentry); - nd->path.dentry = dentry; + path_to_nameidata(path, nd); goto common; } /* * It already exists. */ - mutex_unlock(&dir->d_inode->i_mutex); audit_inode(pathname, path->dentry); error = -EEXIST; @@ -2432,8 +2457,6 @@ out: terminate_walk(nd); return filp; -exit_mutex_unlock: - mutex_unlock(&dir->d_inode->i_mutex); exit_dput: path_put_conditional(path, nd); exit: -- cgit v1.2.3 From 54ef487241e863a6046536ac5b1fcd5d7cde86e5 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 5 Jun 2012 15:10:16 +0200 Subject: vfs: lookup_open(): expand lookup_hash() Copy __lookup_hash() into lookup_open(). The next patch will insert the atomic open call just before the real lookup. Signed-off-by: Miklos Szeredi Signed-off-by: Al Viro --- fs/namei.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'fs/namei.c') diff --git a/fs/namei.c b/fs/namei.c index fad7117dbb2..ccb0eb17f52 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2209,14 +2209,24 @@ static struct file *lookup_open(struct nameidata *nd, struct path *path, int *want_write, bool *created) { struct dentry *dir = nd->path.dentry; + struct inode *dir_inode = dir->d_inode; struct dentry *dentry; int error; + bool need_lookup; *created = false; - dentry = lookup_hash(nd); + dentry = lookup_dcache(&nd->last, dir, nd, &need_lookup); if (IS_ERR(dentry)) return ERR_CAST(dentry); + if (need_lookup) { + BUG_ON(dentry->d_inode); + + dentry = lookup_real(dir_inode, dentry, nd); + if (IS_ERR(dentry)) + return ERR_CAST(dentry); + } + /* Negative dentry, just create the file */ if (!dentry->d_inode && (op->open_flag & O_CREAT)) { umode_t mode = op->mode; -- cgit v1.2.3 From d18e9008c377dc6a6d2166a6840bf3a23a5867fd Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 5 Jun 2012 15:10:17 +0200 Subject: vfs: add i_op->atomic_open() Add a new inode operation which is called on the last component of an open. Using this the filesystem can look up, possibly create and open the file in one atomic operation. If it cannot perform this (e.g. the file type turned out to be wrong) it may signal this by returning NULL instead of an open struct file pointer. i_op->atomic_open() is only called if the last component is negative or needs lookup. Handling cached positive dentries here doesn't add much value: these can be opened using f_op->open(). If the cached file turns out to be invalid, the open can be retried, this time using ->atomic_open() with a fresh dentry. For now leave the old way of using open intents in lookup and revalidate in place. This will be removed once all the users are converted. David Howells noticed that if ->atomic_open() opens the file but does not create it, handle_truncate() will be called on it even if it is not a regular file. Fix this by checking the file type in this case too. Signed-off-by: Miklos Szeredi Signed-off-by: Al Viro --- fs/namei.c | 203 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 201 insertions(+), 2 deletions(-) (limited to 'fs/namei.c') diff --git a/fs/namei.c b/fs/namei.c index ccb0eb17f52..9e11ae83bff 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2196,6 +2196,176 @@ static inline int open_to_namei_flags(int flag) return flag; } +static int may_o_create(struct path *dir, struct dentry *dentry, umode_t mode) +{ + int error = security_path_mknod(dir, dentry, mode, 0); + if (error) + return error; + + error = inode_permission(dir->dentry->d_inode, MAY_WRITE | MAY_EXEC); + if (error) + return error; + + return security_inode_create(dir->dentry->d_inode, dentry, mode); +} + +static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry, + struct path *path, const struct open_flags *op, + int *want_write, bool need_lookup, + bool *created) +{ + struct inode *dir = nd->path.dentry->d_inode; + unsigned open_flag = open_to_namei_flags(op->open_flag); + umode_t mode; + int error; + int acc_mode; + struct opendata od; + struct file *filp; + int create_error = 0; + struct dentry *const DENTRY_NOT_SET = (void *) -1UL; + + BUG_ON(dentry->d_inode); + + /* Don't create child dentry for a dead directory. */ + if (unlikely(IS_DEADDIR(dir))) { + filp = ERR_PTR(-ENOENT); + goto out; + } + + mode = op->mode & S_IALLUGO; + if ((open_flag & O_CREAT) && !IS_POSIXACL(dir)) + mode &= ~current_umask(); + + if (open_flag & O_EXCL) { + open_flag &= ~O_TRUNC; + *created = true; + } + + /* + * Checking write permission is tricky, bacuse we don't know if we are + * going to actually need it: O_CREAT opens should work as long as the + * file exists. But checking existence breaks atomicity. The trick is + * to check access and if not granted clear O_CREAT from the flags. + * + * Another problem is returing the "right" error value (e.g. for an + * O_EXCL open we want to return EEXIST not EROFS). + */ + if ((open_flag & (O_CREAT | O_TRUNC)) || + (open_flag & O_ACCMODE) != O_RDONLY) { + error = mnt_want_write(nd->path.mnt); + if (!error) { + *want_write = 1; + } else if (!(open_flag & O_CREAT)) { + /* + * No O_CREATE -> atomicity not a requirement -> fall + * back to lookup + open + */ + goto no_open; + } else if (open_flag & (O_EXCL | O_TRUNC)) { + /* Fall back and fail with the right error */ + create_error = error; + goto no_open; + } else { + /* No side effects, safe to clear O_CREAT */ + create_error = error; + open_flag &= ~O_CREAT; + } + } + + if (open_flag & O_CREAT) { + error = may_o_create(&nd->path, dentry, op->mode); + if (error) { + create_error = error; + if (open_flag & O_EXCL) + goto no_open; + open_flag &= ~O_CREAT; + } + } + + if (nd->flags & LOOKUP_DIRECTORY) + open_flag |= O_DIRECTORY; + + od.dentry = DENTRY_NOT_SET; + od.mnt = nd->path.mnt; + od.filp = &nd->intent.open.file; + filp = dir->i_op->atomic_open(dir, dentry, &od, open_flag, mode, + created); + if (IS_ERR(filp)) { + if (WARN_ON(od.dentry != DENTRY_NOT_SET)) + dput(od.dentry); + + if (create_error && PTR_ERR(filp) == -ENOENT) + filp = ERR_PTR(create_error); + goto out; + } + + acc_mode = op->acc_mode; + if (*created) { + fsnotify_create(dir, dentry); + acc_mode = MAY_OPEN; + } + + if (!filp) { + if (WARN_ON(od.dentry == DENTRY_NOT_SET)) { + filp = ERR_PTR(-EIO); + goto out; + } + if (od.dentry) { + dput(dentry); + dentry = od.dentry; + } + goto looked_up; + } + + /* + * We didn't have the inode before the open, so check open permission + * here. + */ + error = may_open(&filp->f_path, acc_mode, open_flag); + if (error) + goto out_fput; + + error = open_check_o_direct(filp); + if (error) + goto out_fput; + +out: + dput(dentry); + return filp; + +out_fput: + fput(filp); + filp = ERR_PTR(error); + goto out; + +no_open: + if (need_lookup) { + dentry = lookup_real(dir, dentry, nd); + if (IS_ERR(dentry)) + return ERR_CAST(dentry); + + if (create_error) { + int open_flag = op->open_flag; + + filp = ERR_PTR(create_error); + if ((open_flag & O_EXCL)) { + if (!dentry->d_inode) + goto out; + } else if (!dentry->d_inode) { + goto out; + } else if ((open_flag & O_TRUNC) && + S_ISREG(dentry->d_inode->i_mode)) { + goto out; + } + /* will fail later, go on to get the right error */ + } + } +looked_up: + path->dentry = dentry; + path->mnt = nd->path.mnt; + return NULL; +} + /* * Lookup, maybe create and open the last component * @@ -2219,6 +2389,15 @@ static struct file *lookup_open(struct nameidata *nd, struct path *path, if (IS_ERR(dentry)) return ERR_CAST(dentry); + /* Cached positive dentry: will open in f_op->open */ + if (!need_lookup && dentry->d_inode) + goto out_no_open; + + if ((nd->flags & LOOKUP_OPEN) && dir_inode->i_op->atomic_open) { + return atomic_open(nd, dentry, path, op, want_write, + need_lookup, created); + } + if (need_lookup) { BUG_ON(dentry->d_inode); @@ -2251,6 +2430,7 @@ static struct file *lookup_open(struct nameidata *nd, struct path *path, if (error) goto out_dput; } +out_no_open: path->dentry = dentry; path->mnt = nd->path.mnt; return NULL; @@ -2344,8 +2524,16 @@ retry_lookup: filp = lookup_open(nd, path, op, &want_write, &created); mutex_unlock(&dir->d_inode->i_mutex); - if (IS_ERR(filp)) - goto out; + if (filp) { + if (IS_ERR(filp)) + goto out; + + if (created || !S_ISREG(filp->f_path.dentry->d_inode->i_mode)) + will_truncate = 0; + + audit_inode(pathname, filp->f_path.dentry); + goto opened; + } if (created) { /* Don't check for write permission, don't truncate */ @@ -2361,6 +2549,16 @@ retry_lookup: */ audit_inode(pathname, path->dentry); + /* + * If atomic_open() acquired write access it is dropped now due to + * possible mount and symlink following (this might be optimized away if + * necessary...) + */ + if (want_write) { + mnt_drop_write(nd->path.mnt); + want_write = 0; + } + error = -EEXIST; if (open_flag & O_EXCL) goto exit_dput; @@ -2444,6 +2642,7 @@ common: retried = true; goto retry_lookup; } +opened: if (!IS_ERR(filp)) { error = ima_file_check(filp, op->acc_mode); if (error) { -- cgit v1.2.3 From 015c3bbcd88df2305aae5b017a9c91c08b380aa1 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 5 Jun 2012 15:10:27 +0200 Subject: vfs: remove open intents from nameidata All users of open intents have been converted to use ->atomic_{open,create}. This patch gets rid of nd->intent.open and related infrastructure. Signed-off-by: Miklos Szeredi Signed-off-by: Al Viro --- fs/namei.c | 95 +++++++++++++++++++++++++++++--------------------------------- 1 file changed, 45 insertions(+), 50 deletions(-) (limited to 'fs/namei.c') diff --git a/fs/namei.c b/fs/namei.c index 9e11ae83bff..0ed876259f8 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -463,22 +463,6 @@ err_root: return -ECHILD; } -/** - * release_open_intent - free up open intent resources - * @nd: pointer to nameidata - */ -void release_open_intent(struct nameidata *nd) -{ - struct file *file = nd->intent.open.file; - - if (file && !IS_ERR(file)) { - if (file->f_path.dentry == NULL) - put_filp(file); - else - fput(file); - } -} - static inline int d_revalidate(struct dentry *dentry, struct nameidata *nd) { return dentry->d_op->d_revalidate(dentry, nd); @@ -2210,7 +2194,8 @@ static int may_o_create(struct path *dir, struct dentry *dentry, umode_t mode) } static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry, - struct path *path, const struct open_flags *op, + struct path *path, struct opendata *od, + const struct open_flags *op, int *want_write, bool need_lookup, bool *created) { @@ -2219,7 +2204,6 @@ static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry, umode_t mode; int error; int acc_mode; - struct opendata od; struct file *filp; int create_error = 0; struct dentry *const DENTRY_NOT_SET = (void *) -1UL; @@ -2285,14 +2269,13 @@ static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry, if (nd->flags & LOOKUP_DIRECTORY) open_flag |= O_DIRECTORY; - od.dentry = DENTRY_NOT_SET; - od.mnt = nd->path.mnt; - od.filp = &nd->intent.open.file; - filp = dir->i_op->atomic_open(dir, dentry, &od, open_flag, mode, + od->dentry = DENTRY_NOT_SET; + od->mnt = nd->path.mnt; + filp = dir->i_op->atomic_open(dir, dentry, od, open_flag, mode, created); if (IS_ERR(filp)) { - if (WARN_ON(od.dentry != DENTRY_NOT_SET)) - dput(od.dentry); + if (WARN_ON(od->dentry != DENTRY_NOT_SET)) + dput(od->dentry); if (create_error && PTR_ERR(filp) == -ENOENT) filp = ERR_PTR(create_error); @@ -2306,13 +2289,13 @@ static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry, } if (!filp) { - if (WARN_ON(od.dentry == DENTRY_NOT_SET)) { + if (WARN_ON(od->dentry == DENTRY_NOT_SET)) { filp = ERR_PTR(-EIO); goto out; } - if (od.dentry) { + if (od->dentry) { dput(dentry); - dentry = od.dentry; + dentry = od->dentry; } goto looked_up; } @@ -2375,6 +2358,7 @@ looked_up: * was performed, only lookup. */ static struct file *lookup_open(struct nameidata *nd, struct path *path, + struct opendata *od, const struct open_flags *op, int *want_write, bool *created) { @@ -2394,7 +2378,7 @@ static struct file *lookup_open(struct nameidata *nd, struct path *path, goto out_no_open; if ((nd->flags & LOOKUP_OPEN) && dir_inode->i_op->atomic_open) { - return atomic_open(nd, dentry, path, op, want_write, + return atomic_open(nd, dentry, path, od, op, want_write, need_lookup, created); } @@ -2416,7 +2400,7 @@ static struct file *lookup_open(struct nameidata *nd, struct path *path, * rw->ro transition does not occur between * the time when the file is created and when * a permanent write count is taken through - * the 'struct file' in nameidata_to_filp(). + * the 'struct file' in finish_open(). */ error = mnt_want_write(nd->path.mnt); if (error) @@ -2444,7 +2428,8 @@ out_dput: * Handle the last step of open() */ static struct file *do_last(struct nameidata *nd, struct path *path, - const struct open_flags *op, const char *pathname) + struct opendata *od, const struct open_flags *op, + const char *pathname) { struct dentry *dir = nd->path.dentry; int open_flag = op->open_flag; @@ -2521,7 +2506,7 @@ static struct file *do_last(struct nameidata *nd, struct path *path, retry_lookup: mutex_lock(&dir->d_inode->i_mutex); - filp = lookup_open(nd, path, op, &want_write, &created); + filp = lookup_open(nd, path, od, op, &want_write, &created); mutex_unlock(&dir->d_inode->i_mutex); if (filp) { @@ -2627,7 +2612,8 @@ common: error = may_open(&nd->path, acc_mode, open_flag); if (error) goto exit; - filp = nameidata_to_filp(nd); + od->mnt = nd->path.mnt; + filp = finish_open(od, nd->path.dentry, NULL); if (filp == ERR_PTR(-EOPENSTALE) && save_parent.dentry && !retried) { BUG_ON(save_parent.dentry != dir); path_put(&nd->path); @@ -2642,6 +2628,11 @@ common: retried = true; goto retry_lookup; } + if (IS_ERR(filp)) + goto out; + error = open_check_o_direct(filp); + if (error) + goto exit_fput; opened: if (!IS_ERR(filp)) { error = ima_file_check(filp, op->acc_mode); @@ -2671,24 +2662,26 @@ exit_dput: exit: filp = ERR_PTR(error); goto out; +exit_fput: + fput(filp); + goto exit; + } static struct file *path_openat(int dfd, const char *pathname, struct nameidata *nd, const struct open_flags *op, int flags) { struct file *base = NULL; - struct file *filp; + struct opendata od; + struct file *res; struct path path; int error; - filp = get_empty_filp(); - if (!filp) + od.filp = get_empty_filp(); + if (!od.filp) return ERR_PTR(-ENFILE); - filp->f_flags = op->open_flag; - nd->intent.open.file = filp; - nd->intent.open.flags = open_to_namei_flags(op->open_flag); - nd->intent.open.create_mode = op->mode; + od.filp->f_flags = op->open_flag; error = path_init(dfd, pathname, flags | LOOKUP_PARENT, nd, &base); if (unlikely(error)) @@ -2699,14 +2692,14 @@ static struct file *path_openat(int dfd, const char *pathname, if (unlikely(error)) goto out_filp; - filp = do_last(nd, &path, op, pathname); - while (unlikely(!filp)) { /* trailing symlink */ + res = do_last(nd, &path, &od, op, pathname); + while (unlikely(!res)) { /* trailing symlink */ struct path link = path; void *cookie; if (!(nd->flags & LOOKUP_FOLLOW)) { path_put_conditional(&path, nd); path_put(&nd->path); - filp = ERR_PTR(-ELOOP); + res = ERR_PTR(-ELOOP); break; } nd->flags |= LOOKUP_PARENT; @@ -2714,7 +2707,7 @@ static struct file *path_openat(int dfd, const char *pathname, error = follow_link(&link, nd, &cookie); if (unlikely(error)) goto out_filp; - filp = do_last(nd, &path, op, pathname); + res = do_last(nd, &path, &od, op, pathname); put_link(nd, &link, cookie); } out: @@ -2722,17 +2715,20 @@ out: path_put(&nd->root); if (base) fput(base); - release_open_intent(nd); - if (filp == ERR_PTR(-EOPENSTALE)) { + if (od.filp) { + BUG_ON(od.filp->f_path.dentry); + put_filp(od.filp); + } + if (res == ERR_PTR(-EOPENSTALE)) { if (flags & LOOKUP_RCU) - filp = ERR_PTR(-ECHILD); + res = ERR_PTR(-ECHILD); else - filp = ERR_PTR(-ESTALE); + res = ERR_PTR(-ESTALE); } - return filp; + return res; out_filp: - filp = ERR_PTR(error); + res = ERR_PTR(error); goto out; } @@ -2788,7 +2784,6 @@ struct dentry *kern_path_create(int dfd, const char *pathname, struct path *path goto out; nd.flags &= ~LOOKUP_PARENT; nd.flags |= LOOKUP_CREATE | LOOKUP_EXCL; - nd.intent.open.flags = O_EXCL; /* * Do the final lookup. -- cgit v1.2.3 From aa4caadb70b782999ce5d150ac2f4b1d18e2fc75 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 5 Jun 2012 15:10:28 +0200 Subject: vfs: do_last(): clean up error handling Signed-off-by: Miklos Szeredi Signed-off-by: Al Viro --- fs/namei.c | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) (limited to 'fs/namei.c') diff --git a/fs/namei.c b/fs/namei.c index 0ed876259f8..044215a7bb0 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2634,21 +2634,14 @@ common: if (error) goto exit_fput; opened: - if (!IS_ERR(filp)) { - error = ima_file_check(filp, op->acc_mode); - if (error) { - fput(filp); - filp = ERR_PTR(error); - } - } - if (!IS_ERR(filp)) { - if (will_truncate) { - error = handle_truncate(filp); - if (error) { - fput(filp); - filp = ERR_PTR(error); - } - } + error = ima_file_check(filp, op->acc_mode); + if (error) + goto exit_fput; + + if (will_truncate) { + error = handle_truncate(filp); + if (error) + goto exit_fput; } out: if (want_write) -- cgit v1.2.3 From e83db167229702da0f48957641e0dbf36b2644aa Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 5 Jun 2012 15:10:29 +0200 Subject: vfs: do_last(): clean up labels Reported-by: David Howells Signed-off-by: Miklos Szeredi Signed-off-by: Al Viro --- fs/namei.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'fs/namei.c') diff --git a/fs/namei.c b/fs/namei.c index 044215a7bb0..ea24376cfa9 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2463,13 +2463,13 @@ static struct file *do_last(struct nameidata *nd, struct path *path, error = -EISDIR; goto exit; } - goto ok; + goto finish_open; case LAST_BIND: error = complete_walk(nd); if (error) return ERR_PTR(error); audit_inode(pathname, dir); - goto ok; + goto finish_open; } if (!(open_flag & O_CREAT)) { @@ -2526,7 +2526,7 @@ retry_lookup: will_truncate = 0; acc_mode = MAY_OPEN; path_to_nameidata(path, nd); - goto common; + goto finish_open_created; } /* @@ -2598,7 +2598,7 @@ finish_lookup: if ((nd->flags & LOOKUP_DIRECTORY) && !nd->inode->i_op->lookup) goto exit; audit_inode(pathname, nd->path.dentry); -ok: +finish_open: if (!S_ISREG(nd->inode->i_mode)) will_truncate = 0; @@ -2608,7 +2608,7 @@ ok: goto exit; want_write = 1; } -common: +finish_open_created: error = may_open(&nd->path, acc_mode, open_flag); if (error) goto exit; -- cgit v1.2.3 From 77d660a8a83036432dc33f092a367d06563d233e Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 5 Jun 2012 15:10:30 +0200 Subject: vfs: do_last(): clean up bool Consistently use bool for boolean values in do_last(). Reported-by: David Howells Signed-off-by: Miklos Szeredi Signed-off-by: Al Viro --- fs/namei.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) (limited to 'fs/namei.c') diff --git a/fs/namei.c b/fs/namei.c index ea24376cfa9..6bdb8d73253 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2196,7 +2196,7 @@ static int may_o_create(struct path *dir, struct dentry *dentry, umode_t mode) static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry, struct path *path, struct opendata *od, const struct open_flags *op, - int *want_write, bool need_lookup, + bool *want_write, bool need_lookup, bool *created) { struct inode *dir = nd->path.dentry->d_inode; @@ -2238,7 +2238,7 @@ static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry, (open_flag & O_ACCMODE) != O_RDONLY) { error = mnt_want_write(nd->path.mnt); if (!error) { - *want_write = 1; + *want_write = true; } else if (!(open_flag & O_CREAT)) { /* * No O_CREATE -> atomicity not a requirement -> fall @@ -2360,7 +2360,7 @@ looked_up: static struct file *lookup_open(struct nameidata *nd, struct path *path, struct opendata *od, const struct open_flags *op, - int *want_write, bool *created) + bool *want_write, bool *created) { struct dentry *dir = nd->path.dentry; struct inode *dir_inode = dir->d_inode; @@ -2405,7 +2405,7 @@ static struct file *lookup_open(struct nameidata *nd, struct path *path, error = mnt_want_write(nd->path.mnt); if (error) goto out_dput; - *want_write = 1; + *want_write = true; *created = true; error = security_path_mknod(&nd->path, dentry, mode, 0); if (error) @@ -2433,13 +2433,13 @@ static struct file *do_last(struct nameidata *nd, struct path *path, { struct dentry *dir = nd->path.dentry; int open_flag = op->open_flag; - int will_truncate = open_flag & O_TRUNC; - int want_write = 0; + bool will_truncate = (open_flag & O_TRUNC) != 0; + bool want_write = false; int acc_mode = op->acc_mode; struct file *filp; struct inode *inode; bool created; - int symlink_ok = 0; + bool symlink_ok = false; struct path save_parent = { .dentry = NULL, .mnt = NULL }; bool retried = false; int error; @@ -2476,7 +2476,7 @@ static struct file *do_last(struct nameidata *nd, struct path *path, if (nd->last.name[nd->last.len]) nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY; if (open_flag & O_PATH && !(nd->flags & LOOKUP_FOLLOW)) - symlink_ok = 1; + symlink_ok = true; /* we _can_ be in RCU mode here */ error = lookup_fast(nd, &nd->last, path, &inode); if (likely(!error)) @@ -2514,7 +2514,7 @@ retry_lookup: goto out; if (created || !S_ISREG(filp->f_path.dentry->d_inode->i_mode)) - will_truncate = 0; + will_truncate = false; audit_inode(pathname, filp->f_path.dentry); goto opened; @@ -2523,7 +2523,7 @@ retry_lookup: if (created) { /* Don't check for write permission, don't truncate */ open_flag &= ~O_TRUNC; - will_truncate = 0; + will_truncate = false; acc_mode = MAY_OPEN; path_to_nameidata(path, nd); goto finish_open_created; @@ -2541,7 +2541,7 @@ retry_lookup: */ if (want_write) { mnt_drop_write(nd->path.mnt); - want_write = 0; + want_write = false; } error = -EEXIST; @@ -2600,13 +2600,13 @@ finish_lookup: audit_inode(pathname, nd->path.dentry); finish_open: if (!S_ISREG(nd->inode->i_mode)) - will_truncate = 0; + will_truncate = false; if (will_truncate) { error = mnt_want_write(nd->path.mnt); if (error) goto exit; - want_write = 1; + want_write = true; } finish_open_created: error = may_open(&nd->path, acc_mode, open_flag); @@ -2623,7 +2623,7 @@ finish_open_created: save_parent.dentry = NULL; if (want_write) { mnt_drop_write(nd->path.mnt); - want_write = 0; + want_write = false; } retried = true; goto retry_lookup; -- cgit v1.2.3 From f60dc3db6e24b7c36445cf1feb56b34c799074b3 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 5 Jun 2012 15:10:31 +0200 Subject: vfs: do_last(): clean up retry Move the lookup retry logic to the bottom of the function to make the normal case simpler to read. Reported-by: David Howells Signed-off-by: Miklos Szeredi Signed-off-by: Al Viro --- fs/namei.c | 36 +++++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 15 deletions(-) (limited to 'fs/namei.c') diff --git a/fs/namei.c b/fs/namei.c index 6bdb8d73253..183a769537f 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2614,22 +2614,11 @@ finish_open_created: goto exit; od->mnt = nd->path.mnt; filp = finish_open(od, nd->path.dentry, NULL); - if (filp == ERR_PTR(-EOPENSTALE) && save_parent.dentry && !retried) { - BUG_ON(save_parent.dentry != dir); - path_put(&nd->path); - nd->path = save_parent; - nd->inode = dir->d_inode; - save_parent.mnt = NULL; - save_parent.dentry = NULL; - if (want_write) { - mnt_drop_write(nd->path.mnt); - want_write = false; - } - retried = true; - goto retry_lookup; - } - if (IS_ERR(filp)) + if (IS_ERR(filp)) { + if (filp == ERR_PTR(-EOPENSTALE)) + goto stale_open; goto out; + } error = open_check_o_direct(filp); if (error) goto exit_fput; @@ -2659,6 +2648,23 @@ exit_fput: fput(filp); goto exit; +stale_open: + /* If no saved parent or already retried then can't retry */ + if (!save_parent.dentry || retried) + goto out; + + BUG_ON(save_parent.dentry != dir); + path_put(&nd->path); + nd->path = save_parent; + nd->inode = dir->d_inode; + save_parent.mnt = NULL; + save_parent.dentry = NULL; + if (want_write) { + mnt_drop_write(nd->path.mnt); + want_write = false; + } + retried = true; + goto retry_lookup; } static struct file *path_openat(int dfd, const char *pathname, -- cgit v1.2.3 From a8277b9baa6268de386529a33061775bc716198b Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 5 Jun 2012 15:10:32 +0200 Subject: vfs: move O_DIRECT check to common code Perform open_check_o_direct() in a common place in do_last after opening the file. Signed-off-by: Miklos Szeredi Signed-off-by: Al Viro --- fs/namei.c | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) (limited to 'fs/namei.c') diff --git a/fs/namei.c b/fs/namei.c index 183a769537f..4bc4bc6a693 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2305,22 +2305,15 @@ static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry, * here. */ error = may_open(&filp->f_path, acc_mode, open_flag); - if (error) - goto out_fput; - - error = open_check_o_direct(filp); - if (error) - goto out_fput; + if (error) { + fput(filp); + filp = ERR_PTR(error); + } out: dput(dentry); return filp; -out_fput: - fput(filp); - filp = ERR_PTR(error); - goto out; - no_open: if (need_lookup) { dentry = lookup_real(dir, dentry, nd); @@ -2619,10 +2612,10 @@ finish_open_created: goto stale_open; goto out; } +opened: error = open_check_o_direct(filp); if (error) goto exit_fput; -opened: error = ima_file_check(filp, op->acc_mode); if (error) goto exit_fput; -- cgit v1.2.3 From 47237687d73cbeae1dd7a133c3fc3d7239094568 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 10 Jun 2012 05:01:45 -0400 Subject: ->atomic_open() prototype change - pass int * instead of bool * ... and let finish_open() report having opened the file via that sucker. Next step: don't modify od->filp at all. [AV: FILE_CREATE was already used by cifs; Miklos' fix folded] Signed-off-by: Al Viro --- fs/namei.c | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) (limited to 'fs/namei.c') diff --git a/fs/namei.c b/fs/namei.c index 4bc4bc6a693..7a33f074e5b 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2197,7 +2197,7 @@ static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry, struct path *path, struct opendata *od, const struct open_flags *op, bool *want_write, bool need_lookup, - bool *created) + int *opened) { struct inode *dir = nd->path.dentry->d_inode; unsigned open_flag = open_to_namei_flags(op->open_flag); @@ -2222,7 +2222,7 @@ static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry, if (open_flag & O_EXCL) { open_flag &= ~O_TRUNC; - *created = true; + *opened |= FILE_CREATED; } /* @@ -2272,7 +2272,7 @@ static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry, od->dentry = DENTRY_NOT_SET; od->mnt = nd->path.mnt; filp = dir->i_op->atomic_open(dir, dentry, od, open_flag, mode, - created); + opened); if (IS_ERR(filp)) { if (WARN_ON(od->dentry != DENTRY_NOT_SET)) dput(od->dentry); @@ -2283,7 +2283,7 @@ static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry, } acc_mode = op->acc_mode; - if (*created) { + if (*opened & FILE_CREATED) { fsnotify_create(dir, dentry); acc_mode = MAY_OPEN; } @@ -2353,7 +2353,7 @@ looked_up: static struct file *lookup_open(struct nameidata *nd, struct path *path, struct opendata *od, const struct open_flags *op, - bool *want_write, bool *created) + bool *want_write, int *opened) { struct dentry *dir = nd->path.dentry; struct inode *dir_inode = dir->d_inode; @@ -2361,7 +2361,7 @@ static struct file *lookup_open(struct nameidata *nd, struct path *path, int error; bool need_lookup; - *created = false; + *opened &= ~FILE_CREATED; dentry = lookup_dcache(&nd->last, dir, nd, &need_lookup); if (IS_ERR(dentry)) return ERR_CAST(dentry); @@ -2372,7 +2372,7 @@ static struct file *lookup_open(struct nameidata *nd, struct path *path, if ((nd->flags & LOOKUP_OPEN) && dir_inode->i_op->atomic_open) { return atomic_open(nd, dentry, path, od, op, want_write, - need_lookup, created); + need_lookup, opened); } if (need_lookup) { @@ -2399,7 +2399,7 @@ static struct file *lookup_open(struct nameidata *nd, struct path *path, if (error) goto out_dput; *want_write = true; - *created = true; + *opened |= FILE_CREATED; error = security_path_mknod(&nd->path, dentry, mode, 0); if (error) goto out_dput; @@ -2422,7 +2422,7 @@ out_dput: */ static struct file *do_last(struct nameidata *nd, struct path *path, struct opendata *od, const struct open_flags *op, - const char *pathname) + int *opened, const char *pathname) { struct dentry *dir = nd->path.dentry; int open_flag = op->open_flag; @@ -2431,7 +2431,6 @@ static struct file *do_last(struct nameidata *nd, struct path *path, int acc_mode = op->acc_mode; struct file *filp; struct inode *inode; - bool created; bool symlink_ok = false; struct path save_parent = { .dentry = NULL, .mnt = NULL }; bool retried = false; @@ -2499,21 +2498,22 @@ static struct file *do_last(struct nameidata *nd, struct path *path, retry_lookup: mutex_lock(&dir->d_inode->i_mutex); - filp = lookup_open(nd, path, od, op, &want_write, &created); + filp = lookup_open(nd, path, od, op, &want_write, opened); mutex_unlock(&dir->d_inode->i_mutex); if (filp) { if (IS_ERR(filp)) goto out; - if (created || !S_ISREG(filp->f_path.dentry->d_inode->i_mode)) + if ((*opened & FILE_CREATED) || + !S_ISREG(filp->f_path.dentry->d_inode->i_mode)) will_truncate = false; audit_inode(pathname, filp->f_path.dentry); goto opened; } - if (created) { + if (*opened & FILE_CREATED) { /* Don't check for write permission, don't truncate */ open_flag &= ~O_TRUNC; will_truncate = false; @@ -2606,7 +2606,7 @@ finish_open_created: if (error) goto exit; od->mnt = nd->path.mnt; - filp = finish_open(od, nd->path.dentry, NULL); + filp = finish_open(od, nd->path.dentry, NULL, opened); if (IS_ERR(filp)) { if (filp == ERR_PTR(-EOPENSTALE)) goto stale_open; @@ -2667,6 +2667,7 @@ static struct file *path_openat(int dfd, const char *pathname, struct opendata od; struct file *res; struct path path; + int opened = 0; int error; od.filp = get_empty_filp(); @@ -2684,7 +2685,7 @@ static struct file *path_openat(int dfd, const char *pathname, if (unlikely(error)) goto out_filp; - res = do_last(nd, &path, &od, op, pathname); + res = do_last(nd, &path, &od, op, &opened, pathname); while (unlikely(!res)) { /* trailing symlink */ struct path link = path; void *cookie; @@ -2699,7 +2700,7 @@ static struct file *path_openat(int dfd, const char *pathname, error = follow_link(&link, nd, &cookie); if (unlikely(error)) goto out_filp; - res = do_last(nd, &path, &od, op, pathname); + res = do_last(nd, &path, &od, op, &opened, pathname); put_link(nd, &link, cookie); } out: -- cgit v1.2.3 From 3d8a00d2099ebc6d5a6e95fadaf861709d9919a8 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 10 Jun 2012 05:04:43 -0400 Subject: don't modify od->filp at all make put_filp() conditional on flag set by finish_open() Signed-off-by: Al Viro --- fs/namei.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'fs/namei.c') diff --git a/fs/namei.c b/fs/namei.c index 7a33f074e5b..18b9326d951 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2708,10 +2708,8 @@ out: path_put(&nd->root); if (base) fput(base); - if (od.filp) { - BUG_ON(od.filp->f_path.dentry); + if (!(opened & FILE_OPENED)) put_filp(od.filp); - } if (res == ERR_PTR(-EOPENSTALE)) { if (flags & LOOKUP_RCU) res = ERR_PTR(-ECHILD); -- cgit v1.2.3 From d95852777bc8ba6b3ad3397d495c5f9dd8ca8383 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 22 Jun 2012 12:39:14 +0400 Subject: make ->atomic_open() return int Change of calling conventions: old new NULL 1 file 0 ERR_PTR(-ve) -ve Caller *knows* that struct file *; no need to return it. Signed-off-by: Al Viro --- fs/namei.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'fs/namei.c') diff --git a/fs/namei.c b/fs/namei.c index 18b9326d951..f0dae0057ec 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2204,7 +2204,7 @@ static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry, umode_t mode; int error; int acc_mode; - struct file *filp; + struct file *filp = NULL; int create_error = 0; struct dentry *const DENTRY_NOT_SET = (void *) -1UL; @@ -2271,14 +2271,15 @@ static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry, od->dentry = DENTRY_NOT_SET; od->mnt = nd->path.mnt; - filp = dir->i_op->atomic_open(dir, dentry, od, open_flag, mode, + error = dir->i_op->atomic_open(dir, dentry, od, open_flag, mode, opened); - if (IS_ERR(filp)) { + if (error < 0) { if (WARN_ON(od->dentry != DENTRY_NOT_SET)) dput(od->dentry); - if (create_error && PTR_ERR(filp) == -ENOENT) - filp = ERR_PTR(create_error); + if (create_error && error == -ENOENT) + error = create_error; + filp = ERR_PTR(error); goto out; } @@ -2288,7 +2289,7 @@ static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry, acc_mode = MAY_OPEN; } - if (!filp) { + if (error) { /* returned 1, that is */ if (WARN_ON(od->dentry == DENTRY_NOT_SET)) { filp = ERR_PTR(-EIO); goto out; @@ -2304,6 +2305,7 @@ static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry, * We didn't have the inode before the open, so check open permission * here. */ + filp = od->filp; error = may_open(&filp->f_path, acc_mode, open_flag); if (error) { fput(filp); -- cgit v1.2.3 From a4a3bdd778715999ddfeefdc52ab76254580fa76 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 10 Jun 2012 05:55:37 -0400 Subject: kill opendata->{mnt,dentry} ->filp->f_path is there for purpose... Signed-off-by: Al Viro --- fs/namei.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) (limited to 'fs/namei.c') diff --git a/fs/namei.c b/fs/namei.c index f0dae0057ec..af83ede92a4 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2269,14 +2269,11 @@ static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry, if (nd->flags & LOOKUP_DIRECTORY) open_flag |= O_DIRECTORY; - od->dentry = DENTRY_NOT_SET; - od->mnt = nd->path.mnt; + od->filp->f_path.dentry = DENTRY_NOT_SET; + od->filp->f_path.mnt = nd->path.mnt; error = dir->i_op->atomic_open(dir, dentry, od, open_flag, mode, opened); if (error < 0) { - if (WARN_ON(od->dentry != DENTRY_NOT_SET)) - dput(od->dentry); - if (create_error && error == -ENOENT) error = create_error; filp = ERR_PTR(error); @@ -2290,13 +2287,13 @@ static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry, } if (error) { /* returned 1, that is */ - if (WARN_ON(od->dentry == DENTRY_NOT_SET)) { + if (WARN_ON(od->filp->f_path.dentry == DENTRY_NOT_SET)) { filp = ERR_PTR(-EIO); goto out; } - if (od->dentry) { + if (od->filp->f_path.dentry) { dput(dentry); - dentry = od->dentry; + dentry = od->filp->f_path.dentry; } goto looked_up; } @@ -2607,7 +2604,7 @@ finish_open_created: error = may_open(&nd->path, acc_mode, open_flag); if (error) goto exit; - od->mnt = nd->path.mnt; + od->filp->f_path.mnt = nd->path.mnt; filp = finish_open(od, nd->path.dentry, NULL, opened); if (IS_ERR(filp)) { if (filp == ERR_PTR(-EOPENSTALE)) -- cgit v1.2.3 From 30d904947459cca2beb69e0110716f5248b31f2a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 22 Jun 2012 12:40:19 +0400 Subject: kill struct opendata Just pass struct file *. Methods are happier that way... There's no need to return struct file * from finish_open() now, so let it return int. Next: saner prototypes for parts in namei.c Signed-off-by: Al Viro --- fs/namei.c | 48 +++++++++++++++++++++++++----------------------- 1 file changed, 25 insertions(+), 23 deletions(-) (limited to 'fs/namei.c') diff --git a/fs/namei.c b/fs/namei.c index af83ede92a4..aaff8a86215 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2194,7 +2194,7 @@ static int may_o_create(struct path *dir, struct dentry *dentry, umode_t mode) } static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry, - struct path *path, struct opendata *od, + struct path *path, struct file *file, const struct open_flags *op, bool *want_write, bool need_lookup, int *opened) @@ -2269,9 +2269,9 @@ static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry, if (nd->flags & LOOKUP_DIRECTORY) open_flag |= O_DIRECTORY; - od->filp->f_path.dentry = DENTRY_NOT_SET; - od->filp->f_path.mnt = nd->path.mnt; - error = dir->i_op->atomic_open(dir, dentry, od, open_flag, mode, + file->f_path.dentry = DENTRY_NOT_SET; + file->f_path.mnt = nd->path.mnt; + error = dir->i_op->atomic_open(dir, dentry, file, open_flag, mode, opened); if (error < 0) { if (create_error && error == -ENOENT) @@ -2287,13 +2287,13 @@ static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry, } if (error) { /* returned 1, that is */ - if (WARN_ON(od->filp->f_path.dentry == DENTRY_NOT_SET)) { + if (WARN_ON(file->f_path.dentry == DENTRY_NOT_SET)) { filp = ERR_PTR(-EIO); goto out; } - if (od->filp->f_path.dentry) { + if (file->f_path.dentry) { dput(dentry); - dentry = od->filp->f_path.dentry; + dentry = file->f_path.dentry; } goto looked_up; } @@ -2302,7 +2302,7 @@ static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry, * We didn't have the inode before the open, so check open permission * here. */ - filp = od->filp; + filp = file; error = may_open(&filp->f_path, acc_mode, open_flag); if (error) { fput(filp); @@ -2350,7 +2350,7 @@ looked_up: * was performed, only lookup. */ static struct file *lookup_open(struct nameidata *nd, struct path *path, - struct opendata *od, + struct file *file, const struct open_flags *op, bool *want_write, int *opened) { @@ -2370,7 +2370,7 @@ static struct file *lookup_open(struct nameidata *nd, struct path *path, goto out_no_open; if ((nd->flags & LOOKUP_OPEN) && dir_inode->i_op->atomic_open) { - return atomic_open(nd, dentry, path, od, op, want_write, + return atomic_open(nd, dentry, path, file, op, want_write, need_lookup, opened); } @@ -2420,7 +2420,7 @@ out_dput: * Handle the last step of open() */ static struct file *do_last(struct nameidata *nd, struct path *path, - struct opendata *od, const struct open_flags *op, + struct file *file, const struct open_flags *op, int *opened, const char *pathname) { struct dentry *dir = nd->path.dentry; @@ -2497,7 +2497,7 @@ static struct file *do_last(struct nameidata *nd, struct path *path, retry_lookup: mutex_lock(&dir->d_inode->i_mutex); - filp = lookup_open(nd, path, od, op, &want_write, opened); + filp = lookup_open(nd, path, file, op, &want_write, opened); mutex_unlock(&dir->d_inode->i_mutex); if (filp) { @@ -2604,13 +2604,15 @@ finish_open_created: error = may_open(&nd->path, acc_mode, open_flag); if (error) goto exit; - od->filp->f_path.mnt = nd->path.mnt; - filp = finish_open(od, nd->path.dentry, NULL, opened); - if (IS_ERR(filp)) { - if (filp == ERR_PTR(-EOPENSTALE)) + file->f_path.mnt = nd->path.mnt; + error = finish_open(file, nd->path.dentry, NULL, opened); + if (error) { + filp = ERR_PTR(error); + if (error == -EOPENSTALE) goto stale_open; goto out; } + filp = file; opened: error = open_check_o_direct(filp); if (error) @@ -2663,17 +2665,17 @@ static struct file *path_openat(int dfd, const char *pathname, struct nameidata *nd, const struct open_flags *op, int flags) { struct file *base = NULL; - struct opendata od; + struct file *file; struct file *res; struct path path; int opened = 0; int error; - od.filp = get_empty_filp(); - if (!od.filp) + file = get_empty_filp(); + if (!file) return ERR_PTR(-ENFILE); - od.filp->f_flags = op->open_flag; + file->f_flags = op->open_flag; error = path_init(dfd, pathname, flags | LOOKUP_PARENT, nd, &base); if (unlikely(error)) @@ -2684,7 +2686,7 @@ static struct file *path_openat(int dfd, const char *pathname, if (unlikely(error)) goto out_filp; - res = do_last(nd, &path, &od, op, &opened, pathname); + res = do_last(nd, &path, file, op, &opened, pathname); while (unlikely(!res)) { /* trailing symlink */ struct path link = path; void *cookie; @@ -2699,7 +2701,7 @@ static struct file *path_openat(int dfd, const char *pathname, error = follow_link(&link, nd, &cookie); if (unlikely(error)) goto out_filp; - res = do_last(nd, &path, &od, op, &opened, pathname); + res = do_last(nd, &path, file, op, &opened, pathname); put_link(nd, &link, cookie); } out: @@ -2708,7 +2710,7 @@ out: if (base) fput(base); if (!(opened & FILE_OPENED)) - put_filp(od.filp); + put_filp(file); if (res == ERR_PTR(-EOPENSTALE)) { if (flags & LOOKUP_RCU) res = ERR_PTR(-ECHILD); -- cgit v1.2.3 From 2675a4eb6a9f1240098721c8a84ede28abd9d7b3 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 22 Jun 2012 12:41:10 +0400 Subject: fs/namei.c: get do_last() and friends return int Same conventions as for ->atomic_open(). Trimmed the forest of labels a bit, while we are at it... Signed-off-by: Al Viro --- fs/namei.c | 150 +++++++++++++++++++++++++++++-------------------------------- 1 file changed, 70 insertions(+), 80 deletions(-) (limited to 'fs/namei.c') diff --git a/fs/namei.c b/fs/namei.c index aaff8a86215..16256d915cb 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2193,18 +2193,17 @@ static int may_o_create(struct path *dir, struct dentry *dentry, umode_t mode) return security_inode_create(dir->dentry->d_inode, dentry, mode); } -static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry, - struct path *path, struct file *file, - const struct open_flags *op, - bool *want_write, bool need_lookup, - int *opened) +static int atomic_open(struct nameidata *nd, struct dentry *dentry, + struct path *path, struct file *file, + const struct open_flags *op, + bool *want_write, bool need_lookup, + int *opened) { struct inode *dir = nd->path.dentry->d_inode; unsigned open_flag = open_to_namei_flags(op->open_flag); umode_t mode; int error; int acc_mode; - struct file *filp = NULL; int create_error = 0; struct dentry *const DENTRY_NOT_SET = (void *) -1UL; @@ -2212,7 +2211,7 @@ static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry, /* Don't create child dentry for a dead directory. */ if (unlikely(IS_DEADDIR(dir))) { - filp = ERR_PTR(-ENOENT); + error = -ENOENT; goto out; } @@ -2276,7 +2275,6 @@ static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry, if (error < 0) { if (create_error && error == -ENOENT) error = create_error; - filp = ERR_PTR(error); goto out; } @@ -2288,7 +2286,7 @@ static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry, if (error) { /* returned 1, that is */ if (WARN_ON(file->f_path.dentry == DENTRY_NOT_SET)) { - filp = ERR_PTR(-EIO); + error = -EIO; goto out; } if (file->f_path.dentry) { @@ -2302,27 +2300,24 @@ static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry, * We didn't have the inode before the open, so check open permission * here. */ - filp = file; - error = may_open(&filp->f_path, acc_mode, open_flag); - if (error) { - fput(filp); - filp = ERR_PTR(error); - } + error = may_open(&file->f_path, acc_mode, open_flag); + if (error) + fput(file); out: dput(dentry); - return filp; + return error; no_open: if (need_lookup) { dentry = lookup_real(dir, dentry, nd); if (IS_ERR(dentry)) - return ERR_CAST(dentry); + return PTR_ERR(dentry); if (create_error) { int open_flag = op->open_flag; - filp = ERR_PTR(create_error); + error = create_error; if ((open_flag & O_EXCL)) { if (!dentry->d_inode) goto out; @@ -2338,7 +2333,7 @@ no_open: looked_up: path->dentry = dentry; path->mnt = nd->path.mnt; - return NULL; + return 1; } /* @@ -2349,10 +2344,10 @@ looked_up: * Returns open file or NULL on success, error otherwise. NULL means no open * was performed, only lookup. */ -static struct file *lookup_open(struct nameidata *nd, struct path *path, - struct file *file, - const struct open_flags *op, - bool *want_write, int *opened) +static int lookup_open(struct nameidata *nd, struct path *path, + struct file *file, + const struct open_flags *op, + bool *want_write, int *opened) { struct dentry *dir = nd->path.dentry; struct inode *dir_inode = dir->d_inode; @@ -2363,7 +2358,7 @@ static struct file *lookup_open(struct nameidata *nd, struct path *path, *opened &= ~FILE_CREATED; dentry = lookup_dcache(&nd->last, dir, nd, &need_lookup); if (IS_ERR(dentry)) - return ERR_CAST(dentry); + return PTR_ERR(dentry); /* Cached positive dentry: will open in f_op->open */ if (!need_lookup && dentry->d_inode) @@ -2379,7 +2374,7 @@ static struct file *lookup_open(struct nameidata *nd, struct path *path, dentry = lookup_real(dir_inode, dentry, nd); if (IS_ERR(dentry)) - return ERR_CAST(dentry); + return PTR_ERR(dentry); } /* Negative dentry, just create the file */ @@ -2409,26 +2404,25 @@ static struct file *lookup_open(struct nameidata *nd, struct path *path, out_no_open: path->dentry = dentry; path->mnt = nd->path.mnt; - return NULL; + return 1; out_dput: dput(dentry); - return ERR_PTR(error); + return error; } /* * Handle the last step of open() */ -static struct file *do_last(struct nameidata *nd, struct path *path, - struct file *file, const struct open_flags *op, - int *opened, const char *pathname) +static int do_last(struct nameidata *nd, struct path *path, + struct file *file, const struct open_flags *op, + int *opened, const char *pathname) { struct dentry *dir = nd->path.dentry; int open_flag = op->open_flag; bool will_truncate = (open_flag & O_TRUNC) != 0; bool want_write = false; int acc_mode = op->acc_mode; - struct file *filp; struct inode *inode; bool symlink_ok = false; struct path save_parent = { .dentry = NULL, .mnt = NULL }; @@ -2443,22 +2437,22 @@ static struct file *do_last(struct nameidata *nd, struct path *path, case LAST_DOT: error = handle_dots(nd, nd->last_type); if (error) - return ERR_PTR(error); + return error; /* fallthrough */ case LAST_ROOT: error = complete_walk(nd); if (error) - return ERR_PTR(error); + return error; audit_inode(pathname, nd->path.dentry); if (open_flag & O_CREAT) { error = -EISDIR; - goto exit; + goto out; } goto finish_open; case LAST_BIND: error = complete_walk(nd); if (error) - return ERR_PTR(error); + return error; audit_inode(pathname, dir); goto finish_open; } @@ -2474,7 +2468,7 @@ static struct file *do_last(struct nameidata *nd, struct path *path, goto finish_lookup; if (error < 0) - goto exit; + goto out; BUG_ON(nd->inode != dir->d_inode); } else { @@ -2486,29 +2480,29 @@ static struct file *do_last(struct nameidata *nd, struct path *path, */ error = complete_walk(nd); if (error) - return ERR_PTR(error); + return error; audit_inode(pathname, dir); error = -EISDIR; /* trailing slashes? */ if (nd->last.name[nd->last.len]) - goto exit; + goto out; } retry_lookup: mutex_lock(&dir->d_inode->i_mutex); - filp = lookup_open(nd, path, file, op, &want_write, opened); + error = lookup_open(nd, path, file, op, &want_write, opened); mutex_unlock(&dir->d_inode->i_mutex); - if (filp) { - if (IS_ERR(filp)) + if (error <= 0) { + if (error) goto out; if ((*opened & FILE_CREATED) || - !S_ISREG(filp->f_path.dentry->d_inode->i_mode)) + !S_ISREG(file->f_path.dentry->d_inode->i_mode)) will_truncate = false; - audit_inode(pathname, filp->f_path.dentry); + audit_inode(pathname, file->f_path.dentry); goto opened; } @@ -2554,18 +2548,18 @@ finish_lookup: error = -ENOENT; if (!inode) { path_to_nameidata(path, nd); - goto exit; + goto out; } if (should_follow_link(inode, !symlink_ok)) { if (nd->flags & LOOKUP_RCU) { if (unlikely(unlazy_walk(nd, path->dentry))) { error = -ECHILD; - goto exit; + goto out; } } BUG_ON(inode != path->dentry->d_inode); - return NULL; + return 1; } if ((nd->flags & LOOKUP_RCU) || nd->path.mnt != path->mnt) { @@ -2581,14 +2575,14 @@ finish_lookup: error = complete_walk(nd); if (error) { path_put(&save_parent); - return ERR_PTR(error); + return error; } error = -EISDIR; if ((open_flag & O_CREAT) && S_ISDIR(nd->inode->i_mode)) - goto exit; + goto out; error = -ENOTDIR; if ((nd->flags & LOOKUP_DIRECTORY) && !nd->inode->i_op->lookup) - goto exit; + goto out; audit_inode(pathname, nd->path.dentry); finish_open: if (!S_ISREG(nd->inode->i_mode)) @@ -2597,32 +2591,30 @@ finish_open: if (will_truncate) { error = mnt_want_write(nd->path.mnt); if (error) - goto exit; + goto out; want_write = true; } finish_open_created: error = may_open(&nd->path, acc_mode, open_flag); if (error) - goto exit; + goto out; file->f_path.mnt = nd->path.mnt; error = finish_open(file, nd->path.dentry, NULL, opened); if (error) { - filp = ERR_PTR(error); if (error == -EOPENSTALE) goto stale_open; goto out; } - filp = file; opened: - error = open_check_o_direct(filp); + error = open_check_o_direct(file); if (error) goto exit_fput; - error = ima_file_check(filp, op->acc_mode); + error = ima_file_check(file, op->acc_mode); if (error) goto exit_fput; if (will_truncate) { - error = handle_truncate(filp); + error = handle_truncate(file); if (error) goto exit_fput; } @@ -2631,16 +2623,14 @@ out: mnt_drop_write(nd->path.mnt); path_put(&save_parent); terminate_walk(nd); - return filp; + return error; exit_dput: path_put_conditional(path, nd); -exit: - filp = ERR_PTR(error); goto out; exit_fput: - fput(filp); - goto exit; + fput(file); + goto out; stale_open: /* If no saved parent or already retried then can't retry */ @@ -2666,7 +2656,6 @@ static struct file *path_openat(int dfd, const char *pathname, { struct file *base = NULL; struct file *file; - struct file *res; struct path path; int opened = 0; int error; @@ -2679,29 +2668,29 @@ static struct file *path_openat(int dfd, const char *pathname, error = path_init(dfd, pathname, flags | LOOKUP_PARENT, nd, &base); if (unlikely(error)) - goto out_filp; + goto out; current->total_link_count = 0; error = link_path_walk(pathname, nd); if (unlikely(error)) - goto out_filp; + goto out; - res = do_last(nd, &path, file, op, &opened, pathname); - while (unlikely(!res)) { /* trailing symlink */ + error = do_last(nd, &path, file, op, &opened, pathname); + while (unlikely(error > 0)) { /* trailing symlink */ struct path link = path; void *cookie; if (!(nd->flags & LOOKUP_FOLLOW)) { path_put_conditional(&path, nd); path_put(&nd->path); - res = ERR_PTR(-ELOOP); + error = -ELOOP; break; } nd->flags |= LOOKUP_PARENT; nd->flags &= ~(LOOKUP_OPEN|LOOKUP_CREATE|LOOKUP_EXCL); error = follow_link(&link, nd, &cookie); if (unlikely(error)) - goto out_filp; - res = do_last(nd, &path, file, op, &opened, pathname); + break; + error = do_last(nd, &path, file, op, &opened, pathname); put_link(nd, &link, cookie); } out: @@ -2709,19 +2698,20 @@ out: path_put(&nd->root); if (base) fput(base); - if (!(opened & FILE_OPENED)) + if (!(opened & FILE_OPENED)) { + BUG_ON(!error); put_filp(file); - if (res == ERR_PTR(-EOPENSTALE)) { - if (flags & LOOKUP_RCU) - res = ERR_PTR(-ECHILD); - else - res = ERR_PTR(-ESTALE); } - return res; - -out_filp: - res = ERR_PTR(error); - goto out; + if (unlikely(error)) { + if (error == -EOPENSTALE) { + if (flags & LOOKUP_RCU) + error = -ECHILD; + else + error = -ESTALE; + } + file = ERR_PTR(error); + } + return file; } struct file *do_filp_open(int dfd, const char *pathname, -- cgit v1.2.3 From 0b728e1911cbe6e24020727c3870628b9653f32a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 10 Jun 2012 16:03:43 -0400 Subject: stop passing nameidata * to ->d_revalidate() Just the lookup flags. Die, bastard, die... Signed-off-by: Al Viro --- fs/namei.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/namei.c') diff --git a/fs/namei.c b/fs/namei.c index 16256d915cb..1a5707aaed3 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -465,7 +465,7 @@ err_root: static inline int d_revalidate(struct dentry *dentry, struct nameidata *nd) { - return dentry->d_op->d_revalidate(dentry, nd); + return dentry->d_op->d_revalidate(dentry, nd ? nd->flags : 0); } /** -- cgit v1.2.3 From 4ce16ef3fed92c627b4b0136c02c85c81ee105e0 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 10 Jun 2012 16:10:59 -0400 Subject: fs/namei.c: don't pass nameidata to d_revalidate() since the method wrapped by it doesn't need that anymore... Signed-off-by: Al Viro --- fs/namei.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'fs/namei.c') diff --git a/fs/namei.c b/fs/namei.c index 1a5707aaed3..91c637b6898 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -463,9 +463,9 @@ err_root: return -ECHILD; } -static inline int d_revalidate(struct dentry *dentry, struct nameidata *nd) +static inline int d_revalidate(struct dentry *dentry, unsigned int flags) { - return dentry->d_op->d_revalidate(dentry, nd ? nd->flags : 0); + return dentry->d_op->d_revalidate(dentry, flags); } /** @@ -511,7 +511,7 @@ static int complete_walk(struct nameidata *nd) return 0; /* Note: we do not d_invalidate() */ - status = d_revalidate(dentry, nd); + status = d_revalidate(dentry, nd->flags); if (status > 0) return 0; @@ -1050,7 +1050,7 @@ static struct dentry *lookup_dcache(struct qstr *name, struct dentry *dir, if (d_need_lookup(dentry)) { *need_lookup = true; } else if (dentry->d_flags & DCACHE_OP_REVALIDATE) { - error = d_revalidate(dentry, nd); + error = d_revalidate(dentry, nd ? nd->flags : 0); if (unlikely(error <= 0)) { if (error < 0) { dput(dentry); @@ -1158,7 +1158,7 @@ static int lookup_fast(struct nameidata *nd, struct qstr *name, if (unlikely(d_need_lookup(dentry))) goto unlazy; if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) { - status = d_revalidate(dentry, nd); + status = d_revalidate(dentry, nd->flags); if (unlikely(status <= 0)) { if (status != -ECHILD) need_reval = 0; @@ -1188,7 +1188,7 @@ unlazy: } if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE) && need_reval) - status = d_revalidate(dentry, nd); + status = d_revalidate(dentry, nd->flags); if (unlikely(status <= 0)) { if (status < 0) { dput(dentry); -- cgit v1.2.3 From 201f956e43d4542723514e024d948011dd766d43 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 22 Jun 2012 12:42:10 +0400 Subject: fs/namei.c: don't pass namedata to lookup_dcache() just the flags... Signed-off-by: Al Viro --- fs/namei.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'fs/namei.c') diff --git a/fs/namei.c b/fs/namei.c index 91c637b6898..2e943ab04f3 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1039,7 +1039,7 @@ static void follow_dotdot(struct nameidata *nd) * dir->d_inode->i_mutex must be held */ static struct dentry *lookup_dcache(struct qstr *name, struct dentry *dir, - struct nameidata *nd, bool *need_lookup) + unsigned int flags, bool *need_lookup) { struct dentry *dentry; int error; @@ -1050,7 +1050,7 @@ static struct dentry *lookup_dcache(struct qstr *name, struct dentry *dir, if (d_need_lookup(dentry)) { *need_lookup = true; } else if (dentry->d_flags & DCACHE_OP_REVALIDATE) { - error = d_revalidate(dentry, nd ? nd->flags : 0); + error = d_revalidate(dentry, flags); if (unlikely(error <= 0)) { if (error < 0) { dput(dentry); @@ -1104,7 +1104,7 @@ static struct dentry *__lookup_hash(struct qstr *name, bool need_lookup; struct dentry *dentry; - dentry = lookup_dcache(name, base, nd, &need_lookup); + dentry = lookup_dcache(name, base, nd ? nd->flags : 0, &need_lookup); if (!need_lookup) return dentry; @@ -2356,7 +2356,7 @@ static int lookup_open(struct nameidata *nd, struct path *path, bool need_lookup; *opened &= ~FILE_CREATED; - dentry = lookup_dcache(&nd->last, dir, nd, &need_lookup); + dentry = lookup_dcache(&nd->last, dir, nd->flags, &need_lookup); if (IS_ERR(dentry)) return PTR_ERR(dentry); -- cgit v1.2.3 From 00cd8dd3bf95f2cc8435b4cac01d9995635c6d0b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 10 Jun 2012 17:13:09 -0400 Subject: stop passing nameidata to ->lookup() Just the flags; only NFS cares even about that, but there are legitimate uses for such argument. And getting rid of that completely would require splitting ->lookup() into a couple of methods (at least), so let's leave that alone for now... Signed-off-by: Al Viro --- fs/namei.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/namei.c') diff --git a/fs/namei.c b/fs/namei.c index 2e943ab04f3..175e81b8f26 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1090,7 +1090,7 @@ static struct dentry *lookup_real(struct inode *dir, struct dentry *dentry, return ERR_PTR(-ENOENT); } - old = dir->i_op->lookup(dir, dentry, nd); + old = dir->i_op->lookup(dir, dentry, nd ? nd->flags : 0); if (unlikely(old)) { dput(dentry); dentry = old; -- cgit v1.2.3 From 72bd866a01fc62ccbc466f3eb7599b14c937e96b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 10 Jun 2012 17:17:17 -0400 Subject: fs/namei.c: don't pass nameidata to __lookup_hash() and lookup_real() Signed-off-by: Al Viro --- fs/namei.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'fs/namei.c') diff --git a/fs/namei.c b/fs/namei.c index 175e81b8f26..fc01090a96c 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1080,7 +1080,7 @@ static struct dentry *lookup_dcache(struct qstr *name, struct dentry *dir, * dir->d_inode->i_mutex must be held */ static struct dentry *lookup_real(struct inode *dir, struct dentry *dentry, - struct nameidata *nd) + unsigned int flags) { struct dentry *old; @@ -1090,7 +1090,7 @@ static struct dentry *lookup_real(struct inode *dir, struct dentry *dentry, return ERR_PTR(-ENOENT); } - old = dir->i_op->lookup(dir, dentry, nd ? nd->flags : 0); + old = dir->i_op->lookup(dir, dentry, flags); if (unlikely(old)) { dput(dentry); dentry = old; @@ -1099,16 +1099,16 @@ static struct dentry *lookup_real(struct inode *dir, struct dentry *dentry, } static struct dentry *__lookup_hash(struct qstr *name, - struct dentry *base, struct nameidata *nd) + struct dentry *base, unsigned int flags) { bool need_lookup; struct dentry *dentry; - dentry = lookup_dcache(name, base, nd ? nd->flags : 0, &need_lookup); + dentry = lookup_dcache(name, base, flags, &need_lookup); if (!need_lookup) return dentry; - return lookup_real(base->d_inode, dentry, nd); + return lookup_real(base->d_inode, dentry, flags); } /* @@ -1227,7 +1227,7 @@ static int lookup_slow(struct nameidata *nd, struct qstr *name, BUG_ON(nd->inode != parent->d_inode); mutex_lock(&parent->d_inode->i_mutex); - dentry = __lookup_hash(name, parent, nd); + dentry = __lookup_hash(name, parent, nd->flags); mutex_unlock(&parent->d_inode->i_mutex); if (IS_ERR(dentry)) return PTR_ERR(dentry); @@ -1859,7 +1859,7 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt, */ static struct dentry *lookup_hash(struct nameidata *nd) { - return __lookup_hash(&nd->last, nd->path.dentry, nd); + return __lookup_hash(&nd->last, nd->path.dentry, nd->flags); } /** @@ -1906,7 +1906,7 @@ struct dentry *lookup_one_len(const char *name, struct dentry *base, int len) if (err) return ERR_PTR(err); - return __lookup_hash(&this, base, NULL); + return __lookup_hash(&this, base, 0); } int user_path_at_empty(int dfd, const char __user *name, unsigned flags, @@ -2310,7 +2310,7 @@ out: no_open: if (need_lookup) { - dentry = lookup_real(dir, dentry, nd); + dentry = lookup_real(dir, dentry, nd->flags); if (IS_ERR(dentry)) return PTR_ERR(dentry); @@ -2372,7 +2372,7 @@ static int lookup_open(struct nameidata *nd, struct path *path, if (need_lookup) { BUG_ON(dentry->d_inode); - dentry = lookup_real(dir_inode, dentry, nd); + dentry = lookup_real(dir_inode, dentry, nd->flags); if (IS_ERR(dentry)) return PTR_ERR(dentry); } -- cgit v1.2.3 From ebfc3b49a7ac25920cb5be5445f602e51d2ea559 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 10 Jun 2012 18:05:36 -0400 Subject: don't pass nameidata to ->create() boolean "does it have to be exclusive?" flag is passed instead; Local filesystem should just ignore it - the object is guaranteed not to be there yet. Signed-off-by: Al Viro --- fs/namei.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs/namei.c') diff --git a/fs/namei.c b/fs/namei.c index fc01090a96c..fd71156bfd7 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2082,7 +2082,6 @@ int vfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, struct nameidata *nd) { int error = may_create(dir, dentry); - if (error) return error; @@ -2093,7 +2092,7 @@ int vfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, error = security_inode_create(dir, dentry, mode); if (error) return error; - error = dir->i_op->create(dir, dentry, mode, nd); + error = dir->i_op->create(dir, dentry, mode, !nd || (nd->flags & LOOKUP_EXCL)); if (!error) fsnotify_create(dir, dentry); return error; -- cgit v1.2.3 From 312b63fba9e88a0dcf800834b8ede8716bcc1e17 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 10 Jun 2012 18:09:36 -0400 Subject: don't pass nameidata * to vfs_create() all we want is a boolean flag, same as the method gets now Signed-off-by: Al Viro --- fs/namei.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'fs/namei.c') diff --git a/fs/namei.c b/fs/namei.c index fd71156bfd7..ffcd4e114b6 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2079,7 +2079,7 @@ void unlock_rename(struct dentry *p1, struct dentry *p2) } int vfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, - struct nameidata *nd) + bool want_excl) { int error = may_create(dir, dentry); if (error) @@ -2092,7 +2092,7 @@ int vfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, error = security_inode_create(dir, dentry, mode); if (error) return error; - error = dir->i_op->create(dir, dentry, mode, !nd || (nd->flags & LOOKUP_EXCL)); + error = dir->i_op->create(dir, dentry, mode, want_excl); if (!error) fsnotify_create(dir, dentry); return error; @@ -2396,7 +2396,8 @@ static int lookup_open(struct nameidata *nd, struct path *path, error = security_path_mknod(&nd->path, dentry, mode, 0); if (error) goto out_dput; - error = vfs_create(dir->d_inode, dentry, mode, nd); + error = vfs_create(dir->d_inode, dentry, mode, + nd->flags & LOOKUP_EXCL); if (error) goto out_dput; } @@ -2883,7 +2884,7 @@ SYSCALL_DEFINE4(mknodat, int, dfd, const char __user *, filename, umode_t, mode, goto out_drop_write; switch (mode & S_IFMT) { case 0: case S_IFREG: - error = vfs_create(path.dentry->d_inode,dentry,mode,NULL); + error = vfs_create(path.dentry->d_inode,dentry,mode,true); break; case S_IFCHR: case S_IFBLK: error = vfs_mknod(path.dentry->d_inode,dentry,mode, -- cgit v1.2.3 From 1acf0af9b981027f3e73e93f0d3f85abdc794f71 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 14 Jun 2012 16:13:46 +0100 Subject: VFS: Fix the banner comment on lookup_open() Since commit 197e37d9, the banner comment on lookup_open() no longer matches what the function returns. It used to return a struct file pointer or NULL and now it returns an integer and is passed the struct file pointer it is to use amongst its arguments. Update the comment to reflect this. Also add a banner comment to atomic_open(). Signed-off-by: David Howells Signed-off-by: Al Viro --- fs/namei.c | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) (limited to 'fs/namei.c') diff --git a/fs/namei.c b/fs/namei.c index ffcd4e114b6..5abab917690 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2192,6 +2192,19 @@ static int may_o_create(struct path *dir, struct dentry *dentry, umode_t mode) return security_inode_create(dir->dentry->d_inode, dentry, mode); } +/* + * Attempt to atomically look up, create and open a file from a negative + * dentry. + * + * Returns 0 if successful. The file will have been created and attached to + * @file by the filesystem calling finish_open(). + * + * Returns 1 if the file was looked up only or didn't need creating. The + * caller will need to perform the open themselves. @path will have been + * updated to point to the new dentry. This may be negative. + * + * Returns an error code otherwise. + */ static int atomic_open(struct nameidata *nd, struct dentry *dentry, struct path *path, struct file *file, const struct open_flags *op, @@ -2336,12 +2349,22 @@ looked_up: } /* - * Lookup, maybe create and open the last component + * Look up and maybe create and open the last component. * * Must be called with i_mutex held on parent. * - * Returns open file or NULL on success, error otherwise. NULL means no open - * was performed, only lookup. + * Returns 0 if the file was successfully atomically created (if necessary) and + * opened. In this case the file will be returned attached to @file. + * + * Returns 1 if the file was not completely opened at this time, though lookups + * and creations will have been performed and the dentry returned in @path will + * be positive upon return if O_CREAT was specified. If O_CREAT wasn't + * specified then a negative dentry may be returned. + * + * An error code is returned otherwise. + * + * FILE_CREATE will be set in @*opened if the dentry was created and will be + * cleared otherwise prior to returning. */ static int lookup_open(struct nameidata *nd, struct path *path, struct file *file, -- cgit v1.2.3 From 79714f72d3b964611997de512cb29198c9f2dbbb Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 15 Jun 2012 03:01:42 +0400 Subject: get rid of kern_path_parent() all callers want the same thing, actually - a kinda-sorta analog of kern_path_create(). I.e. they want parent vfsmount/dentry (with ->i_mutex held, to make sure the child dentry is still their child) + the child dentry. Signed-off-by Al Viro --- fs/namei.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) (limited to 'fs/namei.c') diff --git a/fs/namei.c b/fs/namei.c index 5abab917690..6b29a51bef5 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1814,9 +1814,27 @@ static int do_path_lookup(int dfd, const char *name, return retval; } -int kern_path_parent(const char *name, struct nameidata *nd) +/* does lookup, returns the object with parent locked */ +struct dentry *kern_path_locked(const char *name, struct path *path) { - return do_path_lookup(AT_FDCWD, name, LOOKUP_PARENT, nd); + struct nameidata nd; + struct dentry *d; + int err = do_path_lookup(AT_FDCWD, name, LOOKUP_PARENT, &nd); + if (err) + return ERR_PTR(err); + if (nd.last_type != LAST_NORM) { + path_put(&nd.path); + return ERR_PTR(-EINVAL); + } + mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT); + d = lookup_one_len(nd.last.name, nd.path.dentry, nd.last.len); + if (IS_ERR(d)) { + mutex_unlock(&nd.path.dentry->d_inode->i_mutex); + path_put(&nd.path); + return d; + } + *path = nd.path; + return d; } int kern_path(const char *name, unsigned int flags, struct path *path) -- cgit v1.2.3 From 408ef013cc9e2f94a14f7ccbbe52ddfb18437a99 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 18 Jun 2012 10:47:03 -0400 Subject: fs: move path_put on failure out of ->follow_link Currently the non-nd_set_link based versions of ->follow_link are expected to do a path_put(&nd->path) on failure. This calling convention is unexpected, undocumented and doesn't match what the nd_set_link-based instances do. Move the path_put out of the only non-nd_set_link based ->follow_link instance into the caller. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/namei.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs/namei.c') diff --git a/fs/namei.c b/fs/namei.c index 6b29a51bef5..a9b94c62c30 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -624,7 +624,7 @@ follow_link(struct path *link, struct nameidata *nd, void **p) *p = dentry->d_inode->i_op->follow_link(dentry, nd); error = PTR_ERR(*p); if (IS_ERR(*p)) - goto out_put_link; + goto out_put_nd_path; error = 0; s = nd_get_link(nd); @@ -646,7 +646,6 @@ follow_link(struct path *link, struct nameidata *nd, void **p) out_put_nd_path: path_put(&nd->path); -out_put_link: path_put(link); return error; } -- cgit v1.2.3 From b5fb63c18315c5510c1d0636179c057e0c761c77 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 18 Jun 2012 10:47:04 -0400 Subject: fs: add nd_jump_link Add a helper that abstracts out the jump to an already parsed struct path from ->follow_link operation from procfs. Not only does this clean up the code by moving the two sides of this game into a single helper, but it also prepares for making struct nameidata private to namei.c Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/namei.c | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) (limited to 'fs/namei.c') diff --git a/fs/namei.c b/fs/namei.c index a9b94c62c30..0e1b9c3eb36 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -586,6 +586,21 @@ static inline void path_to_nameidata(const struct path *path, nd->path.dentry = path->dentry; } +/* + * Helper to directly jump to a known parsed path from ->follow_link, + * caller must have taken a reference to path beforehand. + */ +void nd_jump_link(struct nameidata *nd, struct path *path) +{ + path_put(&nd->path); + + nd->path = *path; + nd->inode = nd->path.dentry->d_inode; + nd->flags |= LOOKUP_JUMPED; + + BUG_ON(nd->inode->i_op->follow_link); +} + static inline void put_link(struct nameidata *nd, struct path *link, void *cookie) { struct inode *inode = link->dentry->d_inode; @@ -630,17 +645,9 @@ follow_link(struct path *link, struct nameidata *nd, void **p) s = nd_get_link(nd); if (s) { error = __vfs_follow_link(nd, s); - } else if (nd->last_type == LAST_BIND) { - nd->flags |= LOOKUP_JUMPED; - nd->inode = nd->path.dentry->d_inode; - if (nd->inode->i_op->follow_link) { - /* stepped on a _really_ weird one */ - path_put(&nd->path); - error = -ELOOP; - } + if (unlikely(error)) + put_link(nd, link, *p); } - if (unlikely(error)) - put_link(nd, link, *p); return error; -- cgit v1.2.3 From f015f1267b23d3530d3f874243fb83cb5f443005 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 25 Jun 2012 12:55:28 +0100 Subject: VFS: Comment mount following code Add comments describing what the directions "up" and "down" mean and ref count handling to the VFS mount following family of functions. Signed-off-by: Valerie Aurora (Original author) Signed-off-by: David Howells Signed-off-by: Al Viro --- fs/namei.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'fs/namei.c') diff --git a/fs/namei.c b/fs/namei.c index 0e1b9c3eb36..c6dcb4c8f86 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -672,6 +672,16 @@ static int follow_up_rcu(struct path *path) return 1; } +/* + * follow_up - Find the mountpoint of path's vfsmount + * + * Given a path, find the mountpoint of its source file system. + * Replace @path with the path of the mountpoint in the parent mount. + * Up is towards /. + * + * Return 1 if we went up a level and 0 if we were already at the + * root. + */ int follow_up(struct path *path) { struct mount *mnt = real_mount(path->mnt); -- cgit v1.2.3 From 0bdaea9017b9d2b9996e153a71ee03555969b80e Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 25 Jun 2012 12:55:46 +0100 Subject: VFS: Split inode_permission() Split inode_permission() into inode- and superblock-dependent parts. This is aimed at unionmounts where the superblock from the upper layer has to be checked rather than the superblock from the lower layer as the upper layer may be writable, thus allowing an unwritable file from the lower layer to be copied up and modified. Original-author: Valerie Aurora Signed-off-by: David Howells (Further development) Signed-off-by: Al Viro --- fs/namei.c | 66 ++++++++++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 49 insertions(+), 17 deletions(-) (limited to 'fs/namei.c') diff --git a/fs/namei.c b/fs/namei.c index c6dcb4c8f86..1b647468769 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -315,31 +315,22 @@ static inline int do_inode_permission(struct inode *inode, int mask) } /** - * inode_permission - check for access rights to a given inode - * @inode: inode to check permission on - * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC, ...) + * __inode_permission - Check for access rights to a given inode + * @inode: Inode to check permission on + * @mask: Right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC) * - * Used to check for read/write/execute permissions on an inode. - * We use "fsuid" for this, letting us set arbitrary permissions - * for filesystem access without changing the "normal" uids which - * are used for other things. + * Check for read/write/execute permissions on an inode. * * When checking for MAY_APPEND, MAY_WRITE must also be set in @mask. + * + * This does not check for a read-only file system. You probably want + * inode_permission(). */ -int inode_permission(struct inode *inode, int mask) +int __inode_permission(struct inode *inode, int mask) { int retval; if (unlikely(mask & MAY_WRITE)) { - umode_t mode = inode->i_mode; - - /* - * Nobody gets write access to a read-only fs. - */ - if (IS_RDONLY(inode) && - (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))) - return -EROFS; - /* * Nobody gets write access to an immutable file. */ @@ -358,6 +349,47 @@ int inode_permission(struct inode *inode, int mask) return security_inode_permission(inode, mask); } +/** + * sb_permission - Check superblock-level permissions + * @sb: Superblock of inode to check permission on + * @mask: Right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC) + * + * Separate out file-system wide checks from inode-specific permission checks. + */ +static int sb_permission(struct super_block *sb, struct inode *inode, int mask) +{ + if (unlikely(mask & MAY_WRITE)) { + umode_t mode = inode->i_mode; + + /* Nobody gets write access to a read-only fs. */ + if ((sb->s_flags & MS_RDONLY) && + (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))) + return -EROFS; + } + return 0; +} + +/** + * inode_permission - Check for access rights to a given inode + * @inode: Inode to check permission on + * @mask: Right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC) + * + * Check for read/write/execute permissions on an inode. We use fs[ug]id for + * this, letting us set arbitrary permissions for filesystem access without + * changing the "normal" UIDs which are used for other things. + * + * When checking for MAY_APPEND, MAY_WRITE must also be set in @mask. + */ +int inode_permission(struct inode *inode, int mask) +{ + int retval; + + retval = sb_permission(inode->i_sb, inode, mask); + if (retval) + return retval; + return __inode_permission(inode, mask); +} + /** * path_get - get a reference to a path * @path: path to get the reference to -- cgit v1.2.3 From 1e0ea0014479f066ba26f937e8740b8902229616 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 22 Jul 2012 23:46:21 +0400 Subject: use __lookup_hash() in kern_path_parent() No need to bother with lookup_one_len() here - it's an overkill Signed-off-by Al Viro --- fs/namei.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/namei.c') diff --git a/fs/namei.c b/fs/namei.c index 1b647468769..c14dfac83c2 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1875,7 +1875,7 @@ struct dentry *kern_path_locked(const char *name, struct path *path) return ERR_PTR(-EINVAL); } mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT); - d = lookup_one_len(nd.last.name, nd.path.dentry, nd.last.len); + d = __lookup_hash(&nd.last, nd.path.dentry, 0); if (IS_ERR(d)) { mutex_unlock(&nd.path.dentry->d_inode->i_mutex); path_put(&nd.path); -- cgit v1.2.3 From 3c0a6163688b8ca3f44029c7bdb3d91a865c878a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 18 Jul 2012 17:32:50 +0400 Subject: unobfuscate follow_up() a bit really convoluted test in there has grown up during struct mount introduction; what it checks is that we'd reached the root of mount tree. --- fs/namei.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/namei.c') diff --git a/fs/namei.c b/fs/namei.c index c14dfac83c2..d4d15bbc8af 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -722,7 +722,7 @@ int follow_up(struct path *path) br_read_lock(&vfsmount_lock); parent = mnt->mnt_parent; - if (&parent->mnt == path->mnt) { + if (parent == mnt) { br_read_unlock(&vfsmount_lock); return 0; } -- cgit v1.2.3 From 32a7991b6a9c758e4e2b8166c5e1cc7563c3dcde Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 18 Jul 2012 20:43:19 +0400 Subject: tidy up namei.c a bit locking/unlocking for rcu walk taken to a couple of inline helpers Signed-off-by: Al Viro --- fs/namei.c | 39 +++++++++++++++++++++------------------ 1 file changed, 21 insertions(+), 18 deletions(-) (limited to 'fs/namei.c') diff --git a/fs/namei.c b/fs/namei.c index d4d15bbc8af..2ccc35c4dc2 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -427,6 +427,18 @@ EXPORT_SYMBOL(path_put); * to restart the path walk from the beginning in ref-walk mode. */ +static inline void lock_rcu_walk(void) +{ + br_read_lock(&vfsmount_lock); + rcu_read_lock(); +} + +static inline void unlock_rcu_walk(void) +{ + rcu_read_unlock(); + br_read_unlock(&vfsmount_lock); +} + /** * unlazy_walk - try to switch to ref-walk mode. * @nd: nameidata pathwalk data @@ -480,8 +492,7 @@ static int unlazy_walk(struct nameidata *nd, struct dentry *dentry) } mntget(nd->path.mnt); - rcu_read_unlock(); - br_read_unlock(&vfsmount_lock); + unlock_rcu_walk(); nd->flags &= ~LOOKUP_RCU; return 0; @@ -522,15 +533,13 @@ static int complete_walk(struct nameidata *nd) spin_lock(&dentry->d_lock); if (unlikely(!__d_rcu_to_refcount(dentry, nd->seq))) { spin_unlock(&dentry->d_lock); - rcu_read_unlock(); - br_read_unlock(&vfsmount_lock); + unlock_rcu_walk(); return -ECHILD; } BUG_ON(nd->inode != dentry->d_inode); spin_unlock(&dentry->d_lock); mntget(nd->path.mnt); - rcu_read_unlock(); - br_read_unlock(&vfsmount_lock); + unlock_rcu_walk(); } if (likely(!(nd->flags & LOOKUP_JUMPED))) @@ -985,8 +994,7 @@ failed: nd->flags &= ~LOOKUP_RCU; if (!(nd->flags & LOOKUP_ROOT)) nd->root.mnt = NULL; - rcu_read_unlock(); - br_read_unlock(&vfsmount_lock); + unlock_rcu_walk(); return -ECHILD; } @@ -1323,8 +1331,7 @@ static void terminate_walk(struct nameidata *nd) nd->flags &= ~LOOKUP_RCU; if (!(nd->flags & LOOKUP_ROOT)) nd->root.mnt = NULL; - rcu_read_unlock(); - br_read_unlock(&vfsmount_lock); + unlock_rcu_walk(); } } @@ -1691,8 +1698,7 @@ static int path_init(int dfd, const char *name, unsigned int flags, nd->path = nd->root; nd->inode = inode; if (flags & LOOKUP_RCU) { - br_read_lock(&vfsmount_lock); - rcu_read_lock(); + lock_rcu_walk(); nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); } else { path_get(&nd->path); @@ -1704,8 +1710,7 @@ static int path_init(int dfd, const char *name, unsigned int flags, if (*name=='/') { if (flags & LOOKUP_RCU) { - br_read_lock(&vfsmount_lock); - rcu_read_lock(); + lock_rcu_walk(); set_root_rcu(nd); } else { set_root(nd); @@ -1717,8 +1722,7 @@ static int path_init(int dfd, const char *name, unsigned int flags, struct fs_struct *fs = current->fs; unsigned seq; - br_read_lock(&vfsmount_lock); - rcu_read_lock(); + lock_rcu_walk(); do { seq = read_seqcount_begin(&fs->seq); @@ -1753,8 +1757,7 @@ static int path_init(int dfd, const char *name, unsigned int flags, if (fput_needed) *fp = file; nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); - br_read_lock(&vfsmount_lock); - rcu_read_lock(); + lock_rcu_walk(); } else { path_get(&file->f_path); fput_light(file, fput_needed); -- cgit v1.2.3 From 921a1650de9eed40dd64d681aba4a4d98856f289 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 20 Jul 2012 01:15:31 +0400 Subject: new helper: done_path_create() releases what needs to be released after {kern,user}_path_create() Signed-off-by: Al Viro --- fs/namei.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'fs/namei.c') diff --git a/fs/namei.c b/fs/namei.c index 2ccc35c4dc2..5bc6f3d1dc8 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2893,6 +2893,14 @@ out: } EXPORT_SYMBOL(kern_path_create); +void done_path_create(struct path *path, struct dentry *dentry) +{ + dput(dentry); + mutex_unlock(&path->dentry->d_inode->i_mutex); + path_put(path); +} +EXPORT_SYMBOL(done_path_create); + struct dentry *user_path_create(int dfd, const char __user *pathname, struct path *path, int is_dir) { char *tmp = getname(pathname); @@ -2989,9 +2997,7 @@ SYSCALL_DEFINE4(mknodat, int, dfd, const char __user *, filename, umode_t, mode, out_drop_write: mnt_drop_write(path.mnt); out_dput: - dput(dentry); - mutex_unlock(&path.dentry->d_inode->i_mutex); - path_put(&path); + done_path_create(&path, dentry); return error; } @@ -3048,9 +3054,7 @@ SYSCALL_DEFINE3(mkdirat, int, dfd, const char __user *, pathname, umode_t, mode) out_drop_write: mnt_drop_write(path.mnt); out_dput: - dput(dentry); - mutex_unlock(&path.dentry->d_inode->i_mutex); - path_put(&path); + done_path_create(&path, dentry); return error; } @@ -3334,9 +3338,7 @@ SYSCALL_DEFINE3(symlinkat, const char __user *, oldname, out_drop_write: mnt_drop_write(path.mnt); out_dput: - dput(dentry); - mutex_unlock(&path.dentry->d_inode->i_mutex); - path_put(&path); + done_path_create(&path, dentry); out_putname: putname(from); return error; @@ -3446,9 +3448,7 @@ SYSCALL_DEFINE5(linkat, int, olddfd, const char __user *, oldname, out_drop_write: mnt_drop_write(new_path.mnt); out_dput: - dput(new_dentry); - mutex_unlock(&new_path.dentry->d_inode->i_mutex); - path_put(&new_path); + done_path_create(&new_path, new_dentry); out: path_put(&old_path); -- cgit v1.2.3 From 8e4bfca1d1f0de62301dd223675717e7a5f63a27 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 20 Jul 2012 01:17:26 +0400 Subject: mknod: take sanity checks on mode into the very beginning Note that applying umask can't affect their results. While that affects errno in cases like mknod("/no_such_directory/a", 030000) yielding -EINVAL (due to impossible mode_t) instead of -ENOENT (due to inexistent directory), IMO that makes a lot more sense, POSIX allows to return either and any software that relies on getting -ENOENT instead of -EINVAL in that case deserves everything it gets. Signed-off-by: Al Viro --- fs/namei.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'fs/namei.c') diff --git a/fs/namei.c b/fs/namei.c index 5bc6f3d1dc8..cf362dc9d1f 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2964,8 +2964,9 @@ SYSCALL_DEFINE4(mknodat, int, dfd, const char __user *, filename, umode_t, mode, struct path path; int error; - if (S_ISDIR(mode)) - return -EPERM; + error = may_mknod(mode); + if (error) + return error; dentry = user_path_create(dfd, filename, &path, 0); if (IS_ERR(dentry)) @@ -2973,9 +2974,6 @@ SYSCALL_DEFINE4(mknodat, int, dfd, const char __user *, filename, umode_t, mode, if (!IS_POSIXACL(path.dentry->d_inode)) mode &= ~current_umask(); - error = may_mknod(mode); - if (error) - goto out_dput; error = mnt_want_write(path.mnt); if (error) goto out_dput; -- cgit v1.2.3 From a8104a9fcdeb82e22d7acd55fca20746581067d3 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 20 Jul 2012 02:25:00 +0400 Subject: pull mnt_want_write()/mnt_drop_write() into kern_path_create()/done_path_create() resp. One side effect - attempt to create a cross-device link on a read-only fs fails with EROFS instead of EXDEV now. Makes more sense, POSIX allows, etc. Signed-off-by: Al Viro --- fs/namei.c | 57 ++++++++++++++++++--------------------------------------- 1 file changed, 18 insertions(+), 39 deletions(-) (limited to 'fs/namei.c') diff --git a/fs/namei.c b/fs/namei.c index cf362dc9d1f..a3fb78fd70d 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2865,10 +2865,11 @@ struct dentry *kern_path_create(int dfd, const char *pathname, struct path *path mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT); dentry = lookup_hash(&nd); if (IS_ERR(dentry)) - goto fail; + goto unlock; + error = -EEXIST; if (dentry->d_inode) - goto eexist; + goto fail; /* * Special case - lookup gave negative, but... we had foo/bar/ * From the vfs_mknod() POV we just have a negative dentry - @@ -2876,16 +2877,18 @@ struct dentry *kern_path_create(int dfd, const char *pathname, struct path *path * been asking for (non-existent) directory. -ENOENT for you. */ if (unlikely(!is_dir && nd.last.name[nd.last.len])) { - dput(dentry); - dentry = ERR_PTR(-ENOENT); + error = -ENOENT; goto fail; } + error = mnt_want_write(nd.path.mnt); + if (error) + goto fail; *path = nd.path; return dentry; -eexist: - dput(dentry); - dentry = ERR_PTR(-EEXIST); fail: + dput(dentry); + dentry = ERR_PTR(error); +unlock: mutex_unlock(&nd.path.dentry->d_inode->i_mutex); out: path_put(&nd.path); @@ -2897,6 +2900,7 @@ void done_path_create(struct path *path, struct dentry *dentry) { dput(dentry); mutex_unlock(&path->dentry->d_inode->i_mutex); + mnt_drop_write(path->mnt); path_put(path); } EXPORT_SYMBOL(done_path_create); @@ -2974,12 +2978,9 @@ SYSCALL_DEFINE4(mknodat, int, dfd, const char __user *, filename, umode_t, mode, if (!IS_POSIXACL(path.dentry->d_inode)) mode &= ~current_umask(); - error = mnt_want_write(path.mnt); - if (error) - goto out_dput; error = security_path_mknod(&path, dentry, mode, dev); if (error) - goto out_drop_write; + goto out; switch (mode & S_IFMT) { case 0: case S_IFREG: error = vfs_create(path.dentry->d_inode,dentry,mode,true); @@ -2992,11 +2993,8 @@ SYSCALL_DEFINE4(mknodat, int, dfd, const char __user *, filename, umode_t, mode, error = vfs_mknod(path.dentry->d_inode,dentry,mode,0); break; } -out_drop_write: - mnt_drop_write(path.mnt); -out_dput: +out: done_path_create(&path, dentry); - return error; } @@ -3042,16 +3040,9 @@ SYSCALL_DEFINE3(mkdirat, int, dfd, const char __user *, pathname, umode_t, mode) if (!IS_POSIXACL(path.dentry->d_inode)) mode &= ~current_umask(); - error = mnt_want_write(path.mnt); - if (error) - goto out_dput; error = security_path_mkdir(&path, dentry, mode); - if (error) - goto out_drop_write; - error = vfs_mkdir(path.dentry->d_inode, dentry, mode); -out_drop_write: - mnt_drop_write(path.mnt); -out_dput: + if (!error) + error = vfs_mkdir(path.dentry->d_inode, dentry, mode); done_path_create(&path, dentry); return error; } @@ -3326,16 +3317,9 @@ SYSCALL_DEFINE3(symlinkat, const char __user *, oldname, if (IS_ERR(dentry)) goto out_putname; - error = mnt_want_write(path.mnt); - if (error) - goto out_dput; error = security_path_symlink(&path, dentry, from); - if (error) - goto out_drop_write; - error = vfs_symlink(path.dentry->d_inode, dentry, from); -out_drop_write: - mnt_drop_write(path.mnt); -out_dput: + if (!error) + error = vfs_symlink(path.dentry->d_inode, dentry, from); done_path_create(&path, dentry); out_putname: putname(from); @@ -3436,15 +3420,10 @@ SYSCALL_DEFINE5(linkat, int, olddfd, const char __user *, oldname, error = -EXDEV; if (old_path.mnt != new_path.mnt) goto out_dput; - error = mnt_want_write(new_path.mnt); - if (error) - goto out_dput; error = security_path_link(old_path.dentry, &new_path, new_dentry); if (error) - goto out_drop_write; + goto out_dput; error = vfs_link(old_path.dentry, new_path.dentry->d_inode, new_dentry); -out_drop_write: - mnt_drop_write(new_path.mnt); out_dput: done_path_create(&new_path, new_dentry); out: -- cgit v1.2.3 From 3134f37e931d75931bdf6d4eacd82a3fd26eca7c Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 25 Jul 2012 10:19:47 -0400 Subject: vfs: don't let do_last pass negative dentry to audit_inode I can reliably reproduce the following panic by simply setting an audit rule on a recent 3.5.0+ kernel: BUG: unable to handle kernel NULL pointer dereference at 0000000000000040 IP: [] audit_copy_inode+0x10/0x90 PGD 7acd9067 PUD 7b8fb067 PMD 0 Oops: 0000 [#86] SMP Modules linked in: nfs nfs_acl auth_rpcgss fscache lockd sunrpc tpm_bios btrfs zlib_deflate libcrc32c kvm_amd kvm joydev virtio_net pcspkr i2c_piix4 floppy virtio_balloon microcode virtio_blk cirrus drm_kms_helper ttm drm i2c_core [last unloaded: scsi_wait_scan] CPU 0 Pid: 1286, comm: abrt-dump-oops Tainted: G D 3.5.0+ #1 Bochs Bochs RIP: 0010:[] [] audit_copy_inode+0x10/0x90 RSP: 0018:ffff88007aebfc38 EFLAGS: 00010282 RAX: 0000000000000000 RBX: ffff88003692d860 RCX: 00000000000038c4 RDX: 0000000000000000 RSI: ffff88006baf5d80 RDI: ffff88003692d860 RBP: ffff88007aebfc68 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000001 R12: 0000000000000000 R13: ffff880036d30f00 R14: ffff88006baf5d80 R15: ffff88003692d800 FS: 00007f7562634740(0000) GS:ffff88007fc00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000040 CR3: 000000003643d000 CR4: 00000000000006f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process abrt-dump-oops (pid: 1286, threadinfo ffff88007aebe000, task ffff880079614530) Stack: ffff88007aebfdf8 ffff88007aebff28 ffff88007aebfc98 ffffffff81211358 ffff88003692d860 0000000000000000 ffff88007aebfcc8 ffffffff810d4968 ffff88007aebfcc8 ffff8800000038c4 0000000000000000 0000000000000000 Call Trace: [] ? ext4_lookup+0xe8/0x160 [] __audit_inode+0x118/0x2d0 [] do_last+0x999/0xe80 [] ? inode_permission+0x18/0x50 [] ? kmem_cache_alloc_trace+0x11a/0x130 [] path_openat+0xba/0x420 [] do_filp_open+0x41/0xa0 [] ? alloc_fd+0x4d/0x120 [] do_sys_open+0xed/0x1c0 [] ? __audit_syscall_entry+0xcc/0x300 [] sys_open+0x21/0x30 [] system_call_fastpath+0x16/0x1b RSP CR2: 0000000000000040 The problem is that do_last is passing a negative dentry to audit_inode. The comments on lookup_open note that it can pass back a negative dentry if O_CREAT is not set. This patch fixes the oops, but I'm not clear on whether there's a better approach. Cc: Miklos Szeredi Signed-off-by: Jeff Layton Signed-off-by: Al Viro --- fs/namei.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs/namei.c') diff --git a/fs/namei.c b/fs/namei.c index a3fb78fd70d..afa087649dd 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2608,9 +2608,10 @@ retry_lookup: } /* - * It already exists. + * create/update audit record if it already exists. */ - audit_inode(pathname, path->dentry); + if (path->dentry->d_inode) + audit_inode(pathname, path->dentry); /* * If atomic_open() acquired write access it is dropped now due to -- cgit v1.2.3 From 800179c9b8a1e796e441674776d11cd4c05d61d7 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 25 Jul 2012 17:29:07 -0700 Subject: fs: add link restrictions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This adds symlink and hardlink restrictions to the Linux VFS. Symlinks: A long-standing class of security issues is the symlink-based time-of-check-time-of-use race, most commonly seen in world-writable directories like /tmp. The common method of exploitation of this flaw is to cross privilege boundaries when following a given symlink (i.e. a root process follows a symlink belonging to another user). For a likely incomplete list of hundreds of examples across the years, please see: http://cve.mitre.org/cgi-bin/cvekey.cgi?keyword=/tmp The solution is to permit symlinks to only be followed when outside a sticky world-writable directory, or when the uid of the symlink and follower match, or when the directory owner matches the symlink's owner. Some pointers to the history of earlier discussion that I could find: 1996 Aug, Zygo Blaxell http://marc.info/?l=bugtraq&m=87602167419830&w=2 1996 Oct, Andrew Tridgell http://lkml.indiana.edu/hypermail/linux/kernel/9610.2/0086.html 1997 Dec, Albert D Cahalan http://lkml.org/lkml/1997/12/16/4 2005 Feb, Lorenzo Hernández García-Hierro http://lkml.indiana.edu/hypermail/linux/kernel/0502.0/1896.html 2010 May, Kees Cook https://lkml.org/lkml/2010/5/30/144 Past objections and rebuttals could be summarized as: - Violates POSIX. - POSIX didn't consider this situation and it's not useful to follow a broken specification at the cost of security. - Might break unknown applications that use this feature. - Applications that break because of the change are easy to spot and fix. Applications that are vulnerable to symlink ToCToU by not having the change aren't. Additionally, no applications have yet been found that rely on this behavior. - Applications should just use mkstemp() or O_CREATE|O_EXCL. - True, but applications are not perfect, and new software is written all the time that makes these mistakes; blocking this flaw at the kernel is a single solution to the entire class of vulnerability. - This should live in the core VFS. - This should live in an LSM. (https://lkml.org/lkml/2010/5/31/135) - This should live in an LSM. - This should live in the core VFS. (https://lkml.org/lkml/2010/8/2/188) Hardlinks: On systems that have user-writable directories on the same partition as system files, a long-standing class of security issues is the hardlink-based time-of-check-time-of-use race, most commonly seen in world-writable directories like /tmp. The common method of exploitation of this flaw is to cross privilege boundaries when following a given hardlink (i.e. a root process follows a hardlink created by another user). Additionally, an issue exists where users can "pin" a potentially vulnerable setuid/setgid file so that an administrator will not actually upgrade a system fully. The solution is to permit hardlinks to only be created when the user is already the existing file's owner, or if they already have read/write access to the existing file. Many Linux users are surprised when they learn they can link to files they have no access to, so this change appears to follow the doctrine of "least surprise". Additionally, this change does not violate POSIX, which states "the implementation may require that the calling process has permission to access the existing file"[1]. This change is known to break some implementations of the "at" daemon, though the version used by Fedora and Ubuntu has been fixed[2] for a while. Otherwise, the change has been undisruptive while in use in Ubuntu for the last 1.5 years. [1] http://pubs.opengroup.org/onlinepubs/9699919799/functions/linkat.html [2] http://anonscm.debian.org/gitweb/?p=collab-maint/at.git;a=commitdiff;h=f4114656c3a6c6f6070e315ffdf940a49eda3279 This patch is based on the patches in Openwall and grsecurity, along with suggestions from Al Viro. I have added a sysctl to enable the protected behavior, and documentation. Signed-off-by: Kees Cook Acked-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Al Viro --- fs/namei.c | 122 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 122 insertions(+) (limited to 'fs/namei.c') diff --git a/fs/namei.c b/fs/namei.c index afa087649dd..3861d85f848 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -650,6 +650,119 @@ static inline void put_link(struct nameidata *nd, struct path *link, void *cooki path_put(link); } +int sysctl_protected_symlinks __read_mostly = 1; +int sysctl_protected_hardlinks __read_mostly = 1; + +/** + * may_follow_link - Check symlink following for unsafe situations + * @link: The path of the symlink + * + * In the case of the sysctl_protected_symlinks sysctl being enabled, + * CAP_DAC_OVERRIDE needs to be specifically ignored if the symlink is + * in a sticky world-writable directory. This is to protect privileged + * processes from failing races against path names that may change out + * from under them by way of other users creating malicious symlinks. + * It will permit symlinks to be followed only when outside a sticky + * world-writable directory, or when the uid of the symlink and follower + * match, or when the directory owner matches the symlink's owner. + * + * Returns 0 if following the symlink is allowed, -ve on error. + */ +static inline int may_follow_link(struct path *link, struct nameidata *nd) +{ + const struct inode *inode; + const struct inode *parent; + + if (!sysctl_protected_symlinks) + return 0; + + /* Allowed if owner and follower match. */ + inode = link->dentry->d_inode; + if (current_cred()->fsuid == inode->i_uid) + return 0; + + /* Allowed if parent directory not sticky and world-writable. */ + parent = nd->path.dentry->d_inode; + if ((parent->i_mode & (S_ISVTX|S_IWOTH)) != (S_ISVTX|S_IWOTH)) + return 0; + + /* Allowed if parent directory and link owner match. */ + if (parent->i_uid == inode->i_uid) + return 0; + + path_put_conditional(link, nd); + path_put(&nd->path); + return -EACCES; +} + +/** + * safe_hardlink_source - Check for safe hardlink conditions + * @inode: the source inode to hardlink from + * + * Return false if at least one of the following conditions: + * - inode is not a regular file + * - inode is setuid + * - inode is setgid and group-exec + * - access failure for read and write + * + * Otherwise returns true. + */ +static bool safe_hardlink_source(struct inode *inode) +{ + umode_t mode = inode->i_mode; + + /* Special files should not get pinned to the filesystem. */ + if (!S_ISREG(mode)) + return false; + + /* Setuid files should not get pinned to the filesystem. */ + if (mode & S_ISUID) + return false; + + /* Executable setgid files should not get pinned to the filesystem. */ + if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) + return false; + + /* Hardlinking to unreadable or unwritable sources is dangerous. */ + if (inode_permission(inode, MAY_READ | MAY_WRITE)) + return false; + + return true; +} + +/** + * may_linkat - Check permissions for creating a hardlink + * @link: the source to hardlink from + * + * Block hardlink when all of: + * - sysctl_protected_hardlinks enabled + * - fsuid does not match inode + * - hardlink source is unsafe (see safe_hardlink_source() above) + * - not CAP_FOWNER + * + * Returns 0 if successful, -ve on error. + */ +static int may_linkat(struct path *link) +{ + const struct cred *cred; + struct inode *inode; + + if (!sysctl_protected_hardlinks) + return 0; + + cred = current_cred(); + inode = link->dentry->d_inode; + + /* Source inode owner (or CAP_FOWNER) can hardlink all they like, + * otherwise, it must be a safe source. + */ + if (cred->fsuid == inode->i_uid || safe_hardlink_source(inode) || + capable(CAP_FOWNER)) + return 0; + + return -EPERM; +} + static __always_inline int follow_link(struct path *link, struct nameidata *nd, void **p) { @@ -1818,6 +1931,9 @@ static int path_lookupat(int dfd, const char *name, while (err > 0) { void *cookie; struct path link = path; + err = may_follow_link(&link, nd); + if (unlikely(err)) + break; nd->flags |= LOOKUP_PARENT; err = follow_link(&link, nd, &cookie); if (err) @@ -2778,6 +2894,9 @@ static struct file *path_openat(int dfd, const char *pathname, error = -ELOOP; break; } + error = may_follow_link(&link, nd); + if (unlikely(error)) + break; nd->flags |= LOOKUP_PARENT; nd->flags &= ~(LOOKUP_OPEN|LOOKUP_CREATE|LOOKUP_EXCL); error = follow_link(&link, nd, &cookie); @@ -3421,6 +3540,9 @@ SYSCALL_DEFINE5(linkat, int, olddfd, const char __user *, oldname, error = -EXDEV; if (old_path.mnt != new_path.mnt) goto out_dput; + error = may_linkat(&old_path); + if (unlikely(error)) + goto out_dput; error = security_path_link(old_path.dentry, &new_path, new_dentry); if (error) goto out_dput; -- cgit v1.2.3 From a51d9eaa41866ab6b4b6ecad7b621f8b66ece0dc Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 25 Jul 2012 17:29:08 -0700 Subject: fs: add link restriction audit reporting Adds audit messages for unexpected link restriction violations so that system owners will have some sort of potentially actionable information about misbehaving processes. Signed-off-by: Kees Cook Signed-off-by: Al Viro --- fs/namei.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/namei.c') diff --git a/fs/namei.c b/fs/namei.c index 3861d85f848..618d3531cf9 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -692,6 +692,7 @@ static inline int may_follow_link(struct path *link, struct nameidata *nd) path_put_conditional(link, nd); path_put(&nd->path); + audit_log_link_denied("follow_link", link); return -EACCES; } @@ -760,6 +761,7 @@ static int may_linkat(struct path *link) capable(CAP_FOWNER)) return 0; + audit_log_link_denied("linkat", link); return -EPERM; } -- cgit v1.2.3 From f8310c59201b183ebee2e3fe0c7242f5729be0af Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 30 Jul 2012 11:50:30 +0400 Subject: fix O_EXCL handling for devices O_EXCL without O_CREAT has different semantics; it's "fail if already opened", not "fail if already exists". commit 71574865 broke that... Signed-off-by: Al Viro --- fs/namei.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/namei.c') diff --git a/fs/namei.c b/fs/namei.c index 618d3531cf9..e133bf3bbb0 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2418,7 +2418,7 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry, if ((open_flag & O_CREAT) && !IS_POSIXACL(dir)) mode &= ~current_umask(); - if (open_flag & O_EXCL) { + if ((open_flag & (O_EXCL | O_CREAT)) == (O_EXCL | O_CREAT)) { open_flag &= ~O_TRUNC; *opened |= FILE_CREATED; } @@ -2742,7 +2742,7 @@ retry_lookup: } error = -EEXIST; - if (open_flag & O_EXCL) + if ((open_flag & (O_EXCL | O_CREAT)) == (O_EXCL | O_CREAT)) goto exit_dput; error = follow_managed(path, nd->flags); -- cgit v1.2.3 From 64894cf843278c7b2653a6fac2cd1a697ff930dc Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 31 Jul 2012 00:53:35 +0400 Subject: simplify lookup_open()/atomic_open() - do the temporary mnt_want_write() early The write ref to vfsmount taken in lookup_open()/atomic_open() is going to be dropped; we take the one to stay in dentry_open(). Just grab the temporary in caller if it looks like we are going to need it (create/truncate/writable open) and pass (by value) "has it succeeded" flag. Instead of doing mnt_want_write() inside, check that flag and treat "false" as "mnt_want_write() has just failed". mnt_want_write() is cheap and the things get considerably simpler and more robust that way - we get it and drop it in the same function, to start with, rather than passing a "has something in the guts of really scary functions taken it" back to caller. Signed-off-by: Al Viro --- fs/namei.c | 51 +++++++++++++++++++++++++++++---------------------- 1 file changed, 29 insertions(+), 22 deletions(-) (limited to 'fs/namei.c') diff --git a/fs/namei.c b/fs/namei.c index e133bf3bbb0..35291ac6f42 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2395,7 +2395,7 @@ static int may_o_create(struct path *dir, struct dentry *dentry, umode_t mode) static int atomic_open(struct nameidata *nd, struct dentry *dentry, struct path *path, struct file *file, const struct open_flags *op, - bool *want_write, bool need_lookup, + bool got_write, bool need_lookup, int *opened) { struct inode *dir = nd->path.dentry->d_inode; @@ -2432,12 +2432,9 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry, * Another problem is returing the "right" error value (e.g. for an * O_EXCL open we want to return EEXIST not EROFS). */ - if ((open_flag & (O_CREAT | O_TRUNC)) || - (open_flag & O_ACCMODE) != O_RDONLY) { - error = mnt_want_write(nd->path.mnt); - if (!error) { - *want_write = true; - } else if (!(open_flag & O_CREAT)) { + if (((open_flag & (O_CREAT | O_TRUNC)) || + (open_flag & O_ACCMODE) != O_RDONLY) && unlikely(!got_write)) { + if (!(open_flag & O_CREAT)) { /* * No O_CREATE -> atomicity not a requirement -> fall * back to lookup + open @@ -2445,11 +2442,11 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry, goto no_open; } else if (open_flag & (O_EXCL | O_TRUNC)) { /* Fall back and fail with the right error */ - create_error = error; + create_error = -EROFS; goto no_open; } else { /* No side effects, safe to clear O_CREAT */ - create_error = error; + create_error = -EROFS; open_flag &= ~O_CREAT; } } @@ -2556,7 +2553,7 @@ looked_up: static int lookup_open(struct nameidata *nd, struct path *path, struct file *file, const struct open_flags *op, - bool *want_write, int *opened) + bool got_write, int *opened) { struct dentry *dir = nd->path.dentry; struct inode *dir_inode = dir->d_inode; @@ -2574,7 +2571,7 @@ static int lookup_open(struct nameidata *nd, struct path *path, goto out_no_open; if ((nd->flags & LOOKUP_OPEN) && dir_inode->i_op->atomic_open) { - return atomic_open(nd, dentry, path, file, op, want_write, + return atomic_open(nd, dentry, path, file, op, got_write, need_lookup, opened); } @@ -2598,10 +2595,10 @@ static int lookup_open(struct nameidata *nd, struct path *path, * a permanent write count is taken through * the 'struct file' in finish_open(). */ - error = mnt_want_write(nd->path.mnt); - if (error) + if (!got_write) { + error = -EROFS; goto out_dput; - *want_write = true; + } *opened |= FILE_CREATED; error = security_path_mknod(&nd->path, dentry, mode, 0); if (error) @@ -2631,7 +2628,7 @@ static int do_last(struct nameidata *nd, struct path *path, struct dentry *dir = nd->path.dentry; int open_flag = op->open_flag; bool will_truncate = (open_flag & O_TRUNC) != 0; - bool want_write = false; + bool got_write = false; int acc_mode = op->acc_mode; struct inode *inode; bool symlink_ok = false; @@ -2700,8 +2697,18 @@ static int do_last(struct nameidata *nd, struct path *path, } retry_lookup: + if (op->open_flag & (O_CREAT | O_TRUNC | O_WRONLY | O_RDWR)) { + error = mnt_want_write(nd->path.mnt); + if (!error) + got_write = true; + /* + * do _not_ fail yet - we might not need that or fail with + * a different error; let lookup_open() decide; we'll be + * dropping this one anyway. + */ + } mutex_lock(&dir->d_inode->i_mutex); - error = lookup_open(nd, path, file, op, &want_write, opened); + error = lookup_open(nd, path, file, op, got_write, opened); mutex_unlock(&dir->d_inode->i_mutex); if (error <= 0) { @@ -2736,9 +2743,9 @@ retry_lookup: * possible mount and symlink following (this might be optimized away if * necessary...) */ - if (want_write) { + if (got_write) { mnt_drop_write(nd->path.mnt); - want_write = false; + got_write = false; } error = -EEXIST; @@ -2803,7 +2810,7 @@ finish_open: error = mnt_want_write(nd->path.mnt); if (error) goto out; - want_write = true; + got_write = true; } finish_open_created: error = may_open(&nd->path, acc_mode, open_flag); @@ -2830,7 +2837,7 @@ opened: goto exit_fput; } out: - if (want_write) + if (got_write) mnt_drop_write(nd->path.mnt); path_put(&save_parent); terminate_walk(nd); @@ -2854,9 +2861,9 @@ stale_open: nd->inode = dir->d_inode; save_parent.mnt = NULL; save_parent.dentry = NULL; - if (want_write) { + if (got_write) { mnt_drop_write(nd->path.mnt); - want_write = false; + got_write = false; } retried = true; goto retry_lookup; -- cgit v1.2.3 From c30dabfe5d10c5fd70d882e5afb8f59f2942b194 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 12 Jun 2012 16:20:30 +0200 Subject: fs: Push mnt_want_write() outside of i_mutex Currently, mnt_want_write() is sometimes called with i_mutex held and sometimes without it. This isn't really a problem because mnt_want_write() is a non-blocking operation (essentially has a trylock semantics) but when the function starts to handle also frozen filesystems, it will get a full lock semantics and thus proper lock ordering has to be established. So move all mnt_want_write() calls outside of i_mutex. One non-trivial case needing conversion is kern_path_create() / user_path_create() which didn't include mnt_want_write() but now needs to because it acquires i_mutex. Because there are virtual file systems which don't bother with freeze / remount-ro protection we actually provide both versions of the function - one which calls mnt_want_write() and one which does not. [AV: scratch the previous, mnt_want_write() has been moved to kern_path_create() by now] Signed-off-by: Jan Kara Signed-off-by: Al Viro --- fs/namei.c | 46 +++++++++++++++++++++++++--------------------- 1 file changed, 25 insertions(+), 21 deletions(-) (limited to 'fs/namei.c') diff --git a/fs/namei.c b/fs/namei.c index 35291ac6f42..1b464390dde 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2975,6 +2975,7 @@ struct dentry *kern_path_create(int dfd, const char *pathname, struct path *path { struct dentry *dentry = ERR_PTR(-EEXIST); struct nameidata nd; + int err2; int error = do_path_lookup(dfd, pathname, LOOKUP_PARENT, &nd); if (error) return ERR_PTR(error); @@ -2988,6 +2989,8 @@ struct dentry *kern_path_create(int dfd, const char *pathname, struct path *path nd.flags &= ~LOOKUP_PARENT; nd.flags |= LOOKUP_CREATE | LOOKUP_EXCL; + /* don't fail immediately if it's r/o, at least try to report other errors */ + err2 = mnt_want_write(nd.path.mnt); /* * Do the final lookup. */ @@ -3009,9 +3012,10 @@ struct dentry *kern_path_create(int dfd, const char *pathname, struct path *path error = -ENOENT; goto fail; } - error = mnt_want_write(nd.path.mnt); - if (error) + if (unlikely(err2)) { + error = err2; goto fail; + } *path = nd.path; return dentry; fail: @@ -3019,6 +3023,8 @@ fail: dentry = ERR_PTR(error); unlock: mutex_unlock(&nd.path.dentry->d_inode->i_mutex); + if (!err2) + mnt_drop_write(nd.path.mnt); out: path_put(&nd.path); return dentry; @@ -3266,6 +3272,9 @@ static long do_rmdir(int dfd, const char __user *pathname) } nd.flags &= ~LOOKUP_PARENT; + error = mnt_want_write(nd.path.mnt); + if (error) + goto exit1; mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT); dentry = lookup_hash(&nd); @@ -3276,19 +3285,15 @@ static long do_rmdir(int dfd, const char __user *pathname) error = -ENOENT; goto exit3; } - error = mnt_want_write(nd.path.mnt); - if (error) - goto exit3; error = security_path_rmdir(&nd.path, dentry); if (error) - goto exit4; + goto exit3; error = vfs_rmdir(nd.path.dentry->d_inode, dentry); -exit4: - mnt_drop_write(nd.path.mnt); exit3: dput(dentry); exit2: mutex_unlock(&nd.path.dentry->d_inode->i_mutex); + mnt_drop_write(nd.path.mnt); exit1: path_put(&nd.path); putname(name); @@ -3355,6 +3360,9 @@ static long do_unlinkat(int dfd, const char __user *pathname) goto exit1; nd.flags &= ~LOOKUP_PARENT; + error = mnt_want_write(nd.path.mnt); + if (error) + goto exit1; mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT); dentry = lookup_hash(&nd); @@ -3367,21 +3375,17 @@ static long do_unlinkat(int dfd, const char __user *pathname) if (!inode) goto slashes; ihold(inode); - error = mnt_want_write(nd.path.mnt); - if (error) - goto exit2; error = security_path_unlink(&nd.path, dentry); if (error) - goto exit3; + goto exit2; error = vfs_unlink(nd.path.dentry->d_inode, dentry); -exit3: - mnt_drop_write(nd.path.mnt); - exit2: +exit2: dput(dentry); } mutex_unlock(&nd.path.dentry->d_inode->i_mutex); if (inode) iput(inode); /* truncate the inode here */ + mnt_drop_write(nd.path.mnt); exit1: path_put(&nd.path); putname(name); @@ -3753,6 +3757,10 @@ SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname, if (newnd.last_type != LAST_NORM) goto exit2; + error = mnt_want_write(oldnd.path.mnt); + if (error) + goto exit2; + oldnd.flags &= ~LOOKUP_PARENT; newnd.flags &= ~LOOKUP_PARENT; newnd.flags |= LOOKUP_RENAME_TARGET; @@ -3788,23 +3796,19 @@ SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname, if (new_dentry == trap) goto exit5; - error = mnt_want_write(oldnd.path.mnt); - if (error) - goto exit5; error = security_path_rename(&oldnd.path, old_dentry, &newnd.path, new_dentry); if (error) - goto exit6; + goto exit5; error = vfs_rename(old_dir->d_inode, old_dentry, new_dir->d_inode, new_dentry); -exit6: - mnt_drop_write(oldnd.path.mnt); exit5: dput(new_dentry); exit4: dput(old_dentry); exit3: unlock_rename(new_dir, old_dir); + mnt_drop_write(oldnd.path.mnt); exit2: path_put(&newnd.path); putname(to); -- cgit v1.2.3 From 81abe27b10af98f861c955be63da700938dd59c1 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 3 Aug 2012 09:38:08 -0700 Subject: userns: Fix link restrictions to use uid_eq Signed-off-by: "Eric W. Biederman" --- fs/namei.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs/namei.c') diff --git a/fs/namei.c b/fs/namei.c index 1b464390dde..05480a64d7b 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -678,7 +678,7 @@ static inline int may_follow_link(struct path *link, struct nameidata *nd) /* Allowed if owner and follower match. */ inode = link->dentry->d_inode; - if (current_cred()->fsuid == inode->i_uid) + if (uid_eq(current_cred()->fsuid, inode->i_uid)) return 0; /* Allowed if parent directory not sticky and world-writable. */ @@ -687,7 +687,7 @@ static inline int may_follow_link(struct path *link, struct nameidata *nd) return 0; /* Allowed if parent directory and link owner match. */ - if (parent->i_uid == inode->i_uid) + if (uid_eq(parent->i_uid, inode->i_uid)) return 0; path_put_conditional(link, nd); @@ -757,7 +757,7 @@ static int may_linkat(struct path *link) /* Source inode owner (or CAP_FOWNER) can hardlink all they like, * otherwise, it must be a safe source. */ - if (cred->fsuid == inode->i_uid || safe_hardlink_source(inode) || + if (uid_eq(cred->fsuid, inode->i_uid) || safe_hardlink_source(inode) || capable(CAP_FOWNER)) return 0; -- cgit v1.2.3 From 62b259d8b3ea9d4a73108fc599e40c863ec25ae6 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 15 Aug 2012 13:01:24 +0200 Subject: vfs: atomic_open(): fix create mode usage Don't mask S_ISREG off the create mode before passing to ->atomic_open(). Other methods (->create, ->mknod) also get the complete file mode and filesystems expect it. Reported-by: Steve Reported-by: Richard W.M. Jones Signed-off-by: Miklos Szeredi Tested-by: Richard W.M. Jones --- fs/namei.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/namei.c') diff --git a/fs/namei.c b/fs/namei.c index 1b464390dde..5bac1bb6e58 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2414,7 +2414,7 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry, goto out; } - mode = op->mode & S_IALLUGO; + mode = op->mode; if ((open_flag & O_CREAT) && !IS_POSIXACL(dir)) mode &= ~current_umask(); -- cgit v1.2.3 From 38227f78a5020b3100cbb0406c89807563b10dae Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 15 Aug 2012 13:01:24 +0200 Subject: vfs: pass right create mode to may_o_create() Pass the umask-ed create mode to may_o_create() instead of the original one. Signed-off-by: Miklos Szeredi Tested-by: Richard W.M. Jones --- fs/namei.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/namei.c') diff --git a/fs/namei.c b/fs/namei.c index 5bac1bb6e58..26c28ec4f4a 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2452,7 +2452,7 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry, } if (open_flag & O_CREAT) { - error = may_o_create(&nd->path, dentry, op->mode); + error = may_o_create(&nd->path, dentry, mode); if (error) { create_error = error; if (open_flag & O_EXCL) -- cgit v1.2.3 From 62b2ce964bb901f00a480104bd35a2e1f8d2cf58 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Wed, 15 Aug 2012 13:30:12 -0700 Subject: vfs: fix propagation of atomic_open create error on negative dentry If ->atomic_open() returns -ENOENT, we take care to return the create error (e.g., EACCES), if any. Do the same when ->atomic_open() returns 1 and provides a negative dentry. This fixes a regression where an unprivileged open O_CREAT fails with ENOENT instead of EACCES, introduced with the new atomic_open code. It is tested by the open/08.t test in the pjd posix test suite, and was observed on top of fuse (backed by ceph-fuse). Signed-off-by: Sage Weil Signed-off-by: Miklos Szeredi --- fs/namei.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs/namei.c') diff --git a/fs/namei.c b/fs/namei.c index 26c28ec4f4a..db76b866a09 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2489,6 +2489,10 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry, dput(dentry); dentry = file->f_path.dentry; } + if (create_error && dentry->d_inode == NULL) { + error = create_error; + goto out; + } goto looked_up; } -- cgit v1.2.3 From 55852635a8e2803cbc22d0e143d727813f0fcdb5 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 18 Aug 2012 17:39:25 -0700 Subject: fs: fix fs/namei.c kernel-doc warnings Fix kernel-doc warnings in fs/namei.c: Warning(fs/namei.c:360): No description found for parameter 'inode' Warning(fs/namei.c:672): No description found for parameter 'nd' Signed-off-by: Randy Dunlap Cc: Alexander Viro Cc: linux-fsdevel@vger.kernel.org Signed-off-by: Al Viro --- fs/namei.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/namei.c') diff --git a/fs/namei.c b/fs/namei.c index db76b866a09..dd1ed1b8e98 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -352,6 +352,7 @@ int __inode_permission(struct inode *inode, int mask) /** * sb_permission - Check superblock-level permissions * @sb: Superblock of inode to check permission on + * @inode: Inode to check permission on * @mask: Right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC) * * Separate out file-system wide checks from inode-specific permission checks. @@ -656,6 +657,7 @@ int sysctl_protected_hardlinks __read_mostly = 1; /** * may_follow_link - Check symlink following for unsafe situations * @link: The path of the symlink + * @nd: nameidata pathwalk data * * In the case of the sysctl_protected_symlinks sysctl being enabled, * CAP_DAC_OVERRIDE needs to be specifically ignored if the symlink is -- cgit v1.2.3 From f6d2ac5ca79d1fd468dbc77e488aec06e86f3035 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 26 Aug 2012 12:55:54 -0400 Subject: namei.c: fix BS comment get_write_access() is needed for nfsd, not binfmt_aout (the latter has no business doing anything of that kind, of course) Signed-off-by: Al Viro --- fs/namei.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/namei.c') diff --git a/fs/namei.c b/fs/namei.c index dd1ed1b8e98..c5b85b3523c 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3971,7 +3971,7 @@ EXPORT_SYMBOL(user_path_at); EXPORT_SYMBOL(follow_down_one); EXPORT_SYMBOL(follow_down); EXPORT_SYMBOL(follow_up); -EXPORT_SYMBOL(get_write_access); /* binfmt_aout */ +EXPORT_SYMBOL(get_write_access); /* nfsd */ EXPORT_SYMBOL(getname); EXPORT_SYMBOL(lock_rename); EXPORT_SYMBOL(lookup_one_len); -- cgit v1.2.3 From 2903ff019b346ab8d36ebbf54853c3aaf6590608 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 28 Aug 2012 12:52:22 -0400 Subject: switch simple cases of fget_light to fdget Signed-off-by: Al Viro --- fs/namei.c | 39 +++++++++++++++++---------------------- 1 file changed, 17 insertions(+), 22 deletions(-) (limited to 'fs/namei.c') diff --git a/fs/namei.c b/fs/namei.c index c5b85b3523c..e1c7072c7af 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1797,8 +1797,6 @@ static int path_init(int dfd, const char *name, unsigned int flags, struct nameidata *nd, struct file **fp) { int retval = 0; - int fput_needed; - struct file *file; nd->last_type = LAST_ROOT; /* if there are only slashes... */ nd->flags = flags | LOOKUP_JUMPED; @@ -1850,44 +1848,41 @@ static int path_init(int dfd, const char *name, unsigned int flags, get_fs_pwd(current->fs, &nd->path); } } else { + struct fd f = fdget_raw(dfd); struct dentry *dentry; - file = fget_raw_light(dfd, &fput_needed); - retval = -EBADF; - if (!file) - goto out_fail; + if (!f.file) + return -EBADF; - dentry = file->f_path.dentry; + dentry = f.file->f_path.dentry; if (*name) { - retval = -ENOTDIR; - if (!S_ISDIR(dentry->d_inode->i_mode)) - goto fput_fail; + if (!S_ISDIR(dentry->d_inode->i_mode)) { + fdput(f); + return -ENOTDIR; + } retval = inode_permission(dentry->d_inode, MAY_EXEC); - if (retval) - goto fput_fail; + if (retval) { + fdput(f); + return retval; + } } - nd->path = file->f_path; + nd->path = f.file->f_path; if (flags & LOOKUP_RCU) { - if (fput_needed) - *fp = file; + if (f.need_put) + *fp = f.file; nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); lock_rcu_walk(); } else { - path_get(&file->f_path); - fput_light(file, fput_needed); + path_get(&nd->path); + fdput(f); } } nd->inode = nd->path.dentry->d_inode; return 0; - -fput_fail: - fput_light(file, fput_needed); -out_fail: - return retval; } static inline int lookup_last(struct nameidata *nd, struct path *path) -- cgit v1.2.3 From ffd8d101a3a7d3f2e79deee1e342801703b6dc70 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Thu, 4 Oct 2012 19:56:40 -0400 Subject: fs: prevent use after free in auditing when symlink following was denied Commit "fs: add link restriction audit reporting" has added auditing of failed attempts to follow symlinks. Unfortunately, the auditing was being done after the struct path structure was released earlier. Signed-off-by: Sasha Levin Signed-off-by: Al Viro --- fs/namei.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/namei.c') diff --git a/fs/namei.c b/fs/namei.c index aa30d19e9ed..6d47fac6429 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -692,9 +692,9 @@ static inline int may_follow_link(struct path *link, struct nameidata *nd) if (uid_eq(parent->i_uid, inode->i_uid)) return 0; + audit_log_link_denied("follow_link", link); path_put_conditional(link, nd); path_put(&nd->path); - audit_log_link_denied("follow_link", link); return -EACCES; } -- cgit v1.2.3 From 98f6ef64b15a48f15062aff5d143b5d9a6ae7711 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 11 Oct 2012 13:20:00 +0000 Subject: vfs: bogus warnings in fs/namei.c The follow_link() function always initializes its *p argument, or returns an error, but when building with 'gcc -s', the compiler gets confused by the __always_inline attribute to the function and can no longer detect where the cookie was initialized. The solution is to always initialize the pointer from follow_link, even in the error path. When building with -O2, this has zero impact on generated code and adds a single instruction in the error path for a -Os build on ARM. Without this patch, building with gcc-4.6 through gcc-4.8 and CONFIG_CC_OPTIMIZE_FOR_SIZE results in: fs/namei.c: In function 'link_path_walk': fs/namei.c:649:24: warning: 'cookie' may be used uninitialized in this function [-Wuninitialized] fs/namei.c:1544:9: note: 'cookie' was declared here fs/namei.c: In function 'path_lookupat': fs/namei.c:649:24: warning: 'cookie' may be used uninitialized in this function [-Wuninitialized] fs/namei.c:1934:10: note: 'cookie' was declared here fs/namei.c: In function 'path_openat': fs/namei.c:649:24: warning: 'cookie' may be used uninitialized in this function [-Wuninitialized] fs/namei.c:2899:9: note: 'cookie' was declared here Signed-off-by: Arnd Bergmann Signed-off-by: Al Viro --- fs/namei.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/namei.c') diff --git a/fs/namei.c b/fs/namei.c index 6d47fac6429..c1f18e4f034 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -810,6 +810,7 @@ follow_link(struct path *link, struct nameidata *nd, void **p) return error; out_put_nd_path: + *p = NULL; path_put(&nd->path); path_put(link); return error; -- cgit v1.2.3 From f78570dd6ad9563fffd24cc5e1808e1a1242f16e Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 10 Oct 2012 15:25:20 -0400 Subject: audit: remove unnecessary NULL ptr checks from do_path_lookup As best I can tell, whenever retval == 0, nd->path.dentry and nd->inode are also non-NULL. Eliminate those checks and the superfluous audit_context check. Signed-off-by: Eric Paris Signed-off-by: Jeff Layton Signed-off-by: Al Viro --- fs/namei.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'fs/namei.c') diff --git a/fs/namei.c b/fs/namei.c index c1f18e4f034..f04ce114229 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1972,12 +1972,8 @@ static int do_path_lookup(int dfd, const char *name, if (unlikely(retval == -ESTALE)) retval = path_lookupat(dfd, name, flags | LOOKUP_REVAL, nd); - if (likely(!retval)) { - if (unlikely(!audit_dummy_context())) { - if (nd->path.dentry && nd->inode) - audit_inode(name, nd->path.dentry); - } - } + if (likely(!retval)) + audit_inode(name, nd->path.dentry); return retval; } -- cgit v1.2.3 From c43a25abba97c7d87131e71db6be24b24d7791a5 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 10 Oct 2012 15:25:21 -0400 Subject: audit: reverse arguments to audit_inode_child Most of the callers get called with an inode and dentry in the reverse order. The compiler then has to reshuffle the arg registers and/or stack in order to pass them on to audit_inode_child. Reverse those arguments for a micro-optimization. Reported-by: Eric Paris Signed-off-by: Jeff Layton Signed-off-by: Al Viro --- fs/namei.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/namei.c') diff --git a/fs/namei.c b/fs/namei.c index f04ce114229..a7ad35c6680 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2176,7 +2176,7 @@ static int may_delete(struct inode *dir,struct dentry *victim,int isdir) return -ENOENT; BUG_ON(victim->d_parent->d_inode != dir); - audit_inode_child(victim, dir); + audit_inode_child(dir, victim); error = inode_permission(dir, MAY_WRITE | MAY_EXEC); if (error) -- cgit v1.2.3 From bfcec7087458812f575d9022b2d151641f34ee84 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 10 Oct 2012 15:25:23 -0400 Subject: audit: set the name_len in audit_inode for parent lookups Currently, this gets set mostly by happenstance when we call into audit_inode_child. While that might be a little more efficient, it seems wrong. If the syscall ends up failing before audit_inode_child ever gets called, then you'll have an audit_names record that shows the full path but has the parent inode info attached. Fix this by passing in a parent flag when we call audit_inode that gets set to the value of LOOKUP_PARENT. We can then fix up the pathname for the audit entry correctly from the get-go. While we're at it, clean up the no-op macro for audit_inode in the !CONFIG_AUDITSYSCALL case. Signed-off-by: Jeff Layton Signed-off-by: Al Viro --- fs/namei.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'fs/namei.c') diff --git a/fs/namei.c b/fs/namei.c index a7ad35c6680..6a92d988573 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1973,7 +1973,7 @@ static int do_path_lookup(int dfd, const char *name, retval = path_lookupat(dfd, name, flags | LOOKUP_REVAL, nd); if (likely(!retval)) - audit_inode(name, nd->path.dentry); + audit_inode(name, nd->path.dentry, flags & LOOKUP_PARENT); return retval; } @@ -2648,7 +2648,7 @@ static int do_last(struct nameidata *nd, struct path *path, error = complete_walk(nd); if (error) return error; - audit_inode(pathname, nd->path.dentry); + audit_inode(pathname, nd->path.dentry, 0); if (open_flag & O_CREAT) { error = -EISDIR; goto out; @@ -2658,7 +2658,7 @@ static int do_last(struct nameidata *nd, struct path *path, error = complete_walk(nd); if (error) return error; - audit_inode(pathname, dir); + audit_inode(pathname, dir, 0); goto finish_open; } @@ -2687,7 +2687,7 @@ static int do_last(struct nameidata *nd, struct path *path, if (error) return error; - audit_inode(pathname, dir); + audit_inode(pathname, dir, 0); error = -EISDIR; /* trailing slashes? */ if (nd->last.name[nd->last.len]) @@ -2717,7 +2717,7 @@ retry_lookup: !S_ISREG(file->f_path.dentry->d_inode->i_mode)) will_truncate = false; - audit_inode(pathname, file->f_path.dentry); + audit_inode(pathname, file->f_path.dentry, 0); goto opened; } @@ -2734,7 +2734,7 @@ retry_lookup: * create/update audit record if it already exists. */ if (path->dentry->d_inode) - audit_inode(pathname, path->dentry); + audit_inode(pathname, path->dentry, 0); /* * If atomic_open() acquired write access it is dropped now due to @@ -2799,7 +2799,7 @@ finish_lookup: error = -ENOTDIR; if ((nd->flags & LOOKUP_DIRECTORY) && !nd->inode->i_op->lookup) goto out; - audit_inode(pathname, nd->path.dentry); + audit_inode(pathname, nd->path.dentry, 0); finish_open: if (!S_ISREG(nd->inode->i_mode)) will_truncate = false; -- cgit v1.2.3 From 4fa6b5ecbf092c6ee752ece8a55d71f663d23254 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 10 Oct 2012 15:25:25 -0400 Subject: audit: overhaul __audit_inode_child to accomodate retrying In order to accomodate retrying path-based syscalls, we need to add a new "type" argument to audit_inode_child. This will tell us whether we're looking for a child entry that represents a create or a delete. If we find a parent, don't automatically assume that we need to create a new entry. Instead, use the information we have to try to find an existing entry first. Update it if one is found and create a new one if not. Signed-off-by: Jeff Layton Signed-off-by: Al Viro --- fs/namei.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/namei.c') diff --git a/fs/namei.c b/fs/namei.c index 6a92d988573..ca14d8432d3 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2176,7 +2176,7 @@ static int may_delete(struct inode *dir,struct dentry *victim,int isdir) return -ENOENT; BUG_ON(victim->d_parent->d_inode != dir); - audit_inode_child(dir, victim); + audit_inode_child(dir, victim, AUDIT_TYPE_CHILD_DELETE); error = inode_permission(dir, MAY_WRITE | MAY_EXEC); if (error) -- cgit v1.2.3 From 8e377d15078a501c4da98471f56396343c407d92 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 10 Oct 2012 16:43:13 -0400 Subject: vfs: unexport getname and putname symbols I see no callers in module code. Signed-off-by: Jeff Layton Signed-off-by: Al Viro --- fs/namei.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'fs/namei.c') diff --git a/fs/namei.c b/fs/namei.c index ca14d8432d3..9cc0fce7fc9 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -163,7 +163,6 @@ void putname(const char *name) else __putname(name); } -EXPORT_SYMBOL(putname); #endif static int check_acl(struct inode *inode, int mask) @@ -3964,7 +3963,6 @@ EXPORT_SYMBOL(follow_down_one); EXPORT_SYMBOL(follow_down); EXPORT_SYMBOL(follow_up); EXPORT_SYMBOL(get_write_access); /* nfsd */ -EXPORT_SYMBOL(getname); EXPORT_SYMBOL(lock_rename); EXPORT_SYMBOL(lookup_one_len); EXPORT_SYMBOL(page_follow_link_light); -- cgit v1.2.3 From 91a27b2a756784714e924e5e854b919273082d26 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 10 Oct 2012 15:25:28 -0400 Subject: vfs: define struct filename and have getname() return it getname() is intended to copy pathname strings from userspace into a kernel buffer. The result is just a string in kernel space. It would however be quite helpful to be able to attach some ancillary info to the string. For instance, we could attach some audit-related info to reduce the amount of audit-related processing needed. When auditing is enabled, we could also call getname() on the string more than once and not need to recopy it from userspace. This patchset converts the getname()/putname() interfaces to return a struct instead of a string. For now, the struct just tracks the string in kernel space and the original userland pointer for it. Later, we'll add other information to the struct as it becomes convenient. Signed-off-by: Jeff Layton Signed-off-by: Al Viro --- fs/namei.c | 108 ++++++++++++++++++++++++++++++++++++++----------------------- 1 file changed, 67 insertions(+), 41 deletions(-) (limited to 'fs/namei.c') diff --git a/fs/namei.c b/fs/namei.c index 9cc0fce7fc9..ec638d27642 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -117,18 +117,37 @@ * POSIX.1 2.4: an empty pathname is invalid (ENOENT). * PATH_MAX includes the nul terminator --RR. */ -static char *getname_flags(const char __user *filename, int flags, int *empty) +void final_putname(struct filename *name) { - char *result = __getname(), *err; + __putname(name->name); + kfree(name); +} + +static struct filename * +getname_flags(const char __user *filename, int flags, int *empty) +{ + struct filename *result, *err; + char *kname; int len; + /* FIXME: create dedicated slabcache? */ + result = kzalloc(sizeof(*result), GFP_KERNEL); if (unlikely(!result)) return ERR_PTR(-ENOMEM); - len = strncpy_from_user(result, filename, PATH_MAX); - err = ERR_PTR(len); - if (unlikely(len < 0)) + kname = __getname(); + if (unlikely(!kname)) { + err = ERR_PTR(-ENOMEM); + goto error_free_name; + } + + result->name = kname; + result->uptr = filename; + len = strncpy_from_user(kname, filename, PATH_MAX); + if (unlikely(len < 0)) { + err = ERR_PTR(len); goto error; + } /* The empty path is special. */ if (unlikely(!len)) { @@ -146,22 +165,25 @@ static char *getname_flags(const char __user *filename, int flags, int *empty) } error: - __putname(result); + __putname(kname); +error_free_name: + kfree(result); return err; } -char *getname(const char __user * filename) +struct filename * +getname(const char __user * filename) { return getname_flags(filename, 0, NULL); } +EXPORT_SYMBOL(getname); #ifdef CONFIG_AUDITSYSCALL -void putname(const char *name) +void putname(struct filename *name) { if (unlikely(!audit_dummy_context())) - audit_putname(name); - else - __putname(name); + return audit_putname(name); + final_putname(name); } #endif @@ -2093,13 +2115,13 @@ int user_path_at_empty(int dfd, const char __user *name, unsigned flags, struct path *path, int *empty) { struct nameidata nd; - char *tmp = getname_flags(name, flags, empty); + struct filename *tmp = getname_flags(name, flags, empty); int err = PTR_ERR(tmp); if (!IS_ERR(tmp)) { BUG_ON(flags & LOOKUP_PARENT); - err = do_path_lookup(dfd, tmp, flags, &nd); + err = do_path_lookup(dfd, tmp->name, flags, &nd); putname(tmp); if (!err) *path = nd.path; @@ -2113,22 +2135,22 @@ int user_path_at(int dfd, const char __user *name, unsigned flags, return user_path_at_empty(dfd, name, flags, path, NULL); } -static int user_path_parent(int dfd, const char __user *path, - struct nameidata *nd, char **name) +static struct filename * +user_path_parent(int dfd, const char __user *path, struct nameidata *nd) { - char *s = getname(path); + struct filename *s = getname(path); int error; if (IS_ERR(s)) - return PTR_ERR(s); + return s; - error = do_path_lookup(dfd, s, LOOKUP_PARENT, nd); - if (error) + error = do_path_lookup(dfd, s->name, LOOKUP_PARENT, nd); + if (error) { putname(s); - else - *name = s; + return ERR_PTR(error); + } - return error; + return s; } /* @@ -3039,11 +3061,11 @@ EXPORT_SYMBOL(done_path_create); struct dentry *user_path_create(int dfd, const char __user *pathname, struct path *path, int is_dir) { - char *tmp = getname(pathname); + struct filename *tmp = getname(pathname); struct dentry *res; if (IS_ERR(tmp)) return ERR_CAST(tmp); - res = kern_path_create(dfd, tmp, path, is_dir); + res = kern_path_create(dfd, tmp->name, path, is_dir); putname(tmp); return res; } @@ -3248,13 +3270,13 @@ out: static long do_rmdir(int dfd, const char __user *pathname) { int error = 0; - char * name; + struct filename *name; struct dentry *dentry; struct nameidata nd; - error = user_path_parent(dfd, pathname, &nd, &name); - if (error) - return error; + name = user_path_parent(dfd, pathname, &nd); + if (IS_ERR(name)) + return PTR_ERR(name); switch(nd.last_type) { case LAST_DOTDOT: @@ -3343,14 +3365,14 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry) static long do_unlinkat(int dfd, const char __user *pathname) { int error; - char *name; + struct filename *name; struct dentry *dentry; struct nameidata nd; struct inode *inode = NULL; - error = user_path_parent(dfd, pathname, &nd, &name); - if (error) - return error; + name = user_path_parent(dfd, pathname, &nd); + if (IS_ERR(name)) + return PTR_ERR(name); error = -EISDIR; if (nd.last_type != LAST_NORM) @@ -3434,7 +3456,7 @@ SYSCALL_DEFINE3(symlinkat, const char __user *, oldname, int, newdfd, const char __user *, newname) { int error; - char *from; + struct filename *from; struct dentry *dentry; struct path path; @@ -3447,9 +3469,9 @@ SYSCALL_DEFINE3(symlinkat, const char __user *, oldname, if (IS_ERR(dentry)) goto out_putname; - error = security_path_symlink(&path, dentry, from); + error = security_path_symlink(&path, dentry, from->name); if (!error) - error = vfs_symlink(path.dentry->d_inode, dentry, from); + error = vfs_symlink(path.dentry->d_inode, dentry, from->name); done_path_create(&path, dentry); out_putname: putname(from); @@ -3729,17 +3751,21 @@ SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname, struct dentry *old_dentry, *new_dentry; struct dentry *trap; struct nameidata oldnd, newnd; - char *from; - char *to; + struct filename *from; + struct filename *to; int error; - error = user_path_parent(olddfd, oldname, &oldnd, &from); - if (error) + from = user_path_parent(olddfd, oldname, &oldnd); + if (IS_ERR(from)) { + error = PTR_ERR(from); goto exit; + } - error = user_path_parent(newdfd, newname, &newnd, &to); - if (error) + to = user_path_parent(newdfd, newname, &newnd); + if (IS_ERR(to)) { + error = PTR_ERR(to); goto exit1; + } error = -EXDEV; if (oldnd.path.mnt != newnd.path.mnt) -- cgit v1.2.3 From 7ac86265dc8f665cc49d6e60a125e608cd2fca14 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 10 Oct 2012 15:25:28 -0400 Subject: audit: allow audit code to satisfy getname requests from its names_list Currently, if we call getname() on a userland string more than once, we'll get multiple copies of the string and multiple audit_names records. Add a function that will allow the audit_names code to satisfy getname requests using info from the audit_names list, avoiding a new allocation and audit_names records. Signed-off-by: Jeff Layton Signed-off-by: Al Viro --- fs/namei.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs/namei.c') diff --git a/fs/namei.c b/fs/namei.c index ec638d27642..5dbc3f83693 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -130,6 +130,10 @@ getname_flags(const char __user *filename, int flags, int *empty) char *kname; int len; + result = audit_reusename(filename); + if (result) + return result; + /* FIXME: create dedicated slabcache? */ result = kzalloc(sizeof(*result), GFP_KERNEL); if (unlikely(!result)) -- cgit v1.2.3 From 873f1eedc1b983d772283279192c4ca2f60e8482 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 10 Oct 2012 15:25:29 -0400 Subject: vfs: turn do_path_lookup into wrapper around struct filename variant ...and make the user_path callers use that variant instead. Signed-off-by: Jeff Layton Signed-off-by: Al Viro --- fs/namei.c | 30 +++++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) (limited to 'fs/namei.c') diff --git a/fs/namei.c b/fs/namei.c index 5dbc3f83693..8c14353fb75 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1988,20 +1988,30 @@ static int path_lookupat(int dfd, const char *name, return err; } -static int do_path_lookup(int dfd, const char *name, +static int filename_lookup(int dfd, struct filename *name, unsigned int flags, struct nameidata *nd) { - int retval = path_lookupat(dfd, name, flags | LOOKUP_RCU, nd); + int retval = path_lookupat(dfd, name->name, flags | LOOKUP_RCU, nd); if (unlikely(retval == -ECHILD)) - retval = path_lookupat(dfd, name, flags, nd); + retval = path_lookupat(dfd, name->name, flags, nd); if (unlikely(retval == -ESTALE)) - retval = path_lookupat(dfd, name, flags | LOOKUP_REVAL, nd); + retval = path_lookupat(dfd, name->name, + flags | LOOKUP_REVAL, nd); if (likely(!retval)) - audit_inode(name, nd->path.dentry, flags & LOOKUP_PARENT); + audit_inode(name->name, nd->path.dentry, + flags & LOOKUP_PARENT); return retval; } +static int do_path_lookup(int dfd, const char *name, + unsigned int flags, struct nameidata *nd) +{ + struct filename filename = { .name = name }; + + return filename_lookup(dfd, &filename, flags, nd); +} + /* does lookup, returns the object with parent locked */ struct dentry *kern_path_locked(const char *name, struct path *path) { @@ -2125,7 +2135,7 @@ int user_path_at_empty(int dfd, const char __user *name, unsigned flags, BUG_ON(flags & LOOKUP_PARENT); - err = do_path_lookup(dfd, tmp->name, flags, &nd); + err = filename_lookup(dfd, tmp, flags, &nd); putname(tmp); if (!err) *path = nd.path; @@ -2139,6 +2149,12 @@ int user_path_at(int dfd, const char __user *name, unsigned flags, return user_path_at_empty(dfd, name, flags, path, NULL); } +/* + * NB: most callers don't do anything directly with the reference to the + * to struct filename, but the nd->last pointer points into the name string + * allocated by getname. So we must hold the reference to it until all + * path-walking is complete. + */ static struct filename * user_path_parent(int dfd, const char __user *path, struct nameidata *nd) { @@ -2148,7 +2164,7 @@ user_path_parent(int dfd, const char __user *path, struct nameidata *nd) if (IS_ERR(s)) return s; - error = do_path_lookup(dfd, s->name, LOOKUP_PARENT, nd); + error = filename_lookup(dfd, s, LOOKUP_PARENT, nd); if (error) { putname(s); return ERR_PTR(error); -- cgit v1.2.3 From 669abf4e5539c8aa48bf28c965be05c0a7b58a27 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 10 Oct 2012 16:43:10 -0400 Subject: vfs: make path_openat take a struct filename pointer ...and fix up the callers. For do_file_open_root, just declare a struct filename on the stack and fill out the .name field. For do_filp_open, make it also take a struct filename pointer, and fix up its callers to call it appropriately. For filp_open, add a variant that takes a struct filename pointer and turn filp_open into a wrapper around it. Signed-off-by: Jeff Layton Signed-off-by: Al Viro --- fs/namei.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'fs/namei.c') diff --git a/fs/namei.c b/fs/namei.c index 8c14353fb75..6bbd8fdfb1f 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2662,7 +2662,7 @@ out_dput: */ static int do_last(struct nameidata *nd, struct path *path, struct file *file, const struct open_flags *op, - int *opened, const char *pathname) + int *opened, struct filename *name) { struct dentry *dir = nd->path.dentry; int open_flag = op->open_flag; @@ -2674,6 +2674,7 @@ static int do_last(struct nameidata *nd, struct path *path, struct path save_parent = { .dentry = NULL, .mnt = NULL }; bool retried = false; int error; + const char *pathname = name->name; nd->flags &= ~LOOKUP_PARENT; nd->flags |= op->intent; @@ -2908,7 +2909,7 @@ stale_open: goto retry_lookup; } -static struct file *path_openat(int dfd, const char *pathname, +static struct file *path_openat(int dfd, struct filename *pathname, struct nameidata *nd, const struct open_flags *op, int flags) { struct file *base = NULL; @@ -2923,12 +2924,12 @@ static struct file *path_openat(int dfd, const char *pathname, file->f_flags = op->open_flag; - error = path_init(dfd, pathname, flags | LOOKUP_PARENT, nd, &base); + error = path_init(dfd, pathname->name, flags | LOOKUP_PARENT, nd, &base); if (unlikely(error)) goto out; current->total_link_count = 0; - error = link_path_walk(pathname, nd); + error = link_path_walk(pathname->name, nd); if (unlikely(error)) goto out; @@ -2974,7 +2975,7 @@ out: return file; } -struct file *do_filp_open(int dfd, const char *pathname, +struct file *do_filp_open(int dfd, struct filename *pathname, const struct open_flags *op, int flags) { struct nameidata nd; @@ -2993,6 +2994,7 @@ struct file *do_file_open_root(struct dentry *dentry, struct vfsmount *mnt, { struct nameidata nd; struct file *file; + struct filename filename = { .name = name }; nd.root.mnt = mnt; nd.root.dentry = dentry; @@ -3002,11 +3004,11 @@ struct file *do_file_open_root(struct dentry *dentry, struct vfsmount *mnt, if (dentry->d_inode->i_op->follow_link && op->intent & LOOKUP_OPEN) return ERR_PTR(-ELOOP); - file = path_openat(-1, name, &nd, op, flags | LOOKUP_RCU); + file = path_openat(-1, &filename, &nd, op, flags | LOOKUP_RCU); if (unlikely(file == ERR_PTR(-ECHILD))) - file = path_openat(-1, name, &nd, op, flags); + file = path_openat(-1, &filename, &nd, op, flags); if (unlikely(file == ERR_PTR(-ESTALE))) - file = path_openat(-1, name, &nd, op, flags | LOOKUP_REVAL); + file = path_openat(-1, &filename, &nd, op, flags | LOOKUP_REVAL); return file; } -- cgit v1.2.3 From adb5c2473d3f91526c79db972aafb20a56d3fbb3 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 10 Oct 2012 16:43:13 -0400 Subject: audit: make audit_inode take struct filename Keep a pointer to the audit_names "slot" in struct filename. Have all of the audit_inode callers pass a struct filename ponter to audit_inode instead of a string pointer. If the aname field is already populated, then we can skip walking the list altogether and just use it directly. Signed-off-by: Jeff Layton Signed-off-by: Al Viro --- fs/namei.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) (limited to 'fs/namei.c') diff --git a/fs/namei.c b/fs/namei.c index 6bbd8fdfb1f..80b162b142f 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1999,8 +1999,7 @@ static int filename_lookup(int dfd, struct filename *name, flags | LOOKUP_REVAL, nd); if (likely(!retval)) - audit_inode(name->name, nd->path.dentry, - flags & LOOKUP_PARENT); + audit_inode(name, nd->path.dentry, flags & LOOKUP_PARENT); return retval; } @@ -2674,7 +2673,6 @@ static int do_last(struct nameidata *nd, struct path *path, struct path save_parent = { .dentry = NULL, .mnt = NULL }; bool retried = false; int error; - const char *pathname = name->name; nd->flags &= ~LOOKUP_PARENT; nd->flags |= op->intent; @@ -2690,7 +2688,7 @@ static int do_last(struct nameidata *nd, struct path *path, error = complete_walk(nd); if (error) return error; - audit_inode(pathname, nd->path.dentry, 0); + audit_inode(name, nd->path.dentry, 0); if (open_flag & O_CREAT) { error = -EISDIR; goto out; @@ -2700,7 +2698,7 @@ static int do_last(struct nameidata *nd, struct path *path, error = complete_walk(nd); if (error) return error; - audit_inode(pathname, dir, 0); + audit_inode(name, dir, 0); goto finish_open; } @@ -2729,7 +2727,7 @@ static int do_last(struct nameidata *nd, struct path *path, if (error) return error; - audit_inode(pathname, dir, 0); + audit_inode(name, dir, 0); error = -EISDIR; /* trailing slashes? */ if (nd->last.name[nd->last.len]) @@ -2759,7 +2757,7 @@ retry_lookup: !S_ISREG(file->f_path.dentry->d_inode->i_mode)) will_truncate = false; - audit_inode(pathname, file->f_path.dentry, 0); + audit_inode(name, file->f_path.dentry, 0); goto opened; } @@ -2776,7 +2774,7 @@ retry_lookup: * create/update audit record if it already exists. */ if (path->dentry->d_inode) - audit_inode(pathname, path->dentry, 0); + audit_inode(name, path->dentry, 0); /* * If atomic_open() acquired write access it is dropped now due to @@ -2841,7 +2839,7 @@ finish_lookup: error = -ENOTDIR; if ((nd->flags & LOOKUP_DIRECTORY) && !nd->inode->i_op->lookup) goto out; - audit_inode(pathname, nd->path.dentry, 0); + audit_inode(name, nd->path.dentry, 0); finish_open: if (!S_ISREG(nd->inode->i_mode)) will_truncate = false; -- cgit v1.2.3 From 7950e3852ab86826b7349a535d2e8b0000340d7f Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 10 Oct 2012 16:43:13 -0400 Subject: vfs: embed struct filename inside of names_cache allocation if possible In the common case where a name is much smaller than PATH_MAX, an extra allocation for struct filename is unnecessary. Before allocating a separate one, try to embed the struct filename inside the buffer first. If it turns out that that's not long enough, then fall back to allocating a separate struct filename and redoing the copy. Signed-off-by: Jeff Layton Signed-off-by: Al Viro --- fs/namei.c | 69 ++++++++++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 49 insertions(+), 20 deletions(-) (limited to 'fs/namei.c') diff --git a/fs/namei.c b/fs/namei.c index 80b162b142f..d1895f30815 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -119,40 +119,69 @@ */ void final_putname(struct filename *name) { - __putname(name->name); - kfree(name); + if (name->separate) { + __putname(name->name); + kfree(name); + } else { + __putname(name); + } } +#define EMBEDDED_NAME_MAX (PATH_MAX - sizeof(struct filename)) + static struct filename * getname_flags(const char __user *filename, int flags, int *empty) { struct filename *result, *err; - char *kname; int len; + long max; + char *kname; result = audit_reusename(filename); if (result) return result; - /* FIXME: create dedicated slabcache? */ - result = kzalloc(sizeof(*result), GFP_KERNEL); + result = __getname(); if (unlikely(!result)) return ERR_PTR(-ENOMEM); - kname = __getname(); - if (unlikely(!kname)) { - err = ERR_PTR(-ENOMEM); - goto error_free_name; - } - + /* + * First, try to embed the struct filename inside the names_cache + * allocation + */ + kname = (char *)result + sizeof(*result); result->name = kname; - result->uptr = filename; - len = strncpy_from_user(kname, filename, PATH_MAX); + result->separate = false; + max = EMBEDDED_NAME_MAX; + +recopy: + len = strncpy_from_user(kname, filename, max); if (unlikely(len < 0)) { err = ERR_PTR(len); goto error; } + /* + * Uh-oh. We have a name that's approaching PATH_MAX. Allocate a + * separate struct filename so we can dedicate the entire + * names_cache allocation for the pathname, and re-do the copy from + * userland. + */ + if (len == EMBEDDED_NAME_MAX && max == EMBEDDED_NAME_MAX) { + kname = (char *)result; + + result = kzalloc(sizeof(*result), GFP_KERNEL); + if (!result) { + err = ERR_PTR(-ENOMEM); + result = (struct filename *)kname; + goto error; + } + result->name = kname; + result->separate = true; + max = PATH_MAX; + goto recopy; + } + /* The empty path is special. */ if (unlikely(!len)) { if (empty) @@ -163,15 +192,15 @@ getname_flags(const char __user *filename, int flags, int *empty) } err = ERR_PTR(-ENAMETOOLONG); - if (likely(len < PATH_MAX)) { - audit_getname(result); - return result; - } + if (unlikely(len >= PATH_MAX)) + goto error; + + result->uptr = filename; + audit_getname(result); + return result; error: - __putname(kname); -error_free_name: - kfree(result); + final_putname(result); return err; } -- cgit v1.2.3 From 561ec64ae67ef25cac8d72bb9c4bfc955edfd415 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 26 Oct 2012 10:05:07 -0700 Subject: VFS: don't do protected {sym,hard}links by default In commit 800179c9b8a1 ("This adds symlink and hardlink restrictions to the Linux VFS"), the new link protections were enabled by default, in the hope that no actual application would care, despite it being technically against legacy UNIX (and documented POSIX) behavior. However, it does turn out to break some applications. It's rare, and it's unfortunate, but it's unacceptable to break existing systems, so we'll have to default to legacy behavior. In particular, it has broken the way AFD distributes files, see http://www.dwd.de/AFD/ along with some legacy scripts. Distributions can end up setting this at initrd time or in system scripts: if you have security problems due to link attacks during your early boot sequence, you have bigger problems than some kernel sysctl setting. Do: echo 1 > /proc/sys/fs/protected_symlinks echo 1 > /proc/sys/fs/protected_hardlinks to re-enable the link protections. Alternatively, we may at some point introduce a kernel config option that sets these kinds of "more secure but not traditional" behavioural options automatically. Reported-by: Nick Bowler Reported-by: Holger Kiehl Cc: Kees Cook Cc: Ingo Molnar Cc: Andrew Morton Cc: Al Viro Cc: Alan Cox Cc: Theodore Ts'o Cc: stable@kernel.org # v3.6 Signed-off-by: Linus Torvalds --- fs/namei.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/namei.c') diff --git a/fs/namei.c b/fs/namei.c index d1895f30815..937f9d50c84 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -705,8 +705,8 @@ static inline void put_link(struct nameidata *nd, struct path *link, void *cooki path_put(link); } -int sysctl_protected_symlinks __read_mostly = 1; -int sysctl_protected_hardlinks __read_mostly = 1; +int sysctl_protected_symlinks __read_mostly = 0; +int sysctl_protected_hardlinks __read_mostly = 0; /** * may_follow_link - Check symlink following for unsafe situations -- cgit v1.2.3 From 21d8a15ac333b05f1fecdf9fdc30996be2e11d60 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 29 Nov 2012 22:17:21 -0500 Subject: lookup_one_len: don't accept . and .. Signed-off-by: Al Viro --- fs/namei.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'fs/namei.c') diff --git a/fs/namei.c b/fs/namei.c index 937f9d50c84..5f4cdf3ad91 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2131,6 +2131,11 @@ struct dentry *lookup_one_len(const char *name, struct dentry *base, int len) if (!len) return ERR_PTR(-EACCES); + if (unlikely(name[0] == '.')) { + if (len < 2 || (len == 2 && name[1] == '.')) + return ERR_PTR(-EACCES); + } + while (len--) { c = *(const unsigned char *)name++; if (c == '/' || c == '\0') -- cgit v1.2.3 From 582aa64a04a579d47d05e4a0ee85bf047978ef4d Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 11 Dec 2012 08:56:16 -0500 Subject: vfs: remove unneeded permission check from path_init When path_init is called with a valid dfd, that code checks permissions on the open directory fd and returns an error if the check fails. This permission check is redundant, however. Both callers of path_init immediately call link_path_walk afterward. The first thing that link_path_walk does for pathnames that do not consist only of slashes is to check for exec permissions at the starting point of the path walk. And this check in path_init() is on the path taken only when *name != '/' && *name != '\0'. In most cases, these checks are very quick, but when the dfd is for a file on a NFS mount with the actimeo=0, each permission check goes out onto the wire. The result is 2 identical ACCESS calls. Given that these codepaths are fairly "hot", I think it makes sense to eliminate the permission check in path_init and simply assume that the caller will eventually check the permissions before proceeding. Reported-by: Dave Wysochanski Signed-off-by: Jeff Layton Signed-off-by: Al Viro --- fs/namei.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'fs/namei.c') diff --git a/fs/namei.c b/fs/namei.c index 5f4cdf3ad91..e245d88b4d6 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1903,6 +1903,7 @@ static int path_init(int dfd, const char *name, unsigned int flags, get_fs_pwd(current->fs, &nd->path); } } else { + /* Caller must check execute permissions on the starting path component */ struct fd f = fdget_raw(dfd); struct dentry *dentry; @@ -1916,12 +1917,6 @@ static int path_init(int dfd, const char *name, unsigned int flags, fdput(f); return -ENOTDIR; } - - retval = inode_permission(dentry->d_inode, MAY_EXEC); - if (retval) { - fdput(f); - return retval; - } } nd->path = f.file->f_path; -- cgit v1.2.3 From 741b7c3f77937b2fb7c10aeb4c5c621463582583 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 20 Dec 2012 13:41:28 -0500 Subject: path_init(): make -ENOTDIR failure exits consistent Signed-off-by: Al Viro --- fs/namei.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/namei.c') diff --git a/fs/namei.c b/fs/namei.c index e245d88b4d6..35195ff9d19 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1859,7 +1859,7 @@ static int path_init(int dfd, const char *name, unsigned int flags, if (flags & LOOKUP_ROOT) { struct inode *inode = nd->root.dentry->d_inode; if (*name) { - if (!inode->i_op->lookup) + if (!can_lookup(inode)) return -ENOTDIR; retval = inode_permission(inode, MAY_EXEC); if (retval) @@ -1913,7 +1913,7 @@ static int path_init(int dfd, const char *name, unsigned int flags, dentry = f.file->f_path.dentry; if (*name) { - if (!S_ISDIR(dentry->d_inode->i_mode)) { + if (!can_lookup(dentry->d_inode)) { fdput(f); return -ENOTDIR; } -- cgit v1.2.3 From 39e3c9553f34381a1b664c27b0c696a266a5735e Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 28 Nov 2012 11:30:53 -0500 Subject: vfs: remove DCACHE_NEED_LOOKUP The code that relied on that flag was ripped out of btrfs quite some time ago, and never added back. Josef indicated that he was going to take a different approach to the problem in btrfs, and that we could just eliminate this flag. Cc: Josef Bacik Signed-off-by: Jeff Layton Signed-off-by: Al Viro --- fs/namei.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) (limited to 'fs/namei.c') diff --git a/fs/namei.c b/fs/namei.c index 35195ff9d19..25a41e02984 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1275,9 +1275,7 @@ static struct dentry *lookup_dcache(struct qstr *name, struct dentry *dir, *need_lookup = false; dentry = d_lookup(dir, name); if (dentry) { - if (d_need_lookup(dentry)) { - *need_lookup = true; - } else if (dentry->d_flags & DCACHE_OP_REVALIDATE) { + if (dentry->d_flags & DCACHE_OP_REVALIDATE) { error = d_revalidate(dentry, flags); if (unlikely(error <= 0)) { if (error < 0) { @@ -1383,8 +1381,6 @@ static int lookup_fast(struct nameidata *nd, struct qstr *name, return -ECHILD; nd->seq = seq; - if (unlikely(d_need_lookup(dentry))) - goto unlazy; if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) { status = d_revalidate(dentry, nd->flags); if (unlikely(status <= 0)) { @@ -1410,11 +1406,6 @@ unlazy: if (unlikely(!dentry)) goto need_lookup; - if (unlikely(d_need_lookup(dentry))) { - dput(dentry); - goto need_lookup; - } - if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE) && need_reval) status = d_revalidate(dentry, nd->flags); if (unlikely(status <= 0)) { -- cgit v1.2.3 From 1ac12b4b6d707937f9de6d09622823b2fd0c93ef Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 11 Dec 2012 12:10:06 -0500 Subject: vfs: turn is_dir argument to kern_path_create into a lookup_flags arg Where we can pass in LOOKUP_DIRECTORY or LOOKUP_REVAL. Any other flags passed in here are currently ignored. Signed-off-by: Jeff Layton Signed-off-by: Al Viro --- fs/namei.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) (limited to 'fs/namei.c') diff --git a/fs/namei.c b/fs/namei.c index 25a41e02984..8f8e41f6eb5 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3030,12 +3030,22 @@ struct file *do_file_open_root(struct dentry *dentry, struct vfsmount *mnt, return file; } -struct dentry *kern_path_create(int dfd, const char *pathname, struct path *path, int is_dir) +struct dentry *kern_path_create(int dfd, const char *pathname, + struct path *path, unsigned int lookup_flags) { struct dentry *dentry = ERR_PTR(-EEXIST); struct nameidata nd; int err2; - int error = do_path_lookup(dfd, pathname, LOOKUP_PARENT, &nd); + int error; + bool is_dir = (lookup_flags & LOOKUP_DIRECTORY); + + /* + * Note that only LOOKUP_REVAL and LOOKUP_DIRECTORY matter here. Any + * other flags passed in are ignored! + */ + lookup_flags &= LOOKUP_REVAL; + + error = do_path_lookup(dfd, pathname, LOOKUP_PARENT|lookup_flags, &nd); if (error) return ERR_PTR(error); @@ -3099,13 +3109,14 @@ void done_path_create(struct path *path, struct dentry *dentry) } EXPORT_SYMBOL(done_path_create); -struct dentry *user_path_create(int dfd, const char __user *pathname, struct path *path, int is_dir) +struct dentry *user_path_create(int dfd, const char __user *pathname, + struct path *path, unsigned int lookup_flags) { struct filename *tmp = getname(pathname); struct dentry *res; if (IS_ERR(tmp)) return ERR_CAST(tmp); - res = kern_path_create(dfd, tmp->name, path, is_dir); + res = kern_path_create(dfd, tmp->name, path, lookup_flags); putname(tmp); return res; } @@ -3228,7 +3239,7 @@ SYSCALL_DEFINE3(mkdirat, int, dfd, const char __user *, pathname, umode_t, mode) struct path path; int error; - dentry = user_path_create(dfd, pathname, &path, 1); + dentry = user_path_create(dfd, pathname, &path, LOOKUP_DIRECTORY); if (IS_ERR(dentry)) return PTR_ERR(dentry); -- cgit v1.2.3 From 972567f14cbcd437e9a88a73836bbc2ee0720b5f Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 20 Dec 2012 16:00:10 -0500 Subject: vfs: fix mknodat to retry on ESTALE errors Signed-off-by: Jeff Layton Signed-off-by: Al Viro --- fs/namei.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'fs/namei.c') diff --git a/fs/namei.c b/fs/namei.c index 8f8e41f6eb5..b70c191b7e2 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3172,12 +3172,13 @@ SYSCALL_DEFINE4(mknodat, int, dfd, const char __user *, filename, umode_t, mode, struct dentry *dentry; struct path path; int error; + unsigned int lookup_flags = 0; error = may_mknod(mode); if (error) return error; - - dentry = user_path_create(dfd, filename, &path, 0); +retry: + dentry = user_path_create(dfd, filename, &path, lookup_flags); if (IS_ERR(dentry)) return PTR_ERR(dentry); @@ -3200,6 +3201,10 @@ SYSCALL_DEFINE4(mknodat, int, dfd, const char __user *, filename, umode_t, mode, } out: done_path_create(&path, dentry); + if (retry_estale(error, lookup_flags)) { + lookup_flags |= LOOKUP_REVAL; + goto retry; + } return error; } -- cgit v1.2.3 From b76d8b82266077dc7098dd13f321a616099a1bd8 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 20 Dec 2012 16:04:09 -0500 Subject: vfs: fix mkdirat to retry once on an ESTALE error Signed-off-by: Jeff Layton Signed-off-by: Al Viro --- fs/namei.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'fs/namei.c') diff --git a/fs/namei.c b/fs/namei.c index b70c191b7e2..1beebc1a38c 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3243,8 +3243,10 @@ SYSCALL_DEFINE3(mkdirat, int, dfd, const char __user *, pathname, umode_t, mode) struct dentry *dentry; struct path path; int error; + unsigned int lookup_flags = LOOKUP_DIRECTORY; - dentry = user_path_create(dfd, pathname, &path, LOOKUP_DIRECTORY); +retry: + dentry = user_path_create(dfd, pathname, &path, lookup_flags); if (IS_ERR(dentry)) return PTR_ERR(dentry); @@ -3254,6 +3256,10 @@ SYSCALL_DEFINE3(mkdirat, int, dfd, const char __user *, pathname, umode_t, mode) if (!error) error = vfs_mkdir(path.dentry->d_inode, dentry, mode); done_path_create(&path, dentry); + if (retry_estale(error, lookup_flags)) { + lookup_flags |= LOOKUP_REVAL; + goto retry; + } return error; } -- cgit v1.2.3 From f46d3567b223e41e1f2faeb82d3b74a6d84fc508 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 11 Dec 2012 12:10:08 -0500 Subject: vfs: fix symlinkat to retry on ESTALE errors Signed-off-by: Jeff Layton Signed-off-by: Al Viro --- fs/namei.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'fs/namei.c') diff --git a/fs/namei.c b/fs/namei.c index 1beebc1a38c..b06a111591a 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3521,12 +3521,13 @@ SYSCALL_DEFINE3(symlinkat, const char __user *, oldname, struct filename *from; struct dentry *dentry; struct path path; + unsigned int lookup_flags = 0; from = getname(oldname); if (IS_ERR(from)) return PTR_ERR(from); - - dentry = user_path_create(newdfd, newname, &path, 0); +retry: + dentry = user_path_create(newdfd, newname, &path, lookup_flags); error = PTR_ERR(dentry); if (IS_ERR(dentry)) goto out_putname; @@ -3535,6 +3536,10 @@ SYSCALL_DEFINE3(symlinkat, const char __user *, oldname, if (!error) error = vfs_symlink(path.dentry->d_inode, dentry, from->name); done_path_create(&path, dentry); + if (retry_estale(error, lookup_flags)) { + lookup_flags |= LOOKUP_REVAL; + goto retry; + } out_putname: putname(from); return error; -- cgit v1.2.3 From 442e31ca5a49e398351b2954b51f578353fdf210 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 20 Dec 2012 16:15:38 -0500 Subject: vfs: fix linkat to retry once on ESTALE errors Signed-off-by: Jeff Layton Signed-off-by: Al Viro --- fs/namei.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'fs/namei.c') diff --git a/fs/namei.c b/fs/namei.c index b06a111591a..6868699272b 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3626,12 +3626,13 @@ SYSCALL_DEFINE5(linkat, int, olddfd, const char __user *, oldname, if (flags & AT_SYMLINK_FOLLOW) how |= LOOKUP_FOLLOW; - +retry: error = user_path_at(olddfd, oldname, how, &old_path); if (error) return error; - new_dentry = user_path_create(newdfd, newname, &new_path, 0); + new_dentry = user_path_create(newdfd, newname, &new_path, + (how & LOOKUP_REVAL)); error = PTR_ERR(new_dentry); if (IS_ERR(new_dentry)) goto out; @@ -3648,6 +3649,10 @@ SYSCALL_DEFINE5(linkat, int, olddfd, const char __user *, oldname, error = vfs_link(old_path.dentry, new_path.dentry->d_inode, new_dentry); out_dput: done_path_create(&new_path, new_dentry); + if (retry_estale(error, how)) { + how |= LOOKUP_REVAL; + goto retry; + } out: path_put(&old_path); -- cgit v1.2.3 From 9e790bd65ce4cbfdff305a57b67b1a2cbe5d4335 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 11 Dec 2012 12:10:09 -0500 Subject: vfs: add a flags argument to user_path_parent ...so we can pass in LOOKUP_REVAL. For now, nothing does yet. Signed-off-by: Jeff Layton Signed-off-by: Al Viro --- fs/namei.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'fs/namei.c') diff --git a/fs/namei.c b/fs/namei.c index 6868699272b..19190618695 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2175,15 +2175,19 @@ int user_path_at(int dfd, const char __user *name, unsigned flags, * path-walking is complete. */ static struct filename * -user_path_parent(int dfd, const char __user *path, struct nameidata *nd) +user_path_parent(int dfd, const char __user *path, struct nameidata *nd, + unsigned int flags) { struct filename *s = getname(path); int error; + /* only LOOKUP_REVAL is allowed in extra flags */ + flags &= LOOKUP_REVAL; + if (IS_ERR(s)) return s; - error = filename_lookup(dfd, s, LOOKUP_PARENT, nd); + error = filename_lookup(dfd, s, flags | LOOKUP_PARENT, nd); if (error) { putname(s); return ERR_PTR(error); @@ -3336,7 +3340,7 @@ static long do_rmdir(int dfd, const char __user *pathname) struct dentry *dentry; struct nameidata nd; - name = user_path_parent(dfd, pathname, &nd); + name = user_path_parent(dfd, pathname, &nd, 0); if (IS_ERR(name)) return PTR_ERR(name); @@ -3432,7 +3436,7 @@ static long do_unlinkat(int dfd, const char __user *pathname) struct nameidata nd; struct inode *inode = NULL; - name = user_path_parent(dfd, pathname, &nd); + name = user_path_parent(dfd, pathname, &nd, 0); if (IS_ERR(name)) return PTR_ERR(name); @@ -3827,13 +3831,13 @@ SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname, struct filename *to; int error; - from = user_path_parent(olddfd, oldname, &oldnd); + from = user_path_parent(olddfd, oldname, &oldnd, 0); if (IS_ERR(from)) { error = PTR_ERR(from); goto exit; } - to = user_path_parent(newdfd, newname, &newnd); + to = user_path_parent(newdfd, newname, &newnd, 0); if (IS_ERR(to)) { error = PTR_ERR(to); goto exit1; -- cgit v1.2.3 From c6ee920698301febdf10df0b57039173a1edbd43 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 20 Dec 2012 16:28:33 -0500 Subject: vfs: make do_rmdir retry once on ESTALE errors Signed-off-by: Jeff Layton Signed-off-by: Al Viro --- fs/namei.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'fs/namei.c') diff --git a/fs/namei.c b/fs/namei.c index 19190618695..fe06a2fd192 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3339,8 +3339,9 @@ static long do_rmdir(int dfd, const char __user *pathname) struct filename *name; struct dentry *dentry; struct nameidata nd; - - name = user_path_parent(dfd, pathname, &nd, 0); + unsigned int lookup_flags = 0; +retry: + name = user_path_parent(dfd, pathname, &nd, lookup_flags); if (IS_ERR(name)) return PTR_ERR(name); @@ -3382,6 +3383,10 @@ exit2: exit1: path_put(&nd.path); putname(name); + if (retry_estale(error, lookup_flags)) { + lookup_flags |= LOOKUP_REVAL; + goto retry; + } return error; } -- cgit v1.2.3 From 5d18f8133cad85ccbb7fa6fd351d75025da32504 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 20 Dec 2012 16:38:04 -0500 Subject: vfs: make do_unlinkat retry once on ESTALE errors Signed-off-by: Jeff Layton Signed-off-by: Al Viro --- fs/namei.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'fs/namei.c') diff --git a/fs/namei.c b/fs/namei.c index fe06a2fd192..8a262c2efff 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3440,8 +3440,9 @@ static long do_unlinkat(int dfd, const char __user *pathname) struct dentry *dentry; struct nameidata nd; struct inode *inode = NULL; - - name = user_path_parent(dfd, pathname, &nd, 0); + unsigned int lookup_flags = 0; +retry: + name = user_path_parent(dfd, pathname, &nd, lookup_flags); if (IS_ERR(name)) return PTR_ERR(name); @@ -3479,6 +3480,11 @@ exit2: exit1: path_put(&nd.path); putname(name); + if (retry_estale(error, lookup_flags)) { + lookup_flags |= LOOKUP_REVAL; + inode = NULL; + goto retry; + } return error; slashes: -- cgit v1.2.3 From c6a9428401c00a27d3c17264934d14e284570c97 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 11 Dec 2012 12:10:10 -0500 Subject: vfs: fix renameat to retry on ESTALE errors ...as always, rename is the messiest of the bunch. We have to track whether to retry or not via a separate flag since the error handling is already quite complex. Signed-off-by: Jeff Layton Signed-off-by: Al Viro --- fs/namei.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) (limited to 'fs/namei.c') diff --git a/fs/namei.c b/fs/namei.c index 8a262c2efff..43a97ee1d4c 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3840,15 +3840,17 @@ SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname, struct nameidata oldnd, newnd; struct filename *from; struct filename *to; + unsigned int lookup_flags = 0; + bool should_retry = false; int error; - - from = user_path_parent(olddfd, oldname, &oldnd, 0); +retry: + from = user_path_parent(olddfd, oldname, &oldnd, lookup_flags); if (IS_ERR(from)) { error = PTR_ERR(from); goto exit; } - to = user_path_parent(newdfd, newname, &newnd, 0); + to = user_path_parent(newdfd, newname, &newnd, lookup_flags); if (IS_ERR(to)) { error = PTR_ERR(to); goto exit1; @@ -3920,11 +3922,18 @@ exit3: unlock_rename(new_dir, old_dir); mnt_drop_write(oldnd.path.mnt); exit2: + if (retry_estale(error, lookup_flags)) + should_retry = true; path_put(&newnd.path); putname(to); exit1: path_put(&oldnd.path); putname(from); + if (should_retry) { + should_retry = false; + lookup_flags |= LOOKUP_REVAL; + goto retry; + } exit: return error; } -- cgit v1.2.3