From d18e9008c377dc6a6d2166a6840bf3a23a5867fd Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 5 Jun 2012 15:10:17 +0200 Subject: vfs: add i_op->atomic_open() Add a new inode operation which is called on the last component of an open. Using this the filesystem can look up, possibly create and open the file in one atomic operation. If it cannot perform this (e.g. the file type turned out to be wrong) it may signal this by returning NULL instead of an open struct file pointer. i_op->atomic_open() is only called if the last component is negative or needs lookup. Handling cached positive dentries here doesn't add much value: these can be opened using f_op->open(). If the cached file turns out to be invalid, the open can be retried, this time using ->atomic_open() with a fresh dentry. For now leave the old way of using open intents in lookup and revalidate in place. This will be removed once all the users are converted. David Howells noticed that if ->atomic_open() opens the file but does not create it, handle_truncate() will be called on it even if it is not a regular file. Fix this by checking the file type in this case too. Signed-off-by: Miklos Szeredi Signed-off-by: Al Viro --- fs/open.c | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) (limited to 'fs/open.c') diff --git a/fs/open.c b/fs/open.c index 1540632d838..13bece4f36a 100644 --- a/fs/open.c +++ b/fs/open.c @@ -810,6 +810,48 @@ out_err: } EXPORT_SYMBOL_GPL(lookup_instantiate_filp); +/** + * finish_open - finish opening a file + * @od: opaque open data + * @dentry: pointer to dentry + * @open: open callback + * + * This can be used to finish opening a file passed to i_op->atomic_open(). + * + * If the open callback is set to NULL, then the standard f_op->open() + * filesystem callback is substituted. 
+ */ +struct file *finish_open(struct opendata *od, struct dentry *dentry, + int (*open)(struct inode *, struct file *)) +{ + struct file *res; + + mntget(od->mnt); + dget(dentry); + + res = do_dentry_open(dentry, od->mnt, *od->filp, open, current_cred()); + if (!IS_ERR(res)) + *od->filp = NULL; + + return res; +} +EXPORT_SYMBOL(finish_open); + +/** + * finish_no_open - finish ->atomic_open() without opening the file + * + * @od: opaque open data + * @dentry: dentry or NULL (as returned from ->lookup()) + * + * This can be used to set the result of a successful lookup in ->atomic_open(). + * The filesystem's atomic_open() method shall return NULL after calling this. + */ +void finish_no_open(struct opendata *od, struct dentry *dentry) +{ + od->dentry = dentry; +} +EXPORT_SYMBOL(finish_no_open); + /** * nameidata_to_filp - convert a nameidata to an open filp. * @nd: pointer to nameidata -- cgit v1.2.3 From 015c3bbcd88df2305aae5b017a9c91c08b380aa1 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 5 Jun 2012 15:10:27 +0200 Subject: vfs: remove open intents from nameidata All users of open intents have been converted to use ->atomic_{open,create}. This patch gets rid of nd->intent.open and related infrastructure. Signed-off-by: Miklos Szeredi Signed-off-by: Al Viro --- fs/open.c | 87 ++------------------------------------------------------------- 1 file changed, 2 insertions(+), 85 deletions(-) (limited to 'fs/open.c') diff --git a/fs/open.c b/fs/open.c index 13bece4f36a..937f4ec2018 100644 --- a/fs/open.c +++ b/fs/open.c @@ -770,46 +770,6 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt, return res; } -/** - * lookup_instantiate_filp - instantiates the open intent filp - * @nd: pointer to nameidata - * @dentry: pointer to dentry - * @open: open callback - * - * Helper for filesystems that want to use lookup open intents and pass back - * a fully instantiated struct file to the caller. 
- * This function is meant to be called from within a filesystem's - * lookup method. - * Beware of calling it for non-regular files! Those ->open methods might block - * (e.g. in fifo_open), leaving you with parent locked (and in case of fifo, - * leading to a deadlock, as nobody can open that fifo anymore, because - * another process to open fifo will block on locked parent when doing lookup). - * Note that in case of error, nd->intent.open.file is destroyed, but the - * path information remains valid. - * If the open callback is set to NULL, then the standard f_op->open() - * filesystem callback is substituted. - */ -struct file *lookup_instantiate_filp(struct nameidata *nd, struct dentry *dentry, - int (*open)(struct inode *, struct file *)) -{ - const struct cred *cred = current_cred(); - - if (IS_ERR(nd->intent.open.file)) - goto out; - if (IS_ERR(dentry)) - goto out_err; - nd->intent.open.file = __dentry_open(dget(dentry), mntget(nd->path.mnt), - nd->intent.open.file, - open, cred); -out: - return nd->intent.open.file; -out_err: - release_open_intent(nd); - nd->intent.open.file = ERR_CAST(dentry); - goto out; -} -EXPORT_SYMBOL_GPL(lookup_instantiate_filp); - /** * finish_open - finish opening a file * @od: opaque open data @@ -829,9 +789,9 @@ struct file *finish_open(struct opendata *od, struct dentry *dentry, mntget(od->mnt); dget(dentry); - res = do_dentry_open(dentry, od->mnt, *od->filp, open, current_cred()); + res = do_dentry_open(dentry, od->mnt, od->filp, open, current_cred()); if (!IS_ERR(res)) - *od->filp = NULL; + od->filp = NULL; return res; } @@ -852,49 +812,6 @@ void finish_no_open(struct opendata *od, struct dentry *dentry) } EXPORT_SYMBOL(finish_no_open); -/** - * nameidata_to_filp - convert a nameidata to an open filp. 
- * @nd: pointer to nameidata - * @flags: open flags - * - * Note that this function destroys the original nameidata - */ -struct file *nameidata_to_filp(struct nameidata *nd) -{ - const struct cred *cred = current_cred(); - struct file *filp; - - /* Pick up the filp from the open intent */ - filp = nd->intent.open.file; - - /* Has the filesystem initialised the file for us? */ - if (filp->f_path.dentry != NULL) { - nd->intent.open.file = NULL; - } else { - struct file *res; - - path_get(&nd->path); - res = do_dentry_open(nd->path.dentry, nd->path.mnt, - filp, NULL, cred); - if (!IS_ERR(res)) { - int error; - - nd->intent.open.file = NULL; - BUG_ON(res != filp); - - error = open_check_o_direct(filp); - if (error) { - fput(filp); - filp = ERR_PTR(error); - } - } else { - /* Allow nd->intent.open.file to be recycled */ - filp = res; - } - } - return filp; -} - /* * dentry_open() will have done dput(dentry) and mntput(mnt) if it returns an * error. -- cgit v1.2.3 From 47237687d73cbeae1dd7a133c3fc3d7239094568 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 10 Jun 2012 05:01:45 -0400 Subject: ->atomic_open() prototype change - pass int * instead of bool * ... and let finish_open() report having opened the file via that sucker. Next step: don't modify od->filp at all. [AV: FILE_CREATE was already used by cifs; Miklos' fix folded] Signed-off-by: Al Viro --- fs/open.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'fs/open.c') diff --git a/fs/open.c b/fs/open.c index 937f4ec2018..89589bd3993 100644 --- a/fs/open.c +++ b/fs/open.c @@ -782,7 +782,8 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt, * filesystem callback is substituted. 
*/ struct file *finish_open(struct opendata *od, struct dentry *dentry, - int (*open)(struct inode *, struct file *)) + int (*open)(struct inode *, struct file *), + int *opened) { struct file *res; @@ -790,8 +791,10 @@ struct file *finish_open(struct opendata *od, struct dentry *dentry, dget(dentry); res = do_dentry_open(dentry, od->mnt, od->filp, open, current_cred()); - if (!IS_ERR(res)) + if (!IS_ERR(res)) { + *opened |= FILE_OPENED; od->filp = NULL; + } return res; } -- cgit v1.2.3 From 3d8a00d2099ebc6d5a6e95fadaf861709d9919a8 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 10 Jun 2012 05:04:43 -0400 Subject: don't modify od->filp at all make put_filp() conditional on flag set by finish_open() Signed-off-by: Al Viro --- fs/open.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'fs/open.c') diff --git a/fs/open.c b/fs/open.c index 89589bd3993..c87f98201c2 100644 --- a/fs/open.c +++ b/fs/open.c @@ -786,15 +786,14 @@ struct file *finish_open(struct opendata *od, struct dentry *dentry, int *opened) { struct file *res; + BUG_ON(*opened & FILE_OPENED); /* once it's opened, it's opened */ mntget(od->mnt); dget(dentry); res = do_dentry_open(dentry, od->mnt, od->filp, open, current_cred()); - if (!IS_ERR(res)) { + if (!IS_ERR(res)) *opened |= FILE_OPENED; - od->filp = NULL; - } return res; } -- cgit v1.2.3 From a4a3bdd778715999ddfeefdc52ab76254580fa76 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 10 Jun 2012 05:55:37 -0400 Subject: kill opendata->{mnt,dentry} ->filp->f_path is there for purpose... 
Signed-off-by: Al Viro --- fs/open.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs/open.c') diff --git a/fs/open.c b/fs/open.c index c87f98201c2..2b1654d8bfb 100644 --- a/fs/open.c +++ b/fs/open.c @@ -788,10 +788,10 @@ struct file *finish_open(struct opendata *od, struct dentry *dentry, struct file *res; BUG_ON(*opened & FILE_OPENED); /* once it's opened, it's opened */ - mntget(od->mnt); + mntget(od->filp->f_path.mnt); dget(dentry); - res = do_dentry_open(dentry, od->mnt, od->filp, open, current_cred()); + res = do_dentry_open(dentry, od->filp->f_path.mnt, od->filp, open, current_cred()); if (!IS_ERR(res)) *opened |= FILE_OPENED; @@ -810,7 +810,7 @@ EXPORT_SYMBOL(finish_open); */ void finish_no_open(struct opendata *od, struct dentry *dentry) { - od->dentry = dentry; + od->filp->f_path.dentry = dentry; } EXPORT_SYMBOL(finish_no_open); -- cgit v1.2.3 From 30d904947459cca2beb69e0110716f5248b31f2a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 22 Jun 2012 12:40:19 +0400 Subject: kill struct opendata Just pass struct file *. Methods are happier that way... There's no need to return struct file * from finish_open() now, so let it return int. Next: saner prototypes for parts in namei.c Signed-off-by: Al Viro --- fs/open.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) (limited to 'fs/open.c') diff --git a/fs/open.c b/fs/open.c index 2b1654d8bfb..fc829d6c3a4 100644 --- a/fs/open.c +++ b/fs/open.c @@ -781,21 +781,23 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt, * If the open callback is set to NULL, then the standard f_op->open() * filesystem callback is substituted. 
*/ -struct file *finish_open(struct opendata *od, struct dentry *dentry, - int (*open)(struct inode *, struct file *), - int *opened) +int finish_open(struct file *file, struct dentry *dentry, + int (*open)(struct inode *, struct file *), + int *opened) { struct file *res; BUG_ON(*opened & FILE_OPENED); /* once it's opened, it's opened */ - mntget(od->filp->f_path.mnt); + mntget(file->f_path.mnt); dget(dentry); - res = do_dentry_open(dentry, od->filp->f_path.mnt, od->filp, open, current_cred()); - if (!IS_ERR(res)) + res = do_dentry_open(dentry, file->f_path.mnt, file, open, current_cred()); + if (!IS_ERR(res)) { *opened |= FILE_OPENED; + return 0; + } - return res; + return PTR_ERR(res); } EXPORT_SYMBOL(finish_open); @@ -808,9 +810,9 @@ EXPORT_SYMBOL(finish_open); * This can be used to set the result of a successful lookup in ->atomic_open(). * The filesystem's atomic_open() method shall return NULL after calling this. */ -void finish_no_open(struct opendata *od, struct dentry *dentry) +void finish_no_open(struct file *file, struct dentry *dentry) { - od->filp->f_path.dentry = dentry; + file->f_path.dentry = dentry; } EXPORT_SYMBOL(finish_no_open); -- cgit v1.2.3 From e45198a6ac24bd2c4ad4a43b670c2f1a23dd2df3 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 10 Jun 2012 06:48:09 -0400 Subject: make finish_no_open() return int namely, 1 ;-) That's what we want to return from ->atomic_open() instances after finish_no_open(). Signed-off-by: Al Viro --- fs/open.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/open.c') diff --git a/fs/open.c b/fs/open.c index fc829d6c3a4..d51c1b71b06 100644 --- a/fs/open.c +++ b/fs/open.c @@ -810,9 +810,10 @@ EXPORT_SYMBOL(finish_open); * This can be used to set the result of a successful lookup in ->atomic_open(). * The filesystem's atomic_open() method shall return NULL after calling this. 
*/ -void finish_no_open(struct file *file, struct dentry *dentry) +int finish_no_open(struct file *file, struct dentry *dentry) { file->f_path.dentry = dentry; + return 1; } EXPORT_SYMBOL(finish_no_open); -- cgit v1.2.3 From 96b7e579addd3cdc806c1667bf5b6b126070827c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 10 Jun 2012 14:22:04 -0400 Subject: switch do_dentry_open() to returning int Signed-off-by: Al Viro --- fs/open.c | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) (limited to 'fs/open.c') diff --git a/fs/open.c b/fs/open.c index d51c1b71b06..1241c597d31 100644 --- a/fs/open.c +++ b/fs/open.c @@ -667,10 +667,10 @@ int open_check_o_direct(struct file *f) return 0; } -static struct file *do_dentry_open(struct dentry *dentry, struct vfsmount *mnt, - struct file *f, - int (*open)(struct inode *, struct file *), - const struct cred *cred) +static int do_dentry_open(struct dentry *dentry, struct vfsmount *mnt, + struct file *f, + int (*open)(struct inode *, struct file *), + const struct cred *cred) { static const struct file_operations empty_fops = {}; struct inode *inode; @@ -699,7 +699,7 @@ static struct file *do_dentry_open(struct dentry *dentry, struct vfsmount *mnt, if (unlikely(f->f_mode & FMODE_PATH)) { f->f_op = &empty_fops; - return f; + return 0; } f->f_op = fops_get(inode->i_fop); @@ -726,7 +726,7 @@ static struct file *do_dentry_open(struct dentry *dentry, struct vfsmount *mnt, file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping); - return f; + return 0; cleanup_all: fops_put(f->f_op); @@ -749,7 +749,7 @@ cleanup_all: cleanup_file: dput(dentry); mntput(mnt); - return ERR_PTR(error); + return error; } static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt, @@ -757,17 +757,19 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt, int (*open)(struct inode *, struct file *), const struct cred *cred) { - struct file *res = do_dentry_open(dentry, mnt, 
f, open, cred); - if (!IS_ERR(res)) { - int error = open_check_o_direct(f); + int error; + error = do_dentry_open(dentry, mnt, f, open, cred); + if (!error) { + error = open_check_o_direct(f); if (error) { - fput(res); - res = ERR_PTR(error); + fput(f); + f = ERR_PTR(error); } - } else { + } else { put_filp(f); + f = ERR_PTR(error); } - return res; + return f; } /** @@ -785,19 +787,17 @@ int finish_open(struct file *file, struct dentry *dentry, int (*open)(struct inode *, struct file *), int *opened) { - struct file *res; + int error; BUG_ON(*opened & FILE_OPENED); /* once it's opened, it's opened */ mntget(file->f_path.mnt); dget(dentry); - res = do_dentry_open(dentry, file->f_path.mnt, file, open, current_cred()); - if (!IS_ERR(res)) { + error = do_dentry_open(dentry, file->f_path.mnt, file, open, current_cred()); + if (!error) *opened |= FILE_OPENED; - return 0; - } - return PTR_ERR(res); + return error; } EXPORT_SYMBOL(finish_open); -- cgit v1.2.3 From 2a027e7a1873812240cbdac0f55c4734ff0042a5 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 10 Jun 2012 14:24:38 -0400 Subject: fold __dentry_open() into its sole caller Signed-off-by: Al Viro --- fs/open.c | 33 ++++++++++++--------------------- 1 file changed, 12 insertions(+), 21 deletions(-) (limited to 'fs/open.c') diff --git a/fs/open.c b/fs/open.c index 1241c597d31..28fbacbd5e3 100644 --- a/fs/open.c +++ b/fs/open.c @@ -752,26 +752,6 @@ cleanup_file: return error; } -static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt, - struct file *f, - int (*open)(struct inode *, struct file *), - const struct cred *cred) -{ - int error; - error = do_dentry_open(dentry, mnt, f, open, cred); - if (!error) { - error = open_check_o_direct(f); - if (error) { - fput(f); - f = ERR_PTR(error); - } - } else { - put_filp(f); - f = ERR_PTR(error); - } - return f; -} - /** * finish_open - finish opening a file * @od: opaque open data @@ -841,7 +821,18 @@ struct file *dentry_open(struct dentry *dentry, 
struct vfsmount *mnt, int flags, } f->f_flags = flags; - return __dentry_open(dentry, mnt, f, NULL, cred); + error = do_dentry_open(dentry, mnt, f, NULL, cred); + if (!error) { + error = open_check_o_direct(f); + if (error) { + fput(f); + f = ERR_PTR(error); + } + } else { + put_filp(f); + f = ERR_PTR(error); + } + return f; } EXPORT_SYMBOL(dentry_open); -- cgit v1.2.3 From 02e5180d991f203441687cecd0b7e6a2ba0a34d3 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 10 Jun 2012 14:32:45 -0400 Subject: do_dentry_open(): take initialization of file->f_path to caller ... and get rid of a couple of arguments and a pointless reassignment in finish_open() case. Signed-off-by: Al Viro --- fs/open.c | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) (limited to 'fs/open.c') diff --git a/fs/open.c b/fs/open.c index 28fbacbd5e3..124ccb1d38a 100644 --- a/fs/open.c +++ b/fs/open.c @@ -667,8 +667,7 @@ int open_check_o_direct(struct file *f) return 0; } -static int do_dentry_open(struct dentry *dentry, struct vfsmount *mnt, - struct file *f, +static int do_dentry_open(struct file *f, int (*open)(struct inode *, struct file *), const struct cred *cred) { @@ -682,9 +681,9 @@ static int do_dentry_open(struct dentry *dentry, struct vfsmount *mnt, if (unlikely(f->f_flags & O_PATH)) f->f_mode = FMODE_PATH; - inode = dentry->d_inode; + inode = f->f_path.dentry->d_inode; if (f->f_mode & FMODE_WRITE) { - error = __get_file_write_access(inode, mnt); + error = __get_file_write_access(inode, f->f_path.mnt); if (error) goto cleanup_file; if (!special_file(inode->i_mode)) @@ -692,8 +691,6 @@ static int do_dentry_open(struct dentry *dentry, struct vfsmount *mnt, } f->f_mapping = inode->i_mapping; - f->f_path.dentry = dentry; - f->f_path.mnt = mnt; f->f_pos = 0; file_sb_list_add(f, inode->i_sb); @@ -740,15 +737,14 @@ cleanup_all: * here, so just reset the state. 
*/ file_reset_write(f); - mnt_drop_write(mnt); + mnt_drop_write(f->f_path.mnt); } } file_sb_list_del(f); - f->f_path.dentry = NULL; - f->f_path.mnt = NULL; cleanup_file: - dput(dentry); - mntput(mnt); + path_put(&f->f_path); + f->f_path.mnt = NULL; + f->f_path.dentry = NULL; return error; } @@ -771,9 +767,9 @@ int finish_open(struct file *file, struct dentry *dentry, BUG_ON(*opened & FILE_OPENED); /* once it's opened, it's opened */ mntget(file->f_path.mnt); - dget(dentry); + file->f_path.dentry = dget(dentry); - error = do_dentry_open(dentry, file->f_path.mnt, file, open, current_cred()); + error = do_dentry_open(file, open, current_cred()); if (!error) *opened |= FILE_OPENED; @@ -821,7 +817,9 @@ struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags, } f->f_flags = flags; - error = do_dentry_open(dentry, mnt, f, NULL, cred); + f->f_path.mnt = mnt; + f->f_path.dentry = dentry; + error = do_dentry_open(f, NULL, cred); if (!error) { error = open_check_o_direct(f); if (error) { -- cgit v1.2.3 From c3c4f69424db0760239762d36d0b1b6ae524008b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 23 Jun 2012 22:49:45 +0400 Subject: do_dentry_open(): close the race with mark_files_ro() in failure exit we want to take it out of mark_files_ro() reach *before* we start checking if we ought to drop write access. 
Signed-off-by: Al Viro --- fs/open.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/open.c') diff --git a/fs/open.c b/fs/open.c index 124ccb1d38a..764cc9c201a 100644 --- a/fs/open.c +++ b/fs/open.c @@ -727,6 +727,7 @@ static int do_dentry_open(struct file *f, cleanup_all: fops_put(f->f_op); + file_sb_list_del(f); if (f->f_mode & FMODE_WRITE) { put_write_access(inode); if (!special_file(inode->i_mode)) { @@ -740,7 +741,6 @@ cleanup_all: mnt_drop_write(f->f_path.mnt); } } - file_sb_list_del(f); cleanup_file: path_put(&f->f_path); f->f_path.mnt = NULL; -- cgit v1.2.3 From 55e4def0a6e79e7eb53017c4935adfed76510cd7 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 25 Jun 2012 12:55:09 +0100 Subject: VFS: Make chown() and lchown() call fchownat() Make the chown() and lchown() syscalls jump to the fchownat() syscall with the appropriate extra arguments. Signed-off-by: David Howells Signed-off-by: Al Viro --- fs/open.c | 41 +++++++---------------------------------- 1 file changed, 7 insertions(+), 34 deletions(-) (limited to 'fs/open.c') diff --git a/fs/open.c b/fs/open.c index 764cc9c201a..75bea868ef8 100644 --- a/fs/open.c +++ b/fs/open.c @@ -537,25 +537,6 @@ static int chown_common(struct path *path, uid_t user, gid_t group) return error; } -SYSCALL_DEFINE3(chown, const char __user *, filename, uid_t, user, gid_t, group) -{ - struct path path; - int error; - - error = user_path(filename, &path); - if (error) - goto out; - error = mnt_want_write(path.mnt); - if (error) - goto out_release; - error = chown_common(&path, user, group); - mnt_drop_write(path.mnt); -out_release: - path_put(&path); -out: - return error; -} - SYSCALL_DEFINE5(fchownat, int, dfd, const char __user *, filename, uid_t, user, gid_t, group, int, flag) { @@ -583,23 +564,15 @@ out: return error; } -SYSCALL_DEFINE3(lchown, const char __user *, filename, uid_t, user, gid_t, group) +SYSCALL_DEFINE3(chown, const char __user *, filename, uid_t, user, gid_t, group) { - struct path 
path; - int error; + return sys_fchownat(AT_FDCWD, filename, user, group, 0); +} - error = user_lpath(filename, &path); - if (error) - goto out; - error = mnt_want_write(path.mnt); - if (error) - goto out_release; - error = chown_common(&path, user, group); - mnt_drop_write(path.mnt); -out_release: - path_put(&path); -out: - return error; +SYSCALL_DEFINE3(lchown, const char __user *, filename, uid_t, user, gid_t, group) +{ + return sys_fchownat(AT_FDCWD, filename, user, group, + AT_SYMLINK_NOFOLLOW); } SYSCALL_DEFINE3(fchown, unsigned int, fd, uid_t, user, gid_t, group) -- cgit v1.2.3 From 765927b2d508712d320c8934db963bbe14c3fcec Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 26 Jun 2012 21:58:53 +0400 Subject: switch dentry_open() to struct path, make it grab references itself Signed-off-by: Al Viro --- fs/open.c | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) (limited to 'fs/open.c') diff --git a/fs/open.c b/fs/open.c index 75bea868ef8..1e914b397e1 100644 --- a/fs/open.c +++ b/fs/open.c @@ -766,11 +766,7 @@ int finish_no_open(struct file *file, struct dentry *dentry) } EXPORT_SYMBOL(finish_no_open); -/* - * dentry_open() will have done dput(dentry) and mntput(mnt) if it returns an - * error. - */ -struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags, +struct file *dentry_open(const struct path *path, int flags, const struct cred *cred) { int error; @@ -779,19 +775,16 @@ struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags, validate_creds(cred); /* We must always pass in a valid mount pointer. 
*/ - BUG_ON(!mnt); + BUG_ON(!path->mnt); error = -ENFILE; f = get_empty_filp(); - if (f == NULL) { - dput(dentry); - mntput(mnt); + if (f == NULL) return ERR_PTR(error); - } f->f_flags = flags; - f->f_path.mnt = mnt; - f->f_path.dentry = dentry; + f->f_path = *path; + path_get(&f->f_path); error = do_dentry_open(f, NULL, cred); if (!error) { error = open_check_o_direct(f); -- cgit v1.2.3 From b5bcdda32736b94a7d178d156d80a69f536ad468 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 20 Jul 2012 23:28:46 +0400 Subject: take grabbing f->f_path to do_dentry_open() Signed-off-by: Al Viro --- fs/open.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'fs/open.c') diff --git a/fs/open.c b/fs/open.c index 1e914b397e1..8d2c8970029 100644 --- a/fs/open.c +++ b/fs/open.c @@ -654,6 +654,7 @@ static int do_dentry_open(struct file *f, if (unlikely(f->f_flags & O_PATH)) f->f_mode = FMODE_PATH; + path_get(&f->f_path); inode = f->f_path.dentry->d_inode; if (f->f_mode & FMODE_WRITE) { error = __get_file_write_access(inode, f->f_path.mnt); @@ -739,9 +740,7 @@ int finish_open(struct file *file, struct dentry *dentry, int error; BUG_ON(*opened & FILE_OPENED); /* once it's opened, it's opened */ - mntget(file->f_path.mnt); - file->f_path.dentry = dget(dentry); - + file->f_path.dentry = dentry; error = do_dentry_open(file, open, current_cred()); if (!error) *opened |= FILE_OPENED; @@ -784,7 +783,6 @@ struct file *dentry_open(const struct path *path, int flags, f->f_flags = flags; f->f_path = *path; - path_get(&f->f_path); error = do_dentry_open(f, NULL, cred); if (!error) { error = open_check_o_direct(f); -- cgit v1.2.3 From eb04c28288bb0098d0e75d81ba2a575239de71d8 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 12 Jun 2012 16:20:35 +0200 Subject: fs: Add freezing handling to mnt_want_write() / mnt_drop_write() Most of places where we want freeze protection coincides with the places where we also have remount-ro protection. 
So make mnt_want_write() and mnt_drop_write() (and their _file alternative) prevent freezing as well. For the few cases that are really interested only in remount-ro protection provide new function variants. BugLink: https://bugs.launchpad.net/bugs/897421 Tested-by: Kamal Mostafa Tested-by: Peter M. Petrakis Tested-by: Dann Frazier Tested-by: Massimo Morana Signed-off-by: Jan Kara Signed-off-by: Al Viro --- fs/open.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/open.c') diff --git a/fs/open.c b/fs/open.c index 8d2c8970029..9ddc1856550 100644 --- a/fs/open.c +++ b/fs/open.c @@ -620,7 +620,7 @@ static inline int __get_file_write_access(struct inode *inode, /* * Balanced in __fput() */ - error = mnt_want_write(mnt); + error = __mnt_want_write(mnt); if (error) put_write_access(inode); } -- cgit v1.2.3 From 14da9200140f8d722ad1767dfabadebd8b34f2ad Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 12 Jun 2012 16:20:37 +0200 Subject: fs: Protect write paths by sb_start_write - sb_end_write There are several entry points which dirty pages in a filesystem. mmap (handled by block_page_mkwrite()), buffered write (handled by __generic_file_aio_write()), splice write (generic_file_splice_write), truncate, and fallocate (these can dirty last partial page - handled inside each filesystem separately). Protect these places with sb_start_write() and sb_end_write(). ->page_mkwrite() calls are particularly complex since they are called with mmap_sem held and thus we cannot use standard sb_start_write() due to lock ordering constraints. We solve the problem by using a special freeze protection sb_start_pagefault() which ranks below mmap_sem. BugLink: https://bugs.launchpad.net/bugs/897421 Tested-by: Kamal Mostafa Tested-by: Peter M. 
Petrakis Tested-by: Dann Frazier Tested-by: Massimo Morana Signed-off-by: Jan Kara Signed-off-by: Al Viro --- fs/open.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'fs/open.c') diff --git a/fs/open.c b/fs/open.c index 9ddc1856550..f3d96e7e7b1 100644 --- a/fs/open.c +++ b/fs/open.c @@ -164,11 +164,13 @@ static long do_sys_ftruncate(unsigned int fd, loff_t length, int small) if (IS_APPEND(inode)) goto out_putf; + sb_start_write(inode->i_sb); error = locks_verify_truncate(inode, file, length); if (!error) error = security_path_truncate(&file->f_path); if (!error) error = do_truncate(dentry, length, ATTR_MTIME|ATTR_CTIME, file); + sb_end_write(inode->i_sb); out_putf: fput(file); out: @@ -266,7 +268,10 @@ int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len) if (!file->f_op->fallocate) return -EOPNOTSUPP; - return file->f_op->fallocate(file, mode, offset, len); + sb_start_write(inode->i_sb); + ret = file->f_op->fallocate(file, mode, offset, len); + sb_end_write(inode->i_sb); + return ret; } SYSCALL_DEFINE(fallocate)(int fd, int mode, loff_t offset, loff_t len) -- cgit v1.2.3 From fe7c80518e34d1786f4a940ce673a0bfcbe53298 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 4 Aug 2012 08:39:23 +0400 Subject: missed mnt_drop_write() in do_dentry_open() This one ought to be __mnt_drop_write(), to match __mnt_want_write() in the beginning... Signed-off-by: Al Viro --- fs/open.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/open.c') diff --git a/fs/open.c b/fs/open.c index f3d96e7e7b1..bc132e167d2 100644 --- a/fs/open.c +++ b/fs/open.c @@ -717,7 +717,7 @@ cleanup_all: * here, so just reset the state. 
 */ file_reset_write(f); - mnt_drop_write(f->f_path.mnt); + __mnt_drop_write(f->f_path.mnt); } } cleanup_file: -- cgit v1.2.3 From e68726ff72cf7ba5e7d789857fcd9a75ca573f03 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 15 Aug 2012 13:01:24 +0200 Subject: vfs: canonicalize create mode in build_open_flags() Userspace can pass weird create mode in open(2) that we canonicalize to "(mode & S_IALLUGO) | S_IFREG" in vfs_create(). The problem is that we use the uncanonicalized mode before calling vfs_create() with unforeseen consequences. So do the canonicalization early in build_open_flags(). Signed-off-by: Miklos Szeredi Tested-by: Richard W.M. Jones CC: stable@vger.kernel.org --- fs/open.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'fs/open.c') diff --git a/fs/open.c b/fs/open.c index bc132e167d2..e1f2cdb91a4 100644 --- a/fs/open.c +++ b/fs/open.c @@ -852,9 +852,10 @@ static inline int build_open_flags(int flags, umode_t mode, struct open_flags *o int lookup_flags = 0; int acc_mode; - if (!(flags & O_CREAT)) - mode = 0; - op->mode = mode; + if (flags & O_CREAT) + op->mode = (mode & S_IALLUGO) | S_IFREG; + else + op->mode = 0; /* Must never be set by userspace */ flags &= ~FMODE_NONOTIFY; -- cgit v1.2.3 From d2b31ca644fdc8704de3367a6a56a5c958c77f53 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 1 Jun 2012 16:14:19 -0600 Subject: userns: Teach security_path_chown to take kuids and kgids Don't make the security modules deal with raw user space uids and gids; instead pass in a kuid_t and a kgid_t so that security modules only have to deal with internal kernel uids and gids. Cc: Al Viro Cc: James Morris Cc: John Johansen Cc: Kentaro Takeda Cc: Tetsuo Handa Acked-by: Serge Hallyn Signed-off-by: Eric W. 
Biederman --- fs/open.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/open.c') diff --git a/fs/open.c b/fs/open.c index f3d96e7e7b1..2b2573980d0 100644 --- a/fs/open.c +++ b/fs/open.c @@ -534,7 +534,7 @@ static int chown_common(struct path *path, uid_t user, gid_t group) newattrs.ia_valid |= ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_KILL_PRIV; mutex_lock(&inode->i_mutex); - error = security_path_chown(path, user, group); + error = security_path_chown(path, uid, gid); if (!error) error = notify_change(path->dentry, &newattrs); mutex_unlock(&inode->i_mutex); -- cgit v1.2.3 From 56007cae94f349387c088e738c7dcb6bc513063b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 15 Aug 2012 21:03:26 -0400 Subject: move put_unused_fd() and fd_install() to fs/file.c Signed-off-by: Al Viro --- fs/open.c | 44 -------------------------------------------- 1 file changed, 44 deletions(-) (limited to 'fs/open.c') diff --git a/fs/open.c b/fs/open.c index e1f2cdb91a4..c525bd0e65b 100644 --- a/fs/open.c +++ b/fs/open.c @@ -803,50 +803,6 @@ struct file *dentry_open(const struct path *path, int flags, } EXPORT_SYMBOL(dentry_open); -static void __put_unused_fd(struct files_struct *files, unsigned int fd) -{ - struct fdtable *fdt = files_fdtable(files); - __clear_open_fd(fd, fdt); - if (fd < files->next_fd) - files->next_fd = fd; -} - -void put_unused_fd(unsigned int fd) -{ - struct files_struct *files = current->files; - spin_lock(&files->file_lock); - __put_unused_fd(files, fd); - spin_unlock(&files->file_lock); -} - -EXPORT_SYMBOL(put_unused_fd); - -/* - * Install a file pointer in the fd array. - * - * The VFS is full of places where we drop the files lock between - * setting the open_fds bitmap and installing the file in the file - * array. At any such point, we are vulnerable to a dup2() race - * installing a file in the array before us. We need to detect this and - * fput() the struct file we are about to overwrite in this case. 
- * - * It should never happen - if we allow dup2() do it, _really_ bad things - * will follow. - */ - -void fd_install(unsigned int fd, struct file *file) -{ - struct files_struct *files = current->files; - struct fdtable *fdt; - spin_lock(&files->file_lock); - fdt = files_fdtable(files); - BUG_ON(fdt->fd[fd] != NULL); - rcu_assign_pointer(fdt->fd[fd], file); - spin_unlock(&files->file_lock); -} - -EXPORT_SYMBOL(fd_install); - static inline int build_open_flags(int flags, umode_t mode, struct open_flags *op) { int lookup_flags = 0; -- cgit v1.2.3 From 483ce1d4b8c3b82bc9c9a1dd9dbc44f50b3aaf5a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 19 Aug 2012 12:04:24 -0400 Subject: take descriptor-related part of close() to file.c Signed-off-by: Al Viro --- fs/open.c | 22 +--------------------- 1 file changed, 1 insertion(+), 21 deletions(-) (limited to 'fs/open.c') diff --git a/fs/open.c b/fs/open.c index c525bd0e65b..30760017dee 100644 --- a/fs/open.c +++ b/fs/open.c @@ -994,23 +994,7 @@ EXPORT_SYMBOL(filp_close); */ SYSCALL_DEFINE1(close, unsigned int, fd) { - struct file * filp; - struct files_struct *files = current->files; - struct fdtable *fdt; - int retval; - - spin_lock(&files->file_lock); - fdt = files_fdtable(files); - if (fd >= fdt->max_fds) - goto out_unlock; - filp = fdt->fd[fd]; - if (!filp) - goto out_unlock; - rcu_assign_pointer(fdt->fd[fd], NULL); - __clear_close_on_exec(fd, fdt); - __put_unused_fd(files, fd); - spin_unlock(&files->file_lock); - retval = filp_close(filp, files); + int retval = __close_fd(current->files, fd); /* can't restart close syscall because file table entry was cleared */ if (unlikely(retval == -ERESTARTSYS || @@ -1020,10 +1004,6 @@ SYSCALL_DEFINE1(close, unsigned int, fd) retval = -EINTR; return retval; - -out_unlock: - spin_unlock(&files->file_lock); - return -EBADF; } EXPORT_SYMBOL(sys_close); -- cgit v1.2.3 From c6f3d81115989e274c42a852222b80d2e14ced6f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 26 Aug 2012 11:01:04 
-0400 Subject: don't leak O_CLOEXEC into ->f_flags Signed-off-by: Al Viro --- fs/open.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/open.c') diff --git a/fs/open.c b/fs/open.c index 30760017dee..03028d0e748 100644 --- a/fs/open.c +++ b/fs/open.c @@ -814,7 +814,7 @@ static inline int build_open_flags(int flags, umode_t mode, struct open_flags *o op->mode = 0; /* Must never be set by userspace */ - flags &= ~FMODE_NONOTIFY; + flags &= ~FMODE_NONOTIFY & ~O_CLOEXEC; /* * O_SYNC is implemented as __O_SYNC|O_DSYNC. As many places only -- cgit v1.2.3 From bf2965d5b5950d09e934ea5d961d79d0ed1fae7e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 26 Aug 2012 20:13:36 -0400 Subject: switch ftruncate(2) to fget_light Signed-off-by: Al Viro --- fs/open.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'fs/open.c') diff --git a/fs/open.c b/fs/open.c index 03028d0e748..9f61d7269d3 100644 --- a/fs/open.c +++ b/fs/open.c @@ -132,16 +132,16 @@ SYSCALL_DEFINE2(truncate, const char __user *, path, long, length) static long do_sys_ftruncate(unsigned int fd, loff_t length, int small) { - struct inode * inode; + struct inode *inode; struct dentry *dentry; - struct file * file; - int error; + struct file *file; + int error, fput_needed; error = -EINVAL; if (length < 0) goto out; error = -EBADF; - file = fget(fd); + file = fget_light(fd, &fput_needed); if (!file) goto out; @@ -172,7 +172,7 @@ static long do_sys_ftruncate(unsigned int fd, loff_t length, int small) error = do_truncate(dentry, length, ATTR_MTIME|ATTR_CTIME, file); sb_end_write(inode->i_sb); out_putf: - fput(file); + fput_light(file, fput_needed); out: return error; } -- cgit v1.2.3 From 6b48c5b2079af1f81d8f249ae07a988d8c45b32f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 26 Aug 2012 20:15:40 -0400 Subject: switch fallocate(2) to fget_light() Signed-off-by: Al Viro --- fs/open.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs/open.c') diff 
--git a/fs/open.c b/fs/open.c index 9f61d7269d3..da6d3f1ac24 100644 --- a/fs/open.c +++ b/fs/open.c @@ -277,12 +277,12 @@ int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len) SYSCALL_DEFINE(fallocate)(int fd, int mode, loff_t offset, loff_t len) { struct file *file; - int error = -EBADF; + int error = -EBADF, fput_needed; - file = fget(fd); + file = fget_light(fd, &fput_needed); if (file) { error = do_fallocate(file, mode, offset, len); - fput(file); + fput_light(file, fput_needed); } return error; -- cgit v1.2.3 From d6483b7a78438bc333560d11b69e6a6a6cf55940 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 26 Aug 2012 20:22:10 -0400 Subject: switch fchmod(2) to fget_light() Signed-off-by: Al Viro --- fs/open.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'fs/open.c') diff --git a/fs/open.c b/fs/open.c index da6d3f1ac24..3c741eae6b9 100644 --- a/fs/open.c +++ b/fs/open.c @@ -582,23 +582,21 @@ SYSCALL_DEFINE3(lchown, const char __user *, filename, uid_t, user, gid_t, group SYSCALL_DEFINE3(fchown, unsigned int, fd, uid_t, user, gid_t, group) { - struct file * file; - int error = -EBADF; - struct dentry * dentry; + struct file *file; + int error = -EBADF, fput_needed; - file = fget(fd); + file = fget_light(fd, &fput_needed); if (!file) goto out; error = mnt_want_write_file(file); if (error) goto out_fput; - dentry = file->f_path.dentry; - audit_inode(NULL, dentry); + audit_inode(NULL, file->f_path.dentry); error = chown_common(&file->f_path, user, group); mnt_drop_write_file(file); out_fput: - fput(file); + fput_light(file, fput_needed); out: return error; } -- cgit v1.2.3 From 2903ff019b346ab8d36ebbf54853c3aaf6590608 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 28 Aug 2012 12:52:22 -0400 Subject: switch simple cases of fget_light to fdget Signed-off-by: Al Viro --- fs/open.c | 64 ++++++++++++++++++++++++++++++--------------------------------- 1 file changed, 30 insertions(+), 34 deletions(-) (limited to 
'fs/open.c') diff --git a/fs/open.c b/fs/open.c index 3c741eae6b9..85603262d8d 100644 --- a/fs/open.c +++ b/fs/open.c @@ -134,25 +134,25 @@ static long do_sys_ftruncate(unsigned int fd, loff_t length, int small) { struct inode *inode; struct dentry *dentry; - struct file *file; - int error, fput_needed; + struct fd f; + int error; error = -EINVAL; if (length < 0) goto out; error = -EBADF; - file = fget_light(fd, &fput_needed); - if (!file) + f = fdget(fd); + if (!f.file) goto out; /* explicitly opened as large or we are on 64-bit box */ - if (file->f_flags & O_LARGEFILE) + if (f.file->f_flags & O_LARGEFILE) small = 0; - dentry = file->f_path.dentry; + dentry = f.file->f_path.dentry; inode = dentry->d_inode; error = -EINVAL; - if (!S_ISREG(inode->i_mode) || !(file->f_mode & FMODE_WRITE)) + if (!S_ISREG(inode->i_mode) || !(f.file->f_mode & FMODE_WRITE)) goto out_putf; error = -EINVAL; @@ -165,14 +165,14 @@ static long do_sys_ftruncate(unsigned int fd, loff_t length, int small) goto out_putf; sb_start_write(inode->i_sb); - error = locks_verify_truncate(inode, file, length); + error = locks_verify_truncate(inode, f.file, length); if (!error) - error = security_path_truncate(&file->f_path); + error = security_path_truncate(&f.file->f_path); if (!error) - error = do_truncate(dentry, length, ATTR_MTIME|ATTR_CTIME, file); + error = do_truncate(dentry, length, ATTR_MTIME|ATTR_CTIME, f.file); sb_end_write(inode->i_sb); out_putf: - fput_light(file, fput_needed); + fdput(f); out: return error; } @@ -276,15 +276,13 @@ int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len) SYSCALL_DEFINE(fallocate)(int fd, int mode, loff_t offset, loff_t len) { - struct file *file; - int error = -EBADF, fput_needed; + struct fd f = fdget(fd); + int error = -EBADF; - file = fget_light(fd, &fput_needed); - if (file) { - error = do_fallocate(file, mode, offset, len); - fput_light(file, fput_needed); + if (f.file) { + error = do_fallocate(f.file, mode, offset, len); + fdput(f); } - 
return error; } @@ -400,16 +398,15 @@ out: SYSCALL_DEFINE1(fchdir, unsigned int, fd) { - struct file *file; + struct fd f = fdget_raw(fd); struct inode *inode; - int error, fput_needed; + int error = -EBADF; error = -EBADF; - file = fget_raw_light(fd, &fput_needed); - if (!file) + if (!f.file) goto out; - inode = file->f_path.dentry->d_inode; + inode = f.file->f_path.dentry->d_inode; error = -ENOTDIR; if (!S_ISDIR(inode->i_mode)) @@ -417,9 +414,9 @@ SYSCALL_DEFINE1(fchdir, unsigned int, fd) error = inode_permission(inode, MAY_EXEC | MAY_CHDIR); if (!error) - set_fs_pwd(current->fs, &file->f_path); + set_fs_pwd(current->fs, &f.file->f_path); out_putf: - fput_light(file, fput_needed); + fdput(f); out: return error; } @@ -582,21 +579,20 @@ SYSCALL_DEFINE3(lchown, const char __user *, filename, uid_t, user, gid_t, group SYSCALL_DEFINE3(fchown, unsigned int, fd, uid_t, user, gid_t, group) { - struct file *file; - int error = -EBADF, fput_needed; + struct fd f = fdget(fd); + int error = -EBADF; - file = fget_light(fd, &fput_needed); - if (!file) + if (!f.file) goto out; - error = mnt_want_write_file(file); + error = mnt_want_write_file(f.file); if (error) goto out_fput; - audit_inode(NULL, file->f_path.dentry); - error = chown_common(&file->f_path, user, group); - mnt_drop_write_file(file); + audit_inode(NULL, f.file->f_path.dentry); + error = chown_common(&f.file->f_path, user, group); + mnt_drop_write_file(f.file); out_fput: - fput_light(file, fput_needed); + fdput(f); out: return error; } -- cgit v1.2.3 From bfcec7087458812f575d9022b2d151641f34ee84 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 10 Oct 2012 15:25:23 -0400 Subject: audit: set the name_len in audit_inode for parent lookups Currently, this gets set mostly by happenstance when we call into audit_inode_child. While that might be a little more efficient, it seems wrong. 
If the syscall ends up failing before audit_inode_child ever gets called, then you'll have an audit_names record that shows the full path but has the parent inode info attached. Fix this by passing in a parent flag when we call audit_inode that gets set to the value of LOOKUP_PARENT. We can then fix up the pathname for the audit entry correctly from the get-go. While we're at it, clean up the no-op macro for audit_inode in the !CONFIG_AUDITSYSCALL case. Signed-off-by: Jeff Layton Signed-off-by: Al Viro --- fs/open.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/open.c') diff --git a/fs/open.c b/fs/open.c index 44da0feeca2..a015437e153 100644 --- a/fs/open.c +++ b/fs/open.c @@ -478,7 +478,7 @@ SYSCALL_DEFINE2(fchmod, unsigned int, fd, umode_t, mode) file = fget(fd); if (file) { - audit_inode(NULL, file->f_path.dentry); + audit_inode(NULL, file->f_path.dentry, 0); err = chmod_common(&file->f_path, mode); fput(file); } @@ -588,7 +588,7 @@ SYSCALL_DEFINE3(fchown, unsigned int, fd, uid_t, user, gid_t, group) error = mnt_want_write_file(f.file); if (error) goto out_fput; - audit_inode(NULL, f.file->f_path.dentry); + audit_inode(NULL, f.file->f_path.dentry, 0); error = chown_common(&f.file->f_path, user, group); mnt_drop_write_file(f.file); out_fput: -- cgit v1.2.3 From 91a27b2a756784714e924e5e854b919273082d26 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 10 Oct 2012 15:25:28 -0400 Subject: vfs: define struct filename and have getname() return it getname() is intended to copy pathname strings from userspace into a kernel buffer. The result is just a string in kernel space. It would however be quite helpful to be able to attach some ancillary info to the string. For instance, we could attach some audit-related info to reduce the amount of audit-related processing needed. When auditing is enabled, we could also call getname() on the string more than once and not need to recopy it from userspace. 
This patchset converts the getname()/putname() interfaces to return a struct instead of a string. For now, the struct just tracks the string in kernel space and the original userland pointer for it. Later, we'll add other information to the struct as it becomes convenient. Signed-off-by: Jeff Layton Signed-off-by: Al Viro --- fs/open.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/open.c') diff --git a/fs/open.c b/fs/open.c index a015437e153..81dd92ac10f 100644 --- a/fs/open.c +++ b/fs/open.c @@ -895,13 +895,13 @@ long do_sys_open(int dfd, const char __user *filename, int flags, umode_t mode) { struct open_flags op; int lookup = build_open_flags(flags, mode, &op); - char *tmp = getname(filename); + struct filename *tmp = getname(filename); int fd = PTR_ERR(tmp); if (!IS_ERR(tmp)) { fd = get_unused_fd_flags(flags); if (fd >= 0) { - struct file *f = do_filp_open(dfd, tmp, &op, lookup); + struct file *f = do_filp_open(dfd, tmp->name, &op, lookup); if (IS_ERR(f)) { put_unused_fd(fd); fd = PTR_ERR(f); -- cgit v1.2.3 From 669abf4e5539c8aa48bf28c965be05c0a7b58a27 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 10 Oct 2012 16:43:10 -0400 Subject: vfs: make path_openat take a struct filename pointer ...and fix up the callers. For do_file_open_root, just declare a struct filename on the stack and fill out the .name field. For do_filp_open, make it also take a struct filename pointer, and fix up its callers to call it appropriately. For filp_open, add a variant that takes a struct filename pointer and turn filp_open into a wrapper around it. 
Signed-off-by: Jeff Layton Signed-off-by: Al Viro --- fs/open.c | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) (limited to 'fs/open.c') diff --git a/fs/open.c b/fs/open.c index 81dd92ac10f..59071f55bf7 100644 --- a/fs/open.c +++ b/fs/open.c @@ -858,6 +858,24 @@ static inline int build_open_flags(int flags, umode_t mode, struct open_flags *o return lookup_flags; } +/** + * file_open_name - open file and return file pointer + * + * @name: struct filename containing path to open + * @flags: open flags as per the open(2) second argument + * @mode: mode for the new file if O_CREAT is set, else ignored + * + * This is the helper to open a file from kernelspace if you really + * have to. But in generally you should not do this, so please move + * along, nothing to see here.. + */ +struct file *file_open_name(struct filename *name, int flags, umode_t mode) +{ + struct open_flags op; + int lookup = build_open_flags(flags, mode, &op); + return do_filp_open(AT_FDCWD, name, &op, lookup); +} + /** * filp_open - open file and return file pointer * @@ -871,9 +889,8 @@ static inline int build_open_flags(int flags, umode_t mode, struct open_flags *o */ struct file *filp_open(const char *filename, int flags, umode_t mode) { - struct open_flags op; - int lookup = build_open_flags(flags, mode, &op); - return do_filp_open(AT_FDCWD, filename, &op, lookup); + struct filename name = {.name = filename}; + return file_open_name(&name, flags, mode); } EXPORT_SYMBOL(filp_open); @@ -901,7 +918,7 @@ long do_sys_open(int dfd, const char __user *filename, int flags, umode_t mode) if (!IS_ERR(tmp)) { fd = get_unused_fd_flags(flags); if (fd >= 0) { - struct file *f = do_filp_open(dfd, tmp->name, &op, lookup); + struct file *f = do_filp_open(dfd, tmp, &op, lookup); if (IS_ERR(f)) { put_unused_fd(fd); fd = PTR_ERR(f); -- cgit v1.2.3 From a85fb273c94648cbf20a5f9bcf8bbbb075f271ad Mon Sep 17 00:00:00 2001 From: "Eric W. 
Biederman" Date: Tue, 31 Jul 2012 01:14:12 -0700 Subject: vfs: Allow chroot if you have CAP_SYS_CHROOT in your user namespace Once you are confined to a user namespace applications can not gain privilege and escape the user namespace so there is no longer a reason to restrict chroot. Acked-by: Serge Hallyn Signed-off-by: "Eric W. Biederman" --- fs/open.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/open.c') diff --git a/fs/open.c b/fs/open.c index 59071f55bf7..182d8667b7b 100644 --- a/fs/open.c +++ b/fs/open.c @@ -435,7 +435,7 @@ SYSCALL_DEFINE1(chroot, const char __user *, filename) goto dput_and_out; error = -EPERM; - if (!capable(CAP_SYS_CHROOT)) + if (!nsown_capable(CAP_SYS_CHROOT)) goto dput_and_out; error = security_path_chroot(&path); if (error) -- cgit v1.2.3 From a02de9608595c8ef649ef03ae735b0b45e3d4396 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 20 Dec 2012 21:52:36 +0000 Subject: VFS: Make more complete truncate operation available to CacheFiles Make a more complete truncate operation available to CacheFiles (including security checks and suchlike) so that it can use this to clear invalidated cache files. Signed-off-by: David Howells Acked-by: Al Viro --- fs/open.c | 50 +++++++++++++++++++++++++++----------------------- 1 file changed, 27 insertions(+), 23 deletions(-) (limited to 'fs/open.c') diff --git a/fs/open.c b/fs/open.c index 182d8667b7b..c819bbdab47 100644 --- a/fs/open.c +++ b/fs/open.c @@ -61,33 +61,22 @@ int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs, return ret; } -static long do_sys_truncate(const char __user *pathname, loff_t length) +long vfs_truncate(struct path *path, loff_t length) { - struct path path; struct inode *inode; - int error; - - error = -EINVAL; - if (length < 0) /* sorry, but loff_t says... 
*/ - goto out; + long error; - error = user_path(pathname, &path); - if (error) - goto out; - inode = path.dentry->d_inode; + inode = path->dentry->d_inode; /* For directories it's -EISDIR, for other non-regulars - -EINVAL */ - error = -EISDIR; if (S_ISDIR(inode->i_mode)) - goto dput_and_out; - - error = -EINVAL; + return -EISDIR; if (!S_ISREG(inode->i_mode)) - goto dput_and_out; + return -EINVAL; - error = mnt_want_write(path.mnt); + error = mnt_want_write(path->mnt); if (error) - goto dput_and_out; + goto out; error = inode_permission(inode, MAY_WRITE); if (error) @@ -111,19 +100,34 @@ static long do_sys_truncate(const char __user *pathname, loff_t length) error = locks_verify_truncate(inode, NULL, length); if (!error) - error = security_path_truncate(&path); + error = security_path_truncate(path); if (!error) - error = do_truncate(path.dentry, length, 0, NULL); + error = do_truncate(path->dentry, length, 0, NULL); put_write_and_out: put_write_access(inode); mnt_drop_write_and_out: - mnt_drop_write(path.mnt); -dput_and_out: - path_put(&path); + mnt_drop_write(path->mnt); out: return error; } +EXPORT_SYMBOL_GPL(vfs_truncate); + +static long do_sys_truncate(const char __user *pathname, loff_t length) +{ + struct path path; + int error; + + if (length < 0) /* sorry, but loff_t says... 
*/ + return -EINVAL; + + error = user_path(pathname, &path); + if (!error) { + error = vfs_truncate(&path, length); + path_put(&path); + } + return error; +} SYSCALL_DEFINE2(truncate, const char __user *, path, long, length) { -- cgit v1.2.3 From 48f7530d3f722617aa7cfea62b09b0c1a8d0173e Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 11 Dec 2012 12:10:11 -0500 Subject: vfs: have do_sys_truncate retry once on an ESTALE error Signed-off-by: Jeff Layton Signed-off-by: Al Viro --- fs/open.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'fs/open.c') diff --git a/fs/open.c b/fs/open.c index c819bbdab47..07449b911a4 100644 --- a/fs/open.c +++ b/fs/open.c @@ -115,17 +115,23 @@ EXPORT_SYMBOL_GPL(vfs_truncate); static long do_sys_truncate(const char __user *pathname, loff_t length) { + unsigned int lookup_flags = LOOKUP_FOLLOW; struct path path; int error; if (length < 0) /* sorry, but loff_t says... */ return -EINVAL; - error = user_path(pathname, &path); +retry: + error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path); if (!error) { error = vfs_truncate(&path, length); path_put(&path); } + if (retry_estale(error, lookup_flags)) { + lookup_flags |= LOOKUP_REVAL; + goto retry; + } return error; } -- cgit v1.2.3 From 87fa55952b7347175c6e2f03874869ad2c055adb Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 11 Dec 2012 12:10:11 -0500 Subject: vfs: have faccessat retry once on an ESTALE error Signed-off-by: Jeff Layton Signed-off-by: Al Viro --- fs/open.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'fs/open.c') diff --git a/fs/open.c b/fs/open.c index 07449b911a4..a994ccf39b4 100644 --- a/fs/open.c +++ b/fs/open.c @@ -316,6 +316,7 @@ SYSCALL_DEFINE3(faccessat, int, dfd, const char __user *, filename, int, mode) struct path path; struct inode *inode; int res; + unsigned int lookup_flags = LOOKUP_FOLLOW; if (mode & ~S_IRWXO) /* where's F_OK, X_OK, W_OK, R_OK? 
*/ return -EINVAL; @@ -338,8 +339,8 @@ SYSCALL_DEFINE3(faccessat, int, dfd, const char __user *, filename, int, mode) } old_cred = override_creds(override_cred); - - res = user_path_at(dfd, filename, LOOKUP_FOLLOW, &path); +retry: + res = user_path_at(dfd, filename, lookup_flags, &path); if (res) goto out; @@ -374,6 +375,10 @@ SYSCALL_DEFINE3(faccessat, int, dfd, const char __user *, filename, int, mode) out_path_release: path_put(&path); + if (retry_estale(res, lookup_flags)) { + lookup_flags |= LOOKUP_REVAL; + goto retry; + } out: revert_creds(old_cred); put_cred(override_cred); -- cgit v1.2.3 From 0291c0a551d5b0856627f2cb294da05f122414a0 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 11 Dec 2012 12:10:12 -0500 Subject: vfs: have chdir retry lookup and call once on ESTALE error Signed-off-by: Jeff Layton Signed-off-by: Al Viro --- fs/open.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'fs/open.c') diff --git a/fs/open.c b/fs/open.c index a994ccf39b4..402dfcb6720 100644 --- a/fs/open.c +++ b/fs/open.c @@ -394,8 +394,9 @@ SYSCALL_DEFINE1(chdir, const char __user *, filename) { struct path path; int error; - - error = user_path_dir(filename, &path); + unsigned int lookup_flags = LOOKUP_FOLLOW | LOOKUP_DIRECTORY; +retry: + error = user_path_at(AT_FDCWD, filename, lookup_flags, &path); if (error) goto out; @@ -407,6 +408,10 @@ SYSCALL_DEFINE1(chdir, const char __user *, filename) dput_and_out: path_put(&path); + if (retry_estale(error, lookup_flags)) { + lookup_flags |= LOOKUP_REVAL; + goto retry; + } out: return error; } -- cgit v1.2.3 From 2771261ec5b677a38f0cd5fcfc6cefd5393787ef Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 20 Dec 2012 17:08:32 -0500 Subject: vfs: have chroot retry once on ESTALE error Signed-off-by: Jeff Layton Signed-off-by: Al Viro --- fs/open.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'fs/open.c') diff --git a/fs/open.c b/fs/open.c index 402dfcb6720..a13a54d3e69 100644 
--- a/fs/open.c +++ b/fs/open.c @@ -445,8 +445,9 @@ SYSCALL_DEFINE1(chroot, const char __user *, filename) { struct path path; int error; - - error = user_path_dir(filename, &path); + unsigned int lookup_flags = LOOKUP_FOLLOW | LOOKUP_DIRECTORY; +retry: + error = user_path_at(AT_FDCWD, filename, lookup_flags, &path); if (error) goto out; @@ -465,6 +466,10 @@ SYSCALL_DEFINE1(chroot, const char __user *, filename) error = 0; dput_and_out: path_put(&path); + if (retry_estale(error, lookup_flags)) { + lookup_flags |= LOOKUP_REVAL; + goto retry; + } out: return error; } -- cgit v1.2.3 From 14ff690c0f94cf2e37f7c448f4f09bf0b4006d62 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 11 Dec 2012 12:10:13 -0500 Subject: vfs: make fchmodat retry once on ESTALE errors Signed-off-by: Jeff Layton Signed-off-by: Al Viro --- fs/open.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'fs/open.c') diff --git a/fs/open.c b/fs/open.c index a13a54d3e69..99c3ce5f897 100644 --- a/fs/open.c +++ b/fs/open.c @@ -514,11 +514,16 @@ SYSCALL_DEFINE3(fchmodat, int, dfd, const char __user *, filename, umode_t, mode { struct path path; int error; - - error = user_path_at(dfd, filename, LOOKUP_FOLLOW, &path); + unsigned int lookup_flags = LOOKUP_FOLLOW; +retry: + error = user_path_at(dfd, filename, lookup_flags, &path); if (!error) { error = chmod_common(&path, mode); path_put(&path); + if (retry_estale(error, lookup_flags)) { + lookup_flags |= LOOKUP_REVAL; + goto retry; + } } return error; } -- cgit v1.2.3 From 99a5df37a03c99e57d0da4f847a515b658963fbb Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 11 Dec 2012 12:10:13 -0500 Subject: vfs: make fchownat retry once on ESTALE errors Signed-off-by: Jeff Layton Signed-off-by: Al Viro --- fs/open.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'fs/open.c') diff --git a/fs/open.c b/fs/open.c index 99c3ce5f897..9b33c0cbfac 100644 --- a/fs/open.c +++ b/fs/open.c @@ -582,6 +582,7 @@ SYSCALL_DEFINE5(fchownat, int, 
dfd, const char __user *, filename, uid_t, user, lookup_flags = (flag & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW; if (flag & AT_EMPTY_PATH) lookup_flags |= LOOKUP_EMPTY; +retry: error = user_path_at(dfd, filename, lookup_flags, &path); if (error) goto out; @@ -592,6 +593,10 @@ SYSCALL_DEFINE5(fchownat, int, dfd, const char __user *, filename, uid_t, user, mnt_drop_write(path.mnt); out_release: path_put(&path); + if (retry_estale(error, lookup_flags)) { + lookup_flags |= LOOKUP_REVAL; + goto retry; + } out: return error; } -- cgit v1.2.3