From 3819219b592159725069eb16a7a46f58e4ecef32 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 5 Jun 2012 15:10:24 +0200 Subject: ceph: remove unused arg from ceph_lookup_open() What was the purpose of this? Signed-off-by: Miklos Szeredi CC: Sage Weil Signed-off-by: Al Viro --- fs/ceph/file.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs/ceph/file.c') diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 988d4f302e4..4bf9773e6a3 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -219,8 +219,7 @@ out: * path_lookup_create -> LOOKUP_OPEN|LOOKUP_CREATE */ struct dentry *ceph_lookup_open(struct inode *dir, struct dentry *dentry, - struct nameidata *nd, int mode, - int locked_dir) + struct nameidata *nd, int mode) { struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); struct ceph_mds_client *mdsc = fsc->mdsc; -- cgit v1.2.3 From 2d83bde9a16e18eafdc73a3a1f4a8eb110e49672 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 5 Jun 2012 15:10:25 +0200 Subject: ceph: implement i_op->atomic_open() Add an ->atomic_open implementation which replaces the atomic lookup+open+create operation implemented via ->lookup and ->create operations. Signed-off-by: Miklos Szeredi CC: Sage Weil Signed-off-by: Al Viro --- fs/ceph/file.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) (limited to 'fs/ceph/file.c') diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 4bf9773e6a3..e34dc22e75a 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -213,21 +213,15 @@ out: * may_open() fails, the struct *file gets cleaned up (i.e. * ceph_release gets called). So fear not! */ -/* - * flags - * path_lookup_open -> LOOKUP_OPEN - * path_lookup_create -> LOOKUP_OPEN|LOOKUP_CREATE - */ -struct dentry *ceph_lookup_open(struct inode *dir, struct dentry *dentry, - struct nameidata *nd, int mode) +struct file *ceph_lookup_open(struct inode *dir, struct dentry *dentry, + struct opendata *od, unsigned flags, umode_t mode) { struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); struct ceph_mds_client *mdsc = fsc->mdsc; - struct file *file; + struct file *file = NULL; struct ceph_mds_request *req; struct dentry *ret; int err; - int flags = nd->intent.open.flags; dout("ceph_lookup_open dentry %p '%.*s' flags %d mode 0%o\n", dentry, dentry->d_name.len, dentry->d_name.name, flags, mode); @@ -253,14 +247,19 @@ struct dentry *ceph_lookup_open(struct inode *dir, struct dentry *dentry, err = ceph_handle_notrace_create(dir, dentry); if (err) goto out; - file = lookup_instantiate_filp(nd, req->r_dentry, ceph_open); + file = finish_open(od, req->r_dentry, ceph_open); if (IS_ERR(file)) err = PTR_ERR(file); out: ret = ceph_finish_lookup(req, dentry, err); ceph_mdsc_put_request(req); dout("ceph_lookup_open result=%p\n", ret); - return ret; + + if (IS_ERR(ret)) + return ERR_CAST(ret); + + dput(ret); + return err ? ERR_PTR(err) : file; } int ceph_release(struct inode *inode, struct file *file) -- cgit v1.2.3 From 47237687d73cbeae1dd7a133c3fc3d7239094568 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 10 Jun 2012 05:01:45 -0400 Subject: ->atomic_open() prototype change - pass int * instead of bool * ... and let finish_open() report having opened the file via that sucker. Next step: don't modify od->filp at all. [AV: FILE_CREATE was already used by cifs; Miklos' fix folded] Signed-off-by: Al Viro --- fs/ceph/file.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs/ceph/file.c') diff --git a/fs/ceph/file.c b/fs/ceph/file.c index e34dc22e75a..4c304a90d04 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -214,7 +214,8 @@ out: * ceph_release gets called). So fear not! */ struct file *ceph_lookup_open(struct inode *dir, struct dentry *dentry, - struct opendata *od, unsigned flags, umode_t mode) + struct opendata *od, unsigned flags, umode_t mode, + int *opened) { struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); struct ceph_mds_client *mdsc = fsc->mdsc; @@ -247,7 +248,7 @@ struct file *ceph_lookup_open(struct inode *dir, struct dentry *dentry, err = ceph_handle_notrace_create(dir, dentry); if (err) goto out; - file = finish_open(od, req->r_dentry, ceph_open); + file = finish_open(od, req->r_dentry, ceph_open, opened); if (IS_ERR(file)) err = PTR_ERR(file); out: -- cgit v1.2.3 From d95852777bc8ba6b3ad3397d495c5f9dd8ca8383 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 22 Jun 2012 12:39:14 +0400 Subject: make ->atomic_open() return int Change of calling conventions: old new NULL 1 file 0 ERR_PTR(-ve) -ve Caller *knows* that struct file *; no need to return it. Signed-off-by: Al Viro --- fs/ceph/file.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'fs/ceph/file.c') diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 4c304a90d04..b8cc3ee5401 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -213,9 +213,9 @@ out: * may_open() fails, the struct *file gets cleaned up (i.e. * ceph_release gets called). So fear not! */ -struct file *ceph_lookup_open(struct inode *dir, struct dentry *dentry, - struct opendata *od, unsigned flags, umode_t mode, - int *opened) +int ceph_lookup_open(struct inode *dir, struct dentry *dentry, + struct opendata *od, unsigned flags, umode_t mode, + int *opened) { struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); struct ceph_mds_client *mdsc = fsc->mdsc; @@ -230,7 +230,7 @@ struct file *ceph_lookup_open(struct inode *dir, struct dentry *dentry, /* do the open */ req = prepare_open_request(dir->i_sb, flags, mode); if (IS_ERR(req)) - return ERR_CAST(req); + return PTR_ERR(req); req->r_dentry = dget(dentry); req->r_num_caps = 2; if (flags & O_CREAT) { @@ -257,10 +257,10 @@ out: dout("ceph_lookup_open result=%p\n", ret); if (IS_ERR(ret)) - return ERR_CAST(ret); + return PTR_ERR(ret); dput(ret); - return err ? ERR_PTR(err) : file; + return err; } int ceph_release(struct inode *inode, struct file *file) -- cgit v1.2.3 From 30d904947459cca2beb69e0110716f5248b31f2a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 22 Jun 2012 12:40:19 +0400 Subject: kill struct opendata Just pass struct file *. Methods are happier that way... There's no need to return struct file * from finish_open() now, so let it return int. Next: saner prototypes for parts in namei.c Signed-off-by: Al Viro --- fs/ceph/file.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'fs/ceph/file.c') diff --git a/fs/ceph/file.c b/fs/ceph/file.c index b8cc3ee5401..1b81d6c3187 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -214,12 +214,11 @@ out: * ceph_release gets called). So fear not! */ int ceph_lookup_open(struct inode *dir, struct dentry *dentry, - struct opendata *od, unsigned flags, umode_t mode, + struct file *file, unsigned flags, umode_t mode, int *opened) { struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); struct ceph_mds_client *mdsc = fsc->mdsc; - struct file *file = NULL; struct ceph_mds_request *req; struct dentry *ret; int err; @@ -248,9 +247,7 @@ int ceph_lookup_open(struct inode *dir, struct dentry *dentry, err = ceph_handle_notrace_create(dir, dentry); if (err) goto out; - file = finish_open(od, req->r_dentry, ceph_open, opened); - if (IS_ERR(file)) - err = PTR_ERR(file); + err = finish_open(file, req->r_dentry, ceph_open, opened); out: ret = ceph_finish_lookup(req, dentry, err); ceph_mdsc_put_request(req); -- cgit v1.2.3 From 5ef50c3bec20060bc114f62d6503c5d86d70bdd7 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 31 Jul 2012 11:27:36 -0700 Subject: ceph: simplify+fix atomic_open The initial ->atomic_open op was carried over from the old intent code, which was incomplete and didn't really work. Replace it with a fresh method. In particular: * always attempt to do an atomic open+lookup, both for the create case and for lookups of existing files. * fix symlink handling by returning 1 to the VFS so that we can follow the link to its destination. This fixes a longstanding ceph bug (#2392). Signed-off-by: Sage Weil --- fs/ceph/file.c | 62 +++++++++++++++++++++++++++++++++++----------------------- 1 file changed, 37 insertions(+), 25 deletions(-) (limited to 'fs/ceph/file.c') diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 1b81d6c3187..ecebbc09bfc 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -4,6 +4,7 @@ #include #include #include +#include #include #include @@ -106,9 +107,6 @@ static int ceph_init_file(struct inode *inode, struct file *file, int fmode) } /* - * If the filp already has private_data, that means the file was - * already opened by intent during lookup, and we do nothing. - * * If we already have the requisite capabilities, we can satisfy * the open request locally (no need to request new caps from the * MDS). We do, however, need to inform the MDS (asynchronously) @@ -207,24 +205,29 @@ out: /* - * Do a lookup + open with a single request. - * - * If this succeeds, but some subsequent check in the vfs - * may_open() fails, the struct *file gets cleaned up (i.e. - * ceph_release gets called). So fear not! + * Do a lookup + open with a single request. If we get a non-existent + * file or symlink, return 1 so the VFS can retry. */ -int ceph_lookup_open(struct inode *dir, struct dentry *dentry, +int ceph_atomic_open(struct inode *dir, struct dentry *dentry, struct file *file, unsigned flags, umode_t mode, int *opened) { struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); struct ceph_mds_client *mdsc = fsc->mdsc; struct ceph_mds_request *req; - struct dentry *ret; + struct dentry *dn; int err; - dout("ceph_lookup_open dentry %p '%.*s' flags %d mode 0%o\n", - dentry, dentry->d_name.len, dentry->d_name.name, flags, mode); + dout("atomic_open %p dentry %p '%.*s' %s flags %d mode 0%o\n", + dir, dentry, dentry->d_name.len, dentry->d_name.name, + d_unhashed(dentry) ? "unhashed" : "hashed", flags, mode); + + if (dentry->d_name.len > NAME_MAX) + return -ENAMETOOLONG; + + err = ceph_init_dentry(dentry); + if (err < 0) + return err; /* do the open */ req = prepare_open_request(dir->i_sb, flags, mode); @@ -241,22 +244,31 @@ int ceph_lookup_open(struct inode *dir, struct dentry *dentry, (flags & (O_CREAT|O_TRUNC)) ? dir : NULL, req); err = ceph_handle_snapdir(req, dentry, err); - if (err) - goto out; - if ((flags & O_CREAT) && !req->r_reply_info.head->is_dentry) + if (err == 0 && (flags & O_CREAT) && !req->r_reply_info.head->is_dentry) err = ceph_handle_notrace_create(dir, dentry); - if (err) - goto out; - err = finish_open(file, req->r_dentry, ceph_open, opened); -out: - ret = ceph_finish_lookup(req, dentry, err); - ceph_mdsc_put_request(req); - dout("ceph_lookup_open result=%p\n", ret); - if (IS_ERR(ret)) - return PTR_ERR(ret); + if (d_unhashed(dentry)) { + dn = ceph_finish_lookup(req, dentry, err); + if (IS_ERR(dn)) + err = PTR_ERR(dn); + } else { + /* we were given a hashed negative dentry */ + dn = NULL; + } + if (err) + goto out_err; + if (dn || dentry->d_inode == NULL || S_ISLNK(dentry->d_inode->i_mode)) { + /* make vfs retry on splice, ENOENT, or symlink */ + dout("atomic_open finish_no_open on dn %p\n", dn); + err = finish_no_open(file, dn); + } else { + dout("atomic_open finish_open on dn %p\n", dn); + err = finish_open(file, dentry, ceph_open, opened); + } - dput(ret); +out_err: + ceph_mdsc_put_request(req); + dout("atomic_open result=%d\n", err); return err; } -- cgit v1.2.3 From 6816282dab3a72efe8c0d182c1bc2960d87f4322 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Mon, 24 Sep 2012 21:01:02 -0700 Subject: ceph: propagate layout error on osd request creation If we are creating an osd request and get an invalid layout, return an EINVAL to the caller. We switch up the return to have an error code instead of NULL implying -ENOMEM. Signed-off-by: Sage Weil Reviewed-by: Alex Elder --- fs/ceph/file.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/ceph/file.c') diff --git a/fs/ceph/file.c b/fs/ceph/file.c index ecebbc09bfc..5840d2aaed1 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -536,8 +536,8 @@ more: do_sync, ci->i_truncate_seq, ci->i_truncate_size, &mtime, false, 2, page_align); - if (!req) - return -ENOMEM; + if (IS_ERR(req)) + return PTR_ERR(req); if (file->f_flags & O_DIRECT) { pages = ceph_get_direct_page_vector(data, num_pages, false); -- cgit v1.2.3 From 22cddde104d715600a4c218bf9224923208afe90 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Mon, 5 Nov 2012 11:07:23 -0800 Subject: ceph: Fix i_size update race ceph_aio_write() has an optimization that marks cap EPH_CAP_FILE_WR dirty before data is copied to page cache and inode size is updated. If ceph_check_caps() flushes the dirty cap before the inode size is updated, MDS can miss the new inode size. The fix is move ceph_{get,put}_cap_refs() into ceph_write_{begin,end}() and call __ceph_mark_dirty_caps() after inode size is updated. Signed-off-by: Yan, Zheng Signed-off-by: Sage Weil --- fs/ceph/file.c | 73 ++++++++++++++++++++++++---------------------------------- 1 file changed, 30 insertions(+), 43 deletions(-) (limited to 'fs/ceph/file.c') diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 5840d2aaed1..d415096800a 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -712,63 +712,53 @@ static ssize_t ceph_aio_write(struct kiocb *iocb, const struct iovec *iov, struct ceph_osd_client *osdc = &ceph_sb_to_client(inode->i_sb)->client->osdc; loff_t endoff = pos + iov->iov_len; - int want, got = 0; - int ret, err; + int got = 0; + int ret, err, written; if (ceph_snap(inode) != CEPH_NOSNAP) return -EROFS; retry_snap: + written = 0; if (ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_FULL)) return -ENOSPC; __ceph_do_pending_vmtruncate(inode); - dout("aio_write %p %llx.%llx %llu~%u getting caps. i_size %llu\n", - inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len, - inode->i_size); - if (fi->fmode & CEPH_FILE_MODE_LAZY) - want = CEPH_CAP_FILE_BUFFER | CEPH_CAP_FILE_LAZYIO; - else - want = CEPH_CAP_FILE_BUFFER; - ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, &got, endoff); - if (ret < 0) - goto out_put; - - dout("aio_write %p %llx.%llx %llu~%u got cap refs on %s\n", - inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len, - ceph_cap_string(got)); - - if ((got & (CEPH_CAP_FILE_BUFFER|CEPH_CAP_FILE_LAZYIO)) == 0 || - (iocb->ki_filp->f_flags & O_DIRECT) || - (inode->i_sb->s_flags & MS_SYNCHRONOUS) || - (fi->flags & CEPH_F_SYNC)) { - ret = ceph_sync_write(file, iov->iov_base, iov->iov_len, - &iocb->ki_pos); - } else { - /* - * buffered write; drop Fw early to avoid slow - * revocation if we get stuck on balance_dirty_pages - */ - int dirty; - - spin_lock(&ci->i_ceph_lock); - dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR); - spin_unlock(&ci->i_ceph_lock); - ceph_put_cap_refs(ci, got); + /* + * try to do a buffered write. if we don't have sufficient + * caps, we'll get -EAGAIN from generic_file_aio_write, or a + * short write if we only get caps for some pages. + */ + if (!(iocb->ki_filp->f_flags & O_DIRECT) && + !(inode->i_sb->s_flags & MS_SYNCHRONOUS) && + !(fi->flags & CEPH_F_SYNC)) { ret = generic_file_aio_write(iocb, iov, nr_segs, pos); + if (ret >= 0) + written = ret; + if ((ret >= 0 || ret == -EIOCBQUEUED) && ((file->f_flags & O_SYNC) || IS_SYNC(file->f_mapping->host) || ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_NEARFULL))) { - err = vfs_fsync_range(file, pos, pos + ret - 1, 1); + err = vfs_fsync_range(file, pos, pos + written - 1, 1); if (err < 0) ret = err; } + if ((ret < 0 && ret != -EAGAIN) || pos + written >= endoff) + goto out; + } - if (dirty) - __mark_inode_dirty(inode, dirty); + dout("aio_write %p %llx.%llx %llu~%u getting caps. i_size %llu\n", + inode, ceph_vinop(inode), pos + written, + (unsigned)iov->iov_len - written, inode->i_size); + ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, 0, &got, endoff); + if (ret < 0) goto out; - } + dout("aio_write %p %llx.%llx %llu~%u got cap refs on %s\n", + inode, ceph_vinop(inode), pos + written, + (unsigned)iov->iov_len - written, ceph_cap_string(got)); + ret = ceph_sync_write(file, iov->iov_base + written, + iov->iov_len - written, &iocb->ki_pos); if (ret >= 0) { int dirty; spin_lock(&ci->i_ceph_lock); @@ -777,13 +767,10 @@ retry_snap: if (dirty) __mark_inode_dirty(inode, dirty); } - -out_put: dout("aio_write %p %llx.%llx %llu~%u dropping cap refs on %s\n", - inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len, - ceph_cap_string(got)); + inode, ceph_vinop(inode), pos + written, + (unsigned)iov->iov_len - written, ceph_cap_string(got)); ceph_put_cap_refs(ci, got); - out: if (ret == -EOLDSNAPC) { dout("aio_write %p %llx.%llx %llu~%u got EOLDSNAPC, retrying\n", -- cgit v1.2.3 From 965c8e59cfcf845ecde2265a1d1bfee5f011d302 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Mon, 17 Dec 2012 15:59:39 -0800 Subject: lseek: the "whence" argument is called "whence" But the kernel decided to call it "origin" instead. Fix most of the sites. Acked-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ceph/file.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs/ceph/file.c') diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 5840d2aaed1..d4dfdcf76d7 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -797,7 +797,7 @@ out: /* * llseek. be sure to verify file size on SEEK_END. */ -static loff_t ceph_llseek(struct file *file, loff_t offset, int origin) +static loff_t ceph_llseek(struct file *file, loff_t offset, int whence) { struct inode *inode = file->f_mapping->host; int ret; @@ -805,7 +805,7 @@ static loff_t ceph_llseek(struct file *file, loff_t offset, int origin) mutex_lock(&inode->i_mutex); __ceph_do_pending_vmtruncate(inode); - if (origin == SEEK_END || origin == SEEK_DATA || origin == SEEK_HOLE) { + if (whence == SEEK_END || whence == SEEK_DATA || whence == SEEK_HOLE) { ret = ceph_do_getattr(inode, CEPH_STAT_CAP_SIZE); if (ret < 0) { offset = ret; @@ -813,7 +813,7 @@ static loff_t ceph_llseek(struct file *file, loff_t offset, int origin) } } - switch (origin) { + switch (whence) { case SEEK_END: offset += inode->i_size; break; -- cgit v1.2.3