From c98f533c9497e285109a047bfb955d683f33f7e4 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Thu, 9 Aug 2012 10:33:26 -0700 Subject: ceph: let path portion of mount "device" be optional A recent change to /sbin/mountall causes any trailing '/' character in the "device" (or fs_spec) field in /etc/fstab to be stripped. As a result, an entry for a ceph mount that intends to mount the root of the name space ends up with now path portion, and the ceph mount option processing code rejects this. That is, an entry in /etc/fstab like: cephserver:port:/ /mnt ceph defaults 0 0 provides to the ceph code just "cephserver:port:" as the "device," and that gets rejected. Although this is a bug in /sbin/mountall, we can have the ceph mount code support an empty/nonexistent path, interpreting it to mean the root of the name space. RFC 5952 offers recommendations for how to express IPv6 addresses, and recommends the usage found in RFC 3986 (which specifies the format for URI's) for representing both IPv4 and IPv6 addresses that include port numbers. (See in particular the definition of "authority" found in the Appendix of RFC 3986.) According to those standards, no host specification will ever contain a '/' character. As a result, it is sufficient to scan a provided "device" from an /etc/fstab entry for the first '/' character, and if it's found, treat that as the beginning of the path. If no '/' character is present, we can treat the entire string as the monitor host specification(s), and assume the path to be the root of the name space. We'll still require a ':' to separate the host portion from the (possibly empty) path portion. This means that we can more formally define how ceph will interpret the "device" it's provided when processing a mount request: "device" will look like: [,...]:[] where is [:] is optional, but if present must begin with '/' This addresses http://tracker.newdream.net/issues/2919 Signed-off-by: Alex Elder Reviewed-by: Dan Mick --- fs/ceph/super.c | 37 ++++++++++++++++++++++++++----------- 1 file changed, 26 insertions(+), 11 deletions(-) (limited to 'fs/ceph') diff --git a/fs/ceph/super.c b/fs/ceph/super.c index b982239f38f..2f586b0e5e0 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -307,7 +307,10 @@ static int parse_mount_options(struct ceph_mount_options **pfsopt, { struct ceph_mount_options *fsopt; const char *dev_name_end; - int err = -ENOMEM; + int err; + + if (!dev_name || !*dev_name) + return -EINVAL; fsopt = kzalloc(sizeof(*fsopt), GFP_KERNEL); if (!fsopt) @@ -328,21 +331,33 @@ static int parse_mount_options(struct ceph_mount_options **pfsopt, fsopt->max_readdir_bytes = CEPH_MAX_READDIR_BYTES_DEFAULT; fsopt->congestion_kb = default_congestion_kb(); - /* ip1[:port1][,ip2[:port2]...]:/subdir/in/fs */ + /* + * Distinguish the server list from the path in "dev_name". + * Internally we do not include the leading '/' in the path. + * + * "dev_name" will look like: + * [,...]:[] + * where + * is [:] + * is optional, but if present must begin with '/' + */ + dev_name_end = strchr(dev_name, '/'); + if (dev_name_end) { + /* skip over leading '/' for path */ + *path = dev_name_end + 1; + } else { + /* path is empty */ + dev_name_end = dev_name + strlen(dev_name); + *path = dev_name_end; + } err = -EINVAL; - if (!dev_name) - goto out; - *path = strstr(dev_name, ":/"); - if (*path == NULL) { - pr_err("device name is missing path (no :/ in %s)\n", + dev_name_end--; /* back up to ':' separator */ + if (*dev_name_end != ':') { + pr_err("device name is missing path (no : separator in %s)\n", dev_name); goto out; } - dev_name_end = *path; dout("device name '%.*s'\n", (int)(dev_name_end - dev_name), dev_name); - - /* path on server */ - *path += 2; dout("server path '%s'\n", *path); *popt = ceph_parse_options(options, dev_name, dev_name_end, -- cgit v1.2.3 From 3e8f43a089f06279c5f76a9ccd42578eebf7bfa5 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Thu, 20 Sep 2012 17:42:25 +0800 Subject: ceph: Fix oops when handling mdsmap that decreases max_mds When i >= newmap->m_max_mds, ceph_mdsmap_get_addr(newmap, i) return NULL. Passing NULL to memcmp() triggers oops. Signed-off-by: Yan, Zheng Signed-off-by: Sage Weil --- fs/ceph/mds_client.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/ceph') diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index a5a735422aa..1bcf712655d 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -2625,7 +2625,8 @@ static void check_new_map(struct ceph_mds_client *mdsc, ceph_mdsmap_is_laggy(newmap, i) ? " (laggy)" : "", session_state_name(s->s_state)); - if (memcmp(ceph_mdsmap_get_addr(oldmap, i), + if (i >= newmap->m_max_mds || + memcmp(ceph_mdsmap_get_addr(oldmap, i), ceph_mdsmap_get_addr(newmap, i), sizeof(struct ceph_entity_addr))) { if (s->s_state == CEPH_MDS_SESSION_OPENING) { -- cgit v1.2.3 From b905a7f8b7a61c192927d0324f2ea6c998f451ba Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Fri, 28 Sep 2012 12:59:16 +0800 Subject: ceph: convert to use le32_add_cpu() Convert cpu_to_le32(le32_to_cpu(E1) + E2) to use le32_add_cpu(). dpatch engine is used to auto generate this patch. (https://github.com/weiyj/dpatch) Signed-off-by: Wei Yongjun Signed-off-by: Sage Weil --- fs/ceph/caps.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/ceph') diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 620daad201d..3251e9cc640 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -1005,7 +1005,7 @@ static void __queue_cap_release(struct ceph_mds_session *session, BUG_ON(msg->front.iov_len + sizeof(*item) > PAGE_CACHE_SIZE); head = msg->front.iov_base; - head->num = cpu_to_le32(le32_to_cpu(head->num) + 1); + le32_add_cpu(&head->num, 1); item = msg->front.iov_base + msg->front.iov_len; item->ino = cpu_to_le64(ino); item->cap_id = cpu_to_le64(cap_id); -- cgit v1.2.3 From 6816282dab3a72efe8c0d182c1bc2960d87f4322 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Mon, 24 Sep 2012 21:01:02 -0700 Subject: ceph: propagate layout error on osd request creation If we are creating an osd request and get an invalid layout, return an EINVAL to the caller. We switch up the return to have an error code instead of NULL implying -ENOMEM. Signed-off-by: Sage Weil Reviewed-by: Alex Elder --- fs/ceph/addr.c | 8 ++++---- fs/ceph/file.c | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'fs/ceph') diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 452e71a1b75..4469b63c9b7 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -308,8 +308,8 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max) NULL, 0, ci->i_truncate_seq, ci->i_truncate_size, NULL, false, 1, 0); - if (!req) - return -ENOMEM; + if (IS_ERR(req)) + return PTR_ERR(req); /* build page vector */ nr_pages = len >> PAGE_CACHE_SHIFT; @@ -832,8 +832,8 @@ get_more_pages: ci->i_truncate_size, &inode->i_mtime, true, 1, 0); - if (!req) { - rc = -ENOMEM; + if (IS_ERR(req)) { + rc = PTR_ERR(req); unlock_page(page); break; } diff --git a/fs/ceph/file.c b/fs/ceph/file.c index ecebbc09bfc..5840d2aaed1 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -536,8 +536,8 @@ more: do_sync, ci->i_truncate_seq, ci->i_truncate_size, &mtime, false, 2, page_align); - if (!req) - return -ENOMEM; + if (IS_ERR(req)) + return PTR_ERR(req); if (file->f_flags & O_DIRECT) { pages = ceph_get_direct_page_vector(data, num_pages, false); -- cgit v1.2.3 From 457712a0bc5389b75d2c93840a684fd77df2aabb Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Mon, 24 Sep 2012 21:04:57 -0700 Subject: ceph: return EIO on invalid layout on GET_DATALOC ioctl If the user calls GET_DATALOC on a file with an invalid (e.g., zeroed) layout, return EIO to userland. Signed-off-by: Sage Weil Reviewed-by: Alex Elder --- fs/ceph/ioctl.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'fs/ceph') diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c index 1396ceb4679..36549a46e31 100644 --- a/fs/ceph/ioctl.c +++ b/fs/ceph/ioctl.c @@ -187,14 +187,18 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg) u64 tmp; struct ceph_object_layout ol; struct ceph_pg pgid; + int r; /* copy and validate */ if (copy_from_user(&dl, arg, sizeof(dl))) return -EFAULT; down_read(&osdc->map_sem); - ceph_calc_file_object_mapping(&ci->i_layout, dl.file_offset, &len, - &dl.object_no, &dl.object_offset, &olen); + r = ceph_calc_file_object_mapping(&ci->i_layout, dl.file_offset, &len, + &dl.object_no, &dl.object_offset, + &olen); + if (r < 0) + return -EIO; dl.file_offset -= dl.object_offset; dl.object_size = ceph_file_layout_object_size(ci->i_layout); dl.block_size = ceph_file_layout_su(ci->i_layout); -- cgit v1.2.3 From 6285bc231277419255f3498d3eb5ddc9f8e7fe79 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Tue, 2 Oct 2012 10:25:51 -0500 Subject: ceph: avoid 32-bit page index overflow A pgoff_t is defined (by default) to have type (unsigned long). On architectures such as i686 that's a 32-bit type. The ceph address space code was attempting to produce 64 bit offsets by shifting a page's index by PAGE_CACHE_SHIFT, but the result was not what was desired because the shift occurred before the result got promoted to 64 bits. Fix this by converting all uses of page->index used in this way to use the page_offset() macro, which ensures the 64-bit result has the intended value. This fixes http://tracker.newdream.net/issues/3112 Reported-by: Mohamed Pakkeer Signed-off-by: Alex Elder Reviewed-by: Sage Weil --- fs/ceph/addr.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'fs/ceph') diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 4469b63c9b7..22b6e4583fa 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -205,7 +205,7 @@ static int readpage_nounlock(struct file *filp, struct page *page) dout("readpage inode %p file %p page %p index %lu\n", inode, filp, page, page->index); err = ceph_osdc_readpages(osdc, ceph_vino(inode), &ci->i_layout, - page->index << PAGE_CACHE_SHIFT, &len, + (u64) page_offset(page), &len, ci->i_truncate_seq, ci->i_truncate_size, &page, 1, 0); if (err == -ENOENT) @@ -286,7 +286,7 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max) int nr_pages = 0; int ret; - off = page->index << PAGE_CACHE_SHIFT; + off = (u64) page_offset(page); /* count pages */ next_index = page->index; @@ -426,7 +426,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) struct ceph_inode_info *ci; struct ceph_fs_client *fsc; struct ceph_osd_client *osdc; - loff_t page_off = page->index << PAGE_CACHE_SHIFT; + loff_t page_off = page_offset(page); int len = PAGE_CACHE_SIZE; loff_t i_size; int err = 0; @@ -817,8 +817,7 @@ get_more_pages: /* ok */ if (locked_pages == 0) { /* prepare async write request */ - offset = (unsigned long long)page->index - << PAGE_CACHE_SHIFT; + offset = (u64) page_offset(page); len = wsize; req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, @@ -1180,7 +1179,7 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) struct inode *inode = vma->vm_file->f_dentry->d_inode; struct page *page = vmf->page; struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; - loff_t off = page->index << PAGE_CACHE_SHIFT; + loff_t off = page_offset(page); loff_t size, len; int ret; -- cgit v1.2.3