diff options
author | Patrick McHardy <kaber@trash.net> | 2013-03-31 18:10:34 +0200 |
---|---|---|
committer | Patrick McHardy <kaber@trash.net> | 2013-03-31 18:10:34 +0200 |
commit | 70711d223510ba1773cfe1d7770a56141c815ff8 (patch) | |
tree | 4a71f38a3a554ddecaa31b7d8c6bc49b7d1705b4 /fs/nfs | |
parent | d53b4ed072d9779cdf53582c46436dec06d0961f (diff) | |
parent | 19f949f52599ba7c3f67a5897ac6be14bfcb1200 (diff) |
Merge tag 'v3.8' of /home/kaber/src/repos/linux
Linux 3.8
Signed-off-by: Patrick McHardy <kaber@trash.net>
Conflicts:
include/linux/Kbuild
include/linux/netlink.h
Diffstat (limited to 'fs/nfs')
63 files changed, 6176 insertions, 3980 deletions
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index f90f4f5cd42..13ca196385f 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig @@ -30,7 +30,7 @@ config NFS_FS If unsure, say N. config NFS_V2 - bool "NFS client support for NFS version 2" + tristate "NFS client support for NFS version 2" depends on NFS_FS default y help @@ -40,7 +40,7 @@ config NFS_V2 If unsure, say Y. config NFS_V3 - bool "NFS client support for NFS version 3" + tristate "NFS client support for NFS version 3" depends on NFS_FS default y help @@ -72,7 +72,7 @@ config NFS_V3_ACL If unsure, say N. config NFS_V4 - bool "NFS client support for NFS version 4" + tristate "NFS client support for NFS version 4" depends on NFS_FS select SUNRPC_GSS select KEYS @@ -86,11 +86,18 @@ config NFS_V4 If unsure, say Y. +config NFS_SWAP + bool "Provide swap over NFS support" + default n + depends on NFS_FS + select SUNRPC_SWAP + help + This option enables swapon to work on files located on NFS mounts. + config NFS_V4_1 - bool "NFS client support for NFSv4.1 (EXPERIMENTAL)" - depends on NFS_FS && NFS_V4 && EXPERIMENTAL + bool "NFS client support for NFSv4.1" + depends on NFS_V4 select SUNRPC_BACKCHANNEL - select PNFS_FILE_LAYOUT help This option enables support for minor version 1 of the NFSv4 protocol (RFC 5661) in the kernel's NFS client. 
@@ -99,15 +106,17 @@ config NFS_V4_1 config PNFS_FILE_LAYOUT tristate + depends on NFS_V4_1 + default m config PNFS_BLOCK tristate - depends on NFS_FS && NFS_V4_1 && BLK_DEV_DM + depends on NFS_V4_1 && BLK_DEV_DM default m config PNFS_OBJLAYOUT tristate - depends on NFS_FS && NFS_V4_1 && SCSI_OSD_ULD + depends on NFS_V4_1 && SCSI_OSD_ULD default m config NFS_V4_1_IMPLEMENTATION_ID_DOMAIN diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile index 7ddd45d9f17..cce2c057bd2 100644 --- a/fs/nfs/Makefile +++ b/fs/nfs/Makefile @@ -9,17 +9,23 @@ nfs-y := client.o dir.o file.o getroot.o inode.o super.o \ write.o namespace.o mount_clnt.o \ dns_resolve.o cache_lib.o nfs-$(CONFIG_ROOT_NFS) += nfsroot.o -nfs-$(CONFIG_NFS_V2) += proc.o nfs2xdr.o -nfs-$(CONFIG_NFS_V3) += nfs3proc.o nfs3xdr.o -nfs-$(CONFIG_NFS_V3_ACL) += nfs3acl.o -nfs-$(CONFIG_NFS_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \ - delegation.o idmap.o \ - callback.o callback_xdr.o callback_proc.o \ - nfs4namespace.o -nfs-$(CONFIG_NFS_V4_1) += pnfs.o pnfs_dev.o -nfs-$(CONFIG_SYSCTL) += sysctl.o +nfs-$(CONFIG_SYSCTL) += sysctl.o nfs-$(CONFIG_NFS_FSCACHE) += fscache.o fscache-index.o +obj-$(CONFIG_NFS_V2) += nfsv2.o +nfsv2-y := nfs2super.o proc.o nfs2xdr.o + +obj-$(CONFIG_NFS_V3) += nfsv3.o +nfsv3-y := nfs3super.o nfs3client.o nfs3proc.o nfs3xdr.o +nfsv3-$(CONFIG_NFS_V3_ACL) += nfs3acl.o + +obj-$(CONFIG_NFS_V4) += nfsv4.o +nfsv4-y := nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o nfs4super.o nfs4file.o \ + delegation.o idmap.o callback.o callback_xdr.o callback_proc.o \ + nfs4namespace.o nfs4getroot.o nfs4client.o +nfsv4-$(CONFIG_SYSCTL) += nfs4sysctl.o +nfsv4-$(CONFIG_NFS_V4_1) += nfs4session.o pnfs.o pnfs_dev.o + obj-$(CONFIG_PNFS_FILE_LAYOUT) += nfs_layout_nfsv41_files.o nfs_layout_nfsv41_files-y := nfs4filelayout.o nfs4filelayoutdev.o diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 7ae8a608956..4fa788c93f4 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ 
b/fs/nfs/blocklayout/blocklayout.c @@ -37,8 +37,10 @@ #include <linux/bio.h> /* struct bio */ #include <linux/buffer_head.h> /* various write calls */ #include <linux/prefetch.h> +#include <linux/pagevec.h> #include "../pnfs.h" +#include "../nfs4session.h" #include "../internal.h" #include "blocklayout.h" @@ -162,25 +164,39 @@ static struct bio *bl_alloc_init_bio(int npg, sector_t isect, return bio; } -static struct bio *bl_add_page_to_bio(struct bio *bio, int npg, int rw, +static struct bio *do_add_page_to_bio(struct bio *bio, int npg, int rw, sector_t isect, struct page *page, struct pnfs_block_extent *be, void (*end_io)(struct bio *, int err), - struct parallel_io *par) + struct parallel_io *par, + unsigned int offset, int len) { + isect = isect + (offset >> SECTOR_SHIFT); + dprintk("%s: npg %d rw %d isect %llu offset %u len %d\n", __func__, + npg, rw, (unsigned long long)isect, offset, len); retry: if (!bio) { bio = bl_alloc_init_bio(npg, isect, be, end_io, par); if (!bio) return ERR_PTR(-ENOMEM); } - if (bio_add_page(bio, page, PAGE_CACHE_SIZE, 0) < PAGE_CACHE_SIZE) { + if (bio_add_page(bio, page, len, offset) < len) { bio = bl_submit_bio(rw, bio); goto retry; } return bio; } +static struct bio *bl_add_page_to_bio(struct bio *bio, int npg, int rw, + sector_t isect, struct page *page, + struct pnfs_block_extent *be, + void (*end_io)(struct bio *, int err), + struct parallel_io *par) +{ + return do_add_page_to_bio(bio, npg, rw, isect, page, be, + end_io, par, 0, PAGE_CACHE_SIZE); +} + /* This is basically copied from mpage_end_io_read */ static void bl_end_io_read(struct bio *bio, int err) { @@ -238,8 +254,11 @@ bl_read_pagelist(struct nfs_read_data *rdata) sector_t isect, extent_length = 0; struct parallel_io *par; loff_t f_offset = rdata->args.offset; + size_t bytes_left = rdata->args.count; + unsigned int pg_offset, pg_len; struct page **pages = rdata->args.pages; int pg_index = rdata->args.pgbase >> PAGE_CACHE_SHIFT; + const bool is_dio = (header->dreq != 
NULL); dprintk("%s enter nr_pages %u offset %lld count %u\n", __func__, rdata->pages.npages, f_offset, (unsigned int)rdata->args.count); @@ -273,36 +292,53 @@ bl_read_pagelist(struct nfs_read_data *rdata) extent_length = min(extent_length, cow_length); } } + + if (is_dio) { + pg_offset = f_offset & ~PAGE_CACHE_MASK; + if (pg_offset + bytes_left > PAGE_CACHE_SIZE) + pg_len = PAGE_CACHE_SIZE - pg_offset; + else + pg_len = bytes_left; + + f_offset += pg_len; + bytes_left -= pg_len; + isect += (pg_offset >> SECTOR_SHIFT); + } else { + pg_offset = 0; + pg_len = PAGE_CACHE_SIZE; + } + hole = is_hole(be, isect); if (hole && !cow_read) { bio = bl_submit_bio(READ, bio); /* Fill hole w/ zeroes w/o accessing device */ dprintk("%s Zeroing page for hole\n", __func__); - zero_user_segment(pages[i], 0, PAGE_CACHE_SIZE); + zero_user_segment(pages[i], pg_offset, pg_len); print_page(pages[i]); SetPageUptodate(pages[i]); } else { struct pnfs_block_extent *be_read; be_read = (hole && cow_read) ? cow_read : be; - bio = bl_add_page_to_bio(bio, rdata->pages.npages - i, + bio = do_add_page_to_bio(bio, rdata->pages.npages - i, READ, isect, pages[i], be_read, - bl_end_io_read, par); + bl_end_io_read, par, + pg_offset, pg_len); if (IS_ERR(bio)) { header->pnfs_error = PTR_ERR(bio); bio = NULL; goto out; } } - isect += PAGE_CACHE_SECTORS; + isect += (pg_len >> SECTOR_SHIFT); extent_length -= PAGE_CACHE_SECTORS; } if ((isect << SECTOR_SHIFT) >= header->inode->i_size) { rdata->res.eof = 1; - rdata->res.count = header->inode->i_size - f_offset; + rdata->res.count = header->inode->i_size - rdata->args.offset; } else { - rdata->res.count = (isect << SECTOR_SHIFT) - f_offset; + rdata->res.count = (isect << SECTOR_SHIFT) - rdata->args.offset; } out: bl_put_extent(be); @@ -450,6 +486,106 @@ map_block(struct buffer_head *bh, sector_t isect, struct pnfs_block_extent *be) return; } +static void +bl_read_single_end_io(struct bio *bio, int error) +{ + struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 
1; + struct page *page = bvec->bv_page; + + /* Only one page in bvec */ + unlock_page(page); +} + +static int +bl_do_readpage_sync(struct page *page, struct pnfs_block_extent *be, + unsigned int offset, unsigned int len) +{ + struct bio *bio; + struct page *shadow_page; + sector_t isect; + char *kaddr, *kshadow_addr; + int ret = 0; + + dprintk("%s: offset %u len %u\n", __func__, offset, len); + + shadow_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); + if (shadow_page == NULL) + return -ENOMEM; + + bio = bio_alloc(GFP_NOIO, 1); + if (bio == NULL) + return -ENOMEM; + + isect = (page->index << PAGE_CACHE_SECTOR_SHIFT) + + (offset / SECTOR_SIZE); + + bio->bi_sector = isect - be->be_f_offset + be->be_v_offset; + bio->bi_bdev = be->be_mdev; + bio->bi_end_io = bl_read_single_end_io; + + lock_page(shadow_page); + if (bio_add_page(bio, shadow_page, + SECTOR_SIZE, round_down(offset, SECTOR_SIZE)) == 0) { + unlock_page(shadow_page); + bio_put(bio); + return -EIO; + } + + submit_bio(READ, bio); + wait_on_page_locked(shadow_page); + if (unlikely(!test_bit(BIO_UPTODATE, &bio->bi_flags))) { + ret = -EIO; + } else { + kaddr = kmap_atomic(page); + kshadow_addr = kmap_atomic(shadow_page); + memcpy(kaddr + offset, kshadow_addr + offset, len); + kunmap_atomic(kshadow_addr); + kunmap_atomic(kaddr); + } + __free_page(shadow_page); + bio_put(bio); + + return ret; +} + +static int +bl_read_partial_page_sync(struct page *page, struct pnfs_block_extent *be, + unsigned int dirty_offset, unsigned int dirty_len, + bool full_page) +{ + int ret = 0; + unsigned int start, end; + + if (full_page) { + start = 0; + end = PAGE_CACHE_SIZE; + } else { + start = round_down(dirty_offset, SECTOR_SIZE); + end = round_up(dirty_offset + dirty_len, SECTOR_SIZE); + } + + dprintk("%s: offset %u len %d\n", __func__, dirty_offset, dirty_len); + if (!be) { + zero_user_segments(page, start, dirty_offset, + dirty_offset + dirty_len, end); + if (start == 0 && end == PAGE_CACHE_SIZE && + trylock_page(page)) { + 
SetPageUptodate(page); + unlock_page(page); + } + return ret; + } + + if (start != dirty_offset) + ret = bl_do_readpage_sync(page, be, start, dirty_offset - start); + + if (!ret && (dirty_offset + dirty_len < end)) + ret = bl_do_readpage_sync(page, be, dirty_offset + dirty_len, + end - dirty_offset - dirty_len); + + return ret; +} + /* Given an unmapped page, zero it or read in page for COW, page is locked * by caller. */ @@ -483,7 +619,6 @@ init_page_for_write(struct page *page, struct pnfs_block_extent *cow_read) SetPageUptodate(page); cleanup: - bl_put_extent(cow_read); if (bh) free_buffer_head(bh); if (ret) { @@ -552,9 +687,10 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync) struct bio *bio = NULL; struct pnfs_block_extent *be = NULL, *cow_read = NULL; sector_t isect, last_isect = 0, extent_length = 0; - struct parallel_io *par; + struct parallel_io *par = NULL; loff_t offset = wdata->args.offset; size_t count = wdata->args.count; + unsigned int pg_offset, pg_len, saved_len; struct page **pages = wdata->args.pages; struct page *page; pgoff_t index; @@ -563,6 +699,13 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync) NFS_SERVER(header->inode)->pnfs_blksize >> PAGE_CACHE_SHIFT; dprintk("%s enter, %Zu@%lld\n", __func__, count, offset); + + if (header->dreq != NULL && + (!IS_ALIGNED(offset, NFS_SERVER(header->inode)->pnfs_blksize) || + !IS_ALIGNED(count, NFS_SERVER(header->inode)->pnfs_blksize))) { + dprintk("pnfsblock nonblock aligned DIO writes. Resend MDS\n"); + goto out_mds; + } /* At this point, wdata->pages is a (sequential) list of nfs_pages. * We want to write each, and if there is an error set pnfs_error * to have it redone using nfs. 
@@ -659,10 +802,11 @@ next_page: if (!extent_length) { /* We've used up the previous extent */ bl_put_extent(be); + bl_put_extent(cow_read); bio = bl_submit_bio(WRITE, bio); /* Get the next one */ be = bl_find_get_extent(BLK_LSEG2EXT(header->lseg), - isect, NULL); + isect, &cow_read); if (!be || !is_writable(be, isect)) { header->pnfs_error = -EINVAL; goto out; @@ -679,7 +823,26 @@ next_page: extent_length = be->be_length - (isect - be->be_f_offset); } - if (be->be_state == PNFS_BLOCK_INVALID_DATA) { + + dprintk("%s offset %lld count %Zu\n", __func__, offset, count); + pg_offset = offset & ~PAGE_CACHE_MASK; + if (pg_offset + count > PAGE_CACHE_SIZE) + pg_len = PAGE_CACHE_SIZE - pg_offset; + else + pg_len = count; + + saved_len = pg_len; + if (be->be_state == PNFS_BLOCK_INVALID_DATA && + !bl_is_sector_init(be->be_inval, isect)) { + ret = bl_read_partial_page_sync(pages[i], cow_read, + pg_offset, pg_len, true); + if (ret) { + dprintk("%s bl_read_partial_page_sync fail %d\n", + __func__, ret); + header->pnfs_error = ret; + goto out; + } + ret = bl_mark_sectors_init(be->be_inval, isect, PAGE_CACHE_SECTORS); if (unlikely(ret)) { @@ -688,15 +851,35 @@ next_page: header->pnfs_error = ret; goto out; } + + /* Expand to full page write */ + pg_offset = 0; + pg_len = PAGE_CACHE_SIZE; + } else if ((pg_offset & (SECTOR_SIZE - 1)) || + (pg_len & (SECTOR_SIZE - 1))){ + /* ahh, nasty case. We have to do sync full sector + * read-modify-write cycles. 
+ */ + unsigned int saved_offset = pg_offset; + ret = bl_read_partial_page_sync(pages[i], be, pg_offset, + pg_len, false); + pg_offset = round_down(pg_offset, SECTOR_SIZE); + pg_len = round_up(saved_offset + pg_len, SECTOR_SIZE) + - pg_offset; } - bio = bl_add_page_to_bio(bio, wdata->pages.npages - i, WRITE, + + + bio = do_add_page_to_bio(bio, wdata->pages.npages - i, WRITE, isect, pages[i], be, - bl_end_io_write, par); + bl_end_io_write, par, + pg_offset, pg_len); if (IS_ERR(bio)) { header->pnfs_error = PTR_ERR(bio); bio = NULL; goto out; } + offset += saved_len; + count -= saved_len; isect += PAGE_CACHE_SECTORS; last_isect = isect; extent_length -= PAGE_CACHE_SECTORS; @@ -714,17 +897,16 @@ next_page: } write_done: - wdata->res.count = (last_isect << SECTOR_SHIFT) - (offset); - if (count < wdata->res.count) { - wdata->res.count = count; - } + wdata->res.count = wdata->args.count; out: bl_put_extent(be); + bl_put_extent(cow_read); bl_submit_bio(WRITE, bio); put_parallel(par); return PNFS_ATTEMPTED; out_mds: bl_put_extent(be); + bl_put_extent(cow_read); kfree(par); return PNFS_NOT_ATTEMPTED; } @@ -859,7 +1041,7 @@ static void free_blk_mountid(struct block_mount_id *mid) } } -/* This is mostly copied from the filelayout's get_device_info function. +/* This is mostly copied from the filelayout_get_device_info function. * It seems much of this should be at the generic pnfs level. 
*/ static struct pnfs_block_dev * @@ -996,15 +1178,95 @@ bl_clear_layoutdriver(struct nfs_server *server) return 0; } +static bool +is_aligned_req(struct nfs_page *req, unsigned int alignment) +{ + return IS_ALIGNED(req->wb_offset, alignment) && + IS_ALIGNED(req->wb_bytes, alignment); +} + +static void +bl_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) +{ + if (pgio->pg_dreq != NULL && + !is_aligned_req(req, SECTOR_SIZE)) + nfs_pageio_reset_read_mds(pgio); + else + pnfs_generic_pg_init_read(pgio, req); +} + +static bool +bl_pg_test_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, + struct nfs_page *req) +{ + if (pgio->pg_dreq != NULL && + !is_aligned_req(req, SECTOR_SIZE)) + return false; + + return pnfs_generic_pg_test(pgio, prev, req); +} + +/* + * Return the number of contiguous bytes for a given inode + * starting at page frame idx. + */ +static u64 pnfs_num_cont_bytes(struct inode *inode, pgoff_t idx) +{ + struct address_space *mapping = inode->i_mapping; + pgoff_t end; + + /* Optimize common case that writes from 0 to end of file */ + end = DIV_ROUND_UP(i_size_read(inode), PAGE_CACHE_SIZE); + if (end != NFS_I(inode)->npages) { + rcu_read_lock(); + end = radix_tree_next_hole(&mapping->page_tree, idx + 1, ULONG_MAX); + rcu_read_unlock(); + } + + if (!end) + return i_size_read(inode) - (idx << PAGE_CACHE_SHIFT); + else + return (end - idx) << PAGE_CACHE_SHIFT; +} + +static void +bl_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) +{ + if (pgio->pg_dreq != NULL && + !is_aligned_req(req, PAGE_CACHE_SIZE)) { + nfs_pageio_reset_write_mds(pgio); + } else { + u64 wb_size; + if (pgio->pg_dreq == NULL) + wb_size = pnfs_num_cont_bytes(pgio->pg_inode, + req->wb_index); + else + wb_size = nfs_dreq_bytes_left(pgio->pg_dreq); + + pnfs_generic_pg_init_write(pgio, req, wb_size); + } +} + +static bool +bl_pg_test_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, + struct nfs_page *req) +{ + if 
(pgio->pg_dreq != NULL && + !is_aligned_req(req, PAGE_CACHE_SIZE)) + return false; + + return pnfs_generic_pg_test(pgio, prev, req); +} + static const struct nfs_pageio_ops bl_pg_read_ops = { - .pg_init = pnfs_generic_pg_init_read, - .pg_test = pnfs_generic_pg_test, + .pg_init = bl_pg_init_read, + .pg_test = bl_pg_test_read, .pg_doio = pnfs_generic_pg_readpages, }; static const struct nfs_pageio_ops bl_pg_write_ops = { - .pg_init = pnfs_generic_pg_init_write, - .pg_test = pnfs_generic_pg_test, + .pg_init = bl_pg_init_write, + .pg_test = bl_pg_test_write, .pg_doio = pnfs_generic_pg_writepages, }; diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h index 03350690118..f4891bde885 100644 --- a/fs/nfs/blocklayout/blocklayout.h +++ b/fs/nfs/blocklayout/blocklayout.h @@ -41,6 +41,7 @@ #define PAGE_CACHE_SECTORS (PAGE_CACHE_SIZE >> SECTOR_SHIFT) #define PAGE_CACHE_SECTOR_SHIFT (PAGE_CACHE_SHIFT - SECTOR_SHIFT) +#define SECTOR_SIZE (1 << SECTOR_SHIFT) struct block_mount_id { spinlock_t bm_lock; /* protects list */ @@ -172,7 +173,6 @@ struct bl_msg_hdr { /* blocklayoutdev.c */ ssize_t bl_pipe_downcall(struct file *, const char __user *, size_t); void bl_pipe_destroy_msg(struct rpc_pipe_msg *); -struct block_device *nfs4_blkdev_get(dev_t dev); int nfs4_blkdev_put(struct block_device *bdev); struct pnfs_block_dev *nfs4_blk_decode_device(struct nfs_server *server, struct pnfs_device *dev); diff --git a/fs/nfs/blocklayout/blocklayoutdev.c b/fs/nfs/blocklayout/blocklayoutdev.c index c96554245cc..a86c5bdad9e 100644 --- a/fs/nfs/blocklayout/blocklayoutdev.c +++ b/fs/nfs/blocklayout/blocklayoutdev.c @@ -53,22 +53,6 @@ static int decode_sector_number(__be32 **rp, sector_t *sp) return 0; } -/* Open a block_device by device number. 
*/ -struct block_device *nfs4_blkdev_get(dev_t dev) -{ - struct block_device *bd; - - dprintk("%s enter\n", __func__); - bd = blkdev_get_by_dev(dev, FMODE_READ, NULL); - if (IS_ERR(bd)) - goto fail; - return bd; -fail: - dprintk("%s failed to open device : %ld\n", - __func__, PTR_ERR(bd)); - return NULL; -} - /* * Release the block device */ @@ -172,11 +156,12 @@ nfs4_blk_decode_device(struct nfs_server *server, goto out; } - bd = nfs4_blkdev_get(MKDEV(reply->major, reply->minor)); + bd = blkdev_get_by_dev(MKDEV(reply->major, reply->minor), + FMODE_READ, NULL); if (IS_ERR(bd)) { - rc = PTR_ERR(bd); - dprintk("%s failed to open device : %d\n", __func__, rc); - rv = ERR_PTR(rc); + dprintk("%s failed to open device : %ld\n", __func__, + PTR_ERR(bd)); + rv = ERR_CAST(bd); goto out; } diff --git a/fs/nfs/blocklayout/extents.c b/fs/nfs/blocklayout/extents.c index 1f9a6032796..9c3e117c3ed 100644 --- a/fs/nfs/blocklayout/extents.c +++ b/fs/nfs/blocklayout/extents.c @@ -683,8 +683,7 @@ encode_pnfs_block_layoutupdate(struct pnfs_block_layout *bl, p = xdr_encode_hyper(p, lce->bse_length << SECTOR_SHIFT); p = xdr_encode_hyper(p, 0LL); *p++ = cpu_to_be32(PNFS_BLOCK_READWRITE_DATA); - list_del(&lce->bse_node); - list_add_tail(&lce->bse_node, &bl->bl_committing); + list_move_tail(&lce->bse_node, &bl->bl_committing); bl->bl_count--; count++; } diff --git a/fs/nfs/cache_lib.c b/fs/nfs/cache_lib.c index dded2636811..862a2f16db6 100644 --- a/fs/nfs/cache_lib.c +++ b/fs/nfs/cache_lib.c @@ -118,7 +118,6 @@ int nfs_cache_register_sb(struct super_block *sb, struct cache_detail *cd) struct dentry *dir; dir = rpc_d_lookup_sb(sb, "cache"); - BUG_ON(dir == NULL); ret = sunrpc_cache_register_pipefs(dir, cd->name, 0600, cd); dput(dir); return ret; diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 23ff18fe080..5088b57b078 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -12,6 +12,7 @@ #include <linux/sunrpc/svc.h> #include <linux/sunrpc/svcsock.h> #include <linux/nfs_fs.h> 
+#include <linux/errno.h> #include <linux/mutex.h> #include <linux/freezer.h> #include <linux/kthread.h> @@ -23,6 +24,7 @@ #include "nfs4_fs.h" #include "callback.h" #include "internal.h" +#include "netns.h" #define NFSDBG_FACILITY NFSDBG_CALLBACK @@ -37,31 +39,32 @@ static struct nfs_callback_data nfs_callback_info[NFS4_MAX_MINOR_VERSION + 1]; static DEFINE_MUTEX(nfs_callback_mutex); static struct svc_program nfs4_callback_program; -unsigned int nfs_callback_set_tcpport; -unsigned short nfs_callback_tcpport; -unsigned short nfs_callback_tcpport6; -#define NFS_CALLBACK_MAXPORTNR (65535U) - -static int param_set_portnr(const char *val, const struct kernel_param *kp) +static int nfs4_callback_up_net(struct svc_serv *serv, struct net *net) { - unsigned long num; int ret; + struct nfs_net *nn = net_generic(net, nfs_net_id); + + ret = svc_create_xprt(serv, "tcp", net, PF_INET, + nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS); + if (ret <= 0) + goto out_err; + nn->nfs_callback_tcpport = ret; + dprintk("NFS: Callback listener port = %u (af %u, net %p)\n", + nn->nfs_callback_tcpport, PF_INET, net); - if (!val) - return -EINVAL; - ret = strict_strtoul(val, 0, &num); - if (ret == -EINVAL || num > NFS_CALLBACK_MAXPORTNR) - return -EINVAL; - *((unsigned int *)kp->arg) = num; + ret = svc_create_xprt(serv, "tcp", net, PF_INET6, + nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS); + if (ret > 0) { + nn->nfs_callback_tcpport6 = ret; + dprintk("NFS: Callback listener port = %u (af %u, net %p)\n", + nn->nfs_callback_tcpport6, PF_INET6, net); + } else if (ret != -EAFNOSUPPORT) + goto out_err; return 0; -} -static struct kernel_param_ops param_ops_portnr = { - .set = param_set_portnr, - .get = param_get_uint, -}; -#define param_check_portnr(name, p) __param_check(name, p, unsigned int); -module_param_named(callback_tcpport, nfs_callback_set_tcpport, portnr, 0644); +out_err: + return (ret) ? ret : -ENOMEM; +} /* * This is the NFSv4 callback kernel thread. 
@@ -69,7 +72,7 @@ module_param_named(callback_tcpport, nfs_callback_set_tcpport, portnr, 0644); static int nfs4_callback_svc(void *vrqstp) { - int err, preverr = 0; + int err; struct svc_rqst *rqstp = vrqstp; set_freezable(); @@ -79,20 +82,8 @@ nfs4_callback_svc(void *vrqstp) * Listen for a request on the socket */ err = svc_recv(rqstp, MAX_SCHEDULE_TIMEOUT); - if (err == -EAGAIN || err == -EINTR) { - preverr = err; - continue; - } - if (err < 0) { - if (err != preverr) { - printk(KERN_WARNING "NFS: %s: unexpected error " - "from svc_recv (%d)\n", __func__, err); - preverr = err; - } - schedule_timeout_uninterruptible(HZ); + if (err == -EAGAIN || err == -EINTR) continue; - } - preverr = err; svc_process(rqstp); } return 0; @@ -102,38 +93,23 @@ nfs4_callback_svc(void *vrqstp) * Prepare to bring up the NFSv4 callback service */ static struct svc_rqst * -nfs4_callback_up(struct svc_serv *serv, struct rpc_xprt *xprt) +nfs4_callback_up(struct svc_serv *serv) { - int ret; - - ret = svc_create_xprt(serv, "tcp", &init_net, PF_INET, - nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS); - if (ret <= 0) - goto out_err; - nfs_callback_tcpport = ret; - dprintk("NFS: Callback listener port = %u (af %u)\n", - nfs_callback_tcpport, PF_INET); - - ret = svc_create_xprt(serv, "tcp", &init_net, PF_INET6, - nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS); - if (ret > 0) { - nfs_callback_tcpport6 = ret; - dprintk("NFS: Callback listener port = %u (af %u)\n", - nfs_callback_tcpport6, PF_INET6); - } else if (ret == -EAFNOSUPPORT) - ret = 0; - else - goto out_err; - return svc_prepare_thread(serv, &serv->sv_pools[0], NUMA_NO_NODE); - -out_err: - if (ret == 0) - ret = -ENOMEM; - return ERR_PTR(ret); } #if defined(CONFIG_NFS_V4_1) +static int nfs41_callback_up_net(struct svc_serv *serv, struct net *net) +{ + /* + * Create an svc_sock for the back channel service that shares the + * fore channel connection. 
+ * Returns the input port (0) and sets the svc_serv bc_xprt on success + */ + return svc_create_xprt(serv, "tcp-bc", net, PF_INET, 0, + SVC_SOCK_ANONYMOUS); +} + /* * The callback service for NFSv4.1 callbacks */ @@ -173,28 +149,9 @@ nfs41_callback_svc(void *vrqstp) * Bring up the NFSv4.1 callback service */ static struct svc_rqst * -nfs41_callback_up(struct svc_serv *serv, struct rpc_xprt *xprt) +nfs41_callback_up(struct svc_serv *serv) { struct svc_rqst *rqstp; - int ret; - - /* - * Create an svc_sock for the back channel service that shares the - * fore channel connection. - * Returns the input port (0) and sets the svc_serv bc_xprt on success - */ - ret = svc_create_xprt(serv, "tcp-bc", &init_net, PF_INET, 0, - SVC_SOCK_ANONYMOUS); - if (ret < 0) { - rqstp = ERR_PTR(ret); - goto out; - } - - /* - * Save the svc_serv in the transport so that it can - * be referenced when the session backchannel is initialized - */ - xprt->bc_serv = serv; INIT_LIST_HEAD(&serv->sv_cb_list); spin_lock_init(&serv->sv_cb_lock); @@ -204,86 +161,74 @@ nfs41_callback_up(struct svc_serv *serv, struct rpc_xprt *xprt) svc_xprt_put(serv->sv_bc_xprt); serv->sv_bc_xprt = NULL; } -out: dprintk("--> %s return %ld\n", __func__, IS_ERR(rqstp) ? 
PTR_ERR(rqstp) : 0); return rqstp; } -static inline int nfs_minorversion_callback_svc_setup(u32 minorversion, - struct svc_serv *serv, struct rpc_xprt *xprt, +static void nfs_minorversion_callback_svc_setup(struct svc_serv *serv, struct svc_rqst **rqstpp, int (**callback_svc)(void *vrqstp)) { - if (minorversion) { - *rqstpp = nfs41_callback_up(serv, xprt); - *callback_svc = nfs41_callback_svc; - } - return minorversion; + *rqstpp = nfs41_callback_up(serv); + *callback_svc = nfs41_callback_svc; } static inline void nfs_callback_bc_serv(u32 minorversion, struct rpc_xprt *xprt, - struct nfs_callback_data *cb_info) + struct svc_serv *serv) { if (minorversion) - xprt->bc_serv = cb_info->serv; + /* + * Save the svc_serv in the transport so that it can + * be referenced when the session backchannel is initialized + */ + xprt->bc_serv = serv; } #else -static inline int nfs_minorversion_callback_svc_setup(u32 minorversion, - struct svc_serv *serv, struct rpc_xprt *xprt, - struct svc_rqst **rqstpp, int (**callback_svc)(void *vrqstp)) +static int nfs41_callback_up_net(struct svc_serv *serv, struct net *net) { return 0; } +static void nfs_minorversion_callback_svc_setup(struct svc_serv *serv, + struct svc_rqst **rqstpp, int (**callback_svc)(void *vrqstp)) +{ + *rqstpp = ERR_PTR(-ENOTSUPP); + *callback_svc = ERR_PTR(-ENOTSUPP); +} + static inline void nfs_callback_bc_serv(u32 minorversion, struct rpc_xprt *xprt, - struct nfs_callback_data *cb_info) + struct svc_serv *serv) { } #endif /* CONFIG_NFS_V4_1 */ -/* - * Bring up the callback thread if it is not already up. 
- */ -int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt) +static int nfs_callback_start_svc(int minorversion, struct rpc_xprt *xprt, + struct svc_serv *serv) { - struct svc_serv *serv = NULL; struct svc_rqst *rqstp; int (*callback_svc)(void *vrqstp); struct nfs_callback_data *cb_info = &nfs_callback_info[minorversion]; char svc_name[12]; - int ret = 0; - int minorversion_setup; - struct net *net = &init_net; + int ret; - mutex_lock(&nfs_callback_mutex); - if (cb_info->users++ || cb_info->task != NULL) { - nfs_callback_bc_serv(minorversion, xprt, cb_info); - goto out; - } - serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE, NULL); - if (!serv) { - ret = -ENOMEM; - goto out_err; - } + nfs_callback_bc_serv(minorversion, xprt, serv); - ret = svc_bind(serv, net); - if (ret < 0) { - printk(KERN_WARNING "NFS: bind callback service failed\n"); - goto out_err; - } + if (cb_info->task) + return 0; - minorversion_setup = nfs_minorversion_callback_svc_setup(minorversion, - serv, xprt, &rqstp, &callback_svc); - if (!minorversion_setup) { + switch (minorversion) { + case 0: /* v4.0 callback setup */ - rqstp = nfs4_callback_up(serv, xprt); + rqstp = nfs4_callback_up(serv); callback_svc = nfs4_callback_svc; + break; + default: + nfs_minorversion_callback_svc_setup(serv, + &rqstp, &callback_svc); } - if (IS_ERR(rqstp)) { - ret = PTR_ERR(rqstp); - goto out_err; - } + if (IS_ERR(rqstp)) + return PTR_ERR(rqstp); svc_sock_update_bufs(serv); @@ -296,41 +241,165 @@ int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt) svc_exit_thread(cb_info->rqst); cb_info->rqst = NULL; cb_info->task = NULL; - goto out_err; + return ret; + } + dprintk("nfs_callback_up: service started\n"); + return 0; +} + +static void nfs_callback_down_net(u32 minorversion, struct svc_serv *serv, struct net *net) +{ + struct nfs_net *nn = net_generic(net, nfs_net_id); + + if (--nn->cb_users[minorversion]) + return; + + dprintk("NFS: destroy per-net callback data; net=%p\n", net); + 
svc_shutdown_net(serv, net); +} + +static int nfs_callback_up_net(int minorversion, struct svc_serv *serv, struct net *net) +{ + struct nfs_net *nn = net_generic(net, nfs_net_id); + int ret; + + if (nn->cb_users[minorversion]++) + return 0; + + dprintk("NFS: create per-net callback data; net=%p\n", net); + + ret = svc_bind(serv, net); + if (ret < 0) { + printk(KERN_WARNING "NFS: bind callback service failed\n"); + goto err_bind; + } + + switch (minorversion) { + case 0: + ret = nfs4_callback_up_net(serv, net); + break; + case 1: + ret = nfs41_callback_up_net(serv, net); + break; + default: + printk(KERN_ERR "NFS: unknown callback version: %d\n", + minorversion); + ret = -EINVAL; + break; + } + + if (ret < 0) { + printk(KERN_ERR "NFS: callback service start failed\n"); + goto err_socks; + } + return 0; + +err_socks: + svc_rpcb_cleanup(serv, net); +err_bind: + dprintk("NFS: Couldn't create callback socket: err = %d; " + "net = %p\n", ret, net); + return ret; +} + +static struct svc_serv *nfs_callback_create_svc(int minorversion) +{ + struct nfs_callback_data *cb_info = &nfs_callback_info[minorversion]; + struct svc_serv *serv; + + /* + * Check whether we're already up and running. + */ + if (cb_info->task) { + /* + * Note: increase service usage, because later in case of error + * svc_destroy() will be called. + */ + svc_get(cb_info->serv); + return cb_info->serv; + } + + /* + * Sanity check: if there's no task, + * we should be the first user ... 
+ */ + if (cb_info->users) + printk(KERN_WARNING "nfs_callback_create_svc: no kthread, %d users??\n", + cb_info->users); + + serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE, NULL); + if (!serv) { + printk(KERN_ERR "nfs_callback_create_svc: create service failed\n"); + return ERR_PTR(-ENOMEM); + } + /* As there is only one thread we need to over-ride the + * default maximum of 80 connections + */ + serv->sv_maxconn = 1024; + dprintk("nfs_callback_create_svc: service created\n"); + return serv; +} + +/* + * Bring up the callback thread if it is not already up. + */ +int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt) +{ + struct svc_serv *serv; + struct nfs_callback_data *cb_info = &nfs_callback_info[minorversion]; + int ret; + struct net *net = xprt->xprt_net; + + mutex_lock(&nfs_callback_mutex); + + serv = nfs_callback_create_svc(minorversion); + if (IS_ERR(serv)) { + ret = PTR_ERR(serv); + goto err_create; } -out: + + ret = nfs_callback_up_net(minorversion, serv, net); + if (ret < 0) + goto err_net; + + ret = nfs_callback_start_svc(minorversion, xprt, serv); + if (ret < 0) + goto err_start; + + cb_info->users++; /* * svc_create creates the svc_serv with sv_nrthreads == 1, and then * svc_prepare_thread increments that. So we need to call svc_destroy * on both success and failure so that the refcount is 1 when the * thread exits. */ - if (serv) - svc_destroy(serv); +err_net: + svc_destroy(serv); +err_create: mutex_unlock(&nfs_callback_mutex); return ret; -out_err: - dprintk("NFS: Couldn't create callback socket or server thread; " - "err = %d\n", ret); - cb_info->users--; - if (serv) - svc_shutdown_net(serv, net); - goto out; + +err_start: + nfs_callback_down_net(minorversion, serv, net); + dprintk("NFS: Couldn't create server thread; err = %d\n", ret); + goto err_net; } /* * Kill the callback thread if it's no longer being used. 
*/ -void nfs_callback_down(int minorversion) +void nfs_callback_down(int minorversion, struct net *net) { struct nfs_callback_data *cb_info = &nfs_callback_info[minorversion]; mutex_lock(&nfs_callback_mutex); + nfs_callback_down_net(minorversion, cb_info->serv, net); cb_info->users--; if (cb_info->users == 0 && cb_info->task != NULL) { kthread_stop(cb_info->task); - svc_shutdown_net(cb_info->serv, &init_net); + dprintk("nfs_callback_down: service stopped\n"); svc_exit_thread(cb_info->rqst); + dprintk("nfs_callback_down: service destroyed\n"); cb_info->serv = NULL; cb_info->rqst = NULL; cb_info->task = NULL; diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h index a5527c90a5a..efd54f0a4c4 100644 --- a/fs/nfs/callback.h +++ b/fs/nfs/callback.h @@ -142,7 +142,7 @@ extern __be32 nfs4_callback_recallany(struct cb_recallanyargs *args, struct cb_recallslotargs { struct sockaddr *crsa_addr; - uint32_t crsa_target_max_slots; + uint32_t crsa_target_highest_slotid; }; extern __be32 nfs4_callback_recallslot(struct cb_recallslotargs *args, void *dummy, @@ -167,8 +167,6 @@ extern __be32 nfs4_callback_layoutrecall( struct cb_layoutrecallargs *args, void *dummy, struct cb_process_state *cps); -extern void nfs4_check_drain_bc_complete(struct nfs4_session *ses); - struct cb_devicenotifyitem { uint32_t cbd_notify_type; uint32_t cbd_layout_type; @@ -192,9 +190,9 @@ extern __be32 nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_process_state *cps); extern __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy, struct cb_process_state *cps); -#ifdef CONFIG_NFS_V4 +#if IS_ENABLED(CONFIG_NFS_V4) extern int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt); -extern void nfs_callback_down(int minorversion); +extern void nfs_callback_down(int minorversion, struct net *net); extern int nfs4_validate_delegation_stateid(struct nfs_delegation *delegation, const nfs4_stateid *stateid); extern int nfs4_set_callback_sessionid(struct nfs_client *clp); @@ -209,6 +207,5 
@@ extern int nfs4_set_callback_sessionid(struct nfs_client *clp); extern unsigned int nfs_callback_set_tcpport; extern unsigned short nfs_callback_tcpport; -extern unsigned short nfs_callback_tcpport6; #endif /* __LINUX_FS_NFS_CALLBACK_H */ diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 1b5d809a105..264d1aa935f 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -14,6 +14,7 @@ #include "delegation.h" #include "internal.h" #include "pnfs.h" +#include "nfs4session.h" #ifdef NFS_DEBUG #define NFSDBG_FACILITY NFSDBG_CALLBACK @@ -122,7 +123,15 @@ static struct pnfs_layout_hdr * get_layout_by_fh_locked(struct nfs_client *clp, ino = igrab(lo->plh_inode); if (!ino) continue; - get_layout_hdr(lo); + spin_lock(&ino->i_lock); + /* Is this layout in the process of being freed? */ + if (NFS_I(ino)->layout != lo) { + spin_unlock(&ino->i_lock); + iput(ino); + continue; + } + pnfs_get_layout_hdr(lo); + spin_unlock(&ino->i_lock); return lo; } } @@ -158,7 +167,7 @@ static u32 initiate_file_draining(struct nfs_client *clp, ino = lo->plh_inode; spin_lock(&ino->i_lock); if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) || - mark_matching_lsegs_invalid(lo, &free_me_list, + pnfs_mark_matching_lsegs_invalid(lo, &free_me_list, &args->cbl_range)) rv = NFS4ERR_DELAY; else @@ -166,7 +175,7 @@ static u32 initiate_file_draining(struct nfs_client *clp, pnfs_set_layout_stateid(lo, &args->cbl_stateid, true); spin_unlock(&ino->i_lock); pnfs_free_lseg_list(&free_me_list); - put_layout_hdr(lo); + pnfs_put_layout_hdr(lo); iput(ino); return rv; } @@ -196,10 +205,18 @@ static u32 initiate_bulk_draining(struct nfs_client *clp, continue; list_for_each_entry(lo, &server->layouts, plh_layouts) { - if (!igrab(lo->plh_inode)) + ino = igrab(lo->plh_inode); + if (!ino) + continue; + spin_lock(&ino->i_lock); + /* Is this layout in the process of being freed? 
*/ + if (NFS_I(ino)->layout != lo) { + spin_unlock(&ino->i_lock); + iput(ino); continue; - get_layout_hdr(lo); - BUG_ON(!list_empty(&lo->plh_bulk_recall)); + } + pnfs_get_layout_hdr(lo); + spin_unlock(&ino->i_lock); list_add(&lo->plh_bulk_recall, &recall_list); } } @@ -211,12 +228,12 @@ static u32 initiate_bulk_draining(struct nfs_client *clp, ino = lo->plh_inode; spin_lock(&ino->i_lock); set_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags); - if (mark_matching_lsegs_invalid(lo, &free_me_list, &range)) + if (pnfs_mark_matching_lsegs_invalid(lo, &free_me_list, &range)) rv = NFS4ERR_DELAY; list_del_init(&lo->plh_bulk_recall); spin_unlock(&ino->i_lock); pnfs_free_lseg_list(&free_me_list); - put_layout_hdr(lo); + pnfs_put_layout_hdr(lo); iput(ino); } return rv; @@ -545,23 +562,16 @@ __be32 nfs4_callback_recallslot(struct cb_recallslotargs *args, void *dummy, if (!cps->clp) /* set in cb_sequence */ goto out; - dprintk_rcu("NFS: CB_RECALL_SLOT request from %s target max slots %d\n", + dprintk_rcu("NFS: CB_RECALL_SLOT request from %s target highest slotid %d\n", rpc_peeraddr2str(cps->clp->cl_rpcclient, RPC_DISPLAY_ADDR), - args->crsa_target_max_slots); + args->crsa_target_highest_slotid); fc_tbl = &cps->clp->cl_session->fc_slot_table; - status = htonl(NFS4ERR_BAD_HIGH_SLOT); - if (args->crsa_target_max_slots > fc_tbl->max_slots || - args->crsa_target_max_slots < 1) - goto out; - status = htonl(NFS4_OK); - if (args->crsa_target_max_slots == fc_tbl->max_slots) - goto out; - fc_tbl->target_max_slots = args->crsa_target_max_slots; - nfs41_handle_recall_slot(cps->clp); + nfs41_set_target_slotid(fc_tbl, args->crsa_target_highest_slotid); + nfs41_server_notify_target_slotid_update(cps->clp); out: dprintk("%s: exit with status = %d\n", __func__, ntohl(status)); return status; diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index e64b01d2a33..59461c957d9 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -16,6 +16,7 @@ #include "nfs4_fs.h" #include 
"callback.h" #include "internal.h" +#include "nfs4session.h" #define CB_OP_TAGLEN_MAXSZ (512) #define CB_OP_HDR_RES_MAXSZ (2 + CB_OP_TAGLEN_MAXSZ) @@ -520,7 +521,7 @@ static __be32 decode_recallslot_args(struct svc_rqst *rqstp, p = read_buf(xdr, 4); if (unlikely(p == NULL)) return htonl(NFS4ERR_BADXDR); - args->crsa_target_max_slots = ntohl(*p++); + args->crsa_target_highest_slotid = ntohl(*p++); return 0; } @@ -762,7 +763,7 @@ static void nfs4_callback_free_slot(struct nfs4_session *session) * A single slot, so highest used slotid is either 0 or -1 */ tbl->highest_used_slotid = NFS4_NO_SLOT; - nfs4_check_drain_bc_complete(session); + nfs4_session_drain_complete(session, tbl); spin_unlock(&tbl->slot_tbl_lock); } @@ -863,7 +864,7 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r .drc_status = 0, .clp = NULL, .slotid = NFS4_NO_SLOT, - .net = rqstp->rq_xprt->xpt_net, + .net = SVC_NET(rqstp), }; unsigned int nops = 0; @@ -879,7 +880,7 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r return rpc_garbage_args; if (hdr_arg.minorversion == 0) { - cps.clp = nfs4_find_client_ident(rqstp->rq_xprt->xpt_net, hdr_arg.cb_ident); + cps.clp = nfs4_find_client_ident(SVC_NET(rqstp), hdr_arg.cb_ident); if (!cps.clp || !check_gss_callback_principal(cps.clp, rqstp)) return rpc_drop_reply; } diff --git a/fs/nfs/client.c b/fs/nfs/client.c index f005b5bebdc..9f3c66438d0 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -51,54 +51,23 @@ #include "internal.h" #include "fscache.h" #include "pnfs.h" +#include "nfs.h" #include "netns.h" #define NFSDBG_FACILITY NFSDBG_CLIENT static DECLARE_WAIT_QUEUE_HEAD(nfs_client_active_wq); -#ifdef CONFIG_NFS_V4 - -/* - * Get a unique NFSv4.0 callback identifier which will be used - * by the V4.0 callback service to lookup the nfs_client struct - */ -static int nfs_get_cb_ident_idr(struct nfs_client *clp, int minorversion) -{ - int ret = 0; - struct nfs_net *nn = net_generic(clp->cl_net, 
nfs_net_id); - - if (clp->rpc_ops->version != 4 || minorversion != 0) - return ret; -retry: - if (!idr_pre_get(&nn->cb_ident_idr, GFP_KERNEL)) - return -ENOMEM; - spin_lock(&nn->nfs_client_lock); - ret = idr_get_new(&nn->cb_ident_idr, clp, &clp->cl_cb_ident); - spin_unlock(&nn->nfs_client_lock); - if (ret == -EAGAIN) - goto retry; - return ret; -} -#endif /* CONFIG_NFS_V4 */ - -/* - * Turn off NFSv4 uid/gid mapping when using AUTH_SYS - */ -static bool nfs4_disable_idmapping = true; +static DEFINE_SPINLOCK(nfs_version_lock); +static DEFINE_MUTEX(nfs_version_mutex); +static LIST_HEAD(nfs_versions); /* * RPC cruft for NFS */ static const struct rpc_version *nfs_version[5] = { -#ifdef CONFIG_NFS_V2 - [2] = &nfs_version2, -#endif -#ifdef CONFIG_NFS_V3 - [3] = &nfs_version3, -#endif -#ifdef CONFIG_NFS_V4 - [4] = &nfs_version4, -#endif + [2] = NULL, + [3] = NULL, + [4] = NULL, }; const struct rpc_program nfs_program = { @@ -114,32 +83,64 @@ struct rpc_stat nfs_rpcstat = { .program = &nfs_program }; +static struct nfs_subversion *find_nfs_version(unsigned int version) +{ + struct nfs_subversion *nfs; + spin_lock(&nfs_version_lock); -#ifdef CONFIG_NFS_V3_ACL -static struct rpc_stat nfsacl_rpcstat = { &nfsacl_program }; -static const struct rpc_version *nfsacl_version[] = { - [3] = &nfsacl_version3, -}; + list_for_each_entry(nfs, &nfs_versions, list) { + if (nfs->rpc_ops->version == version) { + spin_unlock(&nfs_version_lock); + return nfs; + } + } -const struct rpc_program nfsacl_program = { - .name = "nfsacl", - .number = NFS_ACL_PROGRAM, - .nrvers = ARRAY_SIZE(nfsacl_version), - .version = nfsacl_version, - .stats = &nfsacl_rpcstat, -}; -#endif /* CONFIG_NFS_V3_ACL */ - -struct nfs_client_initdata { - unsigned long init_flags; - const char *hostname; - const struct sockaddr *addr; - size_t addrlen; - const struct nfs_rpc_ops *rpc_ops; - int proto; - u32 minorversion; - struct net *net; -}; + spin_unlock(&nfs_version_lock); + return ERR_PTR(-EPROTONOSUPPORT); +} + +struct 
nfs_subversion *get_nfs_version(unsigned int version) +{ + struct nfs_subversion *nfs = find_nfs_version(version); + + if (IS_ERR(nfs)) { + mutex_lock(&nfs_version_mutex); + request_module("nfsv%d", version); + nfs = find_nfs_version(version); + mutex_unlock(&nfs_version_mutex); + } + + if (!IS_ERR(nfs)) + try_module_get(nfs->owner); + return nfs; +} + +void put_nfs_version(struct nfs_subversion *nfs) +{ + module_put(nfs->owner); +} + +void register_nfs_version(struct nfs_subversion *nfs) +{ + spin_lock(&nfs_version_lock); + + list_add(&nfs->list, &nfs_versions); + nfs_version[nfs->rpc_ops->version] = nfs->rpc_vers; + + spin_unlock(&nfs_version_lock); +} +EXPORT_SYMBOL_GPL(register_nfs_version); + +void unregister_nfs_version(struct nfs_subversion *nfs) +{ + spin_lock(&nfs_version_lock); + + nfs_version[nfs->rpc_ops->version] = NULL; + list_del(&nfs->list); + + spin_unlock(&nfs_version_lock); +} +EXPORT_SYMBOL_GPL(unregister_nfs_version); /* * Allocate a shared client record @@ -147,7 +148,7 @@ struct nfs_client_initdata { * Since these are allocated/deallocated very rarely, we don't * bother putting them in a slab cache... 
*/ -static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init) +struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init) { struct nfs_client *clp; struct rpc_cred *cred; @@ -156,7 +157,10 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_ if ((clp = kzalloc(sizeof(*clp), GFP_KERNEL)) == NULL) goto error_0; - clp->rpc_ops = cl_init->rpc_ops; + clp->cl_nfs_mod = cl_init->nfs_mod; + try_module_get(clp->cl_nfs_mod->owner); + + clp->rpc_ops = clp->cl_nfs_mod->rpc_ops; atomic_set(&clp->cl_count, 1); clp->cl_cons_state = NFS_CS_INITING; @@ -177,18 +181,6 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_ clp->cl_proto = cl_init->proto; clp->cl_net = get_net(cl_init->net); -#ifdef CONFIG_NFS_V4 - err = nfs_get_cb_ident_idr(clp, cl_init->minorversion); - if (err) - goto error_cleanup; - - spin_lock_init(&clp->cl_lock); - INIT_DELAYED_WORK(&clp->cl_renewd, nfs4_renew_state); - rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS client"); - clp->cl_state = 1 << NFS4CLNT_LEASE_EXPIRED; - clp->cl_minorversion = cl_init->minorversion; - clp->cl_mvops = nfs_v4_minor_ops[cl_init->minorversion]; -#endif cred = rpc_lookup_machine_cred("*"); if (!IS_ERR(cred)) clp->cl_machine_cred = cred; @@ -197,51 +189,14 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_ return clp; error_cleanup: + put_nfs_version(clp->cl_nfs_mod); kfree(clp); error_0: return ERR_PTR(err); } +EXPORT_SYMBOL_GPL(nfs_alloc_client); -#ifdef CONFIG_NFS_V4 -#ifdef CONFIG_NFS_V4_1 -static void nfs4_shutdown_session(struct nfs_client *clp) -{ - if (nfs4_has_session(clp)) { - nfs4_destroy_session(clp->cl_session); - nfs4_destroy_clientid(clp); - } - -} -#else /* CONFIG_NFS_V4_1 */ -static void nfs4_shutdown_session(struct nfs_client *clp) -{ -} -#endif /* CONFIG_NFS_V4_1 */ - -/* - * Destroy the NFS4 callback service - */ -static void nfs4_destroy_callback(struct nfs_client *clp) -{ - 
if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state)) - nfs_callback_down(clp->cl_mvops->minor_version); -} - -static void nfs4_shutdown_client(struct nfs_client *clp) -{ - if (__test_and_clear_bit(NFS_CS_RENEWD, &clp->cl_res_state)) - nfs4_kill_renewd(clp); - nfs4_shutdown_session(clp); - nfs4_destroy_callback(clp); - if (__test_and_clear_bit(NFS_CS_IDMAP, &clp->cl_res_state)) - nfs_idmap_delete(clp); - - rpc_destroy_wait_queue(&clp->cl_rpcwaitq); - kfree(clp->cl_serverowner); - kfree(clp->cl_serverscope); - kfree(clp->cl_implid); -} - +#if IS_ENABLED(CONFIG_NFS_V4) /* idr_remove_all is not needed as all id's are removed by nfs_put_client */ void nfs_cleanup_cb_ident_idr(struct net *net) { @@ -264,16 +219,7 @@ static void pnfs_init_server(struct nfs_server *server) rpc_init_wait_queue(&server->roc_rpcwaitq, "pNFS ROC"); } -static void nfs4_destroy_server(struct nfs_server *server) -{ - nfs4_purge_state_owners(server); -} - #else -static void nfs4_shutdown_client(struct nfs_client *clp) -{ -} - void nfs_cleanup_cb_ident_idr(struct net *net) { } @@ -291,12 +237,10 @@ static void pnfs_init_server(struct nfs_server *server) /* * Destroy a shared client record */ -static void nfs_free_client(struct nfs_client *clp) +void nfs_free_client(struct nfs_client *clp) { dprintk("--> nfs_free_client(%u)\n", clp->rpc_ops->version); - nfs4_shutdown_client(clp); - nfs_fscache_release_client_cookie(clp); /* -EIO all pending I/O */ @@ -307,11 +251,13 @@ static void nfs_free_client(struct nfs_client *clp) put_rpccred(clp->cl_machine_cred); put_net(clp->cl_net); + put_nfs_version(clp->cl_nfs_mod); kfree(clp->cl_hostname); kfree(clp); dprintk("<-- nfs_free_client()\n"); } +EXPORT_SYMBOL_GPL(nfs_free_client); /* * Release a reference to a shared client record @@ -331,9 +277,9 @@ void nfs_put_client(struct nfs_client *clp) nfs_cb_idr_remove_locked(clp); spin_unlock(&nn->nfs_client_lock); - BUG_ON(!list_empty(&clp->cl_superblocks)); + 
WARN_ON_ONCE(!list_empty(&clp->cl_superblocks)); - nfs_free_client(clp); + clp->rpc_ops->free_client(clp); } } EXPORT_SYMBOL_GPL(nfs_put_client); @@ -412,8 +358,8 @@ static int nfs_sockaddr_cmp_ip4(const struct sockaddr *sa1, * Test if two socket addresses represent the same actual socket, * by comparing (only) relevant fields, excluding the port number. */ -static int nfs_sockaddr_match_ipaddr(const struct sockaddr *sa1, - const struct sockaddr *sa2) +int nfs_sockaddr_match_ipaddr(const struct sockaddr *sa1, + const struct sockaddr *sa2) { if (sa1->sa_family != sa2->sa_family) return 0; @@ -426,6 +372,7 @@ static int nfs_sockaddr_match_ipaddr(const struct sockaddr *sa1, } return 0; } +EXPORT_SYMBOL_GPL(nfs_sockaddr_match_ipaddr); #endif /* CONFIG_NFS_V4_1 */ /* @@ -447,33 +394,6 @@ static int nfs_sockaddr_cmp(const struct sockaddr *sa1, return 0; } -#if defined(CONFIG_NFS_V4_1) -/* Common match routine for v4.0 and v4.1 callback services */ -static bool nfs4_cb_match_client(const struct sockaddr *addr, - struct nfs_client *clp, u32 minorversion) -{ - struct sockaddr *clap = (struct sockaddr *)&clp->cl_addr; - - /* Don't match clients that failed to initialise */ - if (!(clp->cl_cons_state == NFS_CS_READY || - clp->cl_cons_state == NFS_CS_SESSION_INITING)) - return false; - - smp_rmb(); - - /* Match the version and minorversion */ - if (clp->rpc_ops->version != 4 || - clp->cl_minorversion != minorversion) - return false; - - /* Match only the IP address, not the port number */ - if (!nfs_sockaddr_match_ipaddr(addr, clap)) - return false; - - return true; -} -#endif /* CONFIG_NFS_V4_1 */ - /* * Find an nfs_client on the list that matches the initialisation data * that is supplied. 
@@ -491,7 +411,7 @@ static struct nfs_client *nfs_match_client(const struct nfs_client_initdata *dat continue; /* Different NFS versions cannot share the same nfs_client */ - if (clp->rpc_ops != data->rpc_ops) + if (clp->rpc_ops != data->nfs_mod->rpc_ops) continue; if (clp->cl_proto != data->proto) @@ -519,6 +439,7 @@ int nfs_wait_client_init_complete(const struct nfs_client *clp) return wait_event_killable(nfs_client_active_wq, nfs_client_init_is_complete(clp)); } +EXPORT_SYMBOL_GPL(nfs_wait_client_init_complete); /* * Found an existing client. Make sure it's ready before returning. @@ -552,7 +473,7 @@ nfs_found_client(const struct nfs_client_initdata *cl_init, * Look up a client by IP address and protocol version * - creates a new record if one doesn't yet exist */ -static struct nfs_client * +struct nfs_client * nfs_get_client(const struct nfs_client_initdata *cl_init, const struct rpc_timeout *timeparms, const char *ip_addr, @@ -560,9 +481,10 @@ nfs_get_client(const struct nfs_client_initdata *cl_init, { struct nfs_client *clp, *new = NULL; struct nfs_net *nn = net_generic(cl_init->net, nfs_net_id); + const struct nfs_rpc_ops *rpc_ops = cl_init->nfs_mod->rpc_ops; dprintk("--> nfs_get_client(%s,v%u)\n", - cl_init->hostname ?: "", cl_init->rpc_ops->version); + cl_init->hostname ?: "", rpc_ops->version); /* see if the client already exists */ do { @@ -572,27 +494,28 @@ nfs_get_client(const struct nfs_client_initdata *cl_init, if (clp) { spin_unlock(&nn->nfs_client_lock); if (new) - nfs_free_client(new); + new->rpc_ops->free_client(new); return nfs_found_client(cl_init, clp); } if (new) { - list_add(&new->cl_share_link, &nn->nfs_client_list); + list_add_tail(&new->cl_share_link, + &nn->nfs_client_list); spin_unlock(&nn->nfs_client_lock); new->cl_flags = cl_init->init_flags; - return cl_init->rpc_ops->init_client(new, - timeparms, ip_addr, - authflavour); + return rpc_ops->init_client(new, timeparms, ip_addr, + authflavour); } spin_unlock(&nn->nfs_client_lock); - 
new = nfs_alloc_client(cl_init); + new = rpc_ops->alloc_client(cl_init); } while (!IS_ERR(new)); dprintk("<-- nfs_get_client() Failed to find %s (%ld)\n", cl_init->hostname ?: "", PTR_ERR(new)); return new; } +EXPORT_SYMBOL_GPL(nfs_get_client); /* * Mark a server as ready or failed @@ -603,11 +526,12 @@ void nfs_mark_client_ready(struct nfs_client *clp, int state) clp->cl_cons_state = state; wake_up_all(&nfs_client_active_wq); } +EXPORT_SYMBOL_GPL(nfs_mark_client_ready); /* * Initialise the timeout values for a connection */ -static void nfs_init_timeout_values(struct rpc_timeout *to, int proto, +void nfs_init_timeout_values(struct rpc_timeout *to, int proto, unsigned int timeo, unsigned int retrans) { to->to_initval = timeo * HZ / 10; @@ -644,13 +568,14 @@ static void nfs_init_timeout_values(struct rpc_timeout *to, int proto, BUG(); } } +EXPORT_SYMBOL_GPL(nfs_init_timeout_values); /* * Create an RPC client handle */ -static int nfs_create_rpc_client(struct nfs_client *clp, - const struct rpc_timeout *timeparms, - rpc_authflavor_t flavor) +int nfs_create_rpc_client(struct nfs_client *clp, + const struct rpc_timeout *timeparms, + rpc_authflavor_t flavor) { struct rpc_clnt *clnt = NULL; struct rpc_create_args args = { @@ -683,14 +608,14 @@ static int nfs_create_rpc_client(struct nfs_client *clp, clp->cl_rpcclient = clnt; return 0; } +EXPORT_SYMBOL_GPL(nfs_create_rpc_client); /* * Version 2 or 3 client destruction */ static void nfs_destroy_server(struct nfs_server *server) { - if (!(server->flags & NFS_MOUNT_LOCAL_FLOCK) || - !(server->flags & NFS_MOUNT_LOCAL_FCNTL)) + if (server->nlm_host) nlmclnt_done(server->nlm_host); } @@ -735,45 +660,16 @@ static int nfs_start_lockd(struct nfs_server *server) } /* - * Initialise an NFSv3 ACL client connection - */ -#ifdef CONFIG_NFS_V3_ACL -static void nfs_init_server_aclclient(struct nfs_server *server) -{ - if (server->nfs_client->rpc_ops->version != 3) - goto out_noacl; - if (server->flags & NFS_MOUNT_NOACL) - goto 
out_noacl; - - server->client_acl = rpc_bind_new_program(server->client, &nfsacl_program, 3); - if (IS_ERR(server->client_acl)) - goto out_noacl; - - /* No errors! Assume that Sun nfsacls are supported */ - server->caps |= NFS_CAP_ACLS; - return; - -out_noacl: - server->caps &= ~NFS_CAP_ACLS; -} -#else -static inline void nfs_init_server_aclclient(struct nfs_server *server) -{ - server->flags &= ~NFS_MOUNT_NOACL; - server->caps &= ~NFS_CAP_ACLS; -} -#endif - -/* * Create a general RPC client */ -static int nfs_init_server_rpcclient(struct nfs_server *server, +int nfs_init_server_rpcclient(struct nfs_server *server, const struct rpc_timeout *timeo, rpc_authflavor_t pseudoflavour) { struct nfs_client *clp = server->nfs_client; - server->client = rpc_clone_client(clp->cl_rpcclient); + server->client = rpc_clone_client_set_auth(clp->cl_rpcclient, + pseudoflavour); if (IS_ERR(server->client)) { dprintk("%s: couldn't create rpc_client!\n", __func__); return PTR_ERR(server->client); @@ -783,22 +679,13 @@ static int nfs_init_server_rpcclient(struct nfs_server *server, timeo, sizeof(server->client->cl_timeout_default)); server->client->cl_timeout = &server->client->cl_timeout_default; - - if (pseudoflavour != clp->cl_rpcclient->cl_auth->au_flavor) { - struct rpc_auth *auth; - - auth = rpcauth_create(pseudoflavour, server->client); - if (IS_ERR(auth)) { - dprintk("%s: couldn't create credcache!\n", __func__); - return PTR_ERR(auth); - } - } server->client->cl_softrtry = 0; if (server->flags & NFS_MOUNT_SOFT) server->client->cl_softrtry = 1; return 0; } +EXPORT_SYMBOL_GPL(nfs_init_server_rpcclient); /** * nfs_init_client - Initialise an NFS2 or NFS3 client @@ -838,18 +725,20 @@ error: dprintk("<-- nfs_init_client() = xerror %d\n", error); return ERR_PTR(error); } +EXPORT_SYMBOL_GPL(nfs_init_client); /* * Create a version 2 or 3 client */ static int nfs_init_server(struct nfs_server *server, - const struct nfs_parsed_mount_data *data) + const struct nfs_parsed_mount_data 
*data, + struct nfs_subversion *nfs_mod) { struct nfs_client_initdata cl_init = { .hostname = data->nfs_server.hostname, .addr = (const struct sockaddr *)&data->nfs_server.address, .addrlen = data->nfs_server.addrlen, - .rpc_ops = NULL, + .nfs_mod = nfs_mod, .proto = data->nfs_server.protocol, .net = data->net, }; @@ -859,25 +748,12 @@ static int nfs_init_server(struct nfs_server *server, dprintk("--> nfs_init_server()\n"); - switch (data->version) { -#ifdef CONFIG_NFS_V2 - case 2: - cl_init.rpc_ops = &nfs_v2_clientops; - break; -#endif -#ifdef CONFIG_NFS_V3 - case 3: - cl_init.rpc_ops = &nfs_v3_clientops; - break; -#endif - default: - return -EPROTONOSUPPORT; - } - nfs_init_timeout_values(&timeparms, data->nfs_server.protocol, data->timeo, data->retrans); if (data->flags & NFS_MOUNT_NORESVPORT) set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags); + if (server->options & NFS_OPTION_MIGRATION) + set_bit(NFS_CS_MIGRATION, &cl_init.init_flags); /* Allocate or find a client reference we can use */ clp = nfs_get_client(&cl_init, &timeparms, NULL, RPC_AUTH_UNIX); @@ -927,8 +803,6 @@ static int nfs_init_server(struct nfs_server *server, server->mountd_protocol = data->mount_server.protocol; server->namelen = data->namlen; - /* Create a client RPC handle for the NFSv3 ACL management interface */ - nfs_init_server_aclclient(server); dprintk("<-- nfs_init_server() = 0 [new %p]\n", clp); return 0; @@ -974,8 +848,6 @@ static void nfs_server_set_fsinfo(struct nfs_server *server, if (server->wsize > NFS_MAX_FILE_IO_SIZE) server->wsize = NFS_MAX_FILE_IO_SIZE; server->wpages = (server->wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - server->pnfs_blksize = fsinfo->blksize; - set_pnfs_layoutdriver(server, mntfh, fsinfo->layouttype); server->wtmult = nfs_block_bits(fsinfo->wtmult, NULL); @@ -1001,7 +873,7 @@ static void nfs_server_set_fsinfo(struct nfs_server *server, /* * Probe filesystem information, including the FSID on v2/v3 */ -static int nfs_probe_fsinfo(struct nfs_server 
*server, struct nfs_fh *mntfh, struct nfs_fattr *fattr) +int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, struct nfs_fattr *fattr) { struct nfs_fsinfo fsinfo; struct nfs_client *clp = server->nfs_client; @@ -1041,11 +913,12 @@ out_error: dprintk("nfs_probe_fsinfo: error = %d\n", -error); return error; } +EXPORT_SYMBOL_GPL(nfs_probe_fsinfo); /* * Copy useful information when duplicating a server record */ -static void nfs_server_copy_userdata(struct nfs_server *target, struct nfs_server *source) +void nfs_server_copy_userdata(struct nfs_server *target, struct nfs_server *source) { target->flags = source->flags; target->rsize = source->rsize; @@ -1057,8 +930,9 @@ static void nfs_server_copy_userdata(struct nfs_server *target, struct nfs_serve target->caps = source->caps; target->options = source->options; } +EXPORT_SYMBOL_GPL(nfs_server_copy_userdata); -static void nfs_server_insert_lists(struct nfs_server *server) +void nfs_server_insert_lists(struct nfs_server *server) { struct nfs_client *clp = server->nfs_client; struct nfs_net *nn = net_generic(clp->cl_net, nfs_net_id); @@ -1070,6 +944,7 @@ static void nfs_server_insert_lists(struct nfs_server *server) spin_unlock(&nn->nfs_client_lock); } +EXPORT_SYMBOL_GPL(nfs_server_insert_lists); static void nfs_server_remove_lists(struct nfs_server *server) { @@ -1092,7 +967,7 @@ static void nfs_server_remove_lists(struct nfs_server *server) /* * Allocate and initialise a server record */ -static struct nfs_server *nfs_alloc_server(void) +struct nfs_server *nfs_alloc_server(void) { struct nfs_server *server; @@ -1129,6 +1004,7 @@ static struct nfs_server *nfs_alloc_server(void) return server; } +EXPORT_SYMBOL_GPL(nfs_alloc_server); /* * Free up a server record @@ -1138,7 +1014,6 @@ void nfs_free_server(struct nfs_server *server) dprintk("--> nfs_free_server()\n"); nfs_server_remove_lists(server); - unset_pnfs_layoutdriver(server); if (server->destroy != NULL) server->destroy(server); @@ -1158,13 +1033,14 
@@ void nfs_free_server(struct nfs_server *server) nfs_release_automount_timer(); dprintk("<-- nfs_free_server()\n"); } +EXPORT_SYMBOL_GPL(nfs_free_server); /* * Create a version 2 or 3 volume record * - keyed on server and FSID */ -struct nfs_server *nfs_create_server(const struct nfs_parsed_mount_data *data, - struct nfs_fh *mntfh) +struct nfs_server *nfs_create_server(struct nfs_mount_info *mount_info, + struct nfs_subversion *nfs_mod) { struct nfs_server *server; struct nfs_fattr *fattr; @@ -1180,22 +1056,18 @@ struct nfs_server *nfs_create_server(const struct nfs_parsed_mount_data *data, goto error; /* Get a client representation */ - error = nfs_init_server(server, data); + error = nfs_init_server(server, mount_info->parsed, nfs_mod); if (error < 0) goto error; - BUG_ON(!server->nfs_client); - BUG_ON(!server->nfs_client->rpc_ops); - BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops); - /* Probe the root fh to retrieve its FSID */ - error = nfs_probe_fsinfo(server, mntfh, fattr); + error = nfs_probe_fsinfo(server, mount_info->mntfh, fattr); if (error < 0) goto error; if (server->nfs_client->rpc_ops->version == 3) { if (server->namelen == 0 || server->namelen > NFS3_MAXNAMLEN) server->namelen = NFS3_MAXNAMLEN; - if (!(data->flags & NFS_MOUNT_NORDIRPLUS)) + if (!(mount_info->parsed->flags & NFS_MOUNT_NORDIRPLUS)) server->caps |= NFS_CAP_READDIRPLUS; } else { if (server->namelen == 0 || server->namelen > NFS2_MAXNAMLEN) @@ -1203,7 +1075,7 @@ struct nfs_server *nfs_create_server(const struct nfs_parsed_mount_data *data, } if (!(fattr->valid & NFS_ATTR_FATTR)) { - error = server->nfs_client->rpc_ops->getattr(server, mntfh, fattr); + error = nfs_mod->rpc_ops->getattr(server, mount_info->mntfh, fattr); if (error < 0) { dprintk("nfs_create_server: getattr error = %d\n", -error); goto error; @@ -1225,522 +1097,7 @@ error: nfs_free_server(server); return ERR_PTR(error); } - -#ifdef CONFIG_NFS_V4 -/* - * NFSv4.0 callback thread helper - * - * Find a client by callback 
identifier - */ -struct nfs_client * -nfs4_find_client_ident(struct net *net, int cb_ident) -{ - struct nfs_client *clp; - struct nfs_net *nn = net_generic(net, nfs_net_id); - - spin_lock(&nn->nfs_client_lock); - clp = idr_find(&nn->cb_ident_idr, cb_ident); - if (clp) - atomic_inc(&clp->cl_count); - spin_unlock(&nn->nfs_client_lock); - return clp; -} - -#if defined(CONFIG_NFS_V4_1) -/* - * NFSv4.1 callback thread helper - * For CB_COMPOUND calls, find a client by IP address, protocol version, - * minorversion, and sessionID - * - * Returns NULL if no such client - */ -struct nfs_client * -nfs4_find_client_sessionid(struct net *net, const struct sockaddr *addr, - struct nfs4_sessionid *sid) -{ - struct nfs_client *clp; - struct nfs_net *nn = net_generic(net, nfs_net_id); - - spin_lock(&nn->nfs_client_lock); - list_for_each_entry(clp, &nn->nfs_client_list, cl_share_link) { - if (nfs4_cb_match_client(addr, clp, 1) == false) - continue; - - if (!nfs4_has_session(clp)) - continue; - - /* Match sessionid*/ - if (memcmp(clp->cl_session->sess_id.data, - sid->data, NFS4_MAX_SESSIONID_LEN) != 0) - continue; - - atomic_inc(&clp->cl_count); - spin_unlock(&nn->nfs_client_lock); - return clp; - } - spin_unlock(&nn->nfs_client_lock); - return NULL; -} - -#else /* CONFIG_NFS_V4_1 */ - -struct nfs_client * -nfs4_find_client_sessionid(struct net *net, const struct sockaddr *addr, - struct nfs4_sessionid *sid) -{ - return NULL; -} -#endif /* CONFIG_NFS_V4_1 */ - -/* - * Initialize the NFS4 callback service - */ -static int nfs4_init_callback(struct nfs_client *clp) -{ - int error; - - if (clp->rpc_ops->version == 4) { - struct rpc_xprt *xprt; - - xprt = rcu_dereference_raw(clp->cl_rpcclient->cl_xprt); - - if (nfs4_has_session(clp)) { - error = xprt_setup_backchannel(xprt, - NFS41_BC_MIN_CALLBACKS); - if (error < 0) - return error; - } - - error = nfs_callback_up(clp->cl_mvops->minor_version, xprt); - if (error < 0) { - dprintk("%s: failed to start callback. 
Error = %d\n", - __func__, error); - return error; - } - __set_bit(NFS_CS_CALLBACK, &clp->cl_res_state); - } - return 0; -} - -/* - * Initialize the minor version specific parts of an NFS4 client record - */ -static int nfs4_init_client_minor_version(struct nfs_client *clp) -{ -#if defined(CONFIG_NFS_V4_1) - if (clp->cl_mvops->minor_version) { - struct nfs4_session *session = NULL; - /* - * Create the session and mark it expired. - * When a SEQUENCE operation encounters the expired session - * it will do session recovery to initialize it. - */ - session = nfs4_alloc_session(clp); - if (!session) - return -ENOMEM; - - clp->cl_session = session; - /* - * The create session reply races with the server back - * channel probe. Mark the client NFS_CS_SESSION_INITING - * so that the client back channel can find the - * nfs_client struct - */ - nfs_mark_client_ready(clp, NFS_CS_SESSION_INITING); - } -#endif /* CONFIG_NFS_V4_1 */ - - return nfs4_init_callback(clp); -} - -/** - * nfs4_init_client - Initialise an NFS4 client record - * - * @clp: nfs_client to initialise - * @timeparms: timeout parameters for underlying RPC transport - * @ip_addr: callback IP address in presentation format - * @authflavor: authentication flavor for underlying RPC transport - * - * Returns pointer to an NFS client, or an ERR_PTR value. 
- */ -struct nfs_client *nfs4_init_client(struct nfs_client *clp, - const struct rpc_timeout *timeparms, - const char *ip_addr, - rpc_authflavor_t authflavour) -{ - char buf[INET6_ADDRSTRLEN + 1]; - int error; - - if (clp->cl_cons_state == NFS_CS_READY) { - /* the client is initialised already */ - dprintk("<-- nfs4_init_client() = 0 [already %p]\n", clp); - return clp; - } - - /* Check NFS protocol revision and initialize RPC op vector */ - clp->rpc_ops = &nfs_v4_clientops; - - __set_bit(NFS_CS_DISCRTRY, &clp->cl_flags); - error = nfs_create_rpc_client(clp, timeparms, authflavour); - if (error < 0) - goto error; - - /* If no clientaddr= option was specified, find a usable cb address */ - if (ip_addr == NULL) { - struct sockaddr_storage cb_addr; - struct sockaddr *sap = (struct sockaddr *)&cb_addr; - - error = rpc_localaddr(clp->cl_rpcclient, sap, sizeof(cb_addr)); - if (error < 0) - goto error; - error = rpc_ntop(sap, buf, sizeof(buf)); - if (error < 0) - goto error; - ip_addr = (const char *)buf; - } - strlcpy(clp->cl_ipaddr, ip_addr, sizeof(clp->cl_ipaddr)); - - error = nfs_idmap_new(clp); - if (error < 0) { - dprintk("%s: failed to create idmapper. 
Error = %d\n", - __func__, error); - goto error; - } - __set_bit(NFS_CS_IDMAP, &clp->cl_res_state); - - error = nfs4_init_client_minor_version(clp); - if (error < 0) - goto error; - - if (!nfs4_has_session(clp)) - nfs_mark_client_ready(clp, NFS_CS_READY); - return clp; - -error: - nfs_mark_client_ready(clp, error); - nfs_put_client(clp); - dprintk("<-- nfs4_init_client() = xerror %d\n", error); - return ERR_PTR(error); -} - -/* - * Set up an NFS4 client - */ -static int nfs4_set_client(struct nfs_server *server, - const char *hostname, - const struct sockaddr *addr, - const size_t addrlen, - const char *ip_addr, - rpc_authflavor_t authflavour, - int proto, const struct rpc_timeout *timeparms, - u32 minorversion, struct net *net) -{ - struct nfs_client_initdata cl_init = { - .hostname = hostname, - .addr = addr, - .addrlen = addrlen, - .rpc_ops = &nfs_v4_clientops, - .proto = proto, - .minorversion = minorversion, - .net = net, - }; - struct nfs_client *clp; - int error; - - dprintk("--> nfs4_set_client()\n"); - - if (server->flags & NFS_MOUNT_NORESVPORT) - set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags); - - /* Allocate or find a client reference we can use */ - clp = nfs_get_client(&cl_init, timeparms, ip_addr, authflavour); - if (IS_ERR(clp)) { - error = PTR_ERR(clp); - goto error; - } - - /* - * Query for the lease time on clientid setup or renewal - * - * Note that this will be set on nfs_clients that were created - * only for the DS role and did not set this bit, but now will - * serve a dual role. - */ - set_bit(NFS_CS_CHECK_LEASE_TIME, &clp->cl_res_state); - - server->nfs_client = clp; - dprintk("<-- nfs4_set_client() = 0 [new %p]\n", clp); - return 0; -error: - dprintk("<-- nfs4_set_client() = xerror %d\n", error); - return error; -} - -/* - * Set up a pNFS Data Server client. - * - * Return any existing nfs_client that matches server address,port,version - * and minorversion. 
- * - * For a new nfs_client, use a soft mount (default), a low retrans and a - * low timeout interval so that if a connection is lost, we retry through - * the MDS. - */ -struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp, - const struct sockaddr *ds_addr, int ds_addrlen, - int ds_proto, unsigned int ds_timeo, unsigned int ds_retrans) -{ - struct nfs_client_initdata cl_init = { - .addr = ds_addr, - .addrlen = ds_addrlen, - .rpc_ops = &nfs_v4_clientops, - .proto = ds_proto, - .minorversion = mds_clp->cl_minorversion, - .net = mds_clp->cl_net, - }; - struct rpc_timeout ds_timeout; - struct nfs_client *clp; - - /* - * Set an authflavor equual to the MDS value. Use the MDS nfs_client - * cl_ipaddr so as to use the same EXCHANGE_ID co_ownerid as the MDS - * (section 13.1 RFC 5661). - */ - nfs_init_timeout_values(&ds_timeout, ds_proto, ds_timeo, ds_retrans); - clp = nfs_get_client(&cl_init, &ds_timeout, mds_clp->cl_ipaddr, - mds_clp->cl_rpcclient->cl_auth->au_flavor); - - dprintk("<-- %s %p\n", __func__, clp); - return clp; -} -EXPORT_SYMBOL_GPL(nfs4_set_ds_client); - -/* - * Session has been established, and the client marked ready. - * Set the mount rsize and wsize with negotiated fore channel - * attributes which will be bound checked in nfs_server_set_fsinfo. 
- */ -static void nfs4_session_set_rwsize(struct nfs_server *server) -{ -#ifdef CONFIG_NFS_V4_1 - struct nfs4_session *sess; - u32 server_resp_sz; - u32 server_rqst_sz; - - if (!nfs4_has_session(server->nfs_client)) - return; - sess = server->nfs_client->cl_session; - server_resp_sz = sess->fc_attrs.max_resp_sz - nfs41_maxread_overhead; - server_rqst_sz = sess->fc_attrs.max_rqst_sz - nfs41_maxwrite_overhead; - - if (server->rsize > server_resp_sz) - server->rsize = server_resp_sz; - if (server->wsize > server_rqst_sz) - server->wsize = server_rqst_sz; -#endif /* CONFIG_NFS_V4_1 */ -} - -static int nfs4_server_common_setup(struct nfs_server *server, - struct nfs_fh *mntfh) -{ - struct nfs_fattr *fattr; - int error; - - BUG_ON(!server->nfs_client); - BUG_ON(!server->nfs_client->rpc_ops); - BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops); - - /* data servers support only a subset of NFSv4.1 */ - if (is_ds_only_client(server->nfs_client)) - return -EPROTONOSUPPORT; - - fattr = nfs_alloc_fattr(); - if (fattr == NULL) - return -ENOMEM; - - /* We must ensure the session is initialised first */ - error = nfs4_init_session(server); - if (error < 0) - goto out; - - /* Probe the root fh to retrieve its FSID and filehandle */ - error = nfs4_get_rootfh(server, mntfh); - if (error < 0) - goto out; - - dprintk("Server FSID: %llx:%llx\n", - (unsigned long long) server->fsid.major, - (unsigned long long) server->fsid.minor); - dprintk("Mount FH: %d\n", mntfh->size); - - nfs4_session_set_rwsize(server); - - error = nfs_probe_fsinfo(server, mntfh, fattr); - if (error < 0) - goto out; - - if (server->namelen == 0 || server->namelen > NFS4_MAXNAMLEN) - server->namelen = NFS4_MAXNAMLEN; - - nfs_server_insert_lists(server); - server->mount_time = jiffies; - server->destroy = nfs4_destroy_server; -out: - nfs_free_fattr(fattr); - return error; -} - -/* - * Create a version 4 volume record - */ -static int nfs4_init_server(struct nfs_server *server, - const struct nfs_parsed_mount_data 
*data) -{ - struct rpc_timeout timeparms; - int error; - - dprintk("--> nfs4_init_server()\n"); - - nfs_init_timeout_values(&timeparms, data->nfs_server.protocol, - data->timeo, data->retrans); - - /* Initialise the client representation from the mount data */ - server->flags = data->flags; - server->caps |= NFS_CAP_ATOMIC_OPEN|NFS_CAP_CHANGE_ATTR|NFS_CAP_POSIX_LOCK; - if (!(data->flags & NFS_MOUNT_NORDIRPLUS)) - server->caps |= NFS_CAP_READDIRPLUS; - server->options = data->options; - - /* Get a client record */ - error = nfs4_set_client(server, - data->nfs_server.hostname, - (const struct sockaddr *)&data->nfs_server.address, - data->nfs_server.addrlen, - data->client_address, - data->auth_flavors[0], - data->nfs_server.protocol, - &timeparms, - data->minorversion, - data->net); - if (error < 0) - goto error; - - /* - * Don't use NFS uid/gid mapping if we're using AUTH_SYS or lower - * authentication. - */ - if (nfs4_disable_idmapping && data->auth_flavors[0] == RPC_AUTH_UNIX) - server->caps |= NFS_CAP_UIDGID_NOMAP; - - if (data->rsize) - server->rsize = nfs_block_size(data->rsize, NULL); - if (data->wsize) - server->wsize = nfs_block_size(data->wsize, NULL); - - server->acregmin = data->acregmin * HZ; - server->acregmax = data->acregmax * HZ; - server->acdirmin = data->acdirmin * HZ; - server->acdirmax = data->acdirmax * HZ; - - server->port = data->nfs_server.port; - - error = nfs_init_server_rpcclient(server, &timeparms, data->auth_flavors[0]); - -error: - /* Done */ - dprintk("<-- nfs4_init_server() = %d\n", error); - return error; -} - -/* - * Create a version 4 volume record - * - keyed on server and FSID - */ -struct nfs_server *nfs4_create_server(const struct nfs_parsed_mount_data *data, - struct nfs_fh *mntfh) -{ - struct nfs_server *server; - int error; - - dprintk("--> nfs4_create_server()\n"); - - server = nfs_alloc_server(); - if (!server) - return ERR_PTR(-ENOMEM); - - /* set up the general RPC client */ - error = nfs4_init_server(server, data); - 
if (error < 0) - goto error; - - error = nfs4_server_common_setup(server, mntfh); - if (error < 0) - goto error; - - dprintk("<-- nfs4_create_server() = %p\n", server); - return server; - -error: - nfs_free_server(server); - dprintk("<-- nfs4_create_server() = error %d\n", error); - return ERR_PTR(error); -} - -/* - * Create an NFS4 referral server record - */ -struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data, - struct nfs_fh *mntfh) -{ - struct nfs_client *parent_client; - struct nfs_server *server, *parent_server; - int error; - - dprintk("--> nfs4_create_referral_server()\n"); - - server = nfs_alloc_server(); - if (!server) - return ERR_PTR(-ENOMEM); - - parent_server = NFS_SB(data->sb); - parent_client = parent_server->nfs_client; - - /* Initialise the client representation from the parent server */ - nfs_server_copy_userdata(server, parent_server); - server->caps |= NFS_CAP_ATOMIC_OPEN|NFS_CAP_CHANGE_ATTR; - - /* Get a client representation. - * Note: NFSv4 always uses TCP, */ - error = nfs4_set_client(server, data->hostname, - data->addr, - data->addrlen, - parent_client->cl_ipaddr, - data->authflavor, - rpc_protocol(parent_server->client), - parent_server->client->cl_timeout, - parent_client->cl_mvops->minor_version, - parent_client->cl_net); - if (error < 0) - goto error; - - error = nfs_init_server_rpcclient(server, parent_server->client->cl_timeout, data->authflavor); - if (error < 0) - goto error; - - error = nfs4_server_common_setup(server, mntfh); - if (error < 0) - goto error; - - dprintk("<-- nfs_create_referral_server() = %p\n", server); - return server; - -error: - nfs_free_server(server); - dprintk("<-- nfs4_create_referral_server() = error %d\n", error); - return ERR_PTR(error); -} - -#endif /* CONFIG_NFS_V4 */ +EXPORT_SYMBOL_GPL(nfs_create_server); /* * Clone an NFS2, NFS3 or NFS4 server record @@ -1780,8 +1137,6 @@ struct nfs_server *nfs_clone_server(struct nfs_server *source, flavor); if (error < 0) goto 
out_free_server; - if (!IS_ERR(source->client_acl)) - nfs_init_server_aclclient(server); /* probe the filesystem info for this server filesystem */ error = nfs_probe_fsinfo(server, fh, fattr_fsinfo); @@ -1812,6 +1167,7 @@ out_free_server: dprintk("<-- nfs_clone_server() = error %d\n", error); return ERR_PTR(error); } +EXPORT_SYMBOL_GPL(nfs_clone_server); void nfs_clients_init(struct net *net) { @@ -1819,7 +1175,7 @@ void nfs_clients_init(struct net *net) INIT_LIST_HEAD(&nn->nfs_client_list); INIT_LIST_HEAD(&nn->nfs_volume_list); -#ifdef CONFIG_NFS_V4 +#if IS_ENABLED(CONFIG_NFS_V4) idr_init(&nn->cb_ident_idr); #endif spin_lock_init(&nn->nfs_client_lock); @@ -2091,7 +1447,3 @@ void nfs_fs_proc_exit(void) } #endif /* CONFIG_PROC_FS */ - -module_param(nfs4_disable_idmapping, bool, 0644); -MODULE_PARM_DESC(nfs4_disable_idmapping, - "Turn off NFSv4 idmapping when using 'sec=sys'"); diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index bd3a9601d32..81c5eec3cf3 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -47,7 +47,7 @@ void nfs_mark_delegation_referenced(struct nfs_delegation *delegation) * * Returns one if inode has the indicated delegation, otherwise zero. */ -int nfs_have_delegation(struct inode *inode, fmode_t flags) +int nfs4_have_delegation(struct inode *inode, fmode_t flags) { struct nfs_delegation *delegation; int ret = 0; @@ -388,7 +388,7 @@ void nfs_inode_return_delegation_noreclaim(struct inode *inode) * * Returns zero on success, or a negative errno value. 
*/ -int nfs_inode_return_delegation(struct inode *inode) +int nfs4_inode_return_delegation(struct inode *inode) { struct nfs_server *server = NFS_SERVER(inode); struct nfs_inode *nfsi = NFS_I(inode); @@ -417,9 +417,8 @@ static void nfs_mark_return_delegation(struct nfs_server *server, * @sb: sb to process * */ -void nfs_super_return_all_delegations(struct super_block *sb) +void nfs_server_return_all_delegations(struct nfs_server *server) { - struct nfs_server *server = NFS_SB(sb); struct nfs_client *clp = server->nfs_client; struct nfs_delegation *delegation; diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h index 72709c4193f..bbc6a4dba0d 100644 --- a/fs/nfs/delegation.h +++ b/fs/nfs/delegation.h @@ -8,7 +8,7 @@ #ifndef FS_NFS_DELEGATION_H #define FS_NFS_DELEGATION_H -#if defined(CONFIG_NFS_V4) +#if IS_ENABLED(CONFIG_NFS_V4) /* * NFSv4 delegation */ @@ -33,12 +33,12 @@ enum { int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res); void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res); -int nfs_inode_return_delegation(struct inode *inode); +int nfs4_inode_return_delegation(struct inode *inode); int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid); void nfs_inode_return_delegation_noreclaim(struct inode *inode); struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs_fh *fhandle); -void nfs_super_return_all_delegations(struct super_block *sb); +void nfs_server_return_all_delegations(struct nfs_server *); void nfs_expire_all_delegations(struct nfs_client *clp); void nfs_expire_all_delegation_types(struct nfs_client *clp, fmode_t flags); void nfs_expire_unreferenced_delegations(struct nfs_client *clp); @@ -56,24 +56,13 @@ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl); bool nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode, fmode_t flags); void 
nfs_mark_delegation_referenced(struct nfs_delegation *delegation); -int nfs_have_delegation(struct inode *inode, fmode_t flags); +int nfs4_have_delegation(struct inode *inode, fmode_t flags); -#else -static inline int nfs_have_delegation(struct inode *inode, fmode_t flags) -{ - return 0; -} - -static inline int nfs_inode_return_delegation(struct inode *inode) -{ - nfs_wb_all(inode); - return 0; -} #endif static inline int nfs_have_delegated_attributes(struct inode *inode) { - return nfs_have_delegation(inode, FMODE_READ) && + return NFS_PROTO(inode)->have_delegation(inode, FMODE_READ) && !(NFS_I(inode)->cache_validity & NFS_INO_REVAL_FORCED); } diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index f430057ff3b..1b2d7eb9379 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -17,6 +17,7 @@ * 6 Jun 1999 Cache readdir lookups in the page cache. -DaveM */ +#include <linux/module.h> #include <linux/time.h> #include <linux/errno.h> #include <linux/stat.h> @@ -46,16 +47,6 @@ static int nfs_opendir(struct inode *, struct file *); static int nfs_closedir(struct inode *, struct file *); static int nfs_readdir(struct file *, void *, filldir_t); -static struct dentry *nfs_lookup(struct inode *, struct dentry *, struct nameidata *); -static int nfs_create(struct inode *, struct dentry *, umode_t, struct nameidata *); -static int nfs_mkdir(struct inode *, struct dentry *, umode_t); -static int nfs_rmdir(struct inode *, struct dentry *); -static int nfs_unlink(struct inode *, struct dentry *); -static int nfs_symlink(struct inode *, struct dentry *, const char *); -static int nfs_link(struct dentry *, struct inode *, struct dentry *); -static int nfs_mknod(struct inode *, struct dentry *, umode_t, dev_t); -static int nfs_rename(struct inode *, struct dentry *, - struct inode *, struct dentry *); static int nfs_fsync_dir(struct file *, loff_t, loff_t, int); static loff_t nfs_llseek_dir(struct file *, loff_t, int); static void nfs_readdir_clear_array(struct page*); @@ -69,71 +60,10 @@ const 
struct file_operations nfs_dir_operations = { .fsync = nfs_fsync_dir, }; -const struct inode_operations nfs_dir_inode_operations = { - .create = nfs_create, - .lookup = nfs_lookup, - .link = nfs_link, - .unlink = nfs_unlink, - .symlink = nfs_symlink, - .mkdir = nfs_mkdir, - .rmdir = nfs_rmdir, - .mknod = nfs_mknod, - .rename = nfs_rename, - .permission = nfs_permission, - .getattr = nfs_getattr, - .setattr = nfs_setattr, -}; - const struct address_space_operations nfs_dir_aops = { .freepage = nfs_readdir_clear_array, }; -#ifdef CONFIG_NFS_V3 -const struct inode_operations nfs3_dir_inode_operations = { - .create = nfs_create, - .lookup = nfs_lookup, - .link = nfs_link, - .unlink = nfs_unlink, - .symlink = nfs_symlink, - .mkdir = nfs_mkdir, - .rmdir = nfs_rmdir, - .mknod = nfs_mknod, - .rename = nfs_rename, - .permission = nfs_permission, - .getattr = nfs_getattr, - .setattr = nfs_setattr, - .listxattr = nfs3_listxattr, - .getxattr = nfs3_getxattr, - .setxattr = nfs3_setxattr, - .removexattr = nfs3_removexattr, -}; -#endif /* CONFIG_NFS_V3 */ - -#ifdef CONFIG_NFS_V4 - -static struct dentry *nfs_atomic_lookup(struct inode *, struct dentry *, struct nameidata *); -static int nfs_open_create(struct inode *dir, struct dentry *dentry, umode_t mode, struct nameidata *nd); -const struct inode_operations nfs4_dir_inode_operations = { - .create = nfs_open_create, - .lookup = nfs_atomic_lookup, - .link = nfs_link, - .unlink = nfs_unlink, - .symlink = nfs_symlink, - .mkdir = nfs_mkdir, - .rmdir = nfs_rmdir, - .mknod = nfs_mknod, - .rename = nfs_rename, - .permission = nfs_permission, - .getattr = nfs_getattr, - .setattr = nfs_setattr, - .getxattr = generic_getxattr, - .setxattr = generic_setxattr, - .listxattr = generic_listxattr, - .removexattr = generic_removexattr, -}; - -#endif /* CONFIG_NFS_V4 */ - static struct nfs_open_dir_context *alloc_nfs_open_dir_context(struct inode *dir, struct rpc_cred *cred) { struct nfs_open_dir_context *ctx; @@ -520,7 +450,8 @@ void 
nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry) nfs_refresh_inode(dentry->d_inode, entry->fattr); goto out; } else { - d_drop(dentry); + if (d_invalidate(dentry) != 0) + goto out; dput(dentry); } } @@ -940,7 +871,7 @@ out: return res; } -static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int origin) +static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int whence) { struct dentry *dentry = filp->f_path.dentry; struct inode *inode = dentry->d_inode; @@ -949,10 +880,10 @@ static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int origin) dfprintk(FILE, "NFS: llseek dir(%s/%s, %lld, %d)\n", dentry->d_parent->d_name.name, dentry->d_name.name, - offset, origin); + offset, whence); mutex_lock(&inode->i_mutex); - switch (origin) { + switch (whence) { case 1: offset += filp->f_pos; case 0: @@ -1006,6 +937,7 @@ void nfs_force_lookup_revalidate(struct inode *dir) { NFS_I(dir)->cache_change_attribute++; } +EXPORT_SYMBOL_GPL(nfs_force_lookup_revalidate); /* * A check for whether or not the parent directory has changed. @@ -1029,27 +961,14 @@ static int nfs_check_verifier(struct inode *dir, struct dentry *dentry) } /* - * Return the intent data that applies to this particular path component - * - * Note that the current set of intents only apply to the very last - * component of the path and none of them is set before that last - * component. - */ -static inline unsigned int nfs_lookup_check_intent(struct nameidata *nd, - unsigned int mask) -{ - return nd->flags & mask; -} - -/* * Use intent information to check whether or not we're going to do * an O_EXCL create using this path component. 
*/ -static int nfs_is_exclusive_create(struct inode *dir, struct nameidata *nd) +static int nfs_is_exclusive_create(struct inode *dir, unsigned int flags) { if (NFS_PROTO(dir)->version == 2) return 0; - return nd && nfs_lookup_check_intent(nd, LOOKUP_EXCL); + return flags & LOOKUP_EXCL; } /* @@ -1060,28 +979,28 @@ static int nfs_is_exclusive_create(struct inode *dir, struct nameidata *nd) * particular file and the "nocto" mount flag is not set. * */ -static inline -int nfs_lookup_verify_inode(struct inode *inode, struct nameidata *nd) +static +int nfs_lookup_verify_inode(struct inode *inode, unsigned int flags) { struct nfs_server *server = NFS_SERVER(inode); + int ret; if (IS_AUTOMOUNT(inode)) return 0; - if (nd != NULL) { - /* VFS wants an on-the-wire revalidation */ - if (nd->flags & LOOKUP_REVAL) - goto out_force; - /* This is an open(2) */ - if (nfs_lookup_check_intent(nd, LOOKUP_OPEN) != 0 && - !(server->flags & NFS_MOUNT_NOCTO) && - (S_ISREG(inode->i_mode) || - S_ISDIR(inode->i_mode))) - goto out_force; - return 0; - } - return nfs_revalidate_inode(server, inode); + /* VFS wants an on-the-wire revalidation */ + if (flags & LOOKUP_REVAL) + goto out_force; + /* This is an open(2) */ + if ((flags & LOOKUP_OPEN) && !(server->flags & NFS_MOUNT_NOCTO) && + (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode))) + goto out_force; +out: + return (inode->i_nlink == 0) ? 
-ENOENT : 0; out_force: - return __nfs_revalidate_inode(server, inode); + ret = __nfs_revalidate_inode(server, inode); + if (ret != 0) + return ret; + goto out; } /* @@ -1093,10 +1012,10 @@ out_force: */ static inline int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry, - struct nameidata *nd) + unsigned int flags) { /* Don't revalidate a negative dentry if we're creating a new file */ - if (nd != NULL && nfs_lookup_check_intent(nd, LOOKUP_CREATE) != 0) + if (flags & LOOKUP_CREATE) return 0; if (NFS_SERVER(dir)->flags & NFS_MOUNT_LOOKUP_CACHE_NONEG) return 1; @@ -1114,7 +1033,7 @@ int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry, * If the parent directory is seen to have changed, we throw out the * cached dentry and do a new lookup. */ -static int nfs_lookup_revalidate(struct dentry *dentry, struct nameidata *nd) +static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags) { struct inode *dir; struct inode *inode; @@ -1123,7 +1042,7 @@ static int nfs_lookup_revalidate(struct dentry *dentry, struct nameidata *nd) struct nfs_fattr *fattr = NULL; int error; - if (nd->flags & LOOKUP_RCU) + if (flags & LOOKUP_RCU) return -ECHILD; parent = dget_parent(dentry); @@ -1132,7 +1051,7 @@ static int nfs_lookup_revalidate(struct dentry *dentry, struct nameidata *nd) inode = dentry->d_inode; if (!inode) { - if (nfs_neg_need_reval(dir, dentry, nd)) + if (nfs_neg_need_reval(dir, dentry, flags)) goto out_bad; goto out_valid_noent; } @@ -1144,12 +1063,12 @@ static int nfs_lookup_revalidate(struct dentry *dentry, struct nameidata *nd) goto out_bad; } - if (nfs_have_delegation(inode, FMODE_READ)) + if (NFS_PROTO(dir)->have_delegation(inode, FMODE_READ)) goto out_set_verifier; /* Force a full look up iff the parent directory has changed */ - if (!nfs_is_exclusive_create(dir, nd) && nfs_check_verifier(dir, dentry)) { - if (nfs_lookup_verify_inode(inode, nd)) + if (!nfs_is_exclusive_create(dir, flags) && nfs_check_verifier(dir, dentry)) { + if 
(nfs_lookup_verify_inode(inode, flags)) goto out_zap_parent; goto out_valid; } @@ -1187,6 +1106,8 @@ out_set_verifier: out_zap_parent: nfs_zap_caches(dir); out_bad: + nfs_free_fattr(fattr); + nfs_free_fhandle(fhandle); nfs_mark_for_revalidate(dir); if (inode && S_ISDIR(inode->i_mode)) { /* Purge readdir caches. */ @@ -1199,8 +1120,6 @@ out_zap_parent: shrink_dcache_parent(dentry); } d_drop(dentry); - nfs_free_fattr(fattr); - nfs_free_fhandle(fhandle); dput(parent); dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) is invalid\n", __func__, dentry->d_parent->d_name.name, @@ -1242,11 +1161,14 @@ static int nfs_dentry_delete(const struct dentry *dentry) } +/* Ensure that we revalidate inode->i_nlink */ static void nfs_drop_nlink(struct inode *inode) { spin_lock(&inode->i_lock); - if (inode->i_nlink > 0) - drop_nlink(inode); + /* drop the inode if we're reasonably sure this is the last link */ + if (inode->i_nlink == 1) + clear_nlink(inode); + NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATTR; spin_unlock(&inode->i_lock); } @@ -1261,8 +1183,8 @@ static void nfs_dentry_iput(struct dentry *dentry, struct inode *inode) NFS_I(inode)->cache_validity |= NFS_INO_INVALID_DATA; if (dentry->d_flags & DCACHE_NFSFS_RENAMED) { - drop_nlink(inode); nfs_complete_unlink(dentry, inode); + nfs_drop_nlink(inode); } iput(inode); } @@ -1285,8 +1207,9 @@ const struct dentry_operations nfs_dentry_operations = { .d_automount = nfs_d_automount, .d_release = nfs_d_release, }; +EXPORT_SYMBOL_GPL(nfs_dentry_operations); -static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) +struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags) { struct dentry *res; struct dentry *parent; @@ -1307,7 +1230,7 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru * If we're doing an exclusive create, optimize away the lookup * but don't hash the dentry. 
*/ - if (nfs_is_exclusive_create(dir, nd)) { + if (nfs_is_exclusive_create(dir, flags)) { d_instantiate(dentry, NULL); res = NULL; goto out; @@ -1352,9 +1275,10 @@ out: nfs_free_fhandle(fhandle); return res; } +EXPORT_SYMBOL_GPL(nfs_lookup); -#ifdef CONFIG_NFS_V4 -static int nfs4_lookup_revalidate(struct dentry *, struct nameidata *); +#if IS_ENABLED(CONFIG_NFS_V4) +static int nfs4_lookup_revalidate(struct dentry *, unsigned int); const struct dentry_operations nfs4_dentry_operations = { .d_revalidate = nfs4_lookup_revalidate, @@ -1363,24 +1287,7 @@ const struct dentry_operations nfs4_dentry_operations = { .d_automount = nfs_d_automount, .d_release = nfs_d_release, }; - -/* - * Use intent information to determine whether we need to substitute - * the NFSv4-style stateful OPEN for the LOOKUP call - */ -static int is_atomic_open(struct nameidata *nd) -{ - if (nd == NULL || nfs_lookup_check_intent(nd, LOOKUP_OPEN) == 0) - return 0; - /* NFS does not (yet) have a stateful open for directories */ - if (nd->flags & LOOKUP_DIRECTORY) - return 0; - /* Are we trying to write to a read only partition? 
*/ - if (__mnt_is_readonly(nd->path.mnt) && - (nd->intent.open.flags & (O_CREAT|O_TRUNC|O_ACCMODE))) - return 0; - return 1; -} +EXPORT_SYMBOL_GPL(nfs4_dentry_operations); static fmode_t flags_to_mode(int flags) { @@ -1403,136 +1310,144 @@ static int do_open(struct inode *inode, struct file *filp) return 0; } -static int nfs_intent_set_file(struct nameidata *nd, struct nfs_open_context *ctx) +static int nfs_finish_open(struct nfs_open_context *ctx, + struct dentry *dentry, + struct file *file, unsigned open_flags, + int *opened) { - struct file *filp; - int ret = 0; + int err; + + if (ctx->dentry != dentry) { + dput(ctx->dentry); + ctx->dentry = dget(dentry); + } /* If the open_intent is for execute, we have an extra check to make */ if (ctx->mode & FMODE_EXEC) { - ret = nfs_may_open(ctx->dentry->d_inode, - ctx->cred, - nd->intent.open.flags); - if (ret < 0) + err = nfs_may_open(dentry->d_inode, ctx->cred, open_flags); + if (err < 0) goto out; } - filp = lookup_instantiate_filp(nd, ctx->dentry, do_open); - if (IS_ERR(filp)) - ret = PTR_ERR(filp); - else - nfs_file_set_open_context(filp, ctx); + + err = finish_open(file, dentry, do_open, opened); + if (err) + goto out; + nfs_file_set_open_context(file, ctx); + out: put_nfs_open_context(ctx); - return ret; + return err; } -static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) +int nfs_atomic_open(struct inode *dir, struct dentry *dentry, + struct file *file, unsigned open_flags, + umode_t mode, int *opened) { struct nfs_open_context *ctx; - struct iattr attr; - struct dentry *res = NULL; + struct dentry *res; + struct iattr attr = { .ia_valid = ATTR_OPEN }; struct inode *inode; - int open_flags; int err; - dfprintk(VFS, "NFS: atomic_lookup(%s/%ld), %s\n", + /* Expect a negative dentry */ + BUG_ON(dentry->d_inode); + + dfprintk(VFS, "NFS: atomic_open(%s/%ld), %s\n", dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); - /* Check that we are indeed trying to open this 
file */ - if (!is_atomic_open(nd)) + /* NFS only supports OPEN on regular files */ + if ((open_flags & O_DIRECTORY)) { + if (!d_unhashed(dentry)) { + /* + * Hashed negative dentry with O_DIRECTORY: dentry was + * revalidated and is fine, no need to perform lookup + * again + */ + return -ENOENT; + } goto no_open; - - if (dentry->d_name.len > NFS_SERVER(dir)->namelen) { - res = ERR_PTR(-ENAMETOOLONG); - goto out; - } - - /* Let vfs_create() deal with O_EXCL. Instantiate, but don't hash - * the dentry. */ - if (nd->flags & LOOKUP_EXCL) { - d_instantiate(dentry, NULL); - goto out; } - open_flags = nd->intent.open.flags; - attr.ia_valid = ATTR_OPEN; - - ctx = create_nfs_open_context(dentry, open_flags); - res = ERR_CAST(ctx); - if (IS_ERR(ctx)) - goto out; + if (dentry->d_name.len > NFS_SERVER(dir)->namelen) + return -ENAMETOOLONG; - if (nd->flags & LOOKUP_CREATE) { - attr.ia_mode = nd->intent.open.create_mode; + if (open_flags & O_CREAT) { attr.ia_valid |= ATTR_MODE; - attr.ia_mode &= ~current_umask(); - } else - open_flags &= ~(O_EXCL | O_CREAT); - + attr.ia_mode = mode & ~current_umask(); + } if (open_flags & O_TRUNC) { attr.ia_valid |= ATTR_SIZE; attr.ia_size = 0; } - /* Open the file on the server */ + ctx = create_nfs_open_context(dentry, open_flags); + err = PTR_ERR(ctx); + if (IS_ERR(ctx)) + goto out; + nfs_block_sillyrename(dentry->d_parent); inode = NFS_PROTO(dir)->open_context(dir, ctx, open_flags, &attr); + d_drop(dentry); if (IS_ERR(inode)) { nfs_unblock_sillyrename(dentry->d_parent); put_nfs_open_context(ctx); - switch (PTR_ERR(inode)) { - /* Make a negative dentry */ - case -ENOENT: - d_add(dentry, NULL); - res = NULL; - goto out; - /* This turned out not to be a regular file */ - case -EISDIR: - case -ENOTDIR: + err = PTR_ERR(inode); + switch (err) { + case -ENOENT: + d_add(dentry, NULL); + break; + case -EISDIR: + case -ENOTDIR: + goto no_open; + case -ELOOP: + if (!(open_flags & O_NOFOLLOW)) goto no_open; - case -ELOOP: - if (!(nd->intent.open.flags & 
O_NOFOLLOW)) - goto no_open; + break; /* case -EINVAL: */ - default: - res = ERR_CAST(inode); - goto out; + default: + break; } + goto out; } res = d_add_unique(dentry, inode); - nfs_unblock_sillyrename(dentry->d_parent); - if (res != NULL) { - dput(ctx->dentry); - ctx->dentry = dget(res); + if (res != NULL) dentry = res; - } - err = nfs_intent_set_file(nd, ctx); - if (err < 0) { - if (res != NULL) - dput(res); - return ERR_PTR(err); - } -out: + + nfs_unblock_sillyrename(dentry->d_parent); nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); - return res; + + err = nfs_finish_open(ctx, dentry, file, open_flags, opened); + + dput(res); +out: + return err; + no_open: - return nfs_lookup(dir, dentry, nd); + res = nfs_lookup(dir, dentry, 0); + err = PTR_ERR(res); + if (IS_ERR(res)) + goto out; + + return finish_no_open(file, res); } +EXPORT_SYMBOL_GPL(nfs_atomic_open); -static int nfs4_lookup_revalidate(struct dentry *dentry, struct nameidata *nd) +static int nfs4_lookup_revalidate(struct dentry *dentry, unsigned int flags) { struct dentry *parent = NULL; struct inode *inode; struct inode *dir; - int openflags, ret = 0; + int ret = 0; - if (nd->flags & LOOKUP_RCU) + if (flags & LOOKUP_RCU) return -ECHILD; - inode = dentry->d_inode; - if (!is_atomic_open(nd) || d_mountpoint(dentry)) + if (!(flags & LOOKUP_OPEN) || (flags & LOOKUP_DIRECTORY)) + goto no_open; + if (d_mountpoint(dentry)) goto no_open; + inode = dentry->d_inode; parent = dget_parent(dentry); dir = parent->d_inode; @@ -1540,7 +1455,7 @@ static int nfs4_lookup_revalidate(struct dentry *dentry, struct nameidata *nd) * optimize away revalidation of negative dentries. 
*/ if (inode == NULL) { - if (!nfs_neg_need_reval(dir, dentry, nd)) + if (!nfs_neg_need_reval(dir, dentry, flags)) ret = 1; goto out; } @@ -1548,9 +1463,8 @@ static int nfs4_lookup_revalidate(struct dentry *dentry, struct nameidata *nd) /* NFS only supports OPEN on regular files */ if (!S_ISREG(inode->i_mode)) goto no_open_dput; - openflags = nd->intent.open.flags; /* We cannot do exclusive creation on a positive dentry */ - if ((openflags & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL)) + if (flags & LOOKUP_EXCL) goto no_open_dput; /* Let f_op->open() actually open (and revalidate) the file */ @@ -1563,48 +1477,7 @@ out: no_open_dput: dput(parent); no_open: - return nfs_lookup_revalidate(dentry, nd); -} - -static int nfs_open_create(struct inode *dir, struct dentry *dentry, - umode_t mode, struct nameidata *nd) -{ - struct nfs_open_context *ctx = NULL; - struct iattr attr; - int error; - int open_flags = O_CREAT|O_EXCL; - - dfprintk(VFS, "NFS: create(%s/%ld), %s\n", - dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); - - attr.ia_mode = mode; - attr.ia_valid = ATTR_MODE; - - if (nd) - open_flags = nd->intent.open.flags; - - ctx = create_nfs_open_context(dentry, open_flags); - error = PTR_ERR(ctx); - if (IS_ERR(ctx)) - goto out_err_drop; - - error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags, ctx); - if (error != 0) - goto out_put_ctx; - if (nd) { - error = nfs_intent_set_file(nd, ctx); - if (error < 0) - goto out_err; - } else { - put_nfs_open_context(ctx); - } - return 0; -out_put_ctx: - put_nfs_open_context(ctx); -out_err_drop: - d_drop(dentry); -out_err: - return error; + return nfs_lookup_revalidate(dentry, flags); } #endif /* CONFIG_NFSV4 */ @@ -1650,6 +1523,7 @@ out_error: dput(parent); return error; } +EXPORT_SYMBOL_GPL(nfs_instantiate); /* * Following a failed create operation, we drop the dentry rather @@ -1657,12 +1531,12 @@ out_error: * that the operation succeeded on the server, but an error in the * reply path made it appear to have failed. 
*/ -static int nfs_create(struct inode *dir, struct dentry *dentry, - umode_t mode, struct nameidata *nd) +int nfs_create(struct inode *dir, struct dentry *dentry, + umode_t mode, bool excl) { struct iattr attr; + int open_flags = excl ? O_CREAT | O_EXCL : O_CREAT; int error; - int open_flags = O_CREAT|O_EXCL; dfprintk(VFS, "NFS: create(%s/%ld), %s\n", dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); @@ -1670,10 +1544,7 @@ static int nfs_create(struct inode *dir, struct dentry *dentry, attr.ia_mode = mode; attr.ia_valid = ATTR_MODE; - if (nd) - open_flags = nd->intent.open.flags; - - error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags, NULL); + error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags); if (error != 0) goto out_err; return 0; @@ -1681,11 +1552,12 @@ out_err: d_drop(dentry); return error; } +EXPORT_SYMBOL_GPL(nfs_create); /* * See comments for nfs_proc_create regarding failed operations. */ -static int +int nfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev) { struct iattr attr; @@ -1708,11 +1580,12 @@ out_err: d_drop(dentry); return status; } +EXPORT_SYMBOL_GPL(nfs_mknod); /* * See comments for nfs_proc_create regarding failed operations. 
*/ -static int nfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) +int nfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) { struct iattr attr; int error; @@ -1731,6 +1604,7 @@ out_err: d_drop(dentry); return error; } +EXPORT_SYMBOL_GPL(nfs_mkdir); static void nfs_dentry_handle_enoent(struct dentry *dentry) { @@ -1738,7 +1612,7 @@ static void nfs_dentry_handle_enoent(struct dentry *dentry) d_delete(dentry); } -static int nfs_rmdir(struct inode *dir, struct dentry *dentry) +int nfs_rmdir(struct inode *dir, struct dentry *dentry) { int error; @@ -1754,6 +1628,7 @@ static int nfs_rmdir(struct inode *dir, struct dentry *dentry) return error; } +EXPORT_SYMBOL_GPL(nfs_rmdir); /* * Remove a file after making sure there are no pending writes, @@ -1778,12 +1653,10 @@ static int nfs_safe_remove(struct dentry *dentry) } if (inode != NULL) { - nfs_inode_return_delegation(inode); + NFS_PROTO(inode)->return_delegation(inode); error = NFS_PROTO(dir)->remove(dir, &dentry->d_name); - /* The VFS may want to delete this inode */ if (error == 0) nfs_drop_nlink(inode); - nfs_mark_for_revalidate(inode); } else error = NFS_PROTO(dir)->remove(dir, &dentry->d_name); if (error == -ENOENT) @@ -1797,7 +1670,7 @@ out: * * If sillyrename() returns 0, we do nothing, otherwise we unlink. */ -static int nfs_unlink(struct inode *dir, struct dentry *dentry) +int nfs_unlink(struct inode *dir, struct dentry *dentry) { int error; int need_rehash = 0; @@ -1825,6 +1698,7 @@ static int nfs_unlink(struct inode *dir, struct dentry *dentry) d_rehash(dentry); return error; } +EXPORT_SYMBOL_GPL(nfs_unlink); /* * To create a symbolic link, most file systems instantiate a new inode, @@ -1841,7 +1715,7 @@ static int nfs_unlink(struct inode *dir, struct dentry *dentry) * now have a new file handle and can instantiate an in-core NFS inode * and move the raw page into its mapping. 
*/ -static int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname) +int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname) { struct pagevec lru_pvec; struct page *page; @@ -1895,8 +1769,9 @@ static int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *sym return 0; } +EXPORT_SYMBOL_GPL(nfs_symlink); -static int +int nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) { struct inode *inode = old_dentry->d_inode; @@ -1906,7 +1781,7 @@ nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) old_dentry->d_parent->d_name.name, old_dentry->d_name.name, dentry->d_parent->d_name.name, dentry->d_name.name); - nfs_inode_return_delegation(inode); + NFS_PROTO(inode)->return_delegation(inode); d_drop(dentry); error = NFS_PROTO(dir)->link(inode, dir, &dentry->d_name); @@ -1916,6 +1791,7 @@ nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) } return error; } +EXPORT_SYMBOL_GPL(nfs_link); /* * RENAME @@ -1941,7 +1817,7 @@ nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) * If these conditions are met, we can drop the dentries before doing * the rename. 
*/ -static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, +int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry) { struct inode *old_inode = old_dentry->d_inode; @@ -1990,9 +1866,9 @@ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, } } - nfs_inode_return_delegation(old_inode); + NFS_PROTO(old_inode)->return_delegation(old_inode); if (new_inode != NULL) - nfs_inode_return_delegation(new_inode); + NFS_PROTO(new_inode)->return_delegation(new_inode); error = NFS_PROTO(old_dir)->rename(old_dir, &old_dentry->d_name, new_dir, &new_dentry->d_name); @@ -2014,6 +1890,7 @@ out: dput(dentry); return error; } +EXPORT_SYMBOL_GPL(nfs_rename); static DEFINE_SPINLOCK(nfs_access_lru_lock); static LIST_HEAD(nfs_access_lru_list); @@ -2114,6 +1991,7 @@ void nfs_access_zap_cache(struct inode *inode) spin_unlock(&nfs_access_lru_lock); nfs_access_free_list(&head); } +EXPORT_SYMBOL_GPL(nfs_access_zap_cache); static struct nfs_access_entry *nfs_access_search_rbtree(struct inode *inode, struct rpc_cred *cred) { @@ -2201,7 +2079,7 @@ found: nfs_access_free_entry(entry); } -static void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set) +void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set) { struct nfs_access_entry *cache = kmalloc(sizeof(*cache), GFP_KERNEL); if (cache == NULL) @@ -2227,6 +2105,20 @@ static void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *s spin_unlock(&nfs_access_lru_lock); } } +EXPORT_SYMBOL_GPL(nfs_access_add_cache); + +void nfs_access_set_mask(struct nfs_access_entry *entry, u32 access_result) +{ + entry->mask = 0; + if (access_result & NFS4_ACCESS_READ) + entry->mask |= MAY_READ; + if (access_result & + (NFS4_ACCESS_MODIFY | NFS4_ACCESS_EXTEND | NFS4_ACCESS_DELETE)) + entry->mask |= MAY_WRITE; + if (access_result & (NFS4_ACCESS_LOOKUP|NFS4_ACCESS_EXECUTE)) + entry->mask |= MAY_EXEC; +} 
+EXPORT_SYMBOL_GPL(nfs_access_set_mask); static int nfs_do_access(struct inode *inode, struct rpc_cred *cred, int mask) { @@ -2261,12 +2153,16 @@ static int nfs_open_permission_mask(int openflags) { int mask = 0; - if ((openflags & O_ACCMODE) != O_WRONLY) - mask |= MAY_READ; - if ((openflags & O_ACCMODE) != O_RDONLY) - mask |= MAY_WRITE; - if (openflags & __FMODE_EXEC) - mask |= MAY_EXEC; + if (openflags & __FMODE_EXEC) { + /* ONLY check exec rights */ + mask = MAY_EXEC; + } else { + if ((openflags & O_ACCMODE) != O_WRONLY) + mask |= MAY_READ; + if ((openflags & O_ACCMODE) != O_RDONLY) + mask |= MAY_WRITE; + } + return mask; } @@ -2274,6 +2170,7 @@ int nfs_may_open(struct inode *inode, struct rpc_cred *cred, int openflags) { return nfs_do_access(inode, cred, nfs_open_permission_mask(openflags)); } +EXPORT_SYMBOL_GPL(nfs_may_open); int nfs_permission(struct inode *inode, int mask) { @@ -2333,6 +2230,7 @@ out_notsup: res = generic_permission(inode, mask); goto out; } +EXPORT_SYMBOL_GPL(nfs_permission); /* * Local variables: diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 48253372ab1..0bd7a55a5f0 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -46,6 +46,7 @@ #include <linux/kref.h> #include <linux/slab.h> #include <linux/task_io_accounting_ops.h> +#include <linux/module.h> #include <linux/nfs_fs.h> #include <linux/nfs_page.h> @@ -78,6 +79,7 @@ struct nfs_direct_req { atomic_t io_count; /* i/os we're waiting for */ spinlock_t lock; /* protect completion state */ ssize_t count, /* bytes actually processed */ + bytes_left, /* bytes left to be sent */ error; /* any reported error */ struct completion completion; /* wait for i/o completion */ @@ -115,17 +117,28 @@ static inline int put_dreq(struct nfs_direct_req *dreq) * @nr_segs: size of iovec array * * The presence of this routine in the address space ops vector means - * the NFS client supports direct I/O. 
However, we shunt off direct - * read and write requests before the VFS gets them, so this method - * should never be called. + * the NFS client supports direct I/O. However, for most direct IO, we + * shunt off direct read and write requests before the VFS gets them, + * so this method is only ever called for swap. */ ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t pos, unsigned long nr_segs) { +#ifndef CONFIG_NFS_SWAP dprintk("NFS: nfs_direct_IO (%s) off/no(%Ld/%lu) EINVAL\n", iocb->ki_filp->f_path.dentry->d_name.name, (long long) pos, nr_segs); return -EINVAL; +#else + VM_BUG_ON(iocb->ki_left != PAGE_SIZE); + VM_BUG_ON(iocb->ki_nbytes != PAGE_SIZE); + + if (rw == READ || rw == KERNEL_READ) + return nfs_file_direct_read(iocb, iov, nr_segs, pos, + rw == READ ? true : false); + return nfs_file_direct_write(iocb, iov, nr_segs, pos, + rw == WRITE ? true : false); +#endif /* CONFIG_NFS_SWAP */ } static void nfs_direct_release_pages(struct page **pages, unsigned int npages) @@ -179,6 +192,12 @@ static void nfs_direct_req_release(struct nfs_direct_req *dreq) kref_put(&dreq->kref, nfs_direct_req_free); } +ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq) +{ + return dreq->bytes_left; +} +EXPORT_SYMBOL_GPL(nfs_dreq_bytes_left); + /* * Collects and returns the final error value/byte-count. 
*/ @@ -247,21 +266,8 @@ static void nfs_direct_read_completion(struct nfs_pgio_header *hdr) struct nfs_page *req = nfs_list_entry(hdr->pages.next); struct page *page = req->wb_page; - if (test_bit(NFS_IOHDR_EOF, &hdr->flags)) { - if (bytes > hdr->good_bytes) - zero_user(page, 0, PAGE_SIZE); - else if (hdr->good_bytes - bytes < PAGE_SIZE) - zero_user_segment(page, - hdr->good_bytes & ~PAGE_MASK, - PAGE_SIZE); - } - if (!PageCompound(page)) { - if (test_bit(NFS_IOHDR_ERROR, &hdr->flags)) { - if (bytes < hdr->good_bytes) - set_page_dirty(page); - } else - set_page_dirty(page); - } + if (!PageCompound(page) && bytes < hdr->good_bytes) + set_page_dirty(page); bytes += req->wb_bytes; nfs_list_remove_request(req); nfs_direct_readpage_release(req); @@ -303,7 +309,7 @@ static const struct nfs_pgio_completion_ops nfs_direct_read_completion_ops = { */ static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *desc, const struct iovec *iov, - loff_t pos) + loff_t pos, bool uio) { struct nfs_direct_req *dreq = desc->pg_dreq; struct nfs_open_context *ctx = dreq->ctx; @@ -331,12 +337,20 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *de GFP_KERNEL); if (!pagevec) break; - down_read(&current->mm->mmap_sem); - result = get_user_pages(current, current->mm, user_addr, + if (uio) { + down_read(&current->mm->mmap_sem); + result = get_user_pages(current, current->mm, user_addr, npages, 1, 0, pagevec, NULL); - up_read(&current->mm->mmap_sem); - if (result < 0) - break; + up_read(&current->mm->mmap_sem); + if (result < 0) + break; + } else { + WARN_ON(npages != 1); + result = get_kernel_page(user_addr, 1, pagevec); + if (WARN_ON(result != 1)) + break; + } + if ((unsigned)result < npages) { bytes = result * PAGE_SIZE; if (bytes <= pgbase) { @@ -371,6 +385,7 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *de user_addr += req_len; pos += req_len; count -= req_len; + dreq->bytes_left -= req_len; } /* The nfs_page now hold references
to these pages */ nfs_direct_release_pages(pagevec, npages); @@ -386,21 +401,21 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *de static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq, const struct iovec *iov, unsigned long nr_segs, - loff_t pos) + loff_t pos, bool uio) { struct nfs_pageio_descriptor desc; ssize_t result = -EINVAL; size_t requested_bytes = 0; unsigned long seg; - nfs_pageio_init_read(&desc, dreq->inode, + NFS_PROTO(dreq->inode)->read_pageio_init(&desc, dreq->inode, &nfs_direct_read_completion_ops); get_dreq(dreq); desc.pg_dreq = dreq; for (seg = 0; seg < nr_segs; seg++) { const struct iovec *vec = &iov[seg]; - result = nfs_direct_read_schedule_segment(&desc, vec, pos); + result = nfs_direct_read_schedule_segment(&desc, vec, pos, uio); if (result < 0) break; requested_bytes += result; @@ -426,28 +441,33 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq, } static ssize_t nfs_direct_read(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) + unsigned long nr_segs, loff_t pos, bool uio) { ssize_t result = -ENOMEM; struct inode *inode = iocb->ki_filp->f_mapping->host; struct nfs_direct_req *dreq; + struct nfs_lock_context *l_ctx; dreq = nfs_direct_req_alloc(); if (dreq == NULL) goto out; dreq->inode = inode; + dreq->bytes_left = iov_length(iov, nr_segs); dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp)); - dreq->l_ctx = nfs_get_lock_context(dreq->ctx); - if (dreq->l_ctx == NULL) + l_ctx = nfs_get_lock_context(dreq->ctx); + if (IS_ERR(l_ctx)) { + result = PTR_ERR(l_ctx); goto out_release; + } + dreq->l_ctx = l_ctx; if (!is_sync_kiocb(iocb)) dreq->iocb = iocb; - result = nfs_direct_read_schedule_iovec(dreq, iov, nr_segs, pos); + NFS_I(inode)->read_io += iov_length(iov, nr_segs); + result = nfs_direct_read_schedule_iovec(dreq, iov, nr_segs, pos, uio); if (!result) result = nfs_direct_wait(dreq); - NFS_I(inode)->read_io += 
result; out_release: nfs_direct_req_release(dreq); out: @@ -460,7 +480,7 @@ static void nfs_inode_dio_write_done(struct inode *inode) inode_dio_done(inode); } -#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) +#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4) static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) { struct nfs_pageio_descriptor desc; @@ -478,7 +498,7 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) dreq->count = 0; get_dreq(dreq); - nfs_pageio_init_write(&desc, dreq->inode, FLUSH_STABLE, + NFS_PROTO(dreq->inode)->write_pageio_init(&desc, dreq->inode, FLUSH_STABLE, &nfs_direct_write_completion_ops); desc.pg_dreq = dreq; @@ -610,7 +630,7 @@ static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode */ static ssize_t nfs_direct_write_schedule_segment(struct nfs_pageio_descriptor *desc, const struct iovec *iov, - loff_t pos) + loff_t pos, bool uio) { struct nfs_direct_req *dreq = desc->pg_dreq; struct nfs_open_context *ctx = dreq->ctx; @@ -638,12 +658,19 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_pageio_descriptor *d if (!pagevec) break; - down_read(&current->mm->mmap_sem); - result = get_user_pages(current, current->mm, user_addr, - npages, 0, 0, pagevec, NULL); - up_read(&current->mm->mmap_sem); - if (result < 0) - break; + if (uio) { + down_read(&current->mm->mmap_sem); + result = get_user_pages(current, current->mm, user_addr, + npages, 0, 0, pagevec, NULL); + up_read(&current->mm->mmap_sem); + if (result < 0) + break; + } else { + WARN_ON(npages != 1); + result = get_kernel_page(user_addr, 0, pagevec); + if (WARN_ON(result != 1)) + break; + } if ((unsigned)result < npages) { bytes = result * PAGE_SIZE; @@ -680,6 +707,7 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_pageio_descriptor *d user_addr += req_len; pos += req_len; count -= req_len; + dreq->bytes_left -= req_len; } /* The nfs_page now hold references to these pages */ nfs_direct_release_pages(pagevec,
npages); @@ -774,7 +802,7 @@ static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops = { static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, const struct iovec *iov, unsigned long nr_segs, - loff_t pos) + loff_t pos, bool uio) { struct nfs_pageio_descriptor desc; struct inode *inode = dreq->inode; @@ -782,15 +810,16 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, size_t requested_bytes = 0; unsigned long seg; - nfs_pageio_init_write(&desc, inode, FLUSH_COND_STABLE, + NFS_PROTO(inode)->write_pageio_init(&desc, inode, FLUSH_COND_STABLE, &nfs_direct_write_completion_ops); desc.pg_dreq = dreq; get_dreq(dreq); atomic_inc(&inode->i_dio_count); + NFS_I(dreq->inode)->write_io += iov_length(iov, nr_segs); for (seg = 0; seg < nr_segs; seg++) { const struct iovec *vec = &iov[seg]; - result = nfs_direct_write_schedule_segment(&desc, vec, pos); + result = nfs_direct_write_schedule_segment(&desc, vec, pos, uio); if (result < 0) break; requested_bytes += result; @@ -799,7 +828,6 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, pos += vec->iov_len; } nfs_pageio_complete(&desc); - NFS_I(dreq->inode)->write_io += desc.pg_bytes_written; /* * If no bytes were started, return the error, and let the @@ -818,25 +846,30 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos, - size_t count) + size_t count, bool uio) { ssize_t result = -ENOMEM; struct inode *inode = iocb->ki_filp->f_mapping->host; struct nfs_direct_req *dreq; + struct nfs_lock_context *l_ctx; dreq = nfs_direct_req_alloc(); if (!dreq) goto out; dreq->inode = inode; + dreq->bytes_left = count; dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp)); - dreq->l_ctx = nfs_get_lock_context(dreq->ctx); - if (dreq->l_ctx == NULL) + l_ctx = nfs_get_lock_context(dreq->ctx); + if 
(IS_ERR(l_ctx)) { + result = PTR_ERR(l_ctx); goto out_release; + } + dreq->l_ctx = l_ctx; if (!is_sync_kiocb(iocb)) dreq->iocb = iocb; - result = nfs_direct_write_schedule_iovec(dreq, iov, nr_segs, pos); + result = nfs_direct_write_schedule_iovec(dreq, iov, nr_segs, pos, uio); if (!result) result = nfs_direct_wait(dreq); out_release: @@ -867,7 +900,7 @@ out: * cache. */ ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) + unsigned long nr_segs, loff_t pos, bool uio) { ssize_t retval = -EINVAL; struct file *file = iocb->ki_filp; @@ -892,7 +925,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov, task_io_account_read(count); - retval = nfs_direct_read(iocb, iov, nr_segs, pos); + retval = nfs_direct_read(iocb, iov, nr_segs, pos, uio); if (retval > 0) iocb->ki_pos = pos + retval; @@ -923,7 +956,7 @@ out: * is no atomic O_APPEND write facility in the NFS protocol. */ ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) + unsigned long nr_segs, loff_t pos, bool uio) { ssize_t retval = -EINVAL; struct file *file = iocb->ki_filp; @@ -955,7 +988,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov, task_io_account_write(count); - retval = nfs_direct_write(iocb, iov, nr_segs, pos, count); + retval = nfs_direct_write(iocb, iov, nr_segs, pos, count, uio); if (retval > 0) { struct inode *inode = mapping->host; diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c index b3924b8a600..ca4b11ec87a 100644 --- a/fs/nfs/dns_resolve.c +++ b/fs/nfs/dns_resolve.c @@ -8,6 +8,7 @@ #ifdef CONFIG_NFS_USE_KERNEL_DNS +#include <linux/module.h> #include <linux/sunrpc/clnt.h> #include <linux/dns_resolver.h> #include "dns_resolve.h" @@ -27,9 +28,11 @@ ssize_t nfs_dns_resolve_name(struct net *net, char *name, size_t namelen, kfree(ip_addr); return ret; } +EXPORT_SYMBOL_GPL(nfs_dns_resolve_name); #else +#include 
<linux/module.h> #include <linux/hash.h> #include <linux/string.h> #include <linux/kmod.h> @@ -214,7 +217,7 @@ static int nfs_dns_parse(struct cache_detail *cd, char *buf, int buflen) { char buf1[NFS_DNS_HOSTNAME_MAXLEN+1]; struct nfs_dns_ent key, *item; - unsigned long ttl; + unsigned int ttl; ssize_t len; int ret = -EINVAL; @@ -237,7 +240,8 @@ static int nfs_dns_parse(struct cache_detail *cd, char *buf, int buflen) key.namelen = len; memset(&key.h, 0, sizeof(key.h)); - ttl = get_expiry(&buf); + if (get_uint(&buf, &ttl) < 0) + goto out; if (ttl == 0) goto out; key.h.expiry_time = ttl + seconds_since_boot(); @@ -345,6 +349,7 @@ ssize_t nfs_dns_resolve_name(struct net *net, char *name, ret = -ESRCH; return ret; } +EXPORT_SYMBOL_GPL(nfs_dns_resolve_name); int nfs_dns_resolver_cache_init(struct net *net) { diff --git a/fs/nfs/file.c b/fs/nfs/file.c index a6708e6b438..3c2b893665b 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -16,6 +16,7 @@ * nfs regular file handling functions */ +#include <linux/module.h> #include <linux/time.h> #include <linux/kernel.h> #include <linux/errno.h> @@ -35,42 +36,24 @@ #include "internal.h" #include "iostat.h" #include "fscache.h" -#include "pnfs.h" #define NFSDBG_FACILITY NFSDBG_FILE static const struct vm_operations_struct nfs_file_vm_ops; -const struct inode_operations nfs_file_inode_operations = { - .permission = nfs_permission, - .getattr = nfs_getattr, - .setattr = nfs_setattr, -}; - -#ifdef CONFIG_NFS_V3 -const struct inode_operations nfs3_file_inode_operations = { - .permission = nfs_permission, - .getattr = nfs_getattr, - .setattr = nfs_setattr, - .listxattr = nfs3_listxattr, - .getxattr = nfs3_getxattr, - .setxattr = nfs3_setxattr, - .removexattr = nfs3_removexattr, -}; -#endif /* CONFIG_NFS_v3 */ - /* Hack for future NFS swap support */ #ifndef IS_SWAPFILE # define IS_SWAPFILE(inode) (0) #endif -static int nfs_check_flags(int flags) +int nfs_check_flags(int flags) { if ((flags & (O_APPEND | O_DIRECT)) == (O_APPEND | 
O_DIRECT)) return -EINVAL; return 0; } +EXPORT_SYMBOL_GPL(nfs_check_flags); /* * Open file @@ -93,7 +76,7 @@ nfs_file_open(struct inode *inode, struct file *filp) return res; } -static int +int nfs_file_release(struct inode *inode, struct file *filp) { dprintk("NFS: release(%s/%s)\n", @@ -103,6 +86,7 @@ nfs_file_release(struct inode *inode, struct file *filp) nfs_inc_stats(inode, NFSIOS_VFSRELEASE); return nfs_release(inode, filp); } +EXPORT_SYMBOL_GPL(nfs_file_release); /** * nfs_revalidate_size - Revalidate the file size @@ -135,18 +119,18 @@ force_reval: return __nfs_revalidate_inode(server, inode); } -static loff_t nfs_file_llseek(struct file *filp, loff_t offset, int origin) +loff_t nfs_file_llseek(struct file *filp, loff_t offset, int whence) { dprintk("NFS: llseek file(%s/%s, %lld, %d)\n", filp->f_path.dentry->d_parent->d_name.name, filp->f_path.dentry->d_name.name, - offset, origin); + offset, whence); /* - * origin == SEEK_END || SEEK_DATA || SEEK_HOLE => we must revalidate + * whence == SEEK_END || SEEK_DATA || SEEK_HOLE => we must revalidate * the cached file length */ - if (origin != SEEK_SET && origin != SEEK_CUR) { + if (whence != SEEK_SET && whence != SEEK_CUR) { struct inode *inode = filp->f_mapping->host; int retval = nfs_revalidate_file_size(inode, filp); @@ -154,13 +138,14 @@ static loff_t nfs_file_llseek(struct file *filp, loff_t offset, int origin) return (loff_t)retval; } - return generic_file_llseek(filp, offset, origin); + return generic_file_llseek(filp, offset, whence); } +EXPORT_SYMBOL_GPL(nfs_file_llseek); /* * Flush all dirty pages, and check for write errors. */ -static int +int nfs_file_flush(struct file *file, fl_owner_t id) { struct dentry *dentry = file->f_path.dentry; @@ -178,14 +163,15 @@ nfs_file_flush(struct file *file, fl_owner_t id) * If we're holding a write delegation, then just start the i/o * but don't wait for completion (or send a commit). 
*/ - if (nfs_have_delegation(inode, FMODE_WRITE)) + if (NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE)) return filemap_fdatawrite(file->f_mapping); /* Flush writes to the server and return any errors */ return vfs_fsync(file, 0); } +EXPORT_SYMBOL_GPL(nfs_file_flush); -static ssize_t +ssize_t nfs_file_read(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos) { @@ -194,7 +180,7 @@ nfs_file_read(struct kiocb *iocb, const struct iovec *iov, ssize_t result; if (iocb->ki_filp->f_flags & O_DIRECT) - return nfs_file_direct_read(iocb, iov, nr_segs, pos); + return nfs_file_direct_read(iocb, iov, nr_segs, pos, true); dprintk("NFS: read(%s/%s, %lu@%lu)\n", dentry->d_parent->d_name.name, dentry->d_name.name, @@ -208,8 +194,9 @@ nfs_file_read(struct kiocb *iocb, const struct iovec *iov, } return result; } +EXPORT_SYMBOL_GPL(nfs_file_read); -static ssize_t +ssize_t nfs_file_splice_read(struct file *filp, loff_t *ppos, struct pipe_inode_info *pipe, size_t count, unsigned int flags) @@ -230,8 +217,9 @@ nfs_file_splice_read(struct file *filp, loff_t *ppos, } return res; } +EXPORT_SYMBOL_GPL(nfs_file_splice_read); -static int +int nfs_file_mmap(struct file * file, struct vm_area_struct * vma) { struct dentry *dentry = file->f_path.dentry; @@ -251,6 +239,7 @@ nfs_file_mmap(struct file * file, struct vm_area_struct * vma) } return status; } +EXPORT_SYMBOL_GPL(nfs_file_mmap); /* * Flush any dirty pages for this process, and check for write errors. @@ -264,36 +253,63 @@ nfs_file_mmap(struct file * file, struct vm_area_struct * vma) * nfs_file_write() that a write error occurred, and hence cause it to * fall back to doing a synchronous write. 
*/ -static int -nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) +int +nfs_file_fsync_commit(struct file *file, loff_t start, loff_t end, int datasync) { struct dentry *dentry = file->f_path.dentry; struct nfs_open_context *ctx = nfs_file_open_context(file); struct inode *inode = dentry->d_inode; - int have_error, status; + int have_error, do_resend, status; int ret = 0; dprintk("NFS: fsync file(%s/%s) datasync %d\n", dentry->d_parent->d_name.name, dentry->d_name.name, datasync); - ret = filemap_write_and_wait_range(inode->i_mapping, start, end); - mutex_lock(&inode->i_mutex); - nfs_inc_stats(inode, NFSIOS_VFSFSYNC); + do_resend = test_and_clear_bit(NFS_CONTEXT_RESEND_WRITES, &ctx->flags); have_error = test_and_clear_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags); status = nfs_commit_inode(inode, FLUSH_SYNC); - if (status >= 0 && ret < 0) - status = ret; have_error |= test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags); - if (have_error) + if (have_error) { ret = xchg(&ctx->error, 0); - if (!ret && status < 0) + if (ret) + goto out; + } + if (status < 0) { ret = status; - if (!ret && !datasync) - /* application has asked for meta-data sync */ - ret = pnfs_layoutcommit_inode(inode, true); - mutex_unlock(&inode->i_mutex); + goto out; + } + do_resend |= test_bit(NFS_CONTEXT_RESEND_WRITES, &ctx->flags); + if (do_resend) + ret = -EAGAIN; +out: + return ret; +} +EXPORT_SYMBOL_GPL(nfs_file_fsync_commit); + +static int +nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) +{ + int ret; + struct inode *inode = file->f_path.dentry->d_inode; + + do { + ret = filemap_write_and_wait_range(inode->i_mapping, start, end); + if (ret != 0) + break; + mutex_lock(&inode->i_mutex); + ret = nfs_file_fsync_commit(file, start, end, datasync); + mutex_unlock(&inode->i_mutex); + /* + * If nfs_file_fsync_commit detected a server reboot, then + * resend all dirty pages that might have been covered by + * the NFS_CONTEXT_RESEND_WRITES flag + */ + start = 0; 
+ end = LLONG_MAX; + } while (ret == -EAGAIN); + return ret; } @@ -442,7 +458,7 @@ static void nfs_invalidate_page(struct page *page, unsigned long offset) if (offset != 0) return; /* Cancel any unstarted writes on this page */ - nfs_wb_page_cancel(page->mapping->host, page); + nfs_wb_page_cancel(page_file_mapping(page)->host, page); nfs_fscache_invalidate_page(page, page->mapping->host); } @@ -459,8 +475,11 @@ static int nfs_release_page(struct page *page, gfp_t gfp) dfprintk(PAGECACHE, "NFS: release_page(%p)\n", page); - /* Only do I/O if gfp is a superset of GFP_KERNEL */ - if (mapping && (gfp & GFP_KERNEL) == GFP_KERNEL) { + /* Only do I/O if gfp is a superset of GFP_KERNEL, and we're not + * doing this memory reclaim for a fs-related allocation. + */ + if (mapping && (gfp & GFP_KERNEL) == GFP_KERNEL && + !(current->flags & PF_FSTRANS)) { int how = FLUSH_SYNC; /* Don't let kswapd deadlock waiting for OOM RPC calls */ @@ -484,7 +503,7 @@ static int nfs_release_page(struct page *page, gfp_t gfp) */ static int nfs_launder_page(struct page *page) { - struct inode *inode = page->mapping->host; + struct inode *inode = page_file_mapping(page)->host; struct nfs_inode *nfsi = NFS_I(inode); dfprintk(PAGECACHE, "NFS: launder_page(%ld, %llu)\n", @@ -494,6 +513,20 @@ static int nfs_launder_page(struct page *page) return nfs_wb_page(inode, page); } +#ifdef CONFIG_NFS_SWAP +static int nfs_swap_activate(struct swap_info_struct *sis, struct file *file, + sector_t *span) +{ + *span = sis->pages; + return xs_swapper(NFS_CLIENT(file->f_mapping->host)->cl_xprt, 1); +} + +static void nfs_swap_deactivate(struct file *file) +{ + xs_swapper(NFS_CLIENT(file->f_mapping->host)->cl_xprt, 0); +} +#endif + const struct address_space_operations nfs_file_aops = { .readpage = nfs_readpage, .readpages = nfs_readpages, @@ -508,6 +541,10 @@ const struct address_space_operations nfs_file_aops = { .migratepage = nfs_migrate_page, .launder_page = nfs_launder_page, .error_remove_page = 
generic_error_remove_page, +#ifdef CONFIG_NFS_SWAP + .swap_activate = nfs_swap_activate, + .swap_deactivate = nfs_swap_deactivate, +#endif }; /* @@ -533,7 +570,7 @@ static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) nfs_fscache_wait_on_page_write(NFS_I(dentry->d_inode), page); lock_page(page); - mapping = page->mapping; + mapping = page_file_mapping(page); if (mapping != dentry->d_inode->i_mapping) goto out_unlock; @@ -558,6 +595,7 @@ out: static const struct vm_operations_struct nfs_file_vm_ops = { .fault = filemap_fault, .page_mkwrite = nfs_vm_page_mkwrite, + .remap_pages = generic_file_remap_pages, }; static int nfs_need_sync_write(struct file *filp, struct inode *inode) @@ -572,8 +610,8 @@ static int nfs_need_sync_write(struct file *filp, struct inode *inode) return 0; } -static ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) +ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) { struct dentry * dentry = iocb->ki_filp->f_path.dentry; struct inode * inode = dentry->d_inode; @@ -582,7 +620,7 @@ static ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov, size_t count = iov_length(iov, nr_segs); if (iocb->ki_filp->f_flags & O_DIRECT) - return nfs_file_direct_write(iocb, iov, nr_segs, pos); + return nfs_file_direct_write(iocb, iov, nr_segs, pos, true); dprintk("NFS: write(%s/%s, %lu@%Ld)\n", dentry->d_parent->d_name.name, dentry->d_name.name, @@ -623,10 +661,11 @@ out_swapfile: printk(KERN_INFO "NFS: attempt to write to active swap file!\n"); goto out; } +EXPORT_SYMBOL_GPL(nfs_file_write); -static ssize_t nfs_file_splice_write(struct pipe_inode_info *pipe, - struct file *filp, loff_t *ppos, - size_t count, unsigned int flags) +ssize_t nfs_file_splice_write(struct pipe_inode_info *pipe, + struct file *filp, loff_t *ppos, + size_t count, unsigned int flags) { struct dentry *dentry = filp->f_path.dentry; struct inode 
*inode = dentry->d_inode; @@ -654,6 +693,7 @@ static ssize_t nfs_file_splice_write(struct pipe_inode_info *pipe, nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, written); return ret; } +EXPORT_SYMBOL_GPL(nfs_file_splice_write); static int do_getlk(struct file *filp, int cmd, struct file_lock *fl, int is_local) @@ -670,7 +710,7 @@ do_getlk(struct file *filp, int cmd, struct file_lock *fl, int is_local) } fl->fl_type = saved_type; - if (nfs_have_delegation(inode, FMODE_READ)) + if (NFS_PROTO(inode)->have_delegation(inode, FMODE_READ)) goto out_noconflict; if (is_local) @@ -765,7 +805,7 @@ do_setlk(struct file *filp, int cmd, struct file_lock *fl, int is_local) * This makes locking act as a cache coherency point. */ nfs_sync_mapping(filp->f_mapping); - if (!nfs_have_delegation(inode, FMODE_READ)) { + if (!NFS_PROTO(inode)->have_delegation(inode, FMODE_READ)) { if (is_time_granular(&NFS_SERVER(inode)->time_delta)) __nfs_revalidate_inode(NFS_SERVER(inode), inode); else @@ -778,7 +818,7 @@ out: /* * Lock a (portion of) a file */ -static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl) +int nfs_lock(struct file *filp, int cmd, struct file_lock *fl) { struct inode *inode = filp->f_mapping->host; int ret = -ENOLCK; @@ -814,11 +854,12 @@ static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl) out_err: return ret; } +EXPORT_SYMBOL_GPL(nfs_lock); /* * Lock a (portion of) a file */ -static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl) +int nfs_flock(struct file *filp, int cmd, struct file_lock *fl) { struct inode *inode = filp->f_mapping->host; int is_local = 0; @@ -831,6 +872,15 @@ static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl) if (!(fl->fl_flags & FL_FLOCK)) return -ENOLCK; + /* + * The NFSv4 protocol doesn't support LOCK_MAND, which is not part of + * any standard. 
In principle we might be able to support LOCK_MAND + * on NFSv2/3 since NLMv3/4 support DOS share modes, but for now the + * NFS code is not set up for it. + */ + if (fl->fl_type & LOCK_MAND) + return -EINVAL; + if (NFS_SERVER(inode)->flags & NFS_MOUNT_LOCAL_FLOCK) is_local = 1; @@ -843,18 +893,20 @@ static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl) return do_unlk(filp, cmd, fl, is_local); return do_setlk(filp, cmd, fl, is_local); } +EXPORT_SYMBOL_GPL(nfs_flock); /* * There is no protocol support for leases, so we have no way to implement * them correctly in the face of opens by other clients. */ -static int nfs_setlease(struct file *file, long arg, struct file_lock **fl) +int nfs_setlease(struct file *file, long arg, struct file_lock **fl) { dprintk("NFS: setlease(%s/%s, arg=%ld)\n", file->f_path.dentry->d_parent->d_name.name, file->f_path.dentry->d_name.name, arg); return -EINVAL; } +EXPORT_SYMBOL_GPL(nfs_setlease); const struct file_operations nfs_file_operations = { .llseek = nfs_file_llseek, @@ -874,104 +926,4 @@ const struct file_operations nfs_file_operations = { .check_flags = nfs_check_flags, .setlease = nfs_setlease, }; - -#ifdef CONFIG_NFS_V4 -static int -nfs4_file_open(struct inode *inode, struct file *filp) -{ - struct nfs_open_context *ctx; - struct dentry *dentry = filp->f_path.dentry; - struct dentry *parent = NULL; - struct inode *dir; - unsigned openflags = filp->f_flags; - struct iattr attr; - int err; - - BUG_ON(inode != dentry->d_inode); - /* - * If no cached dentry exists or if it's negative, NFSv4 handled the - * opens in ->lookup() or ->create(). - * - * We only get this far for a cached positive dentry. We skipped - * revalidation, so handle it here by dropping the dentry and returning - * -EOPENSTALE. The VFS will retry the lookup/create/open. 
- */ - - dprintk("NFS: open file(%s/%s)\n", - dentry->d_parent->d_name.name, - dentry->d_name.name); - - if ((openflags & O_ACCMODE) == 3) - openflags--; - - /* We can't create new files here */ - openflags &= ~(O_CREAT|O_EXCL); - - parent = dget_parent(dentry); - dir = parent->d_inode; - - ctx = alloc_nfs_open_context(filp->f_path.dentry, filp->f_mode); - err = PTR_ERR(ctx); - if (IS_ERR(ctx)) - goto out; - - attr.ia_valid = ATTR_OPEN; - if (openflags & O_TRUNC) { - attr.ia_valid |= ATTR_SIZE; - attr.ia_size = 0; - nfs_wb_all(inode); - } - - inode = NFS_PROTO(dir)->open_context(dir, ctx, openflags, &attr); - if (IS_ERR(inode)) { - err = PTR_ERR(inode); - switch (err) { - case -EPERM: - case -EACCES: - case -EDQUOT: - case -ENOSPC: - case -EROFS: - goto out_put_ctx; - default: - goto out_drop; - } - } - iput(inode); - if (inode != dentry->d_inode) - goto out_drop; - - nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); - nfs_file_set_open_context(filp, ctx); - err = 0; - -out_put_ctx: - put_nfs_open_context(ctx); -out: - dput(parent); - return err; - -out_drop: - d_drop(dentry); - err = -EOPENSTALE; - goto out_put_ctx; -} - -const struct file_operations nfs4_file_operations = { - .llseek = nfs_file_llseek, - .read = do_sync_read, - .write = do_sync_write, - .aio_read = nfs_file_read, - .aio_write = nfs_file_write, - .mmap = nfs_file_mmap, - .open = nfs4_file_open, - .flush = nfs_file_flush, - .release = nfs_file_release, - .fsync = nfs_file_fsync, - .lock = nfs_lock, - .flock = nfs_flock, - .splice_read = nfs_file_splice_read, - .splice_write = nfs_file_splice_write, - .check_flags = nfs_check_flags, - .setlease = nfs_setlease, -}; -#endif /* CONFIG_NFS_V4 */ +EXPORT_SYMBOL_GPL(nfs_file_operations); diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c index c817787fbdb..24d1d1c5fca 100644 --- a/fs/nfs/fscache.c +++ b/fs/nfs/fscache.c @@ -307,6 +307,7 @@ void nfs_fscache_set_inode_cookie(struct inode *inode, struct file *filp) nfs_fscache_inode_unlock(inode); } } 
+EXPORT_SYMBOL_GPL(nfs_fscache_set_inode_cookie); /* * Replace a per-inode cookie due to revalidation detecting a file having diff --git a/fs/nfs/fscache.h b/fs/nfs/fscache.h index c5b11b53ff3..4ecb76652eb 100644 --- a/fs/nfs/fscache.h +++ b/fs/nfs/fscache.h @@ -153,6 +153,22 @@ static inline void nfs_readpage_to_fscache(struct inode *inode, } /* + * Invalidate the contents of fscache for this inode. This will not sleep. + */ +static inline void nfs_fscache_invalidate(struct inode *inode) +{ + fscache_invalidate(NFS_I(inode)->fscache); +} + +/* + * Wait for an object to finish being invalidated. + */ +static inline void nfs_fscache_wait_on_invalidate(struct inode *inode) +{ + fscache_wait_on_invalidate(NFS_I(inode)->fscache); +} + +/* * indicate the client caching state as readable text */ static inline const char *nfs_server_fscache_state(struct nfs_server *server) @@ -162,7 +178,6 @@ static inline const char *nfs_server_fscache_state(struct nfs_server *server) return "no "; } - #else /* CONFIG_NFS_FSCACHE */ static inline int nfs_fscache_register(void) { return 0; } static inline void nfs_fscache_unregister(void) {} @@ -205,6 +220,10 @@ static inline int nfs_readpages_from_fscache(struct nfs_open_context *ctx, static inline void nfs_readpage_to_fscache(struct inode *inode, struct page *page, int sync) {} + +static inline void nfs_fscache_invalidate(struct inode *inode) {} +static inline void nfs_fscache_wait_on_invalidate(struct inode *inode) {} + static inline const char *nfs_server_fscache_state(struct nfs_server *server) { return "no "; diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c index 8abfb19bd3a..033803c3664 100644 --- a/fs/nfs/getroot.c +++ b/fs/nfs/getroot.c @@ -23,19 +23,15 @@ #include <linux/sunrpc/stats.h> #include <linux/nfs_fs.h> #include <linux/nfs_mount.h> -#include <linux/nfs4_mount.h> #include <linux/lockd/bind.h> #include <linux/seq_file.h> #include <linux/mount.h> -#include <linux/nfs_idmap.h> #include <linux/vfs.h> #include <linux/namei.h> 
#include <linux/security.h> #include <asm/uaccess.h> -#include "nfs4_fs.h" -#include "delegation.h" #include "internal.h" #define NFSDBG_FACILITY NFSDBG_CLIENT @@ -62,7 +58,7 @@ static int nfs_superblock_set_dummy_root(struct super_block *sb, struct inode *i */ spin_lock(&sb->s_root->d_inode->i_lock); spin_lock(&sb->s_root->d_lock); - list_del_init(&sb->s_root->d_alias); + hlist_del_init(&sb->s_root->d_alias); spin_unlock(&sb->s_root->d_lock); spin_unlock(&sb->s_root->d_inode->i_lock); } @@ -135,47 +131,3 @@ out: nfs_free_fattr(fsinfo.fattr); return ret; } - -#ifdef CONFIG_NFS_V4 - -int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh) -{ - struct nfs_fsinfo fsinfo; - int ret = -ENOMEM; - - dprintk("--> nfs4_get_rootfh()\n"); - - fsinfo.fattr = nfs_alloc_fattr(); - if (fsinfo.fattr == NULL) - goto out; - - /* Start by getting the root filehandle from the server */ - ret = nfs4_proc_get_rootfh(server, mntfh, &fsinfo); - if (ret < 0) { - dprintk("nfs4_get_rootfh: getroot error = %d\n", -ret); - goto out; - } - - if (!(fsinfo.fattr->valid & NFS_ATTR_FATTR_TYPE) - || !S_ISDIR(fsinfo.fattr->mode)) { - printk(KERN_ERR "nfs4_get_rootfh:" - " getroot encountered non-directory\n"); - ret = -ENOTDIR; - goto out; - } - - if (fsinfo.fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) { - printk(KERN_ERR "nfs4_get_rootfh:" - " getroot obtained referral\n"); - ret = -EREMOTE; - goto out; - } - - memcpy(&server->fsid, &fsinfo.fattr->fsid, sizeof(server->fsid)); -out: - nfs_free_fattr(fsinfo.fattr); - dprintk("<-- nfs4_get_rootfh() = %d\n", ret); - return ret; -} - -#endif /* CONFIG_NFS_V4 */ diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index 864c51e4b40..bc3968fa81e 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -52,14 +52,19 @@ #define NFS_UINT_MAXLEN 11 -/* Default cache timeout is 10 minutes */ -unsigned int nfs_idmap_cache_timeout = 600; static const struct cred *id_resolver_cache; static struct key_type key_type_id_resolver_legacy; +struct idmap_legacy_upcalldata 
{ + struct rpc_pipe_msg pipe_msg; + struct idmap_msg idmap_msg; + struct key_construction *key_cons; + struct idmap *idmap; +}; + struct idmap { struct rpc_pipe *idmap_pipe; - struct key_construction *idmap_key_cons; + struct idmap_legacy_upcalldata *idmap_upcall_data; struct mutex idmap_mutex; }; @@ -154,7 +159,7 @@ static int nfs_map_string_to_numeric(const char *name, size_t namelen, __u32 *re return 0; memcpy(buf, name, namelen); buf[namelen] = '\0'; - if (strict_strtoul(buf, 0, &val) != 0) + if (kstrtoul(buf, 0, &val) != 0) return 0; *res = val; return 1; @@ -188,22 +193,22 @@ static int nfs_idmap_init_keyring(void) if (!cred) return -ENOMEM; - keyring = key_alloc(&key_type_keyring, ".id_resolver", 0, 0, cred, - (KEY_POS_ALL & ~KEY_POS_SETATTR) | - KEY_USR_VIEW | KEY_USR_READ, - KEY_ALLOC_NOT_IN_QUOTA); + keyring = keyring_alloc(".id_resolver", 0, 0, cred, + (KEY_POS_ALL & ~KEY_POS_SETATTR) | + KEY_USR_VIEW | KEY_USR_READ, + KEY_ALLOC_NOT_IN_QUOTA, NULL); if (IS_ERR(keyring)) { ret = PTR_ERR(keyring); goto failed_put_cred; } - ret = key_instantiate_and_link(keyring, NULL, 0, NULL, NULL); + ret = register_key_type(&key_type_id_resolver); if (ret < 0) goto failed_put_key; - ret = register_key_type(&key_type_id_resolver); + ret = register_key_type(&key_type_id_resolver_legacy); if (ret < 0) - goto failed_put_key; + goto failed_reg_legacy; set_bit(KEY_FLAG_ROOT_CAN_CLEAR, &keyring->flags); cred->thread_keyring = keyring; @@ -211,6 +216,8 @@ static int nfs_idmap_init_keyring(void) id_resolver_cache = cred; return 0; +failed_reg_legacy: + unregister_key_type(&key_type_id_resolver); failed_put_key: key_put(keyring); failed_put_cred: @@ -222,6 +229,7 @@ static void nfs_idmap_quit_keyring(void) { key_revoke(id_resolver_cache->thread_keyring); unregister_key_type(&key_type_id_resolver); + unregister_key_type(&key_type_id_resolver_legacy); put_cred(id_resolver_cache); } @@ -352,14 +360,13 @@ static int nfs_idmap_lookup_id(const char *name, size_t namelen, const char *typ 
if (data_size <= 0) { ret = -EINVAL; } else { - ret = strict_strtol(id_str, 10, &id_long); + ret = kstrtol(id_str, 10, &id_long); *id = (__u32)id_long; } return ret; } /* idmap classic begins here */ -module_param(nfs_idmap_cache_timeout, int, 0644); enum { Opt_find_uid, Opt_find_gid, Opt_find_user, Opt_find_group, Opt_find_err @@ -376,16 +383,18 @@ static const match_table_t nfs_idmap_tokens = { static int nfs_idmap_legacy_upcall(struct key_construction *, const char *, void *); static ssize_t idmap_pipe_downcall(struct file *, const char __user *, size_t); +static void idmap_release_pipe(struct inode *); static void idmap_pipe_destroy_msg(struct rpc_pipe_msg *); static const struct rpc_pipe_ops idmap_upcall_ops = { .upcall = rpc_pipe_generic_upcall, .downcall = idmap_pipe_downcall, + .release_pipe = idmap_release_pipe, .destroy_msg = idmap_pipe_destroy_msg, }; static struct key_type key_type_id_resolver_legacy = { - .name = "id_resolver", + .name = "id_legacy", .instantiate = user_instantiate, .match = user_match, .revoke = user_revoke, @@ -452,8 +461,6 @@ nfs_idmap_new(struct nfs_client *clp) struct rpc_pipe *pipe; int error; - BUG_ON(clp->cl_idmap != NULL); - idmap = kzalloc(sizeof(*idmap), GFP_KERNEL); if (idmap == NULL) return -ENOMEM; @@ -497,7 +504,6 @@ static int __rpc_pipefs_event(struct nfs_client *clp, unsigned long event, switch (event) { case RPC_PIPEFS_MOUNT: - BUG_ON(clp->cl_rpcclient->cl_dentry == NULL); err = __nfs_idmap_register(clp->cl_rpcclient->cl_dentry, clp->cl_idmap, clp->cl_idmap->idmap_pipe); @@ -612,15 +618,13 @@ void nfs_idmap_quit(void) nfs_idmap_quit_keyring(); } -static int nfs_idmap_prepare_message(char *desc, struct idmap_msg *im, +static int nfs_idmap_prepare_message(char *desc, struct idmap *idmap, + struct idmap_msg *im, struct rpc_pipe_msg *msg) { substring_t substr; int token, ret; - memset(im, 0, sizeof(*im)); - memset(msg, 0, sizeof(*msg)); - im->im_type = IDMAP_TYPE_GROUP; token = match_token(desc, nfs_idmap_tokens, 
&substr); @@ -651,10 +655,40 @@ out: return ret; } +static bool +nfs_idmap_prepare_pipe_upcall(struct idmap *idmap, + struct idmap_legacy_upcalldata *data) +{ + if (idmap->idmap_upcall_data != NULL) { + WARN_ON_ONCE(1); + return false; + } + idmap->idmap_upcall_data = data; + return true; +} + +static void +nfs_idmap_complete_pipe_upcall_locked(struct idmap *idmap, int ret) +{ + struct key_construction *cons = idmap->idmap_upcall_data->key_cons; + + kfree(idmap->idmap_upcall_data); + idmap->idmap_upcall_data = NULL; + complete_request_key(cons, ret); +} + +static void +nfs_idmap_abort_pipe_upcall(struct idmap *idmap, int ret) +{ + if (idmap->idmap_upcall_data != NULL) + nfs_idmap_complete_pipe_upcall_locked(idmap, ret); +} + static int nfs_idmap_legacy_upcall(struct key_construction *cons, const char *op, void *aux) { + struct idmap_legacy_upcalldata *data; struct rpc_pipe_msg *msg; struct idmap_msg *im; struct idmap *idmap = (struct idmap *)aux; @@ -662,33 +696,32 @@ static int nfs_idmap_legacy_upcall(struct key_construction *cons, int ret = -ENOMEM; /* msg and im are freed in idmap_pipe_destroy_msg */ - msg = kmalloc(sizeof(*msg), GFP_KERNEL); - if (!msg) - goto out0; - - im = kmalloc(sizeof(*im), GFP_KERNEL); - if (!im) + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) goto out1; - ret = nfs_idmap_prepare_message(key->description, im, msg); + msg = &data->pipe_msg; + im = &data->idmap_msg; + data->idmap = idmap; + data->key_cons = cons; + + ret = nfs_idmap_prepare_message(key->description, idmap, im, msg); if (ret < 0) goto out2; - idmap->idmap_key_cons = cons; + ret = -EAGAIN; + if (!nfs_idmap_prepare_pipe_upcall(idmap, data)) + goto out2; ret = rpc_queue_upcall(idmap->idmap_pipe, msg); if (ret < 0) - goto out2; + nfs_idmap_abort_pipe_upcall(idmap, ret); return ret; - out2: - kfree(im); + kfree(data); out1: - kfree(msg); -out0: - key_revoke(cons->key); - key_revoke(cons->authkey); + complete_request_key(cons, ret); return ret; } @@ -699,21 +732,32 @@ 
static int nfs_idmap_instantiate(struct key *key, struct key *authkey, char *dat authkey); } -static int nfs_idmap_read_message(struct idmap_msg *im, struct key *key, struct key *authkey) +static int nfs_idmap_read_and_verify_message(struct idmap_msg *im, + struct idmap_msg *upcall, + struct key *key, struct key *authkey) { char id_str[NFS_UINT_MAXLEN]; - int ret = -EINVAL; + int ret = -ENOKEY; + /* ret = -ENOKEY */ + if (upcall->im_type != im->im_type || upcall->im_conv != im->im_conv) + goto out; switch (im->im_conv) { case IDMAP_CONV_NAMETOID: + if (strcmp(upcall->im_name, im->im_name) != 0) + break; sprintf(id_str, "%d", im->im_id); ret = nfs_idmap_instantiate(key, authkey, id_str); break; case IDMAP_CONV_IDTONAME: + if (upcall->im_id != im->im_id) + break; ret = nfs_idmap_instantiate(key, authkey, im->im_name); break; + default: + ret = -EINVAL; } - +out: return ret; } @@ -722,10 +766,19 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) { struct rpc_inode *rpci = RPC_I(filp->f_path.dentry->d_inode); struct idmap *idmap = (struct idmap *)rpci->private; - struct key_construction *cons = idmap->idmap_key_cons; + struct key_construction *cons; struct idmap_msg im; size_t namelen_in; - int ret; + int ret = -ENOKEY; + + /* If instantiation is successful, anyone waiting for key construction + * will have been woken up and someone else may now have used + * idmap_key_cons - so after this point we may no longer touch it. 
+ */ + if (idmap->idmap_upcall_data == NULL) + goto out_noupcall; + + cons = idmap->idmap_upcall_data->key_cons; if (mlen != sizeof(im)) { ret = -ENOSPC; @@ -738,35 +791,49 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) } if (!(im.im_status & IDMAP_STATUS_SUCCESS)) { - ret = mlen; - complete_request_key(idmap->idmap_key_cons, -ENOKEY); - goto out_incomplete; + ret = -ENOKEY; + goto out; } namelen_in = strnlen(im.im_name, IDMAP_NAMESZ); if (namelen_in == 0 || namelen_in == IDMAP_NAMESZ) { ret = -EINVAL; goto out; - } +} - ret = nfs_idmap_read_message(&im, cons->key, cons->authkey); + ret = nfs_idmap_read_and_verify_message(&im, + &idmap->idmap_upcall_data->idmap_msg, + cons->key, cons->authkey); if (ret >= 0) { key_set_timeout(cons->key, nfs_idmap_cache_timeout); ret = mlen; } out: - complete_request_key(idmap->idmap_key_cons, ret); -out_incomplete: + nfs_idmap_complete_pipe_upcall_locked(idmap, ret); +out_noupcall: return ret; } static void idmap_pipe_destroy_msg(struct rpc_pipe_msg *msg) { - /* Free memory allocated in nfs_idmap_legacy_upcall() */ - kfree(msg->data); - kfree(msg); + struct idmap_legacy_upcalldata *data = container_of(msg, + struct idmap_legacy_upcalldata, + pipe_msg); + struct idmap *idmap = data->idmap; + + if (msg->errno) + nfs_idmap_abort_pipe_upcall(idmap, msg->errno); +} + +static void +idmap_release_pipe(struct inode *inode) +{ + struct rpc_inode *rpci = RPC_I(inode); + struct idmap *idmap = (struct idmap *)rpci->private; + + nfs_idmap_abort_pipe_upcall(idmap, -EPIPE); } int nfs_map_name_to_uid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *uid) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index f7296983eba..ebeb94ce1b0 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -32,7 +32,6 @@ #include <linux/lockd/bind.h> #include <linux/seq_file.h> #include <linux/mount.h> -#include <linux/nfs_idmap.h> #include <linux/vfs.h> #include <linux/inet.h> #include <linux/nfs_xdr.h> @@ -51,6 +50,7 
@@ #include "fscache.h" #include "dns_resolve.h" #include "pnfs.h" +#include "nfs.h" #include "netns.h" #define NFSDBG_FACILITY NFSDBG_VFS @@ -82,6 +82,7 @@ int nfs_wait_bit_killable(void *word) freezable_schedule(); return 0; } +EXPORT_SYMBOL_GPL(nfs_wait_bit_killable); /** * nfs_compat_user_ino64 - returns the user-visible inode number @@ -106,17 +107,24 @@ u64 nfs_compat_user_ino64(u64 fileid) return ino; } -static void nfs_clear_inode(struct inode *inode) +int nfs_drop_inode(struct inode *inode) +{ + return NFS_STALE(inode) || generic_drop_inode(inode); +} +EXPORT_SYMBOL_GPL(nfs_drop_inode); + +void nfs_clear_inode(struct inode *inode) { /* * The following should never happen... */ - BUG_ON(nfs_have_writebacks(inode)); - BUG_ON(!list_empty(&NFS_I(inode)->open_files)); + WARN_ON_ONCE(nfs_have_writebacks(inode)); + WARN_ON_ONCE(!list_empty(&NFS_I(inode)->open_files)); nfs_zap_acl_cache(inode); nfs_access_zap_cache(inode); nfs_fscache_release_inode_cookie(inode); } +EXPORT_SYMBOL_GPL(nfs_clear_inode); void nfs_evict_inode(struct inode *inode) { @@ -152,11 +160,13 @@ static void nfs_zap_caches_locked(struct inode *inode) nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); nfsi->attrtimeo_timestamp = jiffies; - memset(NFS_COOKIEVERF(inode), 0, sizeof(NFS_COOKIEVERF(inode))); - if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) + memset(NFS_I(inode)->cookieverf, 0, sizeof(NFS_I(inode)->cookieverf)); + if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) { nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL|NFS_INO_REVAL_PAGECACHE; - else + nfs_fscache_invalidate(inode); + } else { nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL|NFS_INO_REVAL_PAGECACHE; + } } void nfs_zap_caches(struct inode *inode) @@ -171,6 +181,7 @@ void nfs_zap_mapping(struct inode *inode, struct address_space *mapping) if (mapping->nrpages != 0) { spin_lock(&inode->i_lock); NFS_I(inode)->cache_validity |= 
NFS_INO_INVALID_DATA; + nfs_fscache_invalidate(inode); spin_unlock(&inode->i_lock); } } @@ -186,6 +197,7 @@ void nfs_zap_acl_cache(struct inode *inode) NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_ACL; spin_unlock(&inode->i_lock); } +EXPORT_SYMBOL_GPL(nfs_zap_acl_cache); void nfs_invalidate_atime(struct inode *inode) { @@ -193,6 +205,7 @@ void nfs_invalidate_atime(struct inode *inode) NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATIME; spin_unlock(&inode->i_lock); } +EXPORT_SYMBOL_GPL(nfs_invalidate_atime); /* * Invalidate, but do not unhash, the inode. @@ -391,6 +404,7 @@ out_no_inode: dprintk("nfs_fhget: iget failed with error %ld\n", PTR_ERR(inode)); goto out; } +EXPORT_SYMBOL_GPL(nfs_fhget); #define NFS_VALID_ATTRS (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_SIZE|ATTR_ATIME|ATTR_ATIME_SET|ATTR_MTIME|ATTR_MTIME_SET|ATTR_FILE|ATTR_OPEN) @@ -430,7 +444,7 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr) * Return any delegations if we're going to change ACLs */ if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0) - nfs_inode_return_delegation(inode); + NFS_PROTO(inode)->return_delegation(inode); error = NFS_PROTO(inode)->setattr(dentry, fattr, attr); if (error == 0) nfs_refresh_inode(inode, fattr); @@ -438,6 +452,7 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr) out: return error; } +EXPORT_SYMBOL_GPL(nfs_setattr); /** * nfs_vmtruncate - unmap mappings "freed" by truncate() syscall @@ -496,6 +511,7 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr) nfs_vmtruncate(inode, attr->ia_size); } } +EXPORT_SYMBOL_GPL(nfs_setattr_update_inode); int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) { @@ -535,12 +551,13 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) out: return err; } +EXPORT_SYMBOL_GPL(nfs_getattr); static void nfs_init_lock_context(struct nfs_lock_context *l_ctx) { atomic_set(&l_ctx->count, 1); - l_ctx->lockowner = current->files; - l_ctx->pid = 
current->tgid; + l_ctx->lockowner.l_owner = current->files; + l_ctx->lockowner.l_pid = current->tgid; INIT_LIST_HEAD(&l_ctx->list); } @@ -549,9 +566,9 @@ static struct nfs_lock_context *__nfs_find_lock_context(struct nfs_open_context struct nfs_lock_context *pos; list_for_each_entry(pos, &ctx->lock_context.list, list) { - if (pos->lockowner != current->files) + if (pos->lockowner.l_owner != current->files) continue; - if (pos->pid != current->tgid) + if (pos->lockowner.l_pid != current->tgid) continue; atomic_inc(&pos->count); return pos; @@ -570,7 +587,7 @@ struct nfs_lock_context *nfs_get_lock_context(struct nfs_open_context *ctx) spin_unlock(&inode->i_lock); new = kmalloc(sizeof(*new), GFP_KERNEL); if (new == NULL) - return NULL; + return ERR_PTR(-ENOMEM); nfs_init_lock_context(new); spin_lock(&inode->i_lock); res = __nfs_find_lock_context(ctx); @@ -623,6 +640,7 @@ void nfs_close_context(struct nfs_open_context *ctx, int is_sync) return; nfs_revalidate_inode(server, inode); } +EXPORT_SYMBOL_GPL(nfs_close_context); struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, fmode_t f_mode) { @@ -649,6 +667,7 @@ struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, fmode_t f ctx->mdsthreshold = NULL; return ctx; } +EXPORT_SYMBOL_GPL(alloc_nfs_open_context); struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx) { @@ -656,6 +675,7 @@ struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx) atomic_inc(&ctx->lock_context.count); return ctx; } +EXPORT_SYMBOL_GPL(get_nfs_open_context); static void __put_nfs_open_context(struct nfs_open_context *ctx, int is_sync) { @@ -674,7 +694,10 @@ static void __put_nfs_open_context(struct nfs_open_context *ctx, int is_sync) if (ctx->cred != NULL) put_rpccred(ctx->cred); dput(ctx->dentry); - nfs_sb_deactive(sb); + if (is_sync) + nfs_sb_deactive(sb); + else + nfs_sb_deactive_async(sb); kfree(ctx->mdsthreshold); kfree(ctx); } @@ -683,6 +706,7 @@ void 
put_nfs_open_context(struct nfs_open_context *ctx) { __put_nfs_open_context(ctx, 0); } +EXPORT_SYMBOL_GPL(put_nfs_open_context); /* * Ensure that mmap has a recent RPC credential for use when writing out @@ -698,6 +722,7 @@ void nfs_file_set_open_context(struct file *filp, struct nfs_open_context *ctx) list_add(&ctx->list, &nfsi->open_files); spin_unlock(&inode->i_lock); } +EXPORT_SYMBOL_GPL(nfs_file_set_open_context); /* * Given an inode, search for an open context with the desired characteristics @@ -842,6 +867,7 @@ int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) return NFS_STALE(inode) ? -ESTALE : 0; return __nfs_revalidate_inode(server, inode); } +EXPORT_SYMBOL_GPL(nfs_revalidate_inode); static int nfs_invalidate_mapping(struct inode *inode, struct address_space *mapping) { @@ -858,7 +884,7 @@ static int nfs_invalidate_mapping(struct inode *inode, struct address_space *map memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf)); spin_unlock(&inode->i_lock); nfs_inc_stats(inode, NFSIOS_DATAINVALIDATE); - nfs_fscache_reset_inode_cookie(inode); + nfs_fscache_wait_on_invalidate(inode); dfprintk(PAGECACHE, "NFS: (%s/%Ld) data cache invalidated\n", inode->i_sb->s_id, (long long)NFS_FILEID(inode)); return 0; @@ -883,6 +909,10 @@ int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping) struct nfs_inode *nfsi = NFS_I(inode); int ret = 0; + /* swapfiles are not supposed to be shared. 
*/ + if (IS_SWAPFILE(inode)) + goto out; + if (nfs_mapping_need_revalidate_inode(inode)) { ret = __nfs_revalidate_inode(NFS_SERVER(inode), inode); if (ret < 0) @@ -930,6 +960,10 @@ static unsigned long nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr i_size_write(inode, nfs_size_to_loff_t(fattr->size)); ret |= NFS_INO_INVALID_ATTR; } + + if (nfsi->cache_validity & NFS_INO_INVALID_DATA) + nfs_fscache_invalidate(inode); + return ret; } @@ -1028,6 +1062,7 @@ void nfs_fattr_init(struct nfs_fattr *fattr) fattr->owner_name = NULL; fattr->group_name = NULL; } +EXPORT_SYMBOL_GPL(nfs_fattr_init); struct nfs_fattr *nfs_alloc_fattr(void) { @@ -1038,6 +1073,7 @@ struct nfs_fattr *nfs_alloc_fattr(void) nfs_fattr_init(fattr); return fattr; } +EXPORT_SYMBOL_GPL(nfs_alloc_fattr); struct nfs_fh *nfs_alloc_fhandle(void) { @@ -1048,6 +1084,7 @@ struct nfs_fh *nfs_alloc_fhandle(void) fh->size = 0; return fh; } +EXPORT_SYMBOL_GPL(nfs_alloc_fhandle); #ifdef NFS_DEBUG /* @@ -1168,14 +1205,17 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) return status; } +EXPORT_SYMBOL_GPL(nfs_refresh_inode); static int nfs_post_op_update_inode_locked(struct inode *inode, struct nfs_fattr *fattr) { struct nfs_inode *nfsi = NFS_I(inode); nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; - if (S_ISDIR(inode->i_mode)) + if (S_ISDIR(inode->i_mode)) { nfsi->cache_validity |= NFS_INO_INVALID_DATA; + nfs_fscache_invalidate(inode); + } if ((fattr->valid & NFS_ATTR_FATTR) == 0) return 0; return nfs_refresh_inode_locked(inode, fattr); @@ -1204,6 +1244,7 @@ int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr) spin_unlock(&inode->i_lock); return status; } +EXPORT_SYMBOL_GPL(nfs_post_op_update_inode); /** * nfs_post_op_update_inode_force_wcc - try to update the inode attribute cache @@ -1255,6 +1296,7 @@ out_noforce: spin_unlock(&inode->i_lock); return status; } +EXPORT_SYMBOL_GPL(nfs_post_op_update_inode_force_wcc); /* * Many nfs protocol 
calls return the new file attributes after @@ -1457,10 +1499,13 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))) invalid &= ~NFS_INO_INVALID_DATA; - if (!nfs_have_delegation(inode, FMODE_READ) || + if (!NFS_PROTO(inode)->have_delegation(inode, FMODE_READ) || (save_cache_validity & NFS_INO_REVAL_FORCED)) nfsi->cache_validity |= invalid; + if (invalid & NFS_INO_INVALID_DATA) + nfs_fscache_invalidate(inode); + return 0; out_err: /* @@ -1472,27 +1517,6 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) return -ESTALE; } - -#ifdef CONFIG_NFS_V4 - -/* - * Clean out any remaining NFSv4 state that might be left over due - * to open() calls that passed nfs_atomic_lookup, but failed to call - * nfs_open(). - */ -void nfs4_evict_inode(struct inode *inode) -{ - truncate_inode_pages(&inode->i_data, 0); - clear_inode(inode); - pnfs_return_layout(inode); - pnfs_destroy_layout(NFS_I(inode)); - /* If we are holding a delegation, return it! 
*/ - nfs_inode_return_delegation_noreclaim(inode); - /* First call standard NFS clear_inode() code */ - nfs_clear_inode(inode); -} -#endif - struct inode *nfs_alloc_inode(struct super_block *sb) { struct nfs_inode *nfsi; @@ -1505,11 +1529,12 @@ struct inode *nfs_alloc_inode(struct super_block *sb) nfsi->acl_access = ERR_PTR(-EAGAIN); nfsi->acl_default = ERR_PTR(-EAGAIN); #endif -#ifdef CONFIG_NFS_V4 +#if IS_ENABLED(CONFIG_NFS_V4) nfsi->nfs4_acl = NULL; #endif /* CONFIG_NFS_V4 */ return &nfsi->vfs_inode; } +EXPORT_SYMBOL_GPL(nfs_alloc_inode); static void nfs_i_callback(struct rcu_head *head) { @@ -1521,10 +1546,11 @@ void nfs_destroy_inode(struct inode *inode) { call_rcu(&inode->i_rcu, nfs_i_callback); } +EXPORT_SYMBOL_GPL(nfs_destroy_inode); static inline void nfs4_init_once(struct nfs_inode *nfsi) { -#ifdef CONFIG_NFS_V4 +#if IS_ENABLED(CONFIG_NFS_V4) INIT_LIST_HEAD(&nfsi->open_states); nfsi->delegation = NULL; nfsi->delegation_state = 0; @@ -1566,10 +1592,16 @@ static int __init nfs_init_inodecache(void) static void nfs_destroy_inodecache(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. 
+ */ + rcu_barrier(); kmem_cache_destroy(nfs_inode_cachep); } struct workqueue_struct *nfsiod_workqueue; +EXPORT_SYMBOL_GPL(nfsiod_workqueue); /* * start up the nfsiod workqueue @@ -1628,81 +1660,76 @@ static int __init init_nfs_fs(void) { int err; - err = nfs_idmap_init(); - if (err < 0) - goto out10; - err = nfs_dns_resolver_init(); if (err < 0) - goto out9; + goto out10;; err = register_pernet_subsys(&nfs_net_ops); if (err < 0) - goto out8; + goto out9; err = nfs_fscache_register(); if (err < 0) - goto out7; + goto out8; err = nfsiod_start(); if (err) - goto out6; + goto out7; err = nfs_fs_proc_init(); if (err) - goto out5; + goto out6; err = nfs_init_nfspagecache(); if (err) - goto out4; + goto out5; err = nfs_init_inodecache(); if (err) - goto out3; + goto out4; err = nfs_init_readpagecache(); if (err) - goto out2; + goto out3; err = nfs_init_writepagecache(); if (err) - goto out1; + goto out2; err = nfs_init_directcache(); if (err) - goto out0; + goto out1; #ifdef CONFIG_PROC_FS rpc_proc_register(&init_net, &nfs_rpcstat); #endif if ((err = register_nfs_fs()) != 0) - goto out; + goto out0; + return 0; -out: +out0: #ifdef CONFIG_PROC_FS rpc_proc_unregister(&init_net, "nfs"); #endif nfs_destroy_directcache(); -out0: - nfs_destroy_writepagecache(); out1: - nfs_destroy_readpagecache(); + nfs_destroy_writepagecache(); out2: - nfs_destroy_inodecache(); + nfs_destroy_readpagecache(); out3: - nfs_destroy_nfspagecache(); + nfs_destroy_inodecache(); out4: - nfs_fs_proc_exit(); + nfs_destroy_nfspagecache(); out5: - nfsiod_stop(); + nfs_fs_proc_exit(); out6: - nfs_fscache_unregister(); + nfsiod_stop(); out7: - unregister_pernet_subsys(&nfs_net_ops); + nfs_fscache_unregister(); out8: - nfs_dns_resolver_destroy(); + unregister_pernet_subsys(&nfs_net_ops); out9: - nfs_idmap_quit(); + nfs_dns_resolver_destroy(); out10: return err; } @@ -1717,7 +1744,6 @@ static void __exit exit_nfs_fs(void) nfs_fscache_unregister(); unregister_pernet_subsys(&nfs_net_ops); 
nfs_dns_resolver_destroy(); - nfs_idmap_quit(); #ifdef CONFIG_PROC_FS rpc_proc_unregister(&init_net, "nfs"); #endif diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 18f99ef7134..f0e6c7df1a0 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -18,27 +18,6 @@ struct nfs_string; */ #define NFS_MAX_READAHEAD (RPC_DEF_SLOT_TABLE - 1) -/* - * Determine if sessions are in use. - */ -static inline int nfs4_has_session(const struct nfs_client *clp) -{ -#ifdef CONFIG_NFS_V4_1 - if (clp->cl_session) - return 1; -#endif /* CONFIG_NFS_V4_1 */ - return 0; -} - -static inline int nfs4_has_persistent_session(const struct nfs_client *clp) -{ -#ifdef CONFIG_NFS_V4_1 - if (nfs4_has_session(clp)) - return (clp->cl_session->flags & SESSION4_PERSIST); -#endif /* CONFIG_NFS_V4_1 */ - return 0; -} - static inline void nfs_attr_check_mountpoint(struct super_block *parent, struct nfs_fattr *fattr) { if (!nfs_fsid_equal(&NFS_SB(parent)->fsid, &fattr->fsid)) @@ -85,16 +64,27 @@ struct nfs_clone_mount { */ #define NFS_MAX_READDIR_PAGES 8 +struct nfs_client_initdata { + unsigned long init_flags; + const char *hostname; + const struct sockaddr *addr; + size_t addrlen; + struct nfs_subversion *nfs_mod; + int proto; + u32 minorversion; + struct net *net; +}; + /* * In-kernel mount arguments */ struct nfs_parsed_mount_data { int flags; - int rsize, wsize; - int timeo, retrans; - int acregmin, acregmax, + unsigned int rsize, wsize; + unsigned int timeo, retrans; + unsigned int acregmin, acregmax, acdirmin, acdirmax; - int namlen; + unsigned int namlen; unsigned int options; unsigned int bsize; unsigned int auth_flavor_len; @@ -142,25 +132,45 @@ struct nfs_mount_request { struct net *net; }; +struct nfs_mount_info { + void (*fill_super)(struct super_block *, struct nfs_mount_info *); + int (*set_security)(struct super_block *, struct dentry *, struct nfs_mount_info *); + struct nfs_parsed_mount_data *parsed; + struct nfs_clone_mount *cloned; + struct nfs_fh *mntfh; +}; + extern int 
nfs_mount(struct nfs_mount_request *info); extern void nfs_umount(const struct nfs_mount_request *info); /* client.c */ extern const struct rpc_program nfs_program; extern void nfs_clients_init(struct net *net); +extern struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *); +int nfs_create_rpc_client(struct nfs_client *, const struct rpc_timeout *, rpc_authflavor_t); +struct nfs_client *nfs_get_client(const struct nfs_client_initdata *, + const struct rpc_timeout *, const char *, + rpc_authflavor_t); +int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *, struct nfs_fattr *); +void nfs_server_insert_lists(struct nfs_server *); +void nfs_init_timeout_values(struct rpc_timeout *, int, unsigned int, unsigned int); +int nfs_init_server_rpcclient(struct nfs_server *, const struct rpc_timeout *t, + rpc_authflavor_t); +struct nfs_server *nfs_alloc_server(void); +void nfs_server_copy_userdata(struct nfs_server *, struct nfs_server *); extern void nfs_cleanup_cb_ident_idr(struct net *); extern void nfs_put_client(struct nfs_client *); +extern void nfs_free_client(struct nfs_client *); extern struct nfs_client *nfs4_find_client_ident(struct net *, int); extern struct nfs_client * nfs4_find_client_sessionid(struct net *, const struct sockaddr *, struct nfs4_sessionid *); -extern struct nfs_server *nfs_create_server( - const struct nfs_parsed_mount_data *, - struct nfs_fh *); +extern struct nfs_server *nfs_create_server(struct nfs_mount_info *, + struct nfs_subversion *); extern struct nfs_server *nfs4_create_server( - const struct nfs_parsed_mount_data *, - struct nfs_fh *); + struct nfs_mount_info *, + struct nfs_subversion *); extern struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *, struct nfs_fh *); extern void nfs_free_server(struct nfs_server *server); @@ -188,6 +198,17 @@ static inline void nfs_fs_proc_exit(void) } #endif +#ifdef CONFIG_NFS_V4_1 +int nfs_sockaddr_match_ipaddr(const struct sockaddr *, const struct 
sockaddr *); +#endif + +/* nfs3client.c */ +#if IS_ENABLED(CONFIG_NFS_V3) +struct nfs_server *nfs3_create_server(struct nfs_mount_info *, struct nfs_subversion *); +struct nfs_server *nfs3_clone_server(struct nfs_server *, struct nfs_fh *, + struct nfs_fattr *, rpc_authflavor_t); +#endif + /* callback_xdr.c */ extern struct svc_version nfs4_callback_version1; extern struct svc_version nfs4_callback_version4; @@ -220,7 +241,7 @@ extern int nfs3_decode_dirent(struct xdr_stream *, struct nfs_entry *, int); /* nfs4xdr.c */ -#ifdef CONFIG_NFS_V4 +#if IS_ENABLED(CONFIG_NFS_V4) extern int nfs4_decode_dirent(struct xdr_stream *, struct nfs_entry *, int); #endif @@ -230,12 +251,10 @@ extern const u32 nfs41_maxwrite_overhead; #endif /* nfs4proc.c */ -#ifdef CONFIG_NFS_V4 +#if IS_ENABLED(CONFIG_NFS_V4) extern struct rpc_procinfo nfs4_procedures[]; #endif -extern int nfs4_init_ds_session(struct nfs_client *, unsigned long); - /* proc.c */ void nfs_close_context(struct nfs_open_context *ctx, int is_sync); extern struct nfs_client *nfs_init_client(struct nfs_client *clp, @@ -245,25 +264,64 @@ extern struct nfs_client *nfs_init_client(struct nfs_client *clp, /* dir.c */ extern int nfs_access_cache_shrinker(struct shrinker *shrink, struct shrink_control *sc); +struct dentry *nfs_lookup(struct inode *, struct dentry *, unsigned int); +int nfs_create(struct inode *, struct dentry *, umode_t, bool); +int nfs_mkdir(struct inode *, struct dentry *, umode_t); +int nfs_rmdir(struct inode *, struct dentry *); +int nfs_unlink(struct inode *, struct dentry *); +int nfs_symlink(struct inode *, struct dentry *, const char *); +int nfs_link(struct dentry *, struct inode *, struct dentry *); +int nfs_mknod(struct inode *, struct dentry *, umode_t, dev_t); +int nfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *); + +/* file.c */ +int nfs_file_fsync_commit(struct file *, loff_t, loff_t, int); +loff_t nfs_file_llseek(struct file *, loff_t, int); +int nfs_file_flush(struct 
file *, fl_owner_t); +ssize_t nfs_file_read(struct kiocb *, const struct iovec *, unsigned long, loff_t); +ssize_t nfs_file_splice_read(struct file *, loff_t *, struct pipe_inode_info *, + size_t, unsigned int); +int nfs_file_mmap(struct file *, struct vm_area_struct *); +ssize_t nfs_file_write(struct kiocb *, const struct iovec *, unsigned long, loff_t); +int nfs_file_release(struct inode *, struct file *); +int nfs_lock(struct file *, int, struct file_lock *); +int nfs_flock(struct file *, int, struct file_lock *); +ssize_t nfs_file_splice_write(struct pipe_inode_info *, struct file *, loff_t *, + size_t, unsigned int); +int nfs_check_flags(int); +int nfs_setlease(struct file *, long, struct file_lock **); /* inode.c */ extern struct workqueue_struct *nfsiod_workqueue; extern struct inode *nfs_alloc_inode(struct super_block *sb); extern void nfs_destroy_inode(struct inode *); extern int nfs_write_inode(struct inode *, struct writeback_control *); +extern int nfs_drop_inode(struct inode *); +extern void nfs_clear_inode(struct inode *); extern void nfs_evict_inode(struct inode *); -#ifdef CONFIG_NFS_V4 -extern void nfs4_evict_inode(struct inode *); -#endif void nfs_zap_acl_cache(struct inode *inode); extern int nfs_wait_bit_killable(void *word); /* super.c */ +extern const struct super_operations nfs_sops; +extern struct file_system_type nfs_fs_type; extern struct file_system_type nfs_xdev_fs_type; -#ifdef CONFIG_NFS_V4 +#if IS_ENABLED(CONFIG_NFS_V4) extern struct file_system_type nfs4_xdev_fs_type; extern struct file_system_type nfs4_referral_fs_type; #endif +struct dentry *nfs_try_mount(int, const char *, struct nfs_mount_info *, + struct nfs_subversion *); +void nfs_initialise_sb(struct super_block *); +int nfs_set_sb_security(struct super_block *, struct dentry *, struct nfs_mount_info *); +int nfs_clone_sb_security(struct super_block *, struct dentry *, struct nfs_mount_info *); +struct dentry *nfs_fs_mount_common(struct nfs_server *, int, const char *, + 
struct nfs_mount_info *, struct nfs_subversion *); +struct dentry *nfs_fs_mount(struct file_system_type *, int, const char *, void *); +struct dentry * nfs_xdev_mount_common(struct file_system_type *, int, + const char *, struct nfs_mount_info *); +void nfs_kill_super(struct super_block *); +void nfs_fill_super(struct super_block *, struct nfs_mount_info *); extern struct rpc_stat nfs_rpcstat; @@ -271,10 +329,12 @@ extern int __init register_nfs_fs(void); extern void __exit unregister_nfs_fs(void); extern void nfs_sb_active(struct super_block *sb); extern void nfs_sb_deactive(struct super_block *sb); +extern void nfs_sb_deactive_async(struct super_block *sb); /* namespace.c */ +#define NFS_PATH_CANONICAL 1 extern char *nfs_path(char **p, struct dentry *dentry, - char *buffer, ssize_t buflen); + char *buffer, ssize_t buflen, unsigned flags); extern struct vfsmount *nfs_d_automount(struct path *path); struct vfsmount *nfs_submount(struct nfs_server *, struct dentry *, struct nfs_fh *, struct nfs_fattr *); @@ -284,7 +344,7 @@ struct vfsmount *nfs_do_submount(struct dentry *, struct nfs_fh *, /* getroot.c */ extern struct dentry *nfs_get_root(struct super_block *, struct nfs_fh *, const char *); -#ifdef CONFIG_NFS_V4 +#if IS_ENABLED(CONFIG_NFS_V4) extern struct dentry *nfs4_get_root(struct super_block *, struct nfs_fh *, const char *); @@ -304,12 +364,20 @@ extern int nfs_initiate_read(struct rpc_clnt *clnt, extern void nfs_read_prepare(struct rpc_task *task, void *calldata); extern int nfs_generic_pagein(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr); -extern void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio, - struct inode *inode, - const struct nfs_pgio_completion_ops *compl_ops); extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio); extern void nfs_readdata_release(struct nfs_read_data *rdata); +/* super.c */ +void nfs_clone_super(struct super_block *, struct nfs_mount_info *); +void nfs_umount_begin(struct 
super_block *); +int nfs_statfs(struct dentry *, struct kstatfs *); +int nfs_show_options(struct seq_file *, struct dentry *); +int nfs_show_devname(struct seq_file *, struct dentry *); +int nfs_show_path(struct seq_file *, struct dentry *); +int nfs_show_stats(struct seq_file *, struct dentry *); +void nfs_put_super(struct super_block *); +int nfs_remount(struct super_block *sb, int *flags, char *raw_data); + /* write.c */ extern void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags, @@ -318,9 +386,6 @@ extern struct nfs_write_header *nfs_writehdr_alloc(void); extern void nfs_writehdr_free(struct nfs_pgio_header *hdr); extern int nfs_generic_flush(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr); -extern void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio, - struct inode *inode, int ioflags, - const struct nfs_pgio_completion_ops *compl_ops); extern void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio); extern void nfs_writedata_release(struct nfs_write_data *wdata); extern void nfs_commit_free(struct nfs_commit_data *p); @@ -373,6 +438,7 @@ static inline void nfs_inode_dio_wait(struct inode *inode) { inode_dio_wait(inode); } +extern ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq); /* nfs4proc.c */ extern void __nfs4_read_done_cb(struct nfs_read_data *); @@ -380,18 +446,12 @@ extern struct nfs_client *nfs4_init_client(struct nfs_client *clp, const struct rpc_timeout *timeparms, const char *ip_addr, rpc_authflavor_t authflavour); -extern int _nfs4_call_sync(struct rpc_clnt *clnt, - struct nfs_server *server, - struct rpc_message *msg, - struct nfs4_sequence_args *args, - struct nfs4_sequence_res *res, - int cache_reply); -extern int _nfs4_call_sync_session(struct rpc_clnt *clnt, - struct nfs_server *server, - struct rpc_message *msg, - struct nfs4_sequence_args *args, - struct nfs4_sequence_res *res, - int cache_reply); +extern int nfs40_walk_client_list(struct nfs_client 
*clp, + struct nfs_client **result, + struct rpc_cred *cred); +extern int nfs41_walk_client_list(struct nfs_client *clp, + struct nfs_client **result, + struct rpc_cred *cred); /* * Determine the device name as a string @@ -400,7 +460,7 @@ static inline char *nfs_devname(struct dentry *dentry, char *buffer, ssize_t buflen) { char *dummy; - return nfs_path(&dummy, dentry, buffer, buflen); + return nfs_path(&dummy, dentry, buffer, buflen, NFS_PATH_CANONICAL); } /* @@ -463,13 +523,14 @@ void nfs_super_set_maxbytes(struct super_block *sb, __u64 maxfilesize) static inline unsigned int nfs_page_length(struct page *page) { - loff_t i_size = i_size_read(page->mapping->host); + loff_t i_size = i_size_read(page_file_mapping(page)->host); if (i_size > 0) { + pgoff_t page_index = page_file_index(page); pgoff_t end_index = (i_size - 1) >> PAGE_CACHE_SHIFT; - if (page->index < end_index) + if (page_index < end_index) return PAGE_CACHE_SIZE; - if (page->index == end_index) + if (page_index == end_index) return ((i_size - 1) & ~PAGE_CACHE_MASK) + 1; } return 0; diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c index 8e65c7f1f87..91a6faf811a 100644 --- a/fs/nfs/mount_clnt.c +++ b/fs/nfs/mount_clnt.c @@ -169,6 +169,9 @@ int nfs_mount(struct nfs_mount_request *info) (info->hostname ? 
info->hostname : "server"), info->dirpath); + if (strlen(info->dirpath) > MNTPATHLEN) + return -ENAMETOOLONG; + if (info->noresvport) args.flags |= RPC_CLNT_CREATE_NONPRIVPORT; @@ -181,7 +184,7 @@ int nfs_mount(struct nfs_mount_request *info) else msg.rpc_proc = &mnt_clnt->cl_procinfo[MOUNTPROC_MNT]; - status = rpc_call_sync(mnt_clnt, &msg, 0); + status = rpc_call_sync(mnt_clnt, &msg, RPC_TASK_SOFT|RPC_TASK_TIMEOUT); rpc_shutdown_client(mnt_clnt); if (status < 0) @@ -242,6 +245,9 @@ void nfs_umount(const struct nfs_mount_request *info) struct rpc_clnt *clnt; int status; + if (strlen(info->dirpath) > MNTPATHLEN) + return; + if (info->noresvport) args.flags |= RPC_CLNT_CREATE_NONPRIVPORT; @@ -283,7 +289,6 @@ static void encode_mntdirpath(struct xdr_stream *xdr, const char *pathname) const u32 pathname_len = strlen(pathname); __be32 *p; - BUG_ON(pathname_len > MNTPATHLEN); p = xdr_reserve_space(xdr, 4 + pathname_len); xdr_encode_opaque(p, pathname, pathname_len); } diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index 08b9c93675d..fc8dc20fdeb 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -7,6 +7,7 @@ * NFS namespace */ +#include <linux/module.h> #include <linux/dcache.h> #include <linux/gfp.h> #include <linux/mount.h> @@ -32,6 +33,7 @@ int nfs_mountpoint_expiry_timeout = 500 * HZ; * @dentry - pointer to dentry * @buffer - result buffer * @buflen - length of buffer + * @flags - options (see below) * * Helper function for constructing the server pathname * by arbitrary hashed dentry. @@ -39,8 +41,14 @@ int nfs_mountpoint_expiry_timeout = 500 * HZ; * This is mainly for use in figuring out the path on the * server side when automounting on top of an existing partition * and in generating /proc/mounts and friends. 
+ * + * Supported flags: + * NFS_PATH_CANONICAL: ensure there is exactly one slash after + * the original device (export) name + * (if unset, the original name is returned verbatim) */ -char *nfs_path(char **p, struct dentry *dentry, char *buffer, ssize_t buflen) +char *nfs_path(char **p, struct dentry *dentry, char *buffer, ssize_t buflen, + unsigned flags) { char *end; int namelen; @@ -73,7 +81,7 @@ rename_retry: rcu_read_unlock(); goto rename_retry; } - if (*end != '/') { + if ((flags & NFS_PATH_CANONICAL) && *end != '/') { if (--buflen < 0) { spin_unlock(&dentry->d_lock); rcu_read_unlock(); @@ -90,9 +98,11 @@ rename_retry: return end; } namelen = strlen(base); - /* Strip off excess slashes in base string */ - while (namelen > 0 && base[namelen - 1] == '/') - namelen--; + if (flags & NFS_PATH_CANONICAL) { + /* Strip off excess slashes in base string */ + while (namelen > 0 && base[namelen - 1] == '/') + namelen--; + } buflen -= namelen; if (buflen < 0) { spin_unlock(&dentry->d_lock); @@ -112,6 +122,7 @@ Elong_unlock: Elong: return ERR_PTR(-ENAMETOOLONG); } +EXPORT_SYMBOL_GPL(nfs_path); /* * nfs_d_automount - Handle crossing a mountpoint on the server @@ -166,11 +177,31 @@ out_nofree: return mnt; } +static int +nfs_namespace_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) +{ + if (NFS_FH(dentry->d_inode)->size != 0) + return nfs_getattr(mnt, dentry, stat); + generic_fillattr(dentry->d_inode, stat); + return 0; +} + +static int +nfs_namespace_setattr(struct dentry *dentry, struct iattr *attr) +{ + if (NFS_FH(dentry->d_inode)->size != 0) + return nfs_setattr(dentry, attr); + return -EACCES; +} + const struct inode_operations nfs_mountpoint_inode_operations = { .getattr = nfs_getattr, + .setattr = nfs_setattr, }; const struct inode_operations nfs_referral_inode_operations = { + .getattr = nfs_namespace_getattr, + .setattr = nfs_namespace_setattr, }; static void nfs_expire_automounts(struct work_struct *work) @@ -195,20 +226,7 @@ static 
struct vfsmount *nfs_do_clone_mount(struct nfs_server *server, const char *devname, struct nfs_clone_mount *mountdata) { -#ifdef CONFIG_NFS_V4 - struct vfsmount *mnt = ERR_PTR(-EINVAL); - switch (server->nfs_client->rpc_ops->version) { - case 2: - case 3: - mnt = vfs_kern_mount(&nfs_xdev_fs_type, 0, devname, mountdata); - break; - case 4: - mnt = vfs_kern_mount(&nfs4_xdev_fs_type, 0, devname, mountdata); - } - return mnt; -#else return vfs_kern_mount(&nfs_xdev_fs_type, 0, devname, mountdata); -#endif } /** @@ -253,6 +271,7 @@ out: dprintk("<-- nfs_do_submount() = %p\n", mnt); return mnt; } +EXPORT_SYMBOL_GPL(nfs_do_submount); struct vfsmount *nfs_submount(struct nfs_server *server, struct dentry *dentry, struct nfs_fh *fh, struct nfs_fattr *fattr) @@ -268,3 +287,4 @@ struct vfsmount *nfs_submount(struct nfs_server *server, struct dentry *dentry, return nfs_do_submount(dentry, fh, fattr, server->client->cl_auth->au_flavor); } +EXPORT_SYMBOL_GPL(nfs_submount); diff --git a/fs/nfs/netns.h b/fs/nfs/netns.h index 8a6394edb8b..8ee1fab8326 100644 --- a/fs/nfs/netns.h +++ b/fs/nfs/netns.h @@ -5,6 +5,7 @@ #ifndef __NFS_NETNS_H__ #define __NFS_NETNS_H__ +#include <linux/nfs4.h> #include <net/net_namespace.h> #include <net/netns/generic.h> @@ -20,8 +21,11 @@ struct nfs_net { wait_queue_head_t bl_wq; struct list_head nfs_client_list; struct list_head nfs_volume_list; -#ifdef CONFIG_NFS_V4 +#if IS_ENABLED(CONFIG_NFS_V4) struct idr cb_ident_idr; /* Protected by nfs_client_lock */ + unsigned short nfs_callback_tcpport; + unsigned short nfs_callback_tcpport6; + int cb_users[NFS4_MAX_MINOR_VERSION + 1]; #endif spinlock_t nfs_client_lock; struct timespec boot_time; diff --git a/fs/nfs/nfs.h b/fs/nfs/nfs.h new file mode 100644 index 00000000000..43679df56cd --- /dev/null +++ b/fs/nfs/nfs.h @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2012 Netapp, Inc. All rights reserved. + * + * Function and structures exported by the NFS module + * for use by NFS version-specific modules. 
+ */ +#ifndef __LINUX_INTERNAL_NFS_H +#define __LINUX_INTERNAL_NFS_H + +#include <linux/fs.h> +#include <linux/sunrpc/sched.h> +#include <linux/nfs_xdr.h> + +struct nfs_subversion { + struct module *owner; /* THIS_MODULE pointer */ + struct file_system_type *nfs_fs; /* NFS filesystem type */ + const struct rpc_version *rpc_vers; /* NFS version information */ + const struct nfs_rpc_ops *rpc_ops; /* NFS operations */ + const struct super_operations *sops; /* NFS Super operations */ + const struct xattr_handler **xattr; /* NFS xattr handlers */ + struct list_head list; /* List of NFS versions */ +}; + +struct nfs_subversion *get_nfs_version(unsigned int); +void put_nfs_version(struct nfs_subversion *); +void register_nfs_version(struct nfs_subversion *); +void unregister_nfs_version(struct nfs_subversion *); + +#endif /* __LINUX_INTERNAL_NFS_H */ diff --git a/fs/nfs/nfs2super.c b/fs/nfs/nfs2super.c new file mode 100644 index 00000000000..0a9782c9171 --- /dev/null +++ b/fs/nfs/nfs2super.c @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2012 Netapp, Inc. All rights reserved. 
+ */ +#include <linux/module.h> +#include <linux/nfs_fs.h> +#include "internal.h" +#include "nfs.h" + +static struct nfs_subversion nfs_v2 = { + .owner = THIS_MODULE, + .nfs_fs = &nfs_fs_type, + .rpc_vers = &nfs_version2, + .rpc_ops = &nfs_v2_clientops, + .sops = &nfs_sops, +}; + +static int __init init_nfs_v2(void) +{ + register_nfs_version(&nfs_v2); + return 0; +} + +static void __exit exit_nfs_v2(void) +{ + unregister_nfs_version(&nfs_v2); +} + +MODULE_LICENSE("GPL"); + +module_init(init_nfs_v2); +module_exit(exit_nfs_v2); diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c index baf759bccd0..06b9df49f7f 100644 --- a/fs/nfs/nfs2xdr.c +++ b/fs/nfs/nfs2xdr.c @@ -106,19 +106,16 @@ static void print_overflow_msg(const char *func, const struct xdr_stream *xdr) static int decode_nfsdata(struct xdr_stream *xdr, struct nfs_readres *result) { u32 recvd, count; - size_t hdrlen; __be32 *p; p = xdr_inline_decode(xdr, 4); if (unlikely(p == NULL)) goto out_overflow; count = be32_to_cpup(p); - hdrlen = (u8 *)xdr->p - (u8 *)xdr->iov->iov_base; - recvd = xdr->buf->len - hdrlen; + recvd = xdr_read_pages(xdr, count); if (unlikely(count > recvd)) goto out_cheating; out: - xdr_read_pages(xdr, count); result->eof = 0; /* NFSv2 does not pass EOF flag on the wire. 
*/ result->count = count; return count; @@ -198,7 +195,6 @@ static void encode_fhandle(struct xdr_stream *xdr, const struct nfs_fh *fh) { __be32 *p; - BUG_ON(fh->size != NFS2_FHSIZE); p = xdr_reserve_space(xdr, NFS2_FHSIZE); memcpy(p, fh->data, NFS2_FHSIZE); } @@ -391,7 +387,7 @@ static void encode_filename(struct xdr_stream *xdr, { __be32 *p; - BUG_ON(length > NFS2_MAXNAMLEN); + WARN_ON_ONCE(length > NFS2_MAXNAMLEN); p = xdr_reserve_space(xdr, 4 + length); xdr_encode_opaque(p, name, length); } @@ -431,7 +427,6 @@ static void encode_path(struct xdr_stream *xdr, struct page **pages, u32 length) { __be32 *p; - BUG_ON(length > NFS2_MAXPATHLEN); p = xdr_reserve_space(xdr, 4); *p = cpu_to_be32(length); xdr_write_pages(xdr, pages, 0, length); @@ -440,7 +435,6 @@ static void encode_path(struct xdr_stream *xdr, struct page **pages, u32 length) static int decode_path(struct xdr_stream *xdr) { u32 length, recvd; - size_t hdrlen; __be32 *p; p = xdr_inline_decode(xdr, 4); @@ -449,12 +443,9 @@ static int decode_path(struct xdr_stream *xdr) length = be32_to_cpup(p); if (unlikely(length >= xdr->buf->page_len || length > NFS_MAXPATHLEN)) goto out_size; - hdrlen = (u8 *)xdr->p - (u8 *)xdr->iov->iov_base; - recvd = xdr->buf->len - hdrlen; + recvd = xdr_read_pages(xdr, length); if (unlikely(length > recvd)) goto out_cheating; - - xdr_read_pages(xdr, length); xdr_terminate_string(xdr->buf, length); return 0; out_size: @@ -972,22 +963,7 @@ out_overflow: */ static int decode_readdirok(struct xdr_stream *xdr) { - u32 recvd, pglen; - size_t hdrlen; - - pglen = xdr->buf->page_len; - hdrlen = (u8 *)xdr->p - (u8 *)xdr->iov->iov_base; - recvd = xdr->buf->len - hdrlen; - if (unlikely(pglen > recvd)) - goto out_cheating; -out: - xdr_read_pages(xdr, pglen); - return pglen; -out_cheating: - dprintk("NFS: server cheating in readdir result: " - "pglen %u > recvd %u\n", pglen, recvd); - pglen = recvd; - goto out; + return xdr_read_pages(xdr, xdr->buf->page_len); } static int 
nfs2_xdr_dec_readdirres(struct rpc_rqst *req, diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c index e4498dc351a..4a1aafba6a2 100644 --- a/fs/nfs/nfs3acl.c +++ b/fs/nfs/nfs3acl.c @@ -70,7 +70,7 @@ ssize_t nfs3_getxattr(struct dentry *dentry, const char *name, if (type == ACL_TYPE_ACCESS && acl->a_count == 0) error = -ENODATA; else - error = posix_acl_to_xattr(acl, buffer, size); + error = posix_acl_to_xattr(&init_user_ns, acl, buffer, size); posix_acl_release(acl); } else error = -ENODATA; @@ -92,7 +92,7 @@ int nfs3_setxattr(struct dentry *dentry, const char *name, else return -EOPNOTSUPP; - acl = posix_acl_from_xattr(value, size); + acl = posix_acl_from_xattr(&init_user_ns, value, size); if (IS_ERR(acl)) return PTR_ERR(acl); error = nfs3_proc_setacl(inode, type, acl); diff --git a/fs/nfs/nfs3client.c b/fs/nfs/nfs3client.c new file mode 100644 index 00000000000..b3fc65ef39c --- /dev/null +++ b/fs/nfs/nfs3client.c @@ -0,0 +1,65 @@ +#include <linux/nfs_fs.h> +#include <linux/nfs_mount.h> +#include "internal.h" + +#ifdef CONFIG_NFS_V3_ACL +static struct rpc_stat nfsacl_rpcstat = { &nfsacl_program }; +static const struct rpc_version *nfsacl_version[] = { + [3] = &nfsacl_version3, +}; + +const struct rpc_program nfsacl_program = { + .name = "nfsacl", + .number = NFS_ACL_PROGRAM, + .nrvers = ARRAY_SIZE(nfsacl_version), + .version = nfsacl_version, + .stats = &nfsacl_rpcstat, +}; + +/* + * Initialise an NFSv3 ACL client connection + */ +static void nfs_init_server_aclclient(struct nfs_server *server) +{ + if (server->flags & NFS_MOUNT_NOACL) + goto out_noacl; + + server->client_acl = rpc_bind_new_program(server->client, &nfsacl_program, 3); + if (IS_ERR(server->client_acl)) + goto out_noacl; + + /* No errors! 
Assume that Sun nfsacls are supported */ + server->caps |= NFS_CAP_ACLS; + return; + +out_noacl: + server->caps &= ~NFS_CAP_ACLS; +} +#else +static inline void nfs_init_server_aclclient(struct nfs_server *server) +{ + server->flags &= ~NFS_MOUNT_NOACL; + server->caps &= ~NFS_CAP_ACLS; +} +#endif + +struct nfs_server *nfs3_create_server(struct nfs_mount_info *mount_info, + struct nfs_subversion *nfs_mod) +{ + struct nfs_server *server = nfs_create_server(mount_info, nfs_mod); + /* Create a client RPC handle for the NFS v3 ACL management interface */ + if (!IS_ERR(server)) + nfs_init_server_aclclient(server); + return server; +} + +struct nfs_server *nfs3_clone_server(struct nfs_server *source, + struct nfs_fh *fh, + struct nfs_fattr *fattr, + rpc_authflavor_t flavor) +{ + struct nfs_server *server = nfs_clone_server(source, fh, fattr, flavor); + if (!IS_ERR(server) && !IS_ERR(source->client_acl)) + nfs_init_server_aclclient(server); + return server; +} diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 2292a0fd2bf..70efb63b1e4 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -24,14 +24,14 @@ #define NFSDBG_FACILITY NFSDBG_PROC -/* A wrapper to handle the EJUKEBOX and EKEYEXPIRED error messages */ +/* A wrapper to handle the EJUKEBOX error messages */ static int nfs3_rpc_wrapper(struct rpc_clnt *clnt, struct rpc_message *msg, int flags) { int res; do { res = rpc_call_sync(clnt, msg, flags); - if (res != -EJUKEBOX && res != -EKEYEXPIRED) + if (res != -EJUKEBOX) break; freezable_schedule_timeout_killable(NFS_JUKEBOX_RETRY_TIME); res = -ERESTARTSYS; @@ -44,7 +44,7 @@ nfs3_rpc_wrapper(struct rpc_clnt *clnt, struct rpc_message *msg, int flags) static int nfs3_async_handle_jukebox(struct rpc_task *task, struct inode *inode) { - if (task->tk_status != -EJUKEBOX && task->tk_status != -EKEYEXPIRED) + if (task->tk_status != -EJUKEBOX) return 0; if (task->tk_status == -EJUKEBOX) nfs_inc_stats(inode, NFSIOS_DELAY); @@ -69,7 +69,7 @@ do_proc_get_root(struct 
rpc_clnt *client, struct nfs_fh *fhandle, nfs_fattr_init(info->fattr); status = rpc_call_sync(client, &msg, 0); dprintk("%s: reply fsinfo: %d\n", __func__, status); - if (!(info->fattr->valid & NFS_ATTR_FATTR)) { + if (status == 0 && !(info->fattr->valid & NFS_ATTR_FATTR)) { msg.rpc_proc = &nfs3_procedures[NFS3PROC_GETATTR]; msg.rpc_resp = info->fattr; status = rpc_call_sync(client, &msg, 0); @@ -314,7 +314,7 @@ static void nfs3_free_createdata(struct nfs3_createdata *data) */ static int nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, - int flags, struct nfs_open_context *ctx) + int flags) { struct nfs3_createdata *data; umode_t mode = sattr->ia_mode; @@ -643,7 +643,7 @@ nfs3_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, u64 cookie, struct page **pages, unsigned int count, int plus) { struct inode *dir = dentry->d_inode; - __be32 *verf = NFS_COOKIEVERF(dir); + __be32 *verf = NFS_I(dir)->cookieverf; struct nfs3_readdirargs arg = { .fh = NFS_FH(dir), .cookie = cookie, @@ -877,6 +877,46 @@ nfs3_proc_lock(struct file *filp, int cmd, struct file_lock *fl) return nlmclnt_proc(NFS_SERVER(inode)->nlm_host, cmd, fl); } +static int nfs3_have_delegation(struct inode *inode, fmode_t flags) +{ + return 0; +} + +static int nfs3_return_delegation(struct inode *inode) +{ + nfs_wb_all(inode); + return 0; +} + +static const struct inode_operations nfs3_dir_inode_operations = { + .create = nfs_create, + .lookup = nfs_lookup, + .link = nfs_link, + .unlink = nfs_unlink, + .symlink = nfs_symlink, + .mkdir = nfs_mkdir, + .rmdir = nfs_rmdir, + .mknod = nfs_mknod, + .rename = nfs_rename, + .permission = nfs_permission, + .getattr = nfs_getattr, + .setattr = nfs_setattr, + .listxattr = nfs3_listxattr, + .getxattr = nfs3_getxattr, + .setxattr = nfs3_setxattr, + .removexattr = nfs3_removexattr, +}; + +static const struct inode_operations nfs3_file_inode_operations = { + .permission = nfs_permission, + .getattr = nfs_getattr, + .setattr = 
nfs_setattr, + .listxattr = nfs3_listxattr, + .getxattr = nfs3_getxattr, + .setxattr = nfs3_setxattr, + .removexattr = nfs3_removexattr, +}; + const struct nfs_rpc_ops nfs_v3_clientops = { .version = 3, /* protocol version */ .dentry_ops = &nfs_dentry_operations, @@ -885,6 +925,7 @@ const struct nfs_rpc_ops nfs_v3_clientops = { .file_ops = &nfs_file_operations, .getroot = nfs3_proc_get_root, .submount = nfs_submount, + .try_mount = nfs_try_mount, .getattr = nfs3_proc_getattr, .setattr = nfs3_proc_setattr, .lookup = nfs3_proc_lookup, @@ -910,9 +951,11 @@ const struct nfs_rpc_ops nfs_v3_clientops = { .pathconf = nfs3_proc_pathconf, .decode_dirent = nfs3_decode_dirent, .read_setup = nfs3_proc_read_setup, + .read_pageio_init = nfs_pageio_init_read, .read_rpc_prepare = nfs3_proc_read_rpc_prepare, .read_done = nfs3_read_done, .write_setup = nfs3_proc_write_setup, + .write_pageio_init = nfs_pageio_init_write, .write_rpc_prepare = nfs3_proc_write_rpc_prepare, .write_done = nfs3_write_done, .commit_setup = nfs3_proc_commit_setup, @@ -921,5 +964,11 @@ const struct nfs_rpc_ops nfs_v3_clientops = { .lock = nfs3_proc_lock, .clear_acl_cache = nfs3_forget_cached_acls, .close_context = nfs_close_context, + .have_delegation = nfs3_have_delegation, + .return_delegation = nfs3_return_delegation, + .alloc_client = nfs_alloc_client, .init_client = nfs_init_client, + .free_client = nfs_free_client, + .create_server = nfs3_create_server, + .clone_server = nfs3_clone_server, }; diff --git a/fs/nfs/nfs3super.c b/fs/nfs/nfs3super.c new file mode 100644 index 00000000000..cc471c72523 --- /dev/null +++ b/fs/nfs/nfs3super.c @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2012 Netapp, Inc. All rights reserved. 
+ */ +#include <linux/module.h> +#include <linux/nfs_fs.h> +#include "internal.h" +#include "nfs.h" + +static struct nfs_subversion nfs_v3 = { + .owner = THIS_MODULE, + .nfs_fs = &nfs_fs_type, + .rpc_vers = &nfs_version3, + .rpc_ops = &nfs_v3_clientops, + .sops = &nfs_sops, +}; + +static int __init init_nfs_v3(void) +{ + register_nfs_version(&nfs_v3); + return 0; +} + +static void __exit exit_nfs_v3(void) +{ + unregister_nfs_version(&nfs_v3); +} + +MODULE_LICENSE("GPL"); + +module_init(init_nfs_v3); +module_exit(exit_nfs_v3); diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index 902de489ec9..bffc32406fb 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -198,7 +198,7 @@ static void encode_filename3(struct xdr_stream *xdr, { __be32 *p; - BUG_ON(length > NFS3_MAXNAMLEN); + WARN_ON_ONCE(length > NFS3_MAXNAMLEN); p = xdr_reserve_space(xdr, 4 + length); xdr_encode_opaque(p, name, length); } @@ -238,7 +238,6 @@ out_overflow: static void encode_nfspath3(struct xdr_stream *xdr, struct page **pages, const u32 length) { - BUG_ON(length > NFS3_MAXPATHLEN); encode_uint32(xdr, length); xdr_write_pages(xdr, pages, 0, length); } @@ -246,7 +245,6 @@ static void encode_nfspath3(struct xdr_stream *xdr, struct page **pages, static int decode_nfspath3(struct xdr_stream *xdr) { u32 recvd, count; - size_t hdrlen; __be32 *p; p = xdr_inline_decode(xdr, 4); @@ -255,12 +253,9 @@ static int decode_nfspath3(struct xdr_stream *xdr) count = be32_to_cpup(p); if (unlikely(count >= xdr->buf->page_len || count > NFS3_MAXPATHLEN)) goto out_nametoolong; - hdrlen = (u8 *)xdr->p - (u8 *)xdr->iov->iov_base; - recvd = xdr->buf->len - hdrlen; + recvd = xdr_read_pages(xdr, count); if (unlikely(count > recvd)) goto out_cheating; - - xdr_read_pages(xdr, count); xdr_terminate_string(xdr->buf, count); return 0; @@ -329,14 +324,14 @@ static void encode_createverf3(struct xdr_stream *xdr, const __be32 *verifier) memcpy(p, verifier, NFS3_CREATEVERFSIZE); } -static int decode_writeverf3(struct xdr_stream 
*xdr, __be32 *verifier) +static int decode_writeverf3(struct xdr_stream *xdr, struct nfs_write_verifier *verifier) { __be32 *p; p = xdr_inline_decode(xdr, NFS3_WRITEVERFSIZE); if (unlikely(p == NULL)) goto out_overflow; - memcpy(verifier, p, NFS3_WRITEVERFSIZE); + memcpy(verifier->data, p, NFS3_WRITEVERFSIZE); return 0; out_overflow: print_overflow_msg(__func__, xdr); @@ -392,7 +387,6 @@ out_overflow: */ static void encode_ftype3(struct xdr_stream *xdr, const u32 type) { - BUG_ON(type > NF3FIFO); encode_uint32(xdr, type); } @@ -447,7 +441,7 @@ static void encode_nfs_fh3(struct xdr_stream *xdr, const struct nfs_fh *fh) { __be32 *p; - BUG_ON(fh->size > NFS3_FHSIZE); + WARN_ON_ONCE(fh->size > NFS3_FHSIZE); p = xdr_reserve_space(xdr, 4 + fh->size); xdr_encode_opaque(p, fh->data, fh->size); } @@ -1343,6 +1337,7 @@ static void nfs3_xdr_enc_setacl3args(struct rpc_rqst *req, error = nfsacl_encode(xdr->buf, base, args->inode, (args->mask & NFS_ACL) ? args->acl_access : NULL, 1, 0); + /* FIXME: this is just broken */ BUG_ON(error < 0); error = nfsacl_encode(xdr->buf, base + error, args->inode, (args->mask & NFS_DFACL) ? 
@@ -1587,7 +1582,6 @@ static int decode_read3resok(struct xdr_stream *xdr, struct nfs_readres *result) { u32 eof, count, ocount, recvd; - size_t hdrlen; __be32 *p; p = xdr_inline_decode(xdr, 4 + 4 + 4); @@ -1598,13 +1592,10 @@ static int decode_read3resok(struct xdr_stream *xdr, ocount = be32_to_cpup(p++); if (unlikely(ocount != count)) goto out_mismatch; - hdrlen = (u8 *)xdr->p - (u8 *)xdr->iov->iov_base; - recvd = xdr->buf->len - hdrlen; + recvd = xdr_read_pages(xdr, count); if (unlikely(count > recvd)) goto out_cheating; - out: - xdr_read_pages(xdr, count); result->eof = eof; result->count = count; return count; @@ -1676,20 +1667,22 @@ static int decode_write3resok(struct xdr_stream *xdr, { __be32 *p; - p = xdr_inline_decode(xdr, 4 + 4 + NFS3_WRITEVERFSIZE); + p = xdr_inline_decode(xdr, 4 + 4); if (unlikely(p == NULL)) goto out_overflow; result->count = be32_to_cpup(p++); result->verf->committed = be32_to_cpup(p++); if (unlikely(result->verf->committed > NFS_FILE_SYNC)) goto out_badvalue; - memcpy(result->verf->verifier, p, NFS3_WRITEVERFSIZE); + if (decode_writeverf3(xdr, &result->verf->verifier)) + goto out_eio; return result->count; out_badvalue: dprintk("NFS: bad stable_how value: %u\n", result->verf->committed); return -EIO; out_overflow: print_overflow_msg(__func__, xdr); +out_eio: return -EIO; } @@ -2039,22 +2032,7 @@ out_truncated: */ static int decode_dirlist3(struct xdr_stream *xdr) { - u32 recvd, pglen; - size_t hdrlen; - - pglen = xdr->buf->page_len; - hdrlen = (u8 *)xdr->p - (u8 *)xdr->iov->iov_base; - recvd = xdr->buf->len - hdrlen; - if (unlikely(pglen > recvd)) - goto out_cheating; -out: - xdr_read_pages(xdr, pglen); - return pglen; -out_cheating: - dprintk("NFS: server cheating in readdir result: " - "pglen %u > recvd %u\n", pglen, recvd); - pglen = recvd; - goto out; + return xdr_read_pages(xdr, xdr->buf->page_len); } static int decode_readdir3resok(struct xdr_stream *xdr, @@ -2337,7 +2315,7 @@ static int nfs3_xdr_dec_commit3res(struct rpc_rqst 
*req, goto out; if (status != NFS3_OK) goto out_status; - error = decode_writeverf3(xdr, result->verf->verifier); + error = decode_writeverf3(xdr, &result->verf->verifier); out: return error; out_status: @@ -2364,7 +2342,7 @@ static inline int decode_getacl3resok(struct xdr_stream *xdr, if (result->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT)) goto out; - hdrlen = (u8 *)xdr->p - (u8 *)xdr->iov->iov_base; + hdrlen = xdr_stream_pos(xdr); acl = NULL; if (result->mask & NFS_ACL) diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index cc5900ac61b..a3f488b074a 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -9,7 +9,9 @@ #ifndef __LINUX_FS_NFS_NFS4_FS_H #define __LINUX_FS_NFS_NFS4_FS_H -#ifdef CONFIG_NFS_V4 +#if IS_ENABLED(CONFIG_NFS_V4) + +#define NFS4_MAX_LOOP_ON_RECOVER (10) struct idmap; @@ -21,18 +23,12 @@ enum nfs4_client_state { NFS4CLNT_RECLAIM_NOGRACE, NFS4CLNT_DELEGRETURN, NFS4CLNT_SESSION_RESET, - NFS4CLNT_RECALL_SLOT, NFS4CLNT_LEASE_CONFIRM, NFS4CLNT_SERVER_SCOPE_MISMATCH, NFS4CLNT_PURGE_STATE, NFS4CLNT_BIND_CONN_TO_SESSION, }; -enum nfs4_session_state { - NFS4_SESSION_INITING, - NFS4_SESSION_DRAINING, -}; - #define NFS4_RENEW_TIMEOUT 0x01 #define NFS4_RENEW_DELEGATION_CB 0x02 @@ -43,8 +39,7 @@ struct nfs4_minor_version_ops { struct nfs_server *server, struct rpc_message *msg, struct nfs4_sequence_args *args, - struct nfs4_sequence_res *res, - int cache_reply); + struct nfs4_sequence_res *res); bool (*match_stateid)(const nfs4_stateid *, const nfs4_stateid *); int (*find_root_sec)(struct nfs_server *, struct nfs_fh *, @@ -132,8 +127,8 @@ struct nfs4_lock_owner { struct nfs4_lock_state { struct list_head ls_locks; /* Other lock stateids */ struct nfs4_state * ls_state; /* Pointer to open state */ -#define NFS_LOCK_INITIALIZED 1 - int ls_flags; +#define NFS_LOCK_INITIALIZED 0 + unsigned long ls_flags; struct nfs_seqid_counter ls_seqid; nfs4_stateid ls_stateid; atomic_t ls_count; @@ -191,6 +186,8 @@ struct nfs4_state_recovery_ops { int 
(*establish_clid)(struct nfs_client *, struct rpc_cred *); struct rpc_cred * (*get_clid_cred)(struct nfs_client *); int (*reclaim_complete)(struct nfs_client *); + int (*detect_trunking)(struct nfs_client *, struct nfs_client **, + struct rpc_cred *); }; struct nfs4_state_maintenance_ops { @@ -200,7 +197,13 @@ struct nfs4_state_maintenance_ops { }; extern const struct dentry_operations nfs4_dentry_operations; -extern const struct inode_operations nfs4_dir_inode_operations; + +/* dir.c */ +int nfs_atomic_open(struct inode *, struct dentry *, struct file *, + unsigned, umode_t, int *); + +/* super.c */ +extern struct file_system_type nfs4_fs_type; /* nfs4namespace.c */ rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *); @@ -217,7 +220,7 @@ extern int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred); extern int nfs4_destroy_clientid(struct nfs_client *clp); extern int nfs4_init_clientid(struct nfs_client *, struct rpc_cred *); extern int nfs41_init_clientid(struct nfs_client *, struct rpc_cred *); -extern int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait, bool roc); +extern int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait); extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle); extern int nfs4_proc_fs_locations(struct rpc_clnt *, struct inode *, const struct qstr *, struct nfs4_fs_locations *, struct page *); @@ -233,18 +236,14 @@ static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *ser return server->nfs_client->cl_session; } -extern bool nfs4_set_task_privileged(struct rpc_task *task, void *dummy); extern int nfs4_setup_sequence(const struct nfs_server *server, struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, struct rpc_task *task); extern int nfs41_setup_sequence(struct nfs4_session *session, struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, struct rpc_task *task); -extern void nfs4_destroy_session(struct 
nfs4_session *session); -extern struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp); extern int nfs4_proc_create_session(struct nfs_client *, struct rpc_cred *); extern int nfs4_proc_destroy_session(struct nfs4_session *, struct rpc_cred *); -extern int nfs4_init_session(struct nfs_server *server); extern int nfs4_proc_get_lease_time(struct nfs_client *clp, struct nfs_fsinfo *fsinfo); extern int nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data, @@ -272,11 +271,7 @@ static inline int nfs4_setup_sequence(const struct nfs_server *server, struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, struct rpc_task *task) { - return 0; -} - -static inline int nfs4_init_session(struct nfs_server *server) -{ + rpc_call_start(task); return 0; } @@ -301,6 +296,10 @@ extern const u32 nfs4_pathconf_bitmap[2]; extern const u32 nfs4_fsinfo_bitmap[3]; extern const u32 nfs4_fs_locations_bitmap[2]; +void nfs4_free_client(struct nfs_client *); + +struct nfs_client *nfs4_alloc_client(const struct nfs_client_initdata *); + /* nfs4renewd.c */ extern void nfs4_schedule_state_renewal(struct nfs_client *); extern void nfs4_renewd_prepare_shutdown(struct nfs_server *); @@ -309,11 +308,20 @@ extern void nfs4_renew_state(struct work_struct *); /* nfs4state.c */ struct rpc_cred *nfs4_get_setclientid_cred(struct nfs_client *clp); +struct rpc_cred *nfs4_get_machine_cred_locked(struct nfs_client *clp); struct rpc_cred *nfs4_get_renew_cred_locked(struct nfs_client *clp); +int nfs4_discover_server_trunking(struct nfs_client *clp, + struct nfs_client **); +int nfs40_discover_server_trunking(struct nfs_client *clp, + struct nfs_client **, struct rpc_cred *); #if defined(CONFIG_NFS_V4_1) -struct rpc_cred *nfs4_get_machine_cred_locked(struct nfs_client *clp); struct rpc_cred *nfs4_get_exchange_id_cred(struct nfs_client *clp); +int nfs41_discover_server_trunking(struct nfs_client *clp, + struct nfs_client **, struct rpc_cred *); extern void 
nfs4_schedule_session_recovery(struct nfs4_session *, int); +extern void nfs41_server_notify_target_slotid_update(struct nfs_client *clp); +extern void nfs41_server_notify_highest_slotid_update(struct nfs_client *clp); + #else static inline void nfs4_schedule_session_recovery(struct nfs4_session *session, int err) { @@ -331,17 +339,18 @@ extern void nfs4_state_set_mode_locked(struct nfs4_state *, fmode_t); extern void nfs_inode_find_state_and_recover(struct inode *inode, const nfs4_stateid *stateid); extern void nfs4_schedule_lease_recovery(struct nfs_client *); +extern int nfs4_wait_clnt_recover(struct nfs_client *clp); +extern int nfs4_client_recover_expired_lease(struct nfs_client *clp); extern void nfs4_schedule_state_manager(struct nfs_client *); extern void nfs4_schedule_path_down_recovery(struct nfs_client *clp); extern void nfs4_schedule_stateid_recovery(const struct nfs_server *, struct nfs4_state *); extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags); -extern void nfs41_handle_recall_slot(struct nfs_client *clp); extern void nfs41_handle_server_scope(struct nfs_client *, struct nfs41_server_scope **); extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp); extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl); extern void nfs4_select_rw_stateid(nfs4_stateid *, struct nfs4_state *, - fmode_t, fl_owner_t, pid_t); + fmode_t, const struct nfs_lockowner *); extern struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter, gfp_t gfp_mask); extern int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task); @@ -354,6 +363,32 @@ extern void nfs4_free_lock_state(struct nfs_server *server, struct nfs4_lock_sta extern const nfs4_stateid zero_stateid; +/* nfs4super.c */ +struct nfs_mount_info; +extern struct nfs_subversion nfs_v4; +struct dentry *nfs4_try_mount(int, const char *, struct nfs_mount_info *, struct nfs_subversion *); +extern bool nfs4_disable_idmapping; +extern unsigned 
short max_session_slots; +extern unsigned short send_implementation_id; + +#define NFS4_CLIENT_ID_UNIQ_LEN (64) +extern char nfs4_client_id_uniquifier[NFS4_CLIENT_ID_UNIQ_LEN]; + +/* nfs4sysctl.c */ +#ifdef CONFIG_SYSCTL +int nfs4_register_sysctl(void); +void nfs4_unregister_sysctl(void); +#else +static inline int nfs4_register_sysctl(void) +{ + return 0; +} + +static inline void nfs4_unregister_sysctl(void) +{ +} +#endif + /* nfs4xdr.c */ extern struct rpc_procinfo nfs4_procedures[]; diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c new file mode 100644 index 00000000000..2e9779b58b7 --- /dev/null +++ b/fs/nfs/nfs4client.c @@ -0,0 +1,897 @@ +/* + * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ +#include <linux/module.h> +#include <linux/nfs_fs.h> +#include <linux/nfs_idmap.h> +#include <linux/nfs_mount.h> +#include <linux/sunrpc/auth.h> +#include <linux/sunrpc/xprt.h> +#include <linux/sunrpc/bc_xprt.h> +#include "internal.h" +#include "callback.h" +#include "delegation.h" +#include "nfs4session.h" +#include "pnfs.h" +#include "netns.h" + +#define NFSDBG_FACILITY NFSDBG_CLIENT + +/* + * Get a unique NFSv4.0 callback identifier which will be used + * by the V4.0 callback service to lookup the nfs_client struct + */ +static int nfs_get_cb_ident_idr(struct nfs_client *clp, int minorversion) +{ + int ret = 0; + struct nfs_net *nn = net_generic(clp->cl_net, nfs_net_id); + + if (clp->rpc_ops->version != 4 || minorversion != 0) + return ret; +retry: + if (!idr_pre_get(&nn->cb_ident_idr, GFP_KERNEL)) + return -ENOMEM; + spin_lock(&nn->nfs_client_lock); + ret = idr_get_new(&nn->cb_ident_idr, clp, &clp->cl_cb_ident); + spin_unlock(&nn->nfs_client_lock); + if (ret == -EAGAIN) + goto retry; + return ret; +} + +#ifdef CONFIG_NFS_V4_1 +static void nfs4_shutdown_session(struct nfs_client *clp) +{ + if (nfs4_has_session(clp)) { + nfs4_destroy_session(clp->cl_session); + nfs4_destroy_clientid(clp); + } + +} +#else 
/* CONFIG_NFS_V4_1 */ +static void nfs4_shutdown_session(struct nfs_client *clp) +{ +} +#endif /* CONFIG_NFS_V4_1 */ + +struct nfs_client *nfs4_alloc_client(const struct nfs_client_initdata *cl_init) +{ + int err; + struct nfs_client *clp = nfs_alloc_client(cl_init); + if (IS_ERR(clp)) + return clp; + + err = nfs_get_cb_ident_idr(clp, cl_init->minorversion); + if (err) + goto error; + + spin_lock_init(&clp->cl_lock); + INIT_DELAYED_WORK(&clp->cl_renewd, nfs4_renew_state); + rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS client"); + clp->cl_state = 1 << NFS4CLNT_LEASE_EXPIRED; + clp->cl_minorversion = cl_init->minorversion; + clp->cl_mvops = nfs_v4_minor_ops[cl_init->minorversion]; + return clp; + +error: + nfs_free_client(clp); + return ERR_PTR(err); +} + +/* + * Destroy the NFS4 callback service + */ +static void nfs4_destroy_callback(struct nfs_client *clp) +{ + if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state)) + nfs_callback_down(clp->cl_mvops->minor_version, clp->cl_net); +} + +static void nfs4_shutdown_client(struct nfs_client *clp) +{ + if (__test_and_clear_bit(NFS_CS_RENEWD, &clp->cl_res_state)) + nfs4_kill_renewd(clp); + nfs4_shutdown_session(clp); + nfs4_destroy_callback(clp); + if (__test_and_clear_bit(NFS_CS_IDMAP, &clp->cl_res_state)) + nfs_idmap_delete(clp); + + rpc_destroy_wait_queue(&clp->cl_rpcwaitq); + kfree(clp->cl_serverowner); + kfree(clp->cl_serverscope); + kfree(clp->cl_implid); +} + +void nfs4_free_client(struct nfs_client *clp) +{ + nfs4_shutdown_client(clp); + nfs_free_client(clp); +} + +/* + * Initialize the NFS4 callback service + */ +static int nfs4_init_callback(struct nfs_client *clp) +{ + int error; + + if (clp->rpc_ops->version == 4) { + struct rpc_xprt *xprt; + + xprt = rcu_dereference_raw(clp->cl_rpcclient->cl_xprt); + + if (nfs4_has_session(clp)) { + error = xprt_setup_backchannel(xprt, + NFS41_BC_MIN_CALLBACKS); + if (error < 0) + return error; + } + + error = nfs_callback_up(clp->cl_mvops->minor_version, xprt); + if 
(error < 0) { + dprintk("%s: failed to start callback. Error = %d\n", + __func__, error); + return error; + } + __set_bit(NFS_CS_CALLBACK, &clp->cl_res_state); + } + return 0; +} + +/* + * Initialize the minor version specific parts of an NFS4 client record + */ +static int nfs4_init_client_minor_version(struct nfs_client *clp) +{ +#if defined(CONFIG_NFS_V4_1) + if (clp->cl_mvops->minor_version) { + struct nfs4_session *session = NULL; + /* + * Create the session and mark it expired. + * When a SEQUENCE operation encounters the expired session + * it will do session recovery to initialize it. + */ + session = nfs4_alloc_session(clp); + if (!session) + return -ENOMEM; + + clp->cl_session = session; + /* + * The create session reply races with the server back + * channel probe. Mark the client NFS_CS_SESSION_INITING + * so that the client back channel can find the + * nfs_client struct + */ + nfs_mark_client_ready(clp, NFS_CS_SESSION_INITING); + } +#endif /* CONFIG_NFS_V4_1 */ + + return nfs4_init_callback(clp); +} + +/** + * nfs4_init_client - Initialise an NFS4 client record + * + * @clp: nfs_client to initialise + * @timeparms: timeout parameters for underlying RPC transport + * @ip_addr: callback IP address in presentation format + * @authflavour: authentication flavor for underlying RPC transport + * + * Returns pointer to an NFS client, or an ERR_PTR value. 
+ */ +struct nfs_client *nfs4_init_client(struct nfs_client *clp, + const struct rpc_timeout *timeparms, + const char *ip_addr, + rpc_authflavor_t authflavour) +{ + char buf[INET6_ADDRSTRLEN + 1]; + struct nfs_client *old; + int error; + + if (clp->cl_cons_state == NFS_CS_READY) { + /* the client is initialised already */ + dprintk("<-- nfs4_init_client() = 0 [already %p]\n", clp); + return clp; + } + + /* Check NFS protocol revision and initialize RPC op vector */ + clp->rpc_ops = &nfs_v4_clientops; + + __set_bit(NFS_CS_DISCRTRY, &clp->cl_flags); + error = nfs_create_rpc_client(clp, timeparms, authflavour); + if (error < 0) + goto error; + + /* If no clientaddr= option was specified, find a usable cb address */ + if (ip_addr == NULL) { + struct sockaddr_storage cb_addr; + struct sockaddr *sap = (struct sockaddr *)&cb_addr; + + error = rpc_localaddr(clp->cl_rpcclient, sap, sizeof(cb_addr)); + if (error < 0) + goto error; + error = rpc_ntop(sap, buf, sizeof(buf)); + if (error < 0) + goto error; + ip_addr = (const char *)buf; + } + strlcpy(clp->cl_ipaddr, ip_addr, sizeof(clp->cl_ipaddr)); + + error = nfs_idmap_new(clp); + if (error < 0) { + dprintk("%s: failed to create idmapper. Error = %d\n", + __func__, error); + goto error; + } + __set_bit(NFS_CS_IDMAP, &clp->cl_res_state); + + error = nfs4_init_client_minor_version(clp); + if (error < 0) + goto error; + + if (!nfs4_has_session(clp)) + nfs_mark_client_ready(clp, NFS_CS_READY); + + error = nfs4_discover_server_trunking(clp, &old); + if (error < 0) + goto error; + nfs_put_client(clp); + if (clp != old) { + clp->cl_preserve_clid = true; + clp = old; + } + + return clp; + +error: + nfs_mark_client_ready(clp, error); + nfs_put_client(clp); + dprintk("<-- nfs4_init_client() = xerror %d\n", error); + return ERR_PTR(error); +} + +/* + * SETCLIENTID just did a callback update with the callback ident in + * "drop," but server trunking discovery claims "drop" and "keep" are + * actually the same server. 
Swap the callback IDs so that "keep" + * will continue to use the callback ident the server now knows about, + * and so that "keep"'s original callback ident is destroyed when + * "drop" is freed. + */ +static void nfs4_swap_callback_idents(struct nfs_client *keep, + struct nfs_client *drop) +{ + struct nfs_net *nn = net_generic(keep->cl_net, nfs_net_id); + unsigned int save = keep->cl_cb_ident; + + if (keep->cl_cb_ident == drop->cl_cb_ident) + return; + + dprintk("%s: keeping callback ident %u and dropping ident %u\n", + __func__, keep->cl_cb_ident, drop->cl_cb_ident); + + spin_lock(&nn->nfs_client_lock); + + idr_replace(&nn->cb_ident_idr, keep, drop->cl_cb_ident); + keep->cl_cb_ident = drop->cl_cb_ident; + + idr_replace(&nn->cb_ident_idr, drop, save); + drop->cl_cb_ident = save; + + spin_unlock(&nn->nfs_client_lock); +} + +/** + * nfs40_walk_client_list - Find server that recognizes a client ID + * + * @new: nfs_client with client ID to test + * @result: OUT: found nfs_client, or new + * @cred: credential to use for trunking test + * + * Returns zero, a negative errno, or a negative NFS4ERR status. + * If zero is returned, an nfs_client pointer is planted in "result." + * + * NB: nfs40_walk_client_list() relies on the new nfs_client being + * the last nfs_client on the list. 
+ */ +int nfs40_walk_client_list(struct nfs_client *new, + struct nfs_client **result, + struct rpc_cred *cred) +{ + struct nfs_net *nn = net_generic(new->cl_net, nfs_net_id); + struct nfs_client *pos, *n, *prev = NULL; + struct nfs4_setclientid_res clid = { + .clientid = new->cl_clientid, + .confirm = new->cl_confirm, + }; + int status = -NFS4ERR_STALE_CLIENTID; + + spin_lock(&nn->nfs_client_lock); + list_for_each_entry_safe(pos, n, &nn->nfs_client_list, cl_share_link) { + /* If "pos" isn't marked ready, we can't trust the + * remaining fields in "pos" */ + if (pos->cl_cons_state < NFS_CS_READY) + continue; + + if (pos->rpc_ops != new->rpc_ops) + continue; + + if (pos->cl_proto != new->cl_proto) + continue; + + if (pos->cl_minorversion != new->cl_minorversion) + continue; + + if (pos->cl_clientid != new->cl_clientid) + continue; + + atomic_inc(&pos->cl_count); + spin_unlock(&nn->nfs_client_lock); + + if (prev) + nfs_put_client(prev); + prev = pos; + + status = nfs4_proc_setclientid_confirm(pos, &clid, cred); + switch (status) { + case -NFS4ERR_STALE_CLIENTID: + break; + case 0: + nfs4_swap_callback_idents(pos, new); + + prev = NULL; + *result = pos; + dprintk("NFS: <-- %s using nfs_client = %p ({%d})\n", + __func__, pos, atomic_read(&pos->cl_count)); + default: + goto out; + } + + spin_lock(&nn->nfs_client_lock); + } + spin_unlock(&nn->nfs_client_lock); + + /* No match found. 
The server lost our clientid */ +out: + if (prev) + nfs_put_client(prev); + dprintk("NFS: <-- %s status = %d\n", __func__, status); + return status; +} + +#ifdef CONFIG_NFS_V4_1 +/* + * Returns true if the client IDs match + */ +static bool nfs4_match_clientids(struct nfs_client *a, struct nfs_client *b) +{ + if (a->cl_clientid != b->cl_clientid) { + dprintk("NFS: --> %s client ID %llx does not match %llx\n", + __func__, a->cl_clientid, b->cl_clientid); + return false; + } + dprintk("NFS: --> %s client ID %llx matches %llx\n", + __func__, a->cl_clientid, b->cl_clientid); + return true; +} + +/* + * Returns true if the server owners match + */ +static bool +nfs4_match_serverowners(struct nfs_client *a, struct nfs_client *b) +{ + struct nfs41_server_owner *o1 = a->cl_serverowner; + struct nfs41_server_owner *o2 = b->cl_serverowner; + + if (o1->minor_id != o2->minor_id) { + dprintk("NFS: --> %s server owner minor IDs do not match\n", + __func__); + return false; + } + + if (o1->major_id_sz != o2->major_id_sz) + goto out_major_mismatch; + if (memcmp(o1->major_id, o2->major_id, o1->major_id_sz) != 0) + goto out_major_mismatch; + + dprintk("NFS: --> %s server owners match\n", __func__); + return true; + +out_major_mismatch: + dprintk("NFS: --> %s server owner major IDs do not match\n", + __func__); + return false; +} + +/** + * nfs41_walk_client_list - Find nfs_client that matches a client/server owner + * + * @new: nfs_client with client ID to test + * @result: OUT: found nfs_client, or new + * @cred: credential to use for trunking test + * + * Returns zero, a negative errno, or a negative NFS4ERR status. + * If zero is returned, an nfs_client pointer is planted in "result." + * + * NB: nfs41_walk_client_list() relies on the new nfs_client being + * the last nfs_client on the list. 
+ */ +int nfs41_walk_client_list(struct nfs_client *new, + struct nfs_client **result, + struct rpc_cred *cred) +{ + struct nfs_net *nn = net_generic(new->cl_net, nfs_net_id); + struct nfs_client *pos, *n, *prev = NULL; + int status = -NFS4ERR_STALE_CLIENTID; + + spin_lock(&nn->nfs_client_lock); + list_for_each_entry_safe(pos, n, &nn->nfs_client_list, cl_share_link) { + /* If "pos" isn't marked ready, we can't trust the + * remaining fields in "pos", especially the client + * ID and serverowner fields. Wait for CREATE_SESSION + * to finish. */ + if (pos->cl_cons_state < NFS_CS_READY) { + atomic_inc(&pos->cl_count); + spin_unlock(&nn->nfs_client_lock); + + if (prev) + nfs_put_client(prev); + prev = pos; + + nfs4_schedule_lease_recovery(pos); + status = nfs_wait_client_init_complete(pos); + if (status < 0) { + nfs_put_client(pos); + spin_lock(&nn->nfs_client_lock); + continue; + } + status = pos->cl_cons_state; + spin_lock(&nn->nfs_client_lock); + if (status < 0) + continue; + } + + if (pos->rpc_ops != new->rpc_ops) + continue; + + if (pos->cl_proto != new->cl_proto) + continue; + + if (pos->cl_minorversion != new->cl_minorversion) + continue; + + if (!nfs4_match_clientids(pos, new)) + continue; + + if (!nfs4_match_serverowners(pos, new)) + continue; + + atomic_inc(&pos->cl_count); + spin_unlock(&nn->nfs_client_lock); + dprintk("NFS: <-- %s using nfs_client = %p ({%d})\n", + __func__, pos, atomic_read(&pos->cl_count)); + + *result = pos; + return 0; + } + + /* No matching nfs_client found. 
*/ + spin_unlock(&nn->nfs_client_lock); + dprintk("NFS: <-- %s status = %d\n", __func__, status); + return status; +} +#endif /* CONFIG_NFS_V4_1 */ + +static void nfs4_destroy_server(struct nfs_server *server) +{ + nfs_server_return_all_delegations(server); + unset_pnfs_layoutdriver(server); + nfs4_purge_state_owners(server); +} + +/* + * NFSv4.0 callback thread helper + * + * Find a client by callback identifier + */ +struct nfs_client * +nfs4_find_client_ident(struct net *net, int cb_ident) +{ + struct nfs_client *clp; + struct nfs_net *nn = net_generic(net, nfs_net_id); + + spin_lock(&nn->nfs_client_lock); + clp = idr_find(&nn->cb_ident_idr, cb_ident); + if (clp) + atomic_inc(&clp->cl_count); + spin_unlock(&nn->nfs_client_lock); + return clp; +} + +#if defined(CONFIG_NFS_V4_1) +/* Common match routine for v4.0 and v4.1 callback services */ +static bool nfs4_cb_match_client(const struct sockaddr *addr, + struct nfs_client *clp, u32 minorversion) +{ + struct sockaddr *clap = (struct sockaddr *)&clp->cl_addr; + + /* Don't match clients that failed to initialise */ + if (!(clp->cl_cons_state == NFS_CS_READY || + clp->cl_cons_state == NFS_CS_SESSION_INITING)) + return false; + + smp_rmb(); + + /* Match the version and minorversion */ + if (clp->rpc_ops->version != 4 || + clp->cl_minorversion != minorversion) + return false; + + /* Match only the IP address, not the port number */ + if (!nfs_sockaddr_match_ipaddr(addr, clap)) + return false; + + return true; +} + +/* + * NFSv4.1 callback thread helper + * For CB_COMPOUND calls, find a client by IP address, protocol version, + * minorversion, and sessionID + * + * Returns NULL if no such client + */ +struct nfs_client * +nfs4_find_client_sessionid(struct net *net, const struct sockaddr *addr, + struct nfs4_sessionid *sid) +{ + struct nfs_client *clp; + struct nfs_net *nn = net_generic(net, nfs_net_id); + + spin_lock(&nn->nfs_client_lock); + list_for_each_entry(clp, &nn->nfs_client_list, cl_share_link) { + if 
(nfs4_cb_match_client(addr, clp, 1) == false) + continue; + + if (!nfs4_has_session(clp)) + continue; + + /* Match sessionid*/ + if (memcmp(clp->cl_session->sess_id.data, + sid->data, NFS4_MAX_SESSIONID_LEN) != 0) + continue; + + atomic_inc(&clp->cl_count); + spin_unlock(&nn->nfs_client_lock); + return clp; + } + spin_unlock(&nn->nfs_client_lock); + return NULL; +} + +#else /* CONFIG_NFS_V4_1 */ + +struct nfs_client * +nfs4_find_client_sessionid(struct net *net, const struct sockaddr *addr, + struct nfs4_sessionid *sid) +{ + return NULL; +} +#endif /* CONFIG_NFS_V4_1 */ + +/* + * Set up an NFS4 client + */ +static int nfs4_set_client(struct nfs_server *server, + const char *hostname, + const struct sockaddr *addr, + const size_t addrlen, + const char *ip_addr, + rpc_authflavor_t authflavour, + int proto, const struct rpc_timeout *timeparms, + u32 minorversion, struct net *net) +{ + struct nfs_client_initdata cl_init = { + .hostname = hostname, + .addr = addr, + .addrlen = addrlen, + .nfs_mod = &nfs_v4, + .proto = proto, + .minorversion = minorversion, + .net = net, + }; + struct nfs_client *clp; + int error; + + dprintk("--> nfs4_set_client()\n"); + + if (server->flags & NFS_MOUNT_NORESVPORT) + set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags); + + /* Allocate or find a client reference we can use */ + clp = nfs_get_client(&cl_init, timeparms, ip_addr, authflavour); + if (IS_ERR(clp)) { + error = PTR_ERR(clp); + goto error; + } + + /* + * Query for the lease time on clientid setup or renewal + * + * Note that this will be set on nfs_clients that were created + * only for the DS role and did not set this bit, but now will + * serve a dual role. + */ + set_bit(NFS_CS_CHECK_LEASE_TIME, &clp->cl_res_state); + + server->nfs_client = clp; + dprintk("<-- nfs4_set_client() = 0 [new %p]\n", clp); + return 0; +error: + dprintk("<-- nfs4_set_client() = xerror %d\n", error); + return error; +} + +/* + * Set up a pNFS Data Server client. 
+ * + * Return any existing nfs_client that matches server address,port,version + * and minorversion. + * + * For a new nfs_client, use a soft mount (default), a low retrans and a + * low timeout interval so that if a connection is lost, we retry through + * the MDS. + */ +struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp, + const struct sockaddr *ds_addr, int ds_addrlen, + int ds_proto, unsigned int ds_timeo, unsigned int ds_retrans) +{ + struct nfs_client_initdata cl_init = { + .addr = ds_addr, + .addrlen = ds_addrlen, + .nfs_mod = &nfs_v4, + .proto = ds_proto, + .minorversion = mds_clp->cl_minorversion, + .net = mds_clp->cl_net, + }; + struct rpc_timeout ds_timeout; + struct nfs_client *clp; + + /* + * Set an authflavor equal to the MDS value. Use the MDS nfs_client + * cl_ipaddr so as to use the same EXCHANGE_ID co_ownerid as the MDS + * (section 13.1 RFC 5661). + */ + nfs_init_timeout_values(&ds_timeout, ds_proto, ds_timeo, ds_retrans); + clp = nfs_get_client(&cl_init, &ds_timeout, mds_clp->cl_ipaddr, + mds_clp->cl_rpcclient->cl_auth->au_flavor); + + dprintk("<-- %s %p\n", __func__, clp); + return clp; +} +EXPORT_SYMBOL_GPL(nfs4_set_ds_client); + +/* + * Session has been established, and the client marked ready. + * Set the mount rsize and wsize with negotiated fore channel + * attributes which will be bound checked in nfs_server_set_fsinfo. 
+ */ +static void nfs4_session_set_rwsize(struct nfs_server *server) +{ +#ifdef CONFIG_NFS_V4_1 + struct nfs4_session *sess; + u32 server_resp_sz; + u32 server_rqst_sz; + + if (!nfs4_has_session(server->nfs_client)) + return; + sess = server->nfs_client->cl_session; + server_resp_sz = sess->fc_attrs.max_resp_sz - nfs41_maxread_overhead; + server_rqst_sz = sess->fc_attrs.max_rqst_sz - nfs41_maxwrite_overhead; + + if (server->rsize > server_resp_sz) + server->rsize = server_resp_sz; + if (server->wsize > server_rqst_sz) + server->wsize = server_rqst_sz; +#endif /* CONFIG_NFS_V4_1 */ +} + +static int nfs4_server_common_setup(struct nfs_server *server, + struct nfs_fh *mntfh) +{ + struct nfs_fattr *fattr; + int error; + + /* data servers support only a subset of NFSv4.1 */ + if (is_ds_only_client(server->nfs_client)) + return -EPROTONOSUPPORT; + + fattr = nfs_alloc_fattr(); + if (fattr == NULL) + return -ENOMEM; + + /* We must ensure the session is initialised first */ + error = nfs4_init_session(server); + if (error < 0) + goto out; + + /* Probe the root fh to retrieve its FSID and filehandle */ + error = nfs4_get_rootfh(server, mntfh); + if (error < 0) + goto out; + + dprintk("Server FSID: %llx:%llx\n", + (unsigned long long) server->fsid.major, + (unsigned long long) server->fsid.minor); + dprintk("Mount FH: %d\n", mntfh->size); + + nfs4_session_set_rwsize(server); + + error = nfs_probe_fsinfo(server, mntfh, fattr); + if (error < 0) + goto out; + + if (server->namelen == 0 || server->namelen > NFS4_MAXNAMLEN) + server->namelen = NFS4_MAXNAMLEN; + + nfs_server_insert_lists(server); + server->mount_time = jiffies; + server->destroy = nfs4_destroy_server; +out: + nfs_free_fattr(fattr); + return error; +} + +/* + * Create a version 4 volume record + */ +static int nfs4_init_server(struct nfs_server *server, + const struct nfs_parsed_mount_data *data) +{ + struct rpc_timeout timeparms; + int error; + + dprintk("--> nfs4_init_server()\n"); + + 
nfs_init_timeout_values(&timeparms, data->nfs_server.protocol, + data->timeo, data->retrans); + + /* Initialise the client representation from the mount data */ + server->flags = data->flags; + server->caps |= NFS_CAP_ATOMIC_OPEN|NFS_CAP_CHANGE_ATTR|NFS_CAP_POSIX_LOCK; + if (!(data->flags & NFS_MOUNT_NORDIRPLUS)) + server->caps |= NFS_CAP_READDIRPLUS; + server->options = data->options; + + /* Get a client record */ + error = nfs4_set_client(server, + data->nfs_server.hostname, + (const struct sockaddr *)&data->nfs_server.address, + data->nfs_server.addrlen, + data->client_address, + data->auth_flavors[0], + data->nfs_server.protocol, + &timeparms, + data->minorversion, + data->net); + if (error < 0) + goto error; + + /* + * Don't use NFS uid/gid mapping if we're using AUTH_SYS or lower + * authentication. + */ + if (nfs4_disable_idmapping && data->auth_flavors[0] == RPC_AUTH_UNIX) + server->caps |= NFS_CAP_UIDGID_NOMAP; + + if (data->rsize) + server->rsize = nfs_block_size(data->rsize, NULL); + if (data->wsize) + server->wsize = nfs_block_size(data->wsize, NULL); + + server->acregmin = data->acregmin * HZ; + server->acregmax = data->acregmax * HZ; + server->acdirmin = data->acdirmin * HZ; + server->acdirmax = data->acdirmax * HZ; + + server->port = data->nfs_server.port; + + error = nfs_init_server_rpcclient(server, &timeparms, data->auth_flavors[0]); + +error: + /* Done */ + dprintk("<-- nfs4_init_server() = %d\n", error); + return error; +} + +/* + * Create a version 4 volume record + * - keyed on server and FSID + */ +/*struct nfs_server *nfs4_create_server(const struct nfs_parsed_mount_data *data, + struct nfs_fh *mntfh)*/ +struct nfs_server *nfs4_create_server(struct nfs_mount_info *mount_info, + struct nfs_subversion *nfs_mod) +{ + struct nfs_server *server; + int error; + + dprintk("--> nfs4_create_server()\n"); + + server = nfs_alloc_server(); + if (!server) + return ERR_PTR(-ENOMEM); + + /* set up the general RPC client */ + error = 
nfs4_init_server(server, mount_info->parsed); + if (error < 0) + goto error; + + error = nfs4_server_common_setup(server, mount_info->mntfh); + if (error < 0) + goto error; + + dprintk("<-- nfs4_create_server() = %p\n", server); + return server; + +error: + nfs_free_server(server); + dprintk("<-- nfs4_create_server() = error %d\n", error); + return ERR_PTR(error); +} + +/* + * Create an NFS4 referral server record + */ +struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data, + struct nfs_fh *mntfh) +{ + struct nfs_client *parent_client; + struct nfs_server *server, *parent_server; + int error; + + dprintk("--> nfs4_create_referral_server()\n"); + + server = nfs_alloc_server(); + if (!server) + return ERR_PTR(-ENOMEM); + + parent_server = NFS_SB(data->sb); + parent_client = parent_server->nfs_client; + + /* Initialise the client representation from the parent server */ + nfs_server_copy_userdata(server, parent_server); + server->caps |= NFS_CAP_ATOMIC_OPEN|NFS_CAP_CHANGE_ATTR; + + /* Get a client representation. 
+ * Note: NFSv4 always uses TCP, */ + error = nfs4_set_client(server, data->hostname, + data->addr, + data->addrlen, + parent_client->cl_ipaddr, + data->authflavor, + rpc_protocol(parent_server->client), + parent_server->client->cl_timeout, + parent_client->cl_mvops->minor_version, + parent_client->cl_net); + if (error < 0) + goto error; + + error = nfs_init_server_rpcclient(server, parent_server->client->cl_timeout, data->authflavor); + if (error < 0) + goto error; + + error = nfs4_server_common_setup(server, mntfh); + if (error < 0) + goto error; + + dprintk("<-- nfs_create_referral_server() = %p\n", server); + return server; + +error: + nfs_free_server(server); + dprintk("<-- nfs4_create_referral_server() = error %d\n", error); + return ERR_PTR(error); +} diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c new file mode 100644 index 00000000000..08ddcccb888 --- /dev/null +++ b/fs/nfs/nfs4file.c @@ -0,0 +1,138 @@ +/* + * linux/fs/nfs/file.c + * + * Copyright (C) 1992 Rick Sladkey + */ +#include <linux/nfs_fs.h> +#include "internal.h" +#include "fscache.h" +#include "pnfs.h" + +#define NFSDBG_FACILITY NFSDBG_FILE + +static int +nfs4_file_open(struct inode *inode, struct file *filp) +{ + struct nfs_open_context *ctx; + struct dentry *dentry = filp->f_path.dentry; + struct dentry *parent = NULL; + struct inode *dir; + unsigned openflags = filp->f_flags; + struct iattr attr; + int err; + + /* + * If no cached dentry exists or if it's negative, NFSv4 handled the + * opens in ->lookup() or ->create(). + * + * We only get this far for a cached positive dentry. We skipped + * revalidation, so handle it here by dropping the dentry and returning + * -EOPENSTALE. The VFS will retry the lookup/create/open. 
+ */ + + dprintk("NFS: open file(%s/%s)\n", + dentry->d_parent->d_name.name, + dentry->d_name.name); + + if ((openflags & O_ACCMODE) == 3) + openflags--; + + /* We can't create new files here */ + openflags &= ~(O_CREAT|O_EXCL); + + parent = dget_parent(dentry); + dir = parent->d_inode; + + ctx = alloc_nfs_open_context(filp->f_path.dentry, filp->f_mode); + err = PTR_ERR(ctx); + if (IS_ERR(ctx)) + goto out; + + attr.ia_valid = ATTR_OPEN; + if (openflags & O_TRUNC) { + attr.ia_valid |= ATTR_SIZE; + attr.ia_size = 0; + nfs_wb_all(inode); + } + + inode = NFS_PROTO(dir)->open_context(dir, ctx, openflags, &attr); + if (IS_ERR(inode)) { + err = PTR_ERR(inode); + switch (err) { + case -EPERM: + case -EACCES: + case -EDQUOT: + case -ENOSPC: + case -EROFS: + goto out_put_ctx; + default: + goto out_drop; + } + } + iput(inode); + if (inode != dentry->d_inode) + goto out_drop; + + nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); + nfs_file_set_open_context(filp, ctx); + nfs_fscache_set_inode_cookie(inode, filp); + err = 0; + +out_put_ctx: + put_nfs_open_context(ctx); +out: + dput(parent); + return err; + +out_drop: + d_drop(dentry); + err = -EOPENSTALE; + goto out_put_ctx; +} + +static int +nfs4_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) +{ + int ret; + struct inode *inode = file->f_path.dentry->d_inode; + + do { + ret = filemap_write_and_wait_range(inode->i_mapping, start, end); + if (ret != 0) + break; + mutex_lock(&inode->i_mutex); + ret = nfs_file_fsync_commit(file, start, end, datasync); + if (!ret && !datasync) + /* application has asked for meta-data sync */ + ret = pnfs_layoutcommit_inode(inode, true); + mutex_unlock(&inode->i_mutex); + /* + * If nfs_file_fsync_commit detected a server reboot, then + * resend all dirty pages that might have been covered by + * the NFS_CONTEXT_RESEND_WRITES flag + */ + start = 0; + end = LLONG_MAX; + } while (ret == -EAGAIN); + + return ret; +} + +const struct file_operations nfs4_file_operations = { + 
.llseek = nfs_file_llseek, + .read = do_sync_read, + .write = do_sync_write, + .aio_read = nfs_file_read, + .aio_write = nfs_file_write, + .mmap = nfs_file_mmap, + .open = nfs4_file_open, + .flush = nfs_file_flush, + .release = nfs_file_release, + .fsync = nfs4_file_fsync, + .lock = nfs_lock, + .flock = nfs_flock, + .splice_read = nfs_file_splice_read, + .splice_write = nfs_file_splice_write, + .check_flags = nfs_check_flags, + .setlease = nfs_setlease, +}; diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index e1340293872..194c4841033 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -35,6 +35,7 @@ #include <linux/sunrpc/metrics.h> +#include "nfs4session.h" #include "internal.h" #include "delegation.h" #include "nfs4filelayout.h" @@ -122,12 +123,21 @@ static void filelayout_reset_read(struct nfs_read_data *data) } } +static void filelayout_fenceme(struct inode *inode, struct pnfs_layout_hdr *lo) +{ + if (!test_and_clear_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) + return; + clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(inode)->flags); + pnfs_return_layout(inode); +} + static int filelayout_async_handle_error(struct rpc_task *task, struct nfs4_state *state, struct nfs_client *clp, struct pnfs_layout_segment *lseg) { - struct inode *inode = lseg->pls_layout->plh_inode; + struct pnfs_layout_hdr *lo = lseg->pls_layout; + struct inode *inode = lo->plh_inode; struct nfs_server *mds_server = NFS_SERVER(inode); struct nfs4_deviceid_node *devid = FILELAYOUT_DEVID_NODE(lseg); struct nfs_client *mds_client = mds_server->nfs_client; @@ -169,7 +179,6 @@ static int filelayout_async_handle_error(struct rpc_task *task, break; case -NFS4ERR_DELAY: case -NFS4ERR_GRACE: - case -EKEYEXPIRED: rpc_delay(task, FILELAYOUT_POLL_RETRY_MAX); break; case -NFS4ERR_RETRY_UNCACHED_REP: @@ -190,8 +199,6 @@ static int filelayout_async_handle_error(struct rpc_task *task, * i/o and all i/o waiting on the slot table to the MDS until * layout is destroyed and a new valid 
layout is obtained. */ - set_bit(NFS_LAYOUT_INVALID, - &NFS_I(inode)->layout->plh_flags); pnfs_destroy_layout(NFS_I(inode)); rpc_wake_up(&tbl->slot_tbl_waitq); goto reset; @@ -205,11 +212,9 @@ static int filelayout_async_handle_error(struct rpc_task *task, case -EPIPE: dprintk("%s DS connection error %d\n", __func__, task->tk_status); - if (!filelayout_test_devid_invalid(devid)) - _pnfs_return_layout(inode); - filelayout_mark_devid_invalid(devid); + nfs4_mark_deviceid_unavailable(devid); + set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags); rpc_wake_up(&tbl->slot_tbl_waitq); - nfs4_ds_disconnect(clp); /* fall through */ default: reset: @@ -269,6 +274,21 @@ filelayout_set_layoutcommit(struct nfs_write_data *wdata) (unsigned long) NFS_I(hdr->inode)->layout->plh_lwb); } +bool +filelayout_test_devid_unavailable(struct nfs4_deviceid_node *node) +{ + return filelayout_test_devid_invalid(node) || + nfs4_test_deviceid_unavailable(node); +} + +static bool +filelayout_reset_to_mds(struct pnfs_layout_segment *lseg) +{ + struct nfs4_deviceid_node *node = FILELAYOUT_DEVID_NODE(lseg); + + return filelayout_test_devid_unavailable(node); +} + /* * Call ops for the async read/write cases * In the case of dense layouts, the offset needs to be reset to its @@ -286,12 +306,10 @@ static void filelayout_read_prepare(struct rpc_task *task, void *data) } rdata->read_done_cb = filelayout_read_done_cb; - if (nfs41_setup_sequence(rdata->ds_clp->cl_session, - &rdata->args.seq_args, &rdata->res.seq_res, - task)) - return; - - rpc_call_start(task); + nfs41_setup_sequence(rdata->ds_clp->cl_session, + &rdata->args.seq_args, + &rdata->res.seq_res, + task); } static void filelayout_read_call_done(struct rpc_task *task, void *data) @@ -318,7 +336,9 @@ static void filelayout_read_count_stats(struct rpc_task *task, void *data) static void filelayout_read_release(void *data) { struct nfs_read_data *rdata = data; + struct pnfs_layout_hdr *lo = rdata->header->lseg->pls_layout; + filelayout_fenceme(lo->plh_inode, 
lo); nfs_put_client(rdata->ds_clp); rdata->header->mds_ops->rpc_release(data); } @@ -351,9 +371,9 @@ static void prepare_to_resend_writes(struct nfs_commit_data *data) struct nfs_page *first = nfs_list_entry(data->pages.next); data->task.tk_status = 0; - memcpy(data->verf.verifier, first->wb_verf.verifier, - sizeof(first->wb_verf.verifier)); - data->verf.verifier[0]++; /* ensure verifier mismatch */ + memcpy(&data->verf.verifier, &first->wb_verf, + sizeof(data->verf.verifier)); + data->verf.verifier.data[0]++; /* ensure verifier mismatch */ } static int filelayout_commit_done_cb(struct rpc_task *task, @@ -386,12 +406,10 @@ static void filelayout_write_prepare(struct rpc_task *task, void *data) rpc_exit(task, 0); return; } - if (nfs41_setup_sequence(wdata->ds_clp->cl_session, - &wdata->args.seq_args, &wdata->res.seq_res, - task)) - return; - - rpc_call_start(task); + nfs41_setup_sequence(wdata->ds_clp->cl_session, + &wdata->args.seq_args, + &wdata->res.seq_res, + task); } static void filelayout_write_call_done(struct rpc_task *task, void *data) @@ -416,7 +434,9 @@ static void filelayout_write_count_stats(struct rpc_task *task, void *data) static void filelayout_write_release(void *data) { struct nfs_write_data *wdata = data; + struct pnfs_layout_hdr *lo = wdata->header->lseg->pls_layout; + filelayout_fenceme(lo->plh_inode, lo); nfs_put_client(wdata->ds_clp); wdata->header->mds_ops->rpc_release(data); } @@ -425,12 +445,10 @@ static void filelayout_commit_prepare(struct rpc_task *task, void *data) { struct nfs_commit_data *wdata = data; - if (nfs41_setup_sequence(wdata->ds_clp->cl_session, - &wdata->args.seq_args, &wdata->res.seq_res, - task)) - return; - - rpc_call_start(task); + nfs41_setup_sequence(wdata->ds_clp->cl_session, + &wdata->args.seq_args, + &wdata->res.seq_res, + task); } static void filelayout_write_commit_done(struct rpc_task *task, void *data) @@ -453,7 +471,7 @@ static void filelayout_commit_release(void *calldata) struct nfs_commit_data *data = 
calldata; data->completion_ops->completion(data); - put_lseg(data->lseg); + pnfs_put_lseg(data->lseg); nfs_put_client(data->ds_clp); nfs_commitdata_release(data); } @@ -488,7 +506,6 @@ filelayout_read_pagelist(struct nfs_read_data *data) loff_t offset = data->args.offset; u32 j, idx; struct nfs_fh *fh; - int status; dprintk("--> %s ino %lu pgbase %u req %Zu@%llu\n", __func__, hdr->inode->i_ino, @@ -514,9 +531,8 @@ filelayout_read_pagelist(struct nfs_read_data *data) data->mds_offset = offset; /* Perform an asynchronous read to ds */ - status = nfs_initiate_read(ds->ds_clp->cl_rpcclient, data, + nfs_initiate_read(ds->ds_clp->cl_rpcclient, data, &filelayout_read_call_ops, RPC_TASK_SOFTCONN); - BUG_ON(status != 0); return PNFS_ATTEMPTED; } @@ -530,7 +546,6 @@ filelayout_write_pagelist(struct nfs_write_data *data, int sync) loff_t offset = data->args.offset; u32 j, idx; struct nfs_fh *fh; - int status; /* Retrieve the correct rpc_client for the byte range */ j = nfs4_fl_calc_j_index(lseg, offset); @@ -555,10 +570,9 @@ filelayout_write_pagelist(struct nfs_write_data *data, int sync) data->args.offset = filelayout_get_dserver_offset(lseg, offset); /* Perform an asynchronous write */ - status = nfs_initiate_write(ds->ds_clp->cl_rpcclient, data, + nfs_initiate_write(ds->ds_clp->cl_rpcclient, data, &filelayout_write_call_ops, sync, RPC_TASK_SOFTCONN); - BUG_ON(status != 0); return PNFS_ATTEMPTED; } @@ -608,13 +622,13 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo, d = nfs4_find_get_deviceid(NFS_SERVER(lo->plh_inode)->pnfs_curr_ld, NFS_SERVER(lo->plh_inode)->nfs_client, id); if (d == NULL) { - dsaddr = get_device_info(lo->plh_inode, id, gfp_flags); + dsaddr = filelayout_get_device_info(lo->plh_inode, id, gfp_flags); if (dsaddr == NULL) goto out; } else dsaddr = container_of(d, struct nfs4_file_layout_dsaddr, id_node); - /* Found deviceid is being reaped */ - if (test_bit(NFS_DEVICEID_INVALID, &dsaddr->id_node.flags)) + /* Found deviceid is unavailable */ + if 
(filelayout_test_devid_unavailable(&dsaddr->id_node)) goto out_put; fl->dsaddr = dsaddr; @@ -726,7 +740,7 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo, goto out_err; if (fl->num_fh > 0) { - fl->fh_array = kzalloc(fl->num_fh * sizeof(struct nfs_fh *), + fl->fh_array = kcalloc(fl->num_fh, sizeof(fl->fh_array[0]), gfp_flags); if (!fl->fh_array) goto out_err; @@ -885,7 +899,7 @@ static void filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) { - BUG_ON(pgio->pg_lseg != NULL); + WARN_ON_ONCE(pgio->pg_lseg != NULL); if (req->wb_offset != req->wb_pgbase) { /* @@ -915,7 +929,7 @@ filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_commit_info cinfo; int status; - BUG_ON(pgio->pg_lseg != NULL); + WARN_ON_ONCE(pgio->pg_lseg != NULL); if (req->wb_offset != req->wb_pgbase) goto out_mds; @@ -931,7 +945,7 @@ filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio, nfs_init_cinfo(&cinfo, pgio->pg_inode, pgio->pg_dreq); status = filelayout_alloc_commit_info(pgio->pg_lseg, &cinfo, GFP_NOFS); if (status < 0) { - put_lseg(pgio->pg_lseg); + pnfs_put_lseg(pgio->pg_lseg); pgio->pg_lseg = NULL; goto out_mds; } @@ -985,7 +999,7 @@ filelayout_clear_request_commit(struct nfs_page *req, out: nfs_request_remove_commit_list(req, cinfo); spin_unlock(cinfo->lock); - put_lseg(freeme); + pnfs_put_lseg(freeme); } static struct list_head * @@ -1018,7 +1032,7 @@ filelayout_choose_commit_list(struct nfs_page *req, * off due to a rewrite, in which case it will be done in * filelayout_clear_request_commit */ - buckets[i].wlseg = get_lseg(lseg); + buckets[i].wlseg = pnfs_get_lseg(lseg); } set_bit(PG_COMMIT_TO_DS, &req->wb_flags); cinfo->ds->nwritten++; @@ -1128,7 +1142,7 @@ filelayout_scan_ds_commit_list(struct pnfs_commit_bucket *bucket, if (list_empty(src)) bucket->wlseg = NULL; else - get_lseg(bucket->clseg); + pnfs_get_lseg(bucket->clseg); } return ret; } @@ -1159,12 +1173,11 @@ static void filelayout_recover_commit_reqs(struct 
list_head *dst, /* NOTE cinfo->lock is NOT held, relying on fact that this is * only called on single thread per dreq. - * Can't take the lock because need to do put_lseg + * Can't take the lock because need to do pnfs_put_lseg */ for (i = 0, b = cinfo->ds->buckets; i < cinfo->ds->nbuckets; i++, b++) { if (transfer_commit_list(&b->written, dst, cinfo, 0)) { - BUG_ON(!list_empty(&b->written)); - put_lseg(b->wlseg); + pnfs_put_lseg(b->wlseg); b->wlseg = NULL; } } @@ -1200,7 +1213,7 @@ alloc_ds_commits(struct nfs_commit_info *cinfo, struct list_head *list) if (list_empty(&bucket->committing)) continue; nfs_retry_commit(&bucket->committing, bucket->clseg, cinfo); - put_lseg(bucket->clseg); + pnfs_put_lseg(bucket->clseg); bucket->clseg = NULL; } /* Caller will clean up entries put on list */ diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h index 43fe802dd67..8c07241fe52 100644 --- a/fs/nfs/nfs4filelayout.h +++ b/fs/nfs/nfs4filelayout.h @@ -129,23 +129,13 @@ filelayout_mark_devid_invalid(struct nfs4_deviceid_node *node) } static inline bool -filelayout_test_layout_invalid(struct pnfs_layout_hdr *lo) -{ - return test_bit(NFS_LAYOUT_INVALID, &lo->plh_flags); -} - -static inline bool filelayout_test_devid_invalid(struct nfs4_deviceid_node *node) { return test_bit(NFS_DEVICEID_INVALID, &node->flags); } -static inline bool -filelayout_reset_to_mds(struct pnfs_layout_segment *lseg) -{ - return filelayout_test_devid_invalid(FILELAYOUT_DEVID_NODE(lseg)) || - filelayout_test_layout_invalid(lseg->pls_layout); -} +extern bool +filelayout_test_devid_unavailable(struct nfs4_deviceid_node *node); extern struct nfs_fh * nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j); @@ -158,7 +148,6 @@ struct nfs4_pnfs_ds *nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, extern void nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr); extern void nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr); struct nfs4_file_layout_dsaddr * 
-get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id, gfp_t gfp_flags); -void nfs4_ds_disconnect(struct nfs_client *clp); +filelayout_get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id, gfp_t gfp_flags); #endif /* FS_NFS_NFS4FILELAYOUT_H */ diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c index a1fab8da7f0..b720064bcd7 100644 --- a/fs/nfs/nfs4filelayoutdev.c +++ b/fs/nfs/nfs4filelayoutdev.c @@ -33,6 +33,7 @@ #include <linux/module.h> #include "internal.h" +#include "nfs4session.h" #include "nfs4filelayout.h" #define NFSDBG_FACILITY NFSDBG_PNFS_LD @@ -149,28 +150,6 @@ _data_server_lookup_locked(const struct list_head *dsaddrs) } /* - * Lookup DS by nfs_client pointer. Zero data server client pointer - */ -void nfs4_ds_disconnect(struct nfs_client *clp) -{ - struct nfs4_pnfs_ds *ds; - struct nfs_client *found = NULL; - - dprintk("%s clp %p\n", __func__, clp); - spin_lock(&nfs4_ds_cache_lock); - list_for_each_entry(ds, &nfs4_data_server_cache, ds_node) - if (ds->ds_clp && ds->ds_clp == clp) { - found = ds->ds_clp; - ds->ds_clp = NULL; - } - spin_unlock(&nfs4_ds_cache_lock); - if (found) { - set_bit(NFS_CS_STOP_RENEW, &clp->cl_res_state); - nfs_put_client(clp); - } -} - -/* * Create an rpc connection to the nfs4_pnfs_ds data server * Currently only supports IPv4 and IPv6 addresses */ @@ -184,8 +163,6 @@ nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds) dprintk("--> %s DS %s au_flavor %d\n", __func__, ds->ds_remotestr, mds_srv->nfs_client->cl_rpcclient->cl_auth->au_flavor); - BUG_ON(list_empty(&ds->ds_addrs)); - list_for_each_entry(da, &ds->ds_addrs, da_node) { dprintk("%s: DS %s: trying address %s\n", __func__, ds->ds_remotestr, da->da_remotestr); @@ -690,7 +667,7 @@ decode_and_add_device(struct inode *inode, struct pnfs_device *dev, gfp_t gfp_fl * of available devices, and return it. 
*/ struct nfs4_file_layout_dsaddr * -get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id, gfp_t gfp_flags) +filelayout_get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id, gfp_t gfp_flags) { struct pnfs_device *pdev = NULL; u32 max_resp_sz; @@ -728,7 +705,7 @@ get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id, gfp_t gfp_fla pdev->layout_type = LAYOUT_NFSV4_1_FILES; pdev->pages = pages; pdev->pgbase = 0; - pdev->pglen = PAGE_SIZE * max_pages; + pdev->pglen = max_resp_sz; pdev->mincount = 0; rc = nfs4_proc_getdeviceinfo(server, pdev); @@ -804,13 +781,14 @@ nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx) struct nfs4_pnfs_ds *ds = dsaddr->ds_list[ds_idx]; struct nfs4_deviceid_node *devid = FILELAYOUT_DEVID_NODE(lseg); - if (filelayout_test_devid_invalid(devid)) + if (filelayout_test_devid_unavailable(devid)) return NULL; if (ds == NULL) { printk(KERN_ERR "NFS: %s: No data server for offset index %d\n", __func__, ds_idx); - goto mark_dev_invalid; + filelayout_mark_devid_invalid(devid); + return NULL; } if (!ds->ds_clp) { @@ -818,14 +796,12 @@ nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx) int err; err = nfs4_ds_connect(s, ds); - if (err) - goto mark_dev_invalid; + if (err) { + nfs4_mark_deviceid_unavailable(devid); + return NULL; + } } return ds; - -mark_dev_invalid: - filelayout_mark_devid_invalid(devid); - return NULL; } module_param(dataserver_retrans, uint, 0644); diff --git a/fs/nfs/nfs4getroot.c b/fs/nfs/nfs4getroot.c new file mode 100644 index 00000000000..549462e5b9b --- /dev/null +++ b/fs/nfs/nfs4getroot.c @@ -0,0 +1,50 @@ +/* +* Copyright (C) 2006 Red Hat, Inc. All Rights Reserved. 
+* Written by David Howells (dhowells@redhat.com) +*/ + +#include <linux/nfs_fs.h> +#include "nfs4_fs.h" +#include "internal.h" + +#define NFSDBG_FACILITY NFSDBG_CLIENT + +int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh) +{ + struct nfs_fsinfo fsinfo; + int ret = -ENOMEM; + + dprintk("--> nfs4_get_rootfh()\n"); + + fsinfo.fattr = nfs_alloc_fattr(); + if (fsinfo.fattr == NULL) + goto out; + + /* Start by getting the root filehandle from the server */ + ret = nfs4_proc_get_rootfh(server, mntfh, &fsinfo); + if (ret < 0) { + dprintk("nfs4_get_rootfh: getroot error = %d\n", -ret); + goto out; + } + + if (!(fsinfo.fattr->valid & NFS_ATTR_FATTR_TYPE) + || !S_ISDIR(fsinfo.fattr->mode)) { + printk(KERN_ERR "nfs4_get_rootfh:" + " getroot encountered non-directory\n"); + ret = -ENOTDIR; + goto out; + } + + if (fsinfo.fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) { + printk(KERN_ERR "nfs4_get_rootfh:" + " getroot obtained referral\n"); + ret = -EREMOTE; + goto out; + } + + memcpy(&server->fsid, &fsinfo.fattr->fsid, sizeof(server->fsid)); +out: + nfs_free_fattr(fsinfo.fattr); + dprintk("<-- nfs4_get_rootfh() = %d\n", ret); + return ret; +} diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c index 017b4b01a69..1e09eb78543 100644 --- a/fs/nfs/nfs4namespace.c +++ b/fs/nfs/nfs4namespace.c @@ -81,7 +81,8 @@ static char *nfs_path_component(const char *nfspath, const char *end) static char *nfs4_path(struct dentry *dentry, char *buffer, ssize_t buflen) { char *limit; - char *path = nfs_path(&limit, dentry, buffer, buflen); + char *path = nfs_path(&limit, dentry, buffer, buflen, + NFS_PATH_CANONICAL); if (!IS_ERR(path)) { char *path_component = nfs_path_component(path, limit); if (path_component) @@ -192,25 +193,13 @@ out: struct rpc_clnt *nfs4_create_sec_client(struct rpc_clnt *clnt, struct inode *inode, struct qstr *name) { - struct rpc_clnt *clone; - struct rpc_auth *auth; rpc_authflavor_t flavor; flavor = nfs4_negotiate_security(inode, name); if 
((int)flavor < 0) - return ERR_PTR(flavor); + return ERR_PTR((int)flavor); - clone = rpc_clone_client(clnt); - if (IS_ERR(clone)) - return clone; - - auth = rpcauth_create(flavor, clone); - if (!auth) { - rpc_shutdown_client(clone); - clone = ERR_PTR(-EIO); - } - - return clone; + return rpc_clone_client_set_auth(clnt, flavor); } static struct vfsmount *try_location(struct nfs_clone_mount *mountdata, diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 15fc7e4664e..cf747ef8665 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -43,7 +43,6 @@ #include <linux/printk.h> #include <linux/slab.h> #include <linux/sunrpc/clnt.h> -#include <linux/sunrpc/gss_api.h> #include <linux/nfs.h> #include <linux/nfs4.h> #include <linux/nfs_fs.h> @@ -53,7 +52,6 @@ #include <linux/mount.h> #include <linux/module.h> #include <linux/nfs_idmap.h> -#include <linux/sunrpc/bc_xprt.h> #include <linux/xattr.h> #include <linux/utsname.h> #include <linux/freezer.h> @@ -65,16 +63,14 @@ #include "callback.h" #include "pnfs.h" #include "netns.h" +#include "nfs4session.h" +#include "fscache.h" #define NFSDBG_FACILITY NFSDBG_PROC #define NFS4_POLL_RETRY_MIN (HZ/10) #define NFS4_POLL_RETRY_MAX (15*HZ) -#define NFS4_MAX_LOOP_ON_RECOVER (10) - -static unsigned short max_session_slots = NFS4_DEF_SLOT_TABLE_SIZE; - struct nfs4_opendata; static int _nfs4_proc_open(struct nfs4_opendata *data); static int _nfs4_recover_proc_open(struct nfs4_opendata *data); @@ -107,6 +103,8 @@ static int nfs4_map_errors(int err) return -EACCES; case -NFS4ERR_MINOR_VERS_MISMATCH: return -EPROTONOSUPPORT; + case -NFS4ERR_ACCESS: + return -EACCES; default: dprintk("%s could not handle NFSv4 error %d\n", __func__, -err); @@ -153,6 +151,12 @@ static const u32 nfs4_pnfs_open_bitmap[3] = { FATTR4_WORD2_MDSTHRESHOLD }; +static const u32 nfs4_open_noattr_bitmap[3] = { + FATTR4_WORD0_TYPE + | FATTR4_WORD0_CHANGE + | FATTR4_WORD0_FILEID, +}; + const u32 nfs4_statfs_bitmap[2] = { FATTR4_WORD0_FILES_AVAIL | 
FATTR4_WORD0_FILES_FREE @@ -201,7 +205,6 @@ static void nfs4_setup_readdir(u64 cookie, __be32 *verifier, struct dentry *dent { __be32 *start, *p; - BUG_ON(readdir->count < 80); if (cookie > 2) { readdir->cookie = cookie; memcpy(&readdir->verifier, verifier, sizeof(readdir->verifier)); @@ -251,17 +254,6 @@ static void nfs4_setup_readdir(u64 cookie, __be32 *verifier, struct dentry *dent kunmap_atomic(start); } -static int nfs4_wait_clnt_recover(struct nfs_client *clp) -{ - int res; - - might_sleep(); - - res = wait_on_bit(&clp->cl_state, NFS4CLNT_MANAGER_RUNNING, - nfs_wait_bit_killable, TASK_KILLABLE); - return res; -} - static int nfs4_delay(struct rpc_clnt *clnt, long *timeout) { int res = 0; @@ -294,8 +286,8 @@ static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struc case 0: return 0; case -NFS4ERR_OPENMODE: - if (inode && nfs_have_delegation(inode, FMODE_READ)) { - nfs_inode_return_delegation(inode); + if (inode && nfs4_have_delegation(inode, FMODE_READ)) { + nfs4_inode_return_delegation(inode); exception->retry = 1; return 0; } @@ -329,8 +321,7 @@ static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struc dprintk("%s ERROR: %d Reset session\n", __func__, errorcode); nfs4_schedule_session_recovery(clp->cl_session, errorcode); - exception->retry = 1; - break; + goto wait_on_recovery; #endif /* defined(CONFIG_NFS_V4_1) */ case -NFS4ERR_FILE_OPEN: if (exception->timeout > HZ) { @@ -342,7 +333,6 @@ static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struc } case -NFS4ERR_GRACE: case -NFS4ERR_DELAY: - case -EKEYEXPIRED: ret = nfs4_delay(server->client, &exception->timeout); if (ret != 0) break; @@ -388,144 +378,136 @@ static void renew_lease(const struct nfs_server *server, unsigned long timestamp #if defined(CONFIG_NFS_V4_1) -/* - * nfs4_free_slot - free a slot and efficiently update slot table. - * - * freeing a slot is trivially done by clearing its respective bit - * in the bitmap. 
- * If the freed slotid equals highest_used_slotid we want to update it - * so that the server would be able to size down the slot table if needed, - * otherwise we know that the highest_used_slotid is still in use. - * When updating highest_used_slotid there may be "holes" in the bitmap - * so we need to scan down from highest_used_slotid to 0 looking for the now - * highest slotid in use. - * If none found, highest_used_slotid is set to NFS4_NO_SLOT. - * - * Must be called while holding tbl->slot_tbl_lock - */ -static void -nfs4_free_slot(struct nfs4_slot_table *tbl, u32 slotid) -{ - BUG_ON(slotid >= NFS4_MAX_SLOT_TABLE); - /* clear used bit in bitmap */ - __clear_bit(slotid, tbl->used_slots); - - /* update highest_used_slotid when it is freed */ - if (slotid == tbl->highest_used_slotid) { - slotid = find_last_bit(tbl->used_slots, tbl->max_slots); - if (slotid < tbl->max_slots) - tbl->highest_used_slotid = slotid; - else - tbl->highest_used_slotid = NFS4_NO_SLOT; - } - dprintk("%s: slotid %u highest_used_slotid %d\n", __func__, - slotid, tbl->highest_used_slotid); -} - -bool nfs4_set_task_privileged(struct rpc_task *task, void *dummy) -{ - rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED); - return true; -} - -/* - * Signal state manager thread if session fore channel is drained - */ -static void nfs4_check_drain_fc_complete(struct nfs4_session *ses) -{ - if (!test_bit(NFS4_SESSION_DRAINING, &ses->session_state)) { - rpc_wake_up_first(&ses->fc_slot_table.slot_tbl_waitq, - nfs4_set_task_privileged, NULL); - return; - } - - if (ses->fc_slot_table.highest_used_slotid != NFS4_NO_SLOT) - return; - - dprintk("%s COMPLETE: Session Fore Channel Drained\n", __func__); - complete(&ses->fc_slot_table.complete); -} - -/* - * Signal state manager thread if session back channel is drained - */ -void nfs4_check_drain_bc_complete(struct nfs4_session *ses) -{ - if (!test_bit(NFS4_SESSION_DRAINING, &ses->session_state) || - ses->bc_slot_table.highest_used_slotid != 
NFS4_NO_SLOT) - return; - dprintk("%s COMPLETE: Session Back Channel Drained\n", __func__); - complete(&ses->bc_slot_table.complete); -} - static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res) { + struct nfs4_session *session; struct nfs4_slot_table *tbl; + bool send_new_highest_used_slotid = false; - tbl = &res->sr_session->fc_slot_table; if (!res->sr_slot) { /* just wake up the next guy waiting since * we may have not consumed a slot after all */ dprintk("%s: No slot\n", __func__); return; } + tbl = res->sr_slot->table; + session = tbl->session; spin_lock(&tbl->slot_tbl_lock); - nfs4_free_slot(tbl, res->sr_slot - tbl->slots); - nfs4_check_drain_fc_complete(res->sr_session); + /* Be nice to the server: try to ensure that the last transmitted + * value for highest_user_slotid <= target_highest_slotid + */ + if (tbl->highest_used_slotid > tbl->target_highest_slotid) + send_new_highest_used_slotid = true; + + if (nfs41_wake_and_assign_slot(tbl, res->sr_slot)) { + send_new_highest_used_slotid = false; + goto out_unlock; + } + nfs4_free_slot(tbl, res->sr_slot); + + if (tbl->highest_used_slotid != NFS4_NO_SLOT) + send_new_highest_used_slotid = false; +out_unlock: spin_unlock(&tbl->slot_tbl_lock); res->sr_slot = NULL; + if (send_new_highest_used_slotid) + nfs41_server_notify_highest_slotid_update(session->clp); } static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res) { - unsigned long timestamp; + struct nfs4_session *session; + struct nfs4_slot *slot; struct nfs_client *clp; - - /* - * sr_status remains 1 if an RPC level error occurred. The server - * may or may not have processed the sequence operation.. - * Proceed as if the server received and processed the sequence - * operation. 
- */ - if (res->sr_status == 1) - res->sr_status = NFS_OK; + bool interrupted = false; + int ret = 1; /* don't increment the sequence number if the task wasn't sent */ if (!RPC_WAS_SENT(task)) goto out; + slot = res->sr_slot; + session = slot->table->session; + + if (slot->interrupted) { + slot->interrupted = 0; + interrupted = true; + } + /* Check the SEQUENCE operation status */ switch (res->sr_status) { case 0: /* Update the slot's sequence and clientid lease timer */ - ++res->sr_slot->seq_nr; - timestamp = res->sr_renewal_time; - clp = res->sr_session->clp; - do_renew_lease(clp, timestamp); + ++slot->seq_nr; + clp = session->clp; + do_renew_lease(clp, res->sr_timestamp); /* Check sequence flags */ if (res->sr_status_flags != 0) nfs4_schedule_lease_recovery(clp); + nfs41_update_target_slotid(slot->table, slot, res); break; + case 1: + /* + * sr_status remains 1 if an RPC level error occurred. + * The server may or may not have processed the sequence + * operation.. + * Mark the slot as having hosted an interrupted RPC call. + */ + slot->interrupted = 1; + goto out; case -NFS4ERR_DELAY: /* The server detected a resend of the RPC call and * returned NFS4ERR_DELAY as per Section 2.10.6.2 * of RFC5661. */ - dprintk("%s: slot=%td seq=%d: Operation in progress\n", + dprintk("%s: slot=%u seq=%u: Operation in progress\n", __func__, - res->sr_slot - res->sr_session->fc_slot_table.slots, - res->sr_slot->seq_nr); + slot->slot_nr, + slot->seq_nr); goto out_retry; + case -NFS4ERR_BADSLOT: + /* + * The slot id we used was probably retired. Try again + * using a different slot id. + */ + goto retry_nowait; + case -NFS4ERR_SEQ_MISORDERED: + /* + * Was the last operation on this sequence interrupted? + * If so, retry after bumping the sequence number. + */ + if (interrupted) { + ++slot->seq_nr; + goto retry_nowait; + } + /* + * Could this slot have been previously retired? + * If so, then the server may be expecting seq_nr = 1! 
+ */ + if (slot->seq_nr != 1) { + slot->seq_nr = 1; + goto retry_nowait; + } + break; + case -NFS4ERR_SEQ_FALSE_RETRY: + ++slot->seq_nr; + goto retry_nowait; default: /* Just update the slot sequence no. */ - ++res->sr_slot->seq_nr; + ++slot->seq_nr; } out: /* The session may be reset by one of the error handlers. */ dprintk("%s: Error %d free the slot \n", __func__, res->sr_status); nfs41_sequence_free_slot(res); - return 1; + return ret; +retry_nowait: + if (rpc_restart_call_prepare(task)) { + task->tk_status = 0; + ret = 0; + } + goto out; out_retry: if (!rpc_restart_call(task)) goto out; @@ -536,55 +518,27 @@ out_retry: static int nfs4_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res) { - if (res->sr_session == NULL) + if (res->sr_slot == NULL) return 1; return nfs41_sequence_done(task, res); } -/* - * nfs4_find_slot - efficiently look for a free slot - * - * nfs4_find_slot looks for an unset bit in the used_slots bitmap. - * If found, we mark the slot as used, update the highest_used_slotid, - * and respectively set up the sequence operation args. - * The slot number is returned if found, or NFS4_NO_SLOT otherwise. - * - * Note: must be called with under the slot_tbl_lock. 
- */ -static u32 -nfs4_find_slot(struct nfs4_slot_table *tbl) -{ - u32 slotid; - u32 ret_id = NFS4_NO_SLOT; - - dprintk("--> %s used_slots=%04lx highest_used=%u max_slots=%u\n", - __func__, tbl->used_slots[0], tbl->highest_used_slotid, - tbl->max_slots); - slotid = find_first_zero_bit(tbl->used_slots, tbl->max_slots); - if (slotid >= tbl->max_slots) - goto out; - __set_bit(slotid, tbl->used_slots); - if (slotid > tbl->highest_used_slotid || - tbl->highest_used_slotid == NFS4_NO_SLOT) - tbl->highest_used_slotid = slotid; - ret_id = slotid; -out: - dprintk("<-- %s used_slots=%04lx highest_used=%d slotid=%d \n", - __func__, tbl->used_slots[0], tbl->highest_used_slotid, ret_id); - return ret_id; -} - static void nfs41_init_sequence(struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, int cache_reply) { - args->sa_session = NULL; + args->sa_slot = NULL; args->sa_cache_this = 0; + args->sa_privileged = 0; if (cache_reply) args->sa_cache_this = 1; - res->sr_session = NULL; res->sr_slot = NULL; } +static void nfs4_set_sequence_privileged(struct nfs4_sequence_args *args) +{ + args->sa_privileged = 1; +} + int nfs41_setup_sequence(struct nfs4_session *session, struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, @@ -592,59 +546,59 @@ int nfs41_setup_sequence(struct nfs4_session *session, { struct nfs4_slot *slot; struct nfs4_slot_table *tbl; - u32 slotid; dprintk("--> %s\n", __func__); /* slot already allocated? 
*/ if (res->sr_slot != NULL) - return 0; + goto out_success; tbl = &session->fc_slot_table; + task->tk_timeout = 0; + spin_lock(&tbl->slot_tbl_lock); if (test_bit(NFS4_SESSION_DRAINING, &session->session_state) && - !rpc_task_has_priority(task, RPC_PRIORITY_PRIVILEGED)) { + !args->sa_privileged) { /* The state manager will wait until the slot table is empty */ - rpc_sleep_on(&tbl->slot_tbl_waitq, task, NULL); - spin_unlock(&tbl->slot_tbl_lock); dprintk("%s session is draining\n", __func__); - return -EAGAIN; - } - - if (!rpc_queue_empty(&tbl->slot_tbl_waitq) && - !rpc_task_has_priority(task, RPC_PRIORITY_PRIVILEGED)) { - rpc_sleep_on(&tbl->slot_tbl_waitq, task, NULL); - spin_unlock(&tbl->slot_tbl_lock); - dprintk("%s enforce FIFO order\n", __func__); - return -EAGAIN; + goto out_sleep; } - slotid = nfs4_find_slot(tbl); - if (slotid == NFS4_NO_SLOT) { - rpc_sleep_on(&tbl->slot_tbl_waitq, task, NULL); - spin_unlock(&tbl->slot_tbl_lock); + slot = nfs4_alloc_slot(tbl); + if (IS_ERR(slot)) { + /* If out of memory, try again in 1/4 second */ + if (slot == ERR_PTR(-ENOMEM)) + task->tk_timeout = HZ >> 2; dprintk("<-- %s: no free slots\n", __func__); - return -EAGAIN; + goto out_sleep; } spin_unlock(&tbl->slot_tbl_lock); - rpc_task_set_priority(task, RPC_PRIORITY_NORMAL); - slot = tbl->slots + slotid; - args->sa_session = session; - args->sa_slotid = slotid; + args->sa_slot = slot; - dprintk("<-- %s slotid=%d seqid=%d\n", __func__, slotid, slot->seq_nr); + dprintk("<-- %s slotid=%d seqid=%d\n", __func__, + slot->slot_nr, slot->seq_nr); - res->sr_session = session; res->sr_slot = slot; - res->sr_renewal_time = jiffies; + res->sr_timestamp = jiffies; res->sr_status_flags = 0; /* * sr_status is only set in decode_sequence, and so will remain * set to 1 if an rpc level failure occurs. 
*/ res->sr_status = 1; +out_success: + rpc_call_start(task); return 0; +out_sleep: + /* Privileged tasks are queued with top priority */ + if (args->sa_privileged) + rpc_sleep_on_priority(&tbl->slot_tbl_waitq, task, + NULL, RPC_PRIORITY_PRIVILEGED); + else + rpc_sleep_on(&tbl->slot_tbl_waitq, task, NULL); + spin_unlock(&tbl->slot_tbl_lock); + return -EAGAIN; } EXPORT_SYMBOL_GPL(nfs41_setup_sequence); @@ -656,12 +610,14 @@ int nfs4_setup_sequence(const struct nfs_server *server, struct nfs4_session *session = nfs4_get_session(server); int ret = 0; - if (session == NULL) + if (session == NULL) { + rpc_call_start(task); goto out; + } - dprintk("--> %s clp %p session %p sr_slot %td\n", + dprintk("--> %s clp %p session %p sr_slot %d\n", __func__, session->clp, session, res->sr_slot ? - res->sr_slot - session->fc_slot_table.slots : -1); + res->sr_slot->slot_nr : -1); ret = nfs41_setup_sequence(session, args, res, task); out: @@ -678,19 +634,11 @@ struct nfs41_call_sync_data { static void nfs41_call_sync_prepare(struct rpc_task *task, void *calldata) { struct nfs41_call_sync_data *data = calldata; + struct nfs4_session *session = nfs4_get_session(data->seq_server); dprintk("--> %s data->seq_server %p\n", __func__, data->seq_server); - if (nfs4_setup_sequence(data->seq_server, data->seq_args, - data->seq_res, task)) - return; - rpc_call_start(task); -} - -static void nfs41_call_priv_sync_prepare(struct rpc_task *task, void *calldata) -{ - rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED); - nfs41_call_sync_prepare(task, calldata); + nfs41_setup_sequence(session, data->seq_args, data->seq_res, task); } static void nfs41_call_sync_done(struct rpc_task *task, void *calldata) @@ -705,17 +653,11 @@ static const struct rpc_call_ops nfs41_call_sync_ops = { .rpc_call_done = nfs41_call_sync_done, }; -static const struct rpc_call_ops nfs41_call_priv_sync_ops = { - .rpc_call_prepare = nfs41_call_priv_sync_prepare, - .rpc_call_done = nfs41_call_sync_done, -}; - static int 
nfs4_call_sync_sequence(struct rpc_clnt *clnt, struct nfs_server *server, struct rpc_message *msg, struct nfs4_sequence_args *args, - struct nfs4_sequence_res *res, - int privileged) + struct nfs4_sequence_res *res) { int ret; struct rpc_task *task; @@ -731,8 +673,6 @@ static int nfs4_call_sync_sequence(struct rpc_clnt *clnt, .callback_data = &data }; - if (privileged) - task_setup.callback_ops = &nfs41_call_priv_sync_ops; task = rpc_run_task(&task_setup); if (IS_ERR(task)) ret = PTR_ERR(task); @@ -743,24 +683,18 @@ static int nfs4_call_sync_sequence(struct rpc_clnt *clnt, return ret; } -int _nfs4_call_sync_session(struct rpc_clnt *clnt, - struct nfs_server *server, - struct rpc_message *msg, - struct nfs4_sequence_args *args, - struct nfs4_sequence_res *res, - int cache_reply) -{ - nfs41_init_sequence(args, res, cache_reply); - return nfs4_call_sync_sequence(clnt, server, msg, args, res, 0); -} - #else -static inline +static void nfs41_init_sequence(struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, int cache_reply) { } +static void nfs4_set_sequence_privileged(struct nfs4_sequence_args *args) +{ +} + + static int nfs4_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res) { @@ -768,18 +702,17 @@ static int nfs4_sequence_done(struct rpc_task *task, } #endif /* CONFIG_NFS_V4_1 */ +static int _nfs4_call_sync(struct rpc_clnt *clnt, struct nfs_server *server, struct rpc_message *msg, struct nfs4_sequence_args *args, - struct nfs4_sequence_res *res, - int cache_reply) + struct nfs4_sequence_res *res) { - nfs41_init_sequence(args, res, cache_reply); return rpc_call_sync(clnt, msg, 0); } -static inline +static int nfs4_call_sync(struct rpc_clnt *clnt, struct nfs_server *server, struct rpc_message *msg, @@ -787,8 +720,9 @@ int nfs4_call_sync(struct rpc_clnt *clnt, struct nfs4_sequence_res *res, int cache_reply) { + nfs41_init_sequence(args, res, cache_reply); return server->nfs_client->cl_mvops->call_sync(clnt, server, msg, - args, res, 
cache_reply); + args, res); } static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo) @@ -800,6 +734,7 @@ static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo) if (!cinfo->atomic || cinfo->before != dir->i_version) nfs_force_lookup_revalidate(dir); dir->i_version = cinfo->after; + nfs_fscache_invalidate(dir); spin_unlock(&dir->i_lock); } @@ -830,6 +765,7 @@ static void nfs4_init_opendata_res(struct nfs4_opendata *p) p->o_res.seqid = p->o_arg.seqid; p->c_res.seqid = p->c_arg.seqid; p->o_res.server = p->o_arg.server; + p->o_res.access_request = p->o_arg.access; nfs_fattr_init(&p->f_attr); nfs_fattr_init_names(&p->f_attr, &p->owner_name, &p->group_name); } @@ -858,6 +794,14 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, p->o_arg.fh = NFS_FH(dir); p->o_arg.open_flags = flags; p->o_arg.fmode = fmode & (FMODE_READ|FMODE_WRITE); + /* don't put an ACCESS op in OPEN compound if O_EXCL, because ACCESS + * will return permission denied for all bits until close */ + if (!(flags & O_EXCL)) { + /* ask server to check for all possible rights as results + * are cached */ + p->o_arg.access = NFS4_ACCESS_READ | NFS4_ACCESS_MODIFY | + NFS4_ACCESS_EXTEND | NFS4_ACCESS_EXECUTE; + } p->o_arg.clientid = server->nfs_client->cl_clientid; p->o_arg.id.create_time = ktime_to_ns(sp->so_seqid.create_time); p->o_arg.id.uniquifier = sp->so_seqid.owner_id; @@ -1065,7 +1009,7 @@ static void nfs4_return_incompatible_delegation(struct inode *inode, fmode_t fmo return; } rcu_read_unlock(); - nfs_inode_return_delegation(inode); + nfs4_inode_return_delegation(inode); } static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata) @@ -1113,11 +1057,80 @@ out_return_state: return state; } -static struct nfs4_state *nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data) +static void +nfs4_opendata_check_deleg(struct nfs4_opendata *data, struct nfs4_state *state) +{ + struct nfs_client *clp = 
NFS_SERVER(state->inode)->nfs_client; + struct nfs_delegation *delegation; + int delegation_flags = 0; + + rcu_read_lock(); + delegation = rcu_dereference(NFS_I(state->inode)->delegation); + if (delegation) + delegation_flags = delegation->flags; + rcu_read_unlock(); + if (data->o_arg.claim == NFS4_OPEN_CLAIM_DELEGATE_CUR) { + pr_err_ratelimited("NFS: Broken NFSv4 server %s is " + "returning a delegation for " + "OPEN(CLAIM_DELEGATE_CUR)\n", + clp->cl_hostname); + } else if ((delegation_flags & 1UL<<NFS_DELEGATION_NEED_RECLAIM) == 0) + nfs_inode_set_delegation(state->inode, + data->owner->so_cred, + &data->o_res); + else + nfs_inode_reclaim_delegation(state->inode, + data->owner->so_cred, + &data->o_res); +} + +/* + * Check the inode attributes against the CLAIM_PREVIOUS returned attributes + * and update the nfs4_state. + */ +static struct nfs4_state * +_nfs4_opendata_reclaim_to_nfs4_state(struct nfs4_opendata *data) +{ + struct inode *inode = data->state->inode; + struct nfs4_state *state = data->state; + int ret; + + if (!data->rpc_done) { + ret = data->rpc_status; + goto err; + } + + ret = -ESTALE; + if (!(data->f_attr.valid & NFS_ATTR_FATTR_TYPE) || + !(data->f_attr.valid & NFS_ATTR_FATTR_FILEID) || + !(data->f_attr.valid & NFS_ATTR_FATTR_CHANGE)) + goto err; + + ret = -ENOMEM; + state = nfs4_get_open_state(inode, data->owner); + if (state == NULL) + goto err; + + ret = nfs_refresh_inode(inode, &data->f_attr); + if (ret) + goto err; + + if (data->o_res.delegation_type != 0) + nfs4_opendata_check_deleg(data, state); + update_open_stateid(state, &data->o_res.stateid, NULL, + data->o_arg.fmode); + + return state; +err: + return ERR_PTR(ret); + +} + +static struct nfs4_state * +_nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data) { struct inode *inode; struct nfs4_state *state = NULL; - struct nfs_delegation *delegation; int ret; if (!data->rpc_done) { @@ -1136,30 +1149,8 @@ static struct nfs4_state *nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data state 
= nfs4_get_open_state(inode, data->owner); if (state == NULL) goto err_put_inode; - if (data->o_res.delegation_type != 0) { - struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; - int delegation_flags = 0; - - rcu_read_lock(); - delegation = rcu_dereference(NFS_I(inode)->delegation); - if (delegation) - delegation_flags = delegation->flags; - rcu_read_unlock(); - if (data->o_arg.claim == NFS4_OPEN_CLAIM_DELEGATE_CUR) { - pr_err_ratelimited("NFS: Broken NFSv4 server %s is " - "returning a delegation for " - "OPEN(CLAIM_DELEGATE_CUR)\n", - clp->cl_hostname); - } else if ((delegation_flags & 1UL<<NFS_DELEGATION_NEED_RECLAIM) == 0) - nfs_inode_set_delegation(state->inode, - data->owner->so_cred, - &data->o_res); - else - nfs_inode_reclaim_delegation(state->inode, - data->owner->so_cred, - &data->o_res); - } - + if (data->o_res.delegation_type != 0) + nfs4_opendata_check_deleg(data, state); update_open_stateid(state, &data->o_res.stateid, NULL, data->o_arg.fmode); iput(inode); @@ -1171,6 +1162,14 @@ err: return ERR_PTR(ret); } +static struct nfs4_state * +nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data) +{ + if (data->o_arg.claim == NFS4_OPEN_CLAIM_PREVIOUS) + return _nfs4_opendata_reclaim_to_nfs4_state(data); + return _nfs4_opendata_to_nfs4_state(data); +} + static struct nfs_open_context *nfs4_state_find_open_context(struct nfs4_state *state) { struct nfs_inode *nfsi = NFS_I(state->inode); @@ -1372,13 +1371,6 @@ int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state nfs_inode_find_state_and_recover(state->inode, stateid); nfs4_schedule_stateid_recovery(server, state); - case -EKEYEXPIRED: - /* - * User RPCSEC_GSS context has expired. - * We cannot recover this stateid now, so - * skip it and allow recovery thread to - * proceed. 
- */ case -ENOMEM: err = 0; goto out; @@ -1492,26 +1484,21 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata) data->o_arg.clientid = sp->so_server->nfs_client->cl_clientid; if (data->o_arg.claim == NFS4_OPEN_CLAIM_PREVIOUS) { task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_NOATTR]; + data->o_arg.open_bitmap = &nfs4_open_noattr_bitmap[0]; nfs_copy_fh(&data->o_res.fh, data->o_arg.fh); } data->timestamp = jiffies; if (nfs4_setup_sequence(data->o_arg.server, &data->o_arg.seq_args, - &data->o_res.seq_res, task)) - return; - rpc_call_start(task); + &data->o_res.seq_res, + task) != 0) + nfs_release_seqid(data->o_arg.seqid); return; unlock_no_action: rcu_read_unlock(); out_no_action: task->tk_action = NULL; - -} - -static void nfs4_recover_open_prepare(struct rpc_task *task, void *calldata) -{ - rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED); - nfs4_open_prepare(task, calldata); + nfs4_sequence_done(task, &data->o_res.seq_res); } static void nfs4_open_done(struct rpc_task *task, void *calldata) @@ -1524,7 +1511,8 @@ static void nfs4_open_done(struct rpc_task *task, void *calldata) return; if (task->tk_status == 0) { - switch (data->o_res.f_attr->mode & S_IFMT) { + if (data->o_res.f_attr->valid & NFS_ATTR_FATTR_TYPE) { + switch (data->o_res.f_attr->mode & S_IFMT) { case S_IFREG: break; case S_IFLNK: @@ -1535,6 +1523,7 @@ static void nfs4_open_done(struct rpc_task *task, void *calldata) break; default: data->rpc_status = -ENOTDIR; + } } renew_lease(data->o_res.server, data->timestamp); if (!(data->o_res.rflags & NFS4_OPEN_RESULT_CONFIRM)) @@ -1570,12 +1559,6 @@ static const struct rpc_call_ops nfs4_open_ops = { .rpc_release = nfs4_open_release, }; -static const struct rpc_call_ops nfs4_recover_open_ops = { - .rpc_call_prepare = nfs4_recover_open_prepare, - .rpc_call_done = nfs4_open_done, - .rpc_release = nfs4_open_release, -}; - static int nfs4_run_open_task(struct nfs4_opendata *data, int isrecover) { struct inode *dir = 
data->dir->d_inode; @@ -1605,7 +1588,7 @@ static int nfs4_run_open_task(struct nfs4_opendata *data, int isrecover) data->rpc_status = 0; data->cancelled = 0; if (isrecover) - task_setup_data.callback_ops = &nfs4_recover_open_ops; + nfs4_set_sequence_privileged(&o_arg->seq_args); task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) return PTR_ERR(task); @@ -1641,6 +1624,43 @@ static int _nfs4_recover_proc_open(struct nfs4_opendata *data) return status; } +static int nfs4_opendata_access(struct rpc_cred *cred, + struct nfs4_opendata *opendata, + struct nfs4_state *state, fmode_t fmode, + int openflags) +{ + struct nfs_access_entry cache; + u32 mask; + + /* access call failed or for some reason the server doesn't + * support any access modes -- defer access call until later */ + if (opendata->o_res.access_supported == 0) + return 0; + + mask = 0; + /* don't check MAY_WRITE - a newly created file may not have + * write mode bits, but POSIX allows the creating process to write. + * use openflags to check for exec, because fmode won't + * always have FMODE_EXEC set when file open for exec. */ + if (openflags & __FMODE_EXEC) { + /* ONLY check for exec rights */ + mask = MAY_EXEC; + } else if (fmode & FMODE_READ) + mask = MAY_READ; + + cache.cred = cred; + cache.jiffies = jiffies; + nfs_access_set_mask(&cache, opendata->o_res.access_result); + nfs_access_add_cache(state->inode, &cache); + + if ((mask & ~cache.mask & (MAY_READ | MAY_EXEC)) == 0) + return 0; + + /* even though OPEN succeeded, access is denied. 
Close the file */ + nfs4_close_state(state, fmode); + return -EACCES; +} + /* * Note: On error, nfs4_proc_open will free the struct nfs4_opendata */ @@ -1678,24 +1698,6 @@ static int _nfs4_proc_open(struct nfs4_opendata *data) return 0; } -static int nfs4_client_recover_expired_lease(struct nfs_client *clp) -{ - unsigned int loop; - int ret; - - for (loop = NFS4_MAX_LOOP_ON_RECOVER; loop != 0; loop--) { - ret = nfs4_wait_clnt_recover(clp); - if (ret != 0) - break; - if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) && - !test_bit(NFS4CLNT_CHECK_LEASE,&clp->cl_state)) - break; - nfs4_schedule_state_manager(clp); - ret = -EIO; - } - return ret; -} - static int nfs4_recover_expired_lease(struct nfs_server *server) { return nfs4_client_recover_expired_lease(server->nfs_client); @@ -1756,33 +1758,74 @@ static int nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *sta } #if defined(CONFIG_NFS_V4_1) -static int nfs41_check_expired_stateid(struct nfs4_state *state, nfs4_stateid *stateid, unsigned int flags) +static void nfs41_clear_delegation_stateid(struct nfs4_state *state) { - int status = NFS_OK; struct nfs_server *server = NFS_SERVER(state->inode); + nfs4_stateid *stateid = &state->stateid; + int status; - if (state->flags & flags) { - status = nfs41_test_stateid(server, stateid); - if (status != NFS_OK) { + /* If a state reset has been done, test_stateid is unneeded */ + if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0) + return; + + status = nfs41_test_stateid(server, stateid); + if (status != NFS_OK) { + /* Free the stateid unless the server explicitly + * informs us the stateid is unrecognized. 
*/ + if (status != -NFS4ERR_BAD_STATEID) nfs41_free_stateid(server, stateid); - state->flags &= ~flags; - } + nfs_remove_bad_delegation(state->inode); + + write_seqlock(&state->seqlock); + nfs4_stateid_copy(&state->stateid, &state->open_stateid); + write_sequnlock(&state->seqlock); + clear_bit(NFS_DELEGATED_STATE, &state->flags); + } +} + +/** + * nfs41_check_open_stateid - possibly free an open stateid + * + * @state: NFSv4 state for an inode + * + * Returns NFS_OK if recovery for this stateid is now finished. + * Otherwise a negative NFS4ERR value is returned. + */ +static int nfs41_check_open_stateid(struct nfs4_state *state) +{ + struct nfs_server *server = NFS_SERVER(state->inode); + nfs4_stateid *stateid = &state->open_stateid; + int status; + + /* If a state reset has been done, test_stateid is unneeded */ + if ((test_bit(NFS_O_RDONLY_STATE, &state->flags) == 0) && + (test_bit(NFS_O_WRONLY_STATE, &state->flags) == 0) && + (test_bit(NFS_O_RDWR_STATE, &state->flags) == 0)) + return -NFS4ERR_BAD_STATEID; + + status = nfs41_test_stateid(server, stateid); + if (status != NFS_OK) { + /* Free the stateid unless the server explicitly + * informs us the stateid is unrecognized. 
*/ + if (status != -NFS4ERR_BAD_STATEID) + nfs41_free_stateid(server, stateid); + + clear_bit(NFS_O_RDONLY_STATE, &state->flags); + clear_bit(NFS_O_WRONLY_STATE, &state->flags); + clear_bit(NFS_O_RDWR_STATE, &state->flags); } return status; } static int nfs41_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state) { - int deleg_status, open_status; - int deleg_flags = 1 << NFS_DELEGATED_STATE; - int open_flags = (1 << NFS_O_RDONLY_STATE) | (1 << NFS_O_WRONLY_STATE) | (1 << NFS_O_RDWR_STATE); - - deleg_status = nfs41_check_expired_stateid(state, &state->stateid, deleg_flags); - open_status = nfs41_check_expired_stateid(state, &state->open_stateid, open_flags); + int status; - if ((deleg_status == NFS_OK) && (open_status == NFS_OK)) - return NFS_OK; - return nfs4_open_expired(sp, state); + nfs41_clear_delegation_stateid(state); + status = nfs41_check_open_stateid(state); + if (status != NFS_OK) + status = nfs4_open_expired(sp, state); + return status; } #endif @@ -1857,6 +1900,10 @@ static int _nfs4_do_open(struct inode *dir, if (server->caps & NFS_CAP_POSIX_LOCK) set_bit(NFS_STATE_POSIX_LOCKS, &state->flags); + status = nfs4_opendata_access(cred, opendata, state, fmode, flags); + if (status != 0) + goto err_opendata_put; + if (opendata->o_arg.open_flags & O_EXCL) { nfs4_exclusive_attrset(opendata, sattr); @@ -1902,7 +1949,7 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir, struct nfs4_state *res; int status; - fmode &= FMODE_READ|FMODE_WRITE; + fmode &= FMODE_READ|FMODE_WRITE|FMODE_EXEC; do { status = _nfs4_do_open(dir, dentry, fmode, flags, sattr, cred, &res, ctx_th); @@ -1974,8 +2021,12 @@ static int _nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, nfs_fattr_init(fattr); if (state != NULL) { + struct nfs_lockowner lockowner = { + .l_owner = current->files, + .l_pid = current->tgid, + }; nfs4_select_rw_stateid(&arg.stateid, state, FMODE_WRITE, - current->files, current->tgid); + &lockowner); } else if 
(nfs4_copy_delegation_stateid(&arg.stateid, inode, FMODE_WRITE)) { /* Use that stateid */ @@ -2037,7 +2088,7 @@ static void nfs4_free_closedata(void *data) nfs4_put_open_state(calldata->state); nfs_free_seqid(calldata->arg.seqid); nfs4_put_state_owner(sp); - nfs_sb_deactive(sb); + nfs_sb_deactive_async(sb); kfree(calldata); } @@ -2094,6 +2145,7 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) { struct nfs4_closedata *calldata = data; struct nfs4_state *state = calldata->state; + struct inode *inode = calldata->inode; int call_close = 0; dprintk("%s: begin!\n", __func__); @@ -2121,27 +2173,24 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) if (!call_close) { /* Note: exit _without_ calling nfs4_close_done */ task->tk_action = NULL; + nfs4_sequence_done(task, &calldata->res.seq_res); goto out; } if (calldata->arg.fmode == 0) { task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE]; if (calldata->roc && - pnfs_roc_drain(calldata->inode, &calldata->roc_barrier)) { - rpc_sleep_on(&NFS_SERVER(calldata->inode)->roc_rpcwaitq, - task, NULL); + pnfs_roc_drain(inode, &calldata->roc_barrier, task)) goto out; - } } nfs_fattr_init(calldata->res.fattr); calldata->timestamp = jiffies; - if (nfs4_setup_sequence(NFS_SERVER(calldata->inode), + if (nfs4_setup_sequence(NFS_SERVER(inode), &calldata->arg.seq_args, &calldata->res.seq_res, - task)) - goto out; - rpc_call_start(task); + task) != 0) + nfs_release_seqid(calldata->arg.seqid); out: dprintk("%s: done!\n", __func__); } @@ -2163,7 +2212,7 @@ static const struct rpc_call_ops nfs4_close_ops = { * * NOTE: Caller must be holding the sp->so_owner semaphore! 
*/ -int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait, bool roc) +int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait) { struct nfs_server *server = NFS_SERVER(state->inode); struct nfs4_closedata *calldata; @@ -2199,7 +2248,7 @@ int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait, bool roc) calldata->res.fattr = &calldata->fattr; calldata->res.seqid = calldata->arg.seqid; calldata->res.server = server; - calldata->roc = roc; + calldata->roc = pnfs_roc(state->inode); nfs_sb_active(calldata->inode->i_sb); msg.rpc_argp = &calldata->arg; @@ -2216,8 +2265,6 @@ int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait, bool roc) out_free_calldata: kfree(calldata); out: - if (roc) - pnfs_roc_release(state->inode); nfs4_put_open_state(state); nfs4_put_state_owner(sp); return status; @@ -2360,7 +2407,7 @@ static int nfs4_lookup_root_sec(struct nfs_server *server, struct nfs_fh *fhandl int ret; auth = rpcauth_create(flavor, server->client); - if (!auth) { + if (IS_ERR(auth)) { ret = -EIO; goto out; } @@ -2375,11 +2422,16 @@ static int nfs4_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle, int i, len, status = 0; rpc_authflavor_t flav_array[NFS_MAX_SECFLAVORS]; - len = gss_mech_list_pseudoflavors(&flav_array[0]); - flav_array[len] = RPC_AUTH_NULL; - len += 1; + len = rpcauth_list_flavors(flav_array, ARRAY_SIZE(flav_array)); + if (len < 0) + return len; for (i = 0; i < len; i++) { + /* AUTH_UNIX is the default flavor if none was specified, + * thus has already been tried. 
*/ + if (flav_array[i] == RPC_AUTH_UNIX) + continue; + status = nfs4_lookup_root_sec(server, fhandle, info, flav_array[i]); if (status == -NFS4ERR_WRONGSEC || status == -EACCES) continue; @@ -2724,13 +2776,7 @@ static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0); if (!status) { - entry->mask = 0; - if (res.access & NFS4_ACCESS_READ) - entry->mask |= MAY_READ; - if (res.access & (NFS4_ACCESS_MODIFY | NFS4_ACCESS_EXTEND | NFS4_ACCESS_DELETE)) - entry->mask |= MAY_WRITE; - if (res.access & (NFS4_ACCESS_LOOKUP|NFS4_ACCESS_EXECUTE)) - entry->mask |= MAY_EXEC; + nfs_access_set_mask(entry, res.access); nfs_refresh_inode(inode, res.fattr); } nfs_free_fattr(res.fattr); @@ -2766,9 +2812,7 @@ static int nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry) * * In the case of WRITE, we also want to put the GETATTR after * the operation -- in this case because we want to make sure - * we get the post-operation mtime and size. This means that - * we can't use xdr_encode_pages() as written: we need a variant - * of it which would leave room in the 'tail' iovec. + * we get the post-operation mtime and size. * * Both of these changes to the XDR layer would in fact be quite * minor, but I decided to leave them for a subsequent patch. @@ -2806,37 +2850,24 @@ static int nfs4_proc_readlink(struct inode *inode, struct page *page, } /* - * Got race? - * We will need to arrange for the VFS layer to provide an atomic open. - * Until then, this create/open method is prone to inefficiency and race - * conditions due to the lookup, create, and open VFS calls from sys_open() - * placed on the wire. - * - * Given the above sorry state of affairs, I'm simply sending an OPEN. - * The file will be opened again in the subsequent VFS open call - * (nfs4_proc_file_open). 
- * - * The open for read will just hang around to be used by any process that - * opens the file O_RDONLY. This will all be resolved with the VFS changes. + * This is just for mknod. open(O_CREAT) will always do ->open_context(). */ - static int nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, - int flags, struct nfs_open_context *ctx) + int flags) { - struct dentry *de = dentry; + struct nfs_open_context *ctx; struct nfs4_state *state; - struct rpc_cred *cred = NULL; - fmode_t fmode = 0; int status = 0; - if (ctx != NULL) { - cred = ctx->cred; - de = ctx->dentry; - fmode = ctx->mode; - } + ctx = alloc_nfs_open_context(dentry, FMODE_READ); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + sattr->ia_mode &= ~current_umask(); - state = nfs4_do_open(dir, de, fmode, flags, sattr, cred, NULL); + state = nfs4_do_open(dir, dentry, ctx->mode, + flags, sattr, ctx->cred, + &ctx->mdsthreshold); d_drop(dentry); if (IS_ERR(state)) { status = PTR_ERR(state); @@ -2844,11 +2875,9 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, } d_add(dentry, igrab(state->inode)); nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); - if (ctx != NULL) - ctx->state = state; - else - nfs4_close_sync(state, fmode); + ctx->state = state; out: + put_nfs_open_context(ctx); return status; } @@ -2900,12 +2929,10 @@ static void nfs4_proc_unlink_setup(struct rpc_message *msg, struct inode *dir) static void nfs4_proc_unlink_rpc_prepare(struct rpc_task *task, struct nfs_unlinkdata *data) { - if (nfs4_setup_sequence(NFS_SERVER(data->dir), - &data->args.seq_args, - &data->res.seq_res, - task)) - return; - rpc_call_start(task); + nfs4_setup_sequence(NFS_SERVER(data->dir), + &data->args.seq_args, + &data->res.seq_res, + task); } static int nfs4_proc_unlink_done(struct rpc_task *task, struct inode *dir) @@ -2933,12 +2960,10 @@ static void nfs4_proc_rename_setup(struct rpc_message *msg, struct inode *dir) static void 
nfs4_proc_rename_rpc_prepare(struct rpc_task *task, struct nfs_renamedata *data) { - if (nfs4_setup_sequence(NFS_SERVER(data->old_dir), - &data->args.seq_args, - &data->res.seq_res, - task)) - return; - rpc_call_start(task); + nfs4_setup_sequence(NFS_SERVER(data->old_dir), + &data->args.seq_args, + &data->res.seq_res, + task); } static int nfs4_proc_rename_done(struct rpc_task *task, struct inode *old_dir, @@ -3189,11 +3214,11 @@ static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, dentry->d_parent->d_name.name, dentry->d_name.name, (unsigned long long)cookie); - nfs4_setup_readdir(cookie, NFS_COOKIEVERF(dir), dentry, &args); + nfs4_setup_readdir(cookie, NFS_I(dir)->cookieverf, dentry, &args); res.pgbase = args.pgbase; status = nfs4_call_sync(NFS_SERVER(dir)->client, NFS_SERVER(dir), &msg, &args.seq_args, &res.seq_res, 0); if (status >= 0) { - memcpy(NFS_COOKIEVERF(dir), res.verifier.data, NFS4_VERIFIER_SIZE); + memcpy(NFS_I(dir)->cookieverf, res.verifier.data, NFS4_VERIFIER_SIZE); status += args.pgbase; } @@ -3224,9 +3249,6 @@ static int _nfs4_proc_mknod(struct inode *dir, struct dentry *dentry, int mode = sattr->ia_mode; int status = -ENOMEM; - BUG_ON(!(sattr->ia_valid & ATTR_MODE)); - BUG_ON(!S_ISFIFO(mode) && !S_ISBLK(mode) && !S_ISCHR(mode) && !S_ISSOCK(mode)); - data = nfs4_alloc_createdata(dir, &dentry->d_name, sattr, NF4SOCK); if (data == NULL) goto out; @@ -3242,10 +3264,13 @@ static int _nfs4_proc_mknod(struct inode *dir, struct dentry *dentry, data->arg.ftype = NF4CHR; data->arg.u.device.specdata1 = MAJOR(rdev); data->arg.u.device.specdata2 = MINOR(rdev); + } else if (!S_ISSOCK(mode)) { + status = -EINVAL; + goto out_free; } status = nfs4_do_create(dir, dentry, data); - +out_free: nfs4_free_createdata(data); out: return status; @@ -3332,8 +3357,17 @@ static int nfs4_do_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, str static int nfs4_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsinfo 
*fsinfo) { + int error; + nfs_fattr_init(fsinfo->fattr); - return nfs4_do_fsinfo(server, fhandle, fsinfo); + error = nfs4_do_fsinfo(server, fhandle, fsinfo); + if (error == 0) { + /* block layout checks this! */ + server->pnfs_blksize = fsinfo->blksize; + set_pnfs_layoutdriver(server, fhandle, fsinfo->layouttype); + } + + return error; } static int _nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, @@ -3418,12 +3452,10 @@ static void nfs4_proc_read_setup(struct nfs_read_data *data, struct rpc_message static void nfs4_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data) { - if (nfs4_setup_sequence(NFS_SERVER(data->header->inode), - &data->args.seq_args, - &data->res.seq_res, - task)) - return; - rpc_call_start(task); + nfs4_setup_sequence(NFS_SERVER(data->header->inode), + &data->args.seq_args, + &data->res.seq_res, + task); } static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_write_data *data) @@ -3460,7 +3492,7 @@ bool nfs4_write_need_cache_consistency_data(const struct nfs_write_data *data) /* Otherwise, request attributes if and only if we don't hold * a delegation */ - return nfs_have_delegation(hdr->inode, FMODE_READ) == 0; + return nfs4_have_delegation(hdr->inode, FMODE_READ) == 0; } static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_message *msg) @@ -3484,22 +3516,18 @@ static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_messag static void nfs4_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data) { - if (nfs4_setup_sequence(NFS_SERVER(data->header->inode), - &data->args.seq_args, - &data->res.seq_res, - task)) - return; - rpc_call_start(task); + nfs4_setup_sequence(NFS_SERVER(data->header->inode), + &data->args.seq_args, + &data->res.seq_res, + task); } static void nfs4_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data) { - if (nfs4_setup_sequence(NFS_SERVER(data->inode), - &data->args.seq_args, - 
&data->res.seq_res, - task)) - return; - rpc_call_start(task); + nfs4_setup_sequence(NFS_SERVER(data->inode), + &data->args.seq_args, + &data->res.seq_res, + task); } static int nfs4_commit_done_cb(struct rpc_task *task, struct nfs_commit_data *data) @@ -3621,11 +3649,11 @@ static inline int nfs4_server_supports_acls(struct nfs_server *server) && (server->acl_bitmask & ACL4_SUPPORT_DENY_ACL); } -/* Assuming that XATTR_SIZE_MAX is a multiple of PAGE_CACHE_SIZE, and that - * it's OK to put sizeof(void) * (XATTR_SIZE_MAX/PAGE_CACHE_SIZE) bytes on +/* Assuming that XATTR_SIZE_MAX is a multiple of PAGE_SIZE, and that + * it's OK to put sizeof(void) * (XATTR_SIZE_MAX/PAGE_SIZE) bytes on * the stack. */ -#define NFS4ACL_MAXPAGES (XATTR_SIZE_MAX >> PAGE_CACHE_SHIFT) +#define NFS4ACL_MAXPAGES DIV_ROUND_UP(XATTR_SIZE_MAX, PAGE_SIZE) static int buf_to_pages_noslab(const void *buf, size_t buflen, struct page **pages, unsigned int *pgbase) @@ -3636,7 +3664,7 @@ static int buf_to_pages_noslab(const void *buf, size_t buflen, spages = pages; do { - len = min_t(size_t, PAGE_CACHE_SIZE, buflen); + len = min_t(size_t, PAGE_SIZE, buflen); newpage = alloc_page(GFP_KERNEL); if (newpage == NULL) @@ -3705,9 +3733,10 @@ out: static void nfs4_write_cached_acl(struct inode *inode, struct page **pages, size_t pgbase, size_t acl_len) { struct nfs4_cached_acl *acl; + size_t buflen = sizeof(*acl) + acl_len; - if (pages && acl_len <= PAGE_SIZE) { - acl = kmalloc(sizeof(*acl) + acl_len, GFP_KERNEL); + if (buflen <= PAGE_SIZE) { + acl = kmalloc(buflen, GFP_KERNEL); if (acl == NULL) goto out; acl->cached = 1; @@ -3749,16 +3778,15 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu .rpc_argp = &args, .rpc_resp = &res, }; - int ret = -ENOMEM, npages, i, acl_len = 0; + unsigned int npages = DIV_ROUND_UP(buflen, PAGE_SIZE); + int ret = -ENOMEM, i; - npages = (buflen + PAGE_SIZE - 1) >> PAGE_SHIFT; /* As long as we're doing a round trip to the server anyway, * let's be 
prepared for a page of acl data. */ if (npages == 0) npages = 1; - - /* Add an extra page to handle the bitmap returned */ - npages++; + if (npages > ARRAY_SIZE(pages)) + return -ERANGE; for (i = 0; i < npages; i++) { pages[i] = alloc_page(GFP_KERNEL); @@ -3774,11 +3802,6 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu args.acl_len = npages * PAGE_SIZE; args.acl_pgbase = 0; - /* Let decode_getfacl know not to fail if the ACL data is larger than - * the page we send as a guess */ - if (buf == NULL) - res.acl_flags |= NFS4_ACL_LEN_REQUEST; - dprintk("%s buf %p buflen %zu npages %d args.acl_len %zu\n", __func__, buf, buflen, npages, args.acl_len); ret = nfs4_call_sync(NFS_SERVER(inode)->client, NFS_SERVER(inode), @@ -3786,20 +3809,24 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu if (ret) goto out_free; - acl_len = res.acl_len - res.acl_data_offset; - if (acl_len > args.acl_len) - nfs4_write_cached_acl(inode, NULL, 0, acl_len); - else - nfs4_write_cached_acl(inode, pages, res.acl_data_offset, - acl_len); - if (buf) { + /* Handle the case where the passed-in buffer is too short */ + if (res.acl_flags & NFS4_ACL_TRUNC) { + /* Did the user only issue a request for the acl length? 
*/ + if (buf == NULL) + goto out_ok; ret = -ERANGE; - if (acl_len > buflen) + goto out_free; + } + nfs4_write_cached_acl(inode, pages, res.acl_data_offset, res.acl_len); + if (buf) { + if (res.acl_len > buflen) { + ret = -ERANGE; goto out_free; - _copy_from_pages(buf, pages, res.acl_data_offset, - acl_len); + } + _copy_from_pages(buf, pages, res.acl_data_offset, res.acl_len); } - ret = acl_len; +out_ok: + ret = res.acl_len; out_free: for (i = 0; i < npages; i++) if (pages[i]) @@ -3857,14 +3884,17 @@ static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl .rpc_argp = &arg, .rpc_resp = &res, }; + unsigned int npages = DIV_ROUND_UP(buflen, PAGE_SIZE); int ret, i; if (!nfs4_server_supports_acls(server)) return -EOPNOTSUPP; + if (npages > ARRAY_SIZE(pages)) + return -ERANGE; i = buf_to_pages_noslab(buf, buflen, arg.acl_pages, &arg.acl_pgbase); if (i < 0) return i; - nfs_inode_return_delegation(inode); + nfs4_inode_return_delegation(inode); ret = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1); /* @@ -3941,7 +3971,6 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, case -NFS4ERR_DELAY: nfs_inc_server_stats(server, NFSIOS_DELAY); case -NFS4ERR_GRACE: - case -EKEYEXPIRED: rpc_delay(task, NFS4_POLL_RETRY_MAX); task->tk_status = 0; return -EAGAIN; @@ -3978,6 +4007,46 @@ static void nfs4_init_boot_verifier(const struct nfs_client *clp, memcpy(bootverf->data, verf, sizeof(bootverf->data)); } +static unsigned int +nfs4_init_nonuniform_client_string(const struct nfs_client *clp, + char *buf, size_t len) +{ + unsigned int result; + + rcu_read_lock(); + result = scnprintf(buf, len, "Linux NFSv4.0 %s/%s %s", + clp->cl_ipaddr, + rpc_peeraddr2str(clp->cl_rpcclient, + RPC_DISPLAY_ADDR), + rpc_peeraddr2str(clp->cl_rpcclient, + RPC_DISPLAY_PROTO)); + rcu_read_unlock(); + return result; +} + +static unsigned int +nfs4_init_uniform_client_string(const struct nfs_client *clp, + char *buf, size_t len) +{ 
+ char *nodename = clp->cl_rpcclient->cl_nodename; + + if (nfs4_client_id_uniquifier[0] != '\0') + nodename = nfs4_client_id_uniquifier; + return scnprintf(buf, len, "Linux NFSv%u.%u %s", + clp->rpc_ops->version, clp->cl_minorversion, + nodename); +} + +/** + * nfs4_proc_setclientid - Negotiate client ID + * @clp: state data structure + * @program: RPC program for NFSv4 callback service + * @port: IP port number for NFS4 callback service + * @cred: RPC credential to use for this call + * @res: where to place the result + * + * Returns zero, a negative errno, or a negative NFS4ERR status code. + */ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, unsigned short port, struct rpc_cred *cred, struct nfs4_setclientid_res *res) @@ -3994,44 +4063,47 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, .rpc_resp = res, .rpc_cred = cred, }; - int loop = 0; int status; + /* nfs_client_id4 */ nfs4_init_boot_verifier(clp, &sc_verifier); - - for(;;) { - rcu_read_lock(); - setclientid.sc_name_len = scnprintf(setclientid.sc_name, - sizeof(setclientid.sc_name), "%s/%s %s %s %u", - clp->cl_ipaddr, - rpc_peeraddr2str(clp->cl_rpcclient, - RPC_DISPLAY_ADDR), - rpc_peeraddr2str(clp->cl_rpcclient, - RPC_DISPLAY_PROTO), - clp->cl_rpcclient->cl_auth->au_ops->au_name, - clp->cl_id_uniquifier); - setclientid.sc_netid_len = scnprintf(setclientid.sc_netid, + if (test_bit(NFS_CS_MIGRATION, &clp->cl_flags)) + setclientid.sc_name_len = + nfs4_init_uniform_client_string(clp, + setclientid.sc_name, + sizeof(setclientid.sc_name)); + else + setclientid.sc_name_len = + nfs4_init_nonuniform_client_string(clp, + setclientid.sc_name, + sizeof(setclientid.sc_name)); + /* cb_client4 */ + rcu_read_lock(); + setclientid.sc_netid_len = scnprintf(setclientid.sc_netid, sizeof(setclientid.sc_netid), rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_NETID)); - setclientid.sc_uaddr_len = scnprintf(setclientid.sc_uaddr, + rcu_read_unlock(); + setclientid.sc_uaddr_len = 
scnprintf(setclientid.sc_uaddr, sizeof(setclientid.sc_uaddr), "%s.%u.%u", clp->cl_ipaddr, port >> 8, port & 255); - rcu_read_unlock(); - status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); - if (status != -NFS4ERR_CLID_INUSE) - break; - if (loop != 0) { - ++clp->cl_id_uniquifier; - break; - } - ++loop; - ssleep(clp->cl_lease_time / HZ + 1); - } + dprintk("NFS call setclientid auth=%s, '%.*s'\n", + clp->cl_rpcclient->cl_auth->au_ops->au_name, + setclientid.sc_name_len, setclientid.sc_name); + status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); + dprintk("NFS reply setclientid: %d\n", status); return status; } +/** + * nfs4_proc_setclientid_confirm - Confirm client ID + * @clp: state data structure + * @res: result of a previous SETCLIENTID + * @cred: RPC credential to use for this call + * + * Returns zero, a negative errno, or a negative NFS4ERR status code. + */ int nfs4_proc_setclientid_confirm(struct nfs_client *clp, struct nfs4_setclientid_res *arg, struct rpc_cred *cred) @@ -4046,6 +4118,9 @@ int nfs4_proc_setclientid_confirm(struct nfs_client *clp, unsigned long now; int status; + dprintk("NFS call setclientid_confirm auth=%s, (client ID %llx)\n", + clp->cl_rpcclient->cl_auth->au_ops->au_name, + clp->cl_clientid); now = jiffies; status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); if (status == 0) { @@ -4054,6 +4129,7 @@ int nfs4_proc_setclientid_confirm(struct nfs_client *clp, clp->cl_last_renewal = now; spin_unlock(&clp->cl_lock); } + dprintk("NFS reply setclientid_confirm: %d\n", status); return status; } @@ -4102,11 +4178,10 @@ static void nfs4_delegreturn_prepare(struct rpc_task *task, void *data) d_data = (struct nfs4_delegreturndata *)data; - if (nfs4_setup_sequence(d_data->res.server, - &d_data->args.seq_args, - &d_data->res.seq_res, task)) - return; - rpc_call_start(task); + nfs4_setup_sequence(d_data->res.server, + &d_data->args.seq_args, + &d_data->res.seq_res, + task); } #endif /* CONFIG_NFS_V4_1 */ @@ 
-4340,6 +4415,7 @@ static void nfs4_locku_done(struct rpc_task *task, void *data) if (nfs4_async_handle_error(task, calldata->server, NULL) == -EAGAIN) rpc_restart_call_prepare(task); } + nfs_release_seqid(calldata->arg.seqid); } static void nfs4_locku_prepare(struct rpc_task *task, void *data) @@ -4348,17 +4424,18 @@ static void nfs4_locku_prepare(struct rpc_task *task, void *data) if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0) return; - if ((calldata->lsp->ls_flags & NFS_LOCK_INITIALIZED) == 0) { + if (test_bit(NFS_LOCK_INITIALIZED, &calldata->lsp->ls_flags) == 0) { /* Note: exit _without_ running nfs4_locku_done */ task->tk_action = NULL; + nfs4_sequence_done(task, &calldata->res.seq_res); return; } calldata->timestamp = jiffies; if (nfs4_setup_sequence(calldata->server, &calldata->arg.seq_args, - &calldata->res.seq_res, task)) - return; - rpc_call_start(task); + &calldata->res.seq_res, + task) != 0) + nfs_release_seqid(calldata->arg.seqid); } static const struct rpc_call_ops nfs4_locku_ops = { @@ -4502,8 +4579,9 @@ static void nfs4_lock_prepare(struct rpc_task *task, void *calldata) return; /* Do we need to do an open_to_lock_owner? 
*/ if (!(data->arg.lock_seqid->sequence->flags & NFS_SEQID_CONFIRMED)) { - if (nfs_wait_on_sequence(data->arg.open_seqid, task) != 0) - return; + if (nfs_wait_on_sequence(data->arg.open_seqid, task) != 0) { + goto out_release_lock_seqid; + } data->arg.open_stateid = &state->stateid; data->arg.new_lock_owner = 1; data->res.open_seqid = data->arg.open_seqid; @@ -4512,18 +4590,15 @@ static void nfs4_lock_prepare(struct rpc_task *task, void *calldata) data->timestamp = jiffies; if (nfs4_setup_sequence(data->server, &data->arg.seq_args, - &data->res.seq_res, task)) + &data->res.seq_res, + task) == 0) return; - rpc_call_start(task); + nfs_release_seqid(data->arg.open_seqid); +out_release_lock_seqid: + nfs_release_seqid(data->arg.lock_seqid); dprintk("%s: done!, ret = %d\n", __func__, data->rpc_status); } -static void nfs4_recover_lock_prepare(struct rpc_task *task, void *calldata) -{ - rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED); - nfs4_lock_prepare(task, calldata); -} - static void nfs4_lock_done(struct rpc_task *task, void *calldata) { struct nfs4_lockdata *data = calldata; @@ -4542,7 +4617,7 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata) } if (data->rpc_status == 0) { nfs4_stateid_copy(&data->lsp->ls_stateid, &data->res.stateid); - data->lsp->ls_flags |= NFS_LOCK_INITIALIZED; + set_bit(NFS_LOCK_INITIALIZED, &data->lsp->ls_flags); renew_lease(NFS_SERVER(data->ctx->dentry->d_inode), data->timestamp); } out: @@ -4576,12 +4651,6 @@ static const struct rpc_call_ops nfs4_lock_ops = { .rpc_release = nfs4_lock_release, }; -static const struct rpc_call_ops nfs4_recover_lock_ops = { - .rpc_call_prepare = nfs4_recover_lock_prepare, - .rpc_call_done = nfs4_lock_done, - .rpc_release = nfs4_lock_release, -}; - static void nfs4_handle_setlk_error(struct nfs_server *server, struct nfs4_lock_state *lsp, int new_lock_owner, int error) { switch (error) { @@ -4589,7 +4658,7 @@ static void nfs4_handle_setlk_error(struct nfs_server *server, struct nfs4_lock_ 
case -NFS4ERR_BAD_STATEID: lsp->ls_seqid.flags &= ~NFS_SEQID_CONFIRMED; if (new_lock_owner != 0 || - (lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0) + test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags) != 0) nfs4_schedule_stateid_recovery(server, lsp->ls_state); break; case -NFS4ERR_STALE_STATEID: @@ -4624,15 +4693,15 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f return -ENOMEM; if (IS_SETLKW(cmd)) data->arg.block = 1; - if (recovery_type > NFS_LOCK_NEW) { - if (recovery_type == NFS_LOCK_RECLAIM) - data->arg.reclaim = NFS_LOCK_RECLAIM; - task_setup_data.callback_ops = &nfs4_recover_lock_ops; - } nfs41_init_sequence(&data->arg.seq_args, &data->res.seq_res, 1); msg.rpc_argp = &data->arg; msg.rpc_resp = &data->res; task_setup_data.callback_data = data; + if (recovery_type > NFS_LOCK_NEW) { + if (recovery_type == NFS_LOCK_RECLAIM) + data->arg.reclaim = NFS_LOCK_RECLAIM; + nfs4_set_sequence_privileged(&data->arg.seq_args); + } task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) return PTR_ERR(task); @@ -4698,18 +4767,30 @@ out: } #if defined(CONFIG_NFS_V4_1) +/** + * nfs41_check_expired_locks - possibly free a lock stateid + * + * @state: NFSv4 state for an inode + * + * Returns NFS_OK if recovery for this stateid is now finished. + * Otherwise a negative NFS4ERR value is returned. + */ static int nfs41_check_expired_locks(struct nfs4_state *state) { - int status, ret = NFS_OK; + int status, ret = -NFS4ERR_BAD_STATEID; struct nfs4_lock_state *lsp; struct nfs_server *server = NFS_SERVER(state->inode); list_for_each_entry(lsp, &state->lock_states, ls_locks) { - if (lsp->ls_flags & NFS_LOCK_INITIALIZED) { + if (test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags)) { status = nfs41_test_stateid(server, &lsp->ls_stateid); if (status != NFS_OK) { - nfs41_free_stateid(server, &lsp->ls_stateid); - lsp->ls_flags &= ~NFS_LOCK_INITIALIZED; + /* Free the stateid unless the server + * informs us the stateid is unrecognized. 
*/ + if (status != -NFS4ERR_BAD_STATEID) + nfs41_free_stateid(server, + &lsp->ls_stateid); + clear_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags); ret = status; } } @@ -4724,9 +4805,9 @@ static int nfs41_lock_expired(struct nfs4_state *state, struct file_lock *reques if (test_bit(LK_STATE_IN_USE, &state->flags)) status = nfs41_check_expired_locks(state); - if (status == NFS_OK) - return status; - return nfs4_lock_expired(state, request); + if (status != NFS_OK) + status = nfs4_lock_expired(state, request); + return status; } #endif @@ -4824,7 +4905,7 @@ nfs4_proc_lock(struct file *filp, int cmd, struct file_lock *request) * Don't rely on the VFS having checked the file open mode, * since it won't do this for flock() locks. */ - switch (request->fl_type & (F_RDLCK|F_WRLCK|F_UNLCK)) { + switch (request->fl_type) { case F_RDLCK: if (!(filp->f_mode & FMODE_READ)) return -EBADF; @@ -4889,15 +4970,6 @@ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl) nfs4_schedule_stateid_recovery(server, state); err = 0; goto out; - case -EKEYEXPIRED: - /* - * User RPCSEC_GSS context has expired. - * We cannot recover this stateid now, so - * skip it and allow recovery thread to - * proceed. - */ - err = 0; - goto out; case -ENOMEM: case -NFS4ERR_DENIED: /* kill_proc(fl->fl_pid, SIGLOST, 1); */ @@ -5146,7 +5218,6 @@ int nfs4_proc_bind_conn_to_session(struct nfs_client *clp, struct rpc_cred *cred }; dprintk("--> %s\n", __func__); - BUG_ON(clp == NULL); res.session = kzalloc(sizeof(struct nfs4_session), GFP_NOFS); if (unlikely(res.session == NULL)) { @@ -5185,6 +5256,8 @@ out: /* * nfs4_proc_exchange_id() * + * Returns zero, a negative errno, or a negative NFS4ERR status code. 
+ * * Since the clientid has expired, all compounds using sessions * associated with the stale clientid will be returning * NFS4ERR_BADSESSION in the sequence operation, and will therefore @@ -5209,16 +5282,12 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred) .rpc_cred = cred, }; - dprintk("--> %s\n", __func__); - BUG_ON(clp == NULL); - nfs4_init_boot_verifier(clp, &verifier); - - args.id_len = scnprintf(args.id, sizeof(args.id), - "%s/%s/%u", - clp->cl_ipaddr, - clp->cl_rpcclient->cl_nodename, - clp->cl_rpcclient->cl_auth->au_flavor); + args.id_len = nfs4_init_uniform_client_string(clp, args.id, + sizeof(args.id)); + dprintk("NFS call exchange_id auth=%s, '%.*s'\n", + clp->cl_rpcclient->cl_auth->au_ops->au_name, + args.id_len, args.id); res.server_owner = kzalloc(sizeof(struct nfs41_server_owner), GFP_NOFS); @@ -5281,12 +5350,12 @@ out_server_scope: kfree(res.server_scope); out: if (clp->cl_implid != NULL) - dprintk("%s: Server Implementation ID: " + dprintk("NFS reply exchange_id: Server Implementation ID: " "domain: %s, name: %s, date: %llu,%u\n", - __func__, clp->cl_implid->domain, clp->cl_implid->name, + clp->cl_implid->domain, clp->cl_implid->name, clp->cl_implid->date.seconds, clp->cl_implid->date.nseconds); - dprintk("<-- %s status= %d\n", __func__, status); + dprintk("NFS reply exchange_id: %d\n", status); return status; } @@ -5336,6 +5405,8 @@ int nfs4_destroy_clientid(struct nfs_client *clp) goto out; if (clp->cl_exchange_flags == 0) goto out; + if (clp->cl_preserve_clid) + goto out; cred = nfs4_get_exchange_id_cred(clp); ret = nfs4_proc_destroy_clientid(clp, cred); if (cred) @@ -5358,20 +5429,16 @@ struct nfs4_get_lease_time_data { static void nfs4_get_lease_time_prepare(struct rpc_task *task, void *calldata) { - int ret; struct nfs4_get_lease_time_data *data = (struct nfs4_get_lease_time_data *)calldata; dprintk("--> %s\n", __func__); - rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED); /* just setup sequence, do not 
trigger session recovery since we're invoked within one */ - ret = nfs41_setup_sequence(data->clp->cl_session, - &data->args->la_seq_args, - &data->res->lr_seq_res, task); - - BUG_ON(ret == -EAGAIN); - rpc_call_start(task); + nfs41_setup_sequence(data->clp->cl_session, + &data->args->la_seq_args, + &data->res->lr_seq_res, + task); dprintk("<-- %s\n", __func__); } @@ -5433,6 +5500,7 @@ int nfs4_proc_get_lease_time(struct nfs_client *clp, struct nfs_fsinfo *fsinfo) int status; nfs41_init_sequence(&args.la_seq_args, &res.lr_seq_res, 0); + nfs4_set_sequence_privileged(&args.la_seq_args); dprintk("--> %s\n", __func__); task = rpc_run_task(&task_setup); @@ -5447,145 +5515,6 @@ int nfs4_proc_get_lease_time(struct nfs_client *clp, struct nfs_fsinfo *fsinfo) return status; } -static struct nfs4_slot *nfs4_alloc_slots(u32 max_slots, gfp_t gfp_flags) -{ - return kcalloc(max_slots, sizeof(struct nfs4_slot), gfp_flags); -} - -static void nfs4_add_and_init_slots(struct nfs4_slot_table *tbl, - struct nfs4_slot *new, - u32 max_slots, - u32 ivalue) -{ - struct nfs4_slot *old = NULL; - u32 i; - - spin_lock(&tbl->slot_tbl_lock); - if (new) { - old = tbl->slots; - tbl->slots = new; - tbl->max_slots = max_slots; - } - tbl->highest_used_slotid = -1; /* no slot is currently used */ - for (i = 0; i < tbl->max_slots; i++) - tbl->slots[i].seq_nr = ivalue; - spin_unlock(&tbl->slot_tbl_lock); - kfree(old); -} - -/* - * (re)Initialise a slot table - */ -static int nfs4_realloc_slot_table(struct nfs4_slot_table *tbl, u32 max_reqs, - u32 ivalue) -{ - struct nfs4_slot *new = NULL; - int ret = -ENOMEM; - - dprintk("--> %s: max_reqs=%u, tbl->max_slots %d\n", __func__, - max_reqs, tbl->max_slots); - - /* Does the newly negotiated max_reqs match the existing slot table? 
*/ - if (max_reqs != tbl->max_slots) { - new = nfs4_alloc_slots(max_reqs, GFP_NOFS); - if (!new) - goto out; - } - ret = 0; - - nfs4_add_and_init_slots(tbl, new, max_reqs, ivalue); - dprintk("%s: tbl=%p slots=%p max_slots=%d\n", __func__, - tbl, tbl->slots, tbl->max_slots); -out: - dprintk("<-- %s: return %d\n", __func__, ret); - return ret; -} - -/* Destroy the slot table */ -static void nfs4_destroy_slot_tables(struct nfs4_session *session) -{ - if (session->fc_slot_table.slots != NULL) { - kfree(session->fc_slot_table.slots); - session->fc_slot_table.slots = NULL; - } - if (session->bc_slot_table.slots != NULL) { - kfree(session->bc_slot_table.slots); - session->bc_slot_table.slots = NULL; - } - return; -} - -/* - * Initialize or reset the forechannel and backchannel tables - */ -static int nfs4_setup_session_slot_tables(struct nfs4_session *ses) -{ - struct nfs4_slot_table *tbl; - int status; - - dprintk("--> %s\n", __func__); - /* Fore channel */ - tbl = &ses->fc_slot_table; - status = nfs4_realloc_slot_table(tbl, ses->fc_attrs.max_reqs, 1); - if (status) /* -ENOMEM */ - return status; - /* Back channel */ - tbl = &ses->bc_slot_table; - status = nfs4_realloc_slot_table(tbl, ses->bc_attrs.max_reqs, 0); - if (status && tbl->slots == NULL) - /* Fore and back channel share a connection so get - * both slot tables or neither */ - nfs4_destroy_slot_tables(ses); - return status; -} - -struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp) -{ - struct nfs4_session *session; - struct nfs4_slot_table *tbl; - - session = kzalloc(sizeof(struct nfs4_session), GFP_NOFS); - if (!session) - return NULL; - - tbl = &session->fc_slot_table; - tbl->highest_used_slotid = NFS4_NO_SLOT; - spin_lock_init(&tbl->slot_tbl_lock); - rpc_init_priority_wait_queue(&tbl->slot_tbl_waitq, "ForeChannel Slot table"); - init_completion(&tbl->complete); - - tbl = &session->bc_slot_table; - tbl->highest_used_slotid = NFS4_NO_SLOT; - spin_lock_init(&tbl->slot_tbl_lock); - 
rpc_init_wait_queue(&tbl->slot_tbl_waitq, "BackChannel Slot table"); - init_completion(&tbl->complete); - - session->session_state = 1<<NFS4_SESSION_INITING; - - session->clp = clp; - return session; -} - -void nfs4_destroy_session(struct nfs4_session *session) -{ - struct rpc_xprt *xprt; - struct rpc_cred *cred; - - cred = nfs4_get_exchange_id_cred(session->clp); - nfs4_proc_destroy_session(session, cred); - if (cred) - put_rpccred(cred); - - rcu_read_lock(); - xprt = rcu_dereference(session->clp->cl_rpcclient->cl_xprt); - rcu_read_unlock(); - dprintk("%s Destroy backchannel for xprt %p\n", - __func__, xprt); - xprt_destroy_backchannel(xprt, NFS41_BC_MIN_CALLBACKS); - nfs4_destroy_slot_tables(session); - kfree(session); -} - /* * Initialize the values to be used by the client in CREATE_SESSION * If nfs4_init_session set the fore channel request and response sizes, @@ -5598,8 +5527,8 @@ void nfs4_destroy_session(struct nfs4_session *session) static void nfs4_init_channel_attrs(struct nfs41_create_session_args *args) { struct nfs4_session *session = args->client->cl_session; - unsigned int mxrqst_sz = session->fc_attrs.max_rqst_sz, - mxresp_sz = session->fc_attrs.max_resp_sz; + unsigned int mxrqst_sz = session->fc_target_max_rqst_sz, + mxresp_sz = session->fc_target_max_resp_sz; if (mxrqst_sz == 0) mxrqst_sz = NFS_MAX_FILE_IO_SIZE; @@ -5708,10 +5637,9 @@ static int _nfs4_proc_create_session(struct nfs_client *clp, status = rpc_call_sync(session->clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); - if (!status) + if (!status) { /* Verify the session's negotiated channel_attrs values */ status = nfs4_verify_channel_attrs(&args, session); - if (!status) { /* Increment the clientid slot sequence id */ clp->cl_seqid++; } @@ -5781,83 +5709,6 @@ int nfs4_proc_destroy_session(struct nfs4_session *session, } /* - * With sessions, the client is not marked ready until after a - * successful EXCHANGE_ID and CREATE_SESSION. 
- * - * Map errors cl_cons_state errors to EPROTONOSUPPORT to indicate - * other versions of NFS can be tried. - */ -static int nfs41_check_session_ready(struct nfs_client *clp) -{ - int ret; - - if (clp->cl_cons_state == NFS_CS_SESSION_INITING) { - ret = nfs4_client_recover_expired_lease(clp); - if (ret) - return ret; - } - if (clp->cl_cons_state < NFS_CS_READY) - return -EPROTONOSUPPORT; - smp_rmb(); - return 0; -} - -int nfs4_init_session(struct nfs_server *server) -{ - struct nfs_client *clp = server->nfs_client; - struct nfs4_session *session; - unsigned int rsize, wsize; - - if (!nfs4_has_session(clp)) - return 0; - - session = clp->cl_session; - spin_lock(&clp->cl_lock); - if (test_and_clear_bit(NFS4_SESSION_INITING, &session->session_state)) { - - rsize = server->rsize; - if (rsize == 0) - rsize = NFS_MAX_FILE_IO_SIZE; - wsize = server->wsize; - if (wsize == 0) - wsize = NFS_MAX_FILE_IO_SIZE; - - session->fc_attrs.max_rqst_sz = wsize + nfs41_maxwrite_overhead; - session->fc_attrs.max_resp_sz = rsize + nfs41_maxread_overhead; - } - spin_unlock(&clp->cl_lock); - - return nfs41_check_session_ready(clp); -} - -int nfs4_init_ds_session(struct nfs_client *clp, unsigned long lease_time) -{ - struct nfs4_session *session = clp->cl_session; - int ret; - - spin_lock(&clp->cl_lock); - if (test_and_clear_bit(NFS4_SESSION_INITING, &session->session_state)) { - /* - * Do not set NFS_CS_CHECK_LEASE_TIME instead set the - * DS lease to be equal to the MDS lease. - */ - clp->cl_lease_time = lease_time; - clp->cl_last_renewal = jiffies; - } - spin_unlock(&clp->cl_lock); - - ret = nfs41_check_session_ready(clp); - if (ret) - return ret; - /* Test for the DS role */ - if (!is_ds_client(clp)) - return -ENODEV; - return 0; -} -EXPORT_SYMBOL_GPL(nfs4_init_ds_session); - - -/* * Renew the cl_session lease. 
*/ struct nfs4_sequence_data { @@ -5922,9 +5773,7 @@ static void nfs41_sequence_prepare(struct rpc_task *task, void *data) args = task->tk_msg.rpc_argp; res = task->tk_msg.rpc_resp; - if (nfs41_setup_sequence(clp->cl_session, args, res, task)) - return; - rpc_call_start(task); + nfs41_setup_sequence(clp->cl_session, args, res, task); } static const struct rpc_call_ops nfs41_sequence_ops = { @@ -5933,7 +5782,9 @@ static const struct rpc_call_ops nfs41_sequence_ops = { .rpc_release = nfs41_sequence_release, }; -static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred) +static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, + struct rpc_cred *cred, + bool is_privileged) { struct nfs4_sequence_data *calldata; struct rpc_message msg = { @@ -5955,6 +5806,8 @@ static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, struct rpc_ return ERR_PTR(-ENOMEM); } nfs41_init_sequence(&calldata->args, &calldata->res, 0); + if (is_privileged) + nfs4_set_sequence_privileged(&calldata->args); msg.rpc_argp = &calldata->args; msg.rpc_resp = &calldata->res; calldata->clp = clp; @@ -5970,7 +5823,7 @@ static int nfs41_proc_async_sequence(struct nfs_client *clp, struct rpc_cred *cr if ((renew_flags & NFS4_RENEW_TIMEOUT) == 0) return 0; - task = _nfs41_proc_sequence(clp, cred); + task = _nfs41_proc_sequence(clp, cred, false); if (IS_ERR(task)) ret = PTR_ERR(task); else @@ -5984,7 +5837,7 @@ static int nfs4_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred) struct rpc_task *task; int ret; - task = _nfs41_proc_sequence(clp, cred); + task = _nfs41_proc_sequence(clp, cred, true); if (IS_ERR(task)) { ret = PTR_ERR(task); goto out; @@ -6013,13 +5866,10 @@ static void nfs4_reclaim_complete_prepare(struct rpc_task *task, void *data) { struct nfs4_reclaim_complete_data *calldata = data; - rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED); - if (nfs41_setup_sequence(calldata->clp->cl_session, - &calldata->arg.seq_args, - 
&calldata->res.seq_res, task)) - return; - - rpc_call_start(task); + nfs41_setup_sequence(calldata->clp->cl_session, + &calldata->arg.seq_args, + &calldata->res.seq_res, + task); } static int nfs41_reclaim_complete_handle_errors(struct rpc_task *task, struct nfs_client *clp) @@ -6096,6 +5946,7 @@ static int nfs41_proc_reclaim_complete(struct nfs_client *clp) calldata->arg.one_fs = 0; nfs41_init_sequence(&calldata->arg.seq_args, &calldata->res.seq_res, 0); + nfs4_set_sequence_privileged(&calldata->arg.seq_args); msg.rpc_argp = &calldata->arg; msg.rpc_resp = &calldata->res; task_setup_data.callback_data = calldata; @@ -6119,6 +5970,7 @@ nfs4_layoutget_prepare(struct rpc_task *task, void *calldata) { struct nfs4_layoutget *lgp = calldata; struct nfs_server *server = NFS_SERVER(lgp->args.inode); + struct nfs4_session *session = nfs4_get_session(server); dprintk("--> %s\n", __func__); /* Note the is a race here, where a CB_LAYOUTRECALL can come in @@ -6126,49 +5978,112 @@ nfs4_layoutget_prepare(struct rpc_task *task, void *calldata) * However, that is not so catastrophic, and there seems * to be no way to prevent it completely. 
*/ - if (nfs4_setup_sequence(server, &lgp->args.seq_args, + if (nfs41_setup_sequence(session, &lgp->args.seq_args, &lgp->res.seq_res, task)) return; if (pnfs_choose_layoutget_stateid(&lgp->args.stateid, NFS_I(lgp->args.inode)->layout, lgp->args.ctx->state)) { rpc_exit(task, NFS4_OK); - return; } - rpc_call_start(task); } static void nfs4_layoutget_done(struct rpc_task *task, void *calldata) { struct nfs4_layoutget *lgp = calldata; - struct nfs_server *server = NFS_SERVER(lgp->args.inode); + struct inode *inode = lgp->args.inode; + struct nfs_server *server = NFS_SERVER(inode); + struct pnfs_layout_hdr *lo; + struct nfs4_state *state = NULL; dprintk("--> %s\n", __func__); - if (!nfs4_sequence_done(task, &lgp->res.seq_res)) - return; + if (!nfs41_sequence_done(task, &lgp->res.seq_res)) + goto out; switch (task->tk_status) { case 0: - break; + goto out; case -NFS4ERR_LAYOUTTRYLATER: case -NFS4ERR_RECALLCONFLICT: task->tk_status = -NFS4ERR_DELAY; - /* Fall through */ - default: - if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN) { - rpc_restart_call_prepare(task); - return; + break; + case -NFS4ERR_EXPIRED: + case -NFS4ERR_BAD_STATEID: + spin_lock(&inode->i_lock); + lo = NFS_I(inode)->layout; + if (!lo || list_empty(&lo->plh_segs)) { + spin_unlock(&inode->i_lock); + /* If the open stateid was bad, then recover it. */ + state = lgp->args.ctx->state; + } else { + LIST_HEAD(head); + + pnfs_mark_matching_lsegs_invalid(lo, &head, NULL); + spin_unlock(&inode->i_lock); + /* Mark the bad layout state as invalid, then + * retry using the open stateid. 
*/ + pnfs_free_lseg_list(&head); } } + if (nfs4_async_handle_error(task, server, state) == -EAGAIN) + rpc_restart_call_prepare(task); +out: dprintk("<-- %s\n", __func__); } +static size_t max_response_pages(struct nfs_server *server) +{ + u32 max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz; + return nfs_page_array_len(0, max_resp_sz); +} + +static void nfs4_free_pages(struct page **pages, size_t size) +{ + int i; + + if (!pages) + return; + + for (i = 0; i < size; i++) { + if (!pages[i]) + break; + __free_page(pages[i]); + } + kfree(pages); +} + +static struct page **nfs4_alloc_pages(size_t size, gfp_t gfp_flags) +{ + struct page **pages; + int i; + + pages = kcalloc(size, sizeof(struct page *), gfp_flags); + if (!pages) { + dprintk("%s: can't alloc array of %zu pages\n", __func__, size); + return NULL; + } + + for (i = 0; i < size; i++) { + pages[i] = alloc_page(gfp_flags); + if (!pages[i]) { + dprintk("%s: failed to allocate page\n", __func__); + nfs4_free_pages(pages, size); + return NULL; + } + } + + return pages; +} + static void nfs4_layoutget_release(void *calldata) { struct nfs4_layoutget *lgp = calldata; + struct nfs_server *server = NFS_SERVER(lgp->args.inode); + size_t max_pages = max_response_pages(server); dprintk("--> %s\n", __func__); + nfs4_free_pages(lgp->args.layout.pages, max_pages); put_nfs_open_context(lgp->args.ctx); kfree(calldata); dprintk("<-- %s\n", __func__); @@ -6180,9 +6095,11 @@ static const struct rpc_call_ops nfs4_layoutget_call_ops = { .rpc_release = nfs4_layoutget_release, }; -int nfs4_proc_layoutget(struct nfs4_layoutget *lgp) +struct pnfs_layout_segment * +nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags) { struct nfs_server *server = NFS_SERVER(lgp->args.inode); + size_t max_pages = max_response_pages(server); struct rpc_task *task; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LAYOUTGET], @@ -6196,24 +6113,34 @@ int nfs4_proc_layoutget(struct nfs4_layoutget *lgp) 
.callback_data = lgp, .flags = RPC_TASK_ASYNC, }; + struct pnfs_layout_segment *lseg = NULL; int status = 0; dprintk("--> %s\n", __func__); + lgp->args.layout.pages = nfs4_alloc_pages(max_pages, gfp_flags); + if (!lgp->args.layout.pages) { + nfs4_layoutget_release(lgp); + return ERR_PTR(-ENOMEM); + } + lgp->args.layout.pglen = max_pages * PAGE_SIZE; + lgp->res.layoutp = &lgp->args.layout; lgp->res.seq_res.sr_slot = NULL; nfs41_init_sequence(&lgp->args.seq_args, &lgp->res.seq_res, 0); task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) - return PTR_ERR(task); + return ERR_CAST(task); status = nfs4_wait_for_completion_rpc_task(task); if (status == 0) status = task->tk_status; if (status == 0) - status = pnfs_layout_process(lgp); + lseg = pnfs_layout_process(lgp); rpc_put_task(task); dprintk("<-- %s status=%d\n", __func__, status); - return status; + if (status) + return ERR_PTR(status); + return lseg; } static void @@ -6222,21 +6149,20 @@ nfs4_layoutreturn_prepare(struct rpc_task *task, void *calldata) struct nfs4_layoutreturn *lrp = calldata; dprintk("--> %s\n", __func__); - if (nfs41_setup_sequence(lrp->clp->cl_session, &lrp->args.seq_args, - &lrp->res.seq_res, task)) - return; - rpc_call_start(task); + nfs41_setup_sequence(lrp->clp->cl_session, + &lrp->args.seq_args, + &lrp->res.seq_res, + task); } static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata) { struct nfs4_layoutreturn *lrp = calldata; struct nfs_server *server; - struct pnfs_layout_hdr *lo = lrp->args.layout; dprintk("--> %s\n", __func__); - if (!nfs4_sequence_done(task, &lrp->res.seq_res)) + if (!nfs41_sequence_done(task, &lrp->res.seq_res)) return; server = NFS_SERVER(lrp->args.inode); @@ -6244,24 +6170,21 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata) rpc_restart_call_prepare(task); return; } - spin_lock(&lo->plh_inode->i_lock); - if (task->tk_status == 0) { - if (lrp->res.lrs_present) { - pnfs_set_layout_stateid(lo, &lrp->res.stateid, true); - } 
else - BUG_ON(!list_empty(&lo->plh_segs)); - } - lo->plh_block_lgets--; - spin_unlock(&lo->plh_inode->i_lock); dprintk("<-- %s\n", __func__); } static void nfs4_layoutreturn_release(void *calldata) { struct nfs4_layoutreturn *lrp = calldata; + struct pnfs_layout_hdr *lo = lrp->args.layout; dprintk("--> %s\n", __func__); - put_layout_hdr(lrp->args.layout); + spin_lock(&lo->plh_inode->i_lock); + if (lrp->res.lrs_present) + pnfs_set_layout_stateid(lo, &lrp->res.stateid, true); + lo->plh_block_lgets--; + spin_unlock(&lo->plh_inode->i_lock); + pnfs_put_layout_hdr(lrp->args.layout); kfree(calldata); dprintk("<-- %s\n", __func__); } @@ -6388,11 +6311,12 @@ static void nfs4_layoutcommit_prepare(struct rpc_task *task, void *calldata) { struct nfs4_layoutcommit_data *data = calldata; struct nfs_server *server = NFS_SERVER(data->args.inode); + struct nfs4_session *session = nfs4_get_session(server); - if (nfs4_setup_sequence(server, &data->args.seq_args, - &data->res.seq_res, task)) - return; - rpc_call_start(task); + nfs41_setup_sequence(session, + &data->args.seq_args, + &data->res.seq_res, + task); } static void @@ -6401,7 +6325,7 @@ nfs4_layoutcommit_done(struct rpc_task *task, void *calldata) struct nfs4_layoutcommit_data *data = calldata; struct nfs_server *server = NFS_SERVER(data->args.inode); - if (!nfs4_sequence_done(task, &data->res.seq_res)) + if (!nfs41_sequence_done(task, &data->res.seq_res)) return; switch (task->tk_status) { /* Just ignore these failures */ @@ -6435,7 +6359,7 @@ static void nfs4_layoutcommit_release(void *calldata) list_del_init(&lseg->pls_lc_list); if (test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags)) - put_lseg(lseg); + pnfs_put_lseg(lseg); } clear_bit_unlock(NFS_INO_LAYOUTCOMMITTING, bitlock); @@ -6587,22 +6511,38 @@ static int _nfs41_test_stateid(struct nfs_server *server, nfs4_stateid *stateid) .rpc_resp = &res, }; + dprintk("NFS call test_stateid %p\n", stateid); nfs41_init_sequence(&args.seq_args, &res.seq_res, 0); - status = 
nfs4_call_sync_sequence(server->client, server, &msg, &args.seq_args, &res.seq_res, 1); - - if (status == NFS_OK) - return res.status; - return status; + nfs4_set_sequence_privileged(&args.seq_args); + status = nfs4_call_sync_sequence(server->client, server, &msg, + &args.seq_args, &res.seq_res); + if (status != NFS_OK) { + dprintk("NFS reply test_stateid: failed, %d\n", status); + return status; + } + dprintk("NFS reply test_stateid: succeeded, %d\n", -res.status); + return -res.status; } +/** + * nfs41_test_stateid - perform a TEST_STATEID operation + * + * @server: server / transport on which to perform the operation + * @stateid: state ID to test + * + * Returns NFS_OK if the server recognizes that "stateid" is valid. + * Otherwise a negative NFS4ERR value is returned if the operation + * failed or the state ID is not currently valid. + */ static int nfs41_test_stateid(struct nfs_server *server, nfs4_stateid *stateid) { struct nfs4_exception exception = { }; int err; do { - err = nfs4_handle_exception(server, - _nfs41_test_stateid(server, stateid), - &exception); + err = _nfs41_test_stateid(server, stateid); + if (err != -NFS4ERR_DELAY) + break; + nfs4_handle_exception(server, err, &exception); } while (exception.retry); return err; } @@ -6618,19 +6558,35 @@ static int _nfs4_free_stateid(struct nfs_server *server, nfs4_stateid *stateid) .rpc_argp = &args, .rpc_resp = &res, }; + int status; + dprintk("NFS call free_stateid %p\n", stateid); nfs41_init_sequence(&args.seq_args, &res.seq_res, 0); - return nfs4_call_sync_sequence(server->client, server, &msg, &args.seq_args, &res.seq_res, 1); + nfs4_set_sequence_privileged(&args.seq_args); + status = nfs4_call_sync_sequence(server->client, server, &msg, + &args.seq_args, &res.seq_res); + dprintk("NFS reply free_stateid: %d\n", status); + return status; } +/** + * nfs41_free_stateid - perform a FREE_STATEID operation + * + * @server: server / transport on which to perform the operation + * @stateid: state ID to 
release + * + * Returns NFS_OK if the server freed "stateid". Otherwise a + * negative NFS4ERR value is returned. + */ static int nfs41_free_stateid(struct nfs_server *server, nfs4_stateid *stateid) { struct nfs4_exception exception = { }; int err; do { - err = nfs4_handle_exception(server, - _nfs4_free_stateid(server, stateid), - &exception); + err = _nfs4_free_stateid(server, stateid); + if (err != -NFS4ERR_DELAY) + break; + nfs4_handle_exception(server, err, &exception); } while (exception.retry); return err; } @@ -6665,6 +6621,7 @@ static const struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = { .recover_lock = nfs4_lock_reclaim, .establish_clid = nfs4_init_clientid, .get_clid_cred = nfs4_get_setclientid_cred, + .detect_trunking = nfs40_discover_server_trunking, }; #if defined(CONFIG_NFS_V4_1) @@ -6676,6 +6633,7 @@ static const struct nfs4_state_recovery_ops nfs41_reboot_recovery_ops = { .establish_clid = nfs41_init_clientid, .get_clid_cred = nfs4_get_exchange_id_cred, .reclaim_complete = nfs41_proc_reclaim_complete, + .detect_trunking = nfs41_discover_server_trunking, }; #endif /* CONFIG_NFS_V4_1 */ @@ -6726,7 +6684,7 @@ static const struct nfs4_minor_version_ops nfs_v4_0_minor_ops = { #if defined(CONFIG_NFS_V4_1) static const struct nfs4_minor_version_ops nfs_v4_1_minor_ops = { .minor_version = 1, - .call_sync = _nfs4_call_sync_session, + .call_sync = nfs4_call_sync_sequence, .match_stateid = nfs41_match_stateid, .find_root_sec = nfs41_find_root_sec, .reboot_recovery_ops = &nfs41_reboot_recovery_ops, @@ -6742,6 +6700,26 @@ const struct nfs4_minor_version_ops *nfs_v4_minor_ops[] = { #endif }; +const struct inode_operations nfs4_dir_inode_operations = { + .create = nfs_create, + .lookup = nfs_lookup, + .atomic_open = nfs_atomic_open, + .link = nfs_link, + .unlink = nfs_unlink, + .symlink = nfs_symlink, + .mkdir = nfs_mkdir, + .rmdir = nfs_rmdir, + .mknod = nfs_mknod, + .rename = nfs_rename, + .permission = nfs_permission, + .getattr = nfs_getattr, + 
.setattr = nfs_setattr, + .getxattr = generic_getxattr, + .setxattr = generic_setxattr, + .listxattr = generic_listxattr, + .removexattr = generic_removexattr, +}; + static const struct inode_operations nfs4_file_inode_operations = { .permission = nfs_permission, .getattr = nfs_getattr, @@ -6760,6 +6738,7 @@ const struct nfs_rpc_ops nfs_v4_clientops = { .file_ops = &nfs4_file_operations, .getroot = nfs4_proc_get_root, .submount = nfs4_submount, + .try_mount = nfs4_try_mount, .getattr = nfs4_proc_getattr, .setattr = nfs4_proc_setattr, .lookup = nfs4_proc_lookup, @@ -6786,9 +6765,11 @@ const struct nfs_rpc_ops nfs_v4_clientops = { .set_capabilities = nfs4_server_capabilities, .decode_dirent = nfs4_decode_dirent, .read_setup = nfs4_proc_read_setup, + .read_pageio_init = pnfs_pageio_init_read, .read_rpc_prepare = nfs4_proc_read_rpc_prepare, .read_done = nfs4_read_done, .write_setup = nfs4_proc_write_setup, + .write_pageio_init = pnfs_pageio_init_write, .write_rpc_prepare = nfs4_proc_write_rpc_prepare, .write_done = nfs4_write_done, .commit_setup = nfs4_proc_commit_setup, @@ -6798,7 +6779,13 @@ const struct nfs_rpc_ops nfs_v4_clientops = { .clear_acl_cache = nfs4_zap_acl_attr, .close_context = nfs4_close_context, .open_context = nfs4_atomic_open, + .have_delegation = nfs4_have_delegation, + .return_delegation = nfs4_inode_return_delegation, + .alloc_client = nfs4_alloc_client, .init_client = nfs4_init_client, + .free_client = nfs4_free_client, + .create_server = nfs4_create_server, + .clone_server = nfs_clone_server, }; static const struct xattr_handler nfs4_xattr_nfs4_acl_handler = { @@ -6813,10 +6800,6 @@ const struct xattr_handler *nfs4_xattr_handlers[] = { NULL }; -module_param(max_session_slots, ushort, 0644); -MODULE_PARM_DESC(max_session_slots, "Maximum number of outstanding NFSv4.1 " - "requests the client will negotiate"); - /* * Local variables: * c-basic-offset: 8 diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c index 6930bec91bc..1720d32ffa5 100644 --- 
a/fs/nfs/nfs4renewd.c +++ b/fs/nfs/nfs4renewd.c @@ -117,8 +117,7 @@ nfs4_schedule_state_renewal(struct nfs_client *clp) timeout = 5 * HZ; dprintk("%s: requeueing work. Lease period = %ld\n", __func__, (timeout + HZ - 1) / HZ); - cancel_delayed_work(&clp->cl_renewd); - schedule_delayed_work(&clp->cl_renewd, timeout); + mod_delayed_work(system_wq, &clp->cl_renewd, timeout); set_bit(NFS_CS_RENEWD, &clp->cl_res_state); spin_unlock(&clp->cl_lock); } diff --git a/fs/nfs/nfs4session.c b/fs/nfs/nfs4session.c new file mode 100644 index 00000000000..ebda5f4a031 --- /dev/null +++ b/fs/nfs/nfs4session.c @@ -0,0 +1,552 @@ +/* + * fs/nfs/nfs4session.c + * + * Copyright (c) 2012 Trond Myklebust <Trond.Myklebust@netapp.com> + * + */ +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/string.h> +#include <linux/printk.h> +#include <linux/slab.h> +#include <linux/sunrpc/sched.h> +#include <linux/sunrpc/bc_xprt.h> +#include <linux/nfs.h> +#include <linux/nfs4.h> +#include <linux/nfs_fs.h> +#include <linux/module.h> + +#include "nfs4_fs.h" +#include "internal.h" +#include "nfs4session.h" +#include "callback.h" + +#define NFSDBG_FACILITY NFSDBG_STATE + +/* + * nfs4_shrink_slot_table - free retired slots from the slot table + */ +static void nfs4_shrink_slot_table(struct nfs4_slot_table *tbl, u32 newsize) +{ + struct nfs4_slot **p; + if (newsize >= tbl->max_slots) + return; + + p = &tbl->slots; + while (newsize--) + p = &(*p)->next; + while (*p) { + struct nfs4_slot *slot = *p; + + *p = slot->next; + kfree(slot); + tbl->max_slots--; + } +} + +/* + * nfs4_free_slot - free a slot and efficiently update slot table. + * + * freeing a slot is trivially done by clearing its respective bit + * in the bitmap. + * If the freed slotid equals highest_used_slotid we want to update it + * so that the server would be able to size down the slot table if needed, + * otherwise we know that the highest_used_slotid is still in use. 
+ * When updating highest_used_slotid there may be "holes" in the bitmap + * so we need to scan down from highest_used_slotid to 0 looking for the now + * highest slotid in use. + * If none found, highest_used_slotid is set to NFS4_NO_SLOT. + * + * Must be called while holding tbl->slot_tbl_lock + */ +void nfs4_free_slot(struct nfs4_slot_table *tbl, struct nfs4_slot *slot) +{ + u32 slotid = slot->slot_nr; + + /* clear used bit in bitmap */ + __clear_bit(slotid, tbl->used_slots); + + /* update highest_used_slotid when it is freed */ + if (slotid == tbl->highest_used_slotid) { + u32 new_max = find_last_bit(tbl->used_slots, slotid); + if (new_max < slotid) + tbl->highest_used_slotid = new_max; + else { + tbl->highest_used_slotid = NFS4_NO_SLOT; + nfs4_session_drain_complete(tbl->session, tbl); + } + } + dprintk("%s: slotid %u highest_used_slotid %d\n", __func__, + slotid, tbl->highest_used_slotid); +} + +static struct nfs4_slot *nfs4_new_slot(struct nfs4_slot_table *tbl, + u32 slotid, u32 seq_init, gfp_t gfp_mask) +{ + struct nfs4_slot *slot; + + slot = kzalloc(sizeof(*slot), gfp_mask); + if (slot) { + slot->table = tbl; + slot->slot_nr = slotid; + slot->seq_nr = seq_init; + } + return slot; +} + +static struct nfs4_slot *nfs4_find_or_create_slot(struct nfs4_slot_table *tbl, + u32 slotid, u32 seq_init, gfp_t gfp_mask) +{ + struct nfs4_slot **p, *slot; + + p = &tbl->slots; + for (;;) { + if (*p == NULL) { + *p = nfs4_new_slot(tbl, tbl->max_slots, + seq_init, gfp_mask); + if (*p == NULL) + break; + tbl->max_slots++; + } + slot = *p; + if (slot->slot_nr == slotid) + return slot; + p = &slot->next; + } + return ERR_PTR(-ENOMEM); +} + +/* + * nfs4_alloc_slot - efficiently look for a free slot + * + * nfs4_alloc_slot looks for an unset bit in the used_slots bitmap. + * If found, we mark the slot as used, update the highest_used_slotid, + * and respectively set up the sequence operation args. + * + * Note: must be called with under the slot_tbl_lock. 
+ */ +struct nfs4_slot *nfs4_alloc_slot(struct nfs4_slot_table *tbl) +{ + struct nfs4_slot *ret = ERR_PTR(-EBUSY); + u32 slotid; + + dprintk("--> %s used_slots=%04lx highest_used=%u max_slots=%u\n", + __func__, tbl->used_slots[0], tbl->highest_used_slotid, + tbl->max_slotid + 1); + slotid = find_first_zero_bit(tbl->used_slots, tbl->max_slotid + 1); + if (slotid > tbl->max_slotid) + goto out; + ret = nfs4_find_or_create_slot(tbl, slotid, 1, GFP_NOWAIT); + if (IS_ERR(ret)) + goto out; + __set_bit(slotid, tbl->used_slots); + if (slotid > tbl->highest_used_slotid || + tbl->highest_used_slotid == NFS4_NO_SLOT) + tbl->highest_used_slotid = slotid; + ret->generation = tbl->generation; + +out: + dprintk("<-- %s used_slots=%04lx highest_used=%d slotid=%d \n", + __func__, tbl->used_slots[0], tbl->highest_used_slotid, + !IS_ERR(ret) ? ret->slot_nr : -1); + return ret; +} + +static int nfs4_grow_slot_table(struct nfs4_slot_table *tbl, + u32 max_reqs, u32 ivalue) +{ + if (max_reqs <= tbl->max_slots) + return 0; + if (!IS_ERR(nfs4_find_or_create_slot(tbl, max_reqs - 1, ivalue, GFP_NOFS))) + return 0; + return -ENOMEM; +} + +static void nfs4_reset_slot_table(struct nfs4_slot_table *tbl, + u32 server_highest_slotid, + u32 ivalue) +{ + struct nfs4_slot **p; + + nfs4_shrink_slot_table(tbl, server_highest_slotid + 1); + p = &tbl->slots; + while (*p) { + (*p)->seq_nr = ivalue; + (*p)->interrupted = 0; + p = &(*p)->next; + } + tbl->highest_used_slotid = NFS4_NO_SLOT; + tbl->target_highest_slotid = server_highest_slotid; + tbl->server_highest_slotid = server_highest_slotid; + tbl->d_target_highest_slotid = 0; + tbl->d2_target_highest_slotid = 0; + tbl->max_slotid = server_highest_slotid; +} + +/* + * (re)Initialise a slot table + */ +static int nfs4_realloc_slot_table(struct nfs4_slot_table *tbl, + u32 max_reqs, u32 ivalue) +{ + int ret; + + dprintk("--> %s: max_reqs=%u, tbl->max_slots %d\n", __func__, + max_reqs, tbl->max_slots); + + if (max_reqs > NFS4_MAX_SLOT_TABLE) + max_reqs = 
NFS4_MAX_SLOT_TABLE; + + ret = nfs4_grow_slot_table(tbl, max_reqs, ivalue); + if (ret) + goto out; + + spin_lock(&tbl->slot_tbl_lock); + nfs4_reset_slot_table(tbl, max_reqs - 1, ivalue); + spin_unlock(&tbl->slot_tbl_lock); + + dprintk("%s: tbl=%p slots=%p max_slots=%d\n", __func__, + tbl, tbl->slots, tbl->max_slots); +out: + dprintk("<-- %s: return %d\n", __func__, ret); + return ret; +} + +/* Destroy the slot table */ +static void nfs4_destroy_slot_tables(struct nfs4_session *session) +{ + nfs4_shrink_slot_table(&session->fc_slot_table, 0); + nfs4_shrink_slot_table(&session->bc_slot_table, 0); +} + +static bool nfs41_assign_slot(struct rpc_task *task, void *pslot) +{ + struct nfs4_sequence_args *args = task->tk_msg.rpc_argp; + struct nfs4_sequence_res *res = task->tk_msg.rpc_resp; + struct nfs4_slot *slot = pslot; + struct nfs4_slot_table *tbl = slot->table; + + if (nfs4_session_draining(tbl->session) && !args->sa_privileged) + return false; + slot->generation = tbl->generation; + args->sa_slot = slot; + res->sr_timestamp = jiffies; + res->sr_slot = slot; + res->sr_status_flags = 0; + res->sr_status = 1; + return true; +} + +static bool __nfs41_wake_and_assign_slot(struct nfs4_slot_table *tbl, + struct nfs4_slot *slot) +{ + if (rpc_wake_up_first(&tbl->slot_tbl_waitq, nfs41_assign_slot, slot)) + return true; + return false; +} + +bool nfs41_wake_and_assign_slot(struct nfs4_slot_table *tbl, + struct nfs4_slot *slot) +{ + if (slot->slot_nr > tbl->max_slotid) + return false; + return __nfs41_wake_and_assign_slot(tbl, slot); +} + +static bool nfs41_try_wake_next_slot_table_entry(struct nfs4_slot_table *tbl) +{ + struct nfs4_slot *slot = nfs4_alloc_slot(tbl); + if (!IS_ERR(slot)) { + bool ret = __nfs41_wake_and_assign_slot(tbl, slot); + if (ret) + return ret; + nfs4_free_slot(tbl, slot); + } + return false; +} + +void nfs41_wake_slot_table(struct nfs4_slot_table *tbl) +{ + for (;;) { + if (!nfs41_try_wake_next_slot_table_entry(tbl)) + break; + } +} + +static void 
nfs41_set_max_slotid_locked(struct nfs4_slot_table *tbl, + u32 target_highest_slotid) +{ + u32 max_slotid; + + max_slotid = min(NFS4_MAX_SLOT_TABLE - 1, target_highest_slotid); + if (max_slotid > tbl->server_highest_slotid) + max_slotid = tbl->server_highest_slotid; + if (max_slotid > tbl->target_highest_slotid) + max_slotid = tbl->target_highest_slotid; + tbl->max_slotid = max_slotid; + nfs41_wake_slot_table(tbl); +} + +/* Update the client's idea of target_highest_slotid */ +static void nfs41_set_target_slotid_locked(struct nfs4_slot_table *tbl, + u32 target_highest_slotid) +{ + if (tbl->target_highest_slotid == target_highest_slotid) + return; + tbl->target_highest_slotid = target_highest_slotid; + tbl->generation++; +} + +void nfs41_set_target_slotid(struct nfs4_slot_table *tbl, + u32 target_highest_slotid) +{ + spin_lock(&tbl->slot_tbl_lock); + nfs41_set_target_slotid_locked(tbl, target_highest_slotid); + tbl->d_target_highest_slotid = 0; + tbl->d2_target_highest_slotid = 0; + nfs41_set_max_slotid_locked(tbl, target_highest_slotid); + spin_unlock(&tbl->slot_tbl_lock); +} + +static void nfs41_set_server_slotid_locked(struct nfs4_slot_table *tbl, + u32 highest_slotid) +{ + if (tbl->server_highest_slotid == highest_slotid) + return; + if (tbl->highest_used_slotid > highest_slotid) + return; + /* Deallocate slots */ + nfs4_shrink_slot_table(tbl, highest_slotid + 1); + tbl->server_highest_slotid = highest_slotid; +} + +static s32 nfs41_derivative_target_slotid(s32 s1, s32 s2) +{ + s1 -= s2; + if (s1 == 0) + return 0; + if (s1 < 0) + return (s1 - 1) >> 1; + return (s1 + 1) >> 1; +} + +static int nfs41_sign_s32(s32 s1) +{ + if (s1 > 0) + return 1; + if (s1 < 0) + return -1; + return 0; +} + +static bool nfs41_same_sign_or_zero_s32(s32 s1, s32 s2) +{ + if (!s1 || !s2) + return true; + return nfs41_sign_s32(s1) == nfs41_sign_s32(s2); +} + +/* Try to eliminate outliers by checking for sharp changes in the + * derivatives and second derivatives + */ +static bool 
nfs41_is_outlier_target_slotid(struct nfs4_slot_table *tbl, + u32 new_target) +{ + s32 d_target, d2_target; + bool ret = true; + + d_target = nfs41_derivative_target_slotid(new_target, + tbl->target_highest_slotid); + d2_target = nfs41_derivative_target_slotid(d_target, + tbl->d_target_highest_slotid); + /* Is first derivative same sign? */ + if (nfs41_same_sign_or_zero_s32(d_target, tbl->d_target_highest_slotid)) + ret = false; + /* Is second derivative same sign? */ + if (nfs41_same_sign_or_zero_s32(d2_target, tbl->d2_target_highest_slotid)) + ret = false; + tbl->d_target_highest_slotid = d_target; + tbl->d2_target_highest_slotid = d2_target; + return ret; +} + +void nfs41_update_target_slotid(struct nfs4_slot_table *tbl, + struct nfs4_slot *slot, + struct nfs4_sequence_res *res) +{ + spin_lock(&tbl->slot_tbl_lock); + if (!nfs41_is_outlier_target_slotid(tbl, res->sr_target_highest_slotid)) + nfs41_set_target_slotid_locked(tbl, res->sr_target_highest_slotid); + if (tbl->generation == slot->generation) + nfs41_set_server_slotid_locked(tbl, res->sr_highest_slotid); + nfs41_set_max_slotid_locked(tbl, res->sr_target_highest_slotid); + spin_unlock(&tbl->slot_tbl_lock); +} + +/* + * Initialize or reset the forechannel and backchannel tables + */ +int nfs4_setup_session_slot_tables(struct nfs4_session *ses) +{ + struct nfs4_slot_table *tbl; + int status; + + dprintk("--> %s\n", __func__); + /* Fore channel */ + tbl = &ses->fc_slot_table; + tbl->session = ses; + status = nfs4_realloc_slot_table(tbl, ses->fc_attrs.max_reqs, 1); + if (status) /* -ENOMEM */ + return status; + /* Back channel */ + tbl = &ses->bc_slot_table; + tbl->session = ses; + status = nfs4_realloc_slot_table(tbl, ses->bc_attrs.max_reqs, 0); + if (status && tbl->slots == NULL) + /* Fore and back channel share a connection so get + * both slot tables or neither */ + nfs4_destroy_slot_tables(ses); + return status; +} + +struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp) +{ + struct nfs4_session 
*session; + struct nfs4_slot_table *tbl; + + session = kzalloc(sizeof(struct nfs4_session), GFP_NOFS); + if (!session) + return NULL; + + tbl = &session->fc_slot_table; + tbl->highest_used_slotid = NFS4_NO_SLOT; + spin_lock_init(&tbl->slot_tbl_lock); + rpc_init_priority_wait_queue(&tbl->slot_tbl_waitq, "ForeChannel Slot table"); + init_completion(&tbl->complete); + + tbl = &session->bc_slot_table; + tbl->highest_used_slotid = NFS4_NO_SLOT; + spin_lock_init(&tbl->slot_tbl_lock); + rpc_init_wait_queue(&tbl->slot_tbl_waitq, "BackChannel Slot table"); + init_completion(&tbl->complete); + + session->session_state = 1<<NFS4_SESSION_INITING; + + session->clp = clp; + return session; +} + +void nfs4_destroy_session(struct nfs4_session *session) +{ + struct rpc_xprt *xprt; + struct rpc_cred *cred; + + cred = nfs4_get_exchange_id_cred(session->clp); + nfs4_proc_destroy_session(session, cred); + if (cred) + put_rpccred(cred); + + rcu_read_lock(); + xprt = rcu_dereference(session->clp->cl_rpcclient->cl_xprt); + rcu_read_unlock(); + dprintk("%s Destroy backchannel for xprt %p\n", + __func__, xprt); + xprt_destroy_backchannel(xprt, NFS41_BC_MIN_CALLBACKS); + nfs4_destroy_slot_tables(session); + kfree(session); +} + +/* + * With sessions, the client is not marked ready until after a + * successful EXCHANGE_ID and CREATE_SESSION. + * + * Map errors cl_cons_state errors to EPROTONOSUPPORT to indicate + * other versions of NFS can be tried. 
+ */ +static int nfs41_check_session_ready(struct nfs_client *clp) +{ + int ret; + + if (clp->cl_cons_state == NFS_CS_SESSION_INITING) { + ret = nfs4_client_recover_expired_lease(clp); + if (ret) + return ret; + } + if (clp->cl_cons_state < NFS_CS_READY) + return -EPROTONOSUPPORT; + smp_rmb(); + return 0; +} + +int nfs4_init_session(struct nfs_server *server) +{ + struct nfs_client *clp = server->nfs_client; + struct nfs4_session *session; + unsigned int target_max_rqst_sz = NFS_MAX_FILE_IO_SIZE; + unsigned int target_max_resp_sz = NFS_MAX_FILE_IO_SIZE; + + if (!nfs4_has_session(clp)) + return 0; + + if (server->rsize != 0) + target_max_resp_sz = server->rsize; + target_max_resp_sz += nfs41_maxread_overhead; + + if (server->wsize != 0) + target_max_rqst_sz = server->wsize; + target_max_rqst_sz += nfs41_maxwrite_overhead; + + session = clp->cl_session; + spin_lock(&clp->cl_lock); + if (test_and_clear_bit(NFS4_SESSION_INITING, &session->session_state)) { + /* Initialise targets and channel attributes */ + session->fc_target_max_rqst_sz = target_max_rqst_sz; + session->fc_attrs.max_rqst_sz = target_max_rqst_sz; + session->fc_target_max_resp_sz = target_max_resp_sz; + session->fc_attrs.max_resp_sz = target_max_resp_sz; + } else { + /* Just adjust the targets */ + if (target_max_rqst_sz > session->fc_target_max_rqst_sz) { + session->fc_target_max_rqst_sz = target_max_rqst_sz; + set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state); + } + if (target_max_resp_sz > session->fc_target_max_resp_sz) { + session->fc_target_max_resp_sz = target_max_resp_sz; + set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state); + } + } + spin_unlock(&clp->cl_lock); + + if (test_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state)) + nfs4_schedule_lease_recovery(clp); + + return nfs41_check_session_ready(clp); +} + +int nfs4_init_ds_session(struct nfs_client *clp, unsigned long lease_time) +{ + struct nfs4_session *session = clp->cl_session; + int ret; + + spin_lock(&clp->cl_lock); + if 
(test_and_clear_bit(NFS4_SESSION_INITING, &session->session_state)) { + /* + * Do not set NFS_CS_CHECK_LEASE_TIME instead set the + * DS lease to be equal to the MDS lease. + */ + clp->cl_lease_time = lease_time; + clp->cl_last_renewal = jiffies; + } + spin_unlock(&clp->cl_lock); + + ret = nfs41_check_session_ready(clp); + if (ret) + return ret; + /* Test for the DS role */ + if (!is_ds_client(clp)) + return -ENODEV; + return 0; +} +EXPORT_SYMBOL_GPL(nfs4_init_ds_session); + + diff --git a/fs/nfs/nfs4session.h b/fs/nfs/nfs4session.h new file mode 100644 index 00000000000..6f3cb39386d --- /dev/null +++ b/fs/nfs/nfs4session.h @@ -0,0 +1,142 @@ +/* + * fs/nfs/nfs4session.h + * + * Copyright (c) 2012 Trond Myklebust <Trond.Myklebust@netapp.com> + * + */ +#ifndef __LINUX_FS_NFS_NFS4SESSION_H +#define __LINUX_FS_NFS_NFS4SESSION_H + +/* maximum number of slots to use */ +#define NFS4_DEF_SLOT_TABLE_SIZE (16U) +#define NFS4_MAX_SLOT_TABLE (1024U) +#define NFS4_NO_SLOT ((u32)-1) + +#if IS_ENABLED(CONFIG_NFS_V4) + +/* Sessions slot seqid */ +struct nfs4_slot { + struct nfs4_slot_table *table; + struct nfs4_slot *next; + unsigned long generation; + u32 slot_nr; + u32 seq_nr; + unsigned int interrupted : 1; +}; + +/* Sessions */ +#define SLOT_TABLE_SZ DIV_ROUND_UP(NFS4_MAX_SLOT_TABLE, 8*sizeof(long)) +struct nfs4_slot_table { + struct nfs4_session *session; /* Parent session */ + struct nfs4_slot *slots; /* seqid per slot */ + unsigned long used_slots[SLOT_TABLE_SZ]; /* used/unused bitmap */ + spinlock_t slot_tbl_lock; + struct rpc_wait_queue slot_tbl_waitq; /* allocators may wait here */ + u32 max_slots; /* # slots in table */ + u32 max_slotid; /* Max allowed slotid value */ + u32 highest_used_slotid; /* sent to server on each SEQ. 
+ * op for dynamic resizing */ + u32 target_highest_slotid; /* Server max_slot target */ + u32 server_highest_slotid; /* Server highest slotid */ + s32 d_target_highest_slotid; /* Derivative */ + s32 d2_target_highest_slotid; /* 2nd derivative */ + unsigned long generation; /* Generation counter for + target_highest_slotid */ + struct completion complete; +}; + +/* + * Session related parameters + */ +struct nfs4_session { + struct nfs4_sessionid sess_id; + u32 flags; + unsigned long session_state; + u32 hash_alg; + u32 ssv_len; + + /* The fore and back channel */ + struct nfs4_channel_attrs fc_attrs; + struct nfs4_slot_table fc_slot_table; + struct nfs4_channel_attrs bc_attrs; + struct nfs4_slot_table bc_slot_table; + struct nfs_client *clp; + /* Create session arguments */ + unsigned int fc_target_max_rqst_sz; + unsigned int fc_target_max_resp_sz; +}; + +enum nfs4_session_state { + NFS4_SESSION_INITING, + NFS4_SESSION_DRAINING, +}; + +#if defined(CONFIG_NFS_V4_1) +extern struct nfs4_slot *nfs4_alloc_slot(struct nfs4_slot_table *tbl); +extern void nfs4_free_slot(struct nfs4_slot_table *tbl, struct nfs4_slot *slot); + +extern void nfs41_set_target_slotid(struct nfs4_slot_table *tbl, + u32 target_highest_slotid); +extern void nfs41_update_target_slotid(struct nfs4_slot_table *tbl, + struct nfs4_slot *slot, + struct nfs4_sequence_res *res); + +extern int nfs4_setup_session_slot_tables(struct nfs4_session *ses); + +extern struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp); +extern void nfs4_destroy_session(struct nfs4_session *session); +extern int nfs4_init_session(struct nfs_server *server); +extern int nfs4_init_ds_session(struct nfs_client *, unsigned long); + +extern void nfs4_session_drain_complete(struct nfs4_session *session, + struct nfs4_slot_table *tbl); + +static inline bool nfs4_session_draining(struct nfs4_session *session) +{ + return !!test_bit(NFS4_SESSION_DRAINING, &session->session_state); +} + +bool nfs41_wake_and_assign_slot(struct 
nfs4_slot_table *tbl, + struct nfs4_slot *slot); +void nfs41_wake_slot_table(struct nfs4_slot_table *tbl); + +/* + * Determine if sessions are in use. + */ +static inline int nfs4_has_session(const struct nfs_client *clp) +{ + if (clp->cl_session) + return 1; + return 0; +} + +static inline int nfs4_has_persistent_session(const struct nfs_client *clp) +{ + if (nfs4_has_session(clp)) + return (clp->cl_session->flags & SESSION4_PERSIST); + return 0; +} + +#else /* defined(CONFIG_NFS_V4_1) */ + +static inline int nfs4_init_session(struct nfs_server *server) +{ + return 0; +} + +/* + * Determine if sessions are in use. + */ +static inline int nfs4_has_session(const struct nfs_client *clp) +{ + return 0; +} + +static inline int nfs4_has_persistent_session(const struct nfs_client *clp) +{ + return 0; +} + +#endif /* defined(CONFIG_NFS_V4_1) */ +#endif /* IS_ENABLED(CONFIG_NFS_V4) */ +#endif /* __LINUX_FS_NFS_NFS4SESSION_H */ diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index f38300e9f17..e61f68d5ef2 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -51,19 +51,22 @@ #include <linux/bitops.h> #include <linux/jiffies.h> +#include <linux/sunrpc/clnt.h> + #include "nfs4_fs.h" #include "callback.h" #include "delegation.h" #include "internal.h" +#include "nfs4session.h" #include "pnfs.h" +#include "netns.h" #define NFSDBG_FACILITY NFSDBG_STATE #define OPENOWNER_POOL_SIZE 8 const nfs4_stateid zero_stateid; - -static LIST_HEAD(nfs4_clientid_list); +static DEFINE_MUTEX(nfs_clid_init_mutex); int nfs4_init_clientid(struct nfs_client *clp, struct rpc_cred *cred) { @@ -73,12 +76,13 @@ int nfs4_init_clientid(struct nfs_client *clp, struct rpc_cred *cred) }; unsigned short port; int status; + struct nfs_net *nn = net_generic(clp->cl_net, nfs_net_id); if (test_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state)) goto do_confirm; - port = nfs_callback_tcpport; + port = nn->nfs_callback_tcpport; if (clp->cl_addr.ss_family == AF_INET6) - port = nfs_callback_tcpport6; + port = 
nn->nfs_callback_tcpport6; status = nfs4_proc_setclientid(clp, NFS4_CALLBACK, port, cred, &clid); if (status != 0) @@ -96,6 +100,51 @@ out: return status; } +/** + * nfs40_discover_server_trunking - Detect server IP address trunking (mv0) + * + * @clp: nfs_client under test + * @result: OUT: found nfs_client, or clp + * @cred: credential to use for trunking test + * + * Returns zero, a negative errno, or a negative NFS4ERR status. + * If zero is returned, an nfs_client pointer is planted in + * "result". + * + * Note: The returned client may not yet be marked ready. + */ +int nfs40_discover_server_trunking(struct nfs_client *clp, + struct nfs_client **result, + struct rpc_cred *cred) +{ + struct nfs4_setclientid_res clid = { + .clientid = clp->cl_clientid, + .confirm = clp->cl_confirm, + }; + struct nfs_net *nn = net_generic(clp->cl_net, nfs_net_id); + unsigned short port; + int status; + + port = nn->nfs_callback_tcpport; + if (clp->cl_addr.ss_family == AF_INET6) + port = nn->nfs_callback_tcpport6; + + status = nfs4_proc_setclientid(clp, NFS4_CALLBACK, port, cred, &clid); + if (status != 0) + goto out; + clp->cl_clientid = clid.clientid; + clp->cl_confirm = clid.confirm; + + status = nfs40_walk_client_list(clp, result, cred); + if (status == 0) { + /* Sustain the lease, even if it's empty. If the clientid4 + * goes stale it's of no use for trunking discovery. 
*/ + nfs4_schedule_state_renewal(*result); + } +out: + return status; +} + struct rpc_cred *nfs4_get_machine_cred_locked(struct nfs_client *clp) { struct rpc_cred *cred = NULL; @@ -200,24 +249,27 @@ static void nfs4_end_drain_session(struct nfs_client *clp) { struct nfs4_session *ses = clp->cl_session; struct nfs4_slot_table *tbl; - int max_slots; if (ses == NULL) return; tbl = &ses->fc_slot_table; if (test_and_clear_bit(NFS4_SESSION_DRAINING, &ses->session_state)) { spin_lock(&tbl->slot_tbl_lock); - max_slots = tbl->max_slots; - while (max_slots--) { - if (rpc_wake_up_first(&tbl->slot_tbl_waitq, - nfs4_set_task_privileged, - NULL) == NULL) - break; - } + nfs41_wake_slot_table(tbl); spin_unlock(&tbl->slot_tbl_lock); } } +/* + * Signal state manager thread if session fore channel is drained + */ +void nfs4_session_drain_complete(struct nfs4_session *session, + struct nfs4_slot_table *tbl) +{ + if (nfs4_session_draining(session)) + complete(&tbl->complete); +} + static int nfs4_wait_on_slot_tbl(struct nfs4_slot_table *tbl) { spin_lock(&tbl->slot_tbl_lock); @@ -249,7 +301,6 @@ static void nfs41_finish_session_reset(struct nfs_client *clp) clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state); clear_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state); /* create_session negotiated new slot table */ - clear_bit(NFS4CLNT_RECALL_SLOT, &clp->cl_state); clear_bit(NFS4CLNT_BIND_CONN_TO_SESSION, &clp->cl_state); nfs41_setup_state_renewal(clp); } @@ -275,6 +326,33 @@ out: return status; } +/** + * nfs41_discover_server_trunking - Detect server IP address trunking (mv1) + * + * @clp: nfs_client under test + * @result: OUT: found nfs_client, or clp + * @cred: credential to use for trunking test + * + * Returns NFS4_OK, a negative errno, or a negative NFS4ERR status. + * If NFS4_OK is returned, an nfs_client pointer is planted in + * "result". + * + * Note: The returned client may not yet be marked ready. 
+ */ +int nfs41_discover_server_trunking(struct nfs_client *clp, + struct nfs_client **result, + struct rpc_cred *cred) +{ + int status; + + status = nfs4_proc_exchange_id(clp, cred); + if (status != NFS4_OK) + return status; + set_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state); + + return nfs41_walk_client_list(clp, result, cred); +} + struct rpc_cred *nfs4_get_exchange_id_cred(struct nfs_client *clp) { struct rpc_cred *cred; @@ -729,11 +807,8 @@ static void __nfs4_close(struct nfs4_state *state, if (!call_close) { nfs4_put_open_state(state); nfs4_put_state_owner(owner); - } else { - bool roc = pnfs_roc(state->inode); - - nfs4_do_close(state, gfp_mask, wait, roc); - } + } else + nfs4_do_close(state, gfp_mask, wait); } void nfs4_close_state(struct nfs4_state *state, fmode_t fmode) @@ -865,7 +940,7 @@ void nfs4_put_lock_state(struct nfs4_lock_state *lsp) if (list_empty(&state->lock_states)) clear_bit(LK_STATE_IN_USE, &state->flags); spin_unlock(&state->state_lock); - if (lsp->ls_flags & NFS_LOCK_INITIALIZED) { + if (test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags)) { if (nfs4_release_lockowner(lsp) == 0) return; } @@ -911,17 +986,25 @@ int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl) } static bool nfs4_copy_lock_stateid(nfs4_stateid *dst, struct nfs4_state *state, - fl_owner_t fl_owner, pid_t fl_pid) + const struct nfs_lockowner *lockowner) { struct nfs4_lock_state *lsp; + fl_owner_t fl_owner; + pid_t fl_pid; bool ret = false; + + if (lockowner == NULL) + goto out; + if (test_bit(LK_STATE_IN_USE, &state->flags) == 0) goto out; + fl_owner = lockowner->l_owner; + fl_pid = lockowner->l_pid; spin_lock(&state->state_lock); lsp = __nfs4_find_lock_state(state, fl_owner, fl_pid, NFS4_ANY_LOCK_TYPE); - if (lsp != NULL && (lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0) { + if (lsp != NULL && test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags) != 0) { nfs4_stateid_copy(dst, &lsp->ls_stateid); ret = true; } @@ -946,11 +1029,11 @@ static void 
nfs4_copy_open_stateid(nfs4_stateid *dst, struct nfs4_state *state) * requests. */ void nfs4_select_rw_stateid(nfs4_stateid *dst, struct nfs4_state *state, - fmode_t fmode, fl_owner_t fl_owner, pid_t fl_pid) + fmode_t fmode, const struct nfs_lockowner *lockowner) { if (nfs4_copy_delegation_stateid(dst, state->inode, fmode)) return; - if (nfs4_copy_lock_stateid(dst, state, fl_owner, fl_pid)) + if (nfs4_copy_lock_stateid(dst, state, lockowner)) return; nfs4_copy_open_stateid(dst, state); } @@ -1000,7 +1083,6 @@ void nfs_free_seqid(struct nfs_seqid *seqid) */ static void nfs_increment_seqid(int status, struct nfs_seqid *seqid) { - BUG_ON(list_first_entry(&seqid->sequence->list, struct nfs_seqid, list) != seqid); switch (status) { case 0: break; @@ -1123,6 +1205,40 @@ void nfs4_schedule_lease_recovery(struct nfs_client *clp) } EXPORT_SYMBOL_GPL(nfs4_schedule_lease_recovery); +int nfs4_wait_clnt_recover(struct nfs_client *clp) +{ + int res; + + might_sleep(); + + res = wait_on_bit(&clp->cl_state, NFS4CLNT_MANAGER_RUNNING, + nfs_wait_bit_killable, TASK_KILLABLE); + if (res) + return res; + + if (clp->cl_cons_state < 0) + return clp->cl_cons_state; + return 0; +} + +int nfs4_client_recover_expired_lease(struct nfs_client *clp) +{ + unsigned int loop; + int ret; + + for (loop = NFS4_MAX_LOOP_ON_RECOVER; loop != 0; loop--) { + ret = nfs4_wait_clnt_recover(clp); + if (ret != 0) + break; + if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) && + !test_bit(NFS4CLNT_CHECK_LEASE,&clp->cl_state)) + break; + nfs4_schedule_state_manager(clp); + ret = -EIO; + } + return ret; +} + /* * nfs40_handle_cb_pathdown - return all delegations after NFS4ERR_CB_PATH_DOWN * @clp: client to process @@ -1289,7 +1405,7 @@ restart: if (status >= 0) { spin_lock(&state->state_lock); list_for_each_entry(lock, &state->lock_states, ls_locks) { - if (!(lock->ls_flags & NFS_LOCK_INITIALIZED)) + if (!test_bit(NFS_LOCK_INITIALIZED, &lock->ls_flags)) pr_warn_ratelimited("NFS: " "%s: Lock reclaim " 
"failed!\n", __func__); @@ -1315,14 +1431,6 @@ restart: /* Mark the file as being 'closed' */ state->state = 0; break; - case -EKEYEXPIRED: - /* - * User RPCSEC_GSS context has expired. - * We cannot recover this stateid now, so - * skip it and allow recovery thread to - * proceed. - */ - break; case -NFS4ERR_ADMIN_REVOKED: case -NFS4ERR_STALE_STATEID: case -NFS4ERR_BAD_STATEID: @@ -1361,7 +1469,7 @@ static void nfs4_clear_open_state(struct nfs4_state *state) spin_lock(&state->state_lock); list_for_each_entry(lock, &state->lock_states, ls_locks) { lock->ls_seqid.flags = 0; - lock->ls_flags &= ~NFS_LOCK_INITIALIZED; + clear_bit(NFS_LOCK_INITIALIZED, &lock->ls_flags); } spin_unlock(&state->state_lock); } @@ -1475,14 +1583,6 @@ static void nfs4_state_start_reclaim_nograce(struct nfs_client *clp) nfs4_state_mark_reclaim_helper(clp, nfs4_state_mark_reclaim_nograce); } -static void nfs4_warn_keyexpired(const char *s) -{ - printk_ratelimited(KERN_WARNING "Error: state manager" - " encountered RPCSEC_GSS session" - " expired against NFSv4 server %s.\n", - s); -} - static int nfs4_recovery_handle_error(struct nfs_client *clp, int error) { switch (error) { @@ -1516,10 +1616,6 @@ static int nfs4_recovery_handle_error(struct nfs_client *clp, int error) case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: set_bit(NFS4CLNT_BIND_CONN_TO_SESSION, &clp->cl_state); break; - case -EKEYEXPIRED: - /* Nothing we can do */ - nfs4_warn_keyexpired(clp->cl_hostname); - break; default: dprintk("%s: failed to handle error %d for server %s\n", __func__, error, clp->cl_hostname); @@ -1595,8 +1691,8 @@ out: return nfs4_recovery_handle_error(clp, status); } -/* Set NFS4CLNT_LEASE_EXPIRED for all v4.0 errors and for recoverable errors - * on EXCHANGE_ID for v4.1 +/* Set NFS4CLNT_LEASE_EXPIRED and reclaim reboot state for all v4.0 errors + * and for recoverable errors on EXCHANGE_ID for v4.1 */ static int nfs4_handle_reclaim_lease_error(struct nfs_client *clp, int status) { @@ -1606,10 +1702,19 @@ static int 
nfs4_handle_reclaim_lease_error(struct nfs_client *clp, int status) return -ESERVERFAULT; /* Lease confirmation error: retry after purging the lease */ ssleep(1); - case -NFS4ERR_CLID_INUSE: + clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state); + break; case -NFS4ERR_STALE_CLIENTID: clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state); + nfs4_state_clear_reclaim_reboot(clp); + nfs4_state_start_reclaim_reboot(clp); break; + case -NFS4ERR_CLID_INUSE: + pr_err("NFS: Server %s reports our clientid is in use\n", + clp->cl_hostname); + nfs_mark_client_ready(clp, -EPERM); + clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state); + return -EPERM; case -EACCES: if (clp->cl_machine_cred == NULL) return -EACCES; @@ -1627,8 +1732,6 @@ static int nfs4_handle_reclaim_lease_error(struct nfs_client *clp, int status) dprintk("%s: exit with error %d for server %s\n", __func__, -EPROTONOSUPPORT, clp->cl_hostname); return -EPROTONOSUPPORT; - case -EKEYEXPIRED: - nfs4_warn_keyexpired(clp->cl_hostname); case -NFS4ERR_NOT_SAME: /* FixMe: implement recovery * in nfs4_exchange_id */ default: @@ -1642,7 +1745,7 @@ static int nfs4_handle_reclaim_lease_error(struct nfs_client *clp, int status) return 0; } -static int nfs4_reclaim_lease(struct nfs_client *clp) +static int nfs4_establish_lease(struct nfs_client *clp) { struct rpc_cred *cred; const struct nfs4_state_recovery_ops *ops = @@ -1655,10 +1758,147 @@ static int nfs4_reclaim_lease(struct nfs_client *clp) status = ops->establish_clid(clp, cred); put_rpccred(cred); if (status != 0) + return status; + pnfs_destroy_all_layouts(clp); + return 0; +} + +/* + * Returns zero or a negative errno. NFS4ERR values are converted + * to local errno values. 
+ */ +static int nfs4_reclaim_lease(struct nfs_client *clp) +{ + int status; + + status = nfs4_establish_lease(clp); + if (status < 0) return nfs4_handle_reclaim_lease_error(clp, status); + if (test_and_clear_bit(NFS4CLNT_SERVER_SCOPE_MISMATCH, &clp->cl_state)) + nfs4_state_start_reclaim_nograce(clp); + if (!test_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state)) + set_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state); + clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state); + clear_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); + return 0; +} + +static int nfs4_purge_lease(struct nfs_client *clp) +{ + int status; + + status = nfs4_establish_lease(clp); + if (status < 0) + return nfs4_handle_reclaim_lease_error(clp, status); + clear_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state); + set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); + nfs4_state_start_reclaim_nograce(clp); return 0; } +/** + * nfs4_discover_server_trunking - Detect server IP address trunking + * + * @clp: nfs_client under test + * @result: OUT: found nfs_client, or clp + * + * Returns zero or a negative errno. If zero is returned, + * an nfs_client pointer is planted in "result". + * + * Note: since we are invoked in process context, and + * not from inside the state manager, we cannot use + * nfs4_handle_reclaim_lease_error(). 
+ */ +int nfs4_discover_server_trunking(struct nfs_client *clp, + struct nfs_client **result) +{ + const struct nfs4_state_recovery_ops *ops = + clp->cl_mvops->reboot_recovery_ops; + rpc_authflavor_t *flavors, flav, save; + struct rpc_clnt *clnt; + struct rpc_cred *cred; + int i, len, status; + + dprintk("NFS: %s: testing '%s'\n", __func__, clp->cl_hostname); + + len = NFS_MAX_SECFLAVORS; + flavors = kcalloc(len, sizeof(*flavors), GFP_KERNEL); + if (flavors == NULL) { + status = -ENOMEM; + goto out; + } + len = rpcauth_list_flavors(flavors, len); + if (len < 0) { + status = len; + goto out_free; + } + clnt = clp->cl_rpcclient; + save = clnt->cl_auth->au_flavor; + i = 0; + + mutex_lock(&nfs_clid_init_mutex); + status = -ENOENT; +again: + cred = ops->get_clid_cred(clp); + if (cred == NULL) + goto out_unlock; + + status = ops->detect_trunking(clp, result, cred); + put_rpccred(cred); + switch (status) { + case 0: + break; + + case -EACCES: + if (clp->cl_machine_cred == NULL) + break; + /* Handle case where the user hasn't set up machine creds */ + nfs4_clear_machine_cred(clp); + case -NFS4ERR_DELAY: + case -ETIMEDOUT: + case -EAGAIN: + ssleep(1); + case -NFS4ERR_STALE_CLIENTID: + dprintk("NFS: %s after status %d, retrying\n", + __func__, status); + goto again; + + case -NFS4ERR_CLID_INUSE: + case -NFS4ERR_WRONGSEC: + status = -EPERM; + if (i >= len) + break; + + flav = flavors[i++]; + if (flav == save) + flav = flavors[i++]; + clnt = rpc_clone_client_set_auth(clnt, flav); + if (IS_ERR(clnt)) { + status = PTR_ERR(clnt); + break; + } + clp->cl_rpcclient = clnt; + goto again; + + case -NFS4ERR_MINOR_VERS_MISMATCH: + status = -EPROTONOSUPPORT; + break; + + case -EKEYEXPIRED: + case -NFS4ERR_NOT_SAME: /* FixMe: implement recovery + * in nfs4_exchange_id */ + status = -EKEYEXPIRED; + } + +out_unlock: + mutex_unlock(&nfs_clid_init_mutex); +out_free: + kfree(flavors); +out: + dprintk("NFS: %s: status = %d\n", __func__, status); + return status; +} + #ifdef CONFIG_NFS_V4_1 void 
nfs4_schedule_session_recovery(struct nfs4_session *session, int err) { @@ -1675,14 +1915,23 @@ void nfs4_schedule_session_recovery(struct nfs4_session *session, int err) } EXPORT_SYMBOL_GPL(nfs4_schedule_session_recovery); -void nfs41_handle_recall_slot(struct nfs_client *clp) +static void nfs41_ping_server(struct nfs_client *clp) { - set_bit(NFS4CLNT_RECALL_SLOT, &clp->cl_state); - dprintk("%s: scheduling slot recall for server %s\n", __func__, - clp->cl_hostname); + /* Use CHECK_LEASE to ping the server with a SEQUENCE */ + set_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state); nfs4_schedule_state_manager(clp); } +void nfs41_server_notify_target_slotid_update(struct nfs_client *clp) +{ + nfs41_ping_server(clp); +} + +void nfs41_server_notify_highest_slotid_update(struct nfs_client *clp) +{ + nfs41_ping_server(clp); +} + static void nfs4_reset_all_state(struct nfs_client *clp) { if (test_and_set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0) { @@ -1764,11 +2013,23 @@ static int nfs4_reset_session(struct nfs_client *clp) struct rpc_cred *cred; int status; + if (!nfs4_has_session(clp)) + return 0; nfs4_begin_drain_session(clp); cred = nfs4_get_exchange_id_cred(clp); status = nfs4_proc_destroy_session(clp->cl_session, cred); - if (status && status != -NFS4ERR_BADSESSION && - status != -NFS4ERR_DEADSESSION) { + switch (status) { + case 0: + case -NFS4ERR_BADSESSION: + case -NFS4ERR_DEADSESSION: + break; + case -NFS4ERR_BACK_CHAN_BUSY: + case -NFS4ERR_DELAY: + set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state); + status = 0; + ssleep(1); + goto out; + default: status = nfs4_recovery_handle_error(clp, status); goto out; } @@ -1790,39 +2051,13 @@ out: return status; } -static int nfs4_recall_slot(struct nfs_client *clp) -{ - struct nfs4_slot_table *fc_tbl = &clp->cl_session->fc_slot_table; - struct nfs4_channel_attrs *fc_attrs = &clp->cl_session->fc_attrs; - struct nfs4_slot *new, *old; - int i; - - nfs4_begin_drain_session(clp); - new = kmalloc(fc_tbl->target_max_slots * 
sizeof(struct nfs4_slot), - GFP_NOFS); - if (!new) - return -ENOMEM; - - spin_lock(&fc_tbl->slot_tbl_lock); - for (i = 0; i < fc_tbl->target_max_slots; i++) - new[i].seq_nr = fc_tbl->slots[i].seq_nr; - old = fc_tbl->slots; - fc_tbl->slots = new; - fc_tbl->max_slots = fc_tbl->target_max_slots; - fc_tbl->target_max_slots = 0; - fc_attrs->max_reqs = fc_tbl->max_slots; - spin_unlock(&fc_tbl->slot_tbl_lock); - - kfree(old); - nfs4_end_drain_session(clp); - return 0; -} - static int nfs4_bind_conn_to_session(struct nfs_client *clp) { struct rpc_cred *cred; int ret; + if (!nfs4_has_session(clp)) + return 0; nfs4_begin_drain_session(clp); cred = nfs4_get_exchange_id_cred(clp); ret = nfs4_proc_bind_conn_to_session(clp, cred); @@ -1846,7 +2081,6 @@ static int nfs4_bind_conn_to_session(struct nfs_client *clp) #else /* CONFIG_NFS_V4_1 */ static int nfs4_reset_session(struct nfs_client *clp) { return 0; } static int nfs4_end_drain_session(struct nfs_client *clp) { return 0; } -static int nfs4_recall_slot(struct nfs_client *clp) { return 0; } static int nfs4_bind_conn_to_session(struct nfs_client *clp) { @@ -1857,47 +2091,30 @@ static int nfs4_bind_conn_to_session(struct nfs_client *clp) static void nfs4_state_manager(struct nfs_client *clp) { int status = 0; + const char *section = "", *section_sep = ""; /* Ensure exclusive access to NFSv4 state */ do { if (test_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state)) { - status = nfs4_reclaim_lease(clp); + section = "purge state"; + status = nfs4_purge_lease(clp); if (status < 0) goto out_error; - clear_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state); - set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); + continue; } - if (test_and_clear_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) { + if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) { + section = "lease expired"; /* We're going to have to re-establish a clientid */ status = nfs4_reclaim_lease(clp); if (status < 0) goto out_error; - if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) - 
continue; - clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state); - - if (test_and_clear_bit(NFS4CLNT_SERVER_SCOPE_MISMATCH, - &clp->cl_state)) - nfs4_state_start_reclaim_nograce(clp); - else - set_bit(NFS4CLNT_RECLAIM_REBOOT, - &clp->cl_state); - - pnfs_destroy_all_layouts(clp); - } - - if (test_and_clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state)) { - status = nfs4_check_lease(clp); - if (status < 0) - goto out_error; - if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) - continue; + continue; } /* Initialize or reset the session */ - if (test_and_clear_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state) - && nfs4_has_session(clp)) { + if (test_and_clear_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state)) { + section = "reset session"; status = nfs4_reset_session(clp); if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) continue; @@ -1907,15 +2124,25 @@ static void nfs4_state_manager(struct nfs_client *clp) /* Send BIND_CONN_TO_SESSION */ if (test_and_clear_bit(NFS4CLNT_BIND_CONN_TO_SESSION, - &clp->cl_state) && nfs4_has_session(clp)) { + &clp->cl_state)) { + section = "bind conn to session"; status = nfs4_bind_conn_to_session(clp); if (status < 0) goto out_error; continue; } + if (test_and_clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state)) { + section = "check lease"; + status = nfs4_check_lease(clp); + if (status < 0) + goto out_error; + continue; + } + /* First recover reboot state... */ if (test_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state)) { + section = "reclaim reboot"; status = nfs4_do_reclaim(clp, clp->cl_mvops->reboot_recovery_ops); if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) || @@ -1930,6 +2157,7 @@ static void nfs4_state_manager(struct nfs_client *clp) /* Now recover expired state... 
*/ if (test_and_clear_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state)) { + section = "reclaim nograce"; status = nfs4_do_reclaim(clp, clp->cl_mvops->nograce_recovery_ops); if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) || @@ -1945,15 +2173,6 @@ static void nfs4_state_manager(struct nfs_client *clp) nfs_client_return_marked_delegations(clp); continue; } - /* Recall session slots */ - if (test_and_clear_bit(NFS4CLNT_RECALL_SLOT, &clp->cl_state) - && nfs4_has_session(clp)) { - status = nfs4_recall_slot(clp); - if (status < 0) - goto out_error; - continue; - } - nfs4_clear_state_manager_bit(clp); /* Did we race with an attempt to give us more work? */ @@ -1964,8 +2183,12 @@ static void nfs4_state_manager(struct nfs_client *clp) } while (atomic_read(&clp->cl_count) > 1); return; out_error: - pr_warn_ratelimited("NFS: state manager failed on NFSv4 server %s" - " with error %d\n", clp->cl_hostname, -status); + if (strlen(section)) + section_sep = ": "; + pr_warn_ratelimited("NFS: state manager%s%s failed on NFSv4 server %s" + " with error %d\n", section_sep, section, + clp->cl_hostname, -status); + ssleep(1); nfs4_end_drain_session(clp); nfs4_clear_state_manager_bit(clp); } diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c new file mode 100644 index 00000000000..84d2e9e2f31 --- /dev/null +++ b/fs/nfs/nfs4super.c @@ -0,0 +1,358 @@ +/* + * Copyright (c) 2012 Bryan Schumaker <bjschuma@netapp.com> + */ +#include <linux/init.h> +#include <linux/module.h> +#include <linux/nfs_idmap.h> +#include <linux/nfs4_mount.h> +#include <linux/nfs_fs.h> +#include "delegation.h" +#include "internal.h" +#include "nfs4_fs.h" +#include "pnfs.h" +#include "nfs.h" + +#define NFSDBG_FACILITY NFSDBG_VFS + +static int nfs4_write_inode(struct inode *inode, struct writeback_control *wbc); +static void nfs4_evict_inode(struct inode *inode); +static struct dentry *nfs4_remote_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *raw_data); +static struct dentry 
*nfs4_referral_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *raw_data); +static struct dentry *nfs4_remote_referral_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *raw_data); + +static struct file_system_type nfs4_remote_fs_type = { + .owner = THIS_MODULE, + .name = "nfs4", + .mount = nfs4_remote_mount, + .kill_sb = nfs_kill_super, + .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, +}; + +static struct file_system_type nfs4_remote_referral_fs_type = { + .owner = THIS_MODULE, + .name = "nfs4", + .mount = nfs4_remote_referral_mount, + .kill_sb = nfs_kill_super, + .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, +}; + +struct file_system_type nfs4_referral_fs_type = { + .owner = THIS_MODULE, + .name = "nfs4", + .mount = nfs4_referral_mount, + .kill_sb = nfs_kill_super, + .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, +}; + +static const struct super_operations nfs4_sops = { + .alloc_inode = nfs_alloc_inode, + .destroy_inode = nfs_destroy_inode, + .write_inode = nfs4_write_inode, + .drop_inode = nfs_drop_inode, + .put_super = nfs_put_super, + .statfs = nfs_statfs, + .evict_inode = nfs4_evict_inode, + .umount_begin = nfs_umount_begin, + .show_options = nfs_show_options, + .show_devname = nfs_show_devname, + .show_path = nfs_show_path, + .show_stats = nfs_show_stats, + .remount_fs = nfs_remount, +}; + +struct nfs_subversion nfs_v4 = { + .owner = THIS_MODULE, + .nfs_fs = &nfs4_fs_type, + .rpc_vers = &nfs_version4, + .rpc_ops = &nfs_v4_clientops, + .sops = &nfs4_sops, + .xattr = nfs4_xattr_handlers, +}; + +static int nfs4_write_inode(struct inode *inode, struct writeback_control *wbc) +{ + int ret = nfs_write_inode(inode, wbc); + + if (ret >= 0 && test_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(inode)->flags)) { + int status; + bool sync = true; + + if (wbc->sync_mode == WB_SYNC_NONE) + sync = false; + + status = pnfs_layoutcommit_inode(inode, sync); + if 
(status < 0) + return status; + } + return ret; +} + +/* + * Clean out any remaining NFSv4 state that might be left over due + * to open() calls that passed nfs_atomic_lookup, but failed to call + * nfs_open(). + */ +static void nfs4_evict_inode(struct inode *inode) +{ + truncate_inode_pages(&inode->i_data, 0); + clear_inode(inode); + pnfs_return_layout(inode); + pnfs_destroy_layout(NFS_I(inode)); + /* If we are holding a delegation, return it! */ + nfs_inode_return_delegation_noreclaim(inode); + /* First call standard NFS clear_inode() code */ + nfs_clear_inode(inode); +} + +/* + * Get the superblock for the NFS4 root partition + */ +static struct dentry * +nfs4_remote_mount(struct file_system_type *fs_type, int flags, + const char *dev_name, void *info) +{ + struct nfs_mount_info *mount_info = info; + struct nfs_server *server; + struct dentry *mntroot = ERR_PTR(-ENOMEM); + + mount_info->set_security = nfs_set_sb_security; + + /* Get a volume representation */ + server = nfs4_create_server(mount_info, &nfs_v4); + if (IS_ERR(server)) { + mntroot = ERR_CAST(server); + goto out; + } + + mntroot = nfs_fs_mount_common(server, flags, dev_name, mount_info, &nfs_v4); + +out: + return mntroot; +} + +static struct vfsmount *nfs_do_root_mount(struct file_system_type *fs_type, + int flags, void *data, const char *hostname) +{ + struct vfsmount *root_mnt; + char *root_devname; + size_t len; + + len = strlen(hostname) + 5; + root_devname = kmalloc(len, GFP_KERNEL); + if (root_devname == NULL) + return ERR_PTR(-ENOMEM); + /* Does hostname needs to be enclosed in brackets? 
*/ + if (strchr(hostname, ':')) + snprintf(root_devname, len, "[%s]:/", hostname); + else + snprintf(root_devname, len, "%s:/", hostname); + root_mnt = vfs_kern_mount(fs_type, flags, root_devname, data); + kfree(root_devname); + return root_mnt; +} + +struct nfs_referral_count { + struct list_head list; + const struct task_struct *task; + unsigned int referral_count; +}; + +static LIST_HEAD(nfs_referral_count_list); +static DEFINE_SPINLOCK(nfs_referral_count_list_lock); + +static struct nfs_referral_count *nfs_find_referral_count(void) +{ + struct nfs_referral_count *p; + + list_for_each_entry(p, &nfs_referral_count_list, list) { + if (p->task == current) + return p; + } + return NULL; +} + +#define NFS_MAX_NESTED_REFERRALS 2 + +static int nfs_referral_loop_protect(void) +{ + struct nfs_referral_count *p, *new; + int ret = -ENOMEM; + + new = kmalloc(sizeof(*new), GFP_KERNEL); + if (!new) + goto out; + new->task = current; + new->referral_count = 1; + + ret = 0; + spin_lock(&nfs_referral_count_list_lock); + p = nfs_find_referral_count(); + if (p != NULL) { + if (p->referral_count >= NFS_MAX_NESTED_REFERRALS) + ret = -ELOOP; + else + p->referral_count++; + } else { + list_add(&new->list, &nfs_referral_count_list); + new = NULL; + } + spin_unlock(&nfs_referral_count_list_lock); + kfree(new); +out: + return ret; +} + +static void nfs_referral_loop_unprotect(void) +{ + struct nfs_referral_count *p; + + spin_lock(&nfs_referral_count_list_lock); + p = nfs_find_referral_count(); + p->referral_count--; + if (p->referral_count == 0) + list_del(&p->list); + else + p = NULL; + spin_unlock(&nfs_referral_count_list_lock); + kfree(p); +} + +static struct dentry *nfs_follow_remote_path(struct vfsmount *root_mnt, + const char *export_path) +{ + struct dentry *dentry; + int err; + + if (IS_ERR(root_mnt)) + return ERR_CAST(root_mnt); + + err = nfs_referral_loop_protect(); + if (err) { + mntput(root_mnt); + return ERR_PTR(err); + } + + dentry = mount_subtree(root_mnt, export_path); + 
nfs_referral_loop_unprotect(); + + return dentry; +} + +struct dentry *nfs4_try_mount(int flags, const char *dev_name, + struct nfs_mount_info *mount_info, + struct nfs_subversion *nfs_mod) +{ + char *export_path; + struct vfsmount *root_mnt; + struct dentry *res; + struct nfs_parsed_mount_data *data = mount_info->parsed; + + dfprintk(MOUNT, "--> nfs4_try_mount()\n"); + + export_path = data->nfs_server.export_path; + data->nfs_server.export_path = "/"; + root_mnt = nfs_do_root_mount(&nfs4_remote_fs_type, flags, mount_info, + data->nfs_server.hostname); + data->nfs_server.export_path = export_path; + + res = nfs_follow_remote_path(root_mnt, export_path); + + dfprintk(MOUNT, "<-- nfs4_try_mount() = %ld%s\n", + IS_ERR(res) ? PTR_ERR(res) : 0, + IS_ERR(res) ? " [error]" : ""); + return res; +} + +static struct dentry * +nfs4_remote_referral_mount(struct file_system_type *fs_type, int flags, + const char *dev_name, void *raw_data) +{ + struct nfs_mount_info mount_info = { + .fill_super = nfs_fill_super, + .set_security = nfs_clone_sb_security, + .cloned = raw_data, + }; + struct nfs_server *server; + struct dentry *mntroot = ERR_PTR(-ENOMEM); + + dprintk("--> nfs4_referral_get_sb()\n"); + + mount_info.mntfh = nfs_alloc_fhandle(); + if (mount_info.cloned == NULL || mount_info.mntfh == NULL) + goto out; + + /* create a new volume representation */ + server = nfs4_create_referral_server(mount_info.cloned, mount_info.mntfh); + if (IS_ERR(server)) { + mntroot = ERR_CAST(server); + goto out; + } + + mntroot = nfs_fs_mount_common(server, flags, dev_name, &mount_info, &nfs_v4); +out: + nfs_free_fhandle(mount_info.mntfh); + return mntroot; +} + +/* + * Create an NFS4 server record on referral traversal + */ +static struct dentry *nfs4_referral_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *raw_data) +{ + struct nfs_clone_mount *data = raw_data; + char *export_path; + struct vfsmount *root_mnt; + struct dentry *res; + + dprintk("--> 
nfs4_referral_mount()\n"); + + export_path = data->mnt_path; + data->mnt_path = "/"; + + root_mnt = nfs_do_root_mount(&nfs4_remote_referral_fs_type, + flags, data, data->hostname); + data->mnt_path = export_path; + + res = nfs_follow_remote_path(root_mnt, export_path); + dprintk("<-- nfs4_referral_mount() = %ld%s\n", + IS_ERR(res) ? PTR_ERR(res) : 0, + IS_ERR(res) ? " [error]" : ""); + return res; +} + + +static int __init init_nfs_v4(void) +{ + int err; + + err = nfs_idmap_init(); + if (err) + goto out; + + err = nfs4_register_sysctl(); + if (err) + goto out1; + + register_nfs_version(&nfs_v4); + return 0; +out1: + nfs_idmap_quit(); +out: + return err; +} + +static void __exit exit_nfs_v4(void) +{ + unregister_nfs_version(&nfs_v4); + nfs4_unregister_sysctl(); + nfs_idmap_quit(); +} + +MODULE_LICENSE("GPL"); + +module_init(init_nfs_v4); +module_exit(exit_nfs_v4); diff --git a/fs/nfs/nfs4sysctl.c b/fs/nfs/nfs4sysctl.c new file mode 100644 index 00000000000..2628d921b7e --- /dev/null +++ b/fs/nfs/nfs4sysctl.c @@ -0,0 +1,69 @@ +/* + * linux/fs/nfs/nfs4sysctl.c + * + * Sysctl interface to NFS v4 parameters + * + * Copyright (c) 2006 Trond Myklebust <Trond.Myklebust@netapp.com> + */ +#include <linux/sysctl.h> +#include <linux/nfs_idmap.h> +#include <linux/nfs_fs.h> + +#include "nfs4_fs.h" +#include "callback.h" + +static const int nfs_set_port_min = 0; +static const int nfs_set_port_max = 65535; +static struct ctl_table_header *nfs4_callback_sysctl_table; + +static ctl_table nfs4_cb_sysctls[] = { + { + .procname = "nfs_callback_tcpport", + .data = &nfs_callback_set_tcpport, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = (int *)&nfs_set_port_min, + .extra2 = (int *)&nfs_set_port_max, + }, + { + .procname = "idmap_cache_timeout", + .data = &nfs_idmap_cache_timeout, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + { } +}; + +static ctl_table nfs4_cb_sysctl_dir[] = { + { + .procname = 
"nfs", + .mode = 0555, + .child = nfs4_cb_sysctls, + }, + { } +}; + +static ctl_table nfs4_cb_sysctl_root[] = { + { + .procname = "fs", + .mode = 0555, + .child = nfs4_cb_sysctl_dir, + }, + { } +}; + +int nfs4_register_sysctl(void) +{ + nfs4_callback_sysctl_table = register_sysctl_table(nfs4_cb_sysctl_root); + if (nfs4_callback_sysctl_table == NULL) + return -ENOMEM; + return 0; +} + +void nfs4_unregister_sysctl(void) +{ + unregister_sysctl_table(nfs4_callback_sysctl_table); + nfs4_callback_sysctl_table = NULL; +} diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 18fae29b030..26b14392043 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -56,6 +56,7 @@ #include "nfs4_fs.h" #include "internal.h" +#include "nfs4session.h" #include "pnfs.h" #include "netns.h" @@ -270,6 +271,8 @@ static int nfs4_stat_to_errno(int); #if defined(CONFIG_NFS_V4_1) #define NFS4_MAX_MACHINE_NAME_LEN (64) +#define IMPL_NAME_LIMIT (sizeof(utsname()->sysname) + sizeof(utsname()->release) + \ + sizeof(utsname()->version) + sizeof(utsname()->machine) + 8) #define encode_exchange_id_maxsz (op_encode_hdr_maxsz + \ encode_verifier_maxsz + \ @@ -282,7 +285,7 @@ static int nfs4_stat_to_errno(int); 1 /* nii_domain */ + \ XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + \ 1 /* nii_name */ + \ - XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + \ + XDR_QUADLEN(IMPL_NAME_LIMIT) + \ 3 /* nii_date */) #define decode_exchange_id_maxsz (op_decode_hdr_maxsz + \ 2 /* eir_clientid */ + \ @@ -447,12 +450,14 @@ static int nfs4_stat_to_errno(int); encode_sequence_maxsz + \ encode_putfh_maxsz + \ encode_open_maxsz + \ + encode_access_maxsz + \ encode_getfh_maxsz + \ encode_getattr_maxsz) #define NFS4_dec_open_sz (compound_decode_hdr_maxsz + \ decode_sequence_maxsz + \ decode_putfh_maxsz + \ decode_open_maxsz + \ + decode_access_maxsz + \ decode_getfh_maxsz + \ decode_getattr_maxsz) #define NFS4_enc_open_confirm_sz \ @@ -467,11 +472,13 @@ static int nfs4_stat_to_errno(int); encode_sequence_maxsz + \ encode_putfh_maxsz + \ encode_open_maxsz 
+ \ + encode_access_maxsz + \ encode_getattr_maxsz) #define NFS4_dec_open_noattr_sz (compound_decode_hdr_maxsz + \ decode_sequence_maxsz + \ decode_putfh_maxsz + \ decode_open_maxsz + \ + decode_access_maxsz + \ decode_getattr_maxsz) #define NFS4_enc_open_downgrade_sz \ (compound_encode_hdr_maxsz + \ @@ -852,12 +859,6 @@ const u32 nfs41_maxread_overhead = ((RPC_MAX_HEADER_WITH_AUTH + XDR_UNIT); #endif /* CONFIG_NFS_V4_1 */ -static unsigned short send_implementation_id = 1; - -module_param(send_implementation_id, ushort, 0644); -MODULE_PARM_DESC(send_implementation_id, - "Send implementation ID with NFSv4.1 exchange_id"); - static const umode_t nfs_type2fmt[] = { [NF4BAD] = 0, [NF4REG] = S_IFREG, @@ -938,7 +939,7 @@ static void encode_compound_hdr(struct xdr_stream *xdr, * but this is not required as a MUST for the server to do so. */ hdr->replen = RPC_REPHDRSIZE + auth->au_rslack + 3 + hdr->taglen; - BUG_ON(hdr->taglen > NFS4_MAXTAGLEN); + WARN_ON_ONCE(hdr->taglen > NFS4_MAXTAGLEN); encode_string(xdr, hdr->taglen, hdr->tag); p = reserve_space(xdr, 8); *p++ = cpu_to_be32(hdr->minorversion); @@ -957,7 +958,7 @@ static void encode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 op, static void encode_nops(struct compound_hdr *hdr) { - BUG_ON(hdr->nops > NFS4_MAX_OPS); + WARN_ON_ONCE(hdr->nops > NFS4_MAX_OPS); *hdr->nops_p = htonl(hdr->nops); } @@ -1236,7 +1237,7 @@ static void encode_link(struct xdr_stream *xdr, const struct qstr *name, struct static inline int nfs4_lock_type(struct file_lock *fl, int block) { - if ((fl->fl_type & (F_RDLCK|F_WRLCK|F_UNLCK)) == F_RDLCK) + if (fl->fl_type == F_RDLCK) return block ? NFS4_READW_LT : NFS4_READ_LT; return block ? 
NFS4_WRITEW_LT : NFS4_WRITE_LT; } @@ -1405,7 +1406,6 @@ static void encode_opentype(struct xdr_stream *xdr, const struct nfs_openargs *a *p = cpu_to_be32(NFS4_OPEN_NOCREATE); break; default: - BUG_ON(arg->claim != NFS4_OPEN_CLAIM_NULL); *p = cpu_to_be32(NFS4_OPEN_CREATE); encode_createmode(xdr, arg); } @@ -1515,8 +1515,12 @@ static void encode_open_stateid(struct xdr_stream *xdr, nfs4_stateid stateid; if (ctx->state != NULL) { + const struct nfs_lockowner *lockowner = NULL; + + if (l_ctx != NULL) + lockowner = &l_ctx->lockowner; nfs4_select_rw_stateid(&stateid, ctx->state, - fmode, l_ctx->lockowner, l_ctx->pid); + fmode, lockowner); if (zero_seqid) stateid.seqid = 0; encode_nfs4_stateid(xdr, &stateid); @@ -1619,7 +1623,6 @@ encode_setacl(struct xdr_stream *xdr, struct nfs_setaclargs *arg, struct compoun p = reserve_space(xdr, 2*4); *p++ = cpu_to_be32(1); *p = cpu_to_be32(FATTR4_WORD0_ACL); - BUG_ON(arg->acl_len % 4); p = reserve_space(xdr, 4); *p = cpu_to_be32(arg->acl_len); xdr_write_pages(xdr, arg->acl_pages, arg->acl_pgbase, arg->acl_len); @@ -1711,7 +1714,7 @@ static void encode_exchange_id(struct xdr_stream *xdr, struct compound_hdr *hdr) { __be32 *p; - char impl_name[NFS4_OPAQUE_LIMIT]; + char impl_name[IMPL_NAME_LIMIT]; int len = 0; encode_op_hdr(xdr, OP_EXCHANGE_ID, decode_exchange_id_maxsz, hdr); @@ -1726,7 +1729,7 @@ static void encode_exchange_id(struct xdr_stream *xdr, if (send_implementation_id && sizeof(CONFIG_NFS_V4_1_IMPLEMENTATION_ID_DOMAIN) > 1 && sizeof(CONFIG_NFS_V4_1_IMPLEMENTATION_ID_DOMAIN) - <= NFS4_OPAQUE_LIMIT + 1) + <= sizeof(impl_name) + 1) len = snprintf(impl_name, sizeof(impl_name), "%s %s %s %s", utsname()->sysname, utsname()->release, utsname()->version, utsname()->machine); @@ -1833,18 +1836,16 @@ static void encode_sequence(struct xdr_stream *xdr, struct compound_hdr *hdr) { #if defined(CONFIG_NFS_V4_1) - struct nfs4_session *session = args->sa_session; + struct nfs4_session *session; struct nfs4_slot_table *tp; - struct nfs4_slot 
*slot; + struct nfs4_slot *slot = args->sa_slot; __be32 *p; - if (!session) + if (slot == NULL) return; - tp = &session->fc_slot_table; - - WARN_ON(args->sa_slotid == NFS4_MAX_SLOT_TABLE); - slot = tp->slots + args->sa_slotid; + tp = slot->table; + session = tp->session; encode_op_hdr(xdr, OP_SEQUENCE, decode_sequence_maxsz, hdr); @@ -1858,12 +1859,12 @@ static void encode_sequence(struct xdr_stream *xdr, ((u32 *)session->sess_id.data)[1], ((u32 *)session->sess_id.data)[2], ((u32 *)session->sess_id.data)[3], - slot->seq_nr, args->sa_slotid, + slot->seq_nr, slot->slot_nr, tp->highest_used_slotid, args->sa_cache_this); p = reserve_space(xdr, NFS4_MAX_SESSIONID_LEN + 16); p = xdr_encode_opaque_fixed(p, session->sess_id.data, NFS4_MAX_SESSIONID_LEN); *p++ = cpu_to_be32(slot->seq_nr); - *p++ = cpu_to_be32(args->sa_slotid); + *p++ = cpu_to_be32(slot->slot_nr); *p++ = cpu_to_be32(tp->highest_used_slotid); *p = cpu_to_be32(args->sa_cache_this); #endif /* CONFIG_NFS_V4_1 */ @@ -2025,8 +2026,9 @@ static void encode_free_stateid(struct xdr_stream *xdr, static u32 nfs4_xdr_minorversion(const struct nfs4_sequence_args *args) { #if defined(CONFIG_NFS_V4_1) - if (args->sa_session) - return args->sa_session->clp->cl_mvops->minor_version; + + if (args->sa_slot) + return args->sa_slot->table->session->clp->cl_mvops->minor_version; #endif /* CONFIG_NFS_V4_1 */ return 0; } @@ -2222,6 +2224,8 @@ static void nfs4_xdr_enc_open(struct rpc_rqst *req, struct xdr_stream *xdr, encode_putfh(xdr, args->fh, &hdr); encode_open(xdr, args, &hdr); encode_getfh(xdr, &hdr); + if (args->access) + encode_access(xdr, args->access, &hdr); encode_getfattr_open(xdr, args->bitmask, args->open_bitmap, &hdr); encode_nops(&hdr); } @@ -2258,7 +2262,9 @@ static void nfs4_xdr_enc_open_noattr(struct rpc_rqst *req, encode_sequence(xdr, &args->seq_args, &hdr); encode_putfh(xdr, args->fh, &hdr); encode_open(xdr, args, &hdr); - encode_getfattr(xdr, args->bitmask, &hdr); + if (args->access) + encode_access(xdr, 
args->access, &hdr); + encode_getfattr_open(xdr, args->bitmask, args->open_bitmap, &hdr); encode_nops(&hdr); } @@ -3078,7 +3084,7 @@ out_overflow: return -EIO; } -static inline int decode_attr_length(struct xdr_stream *xdr, uint32_t *attrlen, __be32 **savep) +static int decode_attr_length(struct xdr_stream *xdr, uint32_t *attrlen, unsigned int *savep) { __be32 *p; @@ -3086,7 +3092,7 @@ static inline int decode_attr_length(struct xdr_stream *xdr, uint32_t *attrlen, if (unlikely(!p)) goto out_overflow; *attrlen = be32_to_cpup(p); - *savep = xdr->p; + *savep = xdr_stream_pos(xdr); return 0; out_overflow: print_overflow_msg(__func__, xdr); @@ -4068,10 +4074,10 @@ static int decode_attr_time_modify(struct xdr_stream *xdr, uint32_t *bitmap, str return status; } -static int verify_attr_len(struct xdr_stream *xdr, __be32 *savep, uint32_t attrlen) +static int verify_attr_len(struct xdr_stream *xdr, unsigned int savep, uint32_t attrlen) { unsigned int attrwords = XDR_QUADLEN(attrlen); - unsigned int nwords = xdr->p - savep; + unsigned int nwords = (xdr_stream_pos(xdr) - savep) >> 2; if (unlikely(attrwords != nwords)) { dprintk("%s: server returned incorrect attribute length: " @@ -4101,7 +4107,7 @@ out_overflow: return -EIO; } -static int decode_access(struct xdr_stream *xdr, struct nfs4_accessres *access) +static int decode_access(struct xdr_stream *xdr, u32 *supported, u32 *access) { __be32 *p; uint32_t supp, acc; @@ -4115,8 +4121,8 @@ static int decode_access(struct xdr_stream *xdr, struct nfs4_accessres *access) goto out_overflow; supp = be32_to_cpup(p++); acc = be32_to_cpup(p); - access->supported = supp; - access->access = acc; + *supported = supp; + *access = acc; return 0; out_overflow: print_overflow_msg(__func__, xdr); @@ -4158,13 +4164,18 @@ static int decode_verifier(struct xdr_stream *xdr, void *verifier) return decode_opaque_fixed(xdr, verifier, NFS4_VERIFIER_SIZE); } +static int decode_write_verifier(struct xdr_stream *xdr, struct nfs_write_verifier *verifier) 
+{ + return decode_opaque_fixed(xdr, verifier->data, NFS4_VERIFIER_SIZE); +} + static int decode_commit(struct xdr_stream *xdr, struct nfs_commitres *res) { int status; status = decode_op_hdr(xdr, OP_COMMIT); if (!status) - status = decode_verifier(xdr, res->verf->verifier); + status = decode_write_verifier(xdr, &res->verf->verifier); return status; } @@ -4193,7 +4204,7 @@ out_overflow: static int decode_server_caps(struct xdr_stream *xdr, struct nfs4_server_caps_res *res) { - __be32 *savep; + unsigned int savep; uint32_t attrlen, bitmap[3] = {0}; int status; @@ -4222,7 +4233,7 @@ xdr_error: static int decode_statfs(struct xdr_stream *xdr, struct nfs_fsstat *fsstat) { - __be32 *savep; + unsigned int savep; uint32_t attrlen, bitmap[3] = {0}; int status; @@ -4254,7 +4265,7 @@ xdr_error: static int decode_pathconf(struct xdr_stream *xdr, struct nfs_pathconf *pathconf) { - __be32 *savep; + unsigned int savep; uint32_t attrlen, bitmap[3] = {0}; int status; @@ -4299,7 +4310,8 @@ out_overflow: static int decode_first_threshold_item4(struct xdr_stream *xdr, struct nfs4_threshold *res) { - __be32 *p, *savep; + __be32 *p; + unsigned int savep; uint32_t bitmap[3] = {0,}, attrlen; int status; @@ -4503,7 +4515,7 @@ static int decode_getfattr_generic(struct xdr_stream *xdr, struct nfs_fattr *fat struct nfs_fh *fh, struct nfs4_fs_locations *fs_loc, const struct nfs_server *server) { - __be32 *savep; + unsigned int savep; uint32_t attrlen, bitmap[3] = {0}; int status; @@ -4615,7 +4627,7 @@ static int decode_attr_layout_blksize(struct xdr_stream *xdr, uint32_t *bitmap, static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo) { - __be32 *savep; + unsigned int savep; uint32_t attrlen, bitmap[3]; int status; @@ -4920,9 +4932,8 @@ static int decode_putrootfh(struct xdr_stream *xdr) static int decode_read(struct xdr_stream *xdr, struct rpc_rqst *req, struct nfs_readres *res) { - struct kvec *iov = req->rq_rcv_buf.head; __be32 *p; - uint32_t count, eof, recvd, hdrlen; 
+ uint32_t count, eof, recvd; int status; status = decode_op_hdr(xdr, OP_READ); @@ -4933,15 +4944,13 @@ static int decode_read(struct xdr_stream *xdr, struct rpc_rqst *req, struct nfs_ goto out_overflow; eof = be32_to_cpup(p++); count = be32_to_cpup(p); - hdrlen = (u8 *) xdr->p - (u8 *) iov->iov_base; - recvd = req->rq_rcv_buf.len - hdrlen; + recvd = xdr_read_pages(xdr, count); if (count > recvd) { dprintk("NFS: server cheating in read reply: " "count %u > recvd %u\n", count, recvd); count = recvd; eof = 0; } - xdr_read_pages(xdr, count); res->eof = eof; res->count = count; return 0; @@ -4952,10 +4961,6 @@ out_overflow: static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct nfs4_readdir_res *readdir) { - struct xdr_buf *rcvbuf = &req->rq_rcv_buf; - struct kvec *iov = rcvbuf->head; - size_t hdrlen; - u32 recvd, pglen = rcvbuf->page_len; int status; __be32 verf[2]; @@ -4967,22 +4972,12 @@ static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct n memcpy(verf, readdir->verifier.data, sizeof(verf)); dprintk("%s: verifier = %08x:%08x\n", __func__, verf[0], verf[1]); - - hdrlen = (char *) xdr->p - (char *) iov->iov_base; - recvd = rcvbuf->len - hdrlen; - if (pglen > recvd) - pglen = recvd; - xdr_read_pages(xdr, pglen); - - - return pglen; + return xdr_read_pages(xdr, xdr->buf->page_len); } static int decode_readlink(struct xdr_stream *xdr, struct rpc_rqst *req) { struct xdr_buf *rcvbuf = &req->rq_rcv_buf; - struct kvec *iov = rcvbuf->head; - size_t hdrlen; u32 len, recvd; __be32 *p; int status; @@ -5000,14 +4995,12 @@ static int decode_readlink(struct xdr_stream *xdr, struct rpc_rqst *req) dprintk("nfs: server returned giant symlink!\n"); return -ENAMETOOLONG; } - hdrlen = (char *) xdr->p - (char *) iov->iov_base; - recvd = req->rq_rcv_buf.len - hdrlen; + recvd = xdr_read_pages(xdr, len); if (recvd < len) { dprintk("NFS: server cheating in readlink reply: " "count %u > recvd %u\n", len, recvd); return -EIO; } - 
xdr_read_pages(xdr, len); /* * The XDR encode routine has set things up so that * the link text will be copied directly into the @@ -5063,23 +5056,20 @@ decode_restorefh(struct xdr_stream *xdr) static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req, struct nfs_getaclres *res) { - __be32 *savep, *bm_p; + unsigned int savep; uint32_t attrlen, bitmap[3] = {0}; - struct kvec *iov = req->rq_rcv_buf.head; int status; - size_t page_len = xdr->buf->page_len; + unsigned int pg_offset; res->acl_len = 0; if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0) goto out; - bm_p = xdr->p; - res->acl_data_offset = be32_to_cpup(bm_p) + 2; - res->acl_data_offset <<= 2; - /* Check if the acl data starts beyond the allocated buffer */ - if (res->acl_data_offset > page_len) - return -ERANGE; + xdr_enter_page(xdr, xdr->buf->page_len); + + /* Calculate the offset of the page data */ + pg_offset = xdr->buf->head[0].iov_len; if ((status = decode_attr_bitmap(xdr, bitmap)) != 0) goto out; @@ -5089,29 +5079,20 @@ static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req, if (unlikely(bitmap[0] & (FATTR4_WORD0_ACL - 1U))) return -EIO; if (likely(bitmap[0] & FATTR4_WORD0_ACL)) { - size_t hdrlen; /* The bitmap (xdr len + bitmaps) and the attr xdr len words * are stored with the acl data to handle the problem of * variable length bitmaps.*/ - xdr->p = bm_p; - - /* We ignore &savep and don't do consistency checks on - * the attr length. Let userspace figure it out.... 
*/ - hdrlen = (u8 *)xdr->p - (u8 *)iov->iov_base; - attrlen += res->acl_data_offset; - if (attrlen > page_len) { - if (res->acl_flags & NFS4_ACL_LEN_REQUEST) { - /* getxattr interface called with a NULL buf */ - res->acl_len = attrlen; - goto out; - } - dprintk("NFS: acl reply: attrlen %u > page_len %zu\n", - attrlen, page_len); - return -EINVAL; - } - xdr_read_pages(xdr, attrlen); + res->acl_data_offset = xdr_stream_pos(xdr) - pg_offset; res->acl_len = attrlen; + + /* Check for receive buffer overflow */ + if (res->acl_len > (xdr->nwords << 2) || + res->acl_len + res->acl_data_offset > xdr->buf->page_len) { + res->acl_flags |= NFS4_ACL_TRUNC; + dprintk("NFS: acl reply: attrlen %u > page_len %u\n", + attrlen, xdr->nwords << 2); + } } else status = -EOPNOTSUPP; @@ -5212,13 +5193,12 @@ static int decode_write(struct xdr_stream *xdr, struct nfs_writeres *res) if (status) return status; - p = xdr_inline_decode(xdr, 16); + p = xdr_inline_decode(xdr, 8); if (unlikely(!p)) goto out_overflow; res->count = be32_to_cpup(p++); res->verf->committed = be32_to_cpup(p++); - memcpy(res->verf->verifier, p, NFS4_VERIFIER_SIZE); - return 0; + return decode_write_verifier(xdr, &res->verf->verifier); out_overflow: print_overflow_msg(__func__, xdr); return -EIO; @@ -5529,12 +5509,13 @@ static int decode_sequence(struct xdr_stream *xdr, struct rpc_rqst *rqstp) { #if defined(CONFIG_NFS_V4_1) + struct nfs4_session *session; struct nfs4_sessionid id; u32 dummy; int status; __be32 *p; - if (!res->sr_session) + if (res->sr_slot == NULL) return 0; status = decode_op_hdr(xdr, OP_SEQUENCE); @@ -5548,8 +5529,9 @@ static int decode_sequence(struct xdr_stream *xdr, * sequence number, the server is looney tunes. 
*/ status = -EREMOTEIO; + session = res->sr_slot->table->session; - if (memcmp(id.data, res->sr_session->sess_id.data, + if (memcmp(id.data, session->sess_id.data, NFS4_MAX_SESSIONID_LEN)) { dprintk("%s Invalid session id\n", __func__); goto out_err; @@ -5567,14 +5549,14 @@ static int decode_sequence(struct xdr_stream *xdr, } /* slot id */ dummy = be32_to_cpup(p++); - if (dummy != res->sr_slot - res->sr_session->fc_slot_table.slots) { + if (dummy != res->sr_slot->slot_nr) { dprintk("%s Invalid slot id\n", __func__); goto out_err; } - /* highest slot id - currently not processed */ - dummy = be32_to_cpup(p++); - /* target highest slot id - currently not processed */ - dummy = be32_to_cpup(p++); + /* highest slot id */ + res->sr_highest_slotid = be32_to_cpup(p++); + /* target highest slot id */ + res->sr_target_highest_slotid = be32_to_cpup(p++); /* result flags */ res->sr_status_flags = be32_to_cpup(p); status = 0; @@ -5599,7 +5581,7 @@ static int decode_getdevicelist(struct xdr_stream *xdr, { __be32 *p; int status, i; - struct nfs_writeverf verftemp; + nfs4_verifier verftemp; status = decode_op_hdr(xdr, OP_GETDEVICELIST); if (status) @@ -5613,7 +5595,7 @@ static int decode_getdevicelist(struct xdr_stream *xdr, p += 2; /* Read verifier */ - p = xdr_decode_opaque_fixed(p, verftemp.verifier, NFS4_VERIFIER_SIZE); + p = xdr_decode_opaque_fixed(p, verftemp.data, NFS4_VERIFIER_SIZE); res->num_devs = be32_to_cpup(p); @@ -5674,7 +5656,8 @@ static int decode_getdeviceinfo(struct xdr_stream *xdr, * and places the remaining xdr data in xdr_buf->tail */ pdev->mincount = be32_to_cpup(p); - xdr_read_pages(xdr, pdev->mincount); /* include space for the length */ + if (xdr_read_pages(xdr, pdev->mincount) != pdev->mincount) + goto out_overflow; /* Parse notification bitmap, verifying that it is zero. 
*/ p = xdr_inline_decode(xdr, 4); @@ -5707,9 +5690,7 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req, __be32 *p; int status; u32 layout_count; - struct xdr_buf *rcvbuf = &req->rq_rcv_buf; - struct kvec *iov = rcvbuf->head; - u32 hdrlen, recvd; + u32 recvd; status = decode_op_hdr(xdr, OP_LAYOUTGET); if (status) @@ -5746,8 +5727,7 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req, res->type, res->layoutp->len); - hdrlen = (u8 *) xdr->p - (u8 *) iov->iov_base; - recvd = req->rq_rcv_buf.len - hdrlen; + recvd = xdr_read_pages(xdr, res->layoutp->len); if (res->layoutp->len > recvd) { dprintk("NFS: server cheating in layoutget reply: " "layout len %u > recvd %u\n", @@ -5755,8 +5735,6 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req, return -EINVAL; } - xdr_read_pages(xdr, res->layoutp->len); - if (layout_count > 1) { /* We only handle a length one array at the moment. Any * further entries are just ignored. Note that this means @@ -5924,7 +5902,7 @@ static int nfs4_xdr_dec_access(struct rpc_rqst *rqstp, struct xdr_stream *xdr, status = decode_putfh(xdr); if (status != 0) goto out; - status = decode_access(xdr, res); + status = decode_access(xdr, &res->supported, &res->access); if (status != 0) goto out; decode_getfattr(xdr, res->fattr, res->server); @@ -6262,8 +6240,11 @@ static int nfs4_xdr_dec_open(struct rpc_rqst *rqstp, struct xdr_stream *xdr, status = decode_open(xdr, res); if (status) goto out; - if (decode_getfh(xdr, &res->fh) != 0) + status = decode_getfh(xdr, &res->fh); + if (status) goto out; + if (res->access_request) + decode_access(xdr, &res->access_supported, &res->access_result); decode_getfattr(xdr, res->f_attr, res->server); out: return status; @@ -6312,6 +6293,8 @@ static int nfs4_xdr_dec_open_noattr(struct rpc_rqst *rqstp, status = decode_open(xdr, res); if (status) goto out; + if (res->access_request) + decode_access(xdr, &res->access_supported, &res->access_result); 
decode_getfattr(xdr, res->f_attr, res->server); out: return status; @@ -7103,6 +7086,7 @@ out: int nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, int plus) { + unsigned int savep; uint32_t bitmap[3] = {0}; uint32_t len; __be32 *p = xdr_inline_decode(xdr, 4); @@ -7141,7 +7125,7 @@ int nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, if (decode_attr_bitmap(xdr, bitmap) < 0) goto out_overflow; - if (decode_attr_length(xdr, &len, &p) < 0) + if (decode_attr_length(xdr, &len, &savep) < 0) goto out_overflow; if (decode_getfattr_attrs(xdr, bitmap, entry->fattr, entry->fh, diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index f50d3e8d6f2..c6f990656f8 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c @@ -41,6 +41,7 @@ #include <scsi/osd_ore.h> #include "objlayout.h" +#include "../internal.h" #define NFSDBG_FACILITY NFSDBG_PNFS_LD @@ -368,7 +369,7 @@ void objio_free_result(struct objlayout_io_res *oir) kfree(objios); } -enum pnfs_osd_errno osd_pri_2_pnfs_err(enum osd_err_priority oep) +static enum pnfs_osd_errno osd_pri_2_pnfs_err(enum osd_err_priority oep) { switch (oep) { case OSD_ERR_PRI_NO_ERROR: @@ -570,17 +571,72 @@ static bool objio_pg_test(struct nfs_pageio_descriptor *pgio, return false; return pgio->pg_count + req->wb_bytes <= - OBJIO_LSEG(pgio->pg_lseg)->layout.max_io_length; + (unsigned long)pgio->pg_layout_private; +} + +static void objio_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) +{ + pnfs_generic_pg_init_read(pgio, req); + if (unlikely(pgio->pg_lseg == NULL)) + return; /* Not pNFS */ + + pgio->pg_layout_private = (void *) + OBJIO_LSEG(pgio->pg_lseg)->layout.max_io_length; +} + +static bool aligned_on_raid_stripe(u64 offset, struct ore_layout *layout, + unsigned long *stripe_end) +{ + u32 stripe_off; + unsigned stripe_size; + + if (layout->raid_algorithm == PNFS_OSD_RAID_0) + return true; + + stripe_size = layout->stripe_unit * + 
(layout->group_width - layout->parity); + + div_u64_rem(offset, stripe_size, &stripe_off); + if (!stripe_off) + return true; + + *stripe_end = stripe_size - stripe_off; + return false; +} + +static void objio_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) +{ + unsigned long stripe_end = 0; + u64 wb_size; + + if (pgio->pg_dreq == NULL) + wb_size = i_size_read(pgio->pg_inode) - req_offset(req); + else + wb_size = nfs_dreq_bytes_left(pgio->pg_dreq); + + pnfs_generic_pg_init_write(pgio, req, wb_size); + if (unlikely(pgio->pg_lseg == NULL)) + return; /* Not pNFS */ + + if (req->wb_offset || + !aligned_on_raid_stripe(req->wb_index * PAGE_SIZE, + &OBJIO_LSEG(pgio->pg_lseg)->layout, + &stripe_end)) { + pgio->pg_layout_private = (void *)stripe_end; + } else { + pgio->pg_layout_private = (void *) + OBJIO_LSEG(pgio->pg_lseg)->layout.max_io_length; + } } static const struct nfs_pageio_ops objio_pg_read_ops = { - .pg_init = pnfs_generic_pg_init_read, + .pg_init = objio_init_read, .pg_test = objio_pg_test, .pg_doio = pnfs_generic_pg_readpages, }; static const struct nfs_pageio_ops objio_pg_write_ops = { - .pg_init = pnfs_generic_pg_init_write, + .pg_init = objio_init_write, .pg_test = objio_pg_test, .pg_doio = pnfs_generic_pg_writepages, }; diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c index 87461354530..a9ebd817278 100644 --- a/fs/nfs/objlayout/objlayout.c +++ b/fs/nfs/objlayout/objlayout.c @@ -148,17 +148,6 @@ end_offset(u64 start, u64 len) return end >= start ? end : NFS4_MAX_UINT64; } -/* last octet in a range */ -static inline u64 -last_byte_offset(u64 start, u64 len) -{ - u64 end; - - BUG_ON(!len); - end = start + len; - return end > start ? 
end - 1 : NFS4_MAX_UINT64; -} - static void _fix_verify_io_params(struct pnfs_layout_segment *lseg, struct page ***p_pages, unsigned *p_pgbase, u64 offset, unsigned long count) diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index aed913c833f..e56e846e9d2 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -49,11 +49,13 @@ void nfs_pgheader_init(struct nfs_pageio_descriptor *desc, hdr->io_start = req_offset(hdr->req); hdr->good_bytes = desc->pg_count; hdr->dreq = desc->pg_dreq; + hdr->layout_private = desc->pg_layout_private; hdr->release = release; hdr->completion_ops = desc->pg_completion_ops; if (hdr->completion_ops->init_hdr) hdr->completion_ops->init_hdr(hdr); } +EXPORT_SYMBOL_GPL(nfs_pgheader_init); void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos) { @@ -70,7 +72,7 @@ void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos) static inline struct nfs_page * nfs_page_alloc(void) { - struct nfs_page *p = kmem_cache_zalloc(nfs_page_cachep, GFP_KERNEL); + struct nfs_page *p = kmem_cache_zalloc(nfs_page_cachep, GFP_NOIO); if (p) INIT_LIST_HEAD(&p->wb_list); return p; @@ -100,6 +102,7 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode, unsigned int offset, unsigned int count) { struct nfs_page *req; + struct nfs_lock_context *l_ctx; /* try to allocate the request struct */ req = nfs_page_alloc(); @@ -107,17 +110,18 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode, return ERR_PTR(-ENOMEM); /* get lock context early so we can deal with alloc failures */ - req->wb_lock_context = nfs_get_lock_context(ctx); - if (req->wb_lock_context == NULL) { + l_ctx = nfs_get_lock_context(ctx); + if (IS_ERR(l_ctx)) { nfs_page_free(req); - return ERR_PTR(-ENOMEM); + return ERR_CAST(l_ctx); } + req->wb_lock_context = l_ctx; /* Initialize the request struct. Initially, we assume a * long write-back delay. This will be adjusted in * update_nfs_request below if the region is not locked. 
*/ req->wb_page = page; - req->wb_index = page->index; + req->wb_index = page_file_index(page); page_cache_get(page); req->wb_offset = offset; req->wb_pgbase = offset; @@ -267,7 +271,9 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc, desc->pg_error = 0; desc->pg_lseg = NULL; desc->pg_dreq = NULL; + desc->pg_layout_private = NULL; } +EXPORT_SYMBOL_GPL(nfs_pageio_init); /** * nfs_can_coalesce_requests - test two requests for compatibility @@ -286,7 +292,9 @@ static bool nfs_can_coalesce_requests(struct nfs_page *prev, { if (req->wb_context->cred != prev->wb_context->cred) return false; - if (req->wb_lock_context->lockowner != prev->wb_lock_context->lockowner) + if (req->wb_lock_context->lockowner.l_owner != prev->wb_lock_context->lockowner.l_owner) + return false; + if (req->wb_lock_context->lockowner.l_pid != prev->wb_lock_context->lockowner.l_pid) return false; if (req->wb_context->state != prev->wb_context->state) return false; @@ -409,6 +417,7 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, } while (ret); return ret; } +EXPORT_SYMBOL_GPL(nfs_pageio_add_request); /** * nfs_pageio_complete - Complete I/O on an nfs_pageio_descriptor @@ -424,6 +433,7 @@ void nfs_pageio_complete(struct nfs_pageio_descriptor *desc) break; } } +EXPORT_SYMBOL_GPL(nfs_pageio_complete); /** * nfs_pageio_cond_complete - Conditional I/O completion diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index bbc49caa7a8..d00260b0810 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -35,6 +35,7 @@ #include "iostat.h" #define NFSDBG_FACILITY NFSDBG_PNFS +#define PNFS_LAYOUTGET_RETRY_TIMEOUT (120*HZ) /* Locking: * @@ -190,7 +191,7 @@ EXPORT_SYMBOL_GPL(pnfs_unregister_layoutdriver); /* Need to hold i_lock if caller does not already hold reference */ void -get_layout_hdr(struct pnfs_layout_hdr *lo) +pnfs_get_layout_hdr(struct pnfs_layout_hdr *lo) { atomic_inc(&lo->plh_refcount); } @@ -199,43 +200,107 @@ static struct pnfs_layout_hdr * pnfs_alloc_layout_hdr(struct inode *ino, gfp_t 
gfp_flags) { struct pnfs_layoutdriver_type *ld = NFS_SERVER(ino)->pnfs_curr_ld; - return ld->alloc_layout_hdr ? ld->alloc_layout_hdr(ino, gfp_flags) : - kzalloc(sizeof(struct pnfs_layout_hdr), gfp_flags); + return ld->alloc_layout_hdr(ino, gfp_flags); } static void pnfs_free_layout_hdr(struct pnfs_layout_hdr *lo) { - struct pnfs_layoutdriver_type *ld = NFS_SERVER(lo->plh_inode)->pnfs_curr_ld; + struct nfs_server *server = NFS_SERVER(lo->plh_inode); + struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld; + + if (!list_empty(&lo->plh_layouts)) { + struct nfs_client *clp = server->nfs_client; + + spin_lock(&clp->cl_lock); + list_del_init(&lo->plh_layouts); + spin_unlock(&clp->cl_lock); + } put_rpccred(lo->plh_lc_cred); - return ld->alloc_layout_hdr ? ld->free_layout_hdr(lo) : kfree(lo); + return ld->free_layout_hdr(lo); } static void -destroy_layout_hdr(struct pnfs_layout_hdr *lo) +pnfs_detach_layout_hdr(struct pnfs_layout_hdr *lo) { + struct nfs_inode *nfsi = NFS_I(lo->plh_inode); dprintk("%s: freeing layout cache %p\n", __func__, lo); - BUG_ON(!list_empty(&lo->plh_layouts)); - NFS_I(lo->plh_inode)->layout = NULL; - pnfs_free_layout_hdr(lo); + nfsi->layout = NULL; + /* Reset MDS Threshold I/O counters */ + nfsi->write_io = 0; + nfsi->read_io = 0; +} + +void +pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo) +{ + struct inode *inode = lo->plh_inode; + + if (atomic_dec_and_lock(&lo->plh_refcount, &inode->i_lock)) { + pnfs_detach_layout_hdr(lo); + spin_unlock(&inode->i_lock); + pnfs_free_layout_hdr(lo); + } +} + +static int +pnfs_iomode_to_fail_bit(u32 iomode) +{ + return iomode == IOMODE_RW ? 
+ NFS_LAYOUT_RW_FAILED : NFS_LAYOUT_RO_FAILED; } static void -put_layout_hdr_locked(struct pnfs_layout_hdr *lo) +pnfs_layout_set_fail_bit(struct pnfs_layout_hdr *lo, int fail_bit) { - if (atomic_dec_and_test(&lo->plh_refcount)) - destroy_layout_hdr(lo); + lo->plh_retry_timestamp = jiffies; + if (!test_and_set_bit(fail_bit, &lo->plh_flags)) + atomic_inc(&lo->plh_refcount); } -void -put_layout_hdr(struct pnfs_layout_hdr *lo) +static void +pnfs_layout_clear_fail_bit(struct pnfs_layout_hdr *lo, int fail_bit) +{ + if (test_and_clear_bit(fail_bit, &lo->plh_flags)) + atomic_dec(&lo->plh_refcount); +} + +static void +pnfs_layout_io_set_failed(struct pnfs_layout_hdr *lo, u32 iomode) { struct inode *inode = lo->plh_inode; + struct pnfs_layout_range range = { + .iomode = iomode, + .offset = 0, + .length = NFS4_MAX_UINT64, + }; + LIST_HEAD(head); - if (atomic_dec_and_lock(&lo->plh_refcount, &inode->i_lock)) { - destroy_layout_hdr(lo); - spin_unlock(&inode->i_lock); + spin_lock(&inode->i_lock); + pnfs_layout_set_fail_bit(lo, pnfs_iomode_to_fail_bit(iomode)); + pnfs_mark_matching_lsegs_invalid(lo, &head, &range); + spin_unlock(&inode->i_lock); + pnfs_free_lseg_list(&head); + dprintk("%s Setting layout IOMODE_%s fail bit\n", __func__, + iomode == IOMODE_RW ? 
"RW" : "READ"); +} + +static bool +pnfs_layout_io_test_failed(struct pnfs_layout_hdr *lo, u32 iomode) +{ + unsigned long start, end; + int fail_bit = pnfs_iomode_to_fail_bit(iomode); + + if (test_bit(fail_bit, &lo->plh_flags) == 0) + return false; + end = jiffies; + start = end - PNFS_LAYOUTGET_RETRY_TIMEOUT; + if (!time_in_range(lo->plh_retry_timestamp, start, end)) { + /* It is time to retry the failed layoutgets */ + pnfs_layout_clear_fail_bit(lo, fail_bit); + return false; } + return true; } static void @@ -249,33 +314,32 @@ init_lseg(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg) lseg->pls_layout = lo; } -static void free_lseg(struct pnfs_layout_segment *lseg) +static void pnfs_free_lseg(struct pnfs_layout_segment *lseg) { struct inode *ino = lseg->pls_layout->plh_inode; NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg); - /* Matched by get_layout_hdr in pnfs_insert_layout */ - put_layout_hdr(NFS_I(ino)->layout); } static void -put_lseg_common(struct pnfs_layout_segment *lseg) +pnfs_layout_remove_lseg(struct pnfs_layout_hdr *lo, + struct pnfs_layout_segment *lseg) { - struct inode *inode = lseg->pls_layout->plh_inode; + struct inode *inode = lo->plh_inode; WARN_ON(test_bit(NFS_LSEG_VALID, &lseg->pls_flags)); list_del_init(&lseg->pls_list); - if (list_empty(&lseg->pls_layout->plh_segs)) { - set_bit(NFS_LAYOUT_DESTROYED, &lseg->pls_layout->plh_flags); - /* Matched by initial refcount set in alloc_init_layout_hdr */ - put_layout_hdr_locked(lseg->pls_layout); - } + /* Matched by pnfs_get_layout_hdr in pnfs_layout_insert_lseg */ + atomic_dec(&lo->plh_refcount); + if (list_empty(&lo->plh_segs)) + clear_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags); rpc_wake_up(&NFS_SERVER(inode)->roc_rpcwaitq); } void -put_lseg(struct pnfs_layout_segment *lseg) +pnfs_put_lseg(struct pnfs_layout_segment *lseg) { + struct pnfs_layout_hdr *lo; struct inode *inode; if (!lseg) @@ -284,17 +348,17 @@ put_lseg(struct pnfs_layout_segment *lseg) dprintk("%s: lseg %p ref %d valid 
%d\n", __func__, lseg, atomic_read(&lseg->pls_refcount), test_bit(NFS_LSEG_VALID, &lseg->pls_flags)); - inode = lseg->pls_layout->plh_inode; + lo = lseg->pls_layout; + inode = lo->plh_inode; if (atomic_dec_and_lock(&lseg->pls_refcount, &inode->i_lock)) { - LIST_HEAD(free_me); - - put_lseg_common(lseg); - list_add(&lseg->pls_list, &free_me); + pnfs_get_layout_hdr(lo); + pnfs_layout_remove_lseg(lo, lseg); spin_unlock(&inode->i_lock); - pnfs_free_lseg_list(&free_me); + pnfs_free_lseg(lseg); + pnfs_put_layout_hdr(lo); } } -EXPORT_SYMBOL_GPL(put_lseg); +EXPORT_SYMBOL_GPL(pnfs_put_lseg); static inline u64 end_offset(u64 start, u64 len) @@ -305,17 +369,6 @@ end_offset(u64 start, u64 len) return end >= start ? end : NFS4_MAX_UINT64; } -/* last octet in a range */ -static inline u64 -last_byte_offset(u64 start, u64 len) -{ - u64 end; - - BUG_ON(!len); - end = start + len; - return end > start ? end - 1 : NFS4_MAX_UINT64; -} - /* * is l2 fully contained in l1? * start1 end1 @@ -378,7 +431,7 @@ static int mark_lseg_invalid(struct pnfs_layout_segment *lseg, dprintk("%s: lseg %p ref %d\n", __func__, lseg, atomic_read(&lseg->pls_refcount)); if (atomic_dec_and_test(&lseg->pls_refcount)) { - put_lseg_common(lseg); + pnfs_layout_remove_lseg(lseg->pls_layout, lseg); list_add(&lseg->pls_list, tmp_list); rv = 1; } @@ -390,7 +443,7 @@ static int mark_lseg_invalid(struct pnfs_layout_segment *lseg, * after call. 
*/ int -mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, +pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, struct list_head *tmp_list, struct pnfs_layout_range *recall_range) { @@ -399,14 +452,8 @@ mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, dprintk("%s:Begin lo %p\n", __func__, lo); - if (list_empty(&lo->plh_segs)) { - /* Reset MDS Threshold I/O counters */ - NFS_I(lo->plh_inode)->write_io = 0; - NFS_I(lo->plh_inode)->read_io = 0; - if (!test_and_set_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags)) - put_layout_hdr_locked(lo); + if (list_empty(&lo->plh_segs)) return 0; - } list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list) if (!recall_range || should_free_lseg(&lseg->pls_range, recall_range)) { @@ -426,25 +473,13 @@ void pnfs_free_lseg_list(struct list_head *free_me) { struct pnfs_layout_segment *lseg, *tmp; - struct pnfs_layout_hdr *lo; if (list_empty(free_me)) return; - lo = list_first_entry(free_me, struct pnfs_layout_segment, - pls_list)->pls_layout; - - if (test_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags)) { - struct nfs_client *clp; - - clp = NFS_SERVER(lo->plh_inode)->nfs_client; - spin_lock(&clp->cl_lock); - list_del_init(&lo->plh_layouts); - spin_unlock(&clp->cl_lock); - } list_for_each_entry_safe(lseg, tmp, free_me, pls_list) { list_del(&lseg->pls_list); - free_lseg(lseg); + pnfs_free_lseg(lseg); } } @@ -458,10 +493,15 @@ pnfs_destroy_layout(struct nfs_inode *nfsi) lo = nfsi->layout; if (lo) { lo->plh_block_lgets++; /* permanently block new LAYOUTGETs */ - mark_matching_lsegs_invalid(lo, &tmp_list, NULL); - } - spin_unlock(&nfsi->vfs_inode.i_lock); - pnfs_free_lseg_list(&tmp_list); + pnfs_mark_matching_lsegs_invalid(lo, &tmp_list, NULL); + pnfs_get_layout_hdr(lo); + pnfs_layout_clear_fail_bit(lo, NFS_LAYOUT_RO_FAILED); + pnfs_layout_clear_fail_bit(lo, NFS_LAYOUT_RW_FAILED); + spin_unlock(&nfsi->vfs_inode.i_lock); + pnfs_free_lseg_list(&tmp_list); + pnfs_put_layout_hdr(lo); + } else + spin_unlock(&nfsi->vfs_inode.i_lock); 
} EXPORT_SYMBOL_GPL(pnfs_destroy_layout); @@ -498,46 +538,54 @@ pnfs_destroy_all_layouts(struct nfs_client *clp) } } +/* + * Compare 2 layout stateid sequence ids, to see which is newer, + * taking into account wraparound issues. + */ +static bool pnfs_seqid_is_newer(u32 s1, u32 s2) +{ + return (s32)s1 - (s32)s2 > 0; +} + /* update lo->plh_stateid with new if is more recent */ void pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new, bool update_barrier) { - u32 oldseq, newseq; + u32 oldseq, newseq, new_barrier; + int empty = list_empty(&lo->plh_segs); oldseq = be32_to_cpu(lo->plh_stateid.seqid); newseq = be32_to_cpu(new->seqid); - if ((int)(newseq - oldseq) > 0) { + if (empty || pnfs_seqid_is_newer(newseq, oldseq)) { nfs4_stateid_copy(&lo->plh_stateid, new); if (update_barrier) { - u32 new_barrier = be32_to_cpu(new->seqid); - - if ((int)(new_barrier - lo->plh_barrier)) - lo->plh_barrier = new_barrier; + new_barrier = be32_to_cpu(new->seqid); } else { /* Because of wraparound, we want to keep the barrier - * "close" to the current seqids. It needs to be - * within 2**31 to count as "behind", so if it - * gets too near that limit, give us a litle leeway - * and bring it to within 2**30. - * NOTE - and yes, this is all unsigned arithmetic. + * "close" to the current seqids. 
*/ - if (unlikely((newseq - lo->plh_barrier) > (3 << 29))) - lo->plh_barrier = newseq - (1 << 30); + new_barrier = newseq - atomic_read(&lo->plh_outstanding); } + if (empty || pnfs_seqid_is_newer(new_barrier, lo->plh_barrier)) + lo->plh_barrier = new_barrier; } } +static bool +pnfs_layout_stateid_blocked(const struct pnfs_layout_hdr *lo, + const nfs4_stateid *stateid) +{ + u32 seqid = be32_to_cpu(stateid->seqid); + + return !pnfs_seqid_is_newer(seqid, lo->plh_barrier); +} + /* lget is set to 1 if called from inside send_layoutget call chain */ static bool -pnfs_layoutgets_blocked(struct pnfs_layout_hdr *lo, nfs4_stateid *stateid, - int lget) +pnfs_layoutgets_blocked(const struct pnfs_layout_hdr *lo, int lget) { - if ((stateid) && - (int)(lo->plh_barrier - be32_to_cpu(stateid->seqid)) >= 0) - return true; return lo->plh_block_lgets || - test_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags) || test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) || (list_empty(&lo->plh_segs) && (atomic_read(&lo->plh_outstanding) > lget)); @@ -551,7 +599,7 @@ pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo, dprintk("--> %s\n", __func__); spin_lock(&lo->plh_inode->i_lock); - if (pnfs_layoutgets_blocked(lo, NULL, 1)) { + if (pnfs_layoutgets_blocked(lo, 1)) { status = -EAGAIN; } else if (list_empty(&lo->plh_segs)) { int seq; @@ -582,32 +630,14 @@ send_layoutget(struct pnfs_layout_hdr *lo, struct inode *ino = lo->plh_inode; struct nfs_server *server = NFS_SERVER(ino); struct nfs4_layoutget *lgp; - struct pnfs_layout_segment *lseg = NULL; - struct page **pages = NULL; - int i; - u32 max_resp_sz, max_pages; + struct pnfs_layout_segment *lseg; dprintk("--> %s\n", __func__); - BUG_ON(ctx == NULL); lgp = kzalloc(sizeof(*lgp), gfp_flags); if (lgp == NULL) return NULL; - /* allocate pages for xdr post processing */ - max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz; - max_pages = nfs_page_array_len(0, max_resp_sz); - - pages = kcalloc(max_pages, sizeof(struct 
page *), gfp_flags); - if (!pages) - goto out_err_free; - - for (i = 0; i < max_pages; i++) { - pages[i] = alloc_page(gfp_flags); - if (!pages[i]) - goto out_err_free; - } - lgp->args.minlength = PAGE_CACHE_SIZE; if (lgp->args.minlength > range->length) lgp->args.minlength = range->length; @@ -616,42 +646,35 @@ send_layoutget(struct pnfs_layout_hdr *lo, lgp->args.type = server->pnfs_curr_ld->id; lgp->args.inode = ino; lgp->args.ctx = get_nfs_open_context(ctx); - lgp->args.layout.pages = pages; - lgp->args.layout.pglen = max_pages * PAGE_SIZE; - lgp->lsegpp = &lseg; lgp->gfp_flags = gfp_flags; /* Synchronously retrieve layout information from server and * store in lseg. */ - nfs4_proc_layoutget(lgp); - if (!lseg) { - /* remember that LAYOUTGET failed and suspend trying */ - set_bit(lo_fail_bit(range->iomode), &lo->plh_flags); + lseg = nfs4_proc_layoutget(lgp, gfp_flags); + if (IS_ERR(lseg)) { + switch (PTR_ERR(lseg)) { + case -ENOMEM: + case -ERESTARTSYS: + break; + default: + /* remember that LAYOUTGET failed and suspend trying */ + pnfs_layout_io_set_failed(lo, range->iomode); + } + return NULL; } - /* free xdr pages */ - for (i = 0; i < max_pages; i++) - __free_page(pages[i]); - kfree(pages); - return lseg; - -out_err_free: - /* free any allocated xdr pages, lgp as it's not used */ - if (pages) { - for (i = 0; i < max_pages; i++) { - if (!pages[i]) - break; - __free_page(pages[i]); - } - kfree(pages); - } - kfree(lgp); - return NULL; } -/* Initiates a LAYOUTRETURN(FILE) */ +/* + * Initiates a LAYOUTRETURN(FILE), and removes the pnfs_layout_hdr + * when the layout segment list is empty. + * + * Note that a pnfs_layout_hdr can exist with an empty layout segment + * list when LAYOUTGET has failed, or when LAYOUTGET succeeded, but the + * deviceid is marked invalid. 
+ */ int _pnfs_return_layout(struct inode *ino) { @@ -660,21 +683,29 @@ _pnfs_return_layout(struct inode *ino) LIST_HEAD(tmp_list); struct nfs4_layoutreturn *lrp; nfs4_stateid stateid; - int status = 0; + int status = 0, empty; - dprintk("--> %s\n", __func__); + dprintk("NFS: %s for inode %lu\n", __func__, ino->i_ino); spin_lock(&ino->i_lock); lo = nfsi->layout; if (!lo) { spin_unlock(&ino->i_lock); - dprintk("%s: no layout to return\n", __func__); - return status; + dprintk("NFS: %s no layout to return\n", __func__); + goto out; } stateid = nfsi->layout->plh_stateid; /* Reference matched in nfs4_layoutreturn_release */ - get_layout_hdr(lo); - mark_matching_lsegs_invalid(lo, &tmp_list, NULL); + pnfs_get_layout_hdr(lo); + empty = list_empty(&lo->plh_segs); + pnfs_mark_matching_lsegs_invalid(lo, &tmp_list, NULL); + /* Don't send a LAYOUTRETURN if list was initially empty */ + if (empty) { + spin_unlock(&ino->i_lock); + pnfs_put_layout_hdr(lo); + dprintk("NFS: %s no layout segments to return\n", __func__); + goto out; + } lo->plh_block_lgets++; spin_unlock(&ino->i_lock); pnfs_free_lseg_list(&tmp_list); @@ -684,9 +715,10 @@ _pnfs_return_layout(struct inode *ino) lrp = kzalloc(sizeof(*lrp), GFP_KERNEL); if (unlikely(lrp == NULL)) { status = -ENOMEM; - set_bit(NFS_LAYOUT_RW_FAILED, &lo->plh_flags); - set_bit(NFS_LAYOUT_RO_FAILED, &lo->plh_flags); - put_layout_hdr(lo); + spin_lock(&ino->i_lock); + lo->plh_block_lgets--; + spin_unlock(&ino->i_lock); + pnfs_put_layout_hdr(lo); goto out; } @@ -723,7 +755,7 @@ bool pnfs_roc(struct inode *ino) if (!found) goto out_nolayout; lo->plh_block_lgets++; - get_layout_hdr(lo); /* matched in pnfs_roc_release */ + pnfs_get_layout_hdr(lo); /* matched in pnfs_roc_release */ spin_unlock(&ino->i_lock); pnfs_free_lseg_list(&tmp_list); return true; @@ -740,8 +772,12 @@ void pnfs_roc_release(struct inode *ino) spin_lock(&ino->i_lock); lo = NFS_I(ino)->layout; lo->plh_block_lgets--; - put_layout_hdr_locked(lo); - spin_unlock(&ino->i_lock); + if 
(atomic_dec_and_test(&lo->plh_refcount)) { + pnfs_detach_layout_hdr(lo); + spin_unlock(&ino->i_lock); + pnfs_free_layout_hdr(lo); + } else + spin_unlock(&ino->i_lock); } void pnfs_roc_set_barrier(struct inode *ino, u32 barrier) @@ -750,32 +786,34 @@ void pnfs_roc_set_barrier(struct inode *ino, u32 barrier) spin_lock(&ino->i_lock); lo = NFS_I(ino)->layout; - if ((int)(barrier - lo->plh_barrier) > 0) + if (pnfs_seqid_is_newer(barrier, lo->plh_barrier)) lo->plh_barrier = barrier; spin_unlock(&ino->i_lock); } -bool pnfs_roc_drain(struct inode *ino, u32 *barrier) +bool pnfs_roc_drain(struct inode *ino, u32 *barrier, struct rpc_task *task) { struct nfs_inode *nfsi = NFS_I(ino); + struct pnfs_layout_hdr *lo; struct pnfs_layout_segment *lseg; + u32 current_seqid; bool found = false; spin_lock(&ino->i_lock); list_for_each_entry(lseg, &nfsi->layout->plh_segs, pls_list) if (test_bit(NFS_LSEG_ROC, &lseg->pls_flags)) { + rpc_sleep_on(&NFS_SERVER(ino)->roc_rpcwaitq, task, NULL); found = true; - break; + goto out; } - if (!found) { - struct pnfs_layout_hdr *lo = nfsi->layout; - u32 current_seqid = be32_to_cpu(lo->plh_stateid.seqid); + lo = nfsi->layout; + current_seqid = be32_to_cpu(lo->plh_stateid.seqid); - /* Since close does not return a layout stateid for use as - * a barrier, we choose the worst-case barrier. - */ - *barrier = current_seqid + atomic_read(&lo->plh_outstanding); - } + /* Since close does not return a layout stateid for use as + * a barrier, we choose the worst-case barrier. 
+ */ + *barrier = current_seqid + atomic_read(&lo->plh_outstanding); +out: spin_unlock(&ino->i_lock); return found; } @@ -806,14 +844,13 @@ cmp_layout(struct pnfs_layout_range *l1, } static void -pnfs_insert_layout(struct pnfs_layout_hdr *lo, +pnfs_layout_insert_lseg(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg) { struct pnfs_layout_segment *lp; dprintk("%s:Begin\n", __func__); - assert_spin_locked(&lo->plh_inode->i_lock); list_for_each_entry(lp, &lo->plh_segs, pls_list) { if (cmp_layout(&lseg->pls_range, &lp->pls_range) > 0) continue; @@ -833,7 +870,7 @@ pnfs_insert_layout(struct pnfs_layout_hdr *lo, __func__, lseg, lseg->pls_range.iomode, lseg->pls_range.offset, lseg->pls_range.length); out: - get_layout_hdr(lo); + pnfs_get_layout_hdr(lo); dprintk("%s:Return\n", __func__); } @@ -867,21 +904,19 @@ pnfs_find_alloc_layout(struct inode *ino, dprintk("%s Begin ino=%p layout=%p\n", __func__, ino, nfsi->layout); - assert_spin_locked(&ino->i_lock); - if (nfsi->layout) { - if (test_bit(NFS_LAYOUT_DESTROYED, &nfsi->layout->plh_flags)) - return NULL; - else - return nfsi->layout; - } + if (nfsi->layout != NULL) + goto out_existing; spin_unlock(&ino->i_lock); new = alloc_init_layout_hdr(ino, ctx, gfp_flags); spin_lock(&ino->i_lock); - if (likely(nfsi->layout == NULL)) /* Won the race? */ + if (likely(nfsi->layout == NULL)) { /* Won the race? 
*/ nfsi->layout = new; - else + return new; + } else if (new != NULL) pnfs_free_layout_hdr(new); +out_existing: + pnfs_get_layout_hdr(nfsi->layout); return nfsi->layout; } @@ -924,11 +959,10 @@ pnfs_find_lseg(struct pnfs_layout_hdr *lo, dprintk("%s:Begin\n", __func__); - assert_spin_locked(&lo->plh_inode->i_lock); list_for_each_entry(lseg, &lo->plh_segs, pls_list) { if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags) && is_matching_lseg(&lseg->pls_range, range)) { - ret = get_lseg(lseg); + ret = pnfs_get_lseg(lseg); break; } if (lseg->pls_range.offset > range->offset) @@ -1033,7 +1067,6 @@ pnfs_update_layout(struct inode *ino, .length = count, }; unsigned pg_offset; - struct nfs_inode *nfsi = NFS_I(ino); struct nfs_server *server = NFS_SERVER(ino); struct nfs_client *clp = server->nfs_client; struct pnfs_layout_hdr *lo; @@ -1041,16 +1074,16 @@ pnfs_update_layout(struct inode *ino, bool first = false; if (!pnfs_enabled_sb(NFS_SERVER(ino))) - return NULL; + goto out; if (pnfs_within_mdsthreshold(ctx, ino, iomode)) - return NULL; + goto out; spin_lock(&ino->i_lock); lo = pnfs_find_alloc_layout(ino, ctx, gfp_flags); if (lo == NULL) { - dprintk("%s ERROR: can't get pnfs_layout_hdr\n", __func__); - goto out_unlock; + spin_unlock(&ino->i_lock); + goto out; } /* Do we even need to bother with this? 
*/ @@ -1060,7 +1093,7 @@ pnfs_update_layout(struct inode *ino, } /* if LAYOUTGET already failed once we don't try again */ - if (test_bit(lo_fail_bit(iomode), &nfsi->layout->plh_flags)) + if (pnfs_layout_io_test_failed(lo, iomode)) goto out_unlock; /* Check to see if the layout for the given range already exists */ @@ -1068,20 +1101,19 @@ pnfs_update_layout(struct inode *ino, if (lseg) goto out_unlock; - if (pnfs_layoutgets_blocked(lo, NULL, 0)) + if (pnfs_layoutgets_blocked(lo, 0)) goto out_unlock; atomic_inc(&lo->plh_outstanding); - get_layout_hdr(lo); if (list_empty(&lo->plh_segs)) first = true; + spin_unlock(&ino->i_lock); if (first) { /* The lo must be on the clp list if there is any * chance of a CB_LAYOUTRECALL(FILE) coming in. */ spin_lock(&clp->cl_lock); - BUG_ON(!list_empty(&lo->plh_layouts)); list_add_tail(&lo->plh_layouts, &server->layouts); spin_unlock(&clp->cl_lock); } @@ -1095,24 +1127,26 @@ pnfs_update_layout(struct inode *ino, arg.length = PAGE_CACHE_ALIGN(arg.length); lseg = send_layoutget(lo, ctx, &arg, gfp_flags); - if (!lseg && first) { - spin_lock(&clp->cl_lock); - list_del_init(&lo->plh_layouts); - spin_unlock(&clp->cl_lock); - } atomic_dec(&lo->plh_outstanding); - put_layout_hdr(lo); +out_put_layout_hdr: + pnfs_put_layout_hdr(lo); out: - dprintk("%s end, state 0x%lx lseg %p\n", __func__, - nfsi->layout ? nfsi->layout->plh_flags : -1, lseg); + dprintk("%s: inode %s/%llu pNFS layout segment %s for " + "(%s, offset: %llu, length: %llu)\n", + __func__, ino->i_sb->s_id, + (unsigned long long)NFS_FILEID(ino), + lseg == NULL ? "not found" : "found", + iomode==IOMODE_RW ? 
"read/write" : "read-only", + (unsigned long long)pos, + (unsigned long long)count); return lseg; out_unlock: spin_unlock(&ino->i_lock); - goto out; + goto out_put_layout_hdr; } EXPORT_SYMBOL_GPL(pnfs_update_layout); -int +struct pnfs_layout_segment * pnfs_layout_process(struct nfs4_layoutget *lgp) { struct pnfs_layout_hdr *lo = NFS_I(lgp->args.inode)->layout; @@ -1139,25 +1173,29 @@ pnfs_layout_process(struct nfs4_layoutget *lgp) goto out_forget_reply; } - if (pnfs_layoutgets_blocked(lo, &res->stateid, 1)) { + if (pnfs_layoutgets_blocked(lo, 1) || + pnfs_layout_stateid_blocked(lo, &res->stateid)) { dprintk("%s forget reply due to state\n", __func__); goto out_forget_reply; } + + /* Done processing layoutget. Set the layout stateid */ + pnfs_set_layout_stateid(lo, &res->stateid, false); + init_lseg(lo, lseg); lseg->pls_range = res->range; - *lgp->lsegpp = get_lseg(lseg); - pnfs_insert_layout(lo, lseg); + pnfs_get_lseg(lseg); + pnfs_layout_insert_lseg(lo, lseg); if (res->return_on_close) { set_bit(NFS_LSEG_ROC, &lseg->pls_flags); set_bit(NFS_LAYOUT_ROC, &lo->plh_flags); } - /* Done processing layoutget. 
Set the layout stateid */ - pnfs_set_layout_stateid(lo, &res->stateid, false); spin_unlock(&ino->i_lock); + return lseg; out: - return status; + return ERR_PTR(status); out_forget_reply: spin_unlock(&ino->i_lock); @@ -1169,16 +1207,24 @@ out_forget_reply: void pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) { - BUG_ON(pgio->pg_lseg != NULL); + u64 rd_size = req->wb_bytes; + + WARN_ON_ONCE(pgio->pg_lseg != NULL); if (req->wb_offset != req->wb_pgbase) { nfs_pageio_reset_read_mds(pgio); return; } + + if (pgio->pg_dreq == NULL) + rd_size = i_size_read(pgio->pg_inode) - req_offset(req); + else + rd_size = nfs_dreq_bytes_left(pgio->pg_dreq); + pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, req->wb_context, req_offset(req), - req->wb_bytes, + rd_size, IOMODE_READ, GFP_KERNEL); /* If no lseg, fall back to read through mds */ @@ -1189,18 +1235,20 @@ pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *r EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_read); void -pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) +pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, + struct nfs_page *req, u64 wb_size) { - BUG_ON(pgio->pg_lseg != NULL); + WARN_ON_ONCE(pgio->pg_lseg != NULL); if (req->wb_offset != req->wb_pgbase) { nfs_pageio_reset_write_mds(pgio); return; } + pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, req->wb_context, req_offset(req), - req->wb_bytes, + wb_size, IOMODE_RW, GFP_NOFS); /* If no lseg, fall back to write through mds */ @@ -1209,7 +1257,7 @@ pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page * } EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write); -bool +void pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode, const struct nfs_pgio_completion_ops *compl_ops) { @@ -1217,13 +1265,12 @@ pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode, struct pnfs_layoutdriver_type *ld = 
server->pnfs_curr_ld; if (ld == NULL) - return false; - nfs_pageio_init(pgio, inode, ld->pg_read_ops, compl_ops, - server->rsize, 0); - return true; + nfs_pageio_init_read(pgio, inode, compl_ops); + else + nfs_pageio_init(pgio, inode, ld->pg_read_ops, compl_ops, server->rsize, 0); } -bool +void pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags, const struct nfs_pgio_completion_ops *compl_ops) @@ -1232,10 +1279,9 @@ pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld; if (ld == NULL) - return false; - nfs_pageio_init(pgio, inode, ld->pg_write_ops, compl_ops, - server->wsize, ioflags); - return true; + nfs_pageio_init_write(pgio, inode, ioflags, compl_ops); + else + nfs_pageio_init(pgio, inode, ld->pg_write_ops, compl_ops, server->wsize, ioflags); } bool @@ -1272,7 +1318,7 @@ int pnfs_write_done_resend_to_mds(struct inode *inode, LIST_HEAD(failed); /* Resend all requests through the MDS */ - nfs_pageio_init_write_mds(&pgio, inode, FLUSH_STABLE, compl_ops); + nfs_pageio_init_write(&pgio, inode, FLUSH_STABLE, compl_ops); while (!list_empty(head)) { struct nfs_page *req = nfs_list_entry(head->next); @@ -1380,14 +1426,15 @@ pnfs_do_multiple_writes(struct nfs_pageio_descriptor *desc, struct list_head *he if (trypnfs == PNFS_NOT_ATTEMPTED) pnfs_write_through_mds(desc, data); } - put_lseg(lseg); + pnfs_put_lseg(lseg); } static void pnfs_writehdr_free(struct nfs_pgio_header *hdr) { - put_lseg(hdr->lseg); + pnfs_put_lseg(hdr->lseg); nfs_writehdr_free(hdr); } +EXPORT_SYMBOL_GPL(pnfs_writehdr_free); int pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) @@ -1399,17 +1446,17 @@ pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) whdr = nfs_writehdr_alloc(); if (!whdr) { desc->pg_completion_ops->error_cleanup(&desc->pg_list); - put_lseg(desc->pg_lseg); + pnfs_put_lseg(desc->pg_lseg); desc->pg_lseg = NULL; return -ENOMEM; } hdr = 
&whdr->header; nfs_pgheader_init(desc, hdr, pnfs_writehdr_free); - hdr->lseg = get_lseg(desc->pg_lseg); + hdr->lseg = pnfs_get_lseg(desc->pg_lseg); atomic_inc(&hdr->refcnt); ret = nfs_generic_flush(desc, hdr); if (ret != 0) { - put_lseg(desc->pg_lseg); + pnfs_put_lseg(desc->pg_lseg); desc->pg_lseg = NULL; } else pnfs_do_multiple_writes(desc, &hdr->rpc_list, desc->pg_ioflags); @@ -1427,7 +1474,7 @@ int pnfs_read_done_resend_to_mds(struct inode *inode, LIST_HEAD(failed); /* Resend all requests through the MDS */ - nfs_pageio_init_read_mds(&pgio, inode, compl_ops); + nfs_pageio_init_read(&pgio, inode, compl_ops); while (!list_empty(head)) { struct nfs_page *req = nfs_list_entry(head->next); @@ -1534,14 +1581,15 @@ pnfs_do_multiple_reads(struct nfs_pageio_descriptor *desc, struct list_head *hea if (trypnfs == PNFS_NOT_ATTEMPTED) pnfs_read_through_mds(desc, data); } - put_lseg(lseg); + pnfs_put_lseg(lseg); } static void pnfs_readhdr_free(struct nfs_pgio_header *hdr) { - put_lseg(hdr->lseg); + pnfs_put_lseg(hdr->lseg); nfs_readhdr_free(hdr); } +EXPORT_SYMBOL_GPL(pnfs_readhdr_free); int pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) @@ -1554,17 +1602,17 @@ pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) if (!rhdr) { desc->pg_completion_ops->error_cleanup(&desc->pg_list); ret = -ENOMEM; - put_lseg(desc->pg_lseg); + pnfs_put_lseg(desc->pg_lseg); desc->pg_lseg = NULL; return ret; } hdr = &rhdr->header; nfs_pgheader_init(desc, hdr, pnfs_readhdr_free); - hdr->lseg = get_lseg(desc->pg_lseg); + hdr->lseg = pnfs_get_lseg(desc->pg_lseg); atomic_inc(&hdr->refcnt); ret = nfs_generic_pagein(desc, hdr); if (ret != 0) { - put_lseg(desc->pg_lseg); + pnfs_put_lseg(desc->pg_lseg); desc->pg_lseg = NULL; } else pnfs_do_multiple_reads(desc, &hdr->rpc_list); @@ -1590,13 +1638,7 @@ static void pnfs_list_write_lseg(struct inode *inode, struct list_head *listp) void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg) { - if (lseg->pls_range.iomode == IOMODE_RW) { - 
dprintk("%s Setting layout IOMODE_RW fail bit\n", __func__); - set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags); - } else { - dprintk("%s Setting layout IOMODE_READ fail bit\n", __func__); - set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags); - } + pnfs_layout_io_set_failed(lseg->pls_layout, lseg->pls_range.iomode); } EXPORT_SYMBOL_GPL(pnfs_set_lo_fail); @@ -1617,7 +1659,7 @@ pnfs_set_layoutcommit(struct nfs_write_data *wdata) } if (!test_and_set_bit(NFS_LSEG_LAYOUTCOMMIT, &hdr->lseg->pls_flags)) { /* references matched in nfs4_layoutcommit_release */ - get_lseg(hdr->lseg); + pnfs_get_lseg(hdr->lseg); } if (end_pos > nfsi->layout->plh_lwb) nfsi->layout->plh_lwb = end_pos; diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 64f90d845f6..dbf7bba52da 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -62,8 +62,7 @@ enum { NFS_LAYOUT_RW_FAILED, /* get rw layout failed stop trying */ NFS_LAYOUT_BULK_RECALL, /* bulk recall affecting layout */ NFS_LAYOUT_ROC, /* some lseg had roc bit set */ - NFS_LAYOUT_DESTROYED, /* no new use of layout allowed */ - NFS_LAYOUT_INVALID, /* layout is being destroyed */ + NFS_LAYOUT_RETURN, /* Return this layout ASAP */ }; enum layoutdriver_policy_flags { @@ -139,6 +138,7 @@ struct pnfs_layout_hdr { atomic_t plh_outstanding; /* number of RPCs out */ unsigned long plh_block_lgets; /* block LAYOUTGET if >0 */ u32 plh_barrier; /* ignore lower seqids */ + unsigned long plh_retry_timestamp; unsigned long plh_flags; loff_t plh_lwb; /* last write byte for layoutcommit */ struct rpc_cred *plh_lc_cred; /* layoutcommit cred */ @@ -171,44 +171,45 @@ extern int nfs4_proc_getdevicelist(struct nfs_server *server, struct pnfs_devicelist *devlist); extern int nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *dev); -extern int nfs4_proc_layoutget(struct nfs4_layoutget *lgp); +extern struct pnfs_layout_segment* nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags); extern int nfs4_proc_layoutreturn(struct 
nfs4_layoutreturn *lrp); /* pnfs.c */ -void get_layout_hdr(struct pnfs_layout_hdr *lo); -void put_lseg(struct pnfs_layout_segment *lseg); +void pnfs_get_layout_hdr(struct pnfs_layout_hdr *lo); +void pnfs_put_lseg(struct pnfs_layout_segment *lseg); -bool pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *, +void pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *, const struct nfs_pgio_completion_ops *); -bool pnfs_pageio_init_write(struct nfs_pageio_descriptor *, struct inode *, +void pnfs_pageio_init_write(struct nfs_pageio_descriptor *, struct inode *, int, const struct nfs_pgio_completion_ops *); void set_pnfs_layoutdriver(struct nfs_server *, const struct nfs_fh *, u32); void unset_pnfs_layoutdriver(struct nfs_server *); void pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *, struct nfs_page *); int pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc); -void pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *, struct nfs_page *); +void pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, + struct nfs_page *req, u64 wb_size); int pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc); bool pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req); void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg); -int pnfs_layout_process(struct nfs4_layoutget *lgp); +struct pnfs_layout_segment *pnfs_layout_process(struct nfs4_layoutget *lgp); void pnfs_free_lseg_list(struct list_head *tmp_list); void pnfs_destroy_layout(struct nfs_inode *); void pnfs_destroy_all_layouts(struct nfs_client *); -void put_layout_hdr(struct pnfs_layout_hdr *lo); +void pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo); void pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new, bool update_barrier); int pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo, struct nfs4_state *open_state); -int mark_matching_lsegs_invalid(struct 
pnfs_layout_hdr *lo, +int pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, struct list_head *tmp_list, struct pnfs_layout_range *recall_range); bool pnfs_roc(struct inode *ino); void pnfs_roc_release(struct inode *ino); void pnfs_roc_set_barrier(struct inode *ino, u32 barrier); -bool pnfs_roc_drain(struct inode *ino, u32 *barrier); +bool pnfs_roc_drain(struct inode *ino, u32 *barrier, struct rpc_task *task); void pnfs_set_layoutcommit(struct nfs_write_data *wdata); void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data); int pnfs_layoutcommit_inode(struct inode *inode, bool sync); @@ -232,6 +233,7 @@ struct nfs4_threshold *pnfs_mdsthreshold_alloc(void); /* nfs4_deviceid_flags */ enum { NFS_DEVICEID_INVALID = 0, /* set when MDS clientid recalled */ + NFS_DEVICEID_UNAVAILABLE, /* device temporarily unavailable */ }; /* pnfs_dev.c */ @@ -241,6 +243,7 @@ struct nfs4_deviceid_node { const struct pnfs_layoutdriver_type *ld; const struct nfs_client *nfs_client; unsigned long flags; + unsigned long timestamp_unavailable; struct nfs4_deviceid deviceid; atomic_t ref; }; @@ -253,16 +256,12 @@ void nfs4_init_deviceid_node(struct nfs4_deviceid_node *, const struct nfs4_deviceid *); struct nfs4_deviceid_node *nfs4_insert_deviceid_node(struct nfs4_deviceid_node *); bool nfs4_put_deviceid_node(struct nfs4_deviceid_node *); +void nfs4_mark_deviceid_unavailable(struct nfs4_deviceid_node *node); +bool nfs4_test_deviceid_unavailable(struct nfs4_deviceid_node *node); void nfs4_deviceid_purge_client(const struct nfs_client *); -static inline int lo_fail_bit(u32 iomode) -{ - return iomode == IOMODE_RW ? 
- NFS_LAYOUT_RW_FAILED : NFS_LAYOUT_RO_FAILED; -} - static inline struct pnfs_layout_segment * -get_lseg(struct pnfs_layout_segment *lseg) +pnfs_get_lseg(struct pnfs_layout_segment *lseg) { if (lseg) { atomic_inc(&lseg->pls_refcount); @@ -387,12 +386,12 @@ static inline void pnfs_destroy_layout(struct nfs_inode *nfsi) } static inline struct pnfs_layout_segment * -get_lseg(struct pnfs_layout_segment *lseg) +pnfs_get_lseg(struct pnfs_layout_segment *lseg) { return NULL; } -static inline void put_lseg(struct pnfs_layout_segment *lseg) +static inline void pnfs_put_lseg(struct pnfs_layout_segment *lseg) { } @@ -424,7 +423,7 @@ pnfs_roc_set_barrier(struct inode *ino, u32 barrier) } static inline bool -pnfs_roc_drain(struct inode *ino, u32 *barrier) +pnfs_roc_drain(struct inode *ino, u32 *barrier, struct rpc_task *task) { return false; } @@ -438,16 +437,16 @@ static inline void unset_pnfs_layoutdriver(struct nfs_server *s) { } -static inline bool pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode, +static inline void pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode, const struct nfs_pgio_completion_ops *compl_ops) { - return false; + nfs_pageio_init_read(pgio, inode, compl_ops); } -static inline bool pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags, +static inline void pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags, const struct nfs_pgio_completion_ops *compl_ops) { - return false; + nfs_pageio_init_write(pgio, inode, ioflags, compl_ops); } static inline int diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c index 73f701f1f4d..d35b62e83ea 100644 --- a/fs/nfs/pnfs_dev.c +++ b/fs/nfs/pnfs_dev.c @@ -40,6 +40,8 @@ #define NFS4_DEVICE_ID_HASH_SIZE (1 << NFS4_DEVICE_ID_HASH_BITS) #define NFS4_DEVICE_ID_HASH_MASK (NFS4_DEVICE_ID_HASH_SIZE - 1) +#define PNFS_DEVICE_RETRY_TIMEOUT (120*HZ) + static struct hlist_head 
nfs4_deviceid_cache[NFS4_DEVICE_ID_HASH_SIZE]; static DEFINE_SPINLOCK(nfs4_deviceid_lock); @@ -218,6 +220,30 @@ nfs4_put_deviceid_node(struct nfs4_deviceid_node *d) } EXPORT_SYMBOL_GPL(nfs4_put_deviceid_node); +void +nfs4_mark_deviceid_unavailable(struct nfs4_deviceid_node *node) +{ + node->timestamp_unavailable = jiffies; + set_bit(NFS_DEVICEID_UNAVAILABLE, &node->flags); +} +EXPORT_SYMBOL_GPL(nfs4_mark_deviceid_unavailable); + +bool +nfs4_test_deviceid_unavailable(struct nfs4_deviceid_node *node) +{ + if (test_bit(NFS_DEVICEID_UNAVAILABLE, &node->flags)) { + unsigned long start, end; + + end = jiffies; + start = end - PNFS_DEVICE_RETRY_TIMEOUT; + if (time_in_range(node->timestamp_unavailable, start, end)) + return true; + clear_bit(NFS_DEVICEID_UNAVAILABLE, &node->flags); + } + return false; +} +EXPORT_SYMBOL_GPL(nfs4_test_deviceid_unavailable); + static void _deviceid_purge_client(const struct nfs_client *clp, long hash) { @@ -276,3 +302,4 @@ nfs4_deviceid_mark_client_invalid(struct nfs_client *clp) } rcu_read_unlock(); } + diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index 617c7419a08..f084dac948e 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -47,39 +47,6 @@ #define NFSDBG_FACILITY NFSDBG_PROC /* - * wrapper to handle the -EKEYEXPIRED error message. This should generally - * only happen if using krb5 auth and a user's TGT expires. NFSv2 doesn't - * support the NFSERR_JUKEBOX error code, but we handle this situation in the - * same way that we handle that error with NFSv3. 
- */ -static int -nfs_rpc_wrapper(struct rpc_clnt *clnt, struct rpc_message *msg, int flags) -{ - int res; - do { - res = rpc_call_sync(clnt, msg, flags); - if (res != -EKEYEXPIRED) - break; - freezable_schedule_timeout_killable(NFS_JUKEBOX_RETRY_TIME); - res = -ERESTARTSYS; - } while (!fatal_signal_pending(current)); - return res; -} - -#define rpc_call_sync(clnt, msg, flags) nfs_rpc_wrapper(clnt, msg, flags) - -static int -nfs_async_handle_expired_key(struct rpc_task *task) -{ - if (task->tk_status != -EKEYEXPIRED) - return 0; - task->tk_status = 0; - rpc_restart_call(task); - rpc_delay(task, NFS_JUKEBOX_RETRY_TIME); - return 1; -} - -/* * Bare-bones access to getattr: this is for nfs_read_super. */ static int @@ -259,7 +226,7 @@ static void nfs_free_createdata(const struct nfs_createdata *data) static int nfs_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, - int flags, struct nfs_open_context *ctx) + int flags) { struct nfs_createdata *data; struct rpc_message msg = { @@ -364,8 +331,6 @@ static void nfs_proc_unlink_rpc_prepare(struct rpc_task *task, struct nfs_unlink static int nfs_proc_unlink_done(struct rpc_task *task, struct inode *dir) { - if (nfs_async_handle_expired_key(task)) - return 0; nfs_mark_for_revalidate(dir); return 1; } @@ -385,8 +350,6 @@ static int nfs_proc_rename_done(struct rpc_task *task, struct inode *old_dir, struct inode *new_dir) { - if (nfs_async_handle_expired_key(task)) - return 0; nfs_mark_for_revalidate(old_dir); nfs_mark_for_revalidate(new_dir); return 1; @@ -642,9 +605,6 @@ static int nfs_read_done(struct rpc_task *task, struct nfs_read_data *data) { struct inode *inode = data->header->inode; - if (nfs_async_handle_expired_key(task)) - return -EAGAIN; - nfs_invalidate_atime(inode); if (task->tk_status >= 0) { nfs_refresh_inode(inode, data->res.fattr); @@ -671,9 +631,6 @@ static int nfs_write_done(struct rpc_task *task, struct nfs_write_data *data) { struct inode *inode = data->header->inode; - if 
(nfs_async_handle_expired_key(task)) - return -EAGAIN; - if (task->tk_status >= 0) nfs_post_op_update_inode_force_wcc(inode, data->res.fattr); return 0; @@ -734,6 +691,38 @@ out_einval: return -EINVAL; } +static int nfs_have_delegation(struct inode *inode, fmode_t flags) +{ + return 0; +} + +static int nfs_return_delegation(struct inode *inode) +{ + nfs_wb_all(inode); + return 0; +} + +static const struct inode_operations nfs_dir_inode_operations = { + .create = nfs_create, + .lookup = nfs_lookup, + .link = nfs_link, + .unlink = nfs_unlink, + .symlink = nfs_symlink, + .mkdir = nfs_mkdir, + .rmdir = nfs_rmdir, + .mknod = nfs_mknod, + .rename = nfs_rename, + .permission = nfs_permission, + .getattr = nfs_getattr, + .setattr = nfs_setattr, +}; + +static const struct inode_operations nfs_file_inode_operations = { + .permission = nfs_permission, + .getattr = nfs_getattr, + .setattr = nfs_setattr, +}; + const struct nfs_rpc_ops nfs_v2_clientops = { .version = 2, /* protocol version */ .dentry_ops = &nfs_dentry_operations, @@ -742,6 +731,7 @@ const struct nfs_rpc_ops nfs_v2_clientops = { .file_ops = &nfs_file_operations, .getroot = nfs_proc_get_root, .submount = nfs_submount, + .try_mount = nfs_try_mount, .getattr = nfs_proc_getattr, .setattr = nfs_proc_setattr, .lookup = nfs_proc_lookup, @@ -767,9 +757,11 @@ const struct nfs_rpc_ops nfs_v2_clientops = { .pathconf = nfs_proc_pathconf, .decode_dirent = nfs2_decode_dirent, .read_setup = nfs_proc_read_setup, + .read_pageio_init = nfs_pageio_init_read, .read_rpc_prepare = nfs_proc_read_rpc_prepare, .read_done = nfs_read_done, .write_setup = nfs_proc_write_setup, + .write_pageio_init = nfs_pageio_init_write, .write_rpc_prepare = nfs_proc_write_rpc_prepare, .write_done = nfs_write_done, .commit_setup = nfs_proc_commit_setup, @@ -777,5 +769,11 @@ const struct nfs_rpc_ops nfs_v2_clientops = { .lock = nfs_proc_lock, .lock_check_bounds = nfs_lock_check_bounds, .close_context = nfs_close_context, + .have_delegation = 
nfs_have_delegation, + .return_delegation = nfs_return_delegation, + .alloc_client = nfs_alloc_client, .init_client = nfs_init_client, + .free_client = nfs_free_client, + .create_server = nfs_create_server, + .clone_server = nfs_clone_server, }; diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 86ced783621..a5e5d9899d5 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -20,8 +20,6 @@ #include <linux/nfs_page.h> #include <linux/module.h> -#include "pnfs.h" - #include "nfs4_fs.h" #include "internal.h" #include "iostat.h" @@ -50,6 +48,7 @@ struct nfs_read_header *nfs_readhdr_alloc(void) } return rhdr; } +EXPORT_SYMBOL_GPL(nfs_readhdr_alloc); static struct nfs_read_data *nfs_readdata_alloc(struct nfs_pgio_header *hdr, unsigned int pagecount) @@ -82,6 +81,7 @@ void nfs_readhdr_free(struct nfs_pgio_header *hdr) kmem_cache_free(nfs_rdata_cachep, rhdr); } +EXPORT_SYMBOL_GPL(nfs_readhdr_free); void nfs_readdata_release(struct nfs_read_data *rdata) { @@ -91,13 +91,18 @@ void nfs_readdata_release(struct nfs_read_data *rdata) put_nfs_open_context(rdata->args.context); if (rdata->pages.pagevec != rdata->pages.page_array) kfree(rdata->pages.pagevec); - if (rdata != &read_header->rpc_data) - kfree(rdata); - else + if (rdata == &read_header->rpc_data) { rdata->header = NULL; + rdata = NULL; + } if (atomic_dec_and_test(&hdr->refcnt)) hdr->completion_ops->completion(hdr); + /* Note: we only free the rpc_task after callbacks are done. 
+ * See the comment in rpc_free_task() for why + */ + kfree(rdata); } +EXPORT_SYMBOL_GPL(nfs_readdata_release); static int nfs_return_empty_page(struct page *page) @@ -108,13 +113,14 @@ int nfs_return_empty_page(struct page *page) return 0; } -void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio, +void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode, const struct nfs_pgio_completion_ops *compl_ops) { nfs_pageio_init(pgio, inode, &nfs_pageio_read_ops, compl_ops, NFS_SERVER(inode)->rsize, 0); } +EXPORT_SYMBOL_GPL(nfs_pageio_init_read); void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio) { @@ -123,14 +129,6 @@ void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio) } EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds); -void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, - struct inode *inode, - const struct nfs_pgio_completion_ops *compl_ops) -{ - if (!pnfs_pageio_init_read(pgio, inode, compl_ops)) - nfs_pageio_init_read_mds(pgio, inode, compl_ops); -} - int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, struct page *page) { @@ -149,7 +147,7 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, if (len < PAGE_CACHE_SIZE) zero_user_segment(page, len, PAGE_CACHE_SIZE); - nfs_pageio_init_read(&pgio, inode, &nfs_async_read_completion_ops); + NFS_PROTO(inode)->read_pageio_init(&pgio, inode, &nfs_async_read_completion_ops); nfs_pageio_add_request(&pgio, new); nfs_pageio_complete(&pgio); NFS_I(inode)->read_io += pgio.pg_bytes_written; @@ -407,6 +405,7 @@ int nfs_generic_pagein(struct nfs_pageio_descriptor *desc, return nfs_pagein_multi(desc, hdr); return nfs_pagein_one(desc, hdr); } +EXPORT_SYMBOL_GPL(nfs_generic_pagein); static int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) { @@ -532,11 +531,11 @@ static const struct rpc_call_ops nfs_read_common_ops = { int nfs_readpage(struct file *file, struct page *page) { struct nfs_open_context *ctx; - 
struct inode *inode = page->mapping->host; + struct inode *inode = page_file_mapping(page)->host; int error; dprintk("NFS: nfs_readpage (%p %ld@%lu)\n", - page, PAGE_CACHE_SIZE, page->index); + page, PAGE_CACHE_SIZE, page_file_index(page)); nfs_inc_stats(inode, NFSIOS_VFSREADPAGE); nfs_add_stats(inode, NFSIOS_READPAGES, 1); @@ -590,7 +589,7 @@ static int readpage_async_filler(void *data, struct page *page) { struct nfs_readdesc *desc = (struct nfs_readdesc *)data; - struct inode *inode = page->mapping->host; + struct inode *inode = page_file_mapping(page)->host; struct nfs_page *new; unsigned int len; int error; @@ -652,7 +651,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping, if (ret == 0) goto read_complete; /* all pages were read */ - nfs_pageio_init_read(&pgio, inode, &nfs_async_read_completion_ops); + NFS_PROTO(inode)->read_pageio_init(&pgio, inode, &nfs_async_read_completion_ops); ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc); diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 06228192f64..b056b162872 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -54,6 +54,7 @@ #include <linux/parser.h> #include <linux/nsproxy.h> #include <linux/rcupdate.h> +#include <linux/kthread.h> #include <asm/uaccess.h> @@ -63,12 +64,14 @@ #include "iostat.h" #include "internal.h" #include "fscache.h" +#include "nfs4session.h" #include "pnfs.h" +#include "nfs.h" #define NFSDBG_FACILITY NFSDBG_VFS #define NFS_TEXT_DATA 1 -#ifdef CONFIG_NFS_V3 +#if IS_ENABLED(CONFIG_NFS_V3) #define NFS_DEFAULT_VERSION 3 #else #define NFS_DEFAULT_VERSION 2 @@ -87,6 +90,7 @@ enum { Opt_sharecache, Opt_nosharecache, Opt_resvport, Opt_noresvport, Opt_fscache, Opt_nofscache, + Opt_migration, Opt_nomigration, /* Mount options that take integer arguments */ Opt_port, @@ -146,6 +150,8 @@ static const match_table_t nfs_mount_option_tokens = { { Opt_noresvport, "noresvport" }, { Opt_fscache, "fsc" }, { Opt_nofscache, "nofsc" }, + { Opt_migration, "migration" }, + { 
Opt_nomigration, "nomigration" }, { Opt_port, "port=%s" }, { Opt_rsize, "rsize=%s" }, @@ -278,37 +284,17 @@ static match_table_t nfs_vers_tokens = { { Opt_vers_err, NULL } }; -struct nfs_mount_info { - void (*fill_super)(struct super_block *, struct nfs_mount_info *); - int (*set_security)(struct super_block *, struct dentry *, struct nfs_mount_info *); - struct nfs_parsed_mount_data *parsed; - struct nfs_clone_mount *cloned; - struct nfs_fh *mntfh; -}; - -static void nfs_umount_begin(struct super_block *); -static int nfs_statfs(struct dentry *, struct kstatfs *); -static int nfs_show_options(struct seq_file *, struct dentry *); -static int nfs_show_devname(struct seq_file *, struct dentry *); -static int nfs_show_path(struct seq_file *, struct dentry *); -static int nfs_show_stats(struct seq_file *, struct dentry *); -static struct dentry *nfs_fs_mount_common(struct file_system_type *, - struct nfs_server *, int, const char *, struct nfs_mount_info *); -static struct dentry *nfs_fs_mount(struct file_system_type *, - int, const char *, void *); static struct dentry *nfs_xdev_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data); -static void nfs_put_super(struct super_block *); -static void nfs_kill_super(struct super_block *); -static int nfs_remount(struct super_block *sb, int *flags, char *raw_data); -static struct file_system_type nfs_fs_type = { +struct file_system_type nfs_fs_type = { .owner = THIS_MODULE, .name = "nfs", .mount = nfs_fs_mount, .kill_sb = nfs_kill_super, .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, }; +EXPORT_SYMBOL_GPL(nfs_fs_type); struct file_system_type nfs_xdev_fs_type = { .owner = THIS_MODULE, @@ -318,10 +304,11 @@ struct file_system_type nfs_xdev_fs_type = { .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, }; -static const struct super_operations nfs_sops = { +const struct super_operations nfs_sops = { .alloc_inode = nfs_alloc_inode, .destroy_inode = 
nfs_destroy_inode, .write_inode = nfs_write_inode, + .drop_inode = nfs_drop_inode, .put_super = nfs_put_super, .statfs = nfs_statfs, .evict_inode = nfs_evict_inode, @@ -332,77 +319,40 @@ static const struct super_operations nfs_sops = { .show_stats = nfs_show_stats, .remount_fs = nfs_remount, }; +EXPORT_SYMBOL_GPL(nfs_sops); -#ifdef CONFIG_NFS_V4 +#if IS_ENABLED(CONFIG_NFS_V4) static void nfs4_validate_mount_flags(struct nfs_parsed_mount_data *); static int nfs4_validate_mount_data(void *options, struct nfs_parsed_mount_data *args, const char *dev_name); -static struct dentry *nfs4_try_mount(int flags, const char *dev_name, - struct nfs_mount_info *mount_info); -static struct dentry *nfs4_remote_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *raw_data); -static struct dentry *nfs4_xdev_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *raw_data); -static struct dentry *nfs4_referral_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *raw_data); -static struct dentry *nfs4_remote_referral_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *raw_data); -static void nfs4_kill_super(struct super_block *sb); - -static struct file_system_type nfs4_fs_type = { - .owner = THIS_MODULE, - .name = "nfs4", - .mount = nfs_fs_mount, - .kill_sb = nfs4_kill_super, - .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, -}; - -static struct file_system_type nfs4_remote_fs_type = { - .owner = THIS_MODULE, - .name = "nfs4", - .mount = nfs4_remote_mount, - .kill_sb = nfs4_kill_super, - .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, -}; -struct file_system_type nfs4_xdev_fs_type = { +struct file_system_type nfs4_fs_type = { .owner = THIS_MODULE, .name = "nfs4", - .mount = nfs4_xdev_mount, - .kill_sb = nfs4_kill_super, + .mount = nfs_fs_mount, + .kill_sb = nfs_kill_super, .fs_flags = 
FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, }; +EXPORT_SYMBOL_GPL(nfs4_fs_type); -static struct file_system_type nfs4_remote_referral_fs_type = { - .owner = THIS_MODULE, - .name = "nfs4", - .mount = nfs4_remote_referral_mount, - .kill_sb = nfs4_kill_super, - .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, -}; +static int __init register_nfs4_fs(void) +{ + return register_filesystem(&nfs4_fs_type); +} -struct file_system_type nfs4_referral_fs_type = { - .owner = THIS_MODULE, - .name = "nfs4", - .mount = nfs4_referral_mount, - .kill_sb = nfs4_kill_super, - .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, -}; +static void unregister_nfs4_fs(void) +{ + unregister_filesystem(&nfs4_fs_type); +} +#else +static int __init register_nfs4_fs(void) +{ + return 0; +} -static const struct super_operations nfs4_sops = { - .alloc_inode = nfs_alloc_inode, - .destroy_inode = nfs_destroy_inode, - .write_inode = nfs_write_inode, - .put_super = nfs_put_super, - .statfs = nfs_statfs, - .evict_inode = nfs4_evict_inode, - .umount_begin = nfs_umount_begin, - .show_options = nfs_show_options, - .show_devname = nfs_show_devname, - .show_path = nfs_show_path, - .show_stats = nfs_show_stats, - .remount_fs = nfs_remount, -}; +static void unregister_nfs4_fs(void) +{ +} #endif static struct shrinker acl_shrinker = { @@ -421,21 +371,18 @@ int __init register_nfs_fs(void) if (ret < 0) goto error_0; - ret = nfs_register_sysctl(); + ret = register_nfs4_fs(); if (ret < 0) goto error_1; -#ifdef CONFIG_NFS_V4 - ret = register_filesystem(&nfs4_fs_type); + + ret = nfs_register_sysctl(); if (ret < 0) goto error_2; -#endif register_shrinker(&acl_shrinker); return 0; -#ifdef CONFIG_NFS_V4 error_2: - nfs_unregister_sysctl(); -#endif + unregister_nfs4_fs(); error_1: unregister_filesystem(&nfs_fs_type); error_0: @@ -448,10 +395,8 @@ error_0: void __exit unregister_nfs_fs(void) { unregister_shrinker(&acl_shrinker); -#ifdef CONFIG_NFS_V4 - 
unregister_filesystem(&nfs4_fs_type); -#endif nfs_unregister_sysctl(); + unregister_nfs4_fs(); unregister_filesystem(&nfs_fs_type); } @@ -462,6 +407,7 @@ void nfs_sb_active(struct super_block *sb) if (atomic_inc_return(&server->active) == 1) atomic_inc(&sb->s_active); } +EXPORT_SYMBOL_GPL(nfs_sb_active); void nfs_sb_deactive(struct super_block *sb) { @@ -470,11 +416,60 @@ void nfs_sb_deactive(struct super_block *sb) if (atomic_dec_and_test(&server->active)) deactivate_super(sb); } +EXPORT_SYMBOL_GPL(nfs_sb_deactive); + +static int nfs_deactivate_super_async_work(void *ptr) +{ + struct super_block *sb = ptr; + + deactivate_super(sb); + module_put_and_exit(0); + return 0; +} + +/* + * same effect as deactivate_super, but will do final unmount in kthread + * context + */ +static void nfs_deactivate_super_async(struct super_block *sb) +{ + struct task_struct *task; + char buf[INET6_ADDRSTRLEN + 1]; + struct nfs_server *server = NFS_SB(sb); + struct nfs_client *clp = server->nfs_client; + + if (!atomic_add_unless(&sb->s_active, -1, 1)) { + rcu_read_lock(); + snprintf(buf, sizeof(buf), + rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR)); + rcu_read_unlock(); + + __module_get(THIS_MODULE); + task = kthread_run(nfs_deactivate_super_async_work, sb, + "%s-deactivate-super", buf); + if (IS_ERR(task)) { + pr_err("%s: kthread_run: %ld\n", + __func__, PTR_ERR(task)); + /* make synchronous call and hope for the best */ + deactivate_super(sb); + module_put(THIS_MODULE); + } + } +} + +void nfs_sb_deactive_async(struct super_block *sb) +{ + struct nfs_server *server = NFS_SB(sb); + + if (atomic_dec_and_test(&server->active)) + nfs_deactivate_super_async(sb); +} +EXPORT_SYMBOL_GPL(nfs_sb_deactive_async); /* * Deliver file system statistics to userspace */ -static int nfs_statfs(struct dentry *dentry, struct kstatfs *buf) +int nfs_statfs(struct dentry *dentry, struct kstatfs *buf) { struct nfs_server *server = NFS_SB(dentry->d_sb); unsigned char blockbits; @@ -535,6 +530,7 @@ 
static int nfs_statfs(struct dentry *dentry, struct kstatfs *buf) dprintk("%s: statfs error = %d\n", __func__, -error); return error; } +EXPORT_SYMBOL_GPL(nfs_statfs); /* * Map the security flavour number to a name @@ -640,7 +636,7 @@ static void nfs_show_mountd_options(struct seq_file *m, struct nfs_server *nfss, nfs_show_mountd_netid(m, nfss, showdefaults); } -#ifdef CONFIG_NFS_V4 +#if IS_ENABLED(CONFIG_NFS_V4) static void nfs_show_nfsv4_options(struct seq_file *m, struct nfs_server *nfss, int showdefaults) { @@ -734,6 +730,9 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, if (nfss->options & NFS_OPTION_FSCACHE) seq_printf(m, ",fsc"); + if (nfss->options & NFS_OPTION_MIGRATION) + seq_printf(m, ",migration"); + if (nfss->flags & NFS_MOUNT_LOOKUP_CACHE_NONEG) { if (nfss->flags & NFS_MOUNT_LOOKUP_CACHE_NONE) seq_printf(m, ",lookupcache=none"); @@ -757,7 +756,7 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, /* * Describe the mount options on this VFS mountpoint */ -static int nfs_show_options(struct seq_file *m, struct dentry *root) +int nfs_show_options(struct seq_file *m, struct dentry *root) { struct nfs_server *nfss = NFS_SB(root->d_sb); @@ -771,8 +770,9 @@ static int nfs_show_options(struct seq_file *m, struct dentry *root) return 0; } +EXPORT_SYMBOL_GPL(nfs_show_options); -#ifdef CONFIG_NFS_V4 +#if IS_ENABLED(CONFIG_NFS_V4) #ifdef CONFIG_NFS_V4_1 static void show_sessions(struct seq_file *m, struct nfs_server *server) { @@ -805,7 +805,7 @@ static void show_implementation_id(struct seq_file *m, struct nfs_server *nfss) } } #else -#ifdef CONFIG_NFS_V4 +#if IS_ENABLED(CONFIG_NFS_V4) static void show_pnfs(struct seq_file *m, struct nfs_server *server) { } @@ -815,14 +815,14 @@ static void show_implementation_id(struct seq_file *m, struct nfs_server *nfss) } #endif -static int nfs_show_devname(struct seq_file *m, struct dentry *root) +int nfs_show_devname(struct seq_file *m, struct dentry *root) 
{ char *page = (char *) __get_free_page(GFP_KERNEL); char *devname, *dummy; int err = 0; if (!page) return -ENOMEM; - devname = nfs_path(&dummy, root, page, PAGE_SIZE); + devname = nfs_path(&dummy, root, page, PAGE_SIZE, 0); if (IS_ERR(devname)) err = PTR_ERR(devname); else @@ -830,17 +830,19 @@ static int nfs_show_devname(struct seq_file *m, struct dentry *root) free_page((unsigned long)page); return err; } +EXPORT_SYMBOL_GPL(nfs_show_devname); -static int nfs_show_path(struct seq_file *m, struct dentry *dentry) +int nfs_show_path(struct seq_file *m, struct dentry *dentry) { seq_puts(m, "/"); return 0; } +EXPORT_SYMBOL_GPL(nfs_show_path); /* * Present statistical information for this VFS mountpoint */ -static int nfs_show_stats(struct seq_file *m, struct dentry *root) +int nfs_show_stats(struct seq_file *m, struct dentry *root) { int i, cpu; struct nfs_server *nfss = NFS_SB(root->d_sb); @@ -870,7 +872,7 @@ static int nfs_show_stats(struct seq_file *m, struct dentry *root) seq_printf(m, ",bsize=%u", nfss->bsize); seq_printf(m, ",namlen=%u", nfss->namelen); -#ifdef CONFIG_NFS_V4 +#if IS_ENABLED(CONFIG_NFS_V4) if (nfss->nfs_client->rpc_ops->version == 4) { seq_printf(m, "\n\tnfsv4:\t"); seq_printf(m, "bm0=0x%x", nfss->attr_bitmask[0]); @@ -928,12 +930,13 @@ static int nfs_show_stats(struct seq_file *m, struct dentry *root) return 0; } +EXPORT_SYMBOL_GPL(nfs_show_stats); /* * Begin unmount by attempting to remove all automounted mountpoints we added * in response to xdev traversals and referrals */ -static void nfs_umount_begin(struct super_block *sb) +void nfs_umount_begin(struct super_block *sb) { struct nfs_server *server; struct rpc_clnt *rpc; @@ -947,6 +950,7 @@ static void nfs_umount_begin(struct super_block *sb) if (!IS_ERR(rpc)) rpc_killall_tasks(rpc); } +EXPORT_SYMBOL_GPL(nfs_umount_begin); static struct nfs_parsed_mount_data *nfs_alloc_parsed_mount_data(void) { @@ -1148,7 +1152,7 @@ static int nfs_get_option_str(substring_t args[], char **option) { 
kfree(*option); *option = match_strdup(args); - return !option; + return !*option; } static int nfs_get_option_ul(substring_t args[], unsigned long *option) @@ -1159,7 +1163,7 @@ static int nfs_get_option_ul(substring_t args[], unsigned long *option) string = match_strdup(args); if (string == NULL) return -ENOMEM; - rc = strict_strtoul(string, 10, option); + rc = kstrtoul(string, 10, option); kfree(string); return rc; @@ -1296,6 +1300,12 @@ static int nfs_parse_mount_options(char *raw, kfree(mnt->fscache_uniq); mnt->fscache_uniq = NULL; break; + case Opt_migration: + mnt->options |= NFS_OPTION_MIGRATION; + break; + case Opt_nomigration: + mnt->options &= NFS_OPTION_MIGRATION; + break; /* * options that take numeric values @@ -1588,9 +1598,13 @@ static int nfs_parse_mount_options(char *raw, if (mnt->minorversion && mnt->version != 4) goto out_minorversion_mismatch; + if (mnt->options & NFS_OPTION_MIGRATION && + mnt->version != 4 && mnt->minorversion != 0) + goto out_migration_misuse; + /* * verify that any proto=/mountproto= options match the address - * familiies in the addr=/mountaddr= options. + * families in the addr=/mountaddr= options. 
*/ if (protofamily != AF_UNSPEC && protofamily != mnt->nfs_server.address.ss_family) @@ -1625,6 +1639,10 @@ out_minorversion_mismatch: printk(KERN_INFO "NFS: mount option vers=%u does not support " "minorversion=%u\n", mnt->version, mnt->minorversion); return 0; +out_migration_misuse: + printk(KERN_INFO + "NFS: 'migration' not supported for this NFS version\n"); + return 0; out_nomem: printk(KERN_INFO "NFS: not enough memory to parse option\n"); return 0; @@ -1748,8 +1766,9 @@ static int nfs_request_mount(struct nfs_parsed_mount_data *args, return nfs_walk_authlist(args, &request); } -static struct dentry *nfs_try_mount(int flags, const char *dev_name, - struct nfs_mount_info *mount_info) +struct dentry *nfs_try_mount(int flags, const char *dev_name, + struct nfs_mount_info *mount_info, + struct nfs_subversion *nfs_mod) { int status; struct nfs_server *server; @@ -1761,12 +1780,13 @@ static struct dentry *nfs_try_mount(int flags, const char *dev_name, } /* Get a volume representation */ - server = nfs_create_server(mount_info->parsed, mount_info->mntfh); + server = nfs_mod->rpc_ops->create_server(mount_info, nfs_mod); if (IS_ERR(server)) return ERR_CAST(server); - return nfs_fs_mount_common(&nfs_fs_type, server, flags, dev_name, mount_info); + return nfs_fs_mount_common(server, flags, dev_name, mount_info, nfs_mod); } +EXPORT_SYMBOL_GPL(nfs_try_mount); /* * Split "dev_name" into "hostname:export_path". 
@@ -1918,6 +1938,7 @@ static int nfs23_validate_mount_data(void *options, memcpy(sap, &data->addr, sizeof(data->addr)); args->nfs_server.addrlen = sizeof(data->addr); + args->nfs_server.port = ntohs(data->addr.sin_port); if (!nfs_verify_server_address(sap)) goto out_no_address; @@ -1970,7 +1991,7 @@ static int nfs23_validate_mount_data(void *options, return NFS_TEXT_DATA; } -#ifndef CONFIG_NFS_V3 +#if !IS_ENABLED(CONFIG_NFS_V3) if (args->version == 3) goto out_v3_not_compiled; #endif /* !CONFIG_NFS_V3 */ @@ -1990,7 +2011,7 @@ out_no_sec: dfprintk(MOUNT, "NFS: nfs_mount_data version supports only AUTH_SYS\n"); return -EINVAL; -#ifndef CONFIG_NFS_V3 +#if !IS_ENABLED(CONFIG_NFS_V3) out_v3_not_compiled: dfprintk(MOUNT, "NFS: NFSv3 is not compiled into kernel\n"); return -EPROTONOSUPPORT; @@ -2009,7 +2030,7 @@ out_invalid_fh: return -EINVAL; } -#ifdef CONFIG_NFS_V4 +#if IS_ENABLED(CONFIG_NFS_V4) static int nfs_validate_mount_data(struct file_system_type *fs_type, void *options, struct nfs_parsed_mount_data *args, @@ -2047,7 +2068,7 @@ static int nfs_validate_text_mount_data(void *options, goto out_no_address; if (args->version == 4) { -#ifdef CONFIG_NFS_V4 +#if IS_ENABLED(CONFIG_NFS_V4) port = NFS_PORT; max_namelen = NFS4_MAXNAMLEN; max_pathlen = NFS4_MAXPATHLEN; @@ -2070,7 +2091,7 @@ static int nfs_validate_text_mount_data(void *options, &args->nfs_server.export_path, max_pathlen); -#ifndef CONFIG_NFS_V4 +#if !IS_ENABLED(CONFIG_NFS_V4) out_v4_not_compiled: dfprintk(MOUNT, "NFS: NFSv4 is not compiled into kernel\n"); return -EPROTONOSUPPORT; @@ -2108,7 +2129,7 @@ nfs_compare_remount_data(struct nfs_server *nfss, return 0; } -static int +int nfs_remount(struct super_block *sb, int *flags, char *raw_data) { int error; @@ -2169,11 +2190,12 @@ out: kfree(data); return error; } +EXPORT_SYMBOL_GPL(nfs_remount); /* * Initialise the common bits of the superblock */ -static inline void nfs_initialise_sb(struct super_block *sb) +inline void nfs_initialise_sb(struct super_block 
*sb) { struct nfs_server *server = NFS_SB(sb); @@ -2195,18 +2217,19 @@ static inline void nfs_initialise_sb(struct super_block *sb) /* * Finish setting up an NFS2/3 superblock */ -static void nfs_fill_super(struct super_block *sb, - struct nfs_mount_info *mount_info) +void nfs_fill_super(struct super_block *sb, struct nfs_mount_info *mount_info) { struct nfs_parsed_mount_data *data = mount_info->parsed; struct nfs_server *server = NFS_SB(sb); sb->s_blocksize_bits = 0; sb->s_blocksize = 0; - if (data->bsize) + sb->s_xattr = server->nfs_client->cl_nfs_mod->xattr; + sb->s_op = server->nfs_client->cl_nfs_mod->sops; + if (data && data->bsize) sb->s_blocksize = nfs_block_size(data->bsize, &sb->s_blocksize_bits); - if (server->nfs_client->rpc_ops->version == 3) { + if (server->nfs_client->rpc_ops->version != 2) { /* The VFS shouldn't apply the umask to mode bits. We will do * so ourselves when necessary. */ @@ -2214,15 +2237,14 @@ static void nfs_fill_super(struct super_block *sb, sb->s_time_gran = 1; } - sb->s_op = &nfs_sops; nfs_initialise_sb(sb); } +EXPORT_SYMBOL_GPL(nfs_fill_super); /* - * Finish setting up a cloned NFS2/3 superblock + * Finish setting up a cloned NFS2/3/4 superblock */ -static void nfs_clone_super(struct super_block *sb, - struct nfs_mount_info *mount_info) +void nfs_clone_super(struct super_block *sb, struct nfs_mount_info *mount_info) { const struct super_block *old_sb = mount_info->cloned->sb; struct nfs_server *server = NFS_SB(sb); @@ -2230,16 +2252,17 @@ static void nfs_clone_super(struct super_block *sb, sb->s_blocksize_bits = old_sb->s_blocksize_bits; sb->s_blocksize = old_sb->s_blocksize; sb->s_maxbytes = old_sb->s_maxbytes; + sb->s_xattr = old_sb->s_xattr; + sb->s_op = old_sb->s_op; + sb->s_time_gran = 1; - if (server->nfs_client->rpc_ops->version == 3) { + if (server->nfs_client->rpc_ops->version != 2) { /* The VFS shouldn't apply the umask to mode bits. We will do * so ourselves when necessary. 
*/ sb->s_flags |= MS_POSIXACL; - sb->s_time_gran = 1; } - sb->s_op = old_sb->s_op; nfs_initialise_sb(sb); } @@ -2352,19 +2375,30 @@ static void nfs_get_cache_cookie(struct super_block *sb, struct nfs_parsed_mount_data *parsed, struct nfs_clone_mount *cloned) { + struct nfs_server *nfss = NFS_SB(sb); char *uniq = NULL; int ulen = 0; - if (parsed && parsed->fscache_uniq) { - uniq = parsed->fscache_uniq; - ulen = strlen(parsed->fscache_uniq); + nfss->fscache_key = NULL; + nfss->fscache = NULL; + + if (parsed) { + if (!(parsed->options & NFS_OPTION_FSCACHE)) + return; + if (parsed->fscache_uniq) { + uniq = parsed->fscache_uniq; + ulen = strlen(parsed->fscache_uniq); + } } else if (cloned) { struct nfs_server *mnt_s = NFS_SB(cloned->sb); + if (!(mnt_s->options & NFS_OPTION_FSCACHE)) + return; if (mnt_s->fscache_key) { uniq = mnt_s->fscache_key->key.uniquifier; ulen = mnt_s->fscache_key->key.uniq_len; }; - } + } else + return; nfs_fscache_get_super_cookie(sb, uniq, ulen); } @@ -2381,14 +2415,15 @@ static int nfs_bdi_register(struct nfs_server *server) return bdi_register_dev(&server->backing_dev_info, server->s_dev); } -static int nfs_set_sb_security(struct super_block *s, struct dentry *mntroot, - struct nfs_mount_info *mount_info) +int nfs_set_sb_security(struct super_block *s, struct dentry *mntroot, + struct nfs_mount_info *mount_info) { return security_sb_set_mnt_opts(s, &mount_info->parsed->lsm_opts); } +EXPORT_SYMBOL_GPL(nfs_set_sb_security); -static int nfs_clone_sb_security(struct super_block *s, struct dentry *mntroot, - struct nfs_mount_info *mount_info) +int nfs_clone_sb_security(struct super_block *s, struct dentry *mntroot, + struct nfs_mount_info *mount_info) { /* clone any lsm security options from the parent to the new sb */ security_sb_clone_mnt_opts(mount_info->cloned->sb, s); @@ -2396,11 +2431,12 @@ static int nfs_clone_sb_security(struct super_block *s, struct dentry *mntroot, return -ESTALE; return 0; } +EXPORT_SYMBOL_GPL(nfs_clone_sb_security); 
-static struct dentry *nfs_fs_mount_common(struct file_system_type *fs_type, - struct nfs_server *server, - int flags, const char *dev_name, - struct nfs_mount_info *mount_info) +struct dentry *nfs_fs_mount_common(struct nfs_server *server, + int flags, const char *dev_name, + struct nfs_mount_info *mount_info, + struct nfs_subversion *nfs_mod) { struct super_block *s; struct dentry *mntroot = ERR_PTR(-ENOMEM); @@ -2419,7 +2455,7 @@ static struct dentry *nfs_fs_mount_common(struct file_system_type *fs_type, sb_mntdata.mntflags |= MS_SYNCHRONOUS; /* Get a superblock - note that we may end up sharing one that already exists */ - s = sget(fs_type, compare_super, nfs_set_super, &sb_mntdata); + s = sget(nfs_mod->nfs_fs, compare_super, nfs_set_super, flags, &sb_mntdata); if (IS_ERR(s)) { mntroot = ERR_CAST(s); goto out_err_nosb; @@ -2469,8 +2505,9 @@ error_splat_bdi: deactivate_locked_super(s); goto out; } +EXPORT_SYMBOL_GPL(nfs_fs_mount_common); -static struct dentry *nfs_fs_mount(struct file_system_type *fs_type, +struct dentry *nfs_fs_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data) { struct nfs_mount_info mount_info = { @@ -2478,6 +2515,7 @@ static struct dentry *nfs_fs_mount(struct file_system_type *fs_type, .set_security = nfs_set_sb_security, }; struct dentry *mntroot = ERR_PTR(-ENOMEM); + struct nfs_subversion *nfs_mod; int error; mount_info.parsed = nfs_alloc_parsed_mount_data(); @@ -2494,34 +2532,38 @@ static struct dentry *nfs_fs_mount(struct file_system_type *fs_type, goto out; } -#ifdef CONFIG_NFS_V4 - if (mount_info.parsed->version == 4) - mntroot = nfs4_try_mount(flags, dev_name, &mount_info); - else -#endif /* CONFIG_NFS_V4 */ - mntroot = nfs_try_mount(flags, dev_name, &mount_info); + nfs_mod = get_nfs_version(mount_info.parsed->version); + if (IS_ERR(nfs_mod)) { + mntroot = ERR_CAST(nfs_mod); + goto out; + } + mntroot = nfs_mod->rpc_ops->try_mount(flags, dev_name, &mount_info, nfs_mod); + + 
put_nfs_version(nfs_mod); out: nfs_free_parsed_mount_data(mount_info.parsed); nfs_free_fhandle(mount_info.mntfh); return mntroot; } +EXPORT_SYMBOL_GPL(nfs_fs_mount); /* * Ensure that we unregister the bdi before kill_anon_super * releases the device name */ -static void nfs_put_super(struct super_block *s) +void nfs_put_super(struct super_block *s) { struct nfs_server *server = NFS_SB(s); bdi_unregister(&server->backing_dev_info); } +EXPORT_SYMBOL_GPL(nfs_put_super); /* * Destroy an NFS2/3 superblock */ -static void nfs_kill_super(struct super_block *s) +void nfs_kill_super(struct super_block *s) { struct nfs_server *server = NFS_SB(s); @@ -2529,95 +2571,45 @@ static void nfs_kill_super(struct super_block *s) nfs_fscache_release_super_cookie(s); nfs_free_server(server); } +EXPORT_SYMBOL_GPL(nfs_kill_super); /* * Clone an NFS2/3/4 server record on xdev traversal (FSID-change) */ static struct dentry * -nfs_xdev_mount_common(struct file_system_type *fs_type, int flags, - const char *dev_name, struct nfs_mount_info *mount_info) -{ - struct nfs_clone_mount *data = mount_info->cloned; - struct nfs_server *server; - struct dentry *mntroot = ERR_PTR(-ENOMEM); - int error; - - dprintk("--> nfs_xdev_mount_common()\n"); - - mount_info->mntfh = data->fh; - - /* create a new volume representation */ - server = nfs_clone_server(NFS_SB(data->sb), data->fh, data->fattr, data->authflavor); - if (IS_ERR(server)) { - error = PTR_ERR(server); - goto out_err; - } - - mntroot = nfs_fs_mount_common(fs_type, server, flags, dev_name, mount_info); - dprintk("<-- nfs_xdev_mount_common() = 0\n"); -out: - return mntroot; - -out_err: - dprintk("<-- nfs_xdev_mount_common() = %d [error]\n", error); - goto out; -} - -/* - * Clone an NFS2/3 server record on xdev traversal (FSID-change) - */ -static struct dentry * nfs_xdev_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data) { + struct nfs_clone_mount *data = raw_data; struct nfs_mount_info mount_info = { 
.fill_super = nfs_clone_super, .set_security = nfs_clone_sb_security, - .cloned = raw_data, + .cloned = data, }; - return nfs_xdev_mount_common(&nfs_fs_type, flags, dev_name, &mount_info); -} + struct nfs_server *server; + struct dentry *mntroot = ERR_PTR(-ENOMEM); + struct nfs_subversion *nfs_mod = NFS_SB(data->sb)->nfs_client->cl_nfs_mod; -#ifdef CONFIG_NFS_V4 + dprintk("--> nfs_xdev_mount()\n"); -/* - * Finish setting up a cloned NFS4 superblock - */ -static void nfs4_clone_super(struct super_block *sb, - struct nfs_mount_info *mount_info) -{ - const struct super_block *old_sb = mount_info->cloned->sb; - sb->s_blocksize_bits = old_sb->s_blocksize_bits; - sb->s_blocksize = old_sb->s_blocksize; - sb->s_maxbytes = old_sb->s_maxbytes; - sb->s_time_gran = 1; - sb->s_op = old_sb->s_op; - /* - * The VFS shouldn't apply the umask to mode bits. We will do - * so ourselves when necessary. - */ - sb->s_flags |= MS_POSIXACL; - sb->s_xattr = old_sb->s_xattr; - nfs_initialise_sb(sb); -} + mount_info.mntfh = mount_info.cloned->fh; -/* - * Set up an NFS4 superblock - */ -static void nfs4_fill_super(struct super_block *sb, - struct nfs_mount_info *mount_info) -{ - sb->s_time_gran = 1; - sb->s_op = &nfs4_sops; - /* - * The VFS shouldn't apply the umask to mode bits. We will do - * so ourselves when necessary. - */ - sb->s_flags |= MS_POSIXACL; - sb->s_xattr = nfs4_xattr_handlers; - nfs_initialise_sb(sb); + /* create a new volume representation */ + server = nfs_mod->rpc_ops->clone_server(NFS_SB(data->sb), data->fh, data->fattr, data->authflavor); + + if (IS_ERR(server)) + mntroot = ERR_CAST(server); + else + mntroot = nfs_fs_mount_common(server, flags, + dev_name, &mount_info, nfs_mod); + + dprintk("<-- nfs_xdev_mount() = %ld\n", + IS_ERR(mntroot) ? 
PTR_ERR(mntroot) : 0L); + return mntroot; } +#if IS_ENABLED(CONFIG_NFS_V4) + static void nfs4_validate_mount_flags(struct nfs_parsed_mount_data *args) { args->flags &= ~(NFS_MOUNT_NONLM|NFS_MOUNT_NOACL|NFS_MOUNT_VER3| @@ -2651,6 +2643,7 @@ static int nfs4_validate_mount_data(void *options, return -EFAULT; if (!nfs_verify_server_address(sap)) goto out_no_address; + args->nfs_server.port = ntohs(((struct sockaddr_in *)sap)->sin_port); if (data->auth_flavourlen) { if (data->auth_flavourlen > 1) @@ -2716,249 +2709,62 @@ out_no_address: } /* - * Get the superblock for the NFS4 root partition + * NFS v4 module parameters need to stay in the + * NFS client for backwards compatibility */ -static struct dentry * -nfs4_remote_mount(struct file_system_type *fs_type, int flags, - const char *dev_name, void *info) -{ - struct nfs_mount_info *mount_info = info; - struct nfs_server *server; - struct dentry *mntroot = ERR_PTR(-ENOMEM); - - mount_info->fill_super = nfs4_fill_super; - mount_info->set_security = nfs_set_sb_security; - - /* Get a volume representation */ - server = nfs4_create_server(mount_info->parsed, mount_info->mntfh); - if (IS_ERR(server)) { - mntroot = ERR_CAST(server); - goto out; - } - - mntroot = nfs_fs_mount_common(fs_type, server, flags, dev_name, mount_info); - -out: - return mntroot; -} - -static struct vfsmount *nfs_do_root_mount(struct file_system_type *fs_type, - int flags, void *data, const char *hostname) -{ - struct vfsmount *root_mnt; - char *root_devname; - size_t len; +unsigned int nfs_callback_set_tcpport; +unsigned short nfs_callback_tcpport; +/* Default cache timeout is 10 minutes */ +unsigned int nfs_idmap_cache_timeout = 600; +/* Turn off NFSv4 uid/gid mapping when using AUTH_SYS */ +bool nfs4_disable_idmapping = true; +unsigned short max_session_slots = NFS4_DEF_SLOT_TABLE_SIZE; +unsigned short send_implementation_id = 1; +char nfs4_client_id_uniquifier[NFS4_CLIENT_ID_UNIQ_LEN] = ""; + +EXPORT_SYMBOL_GPL(nfs_callback_set_tcpport); 
+EXPORT_SYMBOL_GPL(nfs_callback_tcpport); +EXPORT_SYMBOL_GPL(nfs_idmap_cache_timeout); +EXPORT_SYMBOL_GPL(nfs4_disable_idmapping); +EXPORT_SYMBOL_GPL(max_session_slots); +EXPORT_SYMBOL_GPL(send_implementation_id); +EXPORT_SYMBOL_GPL(nfs4_client_id_uniquifier); + +#define NFS_CALLBACK_MAXPORTNR (65535U) + +static int param_set_portnr(const char *val, const struct kernel_param *kp) +{ + unsigned long num; + int ret; - len = strlen(hostname) + 5; - root_devname = kmalloc(len, GFP_KERNEL); - if (root_devname == NULL) - return ERR_PTR(-ENOMEM); - /* Does hostname needs to be enclosed in brackets? */ - if (strchr(hostname, ':')) - snprintf(root_devname, len, "[%s]:/", hostname); - else - snprintf(root_devname, len, "%s:/", hostname); - root_mnt = vfs_kern_mount(fs_type, flags, root_devname, data); - kfree(root_devname); - return root_mnt; + if (!val) + return -EINVAL; + ret = kstrtoul(val, 0, &num); + if (ret == -EINVAL || num > NFS_CALLBACK_MAXPORTNR) + return -EINVAL; + *((unsigned int *)kp->arg) = num; + return 0; } - -struct nfs_referral_count { - struct list_head list; - const struct task_struct *task; - unsigned int referral_count; +static struct kernel_param_ops param_ops_portnr = { + .set = param_set_portnr, + .get = param_get_uint, }; - -static LIST_HEAD(nfs_referral_count_list); -static DEFINE_SPINLOCK(nfs_referral_count_list_lock); - -static struct nfs_referral_count *nfs_find_referral_count(void) -{ - struct nfs_referral_count *p; - - list_for_each_entry(p, &nfs_referral_count_list, list) { - if (p->task == current) - return p; - } - return NULL; -} - -#define NFS_MAX_NESTED_REFERRALS 2 - -static int nfs_referral_loop_protect(void) -{ - struct nfs_referral_count *p, *new; - int ret = -ENOMEM; - - new = kmalloc(sizeof(*new), GFP_KERNEL); - if (!new) - goto out; - new->task = current; - new->referral_count = 1; - - ret = 0; - spin_lock(&nfs_referral_count_list_lock); - p = nfs_find_referral_count(); - if (p != NULL) { - if (p->referral_count >= 
NFS_MAX_NESTED_REFERRALS) - ret = -ELOOP; - else - p->referral_count++; - } else { - list_add(&new->list, &nfs_referral_count_list); - new = NULL; - } - spin_unlock(&nfs_referral_count_list_lock); - kfree(new); -out: - return ret; -} - -static void nfs_referral_loop_unprotect(void) -{ - struct nfs_referral_count *p; - - spin_lock(&nfs_referral_count_list_lock); - p = nfs_find_referral_count(); - p->referral_count--; - if (p->referral_count == 0) - list_del(&p->list); - else - p = NULL; - spin_unlock(&nfs_referral_count_list_lock); - kfree(p); -} - -static struct dentry *nfs_follow_remote_path(struct vfsmount *root_mnt, - const char *export_path) -{ - struct dentry *dentry; - int err; - - if (IS_ERR(root_mnt)) - return ERR_CAST(root_mnt); - - err = nfs_referral_loop_protect(); - if (err) { - mntput(root_mnt); - return ERR_PTR(err); - } - - dentry = mount_subtree(root_mnt, export_path); - nfs_referral_loop_unprotect(); - - return dentry; -} - -static struct dentry *nfs4_try_mount(int flags, const char *dev_name, - struct nfs_mount_info *mount_info) -{ - char *export_path; - struct vfsmount *root_mnt; - struct dentry *res; - struct nfs_parsed_mount_data *data = mount_info->parsed; - - dfprintk(MOUNT, "--> nfs4_try_mount()\n"); - - mount_info->fill_super = nfs4_fill_super; - - export_path = data->nfs_server.export_path; - data->nfs_server.export_path = "/"; - root_mnt = nfs_do_root_mount(&nfs4_remote_fs_type, flags, mount_info, - data->nfs_server.hostname); - data->nfs_server.export_path = export_path; - - res = nfs_follow_remote_path(root_mnt, export_path); - - dfprintk(MOUNT, "<-- nfs4_try_mount() = %ld%s\n", - IS_ERR(res) ? PTR_ERR(res) : 0, - IS_ERR(res) ? 
" [error]" : ""); - return res; -} - -static void nfs4_kill_super(struct super_block *sb) -{ - struct nfs_server *server = NFS_SB(sb); - - dprintk("--> %s\n", __func__); - nfs_super_return_all_delegations(sb); - kill_anon_super(sb); - nfs_fscache_release_super_cookie(sb); - nfs_free_server(server); - dprintk("<-- %s\n", __func__); -} - -/* - * Clone an NFS4 server record on xdev traversal (FSID-change) - */ -static struct dentry * -nfs4_xdev_mount(struct file_system_type *fs_type, int flags, - const char *dev_name, void *raw_data) -{ - struct nfs_mount_info mount_info = { - .fill_super = nfs4_clone_super, - .set_security = nfs_clone_sb_security, - .cloned = raw_data, - }; - return nfs_xdev_mount_common(&nfs4_fs_type, flags, dev_name, &mount_info); -} - -static struct dentry * -nfs4_remote_referral_mount(struct file_system_type *fs_type, int flags, - const char *dev_name, void *raw_data) -{ - struct nfs_mount_info mount_info = { - .fill_super = nfs4_fill_super, - .set_security = nfs_clone_sb_security, - .cloned = raw_data, - }; - struct nfs_server *server; - struct dentry *mntroot = ERR_PTR(-ENOMEM); - - dprintk("--> nfs4_referral_get_sb()\n"); - - mount_info.mntfh = nfs_alloc_fhandle(); - if (mount_info.cloned == NULL || mount_info.mntfh == NULL) - goto out; - - /* create a new volume representation */ - server = nfs4_create_referral_server(mount_info.cloned, mount_info.mntfh); - if (IS_ERR(server)) { - mntroot = ERR_CAST(server); - goto out; - } - - mntroot = nfs_fs_mount_common(&nfs4_fs_type, server, flags, dev_name, &mount_info); -out: - nfs_free_fhandle(mount_info.mntfh); - return mntroot; -} - -/* - * Create an NFS4 server record on referral traversal - */ -static struct dentry *nfs4_referral_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *raw_data) -{ - struct nfs_clone_mount *data = raw_data; - char *export_path; - struct vfsmount *root_mnt; - struct dentry *res; - - dprintk("--> nfs4_referral_mount()\n"); - - export_path = 
data->mnt_path; - data->mnt_path = "/"; - - root_mnt = nfs_do_root_mount(&nfs4_remote_referral_fs_type, - flags, data, data->hostname); - data->mnt_path = export_path; - - res = nfs_follow_remote_path(root_mnt, export_path); - dprintk("<-- nfs4_referral_mount() = %ld%s\n", - IS_ERR(res) ? PTR_ERR(res) : 0, - IS_ERR(res) ? " [error]" : ""); - return res; -} +#define param_check_portnr(name, p) __param_check(name, p, unsigned int); + +module_param_named(callback_tcpport, nfs_callback_set_tcpport, portnr, 0644); +module_param(nfs_idmap_cache_timeout, int, 0644); +module_param(nfs4_disable_idmapping, bool, 0644); +module_param_string(nfs4_unique_id, nfs4_client_id_uniquifier, + NFS4_CLIENT_ID_UNIQ_LEN, 0600); +MODULE_PARM_DESC(nfs4_disable_idmapping, + "Turn off NFSv4 idmapping when using 'sec=sys'"); +module_param(max_session_slots, ushort, 0644); +MODULE_PARM_DESC(max_session_slots, "Maximum number of outstanding NFSv4.1 " + "requests the client will negotiate"); +module_param(send_implementation_id, ushort, 0644); +MODULE_PARM_DESC(send_implementation_id, + "Send implementation ID with NFSv4.1 exchange_id"); +MODULE_PARM_DESC(nfs4_unique_id, "nfs_client_id4 uniquifier string"); +MODULE_ALIAS("nfs4"); #endif /* CONFIG_NFS_V4 */ diff --git a/fs/nfs/sysctl.c b/fs/nfs/sysctl.c index ad4d2e787b2..6b3f2535a3e 100644 --- a/fs/nfs/sysctl.c +++ b/fs/nfs/sysctl.c @@ -9,37 +9,11 @@ #include <linux/fs.h> #include <linux/sysctl.h> #include <linux/module.h> -#include <linux/nfs4.h> -#include <linux/nfs_idmap.h> #include <linux/nfs_fs.h> -#include "callback.h" - -#ifdef CONFIG_NFS_V4 -static const int nfs_set_port_min = 0; -static const int nfs_set_port_max = 65535; -#endif static struct ctl_table_header *nfs_callback_sysctl_table; static ctl_table nfs_cb_sysctls[] = { -#ifdef CONFIG_NFS_V4 - { - .procname = "nfs_callback_tcpport", - .data = &nfs_callback_set_tcpport, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = (int 
*)&nfs_set_port_min, - .extra2 = (int *)&nfs_set_port_max, - }, - { - .procname = "idmap_cache_timeout", - .data = &nfs_idmap_cache_timeout, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, -#endif { .procname = "nfs_mountpoint_timeout", .data = &nfs_mountpoint_expiry_timeout, diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c index 3210a03342f..3f79c77153b 100644 --- a/fs/nfs/unlink.c +++ b/fs/nfs/unlink.c @@ -95,7 +95,7 @@ static void nfs_async_unlink_release(void *calldata) nfs_dec_sillycount(data->dir); nfs_free_unlinkdata(data); - nfs_sb_deactive(sb); + nfs_sb_deactive_async(sb); } static void nfs_unlink_prepare(struct rpc_task *task, void *calldata) @@ -501,7 +501,7 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry) (unsigned long long)NFS_FILEID(dentry->d_inode)); /* Return delegation in anticipation of the rename */ - nfs_inode_return_delegation(dentry->d_inode); + NFS_PROTO(dentry->d_inode)->return_delegation(dentry->d_inode); sdentry = NULL; do { diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 4d6861c0dc1..c483cc50b82 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -52,7 +52,7 @@ static mempool_t *nfs_commit_mempool; struct nfs_commit_data *nfs_commitdata_alloc(void) { - struct nfs_commit_data *p = mempool_alloc(nfs_commit_mempool, GFP_NOFS); + struct nfs_commit_data *p = mempool_alloc(nfs_commit_mempool, GFP_NOIO); if (p) { memset(p, 0, sizeof(*p)); @@ -70,7 +70,7 @@ EXPORT_SYMBOL_GPL(nfs_commit_free); struct nfs_write_header *nfs_writehdr_alloc(void) { - struct nfs_write_header *p = mempool_alloc(nfs_wdata_mempool, GFP_NOFS); + struct nfs_write_header *p = mempool_alloc(nfs_wdata_mempool, GFP_NOIO); if (p) { struct nfs_pgio_header *hdr = &p->header; @@ -84,6 +84,7 @@ struct nfs_write_header *nfs_writehdr_alloc(void) } return p; } +EXPORT_SYMBOL_GPL(nfs_writehdr_alloc); static struct nfs_write_data *nfs_writedata_alloc(struct nfs_pgio_header *hdr, unsigned int pagecount) @@ -115,6 +116,7 @@ void 
nfs_writehdr_free(struct nfs_pgio_header *hdr) struct nfs_write_header *whdr = container_of(hdr, struct nfs_write_header, header); mempool_free(whdr, nfs_wdata_mempool); } +EXPORT_SYMBOL_GPL(nfs_writehdr_free); void nfs_writedata_release(struct nfs_write_data *wdata) { @@ -124,13 +126,18 @@ void nfs_writedata_release(struct nfs_write_data *wdata) put_nfs_open_context(wdata->args.context); if (wdata->pages.pagevec != wdata->pages.page_array) kfree(wdata->pages.pagevec); - if (wdata != &write_header->rpc_data) - kfree(wdata); - else + if (wdata == &write_header->rpc_data) { wdata->header = NULL; + wdata = NULL; + } if (atomic_dec_and_test(&hdr->refcnt)) hdr->completion_ops->completion(hdr); + /* Note: we only free the rpc_task after callbacks are done. + * See the comment in rpc_free_task() for why + */ + kfree(wdata); } +EXPORT_SYMBOL_GPL(nfs_writedata_release); static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error) { @@ -139,25 +146,38 @@ static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error) set_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags); } -static struct nfs_page *nfs_page_find_request_locked(struct page *page) +static struct nfs_page * +nfs_page_find_request_locked(struct nfs_inode *nfsi, struct page *page) { struct nfs_page *req = NULL; - if (PagePrivate(page)) { + if (PagePrivate(page)) req = (struct nfs_page *)page_private(page); - if (req != NULL) - kref_get(&req->wb_kref); + else if (unlikely(PageSwapCache(page))) { + struct nfs_page *freq, *t; + + /* Linearly search the commit list for the correct req */ + list_for_each_entry_safe(freq, t, &nfsi->commit_info.list, wb_list) { + if (freq->wb_page == page) { + req = freq; + break; + } + } } + + if (req) + kref_get(&req->wb_kref); + return req; } static struct nfs_page *nfs_page_find_request(struct page *page) { - struct inode *inode = page->mapping->host; + struct inode *inode = page_file_mapping(page)->host; struct nfs_page *req = NULL; 
spin_lock(&inode->i_lock); - req = nfs_page_find_request_locked(page); + req = nfs_page_find_request_locked(NFS_I(inode), page); spin_unlock(&inode->i_lock); return req; } @@ -165,16 +185,16 @@ static struct nfs_page *nfs_page_find_request(struct page *page) /* Adjust the file length if we're writing beyond the end */ static void nfs_grow_file(struct page *page, unsigned int offset, unsigned int count) { - struct inode *inode = page->mapping->host; + struct inode *inode = page_file_mapping(page)->host; loff_t end, i_size; pgoff_t end_index; spin_lock(&inode->i_lock); i_size = i_size_read(inode); end_index = (i_size - 1) >> PAGE_CACHE_SHIFT; - if (i_size > 0 && page->index < end_index) + if (i_size > 0 && page_file_index(page) < end_index) goto out; - end = ((loff_t)page->index << PAGE_CACHE_SHIFT) + ((loff_t)offset+count); + end = page_file_offset(page) + ((loff_t)offset+count); if (i_size >= end) goto out; i_size_write(inode, end); @@ -186,8 +206,7 @@ out: /* A writeback failed: mark the page as bad, and invalidate the page cache */ static void nfs_set_pageerror(struct page *page) { - SetPageError(page); - nfs_zap_mapping(page->mapping->host, page->mapping); + nfs_zap_mapping(page_file_mapping(page)->host, page_file_mapping(page)); } /* We can set the PG_uptodate flag if we see that a write request @@ -223,26 +242,23 @@ int nfs_congestion_kb; #define NFS_CONGESTION_OFF_THRESH \ (NFS_CONGESTION_ON_THRESH - (NFS_CONGESTION_ON_THRESH >> 2)) -static int nfs_set_page_writeback(struct page *page) +static void nfs_set_page_writeback(struct page *page) { + struct nfs_server *nfss = NFS_SERVER(page_file_mapping(page)->host); int ret = test_set_page_writeback(page); - if (!ret) { - struct inode *inode = page->mapping->host; - struct nfs_server *nfss = NFS_SERVER(inode); + WARN_ON_ONCE(ret != 0); - if (atomic_long_inc_return(&nfss->writeback) > - NFS_CONGESTION_ON_THRESH) { - set_bdi_congested(&nfss->backing_dev_info, - BLK_RW_ASYNC); - } + if 
(atomic_long_inc_return(&nfss->writeback) > + NFS_CONGESTION_ON_THRESH) { + set_bdi_congested(&nfss->backing_dev_info, + BLK_RW_ASYNC); } - return ret; } static void nfs_end_page_writeback(struct page *page) { - struct inode *inode = page->mapping->host; + struct inode *inode = page_file_mapping(page)->host; struct nfs_server *nfss = NFS_SERVER(inode); end_page_writeback(page); @@ -252,13 +268,13 @@ static void nfs_end_page_writeback(struct page *page) static struct nfs_page *nfs_find_and_lock_request(struct page *page, bool nonblock) { - struct inode *inode = page->mapping->host; + struct inode *inode = page_file_mapping(page)->host; struct nfs_page *req; int ret; spin_lock(&inode->i_lock); for (;;) { - req = nfs_page_find_request_locked(page); + req = nfs_page_find_request_locked(NFS_I(inode), page); if (req == NULL) break; if (nfs_lock_request(req)) @@ -299,10 +315,10 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, if (IS_ERR(req)) goto out; - ret = nfs_set_page_writeback(page); - BUG_ON(ret != 0); - BUG_ON(test_bit(PG_CLEAN, &req->wb_flags)); + nfs_set_page_writeback(page); + WARN_ON_ONCE(test_bit(PG_CLEAN, &req->wb_flags)); + ret = 0; if (!nfs_pageio_add_request(pgio, req)) { nfs_redirty_request(req); ret = pgio->pg_error; @@ -313,13 +329,13 @@ out: static int nfs_do_writepage(struct page *page, struct writeback_control *wbc, struct nfs_pageio_descriptor *pgio) { - struct inode *inode = page->mapping->host; + struct inode *inode = page_file_mapping(page)->host; int ret; nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE); nfs_add_stats(inode, NFSIOS_WRITEPAGES, 1); - nfs_pageio_cond_complete(pgio, page->index); + nfs_pageio_cond_complete(pgio, page_file_index(page)); ret = nfs_page_async_flush(pgio, page, wbc->sync_mode == WB_SYNC_NONE); if (ret == -EAGAIN) { redirty_page_for_writepage(wbc, page); @@ -336,8 +352,10 @@ static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc struct nfs_pageio_descriptor pgio; int err; - 
nfs_pageio_init_write(&pgio, page->mapping->host, wb_priority(wbc), - &nfs_async_write_completion_ops); + NFS_PROTO(page_file_mapping(page)->host)->write_pageio_init(&pgio, + page->mapping->host, + wb_priority(wbc), + &nfs_async_write_completion_ops); err = nfs_do_writepage(page, wbc, &pgio); nfs_pageio_complete(&pgio); if (err < 0) @@ -380,8 +398,7 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc) nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES); - nfs_pageio_init_write(&pgio, inode, wb_priority(wbc), - &nfs_async_write_completion_ops); + NFS_PROTO(inode)->write_pageio_init(&pgio, inode, wb_priority(wbc), &nfs_async_write_completion_ops); err = write_cache_pages(mapping, wbc, nfs_writepages_callback, &pgio); nfs_pageio_complete(&pgio); @@ -410,11 +427,17 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req) nfs_lock_request(req); spin_lock(&inode->i_lock); - if (!nfsi->npages && nfs_have_delegation(inode, FMODE_WRITE)) + if (!nfsi->npages && NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE)) inode->i_version++; - set_bit(PG_MAPPED, &req->wb_flags); - SetPagePrivate(req->wb_page); - set_page_private(req->wb_page, (unsigned long)req); + /* + * Swap-space should not get truncated. Hence no need to plug the race + * with invalidate/truncate. 
+ */ + if (likely(!PageSwapCache(req->wb_page))) { + set_bit(PG_MAPPED, &req->wb_flags); + SetPagePrivate(req->wb_page); + set_page_private(req->wb_page, (unsigned long)req); + } nfsi->npages++; kref_get(&req->wb_kref); spin_unlock(&inode->i_lock); @@ -428,12 +451,12 @@ static void nfs_inode_remove_request(struct nfs_page *req) struct inode *inode = req->wb_context->dentry->d_inode; struct nfs_inode *nfsi = NFS_I(inode); - BUG_ON (!NFS_WBACK_BUSY(req)); - spin_lock(&inode->i_lock); - set_page_private(req->wb_page, 0); - ClearPagePrivate(req->wb_page); - clear_bit(PG_MAPPED, &req->wb_flags); + if (likely(!PageSwapCache(req->wb_page))) { + set_page_private(req->wb_page, 0); + ClearPagePrivate(req->wb_page); + clear_bit(PG_MAPPED, &req->wb_flags); + } nfsi->npages--; spin_unlock(&inode->i_lock); nfs_release_request(req); @@ -445,7 +468,7 @@ nfs_mark_request_dirty(struct nfs_page *req) __set_page_dirty_nobuffers(req->wb_page); } -#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) +#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4) /** * nfs_request_add_commit_list - add request to a commit list * @req: pointer to a struct nfs_page @@ -470,7 +493,7 @@ nfs_request_add_commit_list(struct nfs_page *req, struct list_head *dst, spin_unlock(cinfo->lock); if (!cinfo->dreq) { inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); - inc_bdi_stat(req->wb_page->mapping->backing_dev_info, + inc_bdi_stat(page_file_mapping(req->wb_page)->backing_dev_info, BDI_RECLAIMABLE); __mark_inode_dirty(req->wb_context->dentry->d_inode, I_DIRTY_DATASYNC); @@ -537,7 +560,7 @@ static void nfs_clear_page_commit(struct page *page) { dec_zone_page_state(page, NR_UNSTABLE_NFS); - dec_bdi_stat(page->mapping->backing_dev_info, BDI_RECLAIMABLE); + dec_bdi_stat(page_file_mapping(page)->backing_dev_info, BDI_RECLAIMABLE); } static void @@ -620,7 +643,7 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr) goto next; } if (test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) { - 
memcpy(&req->wb_verf, hdr->verf, sizeof(req->wb_verf)); + memcpy(&req->wb_verf, &hdr->verf->verifier, sizeof(req->wb_verf)); nfs_mark_request_commit(req, hdr->lseg, &cinfo); goto next; } @@ -635,7 +658,7 @@ out: hdr->release(hdr); } -#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) +#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4) static unsigned long nfs_reqs_to_commit(struct nfs_commit_info *cinfo) { @@ -729,7 +752,7 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode, spin_lock(&inode->i_lock); for (;;) { - req = nfs_page_find_request_locked(page); + req = nfs_page_find_request_locked(NFS_I(inode), page); if (req == NULL) goto out_unlock; @@ -788,7 +811,7 @@ out_err: static struct nfs_page * nfs_setup_write_request(struct nfs_open_context* ctx, struct page *page, unsigned int offset, unsigned int bytes) { - struct inode *inode = page->mapping->host; + struct inode *inode = page_file_mapping(page)->host; struct nfs_page *req; req = nfs_try_to_update_request(inode, page, offset, bytes); @@ -821,6 +844,7 @@ static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page, int nfs_flush_incompatible(struct file *file, struct page *page) { struct nfs_open_context *ctx = nfs_file_open_context(file); + struct nfs_lock_context *l_ctx; struct nfs_page *req; int do_flush, status; /* @@ -835,13 +859,16 @@ int nfs_flush_incompatible(struct file *file, struct page *page) req = nfs_page_find_request(page); if (req == NULL) return 0; - do_flush = req->wb_page != page || req->wb_context != ctx || - req->wb_lock_context->lockowner != current->files || - req->wb_lock_context->pid != current->tgid; + l_ctx = req->wb_lock_context; + do_flush = req->wb_page != page || req->wb_context != ctx; + if (l_ctx) { + do_flush |= l_ctx->lockowner.l_owner != current->files + || l_ctx->lockowner.l_pid != current->tgid; + } nfs_release_request(req); if (!do_flush) return 0; - status = nfs_wb_page(page->mapping->host, page); + status = 
nfs_wb_page(page_file_mapping(page)->host, page); } while (status == 0); return status; } @@ -855,7 +882,7 @@ static bool nfs_write_pageuptodate(struct page *page, struct inode *inode) { if (nfs_have_delegated_attributes(inode)) goto out; - if (NFS_I(inode)->cache_validity & NFS_INO_REVAL_PAGECACHE) + if (NFS_I(inode)->cache_validity & (NFS_INO_INVALID_DATA|NFS_INO_REVAL_PAGECACHE)) return false; out: return PageUptodate(page) != 0; @@ -871,7 +898,7 @@ int nfs_updatepage(struct file *file, struct page *page, unsigned int offset, unsigned int count) { struct nfs_open_context *ctx = nfs_file_open_context(file); - struct inode *inode = page->mapping->host; + struct inode *inode = page_file_mapping(page)->host; int status = 0; nfs_inc_stats(inode, NFSIOS_VFSUPDATEPAGE); @@ -879,7 +906,7 @@ int nfs_updatepage(struct file *file, struct page *page, dprintk("NFS: nfs_updatepage(%s/%s %d@%lld)\n", file->f_path.dentry->d_parent->d_name.name, file->f_path.dentry->d_name.name, count, - (long long)(page_offset(page) + offset)); + (long long)(page_file_offset(page) + offset)); /* If we're not using byte range locks, and we know the page * is up to date, it may be more efficient to extend the write @@ -1172,6 +1199,7 @@ int nfs_generic_flush(struct nfs_pageio_descriptor *desc, return nfs_flush_multi(desc, hdr); return nfs_flush_one(desc, hdr); } +EXPORT_SYMBOL_GPL(nfs_generic_flush); static int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) { @@ -1202,13 +1230,14 @@ static const struct nfs_pageio_ops nfs_pageio_write_ops = { .pg_doio = nfs_generic_pg_writepages, }; -void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio, +void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags, const struct nfs_pgio_completion_ops *compl_ops) { nfs_pageio_init(pgio, inode, &nfs_pageio_write_ops, compl_ops, NFS_SERVER(inode)->wsize, ioflags); } +EXPORT_SYMBOL_GPL(nfs_pageio_init_write); void nfs_pageio_reset_write_mds(struct 
nfs_pageio_descriptor *pgio) { @@ -1217,13 +1246,6 @@ void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio) } EXPORT_SYMBOL_GPL(nfs_pageio_reset_write_mds); -void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, - struct inode *inode, int ioflags, - const struct nfs_pgio_completion_ops *compl_ops) -{ - if (!pnfs_pageio_init_write(pgio, inode, ioflags, compl_ops)) - nfs_pageio_init_write_mds(pgio, inode, ioflags, compl_ops); -} void nfs_write_prepare(struct rpc_task *task, void *calldata) { @@ -1303,7 +1325,7 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) return; nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, resp->count); -#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) +#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4) if (resp->verf->committed < argp->stable && task->tk_status >= 0) { /* We tried a write call, but the server did not * commit data to stable storage even though we @@ -1363,7 +1385,7 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) } -#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) +#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4) static int nfs_commit_set_lock(struct nfs_inode *nfsi, int may_wait) { int ret; @@ -1475,7 +1497,7 @@ void nfs_retry_commit(struct list_head *page_list, nfs_mark_request_commit(req, lseg, cinfo); if (!cinfo->dreq) { dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); - dec_bdi_stat(req->wb_page->mapping->backing_dev_info, + dec_bdi_stat(page_file_mapping(req->wb_page)->backing_dev_info, BDI_RECLAIMABLE); } nfs_unlock_and_release_request(req); @@ -1547,7 +1569,7 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data) /* Okay, COMMIT succeeded, apparently. Check the verifier * returned by the server against all stored verfs. 
*/ - if (!memcmp(req->wb_verf.verifier, data->verf.verifier, sizeof(data->verf.verifier))) { + if (!memcmp(&req->wb_verf, &data->verf.verifier, sizeof(req->wb_verf))) { /* We have a match */ nfs_inode_remove_request(req); dprintk(" OK\n"); @@ -1556,6 +1578,7 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data) /* We have a mismatch. Write the page again */ dprintk(" mismatch\n"); nfs_mark_request_dirty(req); + set_bit(NFS_CONTEXT_RESEND_WRITES, &req->wb_context->flags); next: nfs_unlock_and_release_request(req); } @@ -1677,22 +1700,9 @@ static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_contr int nfs_write_inode(struct inode *inode, struct writeback_control *wbc) { - int ret; - - ret = nfs_commit_unstable_pages(inode, wbc); - if (ret >= 0 && test_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(inode)->flags)) { - int status; - bool sync = true; - - if (wbc->sync_mode == WB_SYNC_NONE) - sync = false; - - status = pnfs_layoutcommit_inode(inode, sync); - if (status < 0) - return status; - } - return ret; + return nfs_commit_unstable_pages(inode, wbc); } +EXPORT_SYMBOL_GPL(nfs_write_inode); /* * flush the inode to disk. 
@@ -1708,13 +1718,13 @@ int nfs_wb_all(struct inode *inode) return sync_inode(inode, &wbc); } +EXPORT_SYMBOL_GPL(nfs_wb_all); int nfs_wb_page_cancel(struct inode *inode, struct page *page) { struct nfs_page *req; int ret = 0; - BUG_ON(!PageLocked(page)); for (;;) { wait_on_page_writeback(page); req = nfs_page_find_request(page); @@ -1744,7 +1754,7 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page) */ int nfs_wb_page(struct inode *inode, struct page *page) { - loff_t range_start = page_offset(page); + loff_t range_start = page_file_offset(page); loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1); struct writeback_control wbc = { .sync_mode = WB_SYNC_ALL, @@ -1788,7 +1798,8 @@ int nfs_migrate_page(struct address_space *mapping, struct page *newpage, if (PagePrivate(page)) return -EBUSY; - nfs_fscache_release_page(page, GFP_KERNEL); + if (!nfs_fscache_release_page(page, GFP_KERNEL)) + return -EBUSY; return migrate_page(mapping, newpage, page, mode); } @@ -1806,19 +1817,19 @@ int __init nfs_init_writepagecache(void) nfs_wdata_mempool = mempool_create_slab_pool(MIN_POOL_WRITE, nfs_wdata_cachep); if (nfs_wdata_mempool == NULL) - return -ENOMEM; + goto out_destroy_write_cache; nfs_cdata_cachep = kmem_cache_create("nfs_commit_data", sizeof(struct nfs_commit_data), 0, SLAB_HWCACHE_ALIGN, NULL); if (nfs_cdata_cachep == NULL) - return -ENOMEM; + goto out_destroy_write_mempool; nfs_commit_mempool = mempool_create_slab_pool(MIN_POOL_COMMIT, - nfs_wdata_cachep); + nfs_cdata_cachep); if (nfs_commit_mempool == NULL) - return -ENOMEM; + goto out_destroy_commit_cache; /* * NFS congestion size, scale with available memory. 
@@ -1841,11 +1852,20 @@ int __init nfs_init_writepagecache(void) nfs_congestion_kb = 256*1024; return 0; + +out_destroy_commit_cache: + kmem_cache_destroy(nfs_cdata_cachep); +out_destroy_write_mempool: + mempool_destroy(nfs_wdata_mempool); +out_destroy_write_cache: + kmem_cache_destroy(nfs_wdata_cachep); + return -ENOMEM; } void nfs_destroy_writepagecache(void) { mempool_destroy(nfs_commit_mempool); + kmem_cache_destroy(nfs_cdata_cachep); mempool_destroy(nfs_wdata_mempool); kmem_cache_destroy(nfs_wdata_cachep); } |