aboutsummaryrefslogtreecommitdiffstats
path: root/fs/nfs
diff options
context:
space:
mode:
author Patrick McHardy <kaber@trash.net> 2012-08-08 21:03:47 +0200
committer Patrick McHardy <kaber@trash.net> 2012-08-08 21:03:47 +0200
commit d53b4ed072d9779cdf53582c46436dec06d0961f (patch)
tree ac95ecab33e31cd79aae69c475e8348adac51230 /fs/nfs
parent 5d4dff7f1011a81a693a9c7b1f6a0b9c842eb60c (diff)
parent 28a33cbc24e4256c143dce96c7d93bf423229f92 (diff)
Merge tag 'v3.5' of 192.168.0.154:/repos/git/linux-2.6
Conflicts: drivers/Kconfig
Signed-off-by: Patrick McHardy <kaber@trash.net>
Diffstat (limited to 'fs/nfs')
-rw-r--r-- fs/nfs/Kconfig 40
-rw-r--r-- fs/nfs/Makefile 5
-rw-r--r-- fs/nfs/blocklayout/blocklayout.c 435
-rw-r--r-- fs/nfs/blocklayout/blocklayout.h 23
-rw-r--r-- fs/nfs/blocklayout/blocklayoutdev.c 46
-rw-r--r-- fs/nfs/blocklayout/blocklayoutdm.c 33
-rw-r--r-- fs/nfs/blocklayout/extents.c 178
-rw-r--r-- fs/nfs/cache_lib.c 61
-rw-r--r-- fs/nfs/cache_lib.h 10
-rw-r--r-- fs/nfs/callback.c 25
-rw-r--r-- fs/nfs/callback.h 5
-rw-r--r-- fs/nfs/callback_proc.c 101
-rw-r--r-- fs/nfs/callback_xdr.c 33
-rw-r--r-- fs/nfs/client.c 500
-rw-r--r-- fs/nfs/delegation.c 84
-rw-r--r-- fs/nfs/delegation.h 5
-rw-r--r-- fs/nfs/dir.c 144
-rw-r--r-- fs/nfs/direct.c 763
-rw-r--r-- fs/nfs/dns_resolve.c 130
-rw-r--r-- fs/nfs/dns_resolve.h 14
-rw-r--r-- fs/nfs/file.c 92
-rw-r--r-- fs/nfs/fscache.c 17
-rw-r--r-- fs/nfs/fscache.h 10
-rw-r--r-- fs/nfs/getroot.c 92
-rw-r--r-- fs/nfs/idmap.c 836
-rw-r--r-- fs/nfs/inode.c 291
-rw-r--r-- fs/nfs/internal.h 156
-rw-r--r-- fs/nfs/mount_clnt.c 16
-rw-r--r-- fs/nfs/namespace.c 145
-rw-r--r-- fs/nfs/netns.h 32
-rw-r--r-- fs/nfs/nfs2xdr.c 7
-rw-r--r-- fs/nfs/nfs3acl.c 2
-rw-r--r-- fs/nfs/nfs3proc.c 57
-rw-r--r-- fs/nfs/nfs3xdr.c 116
-rw-r--r-- fs/nfs/nfs4_fs.h 91
-rw-r--r-- fs/nfs/nfs4filelayout.c 778
-rw-r--r-- fs/nfs/nfs4filelayout.h 58
-rw-r--r-- fs/nfs/nfs4filelayoutdev.c 186
-rw-r--r-- fs/nfs/nfs4namespace.c 147
-rw-r--r-- fs/nfs/nfs4proc.c 1550
-rw-r--r-- fs/nfs/nfs4renewd.c 2
-rw-r--r-- fs/nfs/nfs4state.c 711
-rw-r--r-- fs/nfs/nfs4xdr.c 1256
-rw-r--r-- fs/nfs/nfsroot.c 2
-rw-r--r-- fs/nfs/objlayout/objio_osd.c 98
-rw-r--r-- fs/nfs/objlayout/objlayout.c 163
-rw-r--r-- fs/nfs/objlayout/objlayout.h 2
-rw-r--r-- fs/nfs/pagelist.c 143
-rw-r--r-- fs/nfs/pnfs.c 419
-rw-r--r-- fs/nfs/pnfs.h 180
-rw-r--r-- fs/nfs/pnfs_dev.c 4
-rw-r--r-- fs/nfs/proc.c 53
-rw-r--r-- fs/nfs/read.c 450
-rw-r--r-- fs/nfs/super.c 1019
-rw-r--r-- fs/nfs/sysctl.c 2
-rw-r--r-- fs/nfs/unlink.c 45
-rw-r--r-- fs/nfs/write.c 953
57 files changed, 7582 insertions, 5234 deletions
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig
index dbcd82126ae..f90f4f5cd42 100644
--- a/fs/nfs/Kconfig
+++ b/fs/nfs/Kconfig
@@ -29,9 +29,20 @@ config NFS_FS
If unsure, say N.
+config NFS_V2
+ bool "NFS client support for NFS version 2"
+ depends on NFS_FS
+ default y
+ help
+ This option enables support for version 2 of the NFS protocol
+ (RFC 1094) in the kernel's NFS client.
+
+ If unsure, say Y.
+
config NFS_V3
bool "NFS client support for NFS version 3"
depends on NFS_FS
+ default y
help
This option enables support for version 3 of the NFS protocol
(RFC 1813) in the kernel's NFS client.
@@ -64,6 +75,7 @@ config NFS_V4
bool "NFS client support for NFS version 4"
depends on NFS_FS
select SUNRPC_GSS
+ select KEYS
help
This option enables support for version 4 of the NFS protocol
(RFC 3530) in the kernel's NFS client.
@@ -98,6 +110,18 @@ config PNFS_OBJLAYOUT
depends on NFS_FS && NFS_V4_1 && SCSI_OSD_ULD
default m
+config NFS_V4_1_IMPLEMENTATION_ID_DOMAIN
+ string "NFSv4.1 Implementation ID Domain"
+ depends on NFS_V4_1
+ default "kernel.org"
+ help
+ This option defines the domain portion of the implementation ID that
+ may be sent in the NFS exchange_id operation. The value must be in
+ the format of a DNS domain name and should be set to the DNS domain
+ name of the distribution.
+ If the NFS client is unchanged from the upstream kernel, this
+ option should be set to the default "kernel.org".
+
config ROOT_NFS
bool "Root file system on NFS"
depends on NFS_FS=y && IP_PNP
@@ -130,16 +154,10 @@ config NFS_USE_KERNEL_DNS
bool
depends on NFS_V4 && !NFS_USE_LEGACY_DNS
select DNS_RESOLVER
- select KEYS
default y
-config NFS_USE_NEW_IDMAPPER
- bool "Use the new idmapper upcall routine"
- depends on NFS_V4 && KEYS
- help
- Say Y here if you want NFS to use the new idmapper upcall functions.
- You will need /sbin/request-key (usually provided by the keyutils
- package). For details, read
- <file:Documentation/filesystems/nfs/idmapper.txt>.
-
- If you are unsure, say N.
+config NFS_DEBUG
+ bool
+ depends on NFS_FS && SUNRPC_DEBUG
+ select CRC32
+ default y
diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile
index b58613d0abb..7ddd45d9f17 100644
--- a/fs/nfs/Makefile
+++ b/fs/nfs/Makefile
@@ -4,11 +4,12 @@
obj-$(CONFIG_NFS_FS) += nfs.o
-nfs-y := client.o dir.o file.o getroot.o inode.o super.o nfs2xdr.o \
- direct.o pagelist.o proc.o read.o symlink.o unlink.o \
+nfs-y := client.o dir.o file.o getroot.o inode.o super.o \
+ direct.o pagelist.o read.o symlink.o unlink.o \
write.o namespace.o mount_clnt.o \
dns_resolve.o cache_lib.o
nfs-$(CONFIG_ROOT_NFS) += nfsroot.o
+nfs-$(CONFIG_NFS_V2) += proc.o nfs2xdr.o
nfs-$(CONFIG_NFS_V3) += nfs3proc.o nfs3xdr.o
nfs-$(CONFIG_NFS_V3_ACL) += nfs3acl.o
nfs-$(CONFIG_NFS_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 281ae95932c..7ae8a608956 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -38,6 +38,8 @@
#include <linux/buffer_head.h> /* various write calls */
#include <linux/prefetch.h>
+#include "../pnfs.h"
+#include "../internal.h"
#include "blocklayout.h"
#define NFSDBG_FACILITY NFSDBG_PNFS_LD
@@ -46,9 +48,6 @@ MODULE_LICENSE("GPL");
MODULE_AUTHOR("Andy Adamson <andros@citi.umich.edu>");
MODULE_DESCRIPTION("The NFSv4.1 pNFS Block layout driver");
-struct dentry *bl_device_pipe;
-wait_queue_head_t bl_wq;
-
static void print_page(struct page *page)
{
dprintk("PRINTPAGE page %p\n", page);
@@ -90,9 +89,9 @@ static int is_writable(struct pnfs_block_extent *be, sector_t isect)
*/
struct parallel_io {
struct kref refcnt;
- struct rpc_call_ops call_ops;
- void (*pnfs_callback) (void *data);
+ void (*pnfs_callback) (void *data, int num_se);
void *data;
+ int bse_count;
};
static inline struct parallel_io *alloc_parallel(void *data)
@@ -103,6 +102,7 @@ static inline struct parallel_io *alloc_parallel(void *data)
if (rv) {
rv->data = data;
kref_init(&rv->refcnt);
+ rv->bse_count = 0;
}
return rv;
}
@@ -117,7 +117,7 @@ static void destroy_parallel(struct kref *kref)
struct parallel_io *p = container_of(kref, struct parallel_io, refcnt);
dprintk("%s enter\n", __func__);
- p->pnfs_callback(p->data);
+ p->pnfs_callback(p->data, p->bse_count);
kfree(p);
}
@@ -146,14 +146,19 @@ static struct bio *bl_alloc_init_bio(int npg, sector_t isect,
{
struct bio *bio;
+ npg = min(npg, BIO_MAX_PAGES);
bio = bio_alloc(GFP_NOIO, npg);
- if (!bio)
- return NULL;
+ if (!bio && (current->flags & PF_MEMALLOC)) {
+ while (!bio && (npg /= 2))
+ bio = bio_alloc(GFP_NOIO, npg);
+ }
- bio->bi_sector = isect - be->be_f_offset + be->be_v_offset;
- bio->bi_bdev = be->be_mdev;
- bio->bi_end_io = end_io;
- bio->bi_private = par;
+ if (bio) {
+ bio->bi_sector = isect - be->be_f_offset + be->be_v_offset;
+ bio->bi_bdev = be->be_mdev;
+ bio->bi_end_io = end_io;
+ bio->bi_private = par;
+ }
return bio;
}
@@ -182,7 +187,6 @@ static void bl_end_io_read(struct bio *bio, int err)
struct parallel_io *par = bio->bi_private;
const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
- struct nfs_read_data *rdata = (struct nfs_read_data *)par->data;
do {
struct page *page = bvec->bv_page;
@@ -193,9 +197,12 @@ static void bl_end_io_read(struct bio *bio, int err)
SetPageUptodate(page);
} while (bvec >= bio->bi_io_vec);
if (!uptodate) {
- if (!rdata->pnfs_error)
- rdata->pnfs_error = -EIO;
- pnfs_set_lo_fail(rdata->lseg);
+ struct nfs_read_data *rdata = par->data;
+ struct nfs_pgio_header *header = rdata->header;
+
+ if (!header->pnfs_error)
+ header->pnfs_error = -EIO;
+ pnfs_set_lo_fail(header->lseg);
}
bio_put(bio);
put_parallel(par);
@@ -212,59 +219,50 @@ static void bl_read_cleanup(struct work_struct *work)
}
static void
-bl_end_par_io_read(void *data)
+bl_end_par_io_read(void *data, int unused)
{
struct nfs_read_data *rdata = data;
+ rdata->task.tk_status = rdata->header->pnfs_error;
INIT_WORK(&rdata->task.u.tk_work, bl_read_cleanup);
schedule_work(&rdata->task.u.tk_work);
}
-/* We don't want normal .rpc_call_done callback used, so we replace it
- * with this stub.
- */
-static void bl_rpc_do_nothing(struct rpc_task *task, void *calldata)
-{
- return;
-}
-
static enum pnfs_try_status
bl_read_pagelist(struct nfs_read_data *rdata)
{
+ struct nfs_pgio_header *header = rdata->header;
int i, hole;
struct bio *bio = NULL;
struct pnfs_block_extent *be = NULL, *cow_read = NULL;
sector_t isect, extent_length = 0;
struct parallel_io *par;
loff_t f_offset = rdata->args.offset;
- size_t count = rdata->args.count;
struct page **pages = rdata->args.pages;
int pg_index = rdata->args.pgbase >> PAGE_CACHE_SHIFT;
- dprintk("%s enter nr_pages %u offset %lld count %Zd\n", __func__,
- rdata->npages, f_offset, count);
+ dprintk("%s enter nr_pages %u offset %lld count %u\n", __func__,
+ rdata->pages.npages, f_offset, (unsigned int)rdata->args.count);
par = alloc_parallel(rdata);
if (!par)
goto use_mds;
- par->call_ops = *rdata->mds_ops;
- par->call_ops.rpc_call_done = bl_rpc_do_nothing;
par->pnfs_callback = bl_end_par_io_read;
/* At this point, we can no longer jump to use_mds */
isect = (sector_t) (f_offset >> SECTOR_SHIFT);
/* Code assumes extents are page-aligned */
- for (i = pg_index; i < rdata->npages; i++) {
+ for (i = pg_index; i < rdata->pages.npages; i++) {
if (!extent_length) {
/* We've used up the previous extent */
bl_put_extent(be);
bl_put_extent(cow_read);
bio = bl_submit_bio(READ, bio);
/* Get the next one */
- be = bl_find_get_extent(BLK_LSEG2EXT(rdata->lseg),
+ be = bl_find_get_extent(BLK_LSEG2EXT(header->lseg),
isect, &cow_read);
if (!be) {
- rdata->pnfs_error = -EIO;
+ header->pnfs_error = -EIO;
goto out;
}
extent_length = be->be_length -
@@ -287,11 +285,12 @@ bl_read_pagelist(struct nfs_read_data *rdata)
struct pnfs_block_extent *be_read;
be_read = (hole && cow_read) ? cow_read : be;
- bio = bl_add_page_to_bio(bio, rdata->npages - i, READ,
+ bio = bl_add_page_to_bio(bio, rdata->pages.npages - i,
+ READ,
isect, pages[i], be_read,
bl_end_io_read, par);
if (IS_ERR(bio)) {
- rdata->pnfs_error = PTR_ERR(bio);
+ header->pnfs_error = PTR_ERR(bio);
bio = NULL;
goto out;
}
@@ -299,9 +298,9 @@ bl_read_pagelist(struct nfs_read_data *rdata)
isect += PAGE_CACHE_SECTORS;
extent_length -= PAGE_CACHE_SECTORS;
}
- if ((isect << SECTOR_SHIFT) >= rdata->inode->i_size) {
+ if ((isect << SECTOR_SHIFT) >= header->inode->i_size) {
rdata->res.eof = 1;
- rdata->res.count = rdata->inode->i_size - f_offset;
+ rdata->res.count = header->inode->i_size - f_offset;
} else {
rdata->res.count = (isect << SECTOR_SHIFT) - f_offset;
}
@@ -322,6 +321,7 @@ static void mark_extents_written(struct pnfs_block_layout *bl,
{
sector_t isect, end;
struct pnfs_block_extent *be;
+ struct pnfs_block_short_extent *se;
dprintk("%s(%llu, %u)\n", __func__, offset, count);
if (count == 0)
@@ -334,8 +334,11 @@ static void mark_extents_written(struct pnfs_block_layout *bl,
be = bl_find_get_extent(bl, isect, NULL);
BUG_ON(!be); /* FIXME */
len = min(end, be->be_f_offset + be->be_length) - isect;
- if (be->be_state == PNFS_BLOCK_INVALID_DATA)
- bl_mark_for_commit(be, isect, len); /* What if fails? */
+ if (be->be_state == PNFS_BLOCK_INVALID_DATA) {
+ se = bl_pop_one_short_extent(be->be_inval);
+ BUG_ON(!se);
+ bl_mark_for_commit(be, isect, len, se);
+ }
isect += len;
bl_put_extent(be);
}
@@ -346,7 +349,6 @@ static void bl_end_io_write_zero(struct bio *bio, int err)
struct parallel_io *par = bio->bi_private;
const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
- struct nfs_write_data *wdata = (struct nfs_write_data *)par->data;
do {
struct page *page = bvec->bv_page;
@@ -357,26 +359,30 @@ static void bl_end_io_write_zero(struct bio *bio, int err)
end_page_writeback(page);
page_cache_release(page);
} while (bvec >= bio->bi_io_vec);
- if (!uptodate) {
- if (!wdata->pnfs_error)
- wdata->pnfs_error = -EIO;
- pnfs_set_lo_fail(wdata->lseg);
+
+ if (unlikely(!uptodate)) {
+ struct nfs_write_data *data = par->data;
+ struct nfs_pgio_header *header = data->header;
+
+ if (!header->pnfs_error)
+ header->pnfs_error = -EIO;
+ pnfs_set_lo_fail(header->lseg);
}
bio_put(bio);
put_parallel(par);
}
-/* This is basically copied from mpage_end_io_read */
static void bl_end_io_write(struct bio *bio, int err)
{
struct parallel_io *par = bio->bi_private;
const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
- struct nfs_write_data *wdata = (struct nfs_write_data *)par->data;
+ struct nfs_write_data *data = par->data;
+ struct nfs_pgio_header *header = data->header;
if (!uptodate) {
- if (!wdata->pnfs_error)
- wdata->pnfs_error = -EIO;
- pnfs_set_lo_fail(wdata->lseg);
+ if (!header->pnfs_error)
+ header->pnfs_error = -EIO;
+ pnfs_set_lo_fail(header->lseg);
}
bio_put(bio);
put_parallel(par);
@@ -392,20 +398,25 @@ static void bl_write_cleanup(struct work_struct *work)
dprintk("%s enter\n", __func__);
task = container_of(work, struct rpc_task, u.tk_work);
wdata = container_of(task, struct nfs_write_data, task);
- if (!wdata->pnfs_error) {
+ if (likely(!wdata->header->pnfs_error)) {
/* Marks for LAYOUTCOMMIT */
- mark_extents_written(BLK_LSEG2EXT(wdata->lseg),
+ mark_extents_written(BLK_LSEG2EXT(wdata->header->lseg),
wdata->args.offset, wdata->args.count);
}
pnfs_ld_write_done(wdata);
}
/* Called when last of bios associated with a bl_write_pagelist call finishes */
-static void bl_end_par_io_write(void *data)
+static void bl_end_par_io_write(void *data, int num_se)
{
struct nfs_write_data *wdata = data;
- wdata->task.tk_status = 0;
+ if (unlikely(wdata->header->pnfs_error)) {
+ bl_free_short_extents(&BLK_LSEG2EXT(wdata->header->lseg)->bl_inval,
+ num_se);
+ }
+
+ wdata->task.tk_status = wdata->header->pnfs_error;
wdata->verf.committed = NFS_FILE_SYNC;
INIT_WORK(&wdata->task.u.tk_work, bl_write_cleanup);
schedule_work(&wdata->task.u.tk_work);
@@ -484,9 +495,59 @@ cleanup:
return ret;
}
+/* Find or create a zeroing page marked being writeback.
+ * Return ERR_PTR on error, NULL to indicate skip this page and page itself
+ * to indicate write out.
+ */
+static struct page *
+bl_find_get_zeroing_page(struct inode *inode, pgoff_t index,
+ struct pnfs_block_extent *cow_read)
+{
+ struct page *page;
+ int locked = 0;
+ page = find_get_page(inode->i_mapping, index);
+ if (page)
+ goto check_page;
+
+ page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
+ if (unlikely(!page)) {
+ dprintk("%s oom\n", __func__);
+ return ERR_PTR(-ENOMEM);
+ }
+ locked = 1;
+
+check_page:
+ /* PageDirty: Other will write this out
+ * PageWriteback: Other is writing this out
+ * PageUptodate: It was read before
+ */
+ if (PageDirty(page) || PageWriteback(page)) {
+ print_page(page);
+ if (locked)
+ unlock_page(page);
+ page_cache_release(page);
+ return NULL;
+ }
+
+ if (!locked) {
+ lock_page(page);
+ locked = 1;
+ goto check_page;
+ }
+ if (!PageUptodate(page)) {
+ /* New page, readin or zero it */
+ init_page_for_write(page, cow_read);
+ }
+ set_page_writeback(page);
+ unlock_page(page);
+
+ return page;
+}
+
static enum pnfs_try_status
bl_write_pagelist(struct nfs_write_data *wdata, int sync)
{
+ struct nfs_pgio_header *header = wdata->header;
int i, ret, npg_zero, pg_index, last = 0;
struct bio *bio = NULL;
struct pnfs_block_extent *be = NULL, *cow_read = NULL;
@@ -499,7 +560,7 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync)
pgoff_t index;
u64 temp;
int npg_per_block =
- NFS_SERVER(wdata->inode)->pnfs_blksize >> PAGE_CACHE_SHIFT;
+ NFS_SERVER(header->inode)->pnfs_blksize >> PAGE_CACHE_SHIFT;
dprintk("%s enter, %Zu@%lld\n", __func__, count, offset);
/* At this point, wdata->pages is a (sequential) list of nfs_pages.
@@ -508,22 +569,23 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync)
*/
par = alloc_parallel(wdata);
if (!par)
- return PNFS_NOT_ATTEMPTED;
- par->call_ops = *wdata->mds_ops;
- par->call_ops.rpc_call_done = bl_rpc_do_nothing;
+ goto out_mds;
par->pnfs_callback = bl_end_par_io_write;
/* At this point, have to be more careful with error handling */
isect = (sector_t) ((offset & (long)PAGE_CACHE_MASK) >> SECTOR_SHIFT);
- be = bl_find_get_extent(BLK_LSEG2EXT(wdata->lseg), isect, &cow_read);
+ be = bl_find_get_extent(BLK_LSEG2EXT(header->lseg), isect, &cow_read);
if (!be || !is_writable(be, isect)) {
dprintk("%s no matching extents!\n", __func__);
- wdata->pnfs_error = -EINVAL;
- goto out;
+ goto out_mds;
}
/* First page inside INVALID extent */
if (be->be_state == PNFS_BLOCK_INVALID_DATA) {
+ if (likely(!bl_push_one_short_extent(be->be_inval)))
+ par->bse_count++;
+ else
+ goto out_mds;
temp = offset >> PAGE_CACHE_SHIFT;
npg_zero = do_div(temp, npg_per_block);
isect = (sector_t) (((offset - npg_zero * PAGE_CACHE_SIZE) &
@@ -543,56 +605,45 @@ fill_invalid_ext:
dprintk("%s zero %dth page: index %lu isect %llu\n",
__func__, npg_zero, index,
(unsigned long long)isect);
- page =
- find_or_create_page(wdata->inode->i_mapping, index,
- GFP_NOFS);
- if (!page) {
- dprintk("%s oom\n", __func__);
- wdata->pnfs_error = -ENOMEM;
+ page = bl_find_get_zeroing_page(header->inode, index,
+ cow_read);
+ if (unlikely(IS_ERR(page))) {
+ header->pnfs_error = PTR_ERR(page);
goto out;
- }
-
- /* PageDirty: Other will write this out
- * PageWriteback: Other is writing this out
- * PageUptodate: It was read before
- * sector_initialized: already written out
- */
- if (PageDirty(page) || PageWriteback(page)) {
- print_page(page);
- unlock_page(page);
- page_cache_release(page);
+ } else if (page == NULL)
goto next_page;
- }
- if (!PageUptodate(page)) {
- /* New page, readin or zero it */
- init_page_for_write(page, cow_read);
- }
- set_page_writeback(page);
- unlock_page(page);
ret = bl_mark_sectors_init(be->be_inval, isect,
- PAGE_CACHE_SECTORS,
- NULL);
+ PAGE_CACHE_SECTORS);
if (unlikely(ret)) {
dprintk("%s bl_mark_sectors_init fail %d\n",
__func__, ret);
end_page_writeback(page);
page_cache_release(page);
- wdata->pnfs_error = ret;
+ header->pnfs_error = ret;
goto out;
}
+ if (likely(!bl_push_one_short_extent(be->be_inval)))
+ par->bse_count++;
+ else {
+ end_page_writeback(page);
+ page_cache_release(page);
+ header->pnfs_error = -ENOMEM;
+ goto out;
+ }
+ /* FIXME: This should be done in bi_end_io */
+ mark_extents_written(BLK_LSEG2EXT(header->lseg),
+ page->index << PAGE_CACHE_SHIFT,
+ PAGE_CACHE_SIZE);
+
bio = bl_add_page_to_bio(bio, npg_zero, WRITE,
isect, page, be,
bl_end_io_write_zero, par);
if (IS_ERR(bio)) {
- wdata->pnfs_error = PTR_ERR(bio);
+ header->pnfs_error = PTR_ERR(bio);
bio = NULL;
goto out;
}
- /* FIXME: This should be done in bi_end_io */
- mark_extents_written(BLK_LSEG2EXT(wdata->lseg),
- page->index << PAGE_CACHE_SHIFT,
- PAGE_CACHE_SIZE);
next_page:
isect += PAGE_CACHE_SECTORS;
extent_length -= PAGE_CACHE_SECTORS;
@@ -604,37 +655,45 @@ next_page:
/* Middle pages */
pg_index = wdata->args.pgbase >> PAGE_CACHE_SHIFT;
- for (i = pg_index; i < wdata->npages; i++) {
+ for (i = pg_index; i < wdata->pages.npages; i++) {
if (!extent_length) {
/* We've used up the previous extent */
bl_put_extent(be);
bio = bl_submit_bio(WRITE, bio);
/* Get the next one */
- be = bl_find_get_extent(BLK_LSEG2EXT(wdata->lseg),
+ be = bl_find_get_extent(BLK_LSEG2EXT(header->lseg),
isect, NULL);
if (!be || !is_writable(be, isect)) {
- wdata->pnfs_error = -EINVAL;
+ header->pnfs_error = -EINVAL;
goto out;
}
+ if (be->be_state == PNFS_BLOCK_INVALID_DATA) {
+ if (likely(!bl_push_one_short_extent(
+ be->be_inval)))
+ par->bse_count++;
+ else {
+ header->pnfs_error = -ENOMEM;
+ goto out;
+ }
+ }
extent_length = be->be_length -
(isect - be->be_f_offset);
}
if (be->be_state == PNFS_BLOCK_INVALID_DATA) {
ret = bl_mark_sectors_init(be->be_inval, isect,
- PAGE_CACHE_SECTORS,
- NULL);
+ PAGE_CACHE_SECTORS);
if (unlikely(ret)) {
dprintk("%s bl_mark_sectors_init fail %d\n",
__func__, ret);
- wdata->pnfs_error = ret;
+ header->pnfs_error = ret;
goto out;
}
}
- bio = bl_add_page_to_bio(bio, wdata->npages - i, WRITE,
+ bio = bl_add_page_to_bio(bio, wdata->pages.npages - i, WRITE,
isect, pages[i], be,
bl_end_io_write, par);
if (IS_ERR(bio)) {
- wdata->pnfs_error = PTR_ERR(bio);
+ header->pnfs_error = PTR_ERR(bio);
bio = NULL;
goto out;
}
@@ -664,6 +723,10 @@ out:
bl_submit_bio(WRITE, bio);
put_parallel(par);
return PNFS_ATTEMPTED;
+out_mds:
+ bl_put_extent(be);
+ kfree(par);
+ return PNFS_NOT_ATTEMPTED;
}
/* FIXME - range ignored */
@@ -690,11 +753,17 @@ static void
release_inval_marks(struct pnfs_inval_markings *marks)
{
struct pnfs_inval_tracking *pos, *temp;
+ struct pnfs_block_short_extent *se, *stemp;
list_for_each_entry_safe(pos, temp, &marks->im_tree.mtt_stub, it_link) {
list_del(&pos->it_link);
kfree(pos);
}
+
+ list_for_each_entry_safe(se, stemp, &marks->im_extents, bse_node) {
+ list_del(&se->bse_node);
+ kfree(se);
+ }
return;
}
@@ -779,16 +848,13 @@ bl_cleanup_layoutcommit(struct nfs4_layoutcommit_data *lcdata)
static void free_blk_mountid(struct block_mount_id *mid)
{
if (mid) {
- struct pnfs_block_dev *dev;
- spin_lock(&mid->bm_lock);
- while (!list_empty(&mid->bm_devlist)) {
- dev = list_first_entry(&mid->bm_devlist,
- struct pnfs_block_dev,
- bm_node);
+ struct pnfs_block_dev *dev, *tmp;
+
+ /* No need to take bm_lock as we are last user freeing bm_devlist */
+ list_for_each_entry_safe(dev, tmp, &mid->bm_devlist, bm_node) {
list_del(&dev->bm_node);
bl_free_block_dev(dev);
}
- spin_unlock(&mid->bm_lock);
kfree(mid);
}
}
@@ -812,7 +878,7 @@ nfs4_blk_get_deviceinfo(struct nfs_server *server, const struct nfs_fh *fh,
* GETDEVICEINFO's maxcount
*/
max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz;
- max_pages = max_resp_sz >> PAGE_SHIFT;
+ max_pages = nfs_page_array_len(0, max_resp_sz);
dprintk("%s max_resp_sz %u max_pages %d\n",
__func__, max_resp_sz, max_pages);
@@ -965,10 +1031,128 @@ static const struct rpc_pipe_ops bl_upcall_ops = {
.destroy_msg = bl_pipe_destroy_msg,
};
+static struct dentry *nfs4blocklayout_register_sb(struct super_block *sb,
+ struct rpc_pipe *pipe)
+{
+ struct dentry *dir, *dentry;
+
+ dir = rpc_d_lookup_sb(sb, NFS_PIPE_DIRNAME);
+ if (dir == NULL)
+ return ERR_PTR(-ENOENT);
+ dentry = rpc_mkpipe_dentry(dir, "blocklayout", NULL, pipe);
+ dput(dir);
+ return dentry;
+}
+
+static void nfs4blocklayout_unregister_sb(struct super_block *sb,
+ struct rpc_pipe *pipe)
+{
+ if (pipe->dentry)
+ rpc_unlink(pipe->dentry);
+}
+
+static int rpc_pipefs_event(struct notifier_block *nb, unsigned long event,
+ void *ptr)
+{
+ struct super_block *sb = ptr;
+ struct net *net = sb->s_fs_info;
+ struct nfs_net *nn = net_generic(net, nfs_net_id);
+ struct dentry *dentry;
+ int ret = 0;
+
+ if (!try_module_get(THIS_MODULE))
+ return 0;
+
+ if (nn->bl_device_pipe == NULL) {
+ module_put(THIS_MODULE);
+ return 0;
+ }
+
+ switch (event) {
+ case RPC_PIPEFS_MOUNT:
+ dentry = nfs4blocklayout_register_sb(sb, nn->bl_device_pipe);
+ if (IS_ERR(dentry)) {
+ ret = PTR_ERR(dentry);
+ break;
+ }
+ nn->bl_device_pipe->dentry = dentry;
+ break;
+ case RPC_PIPEFS_UMOUNT:
+ if (nn->bl_device_pipe->dentry)
+ nfs4blocklayout_unregister_sb(sb, nn->bl_device_pipe);
+ break;
+ default:
+ ret = -ENOTSUPP;
+ break;
+ }
+ module_put(THIS_MODULE);
+ return ret;
+}
+
+static struct notifier_block nfs4blocklayout_block = {
+ .notifier_call = rpc_pipefs_event,
+};
+
+static struct dentry *nfs4blocklayout_register_net(struct net *net,
+ struct rpc_pipe *pipe)
+{
+ struct super_block *pipefs_sb;
+ struct dentry *dentry;
+
+ pipefs_sb = rpc_get_sb_net(net);
+ if (!pipefs_sb)
+ return NULL;
+ dentry = nfs4blocklayout_register_sb(pipefs_sb, pipe);
+ rpc_put_sb_net(net);
+ return dentry;
+}
+
+static void nfs4blocklayout_unregister_net(struct net *net,
+ struct rpc_pipe *pipe)
+{
+ struct super_block *pipefs_sb;
+
+ pipefs_sb = rpc_get_sb_net(net);
+ if (pipefs_sb) {
+ nfs4blocklayout_unregister_sb(pipefs_sb, pipe);
+ rpc_put_sb_net(net);
+ }
+}
+
+static int nfs4blocklayout_net_init(struct net *net)
+{
+ struct nfs_net *nn = net_generic(net, nfs_net_id);
+ struct dentry *dentry;
+
+ init_waitqueue_head(&nn->bl_wq);
+ nn->bl_device_pipe = rpc_mkpipe_data(&bl_upcall_ops, 0);
+ if (IS_ERR(nn->bl_device_pipe))
+ return PTR_ERR(nn->bl_device_pipe);
+ dentry = nfs4blocklayout_register_net(net, nn->bl_device_pipe);
+ if (IS_ERR(dentry)) {
+ rpc_destroy_pipe_data(nn->bl_device_pipe);
+ return PTR_ERR(dentry);
+ }
+ nn->bl_device_pipe->dentry = dentry;
+ return 0;
+}
+
+static void nfs4blocklayout_net_exit(struct net *net)
+{
+ struct nfs_net *nn = net_generic(net, nfs_net_id);
+
+ nfs4blocklayout_unregister_net(net, nn->bl_device_pipe);
+ rpc_destroy_pipe_data(nn->bl_device_pipe);
+ nn->bl_device_pipe = NULL;
+}
+
+static struct pernet_operations nfs4blocklayout_net_ops = {
+ .init = nfs4blocklayout_net_init,
+ .exit = nfs4blocklayout_net_exit,
+};
+
static int __init nfs4blocklayout_init(void)
{
- struct vfsmount *mnt;
- struct path path;
int ret;
dprintk("%s: NFSv4 Block Layout Driver Registering...\n", __func__);
@@ -977,32 +1161,17 @@ static int __init nfs4blocklayout_init(void)
if (ret)
goto out;
- init_waitqueue_head(&bl_wq);
-
- mnt = rpc_get_mount();
- if (IS_ERR(mnt)) {
- ret = PTR_ERR(mnt);
+ ret = rpc_pipefs_notifier_register(&nfs4blocklayout_block);
+ if (ret)
goto out_remove;
- }
-
- ret = vfs_path_lookup(mnt->mnt_root,
- mnt,
- NFS_PIPE_DIRNAME, 0, &path);
+ ret = register_pernet_subsys(&nfs4blocklayout_net_ops);
if (ret)
- goto out_putrpc;
-
- bl_device_pipe = rpc_mkpipe(path.dentry, "blocklayout", NULL,
- &bl_upcall_ops, 0);
- path_put(&path);
- if (IS_ERR(bl_device_pipe)) {
- ret = PTR_ERR(bl_device_pipe);
- goto out_putrpc;
- }
+ goto out_notifier;
out:
return ret;
-out_putrpc:
- rpc_put_mount();
+out_notifier:
+ rpc_pipefs_notifier_unregister(&nfs4blocklayout_block);
out_remove:
pnfs_unregister_layoutdriver(&blocklayout_type);
return ret;
@@ -1013,9 +1182,9 @@ static void __exit nfs4blocklayout_exit(void)
dprintk("%s: NFSv4 Block Layout Driver Unregistering...\n",
__func__);
+ rpc_pipefs_notifier_unregister(&nfs4blocklayout_block);
+ unregister_pernet_subsys(&nfs4blocklayout_net_ops);
pnfs_unregister_layoutdriver(&blocklayout_type);
- rpc_unlink(bl_device_pipe);
- rpc_put_mount();
}
MODULE_ALIAS("nfs-layouttype4-3");
diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h
index 42acf7ef599..03350690118 100644
--- a/fs/nfs/blocklayout/blocklayout.h
+++ b/fs/nfs/blocklayout/blocklayout.h
@@ -37,6 +37,7 @@
#include <linux/sunrpc/rpc_pipe_fs.h>
#include "../pnfs.h"
+#include "../netns.h"
#define PAGE_CACHE_SECTORS (PAGE_CACHE_SIZE >> SECTOR_SHIFT)
#define PAGE_CACHE_SECTOR_SHIFT (PAGE_CACHE_SHIFT - SECTOR_SHIFT)
@@ -50,6 +51,7 @@ struct pnfs_block_dev {
struct list_head bm_node;
struct nfs4_deviceid bm_mdevid; /* associated devid */
struct block_device *bm_mdev; /* meta device itself */
+ struct net *net;
};
enum exstate4 {
@@ -70,6 +72,7 @@ struct pnfs_inval_markings {
spinlock_t im_lock;
struct my_tree im_tree; /* Sectors that need LAYOUTCOMMIT */
sector_t im_block_size; /* Server blocksize in sectors */
+ struct list_head im_extents; /* Short extents for INVAL->RW conversion */
};
struct pnfs_inval_tracking {
@@ -105,6 +108,7 @@ BL_INIT_INVAL_MARKS(struct pnfs_inval_markings *marks, sector_t blocksize)
{
spin_lock_init(&marks->im_lock);
INIT_LIST_HEAD(&marks->im_tree.mtt_stub);
+ INIT_LIST_HEAD(&marks->im_extents);
marks->im_block_size = blocksize;
marks->im_tree.mtt_step_size = min((sector_t)PAGE_CACHE_SECTORS,
blocksize);
@@ -149,9 +153,9 @@ BLK_LSEG2EXT(struct pnfs_layout_segment *lseg)
return BLK_LO2EXT(lseg->pls_layout);
}
-struct bl_dev_msg {
- int32_t status;
- uint32_t major, minor;
+struct bl_pipe_msg {
+ struct rpc_pipe_msg msg;
+ wait_queue_head_t *bl_wq;
};
struct bl_msg_hdr {
@@ -159,9 +163,6 @@ struct bl_msg_hdr {
u16 totallen; /* length of entire message, including hdr itself */
};
-extern struct dentry *bl_device_pipe;
-extern wait_queue_head_t bl_wq;
-
#define BL_DEVICE_UMOUNT 0x0 /* Umount--delete devices */
#define BL_DEVICE_MOUNT 0x1 /* Mount--create devices*/
#define BL_DEVICE_REQUEST_INIT 0x0 /* Start request */
@@ -186,8 +187,7 @@ struct pnfs_block_extent *
bl_find_get_extent(struct pnfs_block_layout *bl, sector_t isect,
struct pnfs_block_extent **cow_read);
int bl_mark_sectors_init(struct pnfs_inval_markings *marks,
- sector_t offset, sector_t length,
- sector_t **pages);
+ sector_t offset, sector_t length);
void bl_put_extent(struct pnfs_block_extent *be);
struct pnfs_block_extent *bl_alloc_extent(void);
int bl_is_sector_init(struct pnfs_inval_markings *marks, sector_t isect);
@@ -200,6 +200,11 @@ void clean_pnfs_block_layoutupdate(struct pnfs_block_layout *bl,
int bl_add_merge_extent(struct pnfs_block_layout *bl,
struct pnfs_block_extent *new);
int bl_mark_for_commit(struct pnfs_block_extent *be,
- sector_t offset, sector_t length);
+ sector_t offset, sector_t length,
+ struct pnfs_block_short_extent *new);
+int bl_push_one_short_extent(struct pnfs_inval_markings *marks);
+struct pnfs_block_short_extent *
+bl_pop_one_short_extent(struct pnfs_inval_markings *marks);
+void bl_free_short_extents(struct pnfs_inval_markings *marks, int num_to_free);
#endif /* FS_NFS_NFS4BLOCKLAYOUT_H */
diff --git a/fs/nfs/blocklayout/blocklayoutdev.c b/fs/nfs/blocklayout/blocklayoutdev.c
index d08ba9107fd..c96554245cc 100644
--- a/fs/nfs/blocklayout/blocklayoutdev.c
+++ b/fs/nfs/blocklayout/blocklayoutdev.c
@@ -46,7 +46,7 @@ static int decode_sector_number(__be32 **rp, sector_t *sp)
*rp = xdr_decode_hyper(*rp, &s);
if (s & 0x1ff) {
- printk(KERN_WARNING "%s: sector not aligned\n", __func__);
+ printk(KERN_WARNING "NFS: %s: sector not aligned\n", __func__);
return -1;
}
*sp = s >> SECTOR_SHIFT;
@@ -79,27 +79,30 @@ int nfs4_blkdev_put(struct block_device *bdev)
return blkdev_put(bdev, FMODE_READ);
}
-static struct bl_dev_msg bl_mount_reply;
-
ssize_t bl_pipe_downcall(struct file *filp, const char __user *src,
size_t mlen)
{
+ struct nfs_net *nn = net_generic(filp->f_dentry->d_sb->s_fs_info,
+ nfs_net_id);
+
if (mlen != sizeof (struct bl_dev_msg))
return -EINVAL;
- if (copy_from_user(&bl_mount_reply, src, mlen) != 0)
+ if (copy_from_user(&nn->bl_mount_reply, src, mlen) != 0)
return -EFAULT;
- wake_up(&bl_wq);
+ wake_up(&nn->bl_wq);
return mlen;
}
void bl_pipe_destroy_msg(struct rpc_pipe_msg *msg)
{
+ struct bl_pipe_msg *bl_pipe_msg = container_of(msg, struct bl_pipe_msg, msg);
+
if (msg->errno >= 0)
return;
- wake_up(&bl_wq);
+ wake_up(bl_pipe_msg->bl_wq);
}
/*
@@ -111,29 +114,33 @@ nfs4_blk_decode_device(struct nfs_server *server,
{
struct pnfs_block_dev *rv;
struct block_device *bd = NULL;
- struct rpc_pipe_msg msg;
+ struct bl_pipe_msg bl_pipe_msg;
+ struct rpc_pipe_msg *msg = &bl_pipe_msg.msg;
struct bl_msg_hdr bl_msg = {
.type = BL_DEVICE_MOUNT,
.totallen = dev->mincount,
};
uint8_t *dataptr;
DECLARE_WAITQUEUE(wq, current);
- struct bl_dev_msg *reply = &bl_mount_reply;
int offset, len, i, rc;
+ struct net *net = server->nfs_client->cl_net;
+ struct nfs_net *nn = net_generic(net, nfs_net_id);
+ struct bl_dev_msg *reply = &nn->bl_mount_reply;
dprintk("%s CREATING PIPEFS MESSAGE\n", __func__);
dprintk("%s: deviceid: %s, mincount: %d\n", __func__, dev->dev_id.data,
dev->mincount);
- memset(&msg, 0, sizeof(msg));
- msg.data = kzalloc(sizeof(bl_msg) + dev->mincount, GFP_NOFS);
- if (!msg.data) {
+ bl_pipe_msg.bl_wq = &nn->bl_wq;
+ memset(msg, 0, sizeof(*msg));
+ msg->data = kzalloc(sizeof(bl_msg) + dev->mincount, GFP_NOFS);
+ if (!msg->data) {
rv = ERR_PTR(-ENOMEM);
goto out;
}
- memcpy(msg.data, &bl_msg, sizeof(bl_msg));
- dataptr = (uint8_t *) msg.data;
+ memcpy(msg->data, &bl_msg, sizeof(bl_msg));
+ dataptr = (uint8_t *) msg->data;
len = dev->mincount;
offset = sizeof(bl_msg);
for (i = 0; len > 0; i++) {
@@ -142,13 +149,13 @@ nfs4_blk_decode_device(struct nfs_server *server,
len -= PAGE_CACHE_SIZE;
offset += PAGE_CACHE_SIZE;
}
- msg.len = sizeof(bl_msg) + dev->mincount;
+ msg->len = sizeof(bl_msg) + dev->mincount;
dprintk("%s CALLING USERSPACE DAEMON\n", __func__);
- add_wait_queue(&bl_wq, &wq);
- rc = rpc_queue_upcall(bl_device_pipe->d_inode, &msg);
+ add_wait_queue(&nn->bl_wq, &wq);
+ rc = rpc_queue_upcall(nn->bl_device_pipe, msg);
if (rc < 0) {
- remove_wait_queue(&bl_wq, &wq);
+ remove_wait_queue(&nn->bl_wq, &wq);
rv = ERR_PTR(rc);
goto out;
}
@@ -156,7 +163,7 @@ nfs4_blk_decode_device(struct nfs_server *server,
set_current_state(TASK_UNINTERRUPTIBLE);
schedule();
__set_current_state(TASK_RUNNING);
- remove_wait_queue(&bl_wq, &wq);
+ remove_wait_queue(&nn->bl_wq, &wq);
if (reply->status != BL_DEVICE_REQUEST_PROC) {
dprintk("%s failed to open device: %d\n",
@@ -181,13 +188,14 @@ nfs4_blk_decode_device(struct nfs_server *server,
rv->bm_mdev = bd;
memcpy(&rv->bm_mdevid, &dev->dev_id, sizeof(struct nfs4_deviceid));
+ rv->net = net;
dprintk("%s Created device %s with bd_block_size %u\n",
__func__,
bd->bd_disk->disk_name,
bd->bd_block_size);
out:
- kfree(msg.data);
+ kfree(msg->data);
return rv;
}
diff --git a/fs/nfs/blocklayout/blocklayoutdm.c b/fs/nfs/blocklayout/blocklayoutdm.c
index d055c755807..737d839bc17 100644
--- a/fs/nfs/blocklayout/blocklayoutdm.c
+++ b/fs/nfs/blocklayout/blocklayoutdm.c
@@ -38,9 +38,10 @@
#define NFSDBG_FACILITY NFSDBG_PNFS_LD
-static void dev_remove(dev_t dev)
+static void dev_remove(struct net *net, dev_t dev)
{
- struct rpc_pipe_msg msg;
+ struct bl_pipe_msg bl_pipe_msg;
+ struct rpc_pipe_msg *msg = &bl_pipe_msg.msg;
struct bl_dev_msg bl_umount_request;
struct bl_msg_hdr bl_msg = {
.type = BL_DEVICE_UMOUNT,
@@ -48,36 +49,38 @@ static void dev_remove(dev_t dev)
};
uint8_t *dataptr;
DECLARE_WAITQUEUE(wq, current);
+ struct nfs_net *nn = net_generic(net, nfs_net_id);
dprintk("Entering %s\n", __func__);
- memset(&msg, 0, sizeof(msg));
- msg.data = kzalloc(1 + sizeof(bl_umount_request), GFP_NOFS);
- if (!msg.data)
+ bl_pipe_msg.bl_wq = &nn->bl_wq;
+ memset(msg, 0, sizeof(*msg));
+ msg->data = kzalloc(1 + sizeof(bl_umount_request), GFP_NOFS);
+ if (!msg->data)
goto out;
memset(&bl_umount_request, 0, sizeof(bl_umount_request));
bl_umount_request.major = MAJOR(dev);
bl_umount_request.minor = MINOR(dev);
- memcpy(msg.data, &bl_msg, sizeof(bl_msg));
- dataptr = (uint8_t *) msg.data;
+ memcpy(msg->data, &bl_msg, sizeof(bl_msg));
+ dataptr = (uint8_t *) msg->data;
memcpy(&dataptr[sizeof(bl_msg)], &bl_umount_request, sizeof(bl_umount_request));
- msg.len = sizeof(bl_msg) + bl_msg.totallen;
+ msg->len = sizeof(bl_msg) + bl_msg.totallen;
- add_wait_queue(&bl_wq, &wq);
- if (rpc_queue_upcall(bl_device_pipe->d_inode, &msg) < 0) {
- remove_wait_queue(&bl_wq, &wq);
+ add_wait_queue(&nn->bl_wq, &wq);
+ if (rpc_queue_upcall(nn->bl_device_pipe, msg) < 0) {
+ remove_wait_queue(&nn->bl_wq, &wq);
goto out;
}
set_current_state(TASK_UNINTERRUPTIBLE);
schedule();
__set_current_state(TASK_RUNNING);
- remove_wait_queue(&bl_wq, &wq);
+ remove_wait_queue(&nn->bl_wq, &wq);
out:
- kfree(msg.data);
+ kfree(msg->data);
}
/*
@@ -90,10 +93,10 @@ static void nfs4_blk_metadev_release(struct pnfs_block_dev *bdev)
dprintk("%s Releasing\n", __func__);
rv = nfs4_blkdev_put(bdev->bm_mdev);
if (rv)
- printk(KERN_ERR "%s nfs4_blkdev_put returns %d\n",
+ printk(KERN_ERR "NFS: %s nfs4_blkdev_put returns %d\n",
__func__, rv);
- dev_remove(bdev->bm_mdev->bd_dev);
+ dev_remove(bdev->net, bdev->bm_mdev->bd_dev);
}
void bl_free_block_dev(struct pnfs_block_dev *bdev)
diff --git a/fs/nfs/blocklayout/extents.c b/fs/nfs/blocklayout/extents.c
index 19fa7b0b8c0..1f9a6032796 100644
--- a/fs/nfs/blocklayout/extents.c
+++ b/fs/nfs/blocklayout/extents.c
@@ -110,13 +110,7 @@ static int _add_entry(struct my_tree *tree, u64 s, int32_t tag,
return 0;
} else {
struct pnfs_inval_tracking *new;
- if (storage)
- new = storage;
- else {
- new = kmalloc(sizeof(*new), GFP_NOFS);
- if (!new)
- return -ENOMEM;
- }
+ new = storage;
new->it_sector = s;
new->it_tags = (1 << tag);
list_add(&new->it_link, &pos->it_link);
@@ -139,11 +133,13 @@ static int _set_range(struct my_tree *tree, int32_t tag, u64 s, u64 length)
}
/* Ensure that future operations on given range of tree will not malloc */
-static int _preload_range(struct my_tree *tree, u64 offset, u64 length)
+static int _preload_range(struct pnfs_inval_markings *marks,
+ u64 offset, u64 length)
{
u64 start, end, s;
int count, i, used = 0, status = -ENOMEM;
struct pnfs_inval_tracking **storage;
+ struct my_tree *tree = &marks->im_tree;
dprintk("%s(%llu, %llu) enter\n", __func__, offset, length);
start = normalize(offset, tree->mtt_step_size);
@@ -151,7 +147,7 @@ static int _preload_range(struct my_tree *tree, u64 offset, u64 length)
count = (int)(end - start) / (int)tree->mtt_step_size;
/* Pre-malloc what memory we might need */
- storage = kmalloc(sizeof(*storage) * count, GFP_NOFS);
+ storage = kcalloc(count, sizeof(*storage), GFP_NOFS);
if (!storage)
return -ENOMEM;
for (i = 0; i < count; i++) {
@@ -161,12 +157,11 @@ static int _preload_range(struct my_tree *tree, u64 offset, u64 length)
goto out_cleanup;
}
- /* Now need lock - HOW??? */
-
+ spin_lock_bh(&marks->im_lock);
for (s = start; s < end; s += tree->mtt_step_size)
used += _add_entry(tree, s, INTERNAL_EXISTS, storage[used]);
+ spin_unlock_bh(&marks->im_lock);
- /* Unlock - HOW??? */
status = 0;
out_cleanup:
@@ -179,41 +174,14 @@ static int _preload_range(struct my_tree *tree, u64 offset, u64 length)
return status;
}
-static void set_needs_init(sector_t *array, sector_t offset)
-{
- sector_t *p = array;
-
- dprintk("%s enter\n", __func__);
- if (!p)
- return;
- while (*p < offset)
- p++;
- if (*p == offset)
- return;
- else if (*p == ~0) {
- *p++ = offset;
- *p = ~0;
- return;
- } else {
- sector_t *save = p;
- dprintk("%s Adding %llu\n", __func__, (u64)offset);
- while (*p != ~0)
- p++;
- p++;
- memmove(save + 1, save, (char *)p - (char *)save);
- *save = offset;
- return;
- }
-}
-
/* We are relying on page lock to serialize this */
int bl_is_sector_init(struct pnfs_inval_markings *marks, sector_t isect)
{
int rv;
- spin_lock(&marks->im_lock);
+ spin_lock_bh(&marks->im_lock);
rv = _has_tag(&marks->im_tree, isect, EXTENT_INITIALIZED);
- spin_unlock(&marks->im_lock);
+ spin_unlock_bh(&marks->im_lock);
return rv;
}
@@ -253,78 +221,39 @@ static int is_range_written(struct pnfs_inval_markings *marks,
{
int rv;
- spin_lock(&marks->im_lock);
+ spin_lock_bh(&marks->im_lock);
rv = _range_has_tag(&marks->im_tree, start, end, EXTENT_WRITTEN);
- spin_unlock(&marks->im_lock);
+ spin_unlock_bh(&marks->im_lock);
return rv;
}
/* Marks sectors in [offest, offset_length) as having been initialized.
* All lengths are step-aligned, where step is min(pagesize, blocksize).
- * Notes where partial block is initialized, and helps prepare it for
- * complete initialization later.
+ * Currently assumes offset is page-aligned
*/
-/* Currently assumes offset is page-aligned */
int bl_mark_sectors_init(struct pnfs_inval_markings *marks,
- sector_t offset, sector_t length,
- sector_t **pages)
+ sector_t offset, sector_t length)
{
- sector_t s, start, end;
- sector_t *array = NULL; /* Pages to mark */
+ sector_t start, end;
dprintk("%s(offset=%llu,len=%llu) enter\n",
__func__, (u64)offset, (u64)length);
- s = max((sector_t) 3,
- 2 * (marks->im_block_size / (PAGE_CACHE_SECTORS)));
- dprintk("%s set max=%llu\n", __func__, (u64)s);
- if (pages) {
- array = kmalloc(s * sizeof(sector_t), GFP_NOFS);
- if (!array)
- goto outerr;
- array[0] = ~0;
- }
start = normalize(offset, marks->im_block_size);
end = normalize_up(offset + length, marks->im_block_size);
- if (_preload_range(&marks->im_tree, start, end - start))
+ if (_preload_range(marks, start, end - start))
goto outerr;
- spin_lock(&marks->im_lock);
-
- for (s = normalize_up(start, PAGE_CACHE_SECTORS);
- s < offset; s += PAGE_CACHE_SECTORS) {
- dprintk("%s pre-area pages\n", __func__);
- /* Portion of used block is not initialized */
- if (!_has_tag(&marks->im_tree, s, EXTENT_INITIALIZED))
- set_needs_init(array, s);
- }
+ spin_lock_bh(&marks->im_lock);
if (_set_range(&marks->im_tree, EXTENT_INITIALIZED, offset, length))
goto out_unlock;
- for (s = normalize_up(offset + length, PAGE_CACHE_SECTORS);
- s < end; s += PAGE_CACHE_SECTORS) {
- dprintk("%s post-area pages\n", __func__);
- if (!_has_tag(&marks->im_tree, s, EXTENT_INITIALIZED))
- set_needs_init(array, s);
- }
-
- spin_unlock(&marks->im_lock);
+ spin_unlock_bh(&marks->im_lock);
- if (pages) {
- if (array[0] == ~0) {
- kfree(array);
- *pages = NULL;
- } else
- *pages = array;
- }
return 0;
- out_unlock:
- spin_unlock(&marks->im_lock);
- outerr:
- if (pages) {
- kfree(array);
- *pages = NULL;
- }
+out_unlock:
+ spin_unlock_bh(&marks->im_lock);
+outerr:
return -ENOMEM;
}
@@ -338,9 +267,9 @@ static int mark_written_sectors(struct pnfs_inval_markings *marks,
dprintk("%s(offset=%llu,len=%llu) enter\n", __func__,
(u64)offset, (u64)length);
- spin_lock(&marks->im_lock);
+ spin_lock_bh(&marks->im_lock);
status = _set_range(&marks->im_tree, EXTENT_WRITTEN, offset, length);
- spin_unlock(&marks->im_lock);
+ spin_unlock_bh(&marks->im_lock);
return status;
}
@@ -440,20 +369,18 @@ static void add_to_commitlist(struct pnfs_block_layout *bl,
/* Note the range described by offset, length is guaranteed to be contained
* within be.
+ * new will be freed, either by this function or add_to_commitlist if they
+ * decide not to use it, or after LAYOUTCOMMIT uses it in the commitlist.
*/
int bl_mark_for_commit(struct pnfs_block_extent *be,
- sector_t offset, sector_t length)
+ sector_t offset, sector_t length,
+ struct pnfs_block_short_extent *new)
{
sector_t new_end, end = offset + length;
- struct pnfs_block_short_extent *new;
struct pnfs_block_layout *bl = container_of(be->be_inval,
struct pnfs_block_layout,
bl_inval);
- new = kmalloc(sizeof(*new), GFP_NOFS);
- if (!new)
- return -ENOMEM;
-
mark_written_sectors(be->be_inval, offset, length);
/* We want to add the range to commit list, but it must be
* block-normalized, and verified that the normalized range has
@@ -483,9 +410,6 @@ int bl_mark_for_commit(struct pnfs_block_extent *be,
new->bse_mdev = be->be_mdev;
spin_lock(&bl->bl_ext_lock);
- /* new will be freed, either by add_to_commitlist if it decides not
- * to use it, or after LAYOUTCOMMIT uses it in the commitlist.
- */
add_to_commitlist(bl, new);
spin_unlock(&bl->bl_ext_lock);
return 0;
@@ -933,3 +857,53 @@ clean_pnfs_block_layoutupdate(struct pnfs_block_layout *bl,
}
}
}
+
+int bl_push_one_short_extent(struct pnfs_inval_markings *marks)
+{
+ struct pnfs_block_short_extent *new;
+
+ new = kmalloc(sizeof(*new), GFP_NOFS);
+ if (unlikely(!new))
+ return -ENOMEM;
+
+ spin_lock_bh(&marks->im_lock);
+ list_add(&new->bse_node, &marks->im_extents);
+ spin_unlock_bh(&marks->im_lock);
+
+ return 0;
+}
+
+struct pnfs_block_short_extent *
+bl_pop_one_short_extent(struct pnfs_inval_markings *marks)
+{
+ struct pnfs_block_short_extent *rv = NULL;
+
+ spin_lock_bh(&marks->im_lock);
+ if (!list_empty(&marks->im_extents)) {
+ rv = list_entry((&marks->im_extents)->next,
+ struct pnfs_block_short_extent, bse_node);
+ list_del_init(&rv->bse_node);
+ }
+ spin_unlock_bh(&marks->im_lock);
+
+ return rv;
+}
+
+void bl_free_short_extents(struct pnfs_inval_markings *marks, int num_to_free)
+{
+ struct pnfs_block_short_extent *se = NULL, *tmp;
+
+ if (num_to_free <= 0)
+ return;
+
+ spin_lock(&marks->im_lock);
+ list_for_each_entry_safe(se, tmp, &marks->im_extents, bse_node) {
+ list_del(&se->bse_node);
+ kfree(se);
+ if (--num_to_free == 0)
+ break;
+ }
+ spin_unlock(&marks->im_lock);
+
+ BUG_ON(num_to_free > 0);
+}
diff --git a/fs/nfs/cache_lib.c b/fs/nfs/cache_lib.c
index c98b439332f..dded2636811 100644
--- a/fs/nfs/cache_lib.c
+++ b/fs/nfs/cache_lib.c
@@ -13,6 +13,7 @@
#include <linux/slab.h>
#include <linux/sunrpc/cache.h>
#include <linux/sunrpc/rpc_pipe_fs.h>
+#include <net/net_namespace.h>
#include "cache_lib.h"
@@ -111,30 +112,54 @@ int nfs_cache_wait_for_upcall(struct nfs_cache_defer_req *dreq)
return 0;
}
-int nfs_cache_register(struct cache_detail *cd)
+int nfs_cache_register_sb(struct super_block *sb, struct cache_detail *cd)
{
- struct vfsmount *mnt;
- struct path path;
int ret;
+ struct dentry *dir;
- mnt = rpc_get_mount();
- if (IS_ERR(mnt))
- return PTR_ERR(mnt);
- ret = vfs_path_lookup(mnt->mnt_root, mnt, "/cache", 0, &path);
- if (ret)
- goto err;
- ret = sunrpc_cache_register_pipefs(path.dentry, cd->name, 0600, cd);
- path_put(&path);
- if (!ret)
- return ret;
-err:
- rpc_put_mount();
+ dir = rpc_d_lookup_sb(sb, "cache");
+ BUG_ON(dir == NULL);
+ ret = sunrpc_cache_register_pipefs(dir, cd->name, 0600, cd);
+ dput(dir);
return ret;
}
-void nfs_cache_unregister(struct cache_detail *cd)
+int nfs_cache_register_net(struct net *net, struct cache_detail *cd)
{
- sunrpc_cache_unregister_pipefs(cd);
- rpc_put_mount();
+ struct super_block *pipefs_sb;
+ int ret = 0;
+
+ pipefs_sb = rpc_get_sb_net(net);
+ if (pipefs_sb) {
+ ret = nfs_cache_register_sb(pipefs_sb, cd);
+ rpc_put_sb_net(net);
+ }
+ return ret;
+}
+
+void nfs_cache_unregister_sb(struct super_block *sb, struct cache_detail *cd)
+{
+ if (cd->u.pipefs.dir)
+ sunrpc_cache_unregister_pipefs(cd);
+}
+
+void nfs_cache_unregister_net(struct net *net, struct cache_detail *cd)
+{
+ struct super_block *pipefs_sb;
+
+ pipefs_sb = rpc_get_sb_net(net);
+ if (pipefs_sb) {
+ nfs_cache_unregister_sb(pipefs_sb, cd);
+ rpc_put_sb_net(net);
+ }
+}
+
+void nfs_cache_init(struct cache_detail *cd)
+{
+ sunrpc_init_cache_detail(cd);
}
+void nfs_cache_destroy(struct cache_detail *cd)
+{
+ sunrpc_destroy_cache_detail(cd);
+}
diff --git a/fs/nfs/cache_lib.h b/fs/nfs/cache_lib.h
index 7cf6cafcc00..317db95e37f 100644
--- a/fs/nfs/cache_lib.h
+++ b/fs/nfs/cache_lib.h
@@ -23,5 +23,11 @@ extern struct nfs_cache_defer_req *nfs_cache_defer_req_alloc(void);
extern void nfs_cache_defer_req_put(struct nfs_cache_defer_req *dreq);
extern int nfs_cache_wait_for_upcall(struct nfs_cache_defer_req *dreq);
-extern int nfs_cache_register(struct cache_detail *cd);
-extern void nfs_cache_unregister(struct cache_detail *cd);
+extern void nfs_cache_init(struct cache_detail *cd);
+extern void nfs_cache_destroy(struct cache_detail *cd);
+extern int nfs_cache_register_net(struct net *net, struct cache_detail *cd);
+extern void nfs_cache_unregister_net(struct net *net, struct cache_detail *cd);
+extern int nfs_cache_register_sb(struct super_block *sb,
+ struct cache_detail *cd);
+extern void nfs_cache_unregister_sb(struct super_block *sb,
+ struct cache_detail *cd);
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 516f3375e06..23ff18fe080 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -85,7 +85,7 @@ nfs4_callback_svc(void *vrqstp)
}
if (err < 0) {
if (err != preverr) {
- printk(KERN_WARNING "%s: unexpected error "
+ printk(KERN_WARNING "NFS: %s: unexpected error "
"from svc_recv (%d)\n", __func__, err);
preverr = err;
}
@@ -101,8 +101,8 @@ nfs4_callback_svc(void *vrqstp)
/*
* Prepare to bring up the NFSv4 callback service
*/
-struct svc_rqst *
-nfs4_callback_up(struct svc_serv *serv)
+static struct svc_rqst *
+nfs4_callback_up(struct svc_serv *serv, struct rpc_xprt *xprt)
{
int ret;
@@ -172,7 +172,7 @@ nfs41_callback_svc(void *vrqstp)
/*
* Bring up the NFSv4.1 callback service
*/
-struct svc_rqst *
+static struct svc_rqst *
nfs41_callback_up(struct svc_serv *serv, struct rpc_xprt *xprt)
{
struct svc_rqst *rqstp;
@@ -253,6 +253,7 @@ int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt)
char svc_name[12];
int ret = 0;
int minorversion_setup;
+ struct net *net = &init_net;
mutex_lock(&nfs_callback_mutex);
if (cb_info->users++ || cb_info->task != NULL) {
@@ -265,11 +266,17 @@ int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt)
goto out_err;
}
+ ret = svc_bind(serv, net);
+ if (ret < 0) {
+ printk(KERN_WARNING "NFS: bind callback service failed\n");
+ goto out_err;
+ }
+
minorversion_setup = nfs_minorversion_callback_svc_setup(minorversion,
serv, xprt, &rqstp, &callback_svc);
if (!minorversion_setup) {
/* v4.0 callback setup */
- rqstp = nfs4_callback_up(serv);
+ rqstp = nfs4_callback_up(serv, xprt);
callback_svc = nfs4_callback_svc;
}
@@ -306,6 +313,8 @@ out_err:
dprintk("NFS: Couldn't create callback socket or server thread; "
"err = %d\n", ret);
cb_info->users--;
+ if (serv)
+ svc_shutdown_net(serv, net);
goto out;
}
@@ -320,6 +329,7 @@ void nfs_callback_down(int minorversion)
cb_info->users--;
if (cb_info->users == 0 && cb_info->task != NULL) {
kthread_stop(cb_info->task);
+ svc_shutdown_net(cb_info->serv, &init_net);
svc_exit_thread(cb_info->rqst);
cb_info->serv = NULL;
cb_info->rqst = NULL;
@@ -332,8 +342,7 @@ void nfs_callback_down(int minorversion)
int
check_gss_callback_principal(struct nfs_client *clp, struct svc_rqst *rqstp)
{
- struct rpc_clnt *r = clp->cl_rpcclient;
- char *p = svc_gss_principal(rqstp);
+ char *p = rqstp->rq_cred.cr_principal;
if (rqstp->rq_authop->flavour != RPC_AUTH_GSS)
return 1;
@@ -353,7 +362,7 @@ check_gss_callback_principal(struct nfs_client *clp, struct svc_rqst *rqstp)
if (memcmp(p, "nfs@", 4) != 0)
return 0;
p += 4;
- if (strcmp(p, r->cl_server) != 0)
+ if (strcmp(p, clp->cl_hostname) != 0)
return 0;
return 1;
}
diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h
index 07df5f1d85e..a5527c90a5a 100644
--- a/fs/nfs/callback.h
+++ b/fs/nfs/callback.h
@@ -38,7 +38,8 @@ enum nfs4_callback_opnum {
struct cb_process_state {
__be32 drc_status;
struct nfs_client *clp;
- int slotid;
+ u32 slotid;
+ struct net *net;
};
struct cb_compound_hdr_arg {
@@ -162,7 +163,7 @@ struct cb_layoutrecallargs {
};
};
-extern unsigned nfs4_callback_layoutrecall(
+extern __be32 nfs4_callback_layoutrecall(
struct cb_layoutrecallargs *args,
void *dummy, struct cb_process_state *cps);
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 43926add945..1b5d809a105 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -8,6 +8,7 @@
#include <linux/nfs4.h>
#include <linux/nfs_fs.h>
#include <linux/slab.h>
+#include <linux/rcupdate.h>
#include "nfs4_fs.h"
#include "callback.h"
#include "delegation.h"
@@ -33,7 +34,7 @@ __be32 nfs4_callback_getattr(struct cb_getattrargs *args,
res->bitmap[0] = res->bitmap[1] = 0;
res->status = htonl(NFS4ERR_BADHANDLE);
- dprintk("NFS: GETATTR callback request from %s\n",
+ dprintk_rcu("NFS: GETATTR callback request from %s\n",
rpc_peeraddr2str(cps->clp->cl_rpcclient, RPC_DISPLAY_ADDR));
inode = nfs_delegation_find_inode(cps->clp, &args->fh);
@@ -73,7 +74,7 @@ __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy,
if (!cps->clp) /* Always set for v4.0. Set in cb_sequence for v4.1 */
goto out;
- dprintk("NFS: RECALL callback request from %s\n",
+ dprintk_rcu("NFS: RECALL callback request from %s\n",
rpc_peeraddr2str(cps->clp->cl_rpcclient, RPC_DISPLAY_ADDR));
res = htonl(NFS4ERR_BADHANDLE);
@@ -86,8 +87,7 @@ __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy,
res = 0;
break;
case -ENOENT:
- if (res != 0)
- res = htonl(NFS4ERR_BAD_STATEID);
+ res = htonl(NFS4ERR_BAD_STATEID);
break;
default:
res = htonl(NFS4ERR_RESOURCE);
@@ -98,52 +98,64 @@ out:
return res;
}
-int nfs4_validate_delegation_stateid(struct nfs_delegation *delegation, const nfs4_stateid *stateid)
-{
- if (delegation == NULL || memcmp(delegation->stateid.data, stateid->data,
- sizeof(delegation->stateid.data)) != 0)
- return 0;
- return 1;
-}
-
#if defined(CONFIG_NFS_V4_1)
-static u32 initiate_file_draining(struct nfs_client *clp,
- struct cb_layoutrecallargs *args)
+/*
+ * Lookup a layout by filehandle.
+ *
+ * Note: gets a refcount on the layout hdr and on its respective inode.
+ * Caller must put the layout hdr and the inode.
+ *
+ * TODO: keep track of all layouts (and delegations) in a hash table
+ * hashed by filehandle.
+ */
+static struct pnfs_layout_hdr * get_layout_by_fh_locked(struct nfs_client *clp, struct nfs_fh *fh)
{
struct nfs_server *server;
- struct pnfs_layout_hdr *lo;
struct inode *ino;
- bool found = false;
- u32 rv = NFS4ERR_NOMATCHING_LAYOUT;
- LIST_HEAD(free_me_list);
+ struct pnfs_layout_hdr *lo;
- spin_lock(&clp->cl_lock);
- rcu_read_lock();
list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
list_for_each_entry(lo, &server->layouts, plh_layouts) {
- if (nfs_compare_fh(&args->cbl_fh,
- &NFS_I(lo->plh_inode)->fh))
+ if (nfs_compare_fh(fh, &NFS_I(lo->plh_inode)->fh))
continue;
ino = igrab(lo->plh_inode);
if (!ino)
continue;
- found = true;
- /* Without this, layout can be freed as soon
- * as we release cl_lock.
- */
get_layout_hdr(lo);
- break;
+ return lo;
}
- if (found)
- break;
}
+
+ return NULL;
+}
+
+static struct pnfs_layout_hdr * get_layout_by_fh(struct nfs_client *clp, struct nfs_fh *fh)
+{
+ struct pnfs_layout_hdr *lo;
+
+ spin_lock(&clp->cl_lock);
+ rcu_read_lock();
+ lo = get_layout_by_fh_locked(clp, fh);
rcu_read_unlock();
spin_unlock(&clp->cl_lock);
- if (!found)
+ return lo;
+}
+
+static u32 initiate_file_draining(struct nfs_client *clp,
+ struct cb_layoutrecallargs *args)
+{
+ struct inode *ino;
+ struct pnfs_layout_hdr *lo;
+ u32 rv = NFS4ERR_NOMATCHING_LAYOUT;
+ LIST_HEAD(free_me_list);
+
+ lo = get_layout_by_fh(clp, &args->cbl_fh);
+ if (!lo)
return NFS4ERR_NOMATCHING_LAYOUT;
+ ino = lo->plh_inode;
spin_lock(&ino->i_lock);
if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) ||
mark_matching_lsegs_invalid(lo, &free_me_list,
@@ -213,17 +225,13 @@ static u32 initiate_bulk_draining(struct nfs_client *clp,
static u32 do_callback_layoutrecall(struct nfs_client *clp,
struct cb_layoutrecallargs *args)
{
- u32 res = NFS4ERR_DELAY;
+ u32 res;
dprintk("%s enter, type=%i\n", __func__, args->cbl_recall_type);
- if (test_and_set_bit(NFS4CLNT_LAYOUTRECALL, &clp->cl_state))
- goto out;
if (args->cbl_recall_type == RETURN_FILE)
res = initiate_file_draining(clp, args);
else
res = initiate_bulk_draining(clp, args);
- clear_bit(NFS4CLNT_LAYOUTRECALL, &clp->cl_state);
-out:
dprintk("%s returning %i\n", __func__, res);
return res;
@@ -303,21 +311,6 @@ out:
return res;
}
-int nfs41_validate_delegation_stateid(struct nfs_delegation *delegation, const nfs4_stateid *stateid)
-{
- if (delegation == NULL)
- return 0;
-
- if (stateid->stateid.seqid != 0)
- return 0;
- if (memcmp(&delegation->stateid.stateid.other,
- &stateid->stateid.other,
- NFS4_STATEID_OTHER_SIZE))
- return 0;
-
- return 1;
-}
-
/*
* Validate the sequenceID sent by the server.
* Return success if the sequenceID is one more than what we last saw on
@@ -339,7 +332,7 @@ validate_seqid(struct nfs4_slot_table *tbl, struct cb_sequenceargs * args)
dprintk("%s enter. slotid %d seqid %d\n",
__func__, args->csa_slotid, args->csa_sequenceid);
- if (args->csa_slotid > NFS41_BC_MAX_CALLBACKS)
+ if (args->csa_slotid >= NFS41_BC_MAX_CALLBACKS)
return htonl(NFS4ERR_BADSLOT);
slot = tbl->slots + args->csa_slotid;
@@ -441,7 +434,7 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args,
int i;
__be32 status = htonl(NFS4ERR_BADSESSION);
- clp = nfs4_find_client_sessionid(args->csa_addr, &args->csa_sessionid);
+ clp = nfs4_find_client_sessionid(cps->net, args->csa_addr, &args->csa_sessionid);
if (clp == NULL)
goto out;
@@ -517,7 +510,7 @@ __be32 nfs4_callback_recallany(struct cb_recallanyargs *args, void *dummy,
if (!cps->clp) /* set in cb_sequence */
goto out;
- dprintk("NFS: RECALL_ANY callback request from %s\n",
+ dprintk_rcu("NFS: RECALL_ANY callback request from %s\n",
rpc_peeraddr2str(cps->clp->cl_rpcclient, RPC_DISPLAY_ADDR));
status = cpu_to_be32(NFS4ERR_INVAL);
@@ -552,7 +545,7 @@ __be32 nfs4_callback_recallslot(struct cb_recallslotargs *args, void *dummy,
if (!cps->clp) /* set in cb_sequence */
goto out;
- dprintk("NFS: CB_RECALL_SLOT request from %s target max slots %d\n",
+ dprintk_rcu("NFS: CB_RECALL_SLOT request from %s target max slots %d\n",
rpc_peeraddr2str(cps->clp->cl_rpcclient, RPC_DISPLAY_ADDR),
args->crsa_target_max_slots);
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index 726e59a9e50..e64b01d2a33 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -9,6 +9,8 @@
#include <linux/sunrpc/svc.h>
#include <linux/nfs4.h>
#include <linux/nfs_fs.h>
+#include <linux/ratelimit.h>
+#include <linux/printk.h>
#include <linux/slab.h>
#include <linux/sunrpc/bc_xprt.h>
#include "nfs4_fs.h"
@@ -73,7 +75,7 @@ static __be32 *read_buf(struct xdr_stream *xdr, int nbytes)
p = xdr_inline_decode(xdr, nbytes);
if (unlikely(p == NULL))
- printk(KERN_WARNING "NFSv4 callback reply buffer overflowed!\n");
+ printk(KERN_WARNING "NFS: NFSv4 callback reply buffer overflowed!\n");
return p;
}
@@ -138,10 +140,10 @@ static __be32 decode_stateid(struct xdr_stream *xdr, nfs4_stateid *stateid)
{
__be32 *p;
- p = read_buf(xdr, 16);
+ p = read_buf(xdr, NFS4_STATEID_SIZE);
if (unlikely(p == NULL))
return htonl(NFS4ERR_RESOURCE);
- memcpy(stateid->data, p, 16);
+ memcpy(stateid, p, NFS4_STATEID_SIZE);
return 0;
}
@@ -155,7 +157,7 @@ static __be32 decode_compound_hdr_arg(struct xdr_stream *xdr, struct cb_compound
return status;
/* We do not like overly long tags! */
if (hdr->taglen > CB_OP_TAGLEN_MAXSZ - 12) {
- printk("NFSv4 CALLBACK %s: client sent tag of length %u\n",
+ printk("NFS: NFSv4 CALLBACK %s: client sent tag of length %u\n",
__func__, hdr->taglen);
return htonl(NFS4ERR_RESOURCE);
}
@@ -167,7 +169,7 @@ static __be32 decode_compound_hdr_arg(struct xdr_stream *xdr, struct cb_compound
if (hdr->minorversion <= 1) {
hdr->cb_ident = ntohl(*p++); /* ignored by v4.1 */
} else {
- printk(KERN_WARNING "%s: NFSv4 server callback with "
+ pr_warn_ratelimited("NFS: %s: NFSv4 server callback with "
"illegal minor version %u!\n",
__func__, hdr->minorversion);
return htonl(NFS4ERR_MINOR_VERS_MISMATCH);
@@ -305,6 +307,10 @@ __be32 decode_devicenotify_args(struct svc_rqst *rqstp,
n = ntohl(*p++);
if (n <= 0)
goto out;
+ if (n > ULONG_MAX / sizeof(*args->devs)) {
+ status = htonl(NFS4ERR_BADXDR);
+ goto out;
+ }
args->devs = kmalloc(n * sizeof(*args->devs), GFP_KERNEL);
if (!args->devs) {
@@ -449,9 +455,9 @@ static __be32 decode_cb_sequence_args(struct svc_rqst *rqstp,
args->csa_nrclists = ntohl(*p++);
args->csa_rclists = NULL;
if (args->csa_nrclists) {
- args->csa_rclists = kmalloc(args->csa_nrclists *
- sizeof(*args->csa_rclists),
- GFP_KERNEL);
+ args->csa_rclists = kmalloc_array(args->csa_nrclists,
+ sizeof(*args->csa_rclists),
+ GFP_KERNEL);
if (unlikely(args->csa_rclists == NULL))
goto out;
@@ -690,7 +696,7 @@ static __be32 encode_cb_sequence_res(struct svc_rqst *rqstp,
const struct cb_sequenceres *res)
{
__be32 *p;
- unsigned status = res->csr_status;
+ __be32 status = res->csr_status;
if (unlikely(status != 0))
goto out;
@@ -755,14 +761,14 @@ static void nfs4_callback_free_slot(struct nfs4_session *session)
* Let the state manager know callback processing done.
* A single slot, so highest used slotid is either 0 or -1
*/
- tbl->highest_used_slotid = -1;
+ tbl->highest_used_slotid = NFS4_NO_SLOT;
nfs4_check_drain_bc_complete(session);
spin_unlock(&tbl->slot_tbl_lock);
}
static void nfs4_cb_free_slot(struct cb_process_state *cps)
{
- if (cps->slotid != -1)
+ if (cps->slotid != NFS4_NO_SLOT)
nfs4_callback_free_slot(cps->clp->cl_session);
}
@@ -856,7 +862,8 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r
struct cb_process_state cps = {
.drc_status = 0,
.clp = NULL,
- .slotid = -1,
+ .slotid = NFS4_NO_SLOT,
+ .net = rqstp->rq_xprt->xpt_net,
};
unsigned int nops = 0;
@@ -872,7 +879,7 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r
return rpc_garbage_args;
if (hdr_arg.minorversion == 0) {
- cps.clp = nfs4_find_client_ident(hdr_arg.cb_ident);
+ cps.clp = nfs4_find_client_ident(rqstp->rq_xprt->xpt_net, hdr_arg.cb_ident);
if (!cps.clp || !check_gss_callback_principal(cps.clp, rqstp))
return rpc_drop_reply;
}
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 873bf00d51a..f005b5bebdc 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -36,11 +36,13 @@
#include <linux/inet.h>
#include <linux/in6.h>
#include <linux/slab.h>
+#include <linux/idr.h>
#include <net/ipv6.h>
#include <linux/nfs_xdr.h>
#include <linux/sunrpc/bc_xprt.h>
+#include <linux/nsproxy.h>
+#include <linux/pid_namespace.h>
-#include <asm/system.h>
#include "nfs4_fs.h"
#include "callback.h"
@@ -49,15 +51,12 @@
#include "internal.h"
#include "fscache.h"
#include "pnfs.h"
+#include "netns.h"
#define NFSDBG_FACILITY NFSDBG_CLIENT
-static DEFINE_SPINLOCK(nfs_client_lock);
-static LIST_HEAD(nfs_client_list);
-static LIST_HEAD(nfs_volume_list);
static DECLARE_WAIT_QUEUE_HEAD(nfs_client_active_wq);
#ifdef CONFIG_NFS_V4
-static DEFINE_IDR(cb_ident_idr); /* Protected by nfs_client_lock */
/*
* Get a unique NFSv4.0 callback identifier which will be used
@@ -66,15 +65,16 @@ static DEFINE_IDR(cb_ident_idr); /* Protected by nfs_client_lock */
static int nfs_get_cb_ident_idr(struct nfs_client *clp, int minorversion)
{
int ret = 0;
+ struct nfs_net *nn = net_generic(clp->cl_net, nfs_net_id);
if (clp->rpc_ops->version != 4 || minorversion != 0)
return ret;
retry:
- if (!idr_pre_get(&cb_ident_idr, GFP_KERNEL))
+ if (!idr_pre_get(&nn->cb_ident_idr, GFP_KERNEL))
return -ENOMEM;
- spin_lock(&nfs_client_lock);
- ret = idr_get_new(&cb_ident_idr, clp, &clp->cl_cb_ident);
- spin_unlock(&nfs_client_lock);
+ spin_lock(&nn->nfs_client_lock);
+ ret = idr_get_new(&nn->cb_ident_idr, clp, &clp->cl_cb_ident);
+ spin_unlock(&nn->nfs_client_lock);
if (ret == -EAGAIN)
goto retry;
return ret;
@@ -84,13 +84,15 @@ retry:
/*
* Turn off NFSv4 uid/gid mapping when using AUTH_SYS
*/
-static int nfs4_disable_idmapping = 0;
+static bool nfs4_disable_idmapping = true;
/*
* RPC cruft for NFS
*/
-static struct rpc_version *nfs_version[5] = {
+static const struct rpc_version *nfs_version[5] = {
+#ifdef CONFIG_NFS_V2
[2] = &nfs_version2,
+#endif
#ifdef CONFIG_NFS_V3
[3] = &nfs_version3,
#endif
@@ -99,7 +101,7 @@ static struct rpc_version *nfs_version[5] = {
#endif
};
-struct rpc_program nfs_program = {
+const struct rpc_program nfs_program = {
.name = "nfs",
.number = NFS_PROGRAM,
.nrvers = ARRAY_SIZE(nfs_version),
@@ -115,11 +117,11 @@ struct rpc_stat nfs_rpcstat = {
#ifdef CONFIG_NFS_V3_ACL
static struct rpc_stat nfsacl_rpcstat = { &nfsacl_program };
-static struct rpc_version * nfsacl_version[] = {
+static const struct rpc_version *nfsacl_version[] = {
[3] = &nfsacl_version3,
};
-struct rpc_program nfsacl_program = {
+const struct rpc_program nfsacl_program = {
.name = "nfsacl",
.number = NFS_ACL_PROGRAM,
.nrvers = ARRAY_SIZE(nfsacl_version),
@@ -129,12 +131,14 @@ struct rpc_program nfsacl_program = {
#endif /* CONFIG_NFS_V3_ACL */
struct nfs_client_initdata {
+ unsigned long init_flags;
const char *hostname;
const struct sockaddr *addr;
size_t addrlen;
const struct nfs_rpc_ops *rpc_ops;
int proto;
u32 minorversion;
+ struct net *net;
};
/*
@@ -171,6 +175,7 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_
clp->cl_rpcclient = ERR_PTR(-EINVAL);
clp->cl_proto = cl_init->proto;
+ clp->cl_net = get_net(cl_init->net);
#ifdef CONFIG_NFS_V4
err = nfs_get_cb_ident_idr(clp, cl_init->minorversion);
@@ -180,12 +185,11 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_
spin_lock_init(&clp->cl_lock);
INIT_DELAYED_WORK(&clp->cl_renewd, nfs4_renew_state);
rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS client");
- clp->cl_boot_time = CURRENT_TIME;
clp->cl_state = 1 << NFS4CLNT_LEASE_EXPIRED;
clp->cl_minorversion = cl_init->minorversion;
clp->cl_mvops = nfs_v4_minor_ops[cl_init->minorversion];
#endif
- cred = rpc_lookup_machine_cred();
+ cred = rpc_lookup_machine_cred("*");
if (!IS_ERR(cred))
clp->cl_machine_cred = cred;
nfs_fscache_get_client_cookie(clp);
@@ -202,8 +206,11 @@ error_0:
#ifdef CONFIG_NFS_V4_1
static void nfs4_shutdown_session(struct nfs_client *clp)
{
- if (nfs4_has_session(clp))
+ if (nfs4_has_session(clp)) {
nfs4_destroy_session(clp->cl_session);
+ nfs4_destroy_clientid(clp);
+ }
+
}
#else /* CONFIG_NFS_V4_1 */
static void nfs4_shutdown_session(struct nfs_client *clp)
@@ -230,19 +237,26 @@ static void nfs4_shutdown_client(struct nfs_client *clp)
nfs_idmap_delete(clp);
rpc_destroy_wait_queue(&clp->cl_rpcwaitq);
+ kfree(clp->cl_serverowner);
+ kfree(clp->cl_serverscope);
+ kfree(clp->cl_implid);
}
/* idr_remove_all is not needed as all id's are removed by nfs_put_client */
-void nfs_cleanup_cb_ident_idr(void)
+void nfs_cleanup_cb_ident_idr(struct net *net)
{
- idr_destroy(&cb_ident_idr);
+ struct nfs_net *nn = net_generic(net, nfs_net_id);
+
+ idr_destroy(&nn->cb_ident_idr);
}
/* nfs_client_lock held */
static void nfs_cb_idr_remove_locked(struct nfs_client *clp)
{
+ struct nfs_net *nn = net_generic(clp->cl_net, nfs_net_id);
+
if (clp->cl_cb_ident)
- idr_remove(&cb_ident_idr, clp->cl_cb_ident);
+ idr_remove(&nn->cb_ident_idr, clp->cl_cb_ident);
}
static void pnfs_init_server(struct nfs_server *server)
@@ -250,12 +264,17 @@ static void pnfs_init_server(struct nfs_server *server)
rpc_init_wait_queue(&server->roc_rpcwaitq, "pNFS ROC");
}
+static void nfs4_destroy_server(struct nfs_server *server)
+{
+ nfs4_purge_state_owners(server);
+}
+
#else
static void nfs4_shutdown_client(struct nfs_client *clp)
{
}
-void nfs_cleanup_cb_ident_idr(void)
+void nfs_cleanup_cb_ident_idr(struct net *net)
{
}
@@ -287,10 +306,8 @@ static void nfs_free_client(struct nfs_client *clp)
if (clp->cl_machine_cred != NULL)
put_rpccred(clp->cl_machine_cred);
- nfs4_deviceid_purge_client(clp);
-
+ put_net(clp->cl_net);
kfree(clp->cl_hostname);
- kfree(clp->server_scope);
kfree(clp);
dprintk("<-- nfs_free_client()\n");
@@ -301,15 +318,18 @@ static void nfs_free_client(struct nfs_client *clp)
*/
void nfs_put_client(struct nfs_client *clp)
{
+ struct nfs_net *nn;
+
if (!clp)
return;
dprintk("--> nfs_put_client({%d})\n", atomic_read(&clp->cl_count));
+ nn = net_generic(clp->cl_net, nfs_net_id);
- if (atomic_dec_and_lock(&clp->cl_count, &nfs_client_lock)) {
+ if (atomic_dec_and_lock(&clp->cl_count, &nn->nfs_client_lock)) {
list_del(&clp->cl_share_link);
nfs_cb_idr_remove_locked(clp);
- spin_unlock(&nfs_client_lock);
+ spin_unlock(&nn->nfs_client_lock);
BUG_ON(!list_empty(&clp->cl_superblocks));
@@ -387,6 +407,7 @@ static int nfs_sockaddr_cmp_ip4(const struct sockaddr *sa1,
(sin1->sin_port == sin2->sin_port);
}
+#if defined(CONFIG_NFS_V4_1)
/*
* Test if two socket addresses represent the same actual socket,
* by comparing (only) relevant fields, excluding the port number.
@@ -405,6 +426,7 @@ static int nfs_sockaddr_match_ipaddr(const struct sockaddr *sa1,
}
return 0;
}
+#endif /* CONFIG_NFS_V4_1 */
/*
* Test if two socket addresses represent the same actual socket,
@@ -425,10 +447,10 @@ static int nfs_sockaddr_cmp(const struct sockaddr *sa1,
return 0;
}
+#if defined(CONFIG_NFS_V4_1)
/* Common match routine for v4.0 and v4.1 callback services */
-bool
-nfs4_cb_match_client(const struct sockaddr *addr, struct nfs_client *clp,
- u32 minorversion)
+static bool nfs4_cb_match_client(const struct sockaddr *addr,
+ struct nfs_client *clp, u32 minorversion)
{
struct sockaddr *clap = (struct sockaddr *)&clp->cl_addr;
@@ -437,6 +459,8 @@ nfs4_cb_match_client(const struct sockaddr *addr, struct nfs_client *clp,
clp->cl_cons_state == NFS_CS_SESSION_INITING))
return false;
+ smp_rmb();
+
/* Match the version and minorversion */
if (clp->rpc_ops->version != 4 ||
clp->cl_minorversion != minorversion)
@@ -448,6 +472,7 @@ nfs4_cb_match_client(const struct sockaddr *addr, struct nfs_client *clp,
return true;
}
+#endif /* CONFIG_NFS_V4_1 */
/*
* Find an nfs_client on the list that matches the initialisation data
@@ -457,8 +482,9 @@ static struct nfs_client *nfs_match_client(const struct nfs_client_initdata *dat
{
struct nfs_client *clp;
const struct sockaddr *sap = data->addr;
+ struct nfs_net *nn = net_generic(data->net, nfs_net_id);
- list_for_each_entry(clp, &nfs_client_list, cl_share_link) {
+ list_for_each_entry(clp, &nn->nfs_client_list, cl_share_link) {
const struct sockaddr *clap = (struct sockaddr *)&clp->cl_addr;
/* Don't match clients that failed to initialise properly */
if (clp->cl_cons_state < 0)
@@ -483,6 +509,45 @@ static struct nfs_client *nfs_match_client(const struct nfs_client_initdata *dat
return NULL;
}
+static bool nfs_client_init_is_complete(const struct nfs_client *clp)
+{
+ return clp->cl_cons_state != NFS_CS_INITING;
+}
+
+int nfs_wait_client_init_complete(const struct nfs_client *clp)
+{
+ return wait_event_killable(nfs_client_active_wq,
+ nfs_client_init_is_complete(clp));
+}
+
+/*
+ * Found an existing client. Make sure it's ready before returning.
+ */
+static struct nfs_client *
+nfs_found_client(const struct nfs_client_initdata *cl_init,
+ struct nfs_client *clp)
+{
+ int error;
+
+ error = nfs_wait_client_init_complete(clp);
+ if (error < 0) {
+ nfs_put_client(clp);
+ return ERR_PTR(-ERESTARTSYS);
+ }
+
+ if (clp->cl_cons_state < NFS_CS_READY) {
+ error = clp->cl_cons_state;
+ nfs_put_client(clp);
+ return ERR_PTR(error);
+ }
+
+ smp_rmb();
+
+ dprintk("<-- %s found nfs_client %p for %s\n",
+ __func__, clp, cl_init->hostname ?: "");
+ return clp;
+}
+
/*
* Look up a client by IP address and protocol version
* - creates a new record if one doesn't yet exist
@@ -491,74 +556,42 @@ static struct nfs_client *
nfs_get_client(const struct nfs_client_initdata *cl_init,
const struct rpc_timeout *timeparms,
const char *ip_addr,
- rpc_authflavor_t authflavour,
- int noresvport)
+ rpc_authflavor_t authflavour)
{
struct nfs_client *clp, *new = NULL;
- int error;
+ struct nfs_net *nn = net_generic(cl_init->net, nfs_net_id);
dprintk("--> nfs_get_client(%s,v%u)\n",
cl_init->hostname ?: "", cl_init->rpc_ops->version);
/* see if the client already exists */
do {
- spin_lock(&nfs_client_lock);
+ spin_lock(&nn->nfs_client_lock);
clp = nfs_match_client(cl_init);
- if (clp)
- goto found_client;
- if (new)
- goto install_client;
+ if (clp) {
+ spin_unlock(&nn->nfs_client_lock);
+ if (new)
+ nfs_free_client(new);
+ return nfs_found_client(cl_init, clp);
+ }
+ if (new) {
+ list_add(&new->cl_share_link, &nn->nfs_client_list);
+ spin_unlock(&nn->nfs_client_lock);
+ new->cl_flags = cl_init->init_flags;
+ return cl_init->rpc_ops->init_client(new,
+ timeparms, ip_addr,
+ authflavour);
+ }
- spin_unlock(&nfs_client_lock);
+ spin_unlock(&nn->nfs_client_lock);
new = nfs_alloc_client(cl_init);
} while (!IS_ERR(new));
- dprintk("--> nfs_get_client() = %ld [failed]\n", PTR_ERR(new));
+ dprintk("<-- nfs_get_client() Failed to find %s (%ld)\n",
+ cl_init->hostname ?: "", PTR_ERR(new));
return new;
-
- /* install a new client and return with it unready */
-install_client:
- clp = new;
- list_add(&clp->cl_share_link, &nfs_client_list);
- spin_unlock(&nfs_client_lock);
-
- error = cl_init->rpc_ops->init_client(clp, timeparms, ip_addr,
- authflavour, noresvport);
- if (error < 0) {
- nfs_put_client(clp);
- return ERR_PTR(error);
- }
- dprintk("--> nfs_get_client() = %p [new]\n", clp);
- return clp;
-
- /* found an existing client
- * - make sure it's ready before returning
- */
-found_client:
- spin_unlock(&nfs_client_lock);
-
- if (new)
- nfs_free_client(new);
-
- error = wait_event_killable(nfs_client_active_wq,
- clp->cl_cons_state < NFS_CS_INITING);
- if (error < 0) {
- nfs_put_client(clp);
- return ERR_PTR(-ERESTARTSYS);
- }
-
- if (clp->cl_cons_state < NFS_CS_READY) {
- error = clp->cl_cons_state;
- nfs_put_client(clp);
- return ERR_PTR(error);
- }
-
- BUG_ON(clp->cl_cons_state != NFS_CS_READY);
-
- dprintk("--> nfs_get_client() = %p [share]\n", clp);
- return clp;
}
/*
@@ -566,27 +599,12 @@ found_client:
*/
void nfs_mark_client_ready(struct nfs_client *clp, int state)
{
+ smp_wmb();
clp->cl_cons_state = state;
wake_up_all(&nfs_client_active_wq);
}
/*
- * With sessions, the client is not marked ready until after a
- * successful EXCHANGE_ID and CREATE_SESSION.
- *
- * Map errors cl_cons_state errors to EPROTONOSUPPORT to indicate
- * other versions of NFS can be tried.
- */
-int nfs4_check_client_ready(struct nfs_client *clp)
-{
- if (!nfs4_has_session(clp))
- return 0;
- if (clp->cl_cons_state < NFS_CS_READY)
- return -EPROTONOSUPPORT;
- return 0;
-}
-
-/*
* Initialise the timeout values for a connection
*/
static void nfs_init_timeout_values(struct rpc_timeout *to, int proto,
@@ -632,12 +650,11 @@ static void nfs_init_timeout_values(struct rpc_timeout *to, int proto,
*/
static int nfs_create_rpc_client(struct nfs_client *clp,
const struct rpc_timeout *timeparms,
- rpc_authflavor_t flavor,
- int discrtry, int noresvport)
+ rpc_authflavor_t flavor)
{
struct rpc_clnt *clnt = NULL;
struct rpc_create_args args = {
- .net = &init_net,
+ .net = clp->cl_net,
.protocol = clp->cl_proto,
.address = (struct sockaddr *)&clp->cl_addr,
.addrsize = clp->cl_addrlen,
@@ -648,9 +665,9 @@ static int nfs_create_rpc_client(struct nfs_client *clp,
.authflavor = flavor,
};
- if (discrtry)
+ if (test_bit(NFS_CS_DISCRTRY, &clp->cl_flags))
args.flags |= RPC_CLNT_CREATE_DISCRTRY;
- if (noresvport)
+ if (test_bit(NFS_CS_NORESVPORT, &clp->cl_flags))
args.flags |= RPC_CLNT_CREATE_NONPRIVPORT;
if (!IS_ERR(clp->cl_rpcclient))
@@ -691,6 +708,7 @@ static int nfs_start_lockd(struct nfs_server *server)
.nfs_version = clp->rpc_ops->version,
.noresvport = server->flags & NFS_MOUNT_NORESVPORT ?
1 : 0,
+ .net = clp->cl_net,
};
if (nlm_init.nfs_version > 3)
@@ -782,36 +800,43 @@ static int nfs_init_server_rpcclient(struct nfs_server *server,
return 0;
}
-/*
- * Initialise an NFS2 or NFS3 client
+/**
+ * nfs_init_client - Initialise an NFS2 or NFS3 client
+ *
+ * @clp: nfs_client to initialise
+ * @timeparms: timeout parameters for underlying RPC transport
+ * @ip_addr: IP presentation address (not used)
+ * @authflavor: authentication flavor for underlying RPC transport
+ *
+ * Returns pointer to an NFS client, or an ERR_PTR value.
*/
-int nfs_init_client(struct nfs_client *clp, const struct rpc_timeout *timeparms,
- const char *ip_addr, rpc_authflavor_t authflavour,
- int noresvport)
+struct nfs_client *nfs_init_client(struct nfs_client *clp,
+ const struct rpc_timeout *timeparms,
+ const char *ip_addr, rpc_authflavor_t authflavour)
{
int error;
if (clp->cl_cons_state == NFS_CS_READY) {
/* the client is already initialised */
dprintk("<-- nfs_init_client() = 0 [already %p]\n", clp);
- return 0;
+ return clp;
}
/*
* Create a client RPC handle for doing FSSTAT with UNIX auth only
* - RFC 2623, sec 2.3.2
*/
- error = nfs_create_rpc_client(clp, timeparms, RPC_AUTH_UNIX,
- 0, noresvport);
+ error = nfs_create_rpc_client(clp, timeparms, RPC_AUTH_UNIX);
if (error < 0)
goto error;
nfs_mark_client_ready(clp, NFS_CS_READY);
- return 0;
+ return clp;
error:
nfs_mark_client_ready(clp, error);
+ nfs_put_client(clp);
dprintk("<-- nfs_init_client() = xerror %d\n", error);
- return error;
+ return ERR_PTR(error);
}
/*
@@ -824,8 +849,9 @@ static int nfs_init_server(struct nfs_server *server,
.hostname = data->nfs_server.hostname,
.addr = (const struct sockaddr *)&data->nfs_server.address,
.addrlen = data->nfs_server.addrlen,
- .rpc_ops = &nfs_v2_clientops,
+ .rpc_ops = NULL,
.proto = data->nfs_server.protocol,
+ .net = data->net,
};
struct rpc_timeout timeparms;
struct nfs_client *clp;
@@ -833,17 +859,28 @@ static int nfs_init_server(struct nfs_server *server,
dprintk("--> nfs_init_server()\n");
+ switch (data->version) {
+#ifdef CONFIG_NFS_V2
+ case 2:
+ cl_init.rpc_ops = &nfs_v2_clientops;
+ break;
+#endif
#ifdef CONFIG_NFS_V3
- if (data->version == 3)
+ case 3:
cl_init.rpc_ops = &nfs_v3_clientops;
+ break;
#endif
+ default:
+ return -EPROTONOSUPPORT;
+ }
nfs_init_timeout_values(&timeparms, data->nfs_server.protocol,
data->timeo, data->retrans);
+ if (data->flags & NFS_MOUNT_NORESVPORT)
+ set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags);
/* Allocate or find a client reference we can use */
- clp = nfs_get_client(&cl_init, &timeparms, NULL, RPC_AUTH_UNIX,
- data->flags & NFS_MOUNT_NORESVPORT);
+ clp = nfs_get_client(&cl_init, &timeparms, NULL, RPC_AUTH_UNIX);
if (IS_ERR(clp)) {
dprintk("<-- nfs_init_server() = error %ld\n", PTR_ERR(clp));
return PTR_ERR(clp);
@@ -856,7 +893,7 @@ static int nfs_init_server(struct nfs_server *server,
server->options = data->options;
server->caps |= NFS_CAP_HARDLINKS|NFS_CAP_SYMLINKS|NFS_CAP_FILEID|
NFS_CAP_MODE|NFS_CAP_NLINK|NFS_CAP_OWNER|NFS_CAP_OWNER_GROUP|
- NFS_CAP_ATIME|NFS_CAP_CTIME|NFS_CAP_MTIME;
+ NFS_CAP_ATIME|NFS_CAP_CTIME|NFS_CAP_MTIME|NFS_CAP_CHANGE_ATTR;
if (data->rsize)
server->rsize = nfs_block_size(data->rsize, NULL);
@@ -1024,25 +1061,30 @@ static void nfs_server_copy_userdata(struct nfs_server *target, struct nfs_serve
static void nfs_server_insert_lists(struct nfs_server *server)
{
struct nfs_client *clp = server->nfs_client;
+ struct nfs_net *nn = net_generic(clp->cl_net, nfs_net_id);
- spin_lock(&nfs_client_lock);
+ spin_lock(&nn->nfs_client_lock);
list_add_tail_rcu(&server->client_link, &clp->cl_superblocks);
- list_add_tail(&server->master_link, &nfs_volume_list);
+ list_add_tail(&server->master_link, &nn->nfs_volume_list);
clear_bit(NFS_CS_STOP_RENEW, &clp->cl_res_state);
- spin_unlock(&nfs_client_lock);
+ spin_unlock(&nn->nfs_client_lock);
}
static void nfs_server_remove_lists(struct nfs_server *server)
{
struct nfs_client *clp = server->nfs_client;
+ struct nfs_net *nn;
- spin_lock(&nfs_client_lock);
+ if (clp == NULL)
+ return;
+ nn = net_generic(clp->cl_net, nfs_net_id);
+ spin_lock(&nn->nfs_client_lock);
list_del_rcu(&server->client_link);
- if (clp && list_empty(&clp->cl_superblocks))
+ if (list_empty(&clp->cl_superblocks))
set_bit(NFS_CS_STOP_RENEW, &clp->cl_res_state);
list_del(&server->master_link);
- spin_unlock(&nfs_client_lock);
+ spin_unlock(&nn->nfs_client_lock);
synchronize_rcu();
}
@@ -1065,6 +1107,7 @@ static struct nfs_server *nfs_alloc_server(void)
INIT_LIST_HEAD(&server->master_link);
INIT_LIST_HEAD(&server->delegations);
INIT_LIST_HEAD(&server->layouts);
+ INIT_LIST_HEAD(&server->state_owners_lru);
atomic_set(&server->active, 0);
@@ -1080,6 +1123,8 @@ static struct nfs_server *nfs_alloc_server(void)
return NULL;
}
+ ida_init(&server->openowner_id);
+ ida_init(&server->lockowner_id);
pnfs_init_server(server);
return server;
@@ -1105,6 +1150,8 @@ void nfs_free_server(struct nfs_server *server)
nfs_put_client(server->nfs_client);
+ ida_destroy(&server->lockowner_id);
+ ida_destroy(&server->openowner_id);
nfs_free_iostats(server->io_stats);
bdi_destroy(&server->backing_dev_info);
kfree(server);
@@ -1183,45 +1230,19 @@ error:
/*
* NFSv4.0 callback thread helper
*
- * Find a client by IP address, protocol version, and minorversion
- *
- * Called from the pg_authenticate method. The callback identifier
- * is not used as it has not been decoded.
- *
- * Returns NULL if no such client
- */
-struct nfs_client *
-nfs4_find_client_no_ident(const struct sockaddr *addr)
-{
- struct nfs_client *clp;
-
- spin_lock(&nfs_client_lock);
- list_for_each_entry(clp, &nfs_client_list, cl_share_link) {
- if (nfs4_cb_match_client(addr, clp, 0) == false)
- continue;
- atomic_inc(&clp->cl_count);
- spin_unlock(&nfs_client_lock);
- return clp;
- }
- spin_unlock(&nfs_client_lock);
- return NULL;
-}
-
-/*
- * NFSv4.0 callback thread helper
- *
* Find a client by callback identifier
*/
struct nfs_client *
-nfs4_find_client_ident(int cb_ident)
+nfs4_find_client_ident(struct net *net, int cb_ident)
{
struct nfs_client *clp;
+ struct nfs_net *nn = net_generic(net, nfs_net_id);
- spin_lock(&nfs_client_lock);
- clp = idr_find(&cb_ident_idr, cb_ident);
+ spin_lock(&nn->nfs_client_lock);
+ clp = idr_find(&nn->cb_ident_idr, cb_ident);
if (clp)
atomic_inc(&clp->cl_count);
- spin_unlock(&nfs_client_lock);
+ spin_unlock(&nn->nfs_client_lock);
return clp;
}
@@ -1234,13 +1255,14 @@ nfs4_find_client_ident(int cb_ident)
* Returns NULL if no such client
*/
struct nfs_client *
-nfs4_find_client_sessionid(const struct sockaddr *addr,
+nfs4_find_client_sessionid(struct net *net, const struct sockaddr *addr,
struct nfs4_sessionid *sid)
{
struct nfs_client *clp;
+ struct nfs_net *nn = net_generic(net, nfs_net_id);
- spin_lock(&nfs_client_lock);
- list_for_each_entry(clp, &nfs_client_list, cl_share_link) {
+ spin_lock(&nn->nfs_client_lock);
+ list_for_each_entry(clp, &nn->nfs_client_list, cl_share_link) {
if (nfs4_cb_match_client(addr, clp, 1) == false)
continue;
@@ -1253,17 +1275,17 @@ nfs4_find_client_sessionid(const struct sockaddr *addr,
continue;
atomic_inc(&clp->cl_count);
- spin_unlock(&nfs_client_lock);
+ spin_unlock(&nn->nfs_client_lock);
return clp;
}
- spin_unlock(&nfs_client_lock);
+ spin_unlock(&nn->nfs_client_lock);
return NULL;
}
#else /* CONFIG_NFS_V4_1 */
struct nfs_client *
-nfs4_find_client_sessionid(const struct sockaddr *addr,
+nfs4_find_client_sessionid(struct net *net, const struct sockaddr *addr,
struct nfs4_sessionid *sid)
{
return NULL;
@@ -1278,16 +1300,18 @@ static int nfs4_init_callback(struct nfs_client *clp)
int error;
if (clp->rpc_ops->version == 4) {
+ struct rpc_xprt *xprt;
+
+ xprt = rcu_dereference_raw(clp->cl_rpcclient->cl_xprt);
+
if (nfs4_has_session(clp)) {
- error = xprt_setup_backchannel(
- clp->cl_rpcclient->cl_xprt,
+ error = xprt_setup_backchannel(xprt,
NFS41_BC_MIN_CALLBACKS);
if (error < 0)
return error;
}
- error = nfs_callback_up(clp->cl_mvops->minor_version,
- clp->cl_rpcclient->cl_xprt);
+ error = nfs_callback_up(clp->cl_mvops->minor_version, xprt);
if (error < 0) {
dprintk("%s: failed to start callback. Error = %d\n",
__func__, error);
@@ -1322,37 +1346,58 @@ static int nfs4_init_client_minor_version(struct nfs_client *clp)
* so that the client back channel can find the
* nfs_client struct
*/
- clp->cl_cons_state = NFS_CS_SESSION_INITING;
+ nfs_mark_client_ready(clp, NFS_CS_SESSION_INITING);
}
#endif /* CONFIG_NFS_V4_1 */
return nfs4_init_callback(clp);
}
-/*
- * Initialise an NFS4 client record
+/**
+ * nfs4_init_client - Initialise an NFS4 client record
+ *
+ * @clp: nfs_client to initialise
+ * @timeparms: timeout parameters for underlying RPC transport
+ * @ip_addr: callback IP address in presentation format
+ * @authflavor: authentication flavor for underlying RPC transport
+ *
+ * Returns pointer to an NFS client, or an ERR_PTR value.
*/
-int nfs4_init_client(struct nfs_client *clp,
- const struct rpc_timeout *timeparms,
- const char *ip_addr,
- rpc_authflavor_t authflavour,
- int noresvport)
+struct nfs_client *nfs4_init_client(struct nfs_client *clp,
+ const struct rpc_timeout *timeparms,
+ const char *ip_addr,
+ rpc_authflavor_t authflavour)
{
+ char buf[INET6_ADDRSTRLEN + 1];
int error;
if (clp->cl_cons_state == NFS_CS_READY) {
/* the client is initialised already */
dprintk("<-- nfs4_init_client() = 0 [already %p]\n", clp);
- return 0;
+ return clp;
}
/* Check NFS protocol revision and initialize RPC op vector */
clp->rpc_ops = &nfs_v4_clientops;
- error = nfs_create_rpc_client(clp, timeparms, authflavour,
- 1, noresvport);
+ __set_bit(NFS_CS_DISCRTRY, &clp->cl_flags);
+ error = nfs_create_rpc_client(clp, timeparms, authflavour);
if (error < 0)
goto error;
+
+ /* If no clientaddr= option was specified, find a usable cb address */
+ if (ip_addr == NULL) {
+ struct sockaddr_storage cb_addr;
+ struct sockaddr *sap = (struct sockaddr *)&cb_addr;
+
+ error = rpc_localaddr(clp->cl_rpcclient, sap, sizeof(cb_addr));
+ if (error < 0)
+ goto error;
+ error = rpc_ntop(sap, buf, sizeof(buf));
+ if (error < 0)
+ goto error;
+ ip_addr = (const char *)buf;
+ }
strlcpy(clp->cl_ipaddr, ip_addr, sizeof(clp->cl_ipaddr));
error = nfs_idmap_new(clp);
@@ -1369,12 +1414,13 @@ int nfs4_init_client(struct nfs_client *clp,
if (!nfs4_has_session(clp))
nfs_mark_client_ready(clp, NFS_CS_READY);
- return 0;
+ return clp;
error:
nfs_mark_client_ready(clp, error);
+ nfs_put_client(clp);
dprintk("<-- nfs4_init_client() = xerror %d\n", error);
- return error;
+ return ERR_PTR(error);
}
/*
@@ -1387,7 +1433,7 @@ static int nfs4_set_client(struct nfs_server *server,
const char *ip_addr,
rpc_authflavor_t authflavour,
int proto, const struct rpc_timeout *timeparms,
- u32 minorversion)
+ u32 minorversion, struct net *net)
{
struct nfs_client_initdata cl_init = {
.hostname = hostname,
@@ -1396,15 +1442,18 @@ static int nfs4_set_client(struct nfs_server *server,
.rpc_ops = &nfs_v4_clientops,
.proto = proto,
.minorversion = minorversion,
+ .net = net,
};
struct nfs_client *clp;
int error;
dprintk("--> nfs4_set_client()\n");
+ if (server->flags & NFS_MOUNT_NORESVPORT)
+ set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags);
+
/* Allocate or find a client reference we can use */
- clp = nfs_get_client(&cl_init, timeparms, ip_addr, authflavour,
- server->flags & NFS_MOUNT_NORESVPORT);
+ clp = nfs_get_client(&cl_init, timeparms, ip_addr, authflavour);
if (IS_ERR(clp)) {
error = PTR_ERR(clp);
goto error;
@@ -1438,8 +1487,8 @@ error:
* the MDS.
*/
struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp,
- const struct sockaddr *ds_addr,
- int ds_addrlen, int ds_proto)
+ const struct sockaddr *ds_addr, int ds_addrlen,
+ int ds_proto, unsigned int ds_timeo, unsigned int ds_retrans)
{
struct nfs_client_initdata cl_init = {
.addr = ds_addr,
@@ -1447,13 +1496,9 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp,
.rpc_ops = &nfs_v4_clientops,
.proto = ds_proto,
.minorversion = mds_clp->cl_minorversion,
+ .net = mds_clp->cl_net,
};
- struct rpc_timeout ds_timeout = {
- .to_initval = 15 * HZ,
- .to_maxval = 15 * HZ,
- .to_retries = 1,
- .to_exponential = 1,
- };
+ struct rpc_timeout ds_timeout;
struct nfs_client *clp;
/*
@@ -1461,8 +1506,9 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp,
* cl_ipaddr so as to use the same EXCHANGE_ID co_ownerid as the MDS
* (section 13.1 RFC 5661).
*/
+ nfs_init_timeout_values(&ds_timeout, ds_proto, ds_timeo, ds_retrans);
clp = nfs_get_client(&cl_init, &ds_timeout, mds_clp->cl_ipaddr,
- mds_clp->cl_rpcclient->cl_auth->au_flavor, 0);
+ mds_clp->cl_rpcclient->cl_auth->au_flavor);
dprintk("<-- %s %p\n", __func__, clp);
return clp;
@@ -1538,6 +1584,7 @@ static int nfs4_server_common_setup(struct nfs_server *server,
nfs_server_insert_lists(server);
server->mount_time = jiffies;
+ server->destroy = nfs4_destroy_server;
out:
nfs_free_fattr(fattr);
return error;
@@ -1573,7 +1620,8 @@ static int nfs4_init_server(struct nfs_server *server,
data->auth_flavors[0],
data->nfs_server.protocol,
&timeparms,
- data->minorversion);
+ data->minorversion,
+ data->net);
if (error < 0)
goto error;
@@ -1668,9 +1716,10 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
data->addrlen,
parent_client->cl_ipaddr,
data->authflavor,
- parent_server->client->cl_xprt->prot,
+ rpc_protocol(parent_server->client),
parent_server->client->cl_timeout,
- parent_client->cl_mvops->minor_version);
+ parent_client->cl_mvops->minor_version,
+ parent_client->cl_net);
if (error < 0)
goto error;
@@ -1698,7 +1747,8 @@ error:
*/
struct nfs_server *nfs_clone_server(struct nfs_server *source,
struct nfs_fh *fh,
- struct nfs_fattr *fattr)
+ struct nfs_fattr *fattr,
+ rpc_authflavor_t flavor)
{
struct nfs_server *server;
struct nfs_fattr *fattr_fsinfo;
@@ -1719,6 +1769,7 @@ struct nfs_server *nfs_clone_server(struct nfs_server *source,
/* Copy data from the source */
server->nfs_client = source->nfs_client;
+ server->destroy = source->destroy;
atomic_inc(&server->nfs_client->cl_count);
nfs_server_copy_userdata(server, source);
@@ -1726,7 +1777,7 @@ struct nfs_server *nfs_clone_server(struct nfs_server *source,
error = nfs_init_server_rpcclient(server,
source->client->cl_timeout,
- source->client->cl_auth->au_flavor);
+ flavor);
if (error < 0)
goto out_free_server;
if (!IS_ERR(source->client_acl))
@@ -1762,6 +1813,19 @@ out_free_server:
return ERR_PTR(error);
}
+void nfs_clients_init(struct net *net)
+{
+ struct nfs_net *nn = net_generic(net, nfs_net_id);
+
+ INIT_LIST_HEAD(&nn->nfs_client_list);
+ INIT_LIST_HEAD(&nn->nfs_volume_list);
+#ifdef CONFIG_NFS_V4
+ idr_init(&nn->cb_ident_idr);
+#endif
+ spin_lock_init(&nn->nfs_client_lock);
+ nn->boot_time = CURRENT_TIME;
+}
+
#ifdef CONFIG_PROC_FS
static struct proc_dir_entry *proc_fs_nfs;
@@ -1815,13 +1879,15 @@ static int nfs_server_list_open(struct inode *inode, struct file *file)
{
struct seq_file *m;
int ret;
+ struct pid_namespace *pid_ns = file->f_dentry->d_sb->s_fs_info;
+ struct net *net = pid_ns->child_reaper->nsproxy->net_ns;
ret = seq_open(file, &nfs_server_list_ops);
if (ret < 0)
return ret;
m = file->private_data;
- m->private = PDE(inode)->data;
+ m->private = net;
return 0;
}
@@ -1831,9 +1897,11 @@ static int nfs_server_list_open(struct inode *inode, struct file *file)
*/
static void *nfs_server_list_start(struct seq_file *m, loff_t *_pos)
{
+ struct nfs_net *nn = net_generic(m->private, nfs_net_id);
+
/* lock the list against modification */
- spin_lock(&nfs_client_lock);
- return seq_list_start_head(&nfs_client_list, *_pos);
+ spin_lock(&nn->nfs_client_lock);
+ return seq_list_start_head(&nn->nfs_client_list, *_pos);
}
/*
@@ -1841,7 +1909,9 @@ static void *nfs_server_list_start(struct seq_file *m, loff_t *_pos)
*/
static void *nfs_server_list_next(struct seq_file *p, void *v, loff_t *pos)
{
- return seq_list_next(v, &nfs_client_list, pos);
+ struct nfs_net *nn = net_generic(p->private, nfs_net_id);
+
+ return seq_list_next(v, &nn->nfs_client_list, pos);
}
/*
@@ -1849,7 +1919,9 @@ static void *nfs_server_list_next(struct seq_file *p, void *v, loff_t *pos)
*/
static void nfs_server_list_stop(struct seq_file *p, void *v)
{
- spin_unlock(&nfs_client_lock);
+ struct nfs_net *nn = net_generic(p->private, nfs_net_id);
+
+ spin_unlock(&nn->nfs_client_lock);
}
/*
@@ -1858,9 +1930,10 @@ static void nfs_server_list_stop(struct seq_file *p, void *v)
static int nfs_server_list_show(struct seq_file *m, void *v)
{
struct nfs_client *clp;
+ struct nfs_net *nn = net_generic(m->private, nfs_net_id);
/* display header on line 1 */
- if (v == &nfs_client_list) {
+ if (v == &nn->nfs_client_list) {
seq_puts(m, "NV SERVER PORT USE HOSTNAME\n");
return 0;
}
@@ -1872,12 +1945,14 @@ static int nfs_server_list_show(struct seq_file *m, void *v)
if (clp->cl_cons_state != NFS_CS_READY)
return 0;
+ rcu_read_lock();
seq_printf(m, "v%u %s %s %3d %s\n",
clp->rpc_ops->version,
rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_ADDR),
rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_PORT),
atomic_read(&clp->cl_count),
clp->cl_hostname);
+ rcu_read_unlock();
return 0;
}
@@ -1889,13 +1964,15 @@ static int nfs_volume_list_open(struct inode *inode, struct file *file)
{
struct seq_file *m;
int ret;
+ struct pid_namespace *pid_ns = file->f_dentry->d_sb->s_fs_info;
+ struct net *net = pid_ns->child_reaper->nsproxy->net_ns;
ret = seq_open(file, &nfs_volume_list_ops);
if (ret < 0)
return ret;
m = file->private_data;
- m->private = PDE(inode)->data;
+ m->private = net;
return 0;
}
@@ -1905,9 +1982,11 @@ static int nfs_volume_list_open(struct inode *inode, struct file *file)
*/
static void *nfs_volume_list_start(struct seq_file *m, loff_t *_pos)
{
+ struct nfs_net *nn = net_generic(m->private, nfs_net_id);
+
/* lock the list against modification */
- spin_lock(&nfs_client_lock);
- return seq_list_start_head(&nfs_volume_list, *_pos);
+ spin_lock(&nn->nfs_client_lock);
+ return seq_list_start_head(&nn->nfs_volume_list, *_pos);
}
/*
@@ -1915,7 +1994,9 @@ static void *nfs_volume_list_start(struct seq_file *m, loff_t *_pos)
*/
static void *nfs_volume_list_next(struct seq_file *p, void *v, loff_t *pos)
{
- return seq_list_next(v, &nfs_volume_list, pos);
+ struct nfs_net *nn = net_generic(p->private, nfs_net_id);
+
+ return seq_list_next(v, &nn->nfs_volume_list, pos);
}
/*
@@ -1923,7 +2004,9 @@ static void *nfs_volume_list_next(struct seq_file *p, void *v, loff_t *pos)
*/
static void nfs_volume_list_stop(struct seq_file *p, void *v)
{
- spin_unlock(&nfs_client_lock);
+ struct nfs_net *nn = net_generic(p->private, nfs_net_id);
+
+ spin_unlock(&nn->nfs_client_lock);
}
/*
@@ -1934,9 +2017,10 @@ static int nfs_volume_list_show(struct seq_file *m, void *v)
struct nfs_server *server;
struct nfs_client *clp;
char dev[8], fsid[17];
+ struct nfs_net *nn = net_generic(m->private, nfs_net_id);
/* display header on line 1 */
- if (v == &nfs_volume_list) {
+ if (v == &nn->nfs_volume_list) {
seq_puts(m, "NV SERVER PORT DEV FSID FSC\n");
return 0;
}
@@ -1951,6 +2035,7 @@ static int nfs_volume_list_show(struct seq_file *m, void *v)
(unsigned long long) server->fsid.major,
(unsigned long long) server->fsid.minor);
+ rcu_read_lock();
seq_printf(m, "v%u %s %s %-7s %-17s %s\n",
clp->rpc_ops->version,
rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_ADDR),
@@ -1958,6 +2043,7 @@ static int nfs_volume_list_show(struct seq_file *m, void *v)
dev,
fsid,
nfs_server_fscache_state(server));
+ rcu_read_unlock();
return 0;
}
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 7f265406980..bd3a9601d32 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -105,7 +105,7 @@ again:
continue;
if (!test_bit(NFS_DELEGATED_STATE, &state->flags))
continue;
- if (memcmp(state->stateid.data, stateid->data, sizeof(state->stateid.data)) != 0)
+ if (!nfs4_stateid_match(&state->stateid, stateid))
continue;
get_nfs_open_context(ctx);
spin_unlock(&inode->i_lock);
@@ -139,8 +139,7 @@ void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred,
if (delegation != NULL) {
spin_lock(&delegation->lock);
if (delegation->inode != NULL) {
- memcpy(delegation->stateid.data, res->delegation.data,
- sizeof(delegation->stateid.data));
+ nfs4_stateid_copy(&delegation->stateid, &res->delegation);
delegation->type = res->delegation_type;
delegation->maxsize = res->maxsize;
oldcred = delegation->cred;
@@ -236,8 +235,7 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
delegation = kmalloc(sizeof(*delegation), GFP_NOFS);
if (delegation == NULL)
return -ENOMEM;
- memcpy(delegation->stateid.data, res->delegation.data,
- sizeof(delegation->stateid.data));
+ nfs4_stateid_copy(&delegation->stateid, &res->delegation);
delegation->type = res->delegation_type;
delegation->maxsize = res->maxsize;
delegation->change_attr = inode->i_version;
@@ -250,19 +248,22 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
old_delegation = rcu_dereference_protected(nfsi->delegation,
lockdep_is_held(&clp->cl_lock));
if (old_delegation != NULL) {
- if (memcmp(&delegation->stateid, &old_delegation->stateid,
- sizeof(old_delegation->stateid)) == 0 &&
+ if (nfs4_stateid_match(&delegation->stateid,
+ &old_delegation->stateid) &&
delegation->type == old_delegation->type) {
goto out;
}
/*
* Deal with broken servers that hand out two
* delegations for the same file.
+ * Allow for upgrades to a WRITE delegation, but
+ * nothing else.
*/
dfprintk(FILE, "%s: server %s handed out "
"a duplicate delegation!\n",
__func__, clp->cl_hostname);
- if (delegation->type <= old_delegation->type) {
+ if (delegation->type == old_delegation->type ||
+ !(delegation->type & FMODE_WRITE)) {
freeme = delegation;
delegation = NULL;
goto out;
@@ -315,6 +316,10 @@ out:
* nfs_client_return_marked_delegations - return previously marked delegations
* @clp: nfs_client to process
*
+ * Note that this function is designed to be called by the state
+ * manager thread. For this reason, it cannot flush the dirty data,
+ * since that could deadlock in case of a state recovery error.
+ *
* Returns zero on success, or a negative errno value.
*/
int nfs_client_return_marked_delegations(struct nfs_client *clp)
@@ -339,11 +344,9 @@ restart:
server);
rcu_read_unlock();
- if (delegation != NULL) {
- filemap_flush(inode->i_mapping);
+ if (delegation != NULL)
err = __nfs_inode_return_delegation(inode,
delegation, 0);
- }
iput(inode);
if (!err)
goto restart;
@@ -379,6 +382,10 @@ void nfs_inode_return_delegation_noreclaim(struct inode *inode)
* nfs_inode_return_delegation - synchronously return a delegation
* @inode: inode to process
*
+ * This routine will always flush any dirty data to disk on the
+ * assumption that if we need to return the delegation, then
+ * we should stop caching.
+ *
* Returns zero on success, or a negative errno value.
*/
int nfs_inode_return_delegation(struct inode *inode)
@@ -388,10 +395,10 @@ int nfs_inode_return_delegation(struct inode *inode)
struct nfs_delegation *delegation;
int err = 0;
+ nfs_wb_all(inode);
if (rcu_access_pointer(nfsi->delegation) != NULL) {
delegation = nfs_detach_delegation(nfsi, server);
if (delegation != NULL) {
- nfs_wb_all(inode);
err = __nfs_inode_return_delegation(inode, delegation, 1);
}
}
@@ -455,17 +462,24 @@ static void nfs_client_mark_return_all_delegation_types(struct nfs_client *clp,
rcu_read_unlock();
}
-static void nfs_client_mark_return_all_delegations(struct nfs_client *clp)
-{
- nfs_client_mark_return_all_delegation_types(clp, FMODE_READ|FMODE_WRITE);
-}
-
static void nfs_delegation_run_state_manager(struct nfs_client *clp)
{
if (test_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state))
nfs4_schedule_state_manager(clp);
}
+void nfs_remove_bad_delegation(struct inode *inode)
+{
+ struct nfs_delegation *delegation;
+
+ delegation = nfs_detach_delegation(NFS_I(inode), NFS_SERVER(inode));
+ if (delegation) {
+ nfs_inode_find_state_and_recover(inode, &delegation->stateid);
+ nfs_free_delegation(delegation);
+ }
+}
+EXPORT_SYMBOL_GPL(nfs_remove_bad_delegation);
+
/**
* nfs_expire_all_delegation_types
* @clp: client to process
@@ -488,18 +502,6 @@ void nfs_expire_all_delegations(struct nfs_client *clp)
nfs_expire_all_delegation_types(clp, FMODE_READ|FMODE_WRITE);
}
-/**
- * nfs_handle_cb_pathdown - return all delegations after NFS4ERR_CB_PATH_DOWN
- * @clp: client to process
- *
- */
-void nfs_handle_cb_pathdown(struct nfs_client *clp)
-{
- if (clp == NULL)
- return;
- nfs_client_mark_return_all_delegations(clp);
-}
-
static void nfs_mark_return_unreferenced_delegations(struct nfs_server *server)
{
struct nfs_delegation *delegation;
@@ -531,7 +533,7 @@ void nfs_expire_unreferenced_delegations(struct nfs_client *clp)
/**
* nfs_async_inode_return_delegation - asynchronously return a delegation
* @inode: inode to process
- * @stateid: state ID information from CB_RECALL arguments
+ * @stateid: state ID information
*
* Returns zero on success, or a negative errno value.
*/
@@ -542,10 +544,12 @@ int nfs_async_inode_return_delegation(struct inode *inode,
struct nfs_client *clp = server->nfs_client;
struct nfs_delegation *delegation;
+ filemap_flush(inode->i_mapping);
+
rcu_read_lock();
delegation = rcu_dereference(NFS_I(inode)->delegation);
- if (!clp->cl_mvops->validate_stateid(delegation, stateid)) {
+ if (!clp->cl_mvops->match_stateid(&delegation->stateid, stateid)) {
rcu_read_unlock();
return -ENOENT;
}
@@ -684,21 +688,25 @@ int nfs_delegations_present(struct nfs_client *clp)
* nfs4_copy_delegation_stateid - Copy inode's state ID information
* @dst: stateid data structure to fill in
* @inode: inode to check
+ * @flags: delegation type requirement
*
- * Returns one and fills in "dst->data" * if inode had a delegation,
- * otherwise zero is returned.
+ * Returns "true" and fills in "dst->data" if inode had a delegation,
+ * otherwise "false" is returned.
*/
-int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode)
+bool nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode,
+ fmode_t flags)
{
struct nfs_inode *nfsi = NFS_I(inode);
struct nfs_delegation *delegation;
- int ret = 0;
+ bool ret;
+ flags &= FMODE_READ|FMODE_WRITE;
rcu_read_lock();
delegation = rcu_dereference(nfsi->delegation);
- if (delegation != NULL) {
- memcpy(dst->data, delegation->stateid.data, sizeof(dst->data));
- ret = 1;
+ ret = (delegation != NULL && (delegation->type & flags) == flags);
+ if (ret) {
+ nfs4_stateid_copy(dst, &delegation->stateid);
+ nfs_mark_delegation_referenced(delegation);
}
rcu_read_unlock();
return ret;
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h
index d9322e490c5..72709c4193f 100644
--- a/fs/nfs/delegation.h
+++ b/fs/nfs/delegation.h
@@ -42,9 +42,9 @@ void nfs_super_return_all_delegations(struct super_block *sb);
void nfs_expire_all_delegations(struct nfs_client *clp);
void nfs_expire_all_delegation_types(struct nfs_client *clp, fmode_t flags);
void nfs_expire_unreferenced_delegations(struct nfs_client *clp);
-void nfs_handle_cb_pathdown(struct nfs_client *clp);
int nfs_client_return_marked_delegations(struct nfs_client *clp);
int nfs_delegations_present(struct nfs_client *clp);
+void nfs_remove_bad_delegation(struct inode *inode);
void nfs_delegation_mark_reclaim(struct nfs_client *clp);
void nfs_delegation_reap_unclaimed(struct nfs_client *clp);
@@ -53,7 +53,7 @@ void nfs_delegation_reap_unclaimed(struct nfs_client *clp);
int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid, int issync);
int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid);
int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl);
-int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode);
+bool nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode, fmode_t flags);
void nfs_mark_delegation_referenced(struct nfs_delegation *delegation);
int nfs_have_delegation(struct inode *inode, fmode_t flags);
@@ -66,6 +66,7 @@ static inline int nfs_have_delegation(struct inode *inode, fmode_t flags)
static inline int nfs_inode_return_delegation(struct inode *inode)
{
+ nfs_wb_all(inode);
return 0;
}
#endif
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index ac289909814..f430057ff3b 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -47,13 +47,13 @@ static int nfs_opendir(struct inode *, struct file *);
static int nfs_closedir(struct inode *, struct file *);
static int nfs_readdir(struct file *, void *, filldir_t);
static struct dentry *nfs_lookup(struct inode *, struct dentry *, struct nameidata *);
-static int nfs_create(struct inode *, struct dentry *, int, struct nameidata *);
-static int nfs_mkdir(struct inode *, struct dentry *, int);
+static int nfs_create(struct inode *, struct dentry *, umode_t, struct nameidata *);
+static int nfs_mkdir(struct inode *, struct dentry *, umode_t);
static int nfs_rmdir(struct inode *, struct dentry *);
static int nfs_unlink(struct inode *, struct dentry *);
static int nfs_symlink(struct inode *, struct dentry *, const char *);
static int nfs_link(struct dentry *, struct inode *, struct dentry *);
-static int nfs_mknod(struct inode *, struct dentry *, int, dev_t);
+static int nfs_mknod(struct inode *, struct dentry *, umode_t, dev_t);
static int nfs_rename(struct inode *, struct dentry *,
struct inode *, struct dentry *);
static int nfs_fsync_dir(struct file *, loff_t, loff_t, int);
@@ -112,7 +112,7 @@ const struct inode_operations nfs3_dir_inode_operations = {
#ifdef CONFIG_NFS_V4
static struct dentry *nfs_atomic_lookup(struct inode *, struct dentry *, struct nameidata *);
-static int nfs_open_create(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *nd);
+static int nfs_open_create(struct inode *dir, struct dentry *dentry, umode_t mode, struct nameidata *nd);
const struct inode_operations nfs4_dir_inode_operations = {
.create = nfs_open_create,
.lookup = nfs_atomic_lookup,
@@ -207,7 +207,7 @@ struct nfs_cache_array_entry {
};
struct nfs_cache_array {
- unsigned int size;
+ int size;
int eof_index;
u64 last_cookie;
struct nfs_cache_array_entry array[0];
@@ -260,10 +260,10 @@ void nfs_readdir_clear_array(struct page *page)
struct nfs_cache_array *array;
int i;
- array = kmap_atomic(page, KM_USER0);
+ array = kmap_atomic(page);
for (i = 0; i < array->size; i++)
kfree(array->array[i].string.name);
- kunmap_atomic(array, KM_USER0);
+ kunmap_atomic(array);
}
/*
@@ -475,12 +475,32 @@ different:
}
static
+bool nfs_use_readdirplus(struct inode *dir, struct file *filp)
+{
+ if (!nfs_server_capable(dir, NFS_CAP_READDIRPLUS))
+ return false;
+ if (test_and_clear_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(dir)->flags))
+ return true;
+ if (filp->f_pos == 0)
+ return true;
+ return false;
+}
+
+/*
+ * This function is called by the lookup code to request the use of
+ * readdirplus to accelerate any future lookups in the same
+ * directory.
+ */
+static
+void nfs_advise_use_readdirplus(struct inode *dir)
+{
+ set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(dir)->flags);
+}
+
+static
void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry)
{
- struct qstr filename = {
- .len = entry->len,
- .name = entry->name,
- };
+ struct qstr filename = QSTR_INIT(entry->name, entry->len);
struct dentry *dentry;
struct dentry *alias;
struct inode *dir = parent->d_inode;
@@ -874,7 +894,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
desc->file = filp;
desc->dir_cookie = &dir_ctx->dir_cookie;
desc->decode = NFS_PROTO(inode)->decode_dirent;
- desc->plus = NFS_USE_READDIRPLUS(inode);
+ desc->plus = nfs_use_readdirplus(inode, filp) ? 1 : 0;
nfs_block_sillyrename(dentry);
res = nfs_revalidate_mapping(inode, filp->f_mapping);
@@ -1114,7 +1134,7 @@ static int nfs_lookup_revalidate(struct dentry *dentry, struct nameidata *nd)
if (!inode) {
if (nfs_neg_need_reval(dir, dentry, nd))
goto out_bad;
- goto out_valid;
+ goto out_valid_noent;
}
if (is_bad_inode(inode)) {
@@ -1143,7 +1163,7 @@ static int nfs_lookup_revalidate(struct dentry *dentry, struct nameidata *nd)
if (fhandle == NULL || fattr == NULL)
goto out_error;
- error = NFS_PROTO(dir)->lookup(NFS_SERVER(dir)->client, dir, &dentry->d_name, fhandle, fattr);
+ error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr);
if (error)
goto out_bad;
if (nfs_compare_fh(NFS_FH(inode), fhandle))
@@ -1156,6 +1176,9 @@ static int nfs_lookup_revalidate(struct dentry *dentry, struct nameidata *nd)
out_set_verifier:
nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
out_valid:
+ /* Success: notify readdir to use READDIRPLUS */
+ nfs_advise_use_readdirplus(dir);
+ out_valid_noent:
dput(parent);
dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) is valid\n",
__func__, dentry->d_parent->d_name.name,
@@ -1299,7 +1322,7 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
parent = dentry->d_parent;
/* Protect against concurrent sillydeletes */
nfs_block_sillyrename(parent);
- error = NFS_PROTO(dir)->lookup(NFS_SERVER(dir)->client, dir, &dentry->d_name, fhandle, fattr);
+ error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr);
if (error == -ENOENT)
goto no_entry;
if (error < 0) {
@@ -1311,6 +1334,9 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
if (IS_ERR(res))
goto out_unblock_sillyrename;
+ /* Success: notify readdir to use READDIRPLUS */
+ nfs_advise_use_readdirplus(dir);
+
no_entry:
res = d_materialise_unique(dentry, inode);
if (res != NULL) {
@@ -1328,10 +1354,10 @@ out:
}
#ifdef CONFIG_NFS_V4
-static int nfs_open_revalidate(struct dentry *, struct nameidata *);
+static int nfs4_lookup_revalidate(struct dentry *, struct nameidata *);
const struct dentry_operations nfs4_dentry_operations = {
- .d_revalidate = nfs_open_revalidate,
+ .d_revalidate = nfs4_lookup_revalidate,
.d_delete = nfs_dentry_delete,
.d_iput = nfs_dentry_iput,
.d_automount = nfs_d_automount,
@@ -1368,18 +1394,7 @@ static fmode_t flags_to_mode(int flags)
static struct nfs_open_context *create_nfs_open_context(struct dentry *dentry, int open_flags)
{
- struct nfs_open_context *ctx;
- struct rpc_cred *cred;
- fmode_t fmode = flags_to_mode(open_flags);
-
- cred = rpc_lookup_cred();
- if (IS_ERR(cred))
- return ERR_CAST(cred);
- ctx = alloc_nfs_open_context(dentry, cred, fmode);
- put_rpccred(cred);
- if (ctx == NULL)
- return ERR_PTR(-ENOMEM);
- return ctx;
+ return alloc_nfs_open_context(dentry, flags_to_mode(open_flags));
}
static int do_open(struct inode *inode, struct file *filp)
@@ -1440,6 +1455,7 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry
}
open_flags = nd->intent.open.flags;
+ attr.ia_valid = ATTR_OPEN;
ctx = create_nfs_open_context(dentry, open_flags);
res = ERR_CAST(ctx);
@@ -1448,11 +1464,14 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry
if (nd->flags & LOOKUP_CREATE) {
attr.ia_mode = nd->intent.open.create_mode;
- attr.ia_valid = ATTR_MODE;
+ attr.ia_valid |= ATTR_MODE;
attr.ia_mode &= ~current_umask();
- } else {
+ } else
open_flags &= ~(O_EXCL | O_CREAT);
- attr.ia_valid = 0;
+
+ if (open_flags & O_TRUNC) {
+ attr.ia_valid |= ATTR_SIZE;
+ attr.ia_size = 0;
}
/* Open the file on the server */
@@ -1500,12 +1519,11 @@ no_open:
return nfs_lookup(dir, dentry, nd);
}
-static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd)
+static int nfs4_lookup_revalidate(struct dentry *dentry, struct nameidata *nd)
{
struct dentry *parent = NULL;
struct inode *inode;
struct inode *dir;
- struct nfs_open_context *ctx;
int openflags, ret = 0;
if (nd->flags & LOOKUP_RCU)
@@ -1534,49 +1552,13 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd)
/* We cannot do exclusive creation on a positive dentry */
if ((openflags & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL))
goto no_open_dput;
- /* We can't create new files, or truncate existing ones here */
- openflags &= ~(O_CREAT|O_EXCL|O_TRUNC);
- ctx = create_nfs_open_context(dentry, openflags);
- ret = PTR_ERR(ctx);
- if (IS_ERR(ctx))
- goto out;
- /*
- * Note: we're not holding inode->i_mutex and so may be racing with
- * operations that change the directory. We therefore save the
- * change attribute *before* we do the RPC call.
- */
- inode = NFS_PROTO(dir)->open_context(dir, ctx, openflags, NULL);
- if (IS_ERR(inode)) {
- ret = PTR_ERR(inode);
- switch (ret) {
- case -EPERM:
- case -EACCES:
- case -EDQUOT:
- case -ENOSPC:
- case -EROFS:
- goto out_put_ctx;
- default:
- goto out_drop;
- }
- }
- iput(inode);
- if (inode != dentry->d_inode)
- goto out_drop;
+ /* Let f_op->open() actually open (and revalidate) the file */
+ ret = 1;
- nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
- ret = nfs_intent_set_file(nd, ctx);
- if (ret >= 0)
- ret = 1;
out:
dput(parent);
return ret;
-out_drop:
- d_drop(dentry);
- ret = 0;
-out_put_ctx:
- put_nfs_open_context(ctx);
- goto out;
no_open_dput:
dput(parent);
@@ -1584,8 +1566,8 @@ no_open:
return nfs_lookup_revalidate(dentry, nd);
}
-static int nfs_open_create(struct inode *dir, struct dentry *dentry, int mode,
- struct nameidata *nd)
+static int nfs_open_create(struct inode *dir, struct dentry *dentry,
+ umode_t mode, struct nameidata *nd)
{
struct nfs_open_context *ctx = NULL;
struct iattr attr;
@@ -1644,7 +1626,7 @@ int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle,
if (dentry->d_inode)
goto out;
if (fhandle->size == 0) {
- error = NFS_PROTO(dir)->lookup(NFS_SERVER(dir)->client, dir, &dentry->d_name, fhandle, fattr);
+ error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr);
if (error)
goto out_error;
}
@@ -1675,8 +1657,8 @@ out_error:
* that the operation succeeded on the server, but an error in the
* reply path made it appear to have failed.
*/
-static int nfs_create(struct inode *dir, struct dentry *dentry, int mode,
- struct nameidata *nd)
+static int nfs_create(struct inode *dir, struct dentry *dentry,
+ umode_t mode, struct nameidata *nd)
{
struct iattr attr;
int error;
@@ -1704,7 +1686,7 @@ out_err:
* See comments for nfs_proc_create regarding failed operations.
*/
static int
-nfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev)
+nfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev)
{
struct iattr attr;
int status;
@@ -1730,7 +1712,7 @@ out_err:
/*
* See comments for nfs_proc_create regarding failed operations.
*/
-static int nfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+static int nfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
{
struct iattr attr;
int error;
@@ -1881,11 +1863,11 @@ static int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *sym
if (!page)
return -ENOMEM;
- kaddr = kmap_atomic(page, KM_USER0);
+ kaddr = kmap_atomic(page);
memcpy(kaddr, symname, pathlen);
if (pathlen < PAGE_SIZE)
memset(kaddr + pathlen, 0, PAGE_SIZE - pathlen);
- kunmap_atomic(kaddr, KM_USER0);
+ kunmap_atomic(kaddr);
error = NFS_PROTO(dir)->symlink(dir, dentry, page, pathlen, &attr);
if (error != 0) {
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 1940f1a56a5..48253372ab1 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -51,12 +51,12 @@
#include <linux/nfs_page.h>
#include <linux/sunrpc/clnt.h>
-#include <asm/system.h>
#include <asm/uaccess.h>
#include <linux/atomic.h>
#include "internal.h"
#include "iostat.h"
+#include "pnfs.h"
#define NFSDBG_FACILITY NFSDBG_VFS
@@ -82,16 +82,19 @@ struct nfs_direct_req {
struct completion completion; /* wait for i/o completion */
/* commit state */
- struct list_head rewrite_list; /* saved nfs_write_data structs */
- struct nfs_write_data * commit_data; /* special write_data for commits */
+ struct nfs_mds_commit_info mds_cinfo; /* Storage for cinfo */
+ struct pnfs_ds_commit_info ds_cinfo; /* Storage for cinfo */
+ struct work_struct work;
int flags;
#define NFS_ODIRECT_DO_COMMIT (1) /* an unstable reply was received */
#define NFS_ODIRECT_RESCHED_WRITES (2) /* write verification failed */
struct nfs_writeverf verf; /* unstable write verifier */
};
+static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops;
+static const struct nfs_commit_completion_ops nfs_direct_commit_completion_ops;
static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode);
-static const struct rpc_call_ops nfs_write_direct_ops;
+static void nfs_direct_write_schedule_work(struct work_struct *work);
static inline void get_dreq(struct nfs_direct_req *dreq)
{
@@ -125,22 +128,6 @@ ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_
return -EINVAL;
}
-static void nfs_direct_dirty_pages(struct page **pages, unsigned int pgbase, size_t count)
-{
- unsigned int npages;
- unsigned int i;
-
- if (count == 0)
- return;
- pages += (pgbase >> PAGE_SHIFT);
- npages = (count + (pgbase & ~PAGE_MASK) + PAGE_SIZE - 1) >> PAGE_SHIFT;
- for (i = 0; i < npages; i++) {
- struct page *page = pages[i];
- if (!PageCompound(page))
- set_page_dirty(page);
- }
-}
-
static void nfs_direct_release_pages(struct page **pages, unsigned int npages)
{
unsigned int i;
@@ -148,26 +135,30 @@ static void nfs_direct_release_pages(struct page **pages, unsigned int npages)
page_cache_release(pages[i]);
}
+void nfs_init_cinfo_from_dreq(struct nfs_commit_info *cinfo,
+ struct nfs_direct_req *dreq)
+{
+ cinfo->lock = &dreq->lock;
+ cinfo->mds = &dreq->mds_cinfo;
+ cinfo->ds = &dreq->ds_cinfo;
+ cinfo->dreq = dreq;
+ cinfo->completion_ops = &nfs_direct_commit_completion_ops;
+}
+
static inline struct nfs_direct_req *nfs_direct_req_alloc(void)
{
struct nfs_direct_req *dreq;
- dreq = kmem_cache_alloc(nfs_direct_cachep, GFP_KERNEL);
+ dreq = kmem_cache_zalloc(nfs_direct_cachep, GFP_KERNEL);
if (!dreq)
return NULL;
kref_init(&dreq->kref);
kref_get(&dreq->kref);
init_completion(&dreq->completion);
- INIT_LIST_HEAD(&dreq->rewrite_list);
- dreq->iocb = NULL;
- dreq->ctx = NULL;
- dreq->l_ctx = NULL;
+ INIT_LIST_HEAD(&dreq->mds_cinfo.list);
+ INIT_WORK(&dreq->work, nfs_direct_write_schedule_work);
spin_lock_init(&dreq->lock);
- atomic_set(&dreq->io_count, 0);
- dreq->count = 0;
- dreq->error = 0;
- dreq->flags = 0;
return dreq;
}
@@ -227,49 +218,80 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq)
nfs_direct_req_release(dreq);
}
-/*
- * We must hold a reference to all the pages in this direct read request
- * until the RPCs complete. This could be long *after* we are woken up in
- * nfs_direct_wait (for instance, if someone hits ^C on a slow server).
- */
-static void nfs_direct_read_result(struct rpc_task *task, void *calldata)
+static void nfs_direct_readpage_release(struct nfs_page *req)
{
- struct nfs_read_data *data = calldata;
-
- nfs_readpage_result(task, data);
+ dprintk("NFS: direct read done (%s/%lld %d@%lld)\n",
+ req->wb_context->dentry->d_inode->i_sb->s_id,
+ (long long)NFS_FILEID(req->wb_context->dentry->d_inode),
+ req->wb_bytes,
+ (long long)req_offset(req));
+ nfs_release_request(req);
}
-static void nfs_direct_read_release(void *calldata)
+static void nfs_direct_read_completion(struct nfs_pgio_header *hdr)
{
+ unsigned long bytes = 0;
+ struct nfs_direct_req *dreq = hdr->dreq;
- struct nfs_read_data *data = calldata;
- struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req;
- int status = data->task.tk_status;
+ if (test_bit(NFS_IOHDR_REDO, &hdr->flags))
+ goto out_put;
spin_lock(&dreq->lock);
- if (unlikely(status < 0)) {
- dreq->error = status;
- spin_unlock(&dreq->lock);
- } else {
- dreq->count += data->res.count;
- spin_unlock(&dreq->lock);
- nfs_direct_dirty_pages(data->pagevec,
- data->args.pgbase,
- data->res.count);
- }
- nfs_direct_release_pages(data->pagevec, data->npages);
+ if (test_bit(NFS_IOHDR_ERROR, &hdr->flags) && (hdr->good_bytes == 0))
+ dreq->error = hdr->error;
+ else
+ dreq->count += hdr->good_bytes;
+ spin_unlock(&dreq->lock);
+ while (!list_empty(&hdr->pages)) {
+ struct nfs_page *req = nfs_list_entry(hdr->pages.next);
+ struct page *page = req->wb_page;
+
+ if (test_bit(NFS_IOHDR_EOF, &hdr->flags)) {
+ if (bytes > hdr->good_bytes)
+ zero_user(page, 0, PAGE_SIZE);
+ else if (hdr->good_bytes - bytes < PAGE_SIZE)
+ zero_user_segment(page,
+ hdr->good_bytes & ~PAGE_MASK,
+ PAGE_SIZE);
+ }
+ if (!PageCompound(page)) {
+ if (test_bit(NFS_IOHDR_ERROR, &hdr->flags)) {
+ if (bytes < hdr->good_bytes)
+ set_page_dirty(page);
+ } else
+ set_page_dirty(page);
+ }
+ bytes += req->wb_bytes;
+ nfs_list_remove_request(req);
+ nfs_direct_readpage_release(req);
+ }
+out_put:
if (put_dreq(dreq))
nfs_direct_complete(dreq);
- nfs_readdata_free(data);
+ hdr->release(hdr);
+}
+
+static void nfs_read_sync_pgio_error(struct list_head *head)
+{
+ struct nfs_page *req;
+
+ while (!list_empty(head)) {
+ req = nfs_list_entry(head->next);
+ nfs_list_remove_request(req);
+ nfs_release_request(req);
+ }
}
-static const struct rpc_call_ops nfs_read_direct_ops = {
-#if defined(CONFIG_NFS_V4_1)
- .rpc_call_prepare = nfs_read_prepare,
-#endif /* CONFIG_NFS_V4_1 */
- .rpc_call_done = nfs_direct_read_result,
- .rpc_release = nfs_direct_read_release,
+static void nfs_direct_pgio_init(struct nfs_pgio_header *hdr)
+{
+ get_dreq(hdr->dreq);
+}
+
+static const struct nfs_pgio_completion_ops nfs_direct_read_completion_ops = {
+ .error_cleanup = nfs_read_sync_pgio_error,
+ .init_hdr = nfs_direct_pgio_init,
+ .completion = nfs_direct_read_completion,
};
/*
@@ -279,107 +301,82 @@ static const struct rpc_call_ops nfs_read_direct_ops = {
* handled automatically by nfs_direct_read_result(). Otherwise, if
* no requests have been sent, just return an error.
*/
-static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq,
+static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *desc,
const struct iovec *iov,
loff_t pos)
{
+ struct nfs_direct_req *dreq = desc->pg_dreq;
struct nfs_open_context *ctx = dreq->ctx;
struct inode *inode = ctx->dentry->d_inode;
unsigned long user_addr = (unsigned long)iov->iov_base;
size_t count = iov->iov_len;
size_t rsize = NFS_SERVER(inode)->rsize;
- struct rpc_task *task;
- struct rpc_message msg = {
- .rpc_cred = ctx->cred,
- };
- struct rpc_task_setup task_setup_data = {
- .rpc_client = NFS_CLIENT(inode),
- .rpc_message = &msg,
- .callback_ops = &nfs_read_direct_ops,
- .workqueue = nfsiod_workqueue,
- .flags = RPC_TASK_ASYNC,
- };
unsigned int pgbase;
int result;
ssize_t started = 0;
+ struct page **pagevec = NULL;
+ unsigned int npages;
do {
- struct nfs_read_data *data;
size_t bytes;
+ int i;
pgbase = user_addr & ~PAGE_MASK;
- bytes = min(rsize,count);
+ bytes = min(max_t(size_t, rsize, PAGE_SIZE), count);
result = -ENOMEM;
- data = nfs_readdata_alloc(nfs_page_array_len(pgbase, bytes));
- if (unlikely(!data))
+ npages = nfs_page_array_len(pgbase, bytes);
+ if (!pagevec)
+ pagevec = kmalloc(npages * sizeof(struct page *),
+ GFP_KERNEL);
+ if (!pagevec)
break;
-
down_read(&current->mm->mmap_sem);
result = get_user_pages(current, current->mm, user_addr,
- data->npages, 1, 0, data->pagevec, NULL);
+ npages, 1, 0, pagevec, NULL);
up_read(&current->mm->mmap_sem);
- if (result < 0) {
- nfs_readdata_free(data);
+ if (result < 0)
break;
- }
- if ((unsigned)result < data->npages) {
+ if ((unsigned)result < npages) {
bytes = result * PAGE_SIZE;
if (bytes <= pgbase) {
- nfs_direct_release_pages(data->pagevec, result);
- nfs_readdata_free(data);
+ nfs_direct_release_pages(pagevec, result);
break;
}
bytes -= pgbase;
- data->npages = result;
+ npages = result;
}
- get_dreq(dreq);
-
- data->req = (struct nfs_page *) dreq;
- data->inode = inode;
- data->cred = msg.rpc_cred;
- data->args.fh = NFS_FH(inode);
- data->args.context = ctx;
- data->args.lock_context = dreq->l_ctx;
- data->args.offset = pos;
- data->args.pgbase = pgbase;
- data->args.pages = data->pagevec;
- data->args.count = bytes;
- data->res.fattr = &data->fattr;
- data->res.eof = 0;
- data->res.count = bytes;
- nfs_fattr_init(&data->fattr);
- msg.rpc_argp = &data->args;
- msg.rpc_resp = &data->res;
-
- task_setup_data.task = &data->task;
- task_setup_data.callback_data = data;
- NFS_PROTO(inode)->read_setup(data, &msg);
-
- task = rpc_run_task(&task_setup_data);
- if (IS_ERR(task))
- break;
- rpc_put_task(task);
-
- dprintk("NFS: %5u initiated direct read call "
- "(req %s/%Ld, %zu bytes @ offset %Lu)\n",
- data->task.tk_pid,
- inode->i_sb->s_id,
- (long long)NFS_FILEID(inode),
- bytes,
- (unsigned long long)data->args.offset);
-
- started += bytes;
- user_addr += bytes;
- pos += bytes;
- /* FIXME: Remove this unnecessary math from final patch */
- pgbase += bytes;
- pgbase &= ~PAGE_MASK;
- BUG_ON(pgbase != (user_addr & ~PAGE_MASK));
-
- count -= bytes;
- } while (count != 0);
+ for (i = 0; i < npages; i++) {
+ struct nfs_page *req;
+ unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase);
+ /* XXX do we need to do the eof zeroing found in async_filler? */
+ req = nfs_create_request(dreq->ctx, dreq->inode,
+ pagevec[i],
+ pgbase, req_len);
+ if (IS_ERR(req)) {
+ result = PTR_ERR(req);
+ break;
+ }
+ req->wb_index = pos >> PAGE_SHIFT;
+ req->wb_offset = pos & ~PAGE_MASK;
+ if (!nfs_pageio_add_request(desc, req)) {
+ result = desc->pg_error;
+ nfs_release_request(req);
+ break;
+ }
+ pgbase = 0;
+ bytes -= req_len;
+ started += req_len;
+ user_addr += req_len;
+ pos += req_len;
+ count -= req_len;
+ }
+ /* The nfs_page requests now hold references to these pages */
+ nfs_direct_release_pages(pagevec, npages);
+ } while (count != 0 && result >= 0);
+
+ kfree(pagevec);
if (started)
return started;
@@ -391,15 +388,19 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
unsigned long nr_segs,
loff_t pos)
{
+ struct nfs_pageio_descriptor desc;
ssize_t result = -EINVAL;
size_t requested_bytes = 0;
unsigned long seg;
+ nfs_pageio_init_read(&desc, dreq->inode,
+ &nfs_direct_read_completion_ops);
get_dreq(dreq);
+ desc.pg_dreq = dreq;
for (seg = 0; seg < nr_segs; seg++) {
const struct iovec *vec = &iov[seg];
- result = nfs_direct_read_schedule_segment(dreq, vec, pos);
+ result = nfs_direct_read_schedule_segment(&desc, vec, pos);
if (result < 0)
break;
requested_bytes += result;
@@ -408,6 +409,8 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
pos += vec->iov_len;
}
+ nfs_pageio_complete(&desc);
+
/*
* If no bytes were started, return the error, and let the
* generic layer handle the completion.
@@ -444,104 +447,75 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, const struct iovec *iov,
result = nfs_direct_read_schedule_iovec(dreq, iov, nr_segs, pos);
if (!result)
result = nfs_direct_wait(dreq);
+ NFS_I(inode)->read_io += result;
out_release:
nfs_direct_req_release(dreq);
out:
return result;
}
-static void nfs_direct_free_writedata(struct nfs_direct_req *dreq)
+static void nfs_inode_dio_write_done(struct inode *inode)
{
- while (!list_empty(&dreq->rewrite_list)) {
- struct nfs_write_data *data = list_entry(dreq->rewrite_list.next, struct nfs_write_data, pages);
- list_del(&data->pages);
- nfs_direct_release_pages(data->pagevec, data->npages);
- nfs_writedata_free(data);
- }
+ nfs_zap_mapping(inode, inode->i_mapping);
+ inode_dio_done(inode);
}
#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
{
- struct inode *inode = dreq->inode;
- struct list_head *p;
- struct nfs_write_data *data;
- struct rpc_task *task;
- struct rpc_message msg = {
- .rpc_cred = dreq->ctx->cred,
- };
- struct rpc_task_setup task_setup_data = {
- .rpc_client = NFS_CLIENT(inode),
- .rpc_message = &msg,
- .callback_ops = &nfs_write_direct_ops,
- .workqueue = nfsiod_workqueue,
- .flags = RPC_TASK_ASYNC,
- };
+ struct nfs_pageio_descriptor desc;
+ struct nfs_page *req, *tmp;
+ LIST_HEAD(reqs);
+ struct nfs_commit_info cinfo;
+ LIST_HEAD(failed);
+
+ nfs_init_cinfo_from_dreq(&cinfo, dreq);
+ pnfs_recover_commit_reqs(dreq->inode, &reqs, &cinfo);
+ spin_lock(cinfo.lock);
+ nfs_scan_commit_list(&cinfo.mds->list, &reqs, &cinfo, 0);
+ spin_unlock(cinfo.lock);
dreq->count = 0;
get_dreq(dreq);
- list_for_each(p, &dreq->rewrite_list) {
- data = list_entry(p, struct nfs_write_data, pages);
-
- get_dreq(dreq);
-
- /* Use stable writes */
- data->args.stable = NFS_FILE_SYNC;
-
- /*
- * Reset data->res.
- */
- nfs_fattr_init(&data->fattr);
- data->res.count = data->args.count;
- memset(&data->verf, 0, sizeof(data->verf));
-
- /*
- * Reuse data->task; data->args should not have changed
- * since the original request was sent.
- */
- task_setup_data.task = &data->task;
- task_setup_data.callback_data = data;
- msg.rpc_argp = &data->args;
- msg.rpc_resp = &data->res;
- NFS_PROTO(inode)->write_setup(data, &msg);
-
- /*
- * We're called via an RPC callback, so BKL is already held.
- */
- task = rpc_run_task(&task_setup_data);
- if (!IS_ERR(task))
- rpc_put_task(task);
-
- dprintk("NFS: %5u rescheduled direct write call (req %s/%Ld, %u bytes @ offset %Lu)\n",
- data->task.tk_pid,
- inode->i_sb->s_id,
- (long long)NFS_FILEID(inode),
- data->args.count,
- (unsigned long long)data->args.offset);
+ nfs_pageio_init_write(&desc, dreq->inode, FLUSH_STABLE,
+ &nfs_direct_write_completion_ops);
+ desc.pg_dreq = dreq;
+
+ list_for_each_entry_safe(req, tmp, &reqs, wb_list) {
+ if (!nfs_pageio_add_request(&desc, req)) {
+ nfs_list_remove_request(req);
+ nfs_list_add_request(req, &failed);
+ spin_lock(cinfo.lock);
+ dreq->flags = 0;
+ dreq->error = -EIO;
+ spin_unlock(cinfo.lock);
+ }
+ nfs_release_request(req);
}
+ nfs_pageio_complete(&desc);
- if (put_dreq(dreq))
- nfs_direct_write_complete(dreq, inode);
-}
-
-static void nfs_direct_commit_result(struct rpc_task *task, void *calldata)
-{
- struct nfs_write_data *data = calldata;
+ while (!list_empty(&failed)) {
+ req = nfs_list_entry(failed.next);
+ nfs_list_remove_request(req);
+ nfs_unlock_and_release_request(req);
+ }
- /* Call the NFS version-specific code */
- NFS_PROTO(data->inode)->commit_done(task, data);
+ if (put_dreq(dreq))
+ nfs_direct_write_complete(dreq, dreq->inode);
}
-static void nfs_direct_commit_release(void *calldata)
+static void nfs_direct_commit_complete(struct nfs_commit_data *data)
{
- struct nfs_write_data *data = calldata;
- struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req;
+ struct nfs_direct_req *dreq = data->dreq;
+ struct nfs_commit_info cinfo;
+ struct nfs_page *req;
int status = data->task.tk_status;
+ nfs_init_cinfo_from_dreq(&cinfo, dreq);
if (status < 0) {
dprintk("NFS: %5u commit failed with error %d.\n",
- data->task.tk_pid, status);
+ data->task.tk_pid, status);
dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
} else if (memcmp(&dreq->verf, &data->verf, sizeof(data->verf))) {
dprintk("NFS: %5u commit verify failed\n", data->task.tk_pid);
@@ -549,64 +523,47 @@ static void nfs_direct_commit_release(void *calldata)
}
dprintk("NFS: %5u commit returned %d\n", data->task.tk_pid, status);
- nfs_direct_write_complete(dreq, data->inode);
- nfs_commit_free(data);
+ while (!list_empty(&data->pages)) {
+ req = nfs_list_entry(data->pages.next);
+ nfs_list_remove_request(req);
+ if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES) {
+ /* Note the rewrite will go through mds */
+ nfs_mark_request_commit(req, NULL, &cinfo);
+ } else
+ nfs_release_request(req);
+ nfs_unlock_and_release_request(req);
+ }
+
+ if (atomic_dec_and_test(&cinfo.mds->rpcs_out))
+ nfs_direct_write_complete(dreq, data->inode);
}
-static const struct rpc_call_ops nfs_commit_direct_ops = {
-#if defined(CONFIG_NFS_V4_1)
- .rpc_call_prepare = nfs_write_prepare,
-#endif /* CONFIG_NFS_V4_1 */
- .rpc_call_done = nfs_direct_commit_result,
- .rpc_release = nfs_direct_commit_release,
+static void nfs_direct_error_cleanup(struct nfs_inode *nfsi)
+{
+ /* There is no lock to clear */
+}
+
+static const struct nfs_commit_completion_ops nfs_direct_commit_completion_ops = {
+ .completion = nfs_direct_commit_complete,
+ .error_cleanup = nfs_direct_error_cleanup,
};
static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
{
- struct nfs_write_data *data = dreq->commit_data;
- struct rpc_task *task;
- struct rpc_message msg = {
- .rpc_argp = &data->args,
- .rpc_resp = &data->res,
- .rpc_cred = dreq->ctx->cred,
- };
- struct rpc_task_setup task_setup_data = {
- .task = &data->task,
- .rpc_client = NFS_CLIENT(dreq->inode),
- .rpc_message = &msg,
- .callback_ops = &nfs_commit_direct_ops,
- .callback_data = data,
- .workqueue = nfsiod_workqueue,
- .flags = RPC_TASK_ASYNC,
- };
-
- data->inode = dreq->inode;
- data->cred = msg.rpc_cred;
-
- data->args.fh = NFS_FH(data->inode);
- data->args.offset = 0;
- data->args.count = 0;
- data->args.context = dreq->ctx;
- data->args.lock_context = dreq->l_ctx;
- data->res.count = 0;
- data->res.fattr = &data->fattr;
- data->res.verf = &data->verf;
- nfs_fattr_init(&data->fattr);
-
- NFS_PROTO(data->inode)->commit_setup(data, &msg);
-
- /* Note: task.tk_ops->rpc_release will free dreq->commit_data */
- dreq->commit_data = NULL;
-
- dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid);
-
- task = rpc_run_task(&task_setup_data);
- if (!IS_ERR(task))
- rpc_put_task(task);
+ int res;
+ struct nfs_commit_info cinfo;
+ LIST_HEAD(mds_list);
+
+ nfs_init_cinfo_from_dreq(&cinfo, dreq);
+ nfs_scan_commit(dreq->inode, &mds_list, &cinfo);
+ res = nfs_generic_commit_list(dreq->inode, &mds_list, 0, &cinfo);
+ if (res < 0) /* res == -ENOMEM */
+ nfs_direct_write_reschedule(dreq);
}
-static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode)
+static void nfs_direct_write_schedule_work(struct work_struct *work)
{
+ struct nfs_direct_req *dreq = container_of(work, struct nfs_direct_req, work);
int flags = dreq->flags;
dreq->flags = 0;
@@ -618,91 +575,32 @@ static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode
nfs_direct_write_reschedule(dreq);
break;
default:
- if (dreq->commit_data != NULL)
- nfs_commit_free(dreq->commit_data);
- nfs_direct_free_writedata(dreq);
- nfs_zap_mapping(inode, inode->i_mapping);
+ nfs_inode_dio_write_done(dreq->inode);
nfs_direct_complete(dreq);
}
}
-static void nfs_alloc_commit_data(struct nfs_direct_req *dreq)
+static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode)
{
- dreq->commit_data = nfs_commitdata_alloc();
- if (dreq->commit_data != NULL)
- dreq->commit_data->req = (struct nfs_page *) dreq;
+ schedule_work(&dreq->work); /* Calls nfs_direct_write_schedule_work */
}
+
#else
-static inline void nfs_alloc_commit_data(struct nfs_direct_req *dreq)
+static void nfs_direct_write_schedule_work(struct work_struct *work)
{
- dreq->commit_data = NULL;
}
static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode)
{
- nfs_direct_free_writedata(dreq);
- nfs_zap_mapping(inode, inode->i_mapping);
+ nfs_inode_dio_write_done(inode);
nfs_direct_complete(dreq);
}
#endif
-static void nfs_direct_write_result(struct rpc_task *task, void *calldata)
-{
- struct nfs_write_data *data = calldata;
-
- nfs_writeback_done(task, data);
-}
-
/*
* NB: Return the value of the first error return code. Subsequent
* errors after the first one are ignored.
*/
-static void nfs_direct_write_release(void *calldata)
-{
- struct nfs_write_data *data = calldata;
- struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req;
- int status = data->task.tk_status;
-
- spin_lock(&dreq->lock);
-
- if (unlikely(status < 0)) {
- /* An error has occurred, so we should not commit */
- dreq->flags = 0;
- dreq->error = status;
- }
- if (unlikely(dreq->error != 0))
- goto out_unlock;
-
- dreq->count += data->res.count;
-
- if (data->res.verf->committed != NFS_FILE_SYNC) {
- switch (dreq->flags) {
- case 0:
- memcpy(&dreq->verf, &data->verf, sizeof(dreq->verf));
- dreq->flags = NFS_ODIRECT_DO_COMMIT;
- break;
- case NFS_ODIRECT_DO_COMMIT:
- if (memcmp(&dreq->verf, &data->verf, sizeof(dreq->verf))) {
- dprintk("NFS: %5u write verify failed\n", data->task.tk_pid);
- dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
- }
- }
- }
-out_unlock:
- spin_unlock(&dreq->lock);
-
- if (put_dreq(dreq))
- nfs_direct_write_complete(dreq, data->inode);
-}
-
-static const struct rpc_call_ops nfs_write_direct_ops = {
-#if defined(CONFIG_NFS_V4_1)
- .rpc_call_prepare = nfs_write_prepare,
-#endif /* CONFIG_NFS_V4_1 */
- .rpc_call_done = nfs_direct_write_result,
- .rpc_release = nfs_direct_write_release,
-};
-
/*
* For each wsize'd chunk of the user's buffer, dispatch an NFS WRITE
* operation. If nfs_writedata_alloc() or get_user_pages() fails,
@@ -710,132 +608,189 @@ static const struct rpc_call_ops nfs_write_direct_ops = {
* handled automatically by nfs_direct_write_result(). Otherwise, if
* no requests have been sent, just return an error.
*/
-static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq,
+static ssize_t nfs_direct_write_schedule_segment(struct nfs_pageio_descriptor *desc,
const struct iovec *iov,
- loff_t pos, int sync)
+ loff_t pos)
{
+ struct nfs_direct_req *dreq = desc->pg_dreq;
struct nfs_open_context *ctx = dreq->ctx;
struct inode *inode = ctx->dentry->d_inode;
unsigned long user_addr = (unsigned long)iov->iov_base;
size_t count = iov->iov_len;
- struct rpc_task *task;
- struct rpc_message msg = {
- .rpc_cred = ctx->cred,
- };
- struct rpc_task_setup task_setup_data = {
- .rpc_client = NFS_CLIENT(inode),
- .rpc_message = &msg,
- .callback_ops = &nfs_write_direct_ops,
- .workqueue = nfsiod_workqueue,
- .flags = RPC_TASK_ASYNC,
- };
size_t wsize = NFS_SERVER(inode)->wsize;
unsigned int pgbase;
int result;
ssize_t started = 0;
+ struct page **pagevec = NULL;
+ unsigned int npages;
do {
- struct nfs_write_data *data;
size_t bytes;
+ int i;
pgbase = user_addr & ~PAGE_MASK;
- bytes = min(wsize,count);
+ bytes = min(max_t(size_t, wsize, PAGE_SIZE), count);
result = -ENOMEM;
- data = nfs_writedata_alloc(nfs_page_array_len(pgbase, bytes));
- if (unlikely(!data))
+ npages = nfs_page_array_len(pgbase, bytes);
+ if (!pagevec)
+ pagevec = kmalloc(npages * sizeof(struct page *), GFP_KERNEL);
+ if (!pagevec)
break;
down_read(&current->mm->mmap_sem);
result = get_user_pages(current, current->mm, user_addr,
- data->npages, 0, 0, data->pagevec, NULL);
+ npages, 0, 0, pagevec, NULL);
up_read(&current->mm->mmap_sem);
- if (result < 0) {
- nfs_writedata_free(data);
+ if (result < 0)
break;
- }
- if ((unsigned)result < data->npages) {
+
+ if ((unsigned)result < npages) {
bytes = result * PAGE_SIZE;
if (bytes <= pgbase) {
- nfs_direct_release_pages(data->pagevec, result);
- nfs_writedata_free(data);
+ nfs_direct_release_pages(pagevec, result);
break;
}
bytes -= pgbase;
- data->npages = result;
+ npages = result;
}
- get_dreq(dreq);
-
- list_move_tail(&data->pages, &dreq->rewrite_list);
-
- data->req = (struct nfs_page *) dreq;
- data->inode = inode;
- data->cred = msg.rpc_cred;
- data->args.fh = NFS_FH(inode);
- data->args.context = ctx;
- data->args.lock_context = dreq->l_ctx;
- data->args.offset = pos;
- data->args.pgbase = pgbase;
- data->args.pages = data->pagevec;
- data->args.count = bytes;
- data->args.stable = sync;
- data->res.fattr = &data->fattr;
- data->res.count = bytes;
- data->res.verf = &data->verf;
- nfs_fattr_init(&data->fattr);
-
- task_setup_data.task = &data->task;
- task_setup_data.callback_data = data;
- msg.rpc_argp = &data->args;
- msg.rpc_resp = &data->res;
- NFS_PROTO(inode)->write_setup(data, &msg);
-
- task = rpc_run_task(&task_setup_data);
- if (IS_ERR(task))
- break;
- rpc_put_task(task);
-
- dprintk("NFS: %5u initiated direct write call "
- "(req %s/%Ld, %zu bytes @ offset %Lu)\n",
- data->task.tk_pid,
- inode->i_sb->s_id,
- (long long)NFS_FILEID(inode),
- bytes,
- (unsigned long long)data->args.offset);
+ for (i = 0; i < npages; i++) {
+ struct nfs_page *req;
+ unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase);
- started += bytes;
- user_addr += bytes;
- pos += bytes;
-
- /* FIXME: Remove this useless math from the final patch */
- pgbase += bytes;
- pgbase &= ~PAGE_MASK;
- BUG_ON(pgbase != (user_addr & ~PAGE_MASK));
+ req = nfs_create_request(dreq->ctx, dreq->inode,
+ pagevec[i],
+ pgbase, req_len);
+ if (IS_ERR(req)) {
+ result = PTR_ERR(req);
+ break;
+ }
+ nfs_lock_request(req);
+ req->wb_index = pos >> PAGE_SHIFT;
+ req->wb_offset = pos & ~PAGE_MASK;
+ if (!nfs_pageio_add_request(desc, req)) {
+ result = desc->pg_error;
+ nfs_unlock_and_release_request(req);
+ break;
+ }
+ pgbase = 0;
+ bytes -= req_len;
+ started += req_len;
+ user_addr += req_len;
+ pos += req_len;
+ count -= req_len;
+ }
+ /* The nfs_page now hold references to these pages */
+ nfs_direct_release_pages(pagevec, npages);
+ } while (count != 0 && result >= 0);
- count -= bytes;
- } while (count != 0);
+ kfree(pagevec);
if (started)
return started;
return result < 0 ? (ssize_t) result : -EFAULT;
}
+static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
+{
+ struct nfs_direct_req *dreq = hdr->dreq;
+ struct nfs_commit_info cinfo;
+ int bit = -1;
+ struct nfs_page *req = nfs_list_entry(hdr->pages.next);
+
+ if (test_bit(NFS_IOHDR_REDO, &hdr->flags))
+ goto out_put;
+
+ nfs_init_cinfo_from_dreq(&cinfo, dreq);
+
+ spin_lock(&dreq->lock);
+
+ if (test_bit(NFS_IOHDR_ERROR, &hdr->flags)) {
+ dreq->flags = 0;
+ dreq->error = hdr->error;
+ }
+ if (dreq->error != 0)
+ bit = NFS_IOHDR_ERROR;
+ else {
+ dreq->count += hdr->good_bytes;
+ if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags)) {
+ dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
+ bit = NFS_IOHDR_NEED_RESCHED;
+ } else if (test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) {
+ if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES)
+ bit = NFS_IOHDR_NEED_RESCHED;
+ else if (dreq->flags == 0) {
+ memcpy(&dreq->verf, hdr->verf,
+ sizeof(dreq->verf));
+ bit = NFS_IOHDR_NEED_COMMIT;
+ dreq->flags = NFS_ODIRECT_DO_COMMIT;
+ } else if (dreq->flags == NFS_ODIRECT_DO_COMMIT) {
+ if (memcmp(&dreq->verf, hdr->verf, sizeof(dreq->verf))) {
+ dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
+ bit = NFS_IOHDR_NEED_RESCHED;
+ } else
+ bit = NFS_IOHDR_NEED_COMMIT;
+ }
+ }
+ }
+ spin_unlock(&dreq->lock);
+
+ while (!list_empty(&hdr->pages)) {
+ req = nfs_list_entry(hdr->pages.next);
+ nfs_list_remove_request(req);
+ switch (bit) {
+ case NFS_IOHDR_NEED_RESCHED:
+ case NFS_IOHDR_NEED_COMMIT:
+ kref_get(&req->wb_kref);
+ nfs_mark_request_commit(req, hdr->lseg, &cinfo);
+ }
+ nfs_unlock_and_release_request(req);
+ }
+
+out_put:
+ if (put_dreq(dreq))
+ nfs_direct_write_complete(dreq, hdr->inode);
+ hdr->release(hdr);
+}
+
+static void nfs_write_sync_pgio_error(struct list_head *head)
+{
+ struct nfs_page *req;
+
+ while (!list_empty(head)) {
+ req = nfs_list_entry(head->next);
+ nfs_list_remove_request(req);
+ nfs_unlock_and_release_request(req);
+ }
+}
+
+static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops = {
+ .error_cleanup = nfs_write_sync_pgio_error,
+ .init_hdr = nfs_direct_pgio_init,
+ .completion = nfs_direct_write_completion,
+};
+
static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
const struct iovec *iov,
unsigned long nr_segs,
- loff_t pos, int sync)
+ loff_t pos)
{
+ struct nfs_pageio_descriptor desc;
+ struct inode *inode = dreq->inode;
ssize_t result = 0;
size_t requested_bytes = 0;
unsigned long seg;
+ nfs_pageio_init_write(&desc, inode, FLUSH_COND_STABLE,
+ &nfs_direct_write_completion_ops);
+ desc.pg_dreq = dreq;
get_dreq(dreq);
+ atomic_inc(&inode->i_dio_count);
for (seg = 0; seg < nr_segs; seg++) {
const struct iovec *vec = &iov[seg];
- result = nfs_direct_write_schedule_segment(dreq, vec,
- pos, sync);
+ result = nfs_direct_write_schedule_segment(&desc, vec, pos);
if (result < 0)
break;
requested_bytes += result;
@@ -843,12 +798,15 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
break;
pos += vec->iov_len;
}
+ nfs_pageio_complete(&desc);
+ NFS_I(dreq->inode)->write_io += desc.pg_bytes_written;
/*
* If no bytes were started, return the error, and let the
* generic layer handle the completion.
*/
if (requested_bytes == 0) {
+ inode_dio_done(inode);
nfs_direct_req_release(dreq);
return result < 0 ? result : -EIO;
}
@@ -865,16 +823,10 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov,
ssize_t result = -ENOMEM;
struct inode *inode = iocb->ki_filp->f_mapping->host;
struct nfs_direct_req *dreq;
- size_t wsize = NFS_SERVER(inode)->wsize;
- int sync = NFS_UNSTABLE;
dreq = nfs_direct_req_alloc();
if (!dreq)
goto out;
- nfs_alloc_commit_data(dreq);
-
- if (dreq->commit_data == NULL || count <= wsize)
- sync = NFS_FILE_SYNC;
dreq->inode = inode;
dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp));
@@ -884,7 +836,7 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov,
if (!is_sync_kiocb(iocb))
dreq->iocb = iocb;
- result = nfs_direct_write_schedule_iovec(dreq, iov, nr_segs, pos, sync);
+ result = nfs_direct_write_schedule_iovec(dreq, iov, nr_segs, pos);
if (!result)
result = nfs_direct_wait(dreq);
out_release:
@@ -1004,10 +956,15 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
task_io_account_write(count);
retval = nfs_direct_write(iocb, iov, nr_segs, pos, count);
+ if (retval > 0) {
+ struct inode *inode = mapping->host;
- if (retval > 0)
iocb->ki_pos = pos + retval;
-
+ spin_lock(&inode->i_lock);
+ if (i_size_read(inode) < iocb->ki_pos)
+ i_size_write(inode, iocb->ki_pos);
+ spin_unlock(&inode->i_lock);
+ }
out:
return retval;
}
diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c
index a6e711ad130..b3924b8a600 100644
--- a/fs/nfs/dns_resolve.c
+++ b/fs/nfs/dns_resolve.c
@@ -10,8 +10,9 @@
#include <linux/sunrpc/clnt.h>
#include <linux/dns_resolver.h>
+#include "dns_resolve.h"
-ssize_t nfs_dns_resolve_name(char *name, size_t namelen,
+ssize_t nfs_dns_resolve_name(struct net *net, char *name, size_t namelen,
struct sockaddr *sa, size_t salen)
{
ssize_t ret;
@@ -20,7 +21,7 @@ ssize_t nfs_dns_resolve_name(char *name, size_t namelen,
ip_len = dns_query(NULL, name, namelen, NULL, &ip_addr, NULL);
if (ip_len > 0)
- ret = rpc_pton(ip_addr, ip_len, sa, salen);
+ ret = rpc_pton(net, ip_addr, ip_len, sa, salen);
else
ret = -ESRCH;
kfree(ip_addr);
@@ -40,15 +41,15 @@ ssize_t nfs_dns_resolve_name(char *name, size_t namelen,
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/cache.h>
#include <linux/sunrpc/svcauth.h>
+#include <linux/sunrpc/rpc_pipe_fs.h>
#include "dns_resolve.h"
#include "cache_lib.h"
+#include "netns.h"
#define NFS_DNS_HASHBITS 4
#define NFS_DNS_HASHTBL_SIZE (1 << NFS_DNS_HASHBITS)
-static struct cache_head *nfs_dns_table[NFS_DNS_HASHTBL_SIZE];
-
struct nfs_dns_ent {
struct cache_head h;
@@ -224,7 +225,7 @@ static int nfs_dns_parse(struct cache_detail *cd, char *buf, int buflen)
len = qword_get(&buf, buf1, sizeof(buf1));
if (len <= 0)
goto out;
- key.addrlen = rpc_pton(buf1, len,
+ key.addrlen = rpc_pton(cd->net, buf1, len,
(struct sockaddr *)&key.addr,
sizeof(key.addr));
@@ -259,21 +260,6 @@ out:
return ret;
}
-static struct cache_detail nfs_dns_resolve = {
- .owner = THIS_MODULE,
- .hash_size = NFS_DNS_HASHTBL_SIZE,
- .hash_table = nfs_dns_table,
- .name = "dns_resolve",
- .cache_put = nfs_dns_ent_put,
- .cache_upcall = nfs_dns_upcall,
- .cache_parse = nfs_dns_parse,
- .cache_show = nfs_dns_show,
- .match = nfs_dns_match,
- .init = nfs_dns_ent_init,
- .update = nfs_dns_ent_update,
- .alloc = nfs_dns_ent_alloc,
-};
-
static int do_cache_lookup(struct cache_detail *cd,
struct nfs_dns_ent *key,
struct nfs_dns_ent **item,
@@ -336,8 +322,8 @@ out:
return ret;
}
-ssize_t nfs_dns_resolve_name(char *name, size_t namelen,
- struct sockaddr *sa, size_t salen)
+ssize_t nfs_dns_resolve_name(struct net *net, char *name,
+ size_t namelen, struct sockaddr *sa, size_t salen)
{
struct nfs_dns_ent key = {
.hostname = name,
@@ -345,28 +331,118 @@ ssize_t nfs_dns_resolve_name(char *name, size_t namelen,
};
struct nfs_dns_ent *item = NULL;
ssize_t ret;
+ struct nfs_net *nn = net_generic(net, nfs_net_id);
- ret = do_cache_lookup_wait(&nfs_dns_resolve, &key, &item);
+ ret = do_cache_lookup_wait(nn->nfs_dns_resolve, &key, &item);
if (ret == 0) {
if (salen >= item->addrlen) {
memcpy(sa, &item->addr, item->addrlen);
ret = item->addrlen;
} else
ret = -EOVERFLOW;
- cache_put(&item->h, &nfs_dns_resolve);
+ cache_put(&item->h, nn->nfs_dns_resolve);
} else if (ret == -ENOENT)
ret = -ESRCH;
return ret;
}
+int nfs_dns_resolver_cache_init(struct net *net)
+{
+ int err = -ENOMEM;
+ struct nfs_net *nn = net_generic(net, nfs_net_id);
+ struct cache_detail *cd;
+ struct cache_head **tbl;
+
+ cd = kzalloc(sizeof(struct cache_detail), GFP_KERNEL);
+ if (cd == NULL)
+ goto err_cd;
+
+ tbl = kzalloc(NFS_DNS_HASHTBL_SIZE * sizeof(struct cache_head *),
+ GFP_KERNEL);
+ if (tbl == NULL)
+ goto err_tbl;
+
+ cd->owner = THIS_MODULE,
+ cd->hash_size = NFS_DNS_HASHTBL_SIZE,
+ cd->hash_table = tbl,
+ cd->name = "dns_resolve",
+ cd->cache_put = nfs_dns_ent_put,
+ cd->cache_upcall = nfs_dns_upcall,
+ cd->cache_parse = nfs_dns_parse,
+ cd->cache_show = nfs_dns_show,
+ cd->match = nfs_dns_match,
+ cd->init = nfs_dns_ent_init,
+ cd->update = nfs_dns_ent_update,
+ cd->alloc = nfs_dns_ent_alloc,
+
+ nfs_cache_init(cd);
+ err = nfs_cache_register_net(net, cd);
+ if (err)
+ goto err_reg;
+ nn->nfs_dns_resolve = cd;
+ return 0;
+
+err_reg:
+ nfs_cache_destroy(cd);
+ kfree(cd->hash_table);
+err_tbl:
+ kfree(cd);
+err_cd:
+ return err;
+}
+
+void nfs_dns_resolver_cache_destroy(struct net *net)
+{
+ struct nfs_net *nn = net_generic(net, nfs_net_id);
+ struct cache_detail *cd = nn->nfs_dns_resolve;
+
+ nfs_cache_unregister_net(net, cd);
+ nfs_cache_destroy(cd);
+ kfree(cd->hash_table);
+ kfree(cd);
+}
+
+static int rpc_pipefs_event(struct notifier_block *nb, unsigned long event,
+ void *ptr)
+{
+ struct super_block *sb = ptr;
+ struct net *net = sb->s_fs_info;
+ struct nfs_net *nn = net_generic(net, nfs_net_id);
+ struct cache_detail *cd = nn->nfs_dns_resolve;
+ int ret = 0;
+
+ if (cd == NULL)
+ return 0;
+
+ if (!try_module_get(THIS_MODULE))
+ return 0;
+
+ switch (event) {
+ case RPC_PIPEFS_MOUNT:
+ ret = nfs_cache_register_sb(sb, cd);
+ break;
+ case RPC_PIPEFS_UMOUNT:
+ nfs_cache_unregister_sb(sb, cd);
+ break;
+ default:
+ ret = -ENOTSUPP;
+ break;
+ }
+ module_put(THIS_MODULE);
+ return ret;
+}
+
+static struct notifier_block nfs_dns_resolver_block = {
+ .notifier_call = rpc_pipefs_event,
+};
+
int nfs_dns_resolver_init(void)
{
- return nfs_cache_register(&nfs_dns_resolve);
+ return rpc_pipefs_notifier_register(&nfs_dns_resolver_block);
}
void nfs_dns_resolver_destroy(void)
{
- nfs_cache_unregister(&nfs_dns_resolve);
+ rpc_pipefs_notifier_unregister(&nfs_dns_resolver_block);
}
-
#endif
diff --git a/fs/nfs/dns_resolve.h b/fs/nfs/dns_resolve.h
index 199bb5543a9..2e4f596d292 100644
--- a/fs/nfs/dns_resolve.h
+++ b/fs/nfs/dns_resolve.h
@@ -15,12 +15,22 @@ static inline int nfs_dns_resolver_init(void)
static inline void nfs_dns_resolver_destroy(void)
{}
+
+static inline int nfs_dns_resolver_cache_init(struct net *net)
+{
+ return 0;
+}
+
+static inline void nfs_dns_resolver_cache_destroy(struct net *net)
+{}
#else
extern int nfs_dns_resolver_init(void);
extern void nfs_dns_resolver_destroy(void);
+extern int nfs_dns_resolver_cache_init(struct net *net);
+extern void nfs_dns_resolver_cache_destroy(struct net *net);
#endif
-extern ssize_t nfs_dns_resolve_name(char *name, size_t namelen,
- struct sockaddr *sa, size_t salen);
+extern ssize_t nfs_dns_resolve_name(struct net *net, char *name,
+ size_t namelen, struct sockaddr *sa, size_t salen);
#endif
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 606ef0f20ae..a6708e6b438 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -30,7 +30,6 @@
#include <linux/swap.h>
#include <asm/uaccess.h>
-#include <asm/system.h>
#include "delegation.h"
#include "internal.h"
@@ -175,6 +174,13 @@ nfs_file_flush(struct file *file, fl_owner_t id)
if ((file->f_mode & FMODE_WRITE) == 0)
return 0;
+ /*
+ * If we're holding a write delegation, then just start the i/o
+ * but don't wait for completion (or send a commit).
+ */
+ if (nfs_have_delegation(inode, FMODE_WRITE))
+ return filemap_fdatawrite(file->f_mapping);
+
/* Flush writes to the server and return any errors */
return vfs_fsync(file, 0);
}
@@ -272,13 +278,13 @@ nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
datasync);
ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
- if (ret)
- return ret;
mutex_lock(&inode->i_mutex);
nfs_inc_stats(inode, NFSIOS_VFSFSYNC);
have_error = test_and_clear_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags);
status = nfs_commit_inode(inode, FLUSH_SYNC);
+ if (status >= 0 && ret < 0)
+ status = ret;
have_error |= test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags);
if (have_error)
ret = xchg(&ctx->error, 0);
@@ -418,6 +424,7 @@ static int nfs_write_end(struct file *file, struct address_space *mapping,
if (status < 0)
return status;
+ NFS_I(mapping->host)->write_io += copied;
return copied;
}
@@ -530,6 +537,8 @@ static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
if (mapping != dentry->d_inode->i_mapping)
goto out_unlock;
+ wait_on_page_writeback(page);
+
pagelen = nfs_page_length(page);
if (pagelen == 0)
goto out_unlock;
@@ -870,12 +879,81 @@ const struct file_operations nfs_file_operations = {
static int
nfs4_file_open(struct inode *inode, struct file *filp)
{
+ struct nfs_open_context *ctx;
+ struct dentry *dentry = filp->f_path.dentry;
+ struct dentry *parent = NULL;
+ struct inode *dir;
+ unsigned openflags = filp->f_flags;
+ struct iattr attr;
+ int err;
+
+ BUG_ON(inode != dentry->d_inode);
/*
- * NFSv4 opens are handled in d_lookup and d_revalidate. If we get to
- * this point, then something is very wrong
+ * If no cached dentry exists or if it's negative, NFSv4 handled the
+ * opens in ->lookup() or ->create().
+ *
+ * We only get this far for a cached positive dentry. We skipped
+ * revalidation, so handle it here by dropping the dentry and returning
+ * -EOPENSTALE. The VFS will retry the lookup/create/open.
*/
- dprintk("NFS: %s called! inode=%p filp=%p\n", __func__, inode, filp);
- return -ENOTDIR;
+
+ dprintk("NFS: open file(%s/%s)\n",
+ dentry->d_parent->d_name.name,
+ dentry->d_name.name);
+
+ if ((openflags & O_ACCMODE) == 3)
+ openflags--;
+
+ /* We can't create new files here */
+ openflags &= ~(O_CREAT|O_EXCL);
+
+ parent = dget_parent(dentry);
+ dir = parent->d_inode;
+
+ ctx = alloc_nfs_open_context(filp->f_path.dentry, filp->f_mode);
+ err = PTR_ERR(ctx);
+ if (IS_ERR(ctx))
+ goto out;
+
+ attr.ia_valid = ATTR_OPEN;
+ if (openflags & O_TRUNC) {
+ attr.ia_valid |= ATTR_SIZE;
+ attr.ia_size = 0;
+ nfs_wb_all(inode);
+ }
+
+ inode = NFS_PROTO(dir)->open_context(dir, ctx, openflags, &attr);
+ if (IS_ERR(inode)) {
+ err = PTR_ERR(inode);
+ switch (err) {
+ case -EPERM:
+ case -EACCES:
+ case -EDQUOT:
+ case -ENOSPC:
+ case -EROFS:
+ goto out_put_ctx;
+ default:
+ goto out_drop;
+ }
+ }
+ iput(inode);
+ if (inode != dentry->d_inode)
+ goto out_drop;
+
+ nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
+ nfs_file_set_open_context(filp, ctx);
+ err = 0;
+
+out_put_ctx:
+ put_nfs_open_context(ctx);
+out:
+ dput(parent);
+ return err;
+
+out_drop:
+ d_drop(dentry);
+ err = -EOPENSTALE;
+ goto out_put_ctx;
}
const struct file_operations nfs4_file_operations = {
diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c
index 419119c371b..c817787fbdb 100644
--- a/fs/nfs/fscache.c
+++ b/fs/nfs/fscache.c
@@ -64,23 +64,12 @@ void nfs_fscache_release_client_cookie(struct nfs_client *clp)
* either by the 'fsc=xxx' option to mount, or by inheriting it from the parent
* superblock across an automount point of some nature.
*/
-void nfs_fscache_get_super_cookie(struct super_block *sb, const char *uniq,
- struct nfs_clone_mount *mntdata)
+void nfs_fscache_get_super_cookie(struct super_block *sb, const char *uniq, int ulen)
{
struct nfs_fscache_key *key, *xkey;
struct nfs_server *nfss = NFS_SB(sb);
struct rb_node **p, *parent;
- int diff, ulen;
-
- if (uniq) {
- ulen = strlen(uniq);
- } else if (mntdata) {
- struct nfs_server *mnt_s = NFS_SB(mntdata->sb);
- if (mnt_s->fscache_key) {
- uniq = mnt_s->fscache_key->key.uniquifier;
- ulen = mnt_s->fscache_key->key.uniq_len;
- }
- }
+ int diff;
if (!uniq) {
uniq = "";
@@ -327,7 +316,7 @@ void nfs_fscache_reset_inode_cookie(struct inode *inode)
{
struct nfs_inode *nfsi = NFS_I(inode);
struct nfs_server *nfss = NFS_SERVER(inode);
- struct fscache_cookie *old = nfsi->fscache;
+ NFS_IFDEBUG(struct fscache_cookie *old = nfsi->fscache);
nfs_fscache_inode_lock(inode);
if (nfsi->fscache) {
diff --git a/fs/nfs/fscache.h b/fs/nfs/fscache.h
index b9c572d0679..c5b11b53ff3 100644
--- a/fs/nfs/fscache.h
+++ b/fs/nfs/fscache.h
@@ -73,9 +73,7 @@ extern void nfs_fscache_unregister(void);
extern void nfs_fscache_get_client_cookie(struct nfs_client *);
extern void nfs_fscache_release_client_cookie(struct nfs_client *);
-extern void nfs_fscache_get_super_cookie(struct super_block *,
- const char *,
- struct nfs_clone_mount *);
+extern void nfs_fscache_get_super_cookie(struct super_block *, const char *, int);
extern void nfs_fscache_release_super_cookie(struct super_block *);
extern void nfs_fscache_init_inode_cookie(struct inode *);
@@ -172,12 +170,6 @@ static inline void nfs_fscache_unregister(void) {}
static inline void nfs_fscache_get_client_cookie(struct nfs_client *clp) {}
static inline void nfs_fscache_release_client_cookie(struct nfs_client *clp) {}
-static inline void nfs_fscache_get_super_cookie(
- struct super_block *sb,
- const char *uniq,
- struct nfs_clone_mount *mntdata)
-{
-}
static inline void nfs_fscache_release_super_cookie(struct super_block *sb) {}
static inline void nfs_fscache_init_inode_cookie(struct inode *inode) {}
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
index dcb61548887..8abfb19bd3a 100644
--- a/fs/nfs/getroot.c
+++ b/fs/nfs/getroot.c
@@ -32,7 +32,6 @@
#include <linux/namei.h>
#include <linux/security.h>
-#include <asm/system.h>
#include <asm/uaccess.h>
#include "nfs4_fs.h"
@@ -49,11 +48,9 @@ static int nfs_superblock_set_dummy_root(struct super_block *sb, struct inode *i
{
/* The mntroot acts as the dummy root dentry for this superblock */
if (sb->s_root == NULL) {
- sb->s_root = d_alloc_root(inode);
- if (sb->s_root == NULL) {
- iput(inode);
+ sb->s_root = d_make_root(inode);
+ if (sb->s_root == NULL)
return -ENOMEM;
- }
ihold(inode);
/*
* Ensure that this dentry is invisible to d_find_alias().
@@ -153,7 +150,7 @@ int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh)
goto out;
/* Start by getting the root filehandle from the server */
- ret = server->nfs_client->rpc_ops->getroot(server, mntfh, &fsinfo);
+ ret = nfs4_proc_get_rootfh(server, mntfh, &fsinfo);
if (ret < 0) {
dprintk("nfs4_get_rootfh: getroot error = %d\n", -ret);
goto out;
@@ -181,87 +178,4 @@ out:
return ret;
}
-/*
- * get an NFS4 root dentry from the root filehandle
- */
-struct dentry *nfs4_get_root(struct super_block *sb, struct nfs_fh *mntfh,
- const char *devname)
-{
- struct nfs_server *server = NFS_SB(sb);
- struct nfs_fattr *fattr = NULL;
- struct dentry *ret;
- struct inode *inode;
- void *name = kstrdup(devname, GFP_KERNEL);
- int error;
-
- dprintk("--> nfs4_get_root()\n");
-
- if (!name)
- return ERR_PTR(-ENOMEM);
-
- /* get the info about the server and filesystem */
- error = nfs4_server_capabilities(server, mntfh);
- if (error < 0) {
- dprintk("nfs_get_root: getcaps error = %d\n",
- -error);
- kfree(name);
- return ERR_PTR(error);
- }
-
- fattr = nfs_alloc_fattr();
- if (fattr == NULL) {
- kfree(name);
- return ERR_PTR(-ENOMEM);
- }
-
- /* get the actual root for this mount */
- error = server->nfs_client->rpc_ops->getattr(server, mntfh, fattr);
- if (error < 0) {
- dprintk("nfs_get_root: getattr error = %d\n", -error);
- ret = ERR_PTR(error);
- goto out;
- }
-
- if (fattr->valid & NFS_ATTR_FATTR_FSID &&
- !nfs_fsid_equal(&server->fsid, &fattr->fsid))
- memcpy(&server->fsid, &fattr->fsid, sizeof(server->fsid));
-
- inode = nfs_fhget(sb, mntfh, fattr);
- if (IS_ERR(inode)) {
- dprintk("nfs_get_root: get root inode failed\n");
- ret = ERR_CAST(inode);
- goto out;
- }
-
- error = nfs_superblock_set_dummy_root(sb, inode);
- if (error != 0) {
- ret = ERR_PTR(error);
- goto out;
- }
-
- /* root dentries normally start off anonymous and get spliced in later
- * if the dentry tree reaches them; however if the dentry already
- * exists, we'll pick it up at this point and use it as the root
- */
- ret = d_obtain_alias(inode);
- if (IS_ERR(ret)) {
- dprintk("nfs_get_root: get root dentry failed\n");
- goto out;
- }
-
- security_d_instantiate(ret, inode);
- spin_lock(&ret->d_lock);
- if (IS_ROOT(ret) && !(ret->d_flags & DCACHE_NFSFS_RENAMED)) {
- ret->d_fsdata = name;
- name = NULL;
- }
- spin_unlock(&ret->d_lock);
-out:
- if (name)
- kfree(name);
- nfs_free_fattr(fattr);
- dprintk("<-- nfs4_get_root()\n");
- return ret;
-}
-
#endif /* CONFIG_NFS_V4 */
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index 47d1c6ff2d8..864c51e4b40 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -34,10 +34,116 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <linux/types.h>
-#include <linux/string.h>
-#include <linux/kernel.h>
-#include <linux/slab.h>
+#include <linux/parser.h>
+#include <linux/fs.h>
#include <linux/nfs_idmap.h>
+#include <net/net_namespace.h>
+#include <linux/sunrpc/rpc_pipe_fs.h>
+#include <linux/nfs_fs.h>
+#include <linux/nfs_fs_sb.h>
+#include <linux/key.h>
+#include <linux/keyctl.h>
+#include <linux/key-type.h>
+#include <keys/user-type.h>
+#include <linux/module.h>
+
+#include "internal.h"
+#include "netns.h"
+
+#define NFS_UINT_MAXLEN 11
+
+/* Default cache timeout is 10 minutes */
+unsigned int nfs_idmap_cache_timeout = 600;
+static const struct cred *id_resolver_cache;
+static struct key_type key_type_id_resolver_legacy;
+
+struct idmap {
+ struct rpc_pipe *idmap_pipe;
+ struct key_construction *idmap_key_cons;
+ struct mutex idmap_mutex;
+};
+
+/**
+ * nfs_fattr_init_names - initialise the nfs_fattr owner_name/group_name fields
+ * @fattr: fully initialised struct nfs_fattr
+ * @owner_name: owner name string cache
+ * @group_name: group name string cache
+ */
+void nfs_fattr_init_names(struct nfs_fattr *fattr,
+ struct nfs4_string *owner_name,
+ struct nfs4_string *group_name)
+{
+ fattr->owner_name = owner_name;
+ fattr->group_name = group_name;
+}
+
+static void nfs_fattr_free_owner_name(struct nfs_fattr *fattr)
+{
+ fattr->valid &= ~NFS_ATTR_FATTR_OWNER_NAME;
+ kfree(fattr->owner_name->data);
+}
+
+static void nfs_fattr_free_group_name(struct nfs_fattr *fattr)
+{
+ fattr->valid &= ~NFS_ATTR_FATTR_GROUP_NAME;
+ kfree(fattr->group_name->data);
+}
+
+static bool nfs_fattr_map_owner_name(struct nfs_server *server, struct nfs_fattr *fattr)
+{
+ struct nfs4_string *owner = fattr->owner_name;
+ __u32 uid;
+
+ if (!(fattr->valid & NFS_ATTR_FATTR_OWNER_NAME))
+ return false;
+ if (nfs_map_name_to_uid(server, owner->data, owner->len, &uid) == 0) {
+ fattr->uid = uid;
+ fattr->valid |= NFS_ATTR_FATTR_OWNER;
+ }
+ return true;
+}
+
+static bool nfs_fattr_map_group_name(struct nfs_server *server, struct nfs_fattr *fattr)
+{
+ struct nfs4_string *group = fattr->group_name;
+ __u32 gid;
+
+ if (!(fattr->valid & NFS_ATTR_FATTR_GROUP_NAME))
+ return false;
+ if (nfs_map_group_to_gid(server, group->data, group->len, &gid) == 0) {
+ fattr->gid = gid;
+ fattr->valid |= NFS_ATTR_FATTR_GROUP;
+ }
+ return true;
+}
+
+/**
+ * nfs_fattr_free_names - free up the NFSv4 owner and group strings
+ * @fattr: a fully initialised nfs_fattr structure
+ */
+void nfs_fattr_free_names(struct nfs_fattr *fattr)
+{
+ if (fattr->valid & NFS_ATTR_FATTR_OWNER_NAME)
+ nfs_fattr_free_owner_name(fattr);
+ if (fattr->valid & NFS_ATTR_FATTR_GROUP_NAME)
+ nfs_fattr_free_group_name(fattr);
+}
+
+/**
+ * nfs_fattr_map_and_free_names - map owner/group strings into uid/gid and free
+ * @server: pointer to the filesystem nfs_server structure
+ * @fattr: a fully initialised nfs_fattr structure
+ *
+ * This helper maps the cached NFSv4 owner/group strings in fattr into
+ * their numeric uid/gid equivalents, and then frees the cached strings.
+ */
+void nfs_fattr_map_and_free_names(struct nfs_server *server, struct nfs_fattr *fattr)
+{
+ if (nfs_fattr_map_owner_name(server, fattr))
+ nfs_fattr_free_owner_name(fattr);
+ if (nfs_fattr_map_group_name(server, fattr))
+ nfs_fattr_free_group_name(fattr);
+}
static int nfs_map_string_to_numeric(const char *name, size_t namelen, __u32 *res)
{
@@ -59,24 +165,7 @@ static int nfs_map_numeric_to_string(__u32 id, char *buf, size_t buflen)
return snprintf(buf, buflen, "%u", id);
}
-#ifdef CONFIG_NFS_USE_NEW_IDMAPPER
-
-#include <linux/cred.h>
-#include <linux/sunrpc/sched.h>
-#include <linux/nfs4.h>
-#include <linux/nfs_fs_sb.h>
-#include <linux/keyctl.h>
-#include <linux/key-type.h>
-#include <linux/rcupdate.h>
-#include <linux/err.h>
-
-#include <keys/user-type.h>
-
-#define NFS_UINT_MAXLEN 11
-
-const struct cred *id_resolver_cache;
-
-struct key_type key_type_id_resolver = {
+static struct key_type key_type_id_resolver = {
.name = "id_resolver",
.instantiate = user_instantiate,
.match = user_match,
@@ -86,13 +175,14 @@ struct key_type key_type_id_resolver = {
.read = user_read,
};
-int nfs_idmap_init(void)
+static int nfs_idmap_init_keyring(void)
{
struct cred *cred;
struct key *keyring;
int ret = 0;
- printk(KERN_NOTICE "Registering the %s key type\n", key_type_id_resolver.name);
+ printk(KERN_NOTICE "NFS: Registering the %s key type\n",
+ key_type_id_resolver.name);
cred = prepare_kernel_cred(NULL);
if (!cred)
@@ -115,6 +205,7 @@ int nfs_idmap_init(void)
if (ret < 0)
goto failed_put_key;
+ set_bit(KEY_FLAG_ROOT_CAN_CLEAR, &keyring->flags);
cred->thread_keyring = keyring;
cred->jit_keyring = KEY_REQKEY_DEFL_THREAD_KEYRING;
id_resolver_cache = cred;
@@ -127,7 +218,7 @@ failed_put_cred:
return ret;
}
-void nfs_idmap_quit(void)
+static void nfs_idmap_quit_keyring(void)
{
key_revoke(id_resolver_cache->thread_keyring);
unregister_key_type(&key_type_id_resolver);
@@ -162,8 +253,10 @@ static ssize_t nfs_idmap_get_desc(const char *name, size_t namelen,
return desclen;
}
-static ssize_t nfs_idmap_request_key(const char *name, size_t namelen,
- const char *type, void *data, size_t data_size)
+static ssize_t nfs_idmap_request_key(struct key_type *key_type,
+ const char *name, size_t namelen,
+ const char *type, void *data,
+ size_t data_size, struct idmap *idmap)
{
const struct cred *saved_cred;
struct key *rkey;
@@ -176,8 +269,12 @@ static ssize_t nfs_idmap_request_key(const char *name, size_t namelen,
goto out;
saved_cred = override_creds(id_resolver_cache);
- rkey = request_key(&key_type_id_resolver, desc, "");
+ if (idmap)
+ rkey = request_key_with_auxdata(key_type, desc, "", 0, idmap);
+ else
+ rkey = request_key(&key_type_id_resolver, desc, "");
revert_creds(saved_cred);
+
kfree(desc);
if (IS_ERR(rkey)) {
ret = PTR_ERR(rkey);
@@ -210,31 +307,48 @@ out:
return ret;
}
+/*
+ * Resolve an id mapping through the keyring.
+ *
+ * The request is first made against key_type_id_resolver with no idmap.
+ * If that returns an error, it is retried against
+ * key_type_id_resolver_legacy with @idmap passed along, while holding
+ * idmap->idmap_mutex.
+ *
+ * Returns the result of the last nfs_idmap_request_key() call made.
+ */
+static ssize_t nfs_idmap_get_key(const char *name, size_t namelen,
+				 const char *type, void *data,
+				 size_t data_size, struct idmap *idmap)
+{
+	ssize_t ret;
+
+	ret = nfs_idmap_request_key(&key_type_id_resolver, name, namelen,
+				    type, data, data_size, NULL);
+	if (ret >= 0)
+		return ret;
+
+	mutex_lock(&idmap->idmap_mutex);
+	ret = nfs_idmap_request_key(&key_type_id_resolver_legacy, name,
+				    namelen, type, data, data_size, idmap);
+	mutex_unlock(&idmap->idmap_mutex);
+	return ret;
+}
/* ID -> Name */
-static ssize_t nfs_idmap_lookup_name(__u32 id, const char *type, char *buf, size_t buflen)
+static ssize_t nfs_idmap_lookup_name(__u32 id, const char *type, char *buf,
+ size_t buflen, struct idmap *idmap)
{
char id_str[NFS_UINT_MAXLEN];
int id_len;
ssize_t ret;
id_len = snprintf(id_str, sizeof(id_str), "%u", id);
- ret = nfs_idmap_request_key(id_str, id_len, type, buf, buflen);
+ ret = nfs_idmap_get_key(id_str, id_len, type, buf, buflen, idmap);
if (ret < 0)
return -EINVAL;
return ret;
}
/* Name -> ID */
-static int nfs_idmap_lookup_id(const char *name, size_t namelen,
- const char *type, __u32 *id)
+static int nfs_idmap_lookup_id(const char *name, size_t namelen, const char *type,
+ __u32 *id, struct idmap *idmap)
{
char id_str[NFS_UINT_MAXLEN];
long id_long;
ssize_t data_size;
int ret = 0;
- data_size = nfs_idmap_request_key(name, namelen, type, id_str, NFS_UINT_MAXLEN);
+ data_size = nfs_idmap_get_key(name, namelen, type, id_str, NFS_UINT_MAXLEN, idmap);
if (data_size <= 0) {
ret = -EINVAL;
} else {
@@ -244,114 +358,98 @@ static int nfs_idmap_lookup_id(const char *name, size_t namelen,
return ret;
}
-int nfs_map_name_to_uid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *uid)
-{
- if (nfs_map_string_to_numeric(name, namelen, uid))
- return 0;
- return nfs_idmap_lookup_id(name, namelen, "uid", uid);
-}
-
-int nfs_map_group_to_gid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *gid)
-{
- if (nfs_map_string_to_numeric(name, namelen, gid))
- return 0;
- return nfs_idmap_lookup_id(name, namelen, "gid", gid);
-}
-
-int nfs_map_uid_to_name(const struct nfs_server *server, __u32 uid, char *buf, size_t buflen)
-{
- int ret = -EINVAL;
-
- if (!(server->caps & NFS_CAP_UIDGID_NOMAP))
- ret = nfs_idmap_lookup_name(uid, "user", buf, buflen);
- if (ret < 0)
- ret = nfs_map_numeric_to_string(uid, buf, buflen);
- return ret;
-}
-int nfs_map_gid_to_group(const struct nfs_server *server, __u32 gid, char *buf, size_t buflen)
-{
- int ret = -EINVAL;
-
- if (!(server->caps & NFS_CAP_UIDGID_NOMAP))
- ret = nfs_idmap_lookup_name(gid, "group", buf, buflen);
- if (ret < 0)
- ret = nfs_map_numeric_to_string(gid, buf, buflen);
- return ret;
-}
-
-#else /* CONFIG_NFS_USE_NEW_IDMAPPER not defined */
+/* idmap classic begins here */
+module_param(nfs_idmap_cache_timeout, int, 0644);
-#include <linux/module.h>
-#include <linux/mutex.h>
-#include <linux/init.h>
-#include <linux/socket.h>
-#include <linux/in.h>
-#include <linux/sched.h>
-#include <linux/sunrpc/clnt.h>
-#include <linux/workqueue.h>
-#include <linux/sunrpc/rpc_pipe_fs.h>
-
-#include <linux/nfs_fs.h>
-
-#include "nfs4_fs.h"
-
-#define IDMAP_HASH_SZ 128
-
-/* Default cache timeout is 10 minutes */
-unsigned int nfs_idmap_cache_timeout = 600 * HZ;
-
-static int param_set_idmap_timeout(const char *val, struct kernel_param *kp)
-{
- char *endp;
- int num = simple_strtol(val, &endp, 0);
- int jif = num * HZ;
- if (endp == val || *endp || num < 0 || jif < num)
- return -EINVAL;
- *((int *)kp->arg) = jif;
- return 0;
-}
-
-module_param_call(idmap_cache_timeout, param_set_idmap_timeout, param_get_int,
- &nfs_idmap_cache_timeout, 0644);
-
-struct idmap_hashent {
- unsigned long ih_expires;
- __u32 ih_id;
- size_t ih_namelen;
- char ih_name[IDMAP_NAMESZ];
+enum {
+ Opt_find_uid, Opt_find_gid, Opt_find_user, Opt_find_group, Opt_find_err
};
-struct idmap_hashtable {
- __u8 h_type;
- struct idmap_hashent h_entries[IDMAP_HASH_SZ];
-};
-
-struct idmap {
- struct dentry *idmap_dentry;
- wait_queue_head_t idmap_wq;
- struct idmap_msg idmap_im;
- struct mutex idmap_lock; /* Serializes upcalls */
- struct mutex idmap_im_lock; /* Protects the hashtable */
- struct idmap_hashtable idmap_user_hash;
- struct idmap_hashtable idmap_group_hash;
+static const match_table_t nfs_idmap_tokens = {
+ { Opt_find_uid, "uid:%s" },
+ { Opt_find_gid, "gid:%s" },
+ { Opt_find_user, "user:%s" },
+ { Opt_find_group, "group:%s" },
+ { Opt_find_err, NULL }
};
+static int nfs_idmap_legacy_upcall(struct key_construction *, const char *, void *);
static ssize_t idmap_pipe_downcall(struct file *, const char __user *,
size_t);
static void idmap_pipe_destroy_msg(struct rpc_pipe_msg *);
-static unsigned int fnvhash32(const void *, size_t);
-
static const struct rpc_pipe_ops idmap_upcall_ops = {
.upcall = rpc_pipe_generic_upcall,
.downcall = idmap_pipe_downcall,
.destroy_msg = idmap_pipe_destroy_msg,
};
+/*
+ * Key type used for the fallback request in nfs_idmap_get_key().  It is
+ * registered under the same "id_resolver" name as key_type_id_resolver but
+ * additionally supplies a request_key hook, nfs_idmap_legacy_upcall().
+ */
+static struct key_type key_type_id_resolver_legacy = {
+ .name = "id_resolver",
+ .instantiate = user_instantiate,
+ .match = user_match,
+ .revoke = user_revoke,
+ .destroy = user_destroy,
+ .describe = user_describe,
+ .read = user_read,
+ .request_key = nfs_idmap_legacy_upcall,
+};
+
+/* Unlink the idmap pipe's dentry, if the pipe currently has one. */
+static void __nfs_idmap_unregister(struct rpc_pipe *pipe)
+{
+	struct dentry *dentry = pipe->dentry;
+
+	if (dentry == NULL)
+		return;
+	rpc_unlink(dentry);
+}
+
+/*
+ * Create the "idmap" pipe dentry under @dir and record it in pipe->dentry.
+ *
+ * Returns 0 on success, or the error carried by the pointer that
+ * rpc_mkpipe_dentry() returned (pipe->dentry is left untouched then).
+ */
+static int __nfs_idmap_register(struct dentry *dir,
+				struct idmap *idmap,
+				struct rpc_pipe *pipe)
+{
+	struct dentry *pipe_dentry = rpc_mkpipe_dentry(dir, "idmap", idmap, pipe);
+
+	if (!IS_ERR(pipe_dentry)) {
+		pipe->dentry = pipe_dentry;
+		return 0;
+	}
+	return PTR_ERR(pipe_dentry);
+}
+
+/*
+ * Unregister the idmap pipe for @clp.  Does nothing when rpc_get_sb_net()
+ * returns NULL for the client's network namespace; otherwise the pipe is
+ * unregistered between rpc_get_sb_net() and rpc_put_sb_net().
+ */
+static void nfs_idmap_unregister(struct nfs_client *clp,
+				 struct rpc_pipe *pipe)
+{
+	struct net *net = clp->cl_net;
+
+	if (rpc_get_sb_net(net) == NULL)
+		return;
+
+	__nfs_idmap_unregister(pipe);
+	rpc_put_sb_net(net);
+}
+
+/*
+ * Register the idmap pipe under the client's rpc_clnt directory.
+ *
+ * Returns 0 without registering anything when rpc_get_sb_net() returns
+ * NULL or when the rpc client has no cl_dentry; otherwise returns the
+ * result of __nfs_idmap_register().
+ */
+static int nfs_idmap_register(struct nfs_client *clp,
+			      struct idmap *idmap,
+			      struct rpc_pipe *pipe)
+{
+	struct net *net = clp->cl_net;
+	struct dentry *clnt_dir;
+	int err = 0;
+
+	if (rpc_get_sb_net(net) == NULL)
+		return 0;
+
+	clnt_dir = clp->cl_rpcclient->cl_dentry;
+	if (clnt_dir != NULL)
+		err = __nfs_idmap_register(clnt_dir, idmap, pipe);
+
+	rpc_put_sb_net(net);
+	return err;
+}
+
int
nfs_idmap_new(struct nfs_client *clp)
{
struct idmap *idmap;
+ struct rpc_pipe *pipe;
int error;
BUG_ON(clp->cl_idmap != NULL);
@@ -360,19 +458,20 @@ nfs_idmap_new(struct nfs_client *clp)
if (idmap == NULL)
return -ENOMEM;
- idmap->idmap_dentry = rpc_mkpipe(clp->cl_rpcclient->cl_path.dentry,
- "idmap", idmap, &idmap_upcall_ops, 0);
- if (IS_ERR(idmap->idmap_dentry)) {
- error = PTR_ERR(idmap->idmap_dentry);
+ pipe = rpc_mkpipe_data(&idmap_upcall_ops, 0);
+ if (IS_ERR(pipe)) {
+ error = PTR_ERR(pipe);
kfree(idmap);
return error;
}
-
- mutex_init(&idmap->idmap_lock);
- mutex_init(&idmap->idmap_im_lock);
- init_waitqueue_head(&idmap->idmap_wq);
- idmap->idmap_user_hash.h_type = IDMAP_TYPE_USER;
- idmap->idmap_group_hash.h_type = IDMAP_TYPE_GROUP;
+ error = nfs_idmap_register(clp, idmap, pipe);
+ if (error) {
+ rpc_destroy_pipe_data(pipe);
+ kfree(idmap);
+ return error;
+ }
+ idmap->idmap_pipe = pipe;
+ mutex_init(&idmap->idmap_mutex);
clp->cl_idmap = idmap;
return 0;
@@ -385,211 +484,236 @@ nfs_idmap_delete(struct nfs_client *clp)
if (!idmap)
return;
- rpc_unlink(idmap->idmap_dentry);
+ nfs_idmap_unregister(clp, idmap->idmap_pipe);
+ rpc_destroy_pipe_data(idmap->idmap_pipe);
clp->cl_idmap = NULL;
kfree(idmap);
}
-/*
- * Helper routines for manipulating the hashtable
- */
-static inline struct idmap_hashent *
-idmap_name_hash(struct idmap_hashtable* h, const char *name, size_t len)
+static int __rpc_pipefs_event(struct nfs_client *clp, unsigned long event,
+ struct super_block *sb)
{
- return &h->h_entries[fnvhash32(name, len) % IDMAP_HASH_SZ];
+ int err = 0;
+
+ switch (event) {
+ case RPC_PIPEFS_MOUNT:
+ BUG_ON(clp->cl_rpcclient->cl_dentry == NULL);
+ err = __nfs_idmap_register(clp->cl_rpcclient->cl_dentry,
+ clp->cl_idmap,
+ clp->cl_idmap->idmap_pipe);
+ break;
+ case RPC_PIPEFS_UMOUNT:
+ if (clp->cl_idmap->idmap_pipe) {
+ struct dentry *parent;
+
+ parent = clp->cl_idmap->idmap_pipe->dentry->d_parent;
+ __nfs_idmap_unregister(clp->cl_idmap->idmap_pipe);
+ /*
+ * Note: This is a dirty hack. The SUNRPC hook has already
+ * been called, but its simple_rmdir() call on the client
+ * directory failed because the idmap pipe was still
+ * inside it. We therefore have to remove the directory
+ * here.
+ */
+ if (rpc_rmdir(parent))
+ printk(KERN_ERR "NFS: %s: failed to remove "
+ "clnt dir!\n", __func__);
+ }
+ break;
+ default:
+ printk(KERN_ERR "NFS: %s: unknown event: %ld\n", __func__,
+ event);
+ return -ENOTSUPP;
+ }
+ return err;
}
-static struct idmap_hashent *
-idmap_lookup_name(struct idmap_hashtable *h, const char *name, size_t len)
+static struct nfs_client *nfs_get_client_for_event(struct net *net, int event)
{
- struct idmap_hashent *he = idmap_name_hash(h, name, len);
-
- if (he->ih_namelen != len || memcmp(he->ih_name, name, len) != 0)
- return NULL;
- if (time_after(jiffies, he->ih_expires))
- return NULL;
- return he;
+ struct nfs_net *nn = net_generic(net, nfs_net_id);
+ struct dentry *cl_dentry;
+ struct nfs_client *clp;
+ int err;
+
+restart:
+ spin_lock(&nn->nfs_client_lock);
+ list_for_each_entry(clp, &nn->nfs_client_list, cl_share_link) {
+ /* Wait for initialisation to finish */
+ if (clp->cl_cons_state == NFS_CS_INITING) {
+ atomic_inc(&clp->cl_count);
+ spin_unlock(&nn->nfs_client_lock);
+ err = nfs_wait_client_init_complete(clp);
+ nfs_put_client(clp);
+ if (err)
+ return NULL;
+ goto restart;
+ }
+ /* Skip nfs_clients that failed to initialise */
+ if (clp->cl_cons_state < 0)
+ continue;
+ smp_rmb();
+ if (clp->rpc_ops != &nfs_v4_clientops)
+ continue;
+ cl_dentry = clp->cl_idmap->idmap_pipe->dentry;
+ if (((event == RPC_PIPEFS_MOUNT) && cl_dentry) ||
+ ((event == RPC_PIPEFS_UMOUNT) && !cl_dentry))
+ continue;
+ atomic_inc(&clp->cl_count);
+ spin_unlock(&nn->nfs_client_lock);
+ return clp;
+ }
+ spin_unlock(&nn->nfs_client_lock);
+ return NULL;
}
-static inline struct idmap_hashent *
-idmap_id_hash(struct idmap_hashtable* h, __u32 id)
+static int rpc_pipefs_event(struct notifier_block *nb, unsigned long event,
+ void *ptr)
{
- return &h->h_entries[fnvhash32(&id, sizeof(id)) % IDMAP_HASH_SZ];
-}
+ struct super_block *sb = ptr;
+ struct nfs_client *clp;
+ int error = 0;
-static struct idmap_hashent *
-idmap_lookup_id(struct idmap_hashtable *h, __u32 id)
-{
- struct idmap_hashent *he = idmap_id_hash(h, id);
- if (he->ih_id != id || he->ih_namelen == 0)
- return NULL;
- if (time_after(jiffies, he->ih_expires))
- return NULL;
- return he;
-}
+ if (!try_module_get(THIS_MODULE))
+ return 0;
-/*
- * Routines for allocating new entries in the hashtable.
- * For now, we just have 1 entry per bucket, so it's all
- * pretty trivial.
- */
-static inline struct idmap_hashent *
-idmap_alloc_name(struct idmap_hashtable *h, char *name, size_t len)
-{
- return idmap_name_hash(h, name, len);
+ while ((clp = nfs_get_client_for_event(sb->s_fs_info, event))) {
+ error = __rpc_pipefs_event(clp, event, sb);
+ nfs_put_client(clp);
+ if (error)
+ break;
+ }
+ module_put(THIS_MODULE);
+ return error;
}
-static inline struct idmap_hashent *
-idmap_alloc_id(struct idmap_hashtable *h, __u32 id)
+/*
+ * NOTE(review): nothing in the visible code references PIPEFS_NFS_PRIO; the
+ * notifier block below uses SUNRPC_PIPEFS_NFS_PRIO instead. Confirm whether
+ * this macro is still needed.
+ */
+#define PIPEFS_NFS_PRIO 1
+
+/*
+ * rpc_pipefs notifier; registered in nfs_idmap_init() and unregistered in
+ * nfs_idmap_quit(). Events are delivered to rpc_pipefs_event().
+ */
+static struct notifier_block nfs_idmap_block = {
+ .notifier_call = rpc_pipefs_event,
+ .priority = SUNRPC_PIPEFS_NFS_PRIO,
+};
+
+int nfs_idmap_init(void)
{
- return idmap_id_hash(h, id);
+ int ret;
+ ret = nfs_idmap_init_keyring();
+ if (ret != 0)
+ goto out;
+ ret = rpc_pipefs_notifier_register(&nfs_idmap_block);
+ if (ret != 0)
+ nfs_idmap_quit_keyring();
+out:
+ return ret;
}
-static void
-idmap_update_entry(struct idmap_hashent *he, const char *name,
- size_t namelen, __u32 id)
+void nfs_idmap_quit(void)
{
- he->ih_id = id;
- memcpy(he->ih_name, name, namelen);
- he->ih_name[namelen] = '\0';
- he->ih_namelen = namelen;
- he->ih_expires = jiffies + nfs_idmap_cache_timeout;
+ rpc_pipefs_notifier_unregister(&nfs_idmap_block);
+ nfs_idmap_quit_keyring();
}
-/*
- * Name -> ID
- */
-static int
-nfs_idmap_id(struct idmap *idmap, struct idmap_hashtable *h,
- const char *name, size_t namelen, __u32 *id)
+static int nfs_idmap_prepare_message(char *desc, struct idmap_msg *im,
+ struct rpc_pipe_msg *msg)
{
- struct rpc_pipe_msg msg;
- struct idmap_msg *im;
- struct idmap_hashent *he;
- DECLARE_WAITQUEUE(wq, current);
- int ret = -EIO;
-
- im = &idmap->idmap_im;
-
- /*
- * String sanity checks
- * Note that the userland daemon expects NUL terminated strings
- */
- for (;;) {
- if (namelen == 0)
- return -EINVAL;
- if (name[namelen-1] != '\0')
- break;
- namelen--;
- }
- if (namelen >= IDMAP_NAMESZ)
- return -EINVAL;
-
- mutex_lock(&idmap->idmap_lock);
- mutex_lock(&idmap->idmap_im_lock);
+ substring_t substr;
+ int token, ret;
- he = idmap_lookup_name(h, name, namelen);
- if (he != NULL) {
- *id = he->ih_id;
- ret = 0;
- goto out;
- }
+ memset(im, 0, sizeof(*im));
+ memset(msg, 0, sizeof(*msg));
- memset(im, 0, sizeof(*im));
- memcpy(im->im_name, name, namelen);
+ im->im_type = IDMAP_TYPE_GROUP;
+ token = match_token(desc, nfs_idmap_tokens, &substr);
- im->im_type = h->h_type;
- im->im_conv = IDMAP_CONV_NAMETOID;
+ switch (token) {
+ case Opt_find_uid:
+ im->im_type = IDMAP_TYPE_USER;
+ case Opt_find_gid:
+ im->im_conv = IDMAP_CONV_NAMETOID;
+ ret = match_strlcpy(im->im_name, &substr, IDMAP_NAMESZ);
+ break;
- memset(&msg, 0, sizeof(msg));
- msg.data = im;
- msg.len = sizeof(*im);
+ case Opt_find_user:
+ im->im_type = IDMAP_TYPE_USER;
+ case Opt_find_group:
+ im->im_conv = IDMAP_CONV_IDTONAME;
+ ret = match_int(&substr, &im->im_id);
+ break;
- add_wait_queue(&idmap->idmap_wq, &wq);
- if (rpc_queue_upcall(idmap->idmap_dentry->d_inode, &msg) < 0) {
- remove_wait_queue(&idmap->idmap_wq, &wq);
+ default:
+ ret = -EINVAL;
goto out;
}
- set_current_state(TASK_UNINTERRUPTIBLE);
- mutex_unlock(&idmap->idmap_im_lock);
- schedule();
- __set_current_state(TASK_RUNNING);
- remove_wait_queue(&idmap->idmap_wq, &wq);
- mutex_lock(&idmap->idmap_im_lock);
-
- if (im->im_status & IDMAP_STATUS_SUCCESS) {
- *id = im->im_id;
- ret = 0;
- }
+ msg->data = im;
+ msg->len = sizeof(struct idmap_msg);
- out:
- memset(im, 0, sizeof(*im));
- mutex_unlock(&idmap->idmap_im_lock);
- mutex_unlock(&idmap->idmap_lock);
+out:
return ret;
}
-/*
- * ID -> Name
- */
-static int
-nfs_idmap_name(struct idmap *idmap, struct idmap_hashtable *h,
- __u32 id, char *name)
+static int nfs_idmap_legacy_upcall(struct key_construction *cons,
+ const char *op,
+ void *aux)
{
- struct rpc_pipe_msg msg;
+ struct rpc_pipe_msg *msg;
struct idmap_msg *im;
- struct idmap_hashent *he;
- DECLARE_WAITQUEUE(wq, current);
- int ret = -EIO;
- unsigned int len;
+ struct idmap *idmap = (struct idmap *)aux;
+ struct key *key = cons->key;
+ int ret = -ENOMEM;
- im = &idmap->idmap_im;
+ /* msg and im are freed in idmap_pipe_destroy_msg */
+ msg = kmalloc(sizeof(*msg), GFP_KERNEL);
+ if (!msg)
+ goto out0;
- mutex_lock(&idmap->idmap_lock);
- mutex_lock(&idmap->idmap_im_lock);
+ im = kmalloc(sizeof(*im), GFP_KERNEL);
+ if (!im)
+ goto out1;
- he = idmap_lookup_id(h, id);
- if (he) {
- memcpy(name, he->ih_name, he->ih_namelen);
- ret = he->ih_namelen;
- goto out;
- }
+ ret = nfs_idmap_prepare_message(key->description, im, msg);
+ if (ret < 0)
+ goto out2;
- memset(im, 0, sizeof(*im));
- im->im_type = h->h_type;
- im->im_conv = IDMAP_CONV_IDTONAME;
- im->im_id = id;
+ idmap->idmap_key_cons = cons;
- memset(&msg, 0, sizeof(msg));
- msg.data = im;
- msg.len = sizeof(*im);
+ ret = rpc_queue_upcall(idmap->idmap_pipe, msg);
+ if (ret < 0)
+ goto out2;
- add_wait_queue(&idmap->idmap_wq, &wq);
+ return ret;
- if (rpc_queue_upcall(idmap->idmap_dentry->d_inode, &msg) < 0) {
- remove_wait_queue(&idmap->idmap_wq, &wq);
- goto out;
- }
+out2:
+ kfree(im);
+out1:
+ kfree(msg);
+out0:
+ key_revoke(cons->key);
+ key_revoke(cons->authkey);
+ return ret;
+}
- set_current_state(TASK_UNINTERRUPTIBLE);
- mutex_unlock(&idmap->idmap_im_lock);
- schedule();
- __set_current_state(TASK_RUNNING);
- remove_wait_queue(&idmap->idmap_wq, &wq);
- mutex_lock(&idmap->idmap_im_lock);
-
- if (im->im_status & IDMAP_STATUS_SUCCESS) {
- if ((len = strnlen(im->im_name, IDMAP_NAMESZ)) == 0)
- goto out;
- memcpy(name, im->im_name, len);
- ret = len;
+/*
+ * Instantiate @key with the NUL-terminated string @data (the terminator is
+ * counted in the payload length) and link it into the thread keyring of
+ * id_resolver_cache.  Returns the result of key_instantiate_and_link().
+ */
+static int nfs_idmap_instantiate(struct key *key, struct key *authkey, char *data)
+{
+	size_t datalen = strlen(data) + 1;
+	struct key *keyring = id_resolver_cache->thread_keyring;
+
+	return key_instantiate_and_link(key, data, datalen, keyring, authkey);
+}
+
+/*
+ * Turn a downcall message into key payload and instantiate @key with it.
+ *
+ * @im:      message received from the idmap daemon
+ * @key:     key under construction
+ * @authkey: authorisation key handed to nfs_idmap_instantiate()
+ *
+ * For IDMAP_CONV_NAMETOID the payload is im->im_id rendered as a decimal
+ * string; for IDMAP_CONV_IDTONAME it is im->im_name.  Returns the result of
+ * nfs_idmap_instantiate(), or -EINVAL for any other im_conv value.
+ */
+static int nfs_idmap_read_message(struct idmap_msg *im, struct key *key, struct key *authkey)
+{
+	char id_str[NFS_UINT_MAXLEN];
+	int ret = -EINVAL;
+
+	switch (im->im_conv) {
+	case IDMAP_CONV_NAMETOID:
+		/*
+		 * Format the id as unsigned and bound the write to the
+		 * buffer.  An unbounded "%d" prints ids with the top bit
+		 * set as a negative number of up to 11 characters plus the
+		 * terminator, which does not fit an 11-byte id_str (the
+		 * value this diff shows for NFS_UINT_MAXLEN).  "%u" also
+		 * matches how nfs_idmap_lookup_name() and
+		 * nfs_map_numeric_to_string() render ids.
+		 */
+		snprintf(id_str, sizeof(id_str), "%u", im->im_id);
+		ret = nfs_idmap_instantiate(key, authkey, id_str);
+		break;
+	case IDMAP_CONV_IDTONAME:
+		ret = nfs_idmap_instantiate(key, authkey, im->im_name);
+		break;
+	}
- out:
- memset(im, 0, sizeof(*im));
- mutex_unlock(&idmap->idmap_im_lock);
- mutex_unlock(&idmap->idmap_lock);
 return ret;
 }
@@ -598,115 +722,51 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
{
struct rpc_inode *rpci = RPC_I(filp->f_path.dentry->d_inode);
struct idmap *idmap = (struct idmap *)rpci->private;
- struct idmap_msg im_in, *im = &idmap->idmap_im;
- struct idmap_hashtable *h;
- struct idmap_hashent *he = NULL;
+ struct key_construction *cons = idmap->idmap_key_cons;
+ struct idmap_msg im;
size_t namelen_in;
int ret;
- if (mlen != sizeof(im_in))
- return -ENOSPC;
-
- if (copy_from_user(&im_in, src, mlen) != 0)
- return -EFAULT;
-
- mutex_lock(&idmap->idmap_im_lock);
-
- ret = mlen;
- im->im_status = im_in.im_status;
- /* If we got an error, terminate now, and wake up pending upcalls */
- if (!(im_in.im_status & IDMAP_STATUS_SUCCESS)) {
- wake_up(&idmap->idmap_wq);
+ if (mlen != sizeof(im)) {
+ ret = -ENOSPC;
goto out;
}
- /* Sanity checking of strings */
- ret = -EINVAL;
- namelen_in = strnlen(im_in.im_name, IDMAP_NAMESZ);
- if (namelen_in == 0 || namelen_in == IDMAP_NAMESZ)
+ if (copy_from_user(&im, src, mlen) != 0) {
+ ret = -EFAULT;
goto out;
+ }
- switch (im_in.im_type) {
- case IDMAP_TYPE_USER:
- h = &idmap->idmap_user_hash;
- break;
- case IDMAP_TYPE_GROUP:
- h = &idmap->idmap_group_hash;
- break;
- default:
- goto out;
+ if (!(im.im_status & IDMAP_STATUS_SUCCESS)) {
+ ret = mlen;
+ complete_request_key(idmap->idmap_key_cons, -ENOKEY);
+ goto out_incomplete;
}
- switch (im_in.im_conv) {
- case IDMAP_CONV_IDTONAME:
- /* Did we match the current upcall? */
- if (im->im_conv == IDMAP_CONV_IDTONAME
- && im->im_type == im_in.im_type
- && im->im_id == im_in.im_id) {
- /* Yes: copy string, including the terminating '\0' */
- memcpy(im->im_name, im_in.im_name, namelen_in);
- im->im_name[namelen_in] = '\0';
- wake_up(&idmap->idmap_wq);
- }
- he = idmap_alloc_id(h, im_in.im_id);
- break;
- case IDMAP_CONV_NAMETOID:
- /* Did we match the current upcall? */
- if (im->im_conv == IDMAP_CONV_NAMETOID
- && im->im_type == im_in.im_type
- && strnlen(im->im_name, IDMAP_NAMESZ) == namelen_in
- && memcmp(im->im_name, im_in.im_name, namelen_in) == 0) {
- im->im_id = im_in.im_id;
- wake_up(&idmap->idmap_wq);
- }
- he = idmap_alloc_name(h, im_in.im_name, namelen_in);
- break;
- default:
+ namelen_in = strnlen(im.im_name, IDMAP_NAMESZ);
+ if (namelen_in == 0 || namelen_in == IDMAP_NAMESZ) {
+ ret = -EINVAL;
goto out;
}
- /* If the entry is valid, also copy it to the cache */
- if (he != NULL)
- idmap_update_entry(he, im_in.im_name, namelen_in, im_in.im_id);
- ret = mlen;
+ ret = nfs_idmap_read_message(&im, cons->key, cons->authkey);
+ if (ret >= 0) {
+ key_set_timeout(cons->key, nfs_idmap_cache_timeout);
+ ret = mlen;
+ }
+
out:
- mutex_unlock(&idmap->idmap_im_lock);
+ complete_request_key(idmap->idmap_key_cons, ret);
+out_incomplete:
return ret;
}
static void
idmap_pipe_destroy_msg(struct rpc_pipe_msg *msg)
{
- struct idmap_msg *im = msg->data;
- struct idmap *idmap = container_of(im, struct idmap, idmap_im);
-
- if (msg->errno >= 0)
- return;
- mutex_lock(&idmap->idmap_im_lock);
- im->im_status = IDMAP_STATUS_LOOKUPFAIL;
- wake_up(&idmap->idmap_wq);
- mutex_unlock(&idmap->idmap_im_lock);
-}
-
-/*
- * Fowler/Noll/Vo hash
- * http://www.isthe.com/chongo/tech/comp/fnv/
- */
-
-#define FNV_P_32 ((unsigned int)0x01000193) /* 16777619 */
-#define FNV_1_32 ((unsigned int)0x811c9dc5) /* 2166136261 */
-
-static unsigned int fnvhash32(const void *buf, size_t buflen)
-{
- const unsigned char *p, *end = (const unsigned char *)buf + buflen;
- unsigned int hash = FNV_1_32;
-
- for (p = buf; p < end; p++) {
- hash *= FNV_P_32;
- hash ^= (unsigned int)*p;
- }
-
- return hash;
+ /* Free memory allocated in nfs_idmap_legacy_upcall() */
+ kfree(msg->data);
+ kfree(msg);
}
int nfs_map_name_to_uid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *uid)
@@ -715,16 +775,16 @@ int nfs_map_name_to_uid(const struct nfs_server *server, const char *name, size_
if (nfs_map_string_to_numeric(name, namelen, uid))
return 0;
- return nfs_idmap_id(idmap, &idmap->idmap_user_hash, name, namelen, uid);
+ return nfs_idmap_lookup_id(name, namelen, "uid", uid, idmap);
}
-int nfs_map_group_to_gid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *uid)
+int nfs_map_group_to_gid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *gid)
{
struct idmap *idmap = server->nfs_client->cl_idmap;
- if (nfs_map_string_to_numeric(name, namelen, uid))
+ if (nfs_map_string_to_numeric(name, namelen, gid))
return 0;
- return nfs_idmap_id(idmap, &idmap->idmap_group_hash, name, namelen, uid);
+ return nfs_idmap_lookup_id(name, namelen, "gid", gid, idmap);
}
int nfs_map_uid_to_name(const struct nfs_server *server, __u32 uid, char *buf, size_t buflen)
@@ -733,21 +793,19 @@ int nfs_map_uid_to_name(const struct nfs_server *server, __u32 uid, char *buf, s
int ret = -EINVAL;
if (!(server->caps & NFS_CAP_UIDGID_NOMAP))
- ret = nfs_idmap_name(idmap, &idmap->idmap_user_hash, uid, buf);
+ ret = nfs_idmap_lookup_name(uid, "user", buf, buflen, idmap);
if (ret < 0)
ret = nfs_map_numeric_to_string(uid, buf, buflen);
return ret;
}
-int nfs_map_gid_to_group(const struct nfs_server *server, __u32 uid, char *buf, size_t buflen)
+int nfs_map_gid_to_group(const struct nfs_server *server, __u32 gid, char *buf, size_t buflen)
{
struct idmap *idmap = server->nfs_client->cl_idmap;
int ret = -EINVAL;
if (!(server->caps & NFS_CAP_UIDGID_NOMAP))
- ret = nfs_idmap_name(idmap, &idmap->idmap_group_hash, uid, buf);
+ ret = nfs_idmap_lookup_name(gid, "group", buf, buflen, idmap);
if (ret < 0)
- ret = nfs_map_numeric_to_string(uid, buf, buflen);
+ ret = nfs_map_numeric_to_string(gid, buf, buflen);
return ret;
}
-
-#endif /* CONFIG_NFS_USE_NEW_IDMAPPER */
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 50a15fa8cf9..f7296983eba 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -38,8 +38,9 @@
#include <linux/nfs_xdr.h>
#include <linux/slab.h>
#include <linux/compat.h>
+#include <linux/freezer.h>
+#include <linux/crc32.h>
-#include <asm/system.h>
#include <asm/uaccess.h>
#include "nfs4_fs.h"
@@ -50,13 +51,14 @@
#include "fscache.h"
#include "dns_resolve.h"
#include "pnfs.h"
+#include "netns.h"
#define NFSDBG_FACILITY NFSDBG_VFS
#define NFS_64_BIT_INODE_NUMBERS_ENABLED 1
/* Default is to see 64-bit inode numbers */
-static int enable_ino64 = NFS_64_BIT_INODE_NUMBERS_ENABLED;
+static bool enable_ino64 = NFS_64_BIT_INODE_NUMBERS_ENABLED;
static void nfs_invalidate_inode(struct inode *);
static int nfs_update_inode(struct inode *, struct nfs_fattr *);
@@ -77,7 +79,7 @@ int nfs_wait_bit_killable(void *word)
{
if (fatal_signal_pending(current))
return -ERESTARTSYS;
- schedule();
+ freezable_schedule();
return 0;
}
@@ -119,7 +121,7 @@ static void nfs_clear_inode(struct inode *inode)
void nfs_evict_inode(struct inode *inode)
{
truncate_inode_pages(&inode->i_data, 0);
- end_writeback(inode);
+ clear_inode(inode);
nfs_clear_inode(inode);
}
@@ -283,9 +285,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
inode->i_mode = fattr->mode;
if ((fattr->valid & NFS_ATTR_FATTR_MODE) == 0
&& nfs_server_capable(inode, NFS_CAP_MODE))
- nfsi->cache_validity |= NFS_INO_INVALID_ATTR
- | NFS_INO_INVALID_ACCESS
- | NFS_INO_INVALID_ACL;
+ nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
/* Why so? Because we want revalidate for devices/FIFOs, and
* that's precisely what we have in nfs_file_inode_operations.
*/
@@ -298,8 +298,6 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->dir_inode_ops;
inode->i_fop = &nfs_dir_operations;
inode->i_data.a_ops = &nfs_dir_aops;
- if (nfs_server_capable(inode, NFS_CAP_READDIRPLUS))
- set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags);
/* Deal with crossing mountpoints */
if (fattr->valid & NFS_ATTR_FATTR_MOUNTPOINT ||
fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) {
@@ -325,6 +323,8 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
inode->i_gid = -2;
inode->i_blocks = 0;
memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf));
+ nfsi->write_io = 0;
+ nfsi->read_io = 0;
nfsi->read_cache_jiffies = fattr->time_start;
nfsi->attr_gencount = fattr->gencount;
@@ -335,24 +335,19 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
if (fattr->valid & NFS_ATTR_FATTR_MTIME)
inode->i_mtime = fattr->mtime;
else if (nfs_server_capable(inode, NFS_CAP_MTIME))
- nfsi->cache_validity |= NFS_INO_INVALID_ATTR
- | NFS_INO_INVALID_DATA;
+ nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
if (fattr->valid & NFS_ATTR_FATTR_CTIME)
inode->i_ctime = fattr->ctime;
else if (nfs_server_capable(inode, NFS_CAP_CTIME))
- nfsi->cache_validity |= NFS_INO_INVALID_ATTR
- | NFS_INO_INVALID_ACCESS
- | NFS_INO_INVALID_ACL;
+ nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
if (fattr->valid & NFS_ATTR_FATTR_CHANGE)
inode->i_version = fattr->change_attr;
else if (nfs_server_capable(inode, NFS_CAP_CHANGE_ATTR))
- nfsi->cache_validity |= NFS_INO_INVALID_ATTR
- | NFS_INO_INVALID_DATA;
+ nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
if (fattr->valid & NFS_ATTR_FATTR_SIZE)
inode->i_size = nfs_size_to_loff_t(fattr->size);
else
nfsi->cache_validity |= NFS_INO_INVALID_ATTR
- | NFS_INO_INVALID_DATA
| NFS_INO_REVAL_PAGECACHE;
if (fattr->valid & NFS_ATTR_FATTR_NLINK)
set_nlink(inode, fattr->nlink);
@@ -361,15 +356,11 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
if (fattr->valid & NFS_ATTR_FATTR_OWNER)
inode->i_uid = fattr->uid;
else if (nfs_server_capable(inode, NFS_CAP_OWNER))
- nfsi->cache_validity |= NFS_INO_INVALID_ATTR
- | NFS_INO_INVALID_ACCESS
- | NFS_INO_INVALID_ACL;
+ nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
if (fattr->valid & NFS_ATTR_FATTR_GROUP)
inode->i_gid = fattr->gid;
else if (nfs_server_capable(inode, NFS_CAP_OWNER_GROUP))
- nfsi->cache_validity |= NFS_INO_INVALID_ATTR
- | NFS_INO_INVALID_ACCESS
- | NFS_INO_INVALID_ACL;
+ nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED)
inode->i_blocks = fattr->du.nfs2.blocks;
if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) {
@@ -387,9 +378,10 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
unlock_new_inode(inode);
} else
nfs_refresh_inode(inode, fattr);
- dprintk("NFS: nfs_fhget(%s/%Ld ct=%d)\n",
+ dprintk("NFS: nfs_fhget(%s/%Ld fh_crc=0x%08x ct=%d)\n",
inode->i_sb->s_id,
(long long)NFS_FILEID(inode),
+ nfs_display_fhandle_hash(fh),
atomic_read(&inode->i_count));
out:
@@ -400,7 +392,7 @@ out_no_inode:
goto out;
}
-#define NFS_VALID_ATTRS (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_SIZE|ATTR_ATIME|ATTR_ATIME_SET|ATTR_MTIME|ATTR_MTIME_SET|ATTR_FILE)
+#define NFS_VALID_ATTRS (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_SIZE|ATTR_ATIME|ATTR_ATIME_SET|ATTR_MTIME|ATTR_MTIME_SET|ATTR_FILE|ATTR_OPEN)
int
nfs_setattr(struct dentry *dentry, struct iattr *attr)
@@ -422,12 +414,14 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr)
/* Optimization: if the end result is no change, don't RPC */
attr->ia_valid &= NFS_VALID_ATTRS;
- if ((attr->ia_valid & ~ATTR_FILE) == 0)
+ if ((attr->ia_valid & ~(ATTR_FILE|ATTR_OPEN)) == 0)
return 0;
/* Write all dirty data */
- if (S_ISREG(inode->i_mode))
+ if (S_ISREG(inode->i_mode)) {
+ nfs_inode_dio_wait(inode);
nfs_wb_all(inode);
+ }
fattr = nfs_alloc_fattr();
if (fattr == NULL)
@@ -511,6 +505,7 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
/* Flush out writes to the server in order to update c/mtime. */
if (S_ISREG(inode->i_mode)) {
+ nfs_inode_dio_wait(inode);
err = filemap_write_and_wait(inode->i_mapping);
if (err)
goto out;
@@ -629,23 +624,29 @@ void nfs_close_context(struct nfs_open_context *ctx, int is_sync)
nfs_revalidate_inode(server, inode);
}
-struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, struct rpc_cred *cred, fmode_t f_mode)
+struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, fmode_t f_mode)
{
struct nfs_open_context *ctx;
+ struct rpc_cred *cred = rpc_lookup_cred();
+ if (IS_ERR(cred))
+ return ERR_CAST(cred);
ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
- if (ctx != NULL) {
- nfs_sb_active(dentry->d_sb);
- ctx->dentry = dget(dentry);
- ctx->cred = get_rpccred(cred);
- ctx->state = NULL;
- ctx->mode = f_mode;
- ctx->flags = 0;
- ctx->error = 0;
- nfs_init_lock_context(&ctx->lock_context);
- ctx->lock_context.open_context = ctx;
- INIT_LIST_HEAD(&ctx->list);
+ if (!ctx) {
+ put_rpccred(cred);
+ return ERR_PTR(-ENOMEM);
}
+ nfs_sb_active(dentry->d_sb);
+ ctx->dentry = dget(dentry);
+ ctx->cred = cred;
+ ctx->state = NULL;
+ ctx->mode = f_mode;
+ ctx->flags = 0;
+ ctx->error = 0;
+ nfs_init_lock_context(&ctx->lock_context);
+ ctx->lock_context.open_context = ctx;
+ INIT_LIST_HEAD(&ctx->list);
+ ctx->mdsthreshold = NULL;
return ctx;
}
@@ -674,6 +675,7 @@ static void __put_nfs_open_context(struct nfs_open_context *ctx, int is_sync)
put_rpccred(ctx->cred);
dput(ctx->dentry);
nfs_sb_deactive(sb);
+ kfree(ctx->mdsthreshold);
kfree(ctx);
}
@@ -738,15 +740,10 @@ static void nfs_file_clear_open_context(struct file *filp)
int nfs_open(struct inode *inode, struct file *filp)
{
struct nfs_open_context *ctx;
- struct rpc_cred *cred;
- cred = rpc_lookup_cred();
- if (IS_ERR(cred))
- return PTR_ERR(cred);
- ctx = alloc_nfs_open_context(filp->f_path.dentry, cred, filp->f_mode);
- put_rpccred(cred);
- if (ctx == NULL)
- return -ENOMEM;
+ ctx = alloc_nfs_open_context(filp->f_path.dentry, filp->f_mode);
+ if (IS_ERR(ctx))
+ return PTR_ERR(ctx);
nfs_file_set_open_context(filp, ctx);
put_nfs_open_context(ctx);
nfs_fscache_set_inode_cookie(inode, filp);
@@ -867,6 +864,15 @@ static int nfs_invalidate_mapping(struct inode *inode, struct address_space *map
return 0;
}
+static bool nfs_mapping_need_revalidate_inode(struct inode *inode)
+{
+ if (nfs_have_delegated_attributes(inode))
+ return false;
+ return (NFS_I(inode)->cache_validity & NFS_INO_REVAL_PAGECACHE)
+ || nfs_attribute_timeout(inode)
+ || NFS_STALE(inode);
+}
+
/**
* nfs_revalidate_mapping - Revalidate the pagecache
* @inode - pointer to host inode
@@ -877,9 +883,7 @@ int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping)
struct nfs_inode *nfsi = NFS_I(inode);
int ret = 0;
- if ((nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE)
- || nfs_attribute_cache_expired(inode)
- || NFS_STALE(inode)) {
+ if (nfs_mapping_need_revalidate_inode(inode)) {
ret = __nfs_revalidate_inode(NFS_SERVER(inode), inode);
if (ret < 0)
goto out;
@@ -945,6 +949,8 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
unsigned long invalid = 0;
+ if (nfs_have_delegated_attributes(inode))
+ return 0;
/* Has the inode gone and changed behind our back? */
if ((fattr->valid & NFS_ATTR_FATTR_FILEID) && nfsi->fileid != fattr->fileid)
return -EIO;
@@ -957,7 +963,7 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
/* Verify a few of the more important attributes */
if ((fattr->valid & NFS_ATTR_FATTR_MTIME) && !timespec_equal(&inode->i_mtime, &fattr->mtime))
- invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE;
+ invalid |= NFS_INO_INVALID_ATTR;
if (fattr->valid & NFS_ATTR_FATTR_SIZE) {
cur_size = i_size_read(inode);
@@ -1019,6 +1025,8 @@ void nfs_fattr_init(struct nfs_fattr *fattr)
fattr->valid = 0;
fattr->time_start = jiffies;
fattr->gencount = nfs_inc_attr_generation_counter();
+ fattr->owner_name = NULL;
+ fattr->group_name = NULL;
}
struct nfs_fattr *nfs_alloc_fattr(void)
@@ -1041,6 +1049,67 @@ struct nfs_fh *nfs_alloc_fhandle(void)
return fh;
}
+#ifdef NFS_DEBUG
+/*
+ * _nfs_display_fhandle_hash - calculate the crc32 hash for the filehandle
+ * in the same way that wireshark does
+ *
+ * @fh: file handle
+ *
+ * Returns the bitwise complement of crc32() seeded with 0xFFFFFFFF and run
+ * over the first fh->size bytes of fh->data. @fh is dereferenced without a
+ * NULL check, so the caller has to pass a valid handle.
+ *
+ * For debugging only.
+ */
+u32 _nfs_display_fhandle_hash(const struct nfs_fh *fh)
+{
+ /* wireshark uses 32-bit AUTODIN crc and does a bitwise
+ * not on the result */
+ return ~crc32(0xFFFFFFFF, &fh->data[0], fh->size);
+}
+
+/*
+ * _nfs_display_fhandle - display an NFS file handle on the console
+ *
+ * @fh: file handle to display
+ * @caption: display caption
+ *
+ * An empty handle (NULL @fh or fh->size == 0) is reported with a single
+ * "is empty" line. Otherwise a summary line gives the size and the hash
+ * from _nfs_display_fhandle_hash(), followed by the data as rows of up to
+ * four 32-bit words (16 bytes per row), each word read big-endian with
+ * be32_to_cpup(). The switch on (fh->size - i - 1) >> 2 selects how many
+ * words the current row gets: 1, 2 or 3 for a short final row, else 4.
+ *
+ * NOTE(review): a size that is not a multiple of 4 is rounded up to whole
+ * words, so the last word printed reads up to 3 bytes past fh->size;
+ * presumably still inside the data array - confirm against struct nfs_fh.
+ *
+ * For debugging only.
+ */
+void _nfs_display_fhandle(const struct nfs_fh *fh, const char *caption)
+{
+ unsigned short i;
+
+ if (fh == NULL || fh->size == 0) {
+ printk(KERN_DEFAULT "%s at %p is empty\n", caption, fh);
+ return;
+ }
+
+ printk(KERN_DEFAULT "%s at %p is %u bytes, crc: 0x%08x:\n",
+ caption, fh, fh->size, _nfs_display_fhandle_hash(fh));
+ for (i = 0; i < fh->size; i += 16) {
+ __be32 *pos = (__be32 *)&fh->data[i];
+
+ switch ((fh->size - i - 1) >> 2) {
+ case 0:
+ printk(KERN_DEFAULT " %08x\n",
+ be32_to_cpup(pos));
+ break;
+ case 1:
+ printk(KERN_DEFAULT " %08x %08x\n",
+ be32_to_cpup(pos), be32_to_cpup(pos + 1));
+ break;
+ case 2:
+ printk(KERN_DEFAULT " %08x %08x %08x\n",
+ be32_to_cpup(pos), be32_to_cpup(pos + 1),
+ be32_to_cpup(pos + 2));
+ break;
+ default:
+ printk(KERN_DEFAULT " %08x %08x %08x %08x\n",
+ be32_to_cpup(pos), be32_to_cpup(pos + 1),
+ be32_to_cpup(pos + 2), be32_to_cpup(pos + 3));
+ }
+ }
+}
+#endif
+
/**
* nfs_inode_attrs_need_update - check if the inode attributes need updating
* @inode - pointer to inode
@@ -1208,18 +1277,31 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
unsigned long now = jiffies;
unsigned long save_cache_validity;
- dfprintk(VFS, "NFS: %s(%s/%ld ct=%d info=0x%x)\n",
+ dfprintk(VFS, "NFS: %s(%s/%ld fh_crc=0x%08x ct=%d info=0x%x)\n",
__func__, inode->i_sb->s_id, inode->i_ino,
+ nfs_display_fhandle_hash(NFS_FH(inode)),
atomic_read(&inode->i_count), fattr->valid);
- if ((fattr->valid & NFS_ATTR_FATTR_FILEID) && nfsi->fileid != fattr->fileid)
- goto out_fileid;
+ if ((fattr->valid & NFS_ATTR_FATTR_FILEID) && nfsi->fileid != fattr->fileid) {
+ printk(KERN_ERR "NFS: server %s error: fileid changed\n"
+ "fsid %s: expected fileid 0x%Lx, got 0x%Lx\n",
+ NFS_SERVER(inode)->nfs_client->cl_hostname,
+ inode->i_sb->s_id, (long long)nfsi->fileid,
+ (long long)fattr->fileid);
+ goto out_err;
+ }
/*
* Make sure the inode's type hasn't changed.
*/
- if ((fattr->valid & NFS_ATTR_FATTR_TYPE) && (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT))
- goto out_changed;
+ if ((fattr->valid & NFS_ATTR_FATTR_TYPE) && (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) {
+ /*
+ * Big trouble! The inode has become a different object.
+ */
+ printk(KERN_DEBUG "NFS: %s: inode %ld mode changed, %07o to %07o\n",
+ __func__, inode->i_ino, inode->i_mode, fattr->mode);
+ goto out_err;
+ }
server = NFS_SERVER(inode);
/* Update the fsid? */
@@ -1247,7 +1329,11 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
if (inode->i_version != fattr->change_attr) {
dprintk("NFS: change_attr change on server for file %s/%ld\n",
inode->i_sb->s_id, inode->i_ino);
- invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
+ invalid |= NFS_INO_INVALID_ATTR
+ | NFS_INO_INVALID_DATA
+ | NFS_INO_INVALID_ACCESS
+ | NFS_INO_INVALID_ACL
+ | NFS_INO_REVAL_PAGECACHE;
if (S_ISDIR(inode->i_mode))
nfs_force_lookup_revalidate(inode);
inode->i_version = fattr->change_attr;
@@ -1256,38 +1342,15 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
invalid |= save_cache_validity;
if (fattr->valid & NFS_ATTR_FATTR_MTIME) {
- /* NFSv2/v3: Check if the mtime agrees */
- if (!timespec_equal(&inode->i_mtime, &fattr->mtime)) {
- dprintk("NFS: mtime change on server for file %s/%ld\n",
- inode->i_sb->s_id, inode->i_ino);
- invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
- if (S_ISDIR(inode->i_mode))
- nfs_force_lookup_revalidate(inode);
- memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime));
- }
+ memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime));
} else if (server->caps & NFS_CAP_MTIME)
invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
- | NFS_INO_INVALID_DATA
- | NFS_INO_REVAL_PAGECACHE
| NFS_INO_REVAL_FORCED);
if (fattr->valid & NFS_ATTR_FATTR_CTIME) {
- /* If ctime has changed we should definitely clear access+acl caches */
- if (!timespec_equal(&inode->i_ctime, &fattr->ctime)) {
- invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
- /* and probably clear data for a directory too as utimes can cause
- * havoc with our cache.
- */
- if (S_ISDIR(inode->i_mode)) {
- invalid |= NFS_INO_INVALID_DATA;
- nfs_force_lookup_revalidate(inode);
- }
- memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime));
- }
+ memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime));
} else if (server->caps & NFS_CAP_CTIME)
invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
- | NFS_INO_INVALID_ACCESS
- | NFS_INO_INVALID_ACL
| NFS_INO_REVAL_FORCED);
/* Check if our cached file size is stale */
@@ -1399,12 +1462,6 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
nfsi->cache_validity |= invalid;
return 0;
- out_changed:
- /*
- * Big trouble! The inode has become a different object.
- */
- printk(KERN_DEBUG "%s: inode %ld mode changed, %07o to %07o\n",
- __func__, inode->i_ino, inode->i_mode, fattr->mode);
out_err:
/*
* No need to worry about unhashing the dentry, as the
@@ -1413,13 +1470,6 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
*/
nfs_invalidate_inode(inode);
return -ESTALE;
-
- out_fileid:
- printk(KERN_ERR "NFS: server %s error: fileid changed\n"
- "fsid %s: expected fileid 0x%Lx, got 0x%Lx\n",
- NFS_SERVER(inode)->nfs_client->cl_hostname, inode->i_sb->s_id,
- (long long)nfsi->fileid, (long long)fattr->fileid);
- goto out_err;
}
@@ -1433,7 +1483,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
void nfs4_evict_inode(struct inode *inode)
{
truncate_inode_pages(&inode->i_data, 0);
- end_writeback(inode);
+ clear_inode(inode);
pnfs_return_layout(inode);
pnfs_destroy_layout(NFS_I(inode));
/* If we are holding a delegation, return it! */
@@ -1464,7 +1514,6 @@ struct inode *nfs_alloc_inode(struct super_block *sb)
static void nfs_i_callback(struct rcu_head *head)
{
struct inode *inode = container_of(head, struct inode, i_rcu);
- INIT_LIST_HEAD(&inode->i_dentry);
kmem_cache_free(nfs_inode_cachep, NFS_I(inode));
}
@@ -1481,7 +1530,6 @@ static inline void nfs4_init_once(struct nfs_inode *nfsi)
nfsi->delegation_state = 0;
init_rwsem(&nfsi->rwsem);
nfsi->layout = NULL;
- atomic_set(&nfsi->commits_outstanding, 0);
#endif
}
@@ -1493,9 +1541,10 @@ static void init_once(void *foo)
INIT_LIST_HEAD(&nfsi->open_files);
INIT_LIST_HEAD(&nfsi->access_cache_entry_lru);
INIT_LIST_HEAD(&nfsi->access_cache_inode_lru);
- INIT_RADIX_TREE(&nfsi->nfs_page_tree, GFP_ATOMIC);
+ INIT_LIST_HEAD(&nfsi->commit_info.list);
nfsi->npages = 0;
- nfsi->ncommit = 0;
+ nfsi->commit_info.ncommit = 0;
+ atomic_set(&nfsi->commit_info.rpcs_out, 0);
atomic_set(&nfsi->silly_count, 1);
INIT_HLIST_HEAD(&nfsi->silly_list);
init_waitqueue_head(&nfsi->waitqueue);
@@ -1550,6 +1599,28 @@ static void nfsiod_stop(void)
destroy_workqueue(wq);
}
+int nfs_net_id;
+EXPORT_SYMBOL_GPL(nfs_net_id);
+/*
+ * Per-namespace setup hook (nfs_net_ops.init): calls nfs_clients_init()
+ * for @net, then returns whatever nfs_dns_resolver_cache_init() returns.
+ */
+static int nfs_net_init(struct net *net)
+{
+ nfs_clients_init(net);
+ return nfs_dns_resolver_cache_init(net);
+}
+/*
+ * Per-namespace teardown hook (nfs_net_ops.exit): calls
+ * nfs_dns_resolver_cache_destroy() and then nfs_cleanup_cb_ident_idr()
+ * for @net.
+ */
+static void nfs_net_exit(struct net *net)
+{
+ nfs_dns_resolver_cache_destroy(net);
+ nfs_cleanup_cb_ident_idr(net);
+}
+/*
+ * Ties the two hooks above together. Registered from init_nfs_fs() with
+ * register_pernet_subsys() and removed again with
+ * unregister_pernet_subsys() on the error path and in exit_nfs_fs().
+ * NOTE(review): .id and .size presumably make the netns core reserve
+ * sizeof(struct nfs_net) per namespace, found through nfs_net_id -
+ * confirm against the net_generic() users.
+ */
+static struct pernet_operations nfs_net_ops = {
+ .init = nfs_net_init,
+ .exit = nfs_net_exit,
+ .id = &nfs_net_id,
+ .size = sizeof(struct nfs_net),
+};
+
/*
* Initialize NFS
*/
@@ -1559,10 +1630,14 @@ static int __init init_nfs_fs(void)
err = nfs_idmap_init();
if (err < 0)
- goto out9;
+ goto out10;
err = nfs_dns_resolver_init();
if (err < 0)
+ goto out9;
+
+ err = register_pernet_subsys(&nfs_net_ops);
+ if (err < 0)
goto out8;
err = nfs_fscache_register();
@@ -1598,14 +1673,14 @@ static int __init init_nfs_fs(void)
goto out0;
#ifdef CONFIG_PROC_FS
- rpc_proc_register(&nfs_rpcstat);
+ rpc_proc_register(&init_net, &nfs_rpcstat);
#endif
if ((err = register_nfs_fs()) != 0)
goto out;
return 0;
out:
#ifdef CONFIG_PROC_FS
- rpc_proc_unregister("nfs");
+ rpc_proc_unregister(&init_net, "nfs");
#endif
nfs_destroy_directcache();
out0:
@@ -1623,10 +1698,12 @@ out5:
out6:
nfs_fscache_unregister();
out7:
- nfs_dns_resolver_destroy();
+ unregister_pernet_subsys(&nfs_net_ops);
out8:
- nfs_idmap_quit();
+ nfs_dns_resolver_destroy();
out9:
+ nfs_idmap_quit();
+out10:
return err;
}
@@ -1638,12 +1715,12 @@ static void __exit exit_nfs_fs(void)
nfs_destroy_inodecache();
nfs_destroy_nfspagecache();
nfs_fscache_unregister();
+ unregister_pernet_subsys(&nfs_net_ops);
nfs_dns_resolver_destroy();
nfs_idmap_quit();
#ifdef CONFIG_PROC_FS
- rpc_proc_unregister("nfs");
+ rpc_proc_unregister(&init_net, "nfs");
#endif
- nfs_cleanup_cb_ident_idr();
unregister_nfs_fs();
nfs_fs_proc_exit();
nfsiod_stop();
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 3f4d95751d5..18f99ef7134 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -103,6 +103,7 @@ struct nfs_parsed_mount_data {
unsigned int version;
unsigned int minorversion;
char *fscache_uniq;
+ bool need_mount;
struct {
struct sockaddr_storage address;
@@ -123,6 +124,7 @@ struct nfs_parsed_mount_data {
} nfs_server;
struct security_mnt_opts lsm_opts;
+ struct net *net;
};
/* mount_clnt.c */
@@ -137,20 +139,22 @@ struct nfs_mount_request {
int noresvport;
unsigned int *auth_flav_len;
rpc_authflavor_t *auth_flavs;
+ struct net *net;
};
extern int nfs_mount(struct nfs_mount_request *info);
extern void nfs_umount(const struct nfs_mount_request *info);
/* client.c */
-extern struct rpc_program nfs_program;
+extern const struct rpc_program nfs_program;
+extern void nfs_clients_init(struct net *net);
-extern void nfs_cleanup_cb_ident_idr(void);
+extern void nfs_cleanup_cb_ident_idr(struct net *);
extern void nfs_put_client(struct nfs_client *);
-extern struct nfs_client *nfs4_find_client_no_ident(const struct sockaddr *);
-extern struct nfs_client *nfs4_find_client_ident(int);
+extern struct nfs_client *nfs4_find_client_ident(struct net *, int);
extern struct nfs_client *
-nfs4_find_client_sessionid(const struct sockaddr *, struct nfs4_sessionid *);
+nfs4_find_client_sessionid(struct net *, const struct sockaddr *,
+ struct nfs4_sessionid *);
extern struct nfs_server *nfs_create_server(
const struct nfs_parsed_mount_data *,
struct nfs_fh *);
@@ -162,12 +166,15 @@ extern struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *,
extern void nfs_free_server(struct nfs_server *server);
extern struct nfs_server *nfs_clone_server(struct nfs_server *,
struct nfs_fh *,
- struct nfs_fattr *);
+ struct nfs_fattr *,
+ rpc_authflavor_t);
+extern int nfs_wait_client_init_complete(const struct nfs_client *clp);
extern void nfs_mark_client_ready(struct nfs_client *clp, int state);
-extern int nfs4_check_client_ready(struct nfs_client *clp);
extern struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp,
const struct sockaddr *ds_addr,
- int ds_addrlen, int ds_proto);
+ int ds_addrlen, int ds_proto,
+ unsigned int ds_timeo,
+ unsigned int ds_retrans);
#ifdef CONFIG_PROC_FS
extern int __init nfs_fs_proc_init(void);
extern void nfs_fs_proc_exit(void);
@@ -181,21 +188,11 @@ static inline void nfs_fs_proc_exit(void)
}
#endif
-/* nfs4namespace.c */
-#ifdef CONFIG_NFS_V4
-extern struct vfsmount *nfs_do_refmount(struct dentry *dentry);
-#else
-static inline
-struct vfsmount *nfs_do_refmount(struct dentry *dentry)
-{
- return ERR_PTR(-ENOENT);
-}
-#endif
-
/* callback_xdr.c */
extern struct svc_version nfs4_callback_version1;
extern struct svc_version nfs4_callback_version4;
+struct nfs_pageio_descriptor;
/* pagelist.c */
extern int __init nfs_init_nfspagecache(void);
extern void nfs_destroy_nfspagecache(void);
@@ -206,9 +203,13 @@ extern void nfs_destroy_writepagecache(void);
extern int __init nfs_init_directcache(void);
extern void nfs_destroy_directcache(void);
+extern bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount);
+extern void nfs_pgheader_init(struct nfs_pageio_descriptor *desc,
+ struct nfs_pgio_header *hdr,
+ void (*release)(struct nfs_pgio_header *hdr));
+void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos);
/* nfs2xdr.c */
-extern int nfs_stat_to_errno(enum nfs_stat);
extern struct rpc_procinfo nfs_procedures[];
extern int nfs2_decode_dirent(struct xdr_stream *,
struct nfs_entry *, int);
@@ -231,17 +232,15 @@ extern const u32 nfs41_maxwrite_overhead;
/* nfs4proc.c */
#ifdef CONFIG_NFS_V4
extern struct rpc_procinfo nfs4_procedures[];
-void nfs_fixup_secinfo_attributes(struct nfs_fattr *, struct nfs_fh *);
#endif
-extern int nfs4_init_ds_session(struct nfs_client *clp);
+extern int nfs4_init_ds_session(struct nfs_client *, unsigned long);
/* proc.c */
void nfs_close_context(struct nfs_open_context *ctx, int is_sync);
-extern int nfs_init_client(struct nfs_client *clp,
+extern struct nfs_client *nfs_init_client(struct nfs_client *clp,
const struct rpc_timeout *timeparms,
- const char *ip_addr, rpc_authflavor_t authflavour,
- int noresvport);
+ const char *ip_addr, rpc_authflavor_t authflavour);
/* dir.c */
extern int nfs_access_cache_shrinker(struct shrinker *shrink,
@@ -277,9 +276,10 @@ extern void nfs_sb_deactive(struct super_block *sb);
extern char *nfs_path(char **p, struct dentry *dentry,
char *buffer, ssize_t buflen);
extern struct vfsmount *nfs_d_automount(struct path *path);
-#ifdef CONFIG_NFS_V4
-rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *);
-#endif
+struct vfsmount *nfs_submount(struct nfs_server *, struct dentry *,
+ struct nfs_fh *, struct nfs_fattr *);
+struct vfsmount *nfs_do_submount(struct dentry *, struct nfs_fh *,
+ struct nfs_fattr *, rpc_authflavor_t);
/* getroot.c */
extern struct dentry *nfs_get_root(struct super_block *, struct nfs_fh *,
@@ -291,59 +291,95 @@ extern struct dentry *nfs4_get_root(struct super_block *, struct nfs_fh *,
extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh);
#endif
-struct nfs_pageio_descriptor;
+struct nfs_pgio_completion_ops;
/* read.c */
-extern int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt,
- const struct rpc_call_ops *call_ops);
+extern struct nfs_read_header *nfs_readhdr_alloc(void);
+extern void nfs_readhdr_free(struct nfs_pgio_header *hdr);
+extern void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
+ struct inode *inode,
+ const struct nfs_pgio_completion_ops *compl_ops);
+extern int nfs_initiate_read(struct rpc_clnt *clnt,
+ struct nfs_read_data *data,
+ const struct rpc_call_ops *call_ops, int flags);
extern void nfs_read_prepare(struct rpc_task *task, void *calldata);
extern int nfs_generic_pagein(struct nfs_pageio_descriptor *desc,
- struct list_head *head);
-
+ struct nfs_pgio_header *hdr);
extern void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio,
- struct inode *inode);
+ struct inode *inode,
+ const struct nfs_pgio_completion_ops *compl_ops);
extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio);
extern void nfs_readdata_release(struct nfs_read_data *rdata);
/* write.c */
+extern void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
+ struct inode *inode, int ioflags,
+ const struct nfs_pgio_completion_ops *compl_ops);
+extern struct nfs_write_header *nfs_writehdr_alloc(void);
+extern void nfs_writehdr_free(struct nfs_pgio_header *hdr);
extern int nfs_generic_flush(struct nfs_pageio_descriptor *desc,
- struct list_head *head);
+ struct nfs_pgio_header *hdr);
+extern void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio,
+ struct inode *inode, int ioflags,
+ const struct nfs_pgio_completion_ops *compl_ops);
extern void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio);
extern void nfs_writedata_release(struct nfs_write_data *wdata);
-extern void nfs_commit_free(struct nfs_write_data *p);
-extern int nfs_initiate_write(struct nfs_write_data *data,
- struct rpc_clnt *clnt,
+extern void nfs_commit_free(struct nfs_commit_data *p);
+extern int nfs_initiate_write(struct rpc_clnt *clnt,
+ struct nfs_write_data *data,
const struct rpc_call_ops *call_ops,
- int how);
+ int how, int flags);
extern void nfs_write_prepare(struct rpc_task *task, void *calldata);
-extern int nfs_initiate_commit(struct nfs_write_data *data,
- struct rpc_clnt *clnt,
+extern void nfs_commit_prepare(struct rpc_task *task, void *calldata);
+extern int nfs_initiate_commit(struct rpc_clnt *clnt,
+ struct nfs_commit_data *data,
const struct rpc_call_ops *call_ops,
- int how);
-extern void nfs_init_commit(struct nfs_write_data *data,
+ int how, int flags);
+extern void nfs_init_commit(struct nfs_commit_data *data,
struct list_head *head,
- struct pnfs_layout_segment *lseg);
+ struct pnfs_layout_segment *lseg,
+ struct nfs_commit_info *cinfo);
+int nfs_scan_commit_list(struct list_head *src, struct list_head *dst,
+ struct nfs_commit_info *cinfo, int max);
+int nfs_scan_commit(struct inode *inode, struct list_head *dst,
+ struct nfs_commit_info *cinfo);
+void nfs_mark_request_commit(struct nfs_page *req,
+ struct pnfs_layout_segment *lseg,
+ struct nfs_commit_info *cinfo);
+int nfs_generic_commit_list(struct inode *inode, struct list_head *head,
+ int how, struct nfs_commit_info *cinfo);
void nfs_retry_commit(struct list_head *page_list,
- struct pnfs_layout_segment *lseg);
-void nfs_commit_clear_lock(struct nfs_inode *nfsi);
-void nfs_commitdata_release(void *data);
-void nfs_commit_release_pages(struct nfs_write_data *data);
+ struct pnfs_layout_segment *lseg,
+ struct nfs_commit_info *cinfo);
+void nfs_commitdata_release(struct nfs_commit_data *data);
+void nfs_request_add_commit_list(struct nfs_page *req, struct list_head *dst,
+ struct nfs_commit_info *cinfo);
+void nfs_request_remove_commit_list(struct nfs_page *req,
+ struct nfs_commit_info *cinfo);
+void nfs_init_cinfo(struct nfs_commit_info *cinfo,
+ struct inode *inode,
+ struct nfs_direct_req *dreq);
#ifdef CONFIG_MIGRATION
extern int nfs_migrate_page(struct address_space *,
- struct page *, struct page *);
+ struct page *, struct page *, enum migrate_mode);
#else
#define nfs_migrate_page NULL
#endif
+/* direct.c */
+void nfs_init_cinfo_from_dreq(struct nfs_commit_info *cinfo,
+ struct nfs_direct_req *dreq);
+static inline void nfs_inode_dio_wait(struct inode *inode)
+{
+ inode_dio_wait(inode);
+}
+
/* nfs4proc.c */
extern void __nfs4_read_done_cb(struct nfs_read_data *);
-extern void nfs4_reset_read(struct rpc_task *task, struct nfs_read_data *data);
-extern int nfs4_init_client(struct nfs_client *clp,
+extern struct nfs_client *nfs4_init_client(struct nfs_client *clp,
const struct rpc_timeout *timeparms,
const char *ip_addr,
- rpc_authflavor_t authflavour,
- int noresvport);
-extern void nfs4_reset_write(struct rpc_task *task, struct nfs_write_data *data);
+ rpc_authflavor_t authflavour);
extern int _nfs4_call_sync(struct rpc_clnt *clnt,
struct nfs_server *server,
struct rpc_message *msg,
@@ -459,3 +495,15 @@ unsigned int nfs_page_array_len(unsigned int base, size_t len)
PAGE_SIZE - 1) >> PAGE_SHIFT;
}
+/*
+ * Convert a struct timespec into a 64-bit change attribute
+ *
+ * This does approximately the same thing as timespec_to_ns(),
+ * but for calculation efficiency, we multiply the seconds by
+ * 1024*1024*1024.
+ *
+ * @ts: timespec to convert
+ *
+ * Returns ((u64)tv_sec << 30) + tv_nsec; the shift by 30 bits is the
+ * multiplication by 1024*1024*1024 mentioned above.
+ */
+static inline
+u64 nfs_timespec_to_change_attr(const struct timespec *ts)
+{
+ return ((u64)ts->tv_sec << 30) + ts->tv_nsec;
+}
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index d4c2d6b7507..8e65c7f1f87 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -16,7 +16,7 @@
#include <linux/nfs_fs.h>
#include "internal.h"
-#ifdef RPC_DEBUG
+#ifdef NFS_DEBUG
# define NFSDBG_FACILITY NFSDBG_MOUNT
#endif
@@ -67,7 +67,7 @@ enum {
MOUNTPROC3_EXPORT = 5,
};
-static struct rpc_program mnt_program;
+static const struct rpc_program mnt_program;
/*
* Defined by OpenGroup XNFS Version 3W, chapter 8
@@ -153,7 +153,7 @@ int nfs_mount(struct nfs_mount_request *info)
.rpc_resp = &result,
};
struct rpc_create_args args = {
- .net = &init_net,
+ .net = info->net,
.protocol = info->protocol,
.address = info->sap,
.addrsize = info->salen,
@@ -225,7 +225,7 @@ void nfs_umount(const struct nfs_mount_request *info)
.to_retries = 2,
};
struct rpc_create_args args = {
- .net = &init_net,
+ .net = info->net,
.protocol = IPPROTO_UDP,
.address = info->sap,
.addrsize = info->salen,
@@ -488,19 +488,19 @@ static struct rpc_procinfo mnt3_procedures[] = {
};
-static struct rpc_version mnt_version1 = {
+static const struct rpc_version mnt_version1 = {
.number = 1,
.nrprocs = ARRAY_SIZE(mnt_procedures),
.procs = mnt_procedures,
};
-static struct rpc_version mnt_version3 = {
+static const struct rpc_version mnt_version3 = {
.number = 3,
.nrprocs = ARRAY_SIZE(mnt3_procedures),
.procs = mnt3_procedures,
};
-static struct rpc_version *mnt_version[] = {
+static const struct rpc_version *mnt_version[] = {
NULL,
&mnt_version1,
NULL,
@@ -509,7 +509,7 @@ static struct rpc_version *mnt_version[] = {
static struct rpc_stat mnt_stats;
-static struct rpc_program mnt_program = {
+static const struct rpc_program mnt_program = {
.name = "mount",
.number = NFS_MNT_PROGRAM,
.nrvers = ARRAY_SIZE(mnt_version),
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index 8102391bb37..08b9c93675d 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -26,11 +26,6 @@ static LIST_HEAD(nfs_automount_list);
static DECLARE_DELAYED_WORK(nfs_automount_task, nfs_expire_automounts);
int nfs_mountpoint_expiry_timeout = 500 * HZ;
-static struct vfsmount *nfs_do_submount(struct dentry *dentry,
- struct nfs_fh *fh,
- struct nfs_fattr *fattr,
- rpc_authflavor_t authflavor);
-
/*
* nfs_path - reconstruct the path given an arbitrary dentry
* @base - used to return pointer to the end of devname part of path
@@ -118,99 +113,6 @@ Elong:
return ERR_PTR(-ENAMETOOLONG);
}
-#ifdef CONFIG_NFS_V4
-rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *flavors)
-{
- struct gss_api_mech *mech;
- struct xdr_netobj oid;
- int i;
- rpc_authflavor_t pseudoflavor = RPC_AUTH_UNIX;
-
- for (i = 0; i < flavors->num_flavors; i++) {
- struct nfs4_secinfo_flavor *flavor;
- flavor = &flavors->flavors[i];
-
- if (flavor->flavor == RPC_AUTH_NULL || flavor->flavor == RPC_AUTH_UNIX) {
- pseudoflavor = flavor->flavor;
- break;
- } else if (flavor->flavor == RPC_AUTH_GSS) {
- oid.len = flavor->gss.sec_oid4.len;
- oid.data = flavor->gss.sec_oid4.data;
- mech = gss_mech_get_by_OID(&oid);
- if (!mech)
- continue;
- pseudoflavor = gss_svc_to_pseudoflavor(mech, flavor->gss.service);
- gss_mech_put(mech);
- break;
- }
- }
-
- return pseudoflavor;
-}
-
-static int nfs_negotiate_security(const struct dentry *parent,
- const struct dentry *dentry,
- rpc_authflavor_t *flavor)
-{
- struct page *page;
- struct nfs4_secinfo_flavors *flavors;
- int (*secinfo)(struct inode *, const struct qstr *, struct nfs4_secinfo_flavors *);
- int ret = -EPERM;
-
- secinfo = NFS_PROTO(parent->d_inode)->secinfo;
- if (secinfo != NULL) {
- page = alloc_page(GFP_KERNEL);
- if (!page) {
- ret = -ENOMEM;
- goto out;
- }
- flavors = page_address(page);
- ret = secinfo(parent->d_inode, &dentry->d_name, flavors);
- *flavor = nfs_find_best_sec(flavors);
- put_page(page);
- }
-
-out:
- return ret;
-}
-
-static int nfs_lookup_with_sec(struct nfs_server *server, struct dentry *parent,
- struct dentry *dentry, struct path *path,
- struct nfs_fh *fh, struct nfs_fattr *fattr,
- rpc_authflavor_t *flavor)
-{
- struct rpc_clnt *clone;
- struct rpc_auth *auth;
- int err;
-
- err = nfs_negotiate_security(parent, path->dentry, flavor);
- if (err < 0)
- goto out;
- clone = rpc_clone_client(server->client);
- auth = rpcauth_create(*flavor, clone);
- if (!auth) {
- err = -EIO;
- goto out_shutdown;
- }
- err = server->nfs_client->rpc_ops->lookup(clone, parent->d_inode,
- &path->dentry->d_name,
- fh, fattr);
-out_shutdown:
- rpc_shutdown_client(clone);
-out:
- return err;
-}
-#else /* CONFIG_NFS_V4 */
-static inline int nfs_lookup_with_sec(struct nfs_server *server,
- struct dentry *parent, struct dentry *dentry,
- struct path *path, struct nfs_fh *fh,
- struct nfs_fattr *fattr,
- rpc_authflavor_t *flavor)
-{
- return -EPERM;
-}
-#endif /* CONFIG_NFS_V4 */
-
/*
* nfs_d_automount - Handle crossing a mountpoint on the server
* @path - The mountpoint
@@ -227,11 +129,8 @@ struct vfsmount *nfs_d_automount(struct path *path)
{
struct vfsmount *mnt;
struct nfs_server *server = NFS_SERVER(path->dentry->d_inode);
- struct dentry *parent;
struct nfs_fh *fh = NULL;
struct nfs_fattr *fattr = NULL;
- int err;
- rpc_authflavor_t flavor = RPC_AUTH_UNIX;
dprintk("--> nfs_d_automount()\n");
@@ -247,23 +146,7 @@ struct vfsmount *nfs_d_automount(struct path *path)
dprintk("%s: enter\n", __func__);
- /* Look it up again to get its attributes */
- parent = dget_parent(path->dentry);
- err = server->nfs_client->rpc_ops->lookup(server->client, parent->d_inode,
- &path->dentry->d_name,
- fh, fattr);
- if (err == -EPERM && NFS_PROTO(parent->d_inode)->secinfo != NULL)
- err = nfs_lookup_with_sec(server, parent, path->dentry, path, fh, fattr, &flavor);
- dput(parent);
- if (err != 0) {
- mnt = ERR_PTR(err);
- goto out;
- }
-
- if (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL)
- mnt = nfs_do_refmount(path->dentry);
- else
- mnt = nfs_do_submount(path->dentry, fh, fattr, flavor);
+ mnt = server->nfs_client->rpc_ops->submount(server, path->dentry, fh, fattr);
if (IS_ERR(mnt))
goto out;
@@ -276,7 +159,10 @@ out:
nfs_free_fattr(fattr);
nfs_free_fhandle(fh);
out_nofree:
- dprintk("<-- nfs_follow_mountpoint() = %p\n", mnt);
+ if (IS_ERR(mnt))
+ dprintk("<-- %s(): error %ld\n", __func__, PTR_ERR(mnt));
+ else
+ dprintk("<-- %s() = %p\n", __func__, mnt);
return mnt;
}
@@ -333,10 +219,8 @@ static struct vfsmount *nfs_do_clone_mount(struct nfs_server *server,
* @authflavor - security flavor to use when performing the mount
*
*/
-static struct vfsmount *nfs_do_submount(struct dentry *dentry,
- struct nfs_fh *fh,
- struct nfs_fattr *fattr,
- rpc_authflavor_t authflavor)
+struct vfsmount *nfs_do_submount(struct dentry *dentry, struct nfs_fh *fh,
+ struct nfs_fattr *fattr, rpc_authflavor_t authflavor)
{
struct nfs_clone_mount mountdata = {
.sb = dentry->d_sb,
@@ -369,3 +253,18 @@ out:
dprintk("<-- nfs_do_submount() = %p\n", mnt);
return mnt;
}
+
+struct vfsmount *nfs_submount(struct nfs_server *server, struct dentry *dentry,
+ struct nfs_fh *fh, struct nfs_fattr *fattr)
+{
+ int err;
+ struct dentry *parent = dget_parent(dentry);
+
+ /*
+ * Look it up again to get its attributes: the rpc_ops lookup hook is
+ * called for @dentry's name in the parent directory, with @fh and
+ * @fattr handed over as outputs (presumably filled in on success).
+ * The parent reference from dget_parent() is dropped straight after
+ * the call, and a non-zero result goes back to the caller as an
+ * ERR_PTR(). On success the mount itself is done by nfs_do_submount()
+ * using the auth flavor of the server's RPC client
+ * (server->client->cl_auth->au_flavor).
+ */
+ err = server->nfs_client->rpc_ops->lookup(parent->d_inode, &dentry->d_name, fh, fattr);
+ dput(parent);
+ if (err != 0)
+ return ERR_PTR(err);
+
+ return nfs_do_submount(dentry, fh, fattr, server->client->cl_auth->au_flavor);
+}
diff --git a/fs/nfs/netns.h b/fs/nfs/netns.h
new file mode 100644
index 00000000000..8a6394edb8b
--- /dev/null
+++ b/fs/nfs/netns.h
@@ -0,0 +1,32 @@
+/*
+ * NFS-private data for each "struct net". Accessed with net_generic().
+ *
+ * The per-namespace storage is described by the nfs_net_ops
+ * pernet_operations, which set .size = sizeof(struct nfs_net) and
+ * .id = &nfs_net_id (nfs_net_id is declared at the bottom of this header).
+ */
+
+#ifndef __NFS_NETNS_H__
+#define __NFS_NETNS_H__
+
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
+
+struct bl_dev_msg {
+ int32_t status;
+ uint32_t major, minor;
+};
+
+struct nfs_net {
+ struct cache_detail *nfs_dns_resolve;
+ struct rpc_pipe *bl_device_pipe;
+ struct bl_dev_msg bl_mount_reply;
+ wait_queue_head_t bl_wq;
+ struct list_head nfs_client_list;
+ struct list_head nfs_volume_list;
+#ifdef CONFIG_NFS_V4
+ struct idr cb_ident_idr; /* Protected by nfs_client_lock */
+#endif
+ spinlock_t nfs_client_lock;
+ struct timespec boot_time;
+};
+
+extern int nfs_net_id;
+
+#endif
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index 792cb13a430..baf759bccd0 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -61,6 +61,7 @@
#define NFS_readdirres_sz (1)
#define NFS_statfsres_sz (1+NFS_info_sz)
+static int nfs_stat_to_errno(enum nfs_stat);
/*
* While encoding arguments, set up the reply buffer in advance to
@@ -313,6 +314,8 @@ static int decode_fattr(struct xdr_stream *xdr, struct nfs_fattr *fattr)
p = xdr_decode_time(p, &fattr->atime);
p = xdr_decode_time(p, &fattr->mtime);
xdr_decode_time(p, &fattr->ctime);
+ fattr->change_attr = nfs_timespec_to_change_attr(&fattr->ctime);
+
return 0;
out_overflow:
print_overflow_msg(__func__, xdr);
@@ -1109,7 +1112,7 @@ static const struct {
* Returns a local errno value, or -EIO if the NFS status code is
* not recognized. This function is used jointly by NFSv2 and NFSv3.
*/
-int nfs_stat_to_errno(enum nfs_stat status)
+static int nfs_stat_to_errno(enum nfs_stat status)
{
int i;
@@ -1150,7 +1153,7 @@ struct rpc_procinfo nfs_procedures[] = {
PROC(STATFS, fhandle, statfsres, 0),
};
-struct rpc_version nfs_version2 = {
+const struct rpc_version nfs_version2 = {
.number = 2,
.nrprocs = ARRAY_SIZE(nfs_procedures),
.procs = nfs_procedures
diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c
index 7ef23979896..e4498dc351a 100644
--- a/fs/nfs/nfs3acl.c
+++ b/fs/nfs/nfs3acl.c
@@ -192,7 +192,7 @@ struct posix_acl *nfs3_proc_getacl(struct inode *inode, int type)
.pages = pages,
};
struct nfs3_getaclres res = {
- 0
+ NULL,
};
struct rpc_message msg = {
.rpc_argp = &args,
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index d4bc9ed9174..2292a0fd2bf 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -17,6 +17,7 @@
#include <linux/nfs_page.h>
#include <linux/lockd/bind.h>
#include <linux/nfs_mount.h>
+#include <linux/freezer.h>
#include "iostat.h"
#include "internal.h"
@@ -32,7 +33,7 @@ nfs3_rpc_wrapper(struct rpc_clnt *clnt, struct rpc_message *msg, int flags)
res = rpc_call_sync(clnt, msg, flags);
if (res != -EJUKEBOX && res != -EKEYEXPIRED)
break;
- schedule_timeout_killable(NFS_JUKEBOX_RETRY_TIME);
+ freezable_schedule_timeout_killable(NFS_JUKEBOX_RETRY_TIME);
res = -ERESTARTSYS;
} while (!fatal_signal_pending(current));
return res;
@@ -141,7 +142,7 @@ nfs3_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
}
static int
-nfs3_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, struct qstr *name,
+nfs3_proc_lookup(struct inode *dir, struct qstr *name,
struct nfs_fh *fhandle, struct nfs_fattr *fattr)
{
struct nfs3_diropargs arg = {
@@ -397,8 +398,7 @@ nfs3_proc_remove(struct inode *dir, struct qstr *name)
{
struct nfs_removeargs arg = {
.fh = NFS_FH(dir),
- .name.len = name->len,
- .name.name = name->name,
+ .name = *name,
};
struct nfs_removeres res;
struct rpc_message msg = {
@@ -427,6 +427,11 @@ nfs3_proc_unlink_setup(struct rpc_message *msg, struct inode *dir)
msg->rpc_proc = &nfs3_procedures[NFS3PROC_REMOVE];
}
+static void nfs3_proc_unlink_rpc_prepare(struct rpc_task *task, struct nfs_unlinkdata *data)
+{
+ rpc_call_start(task);
+}
+
static int
nfs3_proc_unlink_done(struct rpc_task *task, struct inode *dir)
{
@@ -444,6 +449,11 @@ nfs3_proc_rename_setup(struct rpc_message *msg, struct inode *dir)
msg->rpc_proc = &nfs3_procedures[NFS3PROC_RENAME];
}
+static void nfs3_proc_rename_rpc_prepare(struct rpc_task *task, struct nfs_renamedata *data)
+{
+ rpc_call_start(task);
+}
+
static int
nfs3_proc_rename_done(struct rpc_task *task, struct inode *old_dir,
struct inode *new_dir)
@@ -800,11 +810,13 @@ nfs3_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
static int nfs3_read_done(struct rpc_task *task, struct nfs_read_data *data)
{
- if (nfs3_async_handle_jukebox(task, data->inode))
+ struct inode *inode = data->header->inode;
+
+ if (nfs3_async_handle_jukebox(task, inode))
return -EAGAIN;
- nfs_invalidate_atime(data->inode);
- nfs_refresh_inode(data->inode, &data->fattr);
+ nfs_invalidate_atime(inode);
+ nfs_refresh_inode(inode, &data->fattr);
return 0;
}
@@ -813,12 +825,19 @@ static void nfs3_proc_read_setup(struct nfs_read_data *data, struct rpc_message
msg->rpc_proc = &nfs3_procedures[NFS3PROC_READ];
}
+static void nfs3_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data)
+{
+ rpc_call_start(task);
+}
+
static int nfs3_write_done(struct rpc_task *task, struct nfs_write_data *data)
{
- if (nfs3_async_handle_jukebox(task, data->inode))
+ struct inode *inode = data->header->inode;
+
+ if (nfs3_async_handle_jukebox(task, inode))
return -EAGAIN;
if (task->tk_status >= 0)
- nfs_post_op_update_inode_force_wcc(data->inode, data->res.fattr);
+ nfs_post_op_update_inode_force_wcc(inode, data->res.fattr);
return 0;
}
@@ -827,7 +846,17 @@ static void nfs3_proc_write_setup(struct nfs_write_data *data, struct rpc_messag
msg->rpc_proc = &nfs3_procedures[NFS3PROC_WRITE];
}
-static int nfs3_commit_done(struct rpc_task *task, struct nfs_write_data *data)
+static void nfs3_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data)
+{
+ rpc_call_start(task);
+}
+
+static void nfs3_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data)
+{
+ rpc_call_start(task);
+}
+
+static int nfs3_commit_done(struct rpc_task *task, struct nfs_commit_data *data)
{
if (nfs3_async_handle_jukebox(task, data->inode))
return -EAGAIN;
@@ -835,7 +864,7 @@ static int nfs3_commit_done(struct rpc_task *task, struct nfs_write_data *data)
return 0;
}
-static void nfs3_proc_commit_setup(struct nfs_write_data *data, struct rpc_message *msg)
+static void nfs3_proc_commit_setup(struct nfs_commit_data *data, struct rpc_message *msg)
{
msg->rpc_proc = &nfs3_procedures[NFS3PROC_COMMIT];
}
@@ -855,6 +884,7 @@ const struct nfs_rpc_ops nfs_v3_clientops = {
.file_inode_ops = &nfs3_file_inode_operations,
.file_ops = &nfs_file_operations,
.getroot = nfs3_proc_get_root,
+ .submount = nfs_submount,
.getattr = nfs3_proc_getattr,
.setattr = nfs3_proc_setattr,
.lookup = nfs3_proc_lookup,
@@ -863,9 +893,11 @@ const struct nfs_rpc_ops nfs_v3_clientops = {
.create = nfs3_proc_create,
.remove = nfs3_proc_remove,
.unlink_setup = nfs3_proc_unlink_setup,
+ .unlink_rpc_prepare = nfs3_proc_unlink_rpc_prepare,
.unlink_done = nfs3_proc_unlink_done,
.rename = nfs3_proc_rename,
.rename_setup = nfs3_proc_rename_setup,
+ .rename_rpc_prepare = nfs3_proc_rename_rpc_prepare,
.rename_done = nfs3_proc_rename_done,
.link = nfs3_proc_link,
.symlink = nfs3_proc_symlink,
@@ -878,10 +910,13 @@ const struct nfs_rpc_ops nfs_v3_clientops = {
.pathconf = nfs3_proc_pathconf,
.decode_dirent = nfs3_decode_dirent,
.read_setup = nfs3_proc_read_setup,
+ .read_rpc_prepare = nfs3_proc_read_rpc_prepare,
.read_done = nfs3_read_done,
.write_setup = nfs3_proc_write_setup,
+ .write_rpc_prepare = nfs3_proc_write_rpc_prepare,
.write_done = nfs3_write_done,
.commit_setup = nfs3_proc_commit_setup,
+ .commit_rpc_prepare = nfs3_proc_commit_rpc_prepare,
.commit_done = nfs3_commit_done,
.lock = nfs3_proc_lock,
.clear_acl_cache = nfs3_forget_cached_acls,
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index 183c6b123d0..902de489ec9 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -86,6 +86,8 @@
XDR_QUADLEN(NFS_ACL_INLINE_BUFSIZE))
#define ACL3_setaclres_sz (1+NFS3_post_op_attr_sz)
+static int nfs3_stat_to_errno(enum nfs_stat);
+
/*
* Map file type to S_IFMT bits
*/
@@ -675,6 +677,7 @@ static int decode_fattr3(struct xdr_stream *xdr, struct nfs_fattr *fattr)
p = xdr_decode_nfstime3(p, &fattr->atime);
p = xdr_decode_nfstime3(p, &fattr->mtime);
xdr_decode_nfstime3(p, &fattr->ctime);
+ fattr->change_attr = nfs_timespec_to_change_attr(&fattr->ctime);
fattr->valid |= NFS_ATTR_FATTR_V3;
return 0;
@@ -725,12 +728,14 @@ static int decode_wcc_attr(struct xdr_stream *xdr, struct nfs_fattr *fattr)
goto out_overflow;
fattr->valid |= NFS_ATTR_FATTR_PRESIZE
+ | NFS_ATTR_FATTR_PRECHANGE
| NFS_ATTR_FATTR_PREMTIME
| NFS_ATTR_FATTR_PRECTIME;
p = xdr_decode_size3(p, &fattr->pre_size);
p = xdr_decode_nfstime3(p, &fattr->pre_mtime);
xdr_decode_nfstime3(p, &fattr->pre_ctime);
+ fattr->pre_change_attr = nfs_timespec_to_change_attr(&fattr->pre_ctime);
return 0;
out_overflow:
@@ -1287,7 +1292,7 @@ static void nfs3_xdr_enc_readdirplus3args(struct rpc_rqst *req,
* };
*/
static void encode_commit3args(struct xdr_stream *xdr,
- const struct nfs_writeargs *args)
+ const struct nfs_commitargs *args)
{
__be32 *p;
@@ -1300,7 +1305,7 @@ static void encode_commit3args(struct xdr_stream *xdr,
static void nfs3_xdr_enc_commit3args(struct rpc_rqst *req,
struct xdr_stream *xdr,
- const struct nfs_writeargs *args)
+ const struct nfs_commitargs *args)
{
encode_commit3args(xdr, args);
}
@@ -1385,7 +1390,7 @@ static int nfs3_xdr_dec_getattr3res(struct rpc_rqst *req,
out:
return error;
out_default:
- return nfs_stat_to_errno(status);
+ return nfs3_stat_to_errno(status);
}
/*
@@ -1424,7 +1429,7 @@ static int nfs3_xdr_dec_setattr3res(struct rpc_rqst *req,
out:
return error;
out_status:
- return nfs_stat_to_errno(status);
+ return nfs3_stat_to_errno(status);
}
/*
@@ -1472,7 +1477,7 @@ out_default:
error = decode_post_op_attr(xdr, result->dir_attr);
if (unlikely(error))
goto out;
- return nfs_stat_to_errno(status);
+ return nfs3_stat_to_errno(status);
}
/*
@@ -1513,7 +1518,7 @@ static int nfs3_xdr_dec_access3res(struct rpc_rqst *req,
out:
return error;
out_default:
- return nfs_stat_to_errno(status);
+ return nfs3_stat_to_errno(status);
}
/*
@@ -1554,7 +1559,7 @@ static int nfs3_xdr_dec_readlink3res(struct rpc_rqst *req,
out:
return error;
out_default:
- return nfs_stat_to_errno(status);
+ return nfs3_stat_to_errno(status);
}
/*
@@ -1636,7 +1641,7 @@ static int nfs3_xdr_dec_read3res(struct rpc_rqst *req, struct xdr_stream *xdr,
out:
return error;
out_status:
- return nfs_stat_to_errno(status);
+ return nfs3_stat_to_errno(status);
}
/*
@@ -1706,7 +1711,7 @@ static int nfs3_xdr_dec_write3res(struct rpc_rqst *req, struct xdr_stream *xdr,
out:
return error;
out_status:
- return nfs_stat_to_errno(status);
+ return nfs3_stat_to_errno(status);
}
/*
@@ -1770,7 +1775,7 @@ out_default:
error = decode_wcc_data(xdr, result->dir_attr);
if (unlikely(error))
goto out;
- return nfs_stat_to_errno(status);
+ return nfs3_stat_to_errno(status);
}
/*
@@ -1809,7 +1814,7 @@ static int nfs3_xdr_dec_remove3res(struct rpc_rqst *req,
out:
return error;
out_status:
- return nfs_stat_to_errno(status);
+ return nfs3_stat_to_errno(status);
}
/*
@@ -1853,7 +1858,7 @@ static int nfs3_xdr_dec_rename3res(struct rpc_rqst *req,
out:
return error;
out_status:
- return nfs_stat_to_errno(status);
+ return nfs3_stat_to_errno(status);
}
/*
@@ -1896,7 +1901,7 @@ static int nfs3_xdr_dec_link3res(struct rpc_rqst *req, struct xdr_stream *xdr,
out:
return error;
out_status:
- return nfs_stat_to_errno(status);
+ return nfs3_stat_to_errno(status);
}
/**
@@ -2088,7 +2093,7 @@ out_default:
error = decode_post_op_attr(xdr, result->dir_attr);
if (unlikely(error))
goto out;
- return nfs_stat_to_errno(status);
+ return nfs3_stat_to_errno(status);
}
/*
@@ -2156,7 +2161,7 @@ static int nfs3_xdr_dec_fsstat3res(struct rpc_rqst *req,
out:
return error;
out_status:
- return nfs_stat_to_errno(status);
+ return nfs3_stat_to_errno(status);
}
/*
@@ -2232,7 +2237,7 @@ static int nfs3_xdr_dec_fsinfo3res(struct rpc_rqst *req,
out:
return error;
out_status:
- return nfs_stat_to_errno(status);
+ return nfs3_stat_to_errno(status);
}
/*
@@ -2295,7 +2300,7 @@ static int nfs3_xdr_dec_pathconf3res(struct rpc_rqst *req,
out:
return error;
out_status:
- return nfs_stat_to_errno(status);
+ return nfs3_stat_to_errno(status);
}
/*
@@ -2319,7 +2324,7 @@ out_status:
*/
static int nfs3_xdr_dec_commit3res(struct rpc_rqst *req,
struct xdr_stream *xdr,
- struct nfs_writeres *result)
+ struct nfs_commitres *result)
{
enum nfs_stat status;
int error;
@@ -2336,7 +2341,7 @@ static int nfs3_xdr_dec_commit3res(struct rpc_rqst *req,
out:
return error;
out_status:
- return nfs_stat_to_errno(status);
+ return nfs3_stat_to_errno(status);
}
#ifdef CONFIG_NFS_V3_ACL
@@ -2401,7 +2406,7 @@ static int nfs3_xdr_dec_getacl3res(struct rpc_rqst *req,
out:
return error;
out_default:
- return nfs_stat_to_errno(status);
+ return nfs3_stat_to_errno(status);
}
static int nfs3_xdr_dec_setacl3res(struct rpc_rqst *req,
@@ -2420,11 +2425,76 @@ static int nfs3_xdr_dec_setacl3res(struct rpc_rqst *req,
out:
return error;
out_default:
- return nfs_stat_to_errno(status);
+ return nfs3_stat_to_errno(status);
}
#endif /* CONFIG_NFS_V3_ACL */
+
+/*
+ * We need to translate between nfs status return values and
+ * the local errno values which may not be the same.
+ */
+static const struct {
+ int stat;
+ int errno;
+} nfs_errtbl[] = {
+ { NFS_OK, 0 },
+ { NFSERR_PERM, -EPERM },
+ { NFSERR_NOENT, -ENOENT },
+ { NFSERR_IO, -errno_NFSERR_IO},
+ { NFSERR_NXIO, -ENXIO },
+/* { NFSERR_EAGAIN, -EAGAIN }, */
+ { NFSERR_ACCES, -EACCES },
+ { NFSERR_EXIST, -EEXIST },
+ { NFSERR_XDEV, -EXDEV },
+ { NFSERR_NODEV, -ENODEV },
+ { NFSERR_NOTDIR, -ENOTDIR },
+ { NFSERR_ISDIR, -EISDIR },
+ { NFSERR_INVAL, -EINVAL },
+ { NFSERR_FBIG, -EFBIG },
+ { NFSERR_NOSPC, -ENOSPC },
+ { NFSERR_ROFS, -EROFS },
+ { NFSERR_MLINK, -EMLINK },
+ { NFSERR_NAMETOOLONG, -ENAMETOOLONG },
+ { NFSERR_NOTEMPTY, -ENOTEMPTY },
+ { NFSERR_DQUOT, -EDQUOT },
+ { NFSERR_STALE, -ESTALE },
+ { NFSERR_REMOTE, -EREMOTE },
+#ifdef EWFLUSH
+ { NFSERR_WFLUSH, -EWFLUSH },
+#endif
+ { NFSERR_BADHANDLE, -EBADHANDLE },
+ { NFSERR_NOT_SYNC, -ENOTSYNC },
+ { NFSERR_BAD_COOKIE, -EBADCOOKIE },
+ { NFSERR_NOTSUPP, -ENOTSUPP },
+ { NFSERR_TOOSMALL, -ETOOSMALL },
+ { NFSERR_SERVERFAULT, -EREMOTEIO },
+ { NFSERR_BADTYPE, -EBADTYPE },
+ { NFSERR_JUKEBOX, -EJUKEBOX },
+ { -1, -EIO }
+};
+
+/**
+ * nfs3_stat_to_errno - convert an NFS status code to a local errno
+ * @status: NFS status code to convert
+ *
+ * Returns a local errno value, or -EIO if the NFS status code is
+ * not recognized. This helper is static and used only by the NFSv3 XDR code.
+ */
+static int nfs3_stat_to_errno(enum nfs_stat status)
+{
+ int i;
+
+ for (i = 0; nfs_errtbl[i].stat != -1; i++) {
+ if (nfs_errtbl[i].stat == (int)status)
+ return nfs_errtbl[i].errno;
+ }
+ dprintk("NFS: Unrecognized nfs status value: %u\n", status);
+ return nfs_errtbl[i].errno;
+}
+
+
#define PROC(proc, argtype, restype, timer) \
[NFS3PROC_##proc] = { \
.p_proc = NFS3PROC_##proc, \
@@ -2461,7 +2531,7 @@ struct rpc_procinfo nfs3_procedures[] = {
PROC(COMMIT, commit, commit, 5),
};
-struct rpc_version nfs_version3 = {
+const struct rpc_version nfs_version3 = {
.number = 3,
.nrprocs = ARRAY_SIZE(nfs3_procedures),
.procs = nfs3_procedures
@@ -2489,7 +2559,7 @@ static struct rpc_procinfo nfs3_acl_procedures[] = {
},
};
-struct rpc_version nfsacl_version3 = {
+const struct rpc_version nfsacl_version3 = {
.number = 3,
.nrprocs = sizeof(nfs3_acl_procedures)/
sizeof(nfs3_acl_procedures[0]),
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 693ae22f873..cc5900ac61b 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -20,11 +20,12 @@ enum nfs4_client_state {
NFS4CLNT_RECLAIM_REBOOT,
NFS4CLNT_RECLAIM_NOGRACE,
NFS4CLNT_DELEGRETURN,
- NFS4CLNT_LAYOUTRECALL,
NFS4CLNT_SESSION_RESET,
NFS4CLNT_RECALL_SLOT,
NFS4CLNT_LEASE_CONFIRM,
NFS4CLNT_SERVER_SCOPE_MISMATCH,
+ NFS4CLNT_PURGE_STATE,
+ NFS4CLNT_BIND_CONN_TO_SESSION,
};
enum nfs4_session_state {
@@ -44,7 +45,7 @@ struct nfs4_minor_version_ops {
struct nfs4_sequence_args *args,
struct nfs4_sequence_res *res,
int cache_reply);
- int (*validate_stateid)(struct nfs_delegation *,
+ bool (*match_stateid)(const nfs4_stateid *,
const nfs4_stateid *);
int (*find_root_sec)(struct nfs_server *, struct nfs_fh *,
struct nfs_fsinfo *);
@@ -53,26 +54,21 @@ struct nfs4_minor_version_ops {
const struct nfs4_state_maintenance_ops *state_renewal_ops;
};
-/*
- * struct rpc_sequence ensures that RPC calls are sent in the exact
- * order that they appear on the list.
- */
-struct rpc_sequence {
- struct rpc_wait_queue wait; /* RPC call delay queue */
- spinlock_t lock; /* Protects the list */
- struct list_head list; /* Defines sequence of RPC calls */
-};
-
#define NFS_SEQID_CONFIRMED 1
struct nfs_seqid_counter {
- struct rpc_sequence *sequence;
+ ktime_t create_time;
+ int owner_id;
int flags;
u32 counter;
+ spinlock_t lock; /* Protects the list */
+ struct list_head list; /* Defines sequence of RPC calls */
+ struct rpc_wait_queue wait; /* RPC call delay queue */
};
struct nfs_seqid {
struct nfs_seqid_counter *sequence;
struct list_head list;
+ struct rpc_task *task;
};
static inline void nfs_confirm_seqid(struct nfs_seqid_counter *seqid, int status)
@@ -81,19 +77,15 @@ static inline void nfs_confirm_seqid(struct nfs_seqid_counter *seqid, int status
seqid->flags |= NFS_SEQID_CONFIRMED;
}
-struct nfs_unique_id {
- struct rb_node rb_node;
- __u64 id;
-};
-
/*
* NFS4 state_owners and lock_owners are simply labels for ordered
* sequences of RPC calls. Their sole purpose is to provide once-only
* semantics by allowing the server to identify replayed requests.
*/
struct nfs4_state_owner {
- struct nfs_unique_id so_owner_id;
struct nfs_server *so_server;
+ struct list_head so_lru;
+ unsigned long so_expires;
struct rb_node so_server_node;
struct rpc_cred *so_cred; /* Associated cred */
@@ -103,7 +95,6 @@ struct nfs4_state_owner {
unsigned long so_flags;
struct list_head so_states;
struct nfs_seqid_counter so_seqid;
- struct rpc_sequence so_sequence;
};
enum {
@@ -144,8 +135,6 @@ struct nfs4_lock_state {
#define NFS_LOCK_INITIALIZED 1
int ls_flags;
struct nfs_seqid_counter ls_seqid;
- struct rpc_sequence ls_sequence;
- struct nfs_unique_id ls_id;
nfs4_stateid ls_stateid;
atomic_t ls_count;
struct nfs4_lock_owner ls_owner;
@@ -191,6 +180,7 @@ struct nfs4_exception {
long timeout;
int retry;
struct nfs4_state *state;
+ struct inode *inode;
};
struct nfs4_state_recovery_ops {
@@ -212,17 +202,29 @@ struct nfs4_state_maintenance_ops {
extern const struct dentry_operations nfs4_dentry_operations;
extern const struct inode_operations nfs4_dir_inode_operations;
+/* nfs4namespace.c */
+rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *);
+struct rpc_clnt *nfs4_create_sec_client(struct rpc_clnt *, struct inode *, struct qstr *);
+struct vfsmount *nfs4_submount(struct nfs_server *, struct dentry *,
+ struct nfs_fh *, struct nfs_fattr *);
+
/* nfs4proc.c */
extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *, struct nfs4_setclientid_res *);
extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct nfs4_setclientid_res *arg, struct rpc_cred *);
+extern int nfs4_proc_get_rootfh(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *);
+extern int nfs4_proc_bind_conn_to_session(struct nfs_client *, struct rpc_cred *cred);
extern int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred);
+extern int nfs4_destroy_clientid(struct nfs_client *clp);
extern int nfs4_init_clientid(struct nfs_client *, struct rpc_cred *);
extern int nfs41_init_clientid(struct nfs_client *, struct rpc_cred *);
extern int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait, bool roc);
extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle);
-extern int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name,
- struct nfs4_fs_locations *fs_locations, struct page *page);
-extern void nfs4_release_lockowner(const struct nfs4_lock_state *);
+extern int nfs4_proc_fs_locations(struct rpc_clnt *, struct inode *, const struct qstr *,
+ struct nfs4_fs_locations *, struct page *);
+extern struct rpc_clnt *nfs4_proc_lookup_mountpoint(struct inode *, struct qstr *,
+ struct nfs_fh *, struct nfs_fattr *);
+extern int nfs4_proc_secinfo(struct inode *, const struct qstr *, struct nfs4_secinfo_flavors *);
+extern int nfs4_release_lockowner(struct nfs4_lock_state *);
extern const struct xattr_handler *nfs4_xattr_handlers[];
#if defined(CONFIG_NFS_V4_1)
@@ -231,16 +233,17 @@ static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *ser
return server->nfs_client->cl_session;
}
+extern bool nfs4_set_task_privileged(struct rpc_task *task, void *dummy);
extern int nfs4_setup_sequence(const struct nfs_server *server,
struct nfs4_sequence_args *args, struct nfs4_sequence_res *res,
- int cache_reply, struct rpc_task *task);
+ struct rpc_task *task);
extern int nfs41_setup_sequence(struct nfs4_session *session,
struct nfs4_sequence_args *args, struct nfs4_sequence_res *res,
- int cache_reply, struct rpc_task *task);
+ struct rpc_task *task);
extern void nfs4_destroy_session(struct nfs4_session *session);
extern struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp);
-extern int nfs4_proc_create_session(struct nfs_client *);
-extern int nfs4_proc_destroy_session(struct nfs4_session *);
+extern int nfs4_proc_create_session(struct nfs_client *, struct rpc_cred *);
+extern int nfs4_proc_destroy_session(struct nfs4_session *, struct rpc_cred *);
extern int nfs4_init_session(struct nfs_server *server);
extern int nfs4_proc_get_lease_time(struct nfs_client *clp,
struct nfs_fsinfo *fsinfo);
@@ -267,7 +270,7 @@ static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *ser
static inline int nfs4_setup_sequence(const struct nfs_server *server,
struct nfs4_sequence_args *args, struct nfs4_sequence_res *res,
- int cache_reply, struct rpc_task *task)
+ struct rpc_task *task)
{
return 0;
}
@@ -292,7 +295,7 @@ is_ds_client(struct nfs_client *clp)
extern const struct nfs4_minor_version_ops *nfs_v4_minor_ops[];
-extern const u32 nfs4_fattr_bitmap[2];
+extern const u32 nfs4_fattr_bitmap[3];
extern const u32 nfs4_statfs_bitmap[2];
extern const u32 nfs4_pathconf_bitmap[2];
extern const u32 nfs4_fsinfo_bitmap[3];
@@ -310,20 +313,23 @@ struct rpc_cred *nfs4_get_renew_cred_locked(struct nfs_client *clp);
#if defined(CONFIG_NFS_V4_1)
struct rpc_cred *nfs4_get_machine_cred_locked(struct nfs_client *clp);
struct rpc_cred *nfs4_get_exchange_id_cred(struct nfs_client *clp);
-extern void nfs4_schedule_session_recovery(struct nfs4_session *);
+extern void nfs4_schedule_session_recovery(struct nfs4_session *, int);
#else
-static inline void nfs4_schedule_session_recovery(struct nfs4_session *session)
+static inline void nfs4_schedule_session_recovery(struct nfs4_session *session, int err)
{
}
#endif /* CONFIG_NFS_V4_1 */
-extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *);
+extern struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *, gfp_t);
extern void nfs4_put_state_owner(struct nfs4_state_owner *);
+extern void nfs4_purge_state_owners(struct nfs_server *);
extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state_owner *);
extern void nfs4_put_open_state(struct nfs4_state *);
extern void nfs4_close_state(struct nfs4_state *, fmode_t);
extern void nfs4_close_sync(struct nfs4_state *, fmode_t);
extern void nfs4_state_set_mode_locked(struct nfs4_state *, fmode_t);
+extern void nfs_inode_find_state_and_recover(struct inode *inode,
+ const nfs4_stateid *stateid);
extern void nfs4_schedule_lease_recovery(struct nfs_client *);
extern void nfs4_schedule_state_manager(struct nfs_client *);
extern void nfs4_schedule_path_down_recovery(struct nfs_client *clp);
@@ -331,10 +337,11 @@ extern void nfs4_schedule_stateid_recovery(const struct nfs_server *, struct nfs
extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags);
extern void nfs41_handle_recall_slot(struct nfs_client *clp);
extern void nfs41_handle_server_scope(struct nfs_client *,
- struct server_scope **);
+ struct nfs41_server_scope **);
extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp);
extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl);
-extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t, pid_t);
+extern void nfs4_select_rw_stateid(nfs4_stateid *, struct nfs4_state *,
+ fmode_t, fl_owner_t, pid_t);
extern struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter, gfp_t gfp_mask);
extern int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task);
@@ -343,6 +350,8 @@ extern void nfs_increment_lock_seqid(int status, struct nfs_seqid *seqid);
extern void nfs_release_seqid(struct nfs_seqid *seqid);
extern void nfs_free_seqid(struct nfs_seqid *seqid);
+extern void nfs4_free_lock_state(struct nfs_server *server, struct nfs4_lock_state *lsp);
+
extern const nfs4_stateid zero_stateid;
/* nfs4xdr.c */
@@ -354,6 +363,16 @@ struct nfs4_mount_data;
extern struct svc_version nfs4_callback_version1;
extern struct svc_version nfs4_callback_version4;
+static inline void nfs4_stateid_copy(nfs4_stateid *dst, const nfs4_stateid *src)
+{
+ memcpy(dst, src, sizeof(*dst));
+}
+
+static inline bool nfs4_stateid_match(const nfs4_stateid *dst, const nfs4_stateid *src)
+{
+ return memcmp(dst, src, sizeof(*dst)) == 0;
+}
+
#else
#define nfs4_close_state(a, b) do { } while (0)
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index a62d36b9a99..e1340293872 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -33,7 +33,10 @@
#include <linux/nfs_page.h>
#include <linux/module.h>
+#include <linux/sunrpc/metrics.h>
+
#include "internal.h"
+#include "delegation.h"
#include "nfs4filelayout.h"
#define NFSDBG_FACILITY NFSDBG_PNFS_LD
@@ -49,13 +52,14 @@ filelayout_get_dense_offset(struct nfs4_filelayout_segment *flseg,
loff_t offset)
{
u32 stripe_width = flseg->stripe_unit * flseg->dsaddr->stripe_count;
- u64 tmp;
+ u64 stripe_no;
+ u32 rem;
offset -= flseg->pattern_offset;
- tmp = offset;
- do_div(tmp, stripe_width);
+ stripe_no = div_u64(offset, stripe_width);
+ div_u64_rem(offset, flseg->stripe_unit, &rem);
- return tmp * flseg->stripe_unit + do_div(offset, flseg->stripe_unit);
+ return stripe_no * flseg->stripe_unit + rem;
}
/* This function is used by the layout driver to calculate the
@@ -78,17 +82,79 @@ filelayout_get_dserver_offset(struct pnfs_layout_segment *lseg, loff_t offset)
BUG();
}
+static void filelayout_reset_write(struct nfs_write_data *data)
+{
+ struct nfs_pgio_header *hdr = data->header;
+ struct rpc_task *task = &data->task;
+
+ if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
+ dprintk("%s Reset task %5u for i/o through MDS "
+ "(req %s/%lld, %u bytes @ offset %llu)\n", __func__,
+ data->task.tk_pid,
+ hdr->inode->i_sb->s_id,
+ (long long)NFS_FILEID(hdr->inode),
+ data->args.count,
+ (unsigned long long)data->args.offset);
+
+ task->tk_status = pnfs_write_done_resend_to_mds(hdr->inode,
+ &hdr->pages,
+ hdr->completion_ops);
+ }
+}
+
+static void filelayout_reset_read(struct nfs_read_data *data)
+{
+ struct nfs_pgio_header *hdr = data->header;
+ struct rpc_task *task = &data->task;
+
+ if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
+ dprintk("%s Reset task %5u for i/o through MDS "
+ "(req %s/%lld, %u bytes @ offset %llu)\n", __func__,
+ data->task.tk_pid,
+ hdr->inode->i_sb->s_id,
+ (long long)NFS_FILEID(hdr->inode),
+ data->args.count,
+ (unsigned long long)data->args.offset);
+
+ task->tk_status = pnfs_read_done_resend_to_mds(hdr->inode,
+ &hdr->pages,
+ hdr->completion_ops);
+ }
+}
+
static int filelayout_async_handle_error(struct rpc_task *task,
struct nfs4_state *state,
struct nfs_client *clp,
- int *reset)
+ struct pnfs_layout_segment *lseg)
{
+ struct inode *inode = lseg->pls_layout->plh_inode;
+ struct nfs_server *mds_server = NFS_SERVER(inode);
+ struct nfs4_deviceid_node *devid = FILELAYOUT_DEVID_NODE(lseg);
+ struct nfs_client *mds_client = mds_server->nfs_client;
+ struct nfs4_slot_table *tbl = &clp->cl_session->fc_slot_table;
+
if (task->tk_status >= 0)
return 0;
- *reset = 0;
-
switch (task->tk_status) {
+ /* MDS state errors */
+ case -NFS4ERR_DELEG_REVOKED:
+ case -NFS4ERR_ADMIN_REVOKED:
+ case -NFS4ERR_BAD_STATEID:
+ if (state == NULL)
+ break;
+ nfs_remove_bad_delegation(state->inode);
+ case -NFS4ERR_OPENMODE:
+ if (state == NULL)
+ break;
+ nfs4_schedule_stateid_recovery(mds_server, state);
+ goto wait_on_recovery;
+ case -NFS4ERR_EXPIRED:
+ if (state != NULL)
+ nfs4_schedule_stateid_recovery(mds_server, state);
+ nfs4_schedule_lease_recovery(mds_client);
+ goto wait_on_recovery;
+ /* DS session errors */
case -NFS4ERR_BADSESSION:
case -NFS4ERR_BADSLOT:
case -NFS4ERR_BAD_HIGH_SLOT:
@@ -99,7 +165,7 @@ static int filelayout_async_handle_error(struct rpc_task *task,
dprintk("%s ERROR %d, Reset session. Exchangeid "
"flags 0x%x\n", __func__, task->tk_status,
clp->cl_exchange_flags);
- nfs4_schedule_session_recovery(clp->cl_session);
+ nfs4_schedule_session_recovery(clp->cl_session, task->tk_status);
break;
case -NFS4ERR_DELAY:
case -NFS4ERR_GRACE:
@@ -108,14 +174,57 @@ static int filelayout_async_handle_error(struct rpc_task *task,
break;
case -NFS4ERR_RETRY_UNCACHED_REP:
break;
+ /* Invalidate Layout errors */
+ case -NFS4ERR_PNFS_NO_LAYOUT:
+ case -ESTALE: /* mapped NFS4ERR_STALE */
+ case -EBADHANDLE: /* mapped NFS4ERR_BADHANDLE */
+ case -EISDIR: /* mapped NFS4ERR_ISDIR */
+ case -NFS4ERR_FHEXPIRED:
+ case -NFS4ERR_WRONG_TYPE:
+ dprintk("%s Invalid layout error %d\n", __func__,
+ task->tk_status);
+ /*
+ * Destroy layout so new i/o will get a new layout.
+ * Layout will not be destroyed until all current lseg
+ * references are put. Mark layout as invalid to resend failed
+ * i/o and all i/o waiting on the slot table to the MDS until
+ * layout is destroyed and a new valid layout is obtained.
+ */
+ set_bit(NFS_LAYOUT_INVALID,
+ &NFS_I(inode)->layout->plh_flags);
+ pnfs_destroy_layout(NFS_I(inode));
+ rpc_wake_up(&tbl->slot_tbl_waitq);
+ goto reset;
+ /* RPC connection errors */
+ case -ECONNREFUSED:
+ case -EHOSTDOWN:
+ case -EHOSTUNREACH:
+ case -ENETUNREACH:
+ case -EIO:
+ case -ETIMEDOUT:
+ case -EPIPE:
+ dprintk("%s DS connection error %d\n", __func__,
+ task->tk_status);
+ if (!filelayout_test_devid_invalid(devid))
+ _pnfs_return_layout(inode);
+ filelayout_mark_devid_invalid(devid);
+ rpc_wake_up(&tbl->slot_tbl_waitq);
+ nfs4_ds_disconnect(clp);
+ /* fall through */
default:
- dprintk("%s DS error. Retry through MDS %d\n", __func__,
+reset:
+ dprintk("%s Retry through MDS. Error %d\n", __func__,
task->tk_status);
- *reset = 1;
- break;
+ return -NFS4ERR_RESET_TO_MDS;
}
+out:
task->tk_status = 0;
return -EAGAIN;
+wait_on_recovery:
+ rpc_sleep_on(&mds_client->cl_rpcwaitq, task, NULL);
+ if (test_bit(NFS4CLNT_MANAGER_RUNNING, &mds_client->cl_state) == 0)
+ rpc_wake_up_queued_task(&mds_client->cl_rpcwaitq, task);
+ goto out;
}
/* NFS_PROTO call done callback routines */
@@ -123,18 +232,17 @@ static int filelayout_async_handle_error(struct rpc_task *task,
static int filelayout_read_done_cb(struct rpc_task *task,
struct nfs_read_data *data)
{
- int reset = 0;
+ struct nfs_pgio_header *hdr = data->header;
+ int err;
- dprintk("%s DS read\n", __func__);
+ err = filelayout_async_handle_error(task, data->args.context->state,
+ data->ds_clp, hdr->lseg);
- if (filelayout_async_handle_error(task, data->args.context->state,
- data->ds_clp, &reset) == -EAGAIN) {
- dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n",
- __func__, data->ds_clp, data->ds_clp->cl_session);
- if (reset) {
- pnfs_set_lo_fail(data->lseg);
- nfs4_reset_read(task, data);
- }
+ switch (err) {
+ case -NFS4ERR_RESET_TO_MDS:
+ filelayout_reset_read(data);
+ return task->tk_status;
+ case -EAGAIN:
rpc_restart_call_prepare(task);
return -EAGAIN;
}
@@ -150,13 +258,15 @@ static int filelayout_read_done_cb(struct rpc_task *task,
static void
filelayout_set_layoutcommit(struct nfs_write_data *wdata)
{
- if (FILELAYOUT_LSEG(wdata->lseg)->commit_through_mds ||
+ struct nfs_pgio_header *hdr = wdata->header;
+
+ if (FILELAYOUT_LSEG(hdr->lseg)->commit_through_mds ||
wdata->res.verf->committed == NFS_FILE_SYNC)
return;
pnfs_set_layoutcommit(wdata);
- dprintk("%s ionde %lu pls_end_pos %lu\n", __func__, wdata->inode->i_ino,
- (unsigned long) NFS_I(wdata->inode)->layout->plh_lwb);
+ dprintk("%s ionde %lu pls_end_pos %lu\n", __func__, hdr->inode->i_ino,
+ (unsigned long) NFS_I(hdr->inode)->layout->plh_lwb);
}
/*
@@ -166,13 +276,19 @@ filelayout_set_layoutcommit(struct nfs_write_data *wdata)
*/
static void filelayout_read_prepare(struct rpc_task *task, void *data)
{
- struct nfs_read_data *rdata = (struct nfs_read_data *)data;
+ struct nfs_read_data *rdata = data;
+ if (filelayout_reset_to_mds(rdata->header->lseg)) {
+ dprintk("%s task %u reset io to MDS\n", __func__, task->tk_pid);
+ filelayout_reset_read(rdata);
+ rpc_exit(task, 0);
+ return;
+ }
rdata->read_done_cb = filelayout_read_done_cb;
if (nfs41_setup_sequence(rdata->ds_clp->cl_session,
&rdata->args.seq_args, &rdata->res.seq_res,
- 0, task))
+ task))
return;
rpc_call_start(task);
@@ -180,34 +296,47 @@ static void filelayout_read_prepare(struct rpc_task *task, void *data)
static void filelayout_read_call_done(struct rpc_task *task, void *data)
{
- struct nfs_read_data *rdata = (struct nfs_read_data *)data;
+ struct nfs_read_data *rdata = data;
dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status);
+ if (test_bit(NFS_IOHDR_REDO, &rdata->header->flags) &&
+ task->tk_status == 0)
+ return;
+
/* Note this may cause RPC to be resent */
- rdata->mds_ops->rpc_call_done(task, data);
+ rdata->header->mds_ops->rpc_call_done(task, data);
+}
+
+static void filelayout_read_count_stats(struct rpc_task *task, void *data)
+{
+ struct nfs_read_data *rdata = data;
+
+ rpc_count_iostats(task, NFS_SERVER(rdata->header->inode)->client->cl_metrics);
}
static void filelayout_read_release(void *data)
{
- struct nfs_read_data *rdata = (struct nfs_read_data *)data;
+ struct nfs_read_data *rdata = data;
- rdata->mds_ops->rpc_release(data);
+ nfs_put_client(rdata->ds_clp);
+ rdata->header->mds_ops->rpc_release(data);
}
static int filelayout_write_done_cb(struct rpc_task *task,
struct nfs_write_data *data)
{
- int reset = 0;
+ struct nfs_pgio_header *hdr = data->header;
+ int err;
- if (filelayout_async_handle_error(task, data->args.context->state,
- data->ds_clp, &reset) == -EAGAIN) {
- dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n",
- __func__, data->ds_clp, data->ds_clp->cl_session);
- if (reset) {
- pnfs_set_lo_fail(data->lseg);
- nfs4_reset_write(task, data);
- }
+ err = filelayout_async_handle_error(task, data->args.context->state,
+ data->ds_clp, hdr->lseg);
+
+ switch (err) {
+ case -NFS4ERR_RESET_TO_MDS:
+ filelayout_reset_write(data);
+ return task->tk_status;
+ case -EAGAIN:
rpc_restart_call_prepare(task);
return -EAGAIN;
}
@@ -217,7 +346,7 @@ static int filelayout_write_done_cb(struct rpc_task *task,
}
/* Fake up some data that will cause nfs_commit_release to retry the writes. */
-static void prepare_to_resend_writes(struct nfs_write_data *data)
+static void prepare_to_resend_writes(struct nfs_commit_data *data)
{
struct nfs_page *first = nfs_list_entry(data->pages.next);
@@ -228,19 +357,19 @@ static void prepare_to_resend_writes(struct nfs_write_data *data)
}
static int filelayout_commit_done_cb(struct rpc_task *task,
- struct nfs_write_data *data)
+ struct nfs_commit_data *data)
{
- int reset = 0;
+ int err;
- if (filelayout_async_handle_error(task, data->args.context->state,
- data->ds_clp, &reset) == -EAGAIN) {
- dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n",
- __func__, data->ds_clp, data->ds_clp->cl_session);
- if (reset) {
- prepare_to_resend_writes(data);
- pnfs_set_lo_fail(data->lseg);
- } else
- rpc_restart_call_prepare(task);
+ err = filelayout_async_handle_error(task, NULL, data->ds_clp,
+ data->lseg);
+
+ switch (err) {
+ case -NFS4ERR_RESET_TO_MDS:
+ prepare_to_resend_writes(data);
+ return -EAGAIN;
+ case -EAGAIN:
+ rpc_restart_call_prepare(task);
return -EAGAIN;
}
@@ -249,11 +378,17 @@ static int filelayout_commit_done_cb(struct rpc_task *task,
static void filelayout_write_prepare(struct rpc_task *task, void *data)
{
- struct nfs_write_data *wdata = (struct nfs_write_data *)data;
+ struct nfs_write_data *wdata = data;
+ if (filelayout_reset_to_mds(wdata->header->lseg)) {
+ dprintk("%s task %u reset io to MDS\n", __func__, task->tk_pid);
+ filelayout_reset_write(wdata);
+ rpc_exit(task, 0);
+ return;
+ }
if (nfs41_setup_sequence(wdata->ds_clp->cl_session,
&wdata->args.seq_args, &wdata->res.seq_res,
- 0, task))
+ task))
return;
rpc_call_start(task);
@@ -261,51 +396,94 @@ static void filelayout_write_prepare(struct rpc_task *task, void *data)
static void filelayout_write_call_done(struct rpc_task *task, void *data)
{
- struct nfs_write_data *wdata = (struct nfs_write_data *)data;
+ struct nfs_write_data *wdata = data;
+
+ if (test_bit(NFS_IOHDR_REDO, &wdata->header->flags) &&
+ task->tk_status == 0)
+ return;
/* Note this may cause RPC to be resent */
- wdata->mds_ops->rpc_call_done(task, data);
+ wdata->header->mds_ops->rpc_call_done(task, data);
+}
+
+static void filelayout_write_count_stats(struct rpc_task *task, void *data)
+{
+ struct nfs_write_data *wdata = data;
+
+ rpc_count_iostats(task, NFS_SERVER(wdata->header->inode)->client->cl_metrics);
}
static void filelayout_write_release(void *data)
{
- struct nfs_write_data *wdata = (struct nfs_write_data *)data;
+ struct nfs_write_data *wdata = data;
+
+ nfs_put_client(wdata->ds_clp);
+ wdata->header->mds_ops->rpc_release(data);
+}
+
+static void filelayout_commit_prepare(struct rpc_task *task, void *data)
+{
+ struct nfs_commit_data *wdata = data;
+
+ if (nfs41_setup_sequence(wdata->ds_clp->cl_session,
+ &wdata->args.seq_args, &wdata->res.seq_res,
+ task))
+ return;
- wdata->mds_ops->rpc_release(data);
+ rpc_call_start(task);
}
-static void filelayout_commit_release(void *data)
+static void filelayout_write_commit_done(struct rpc_task *task, void *data)
{
- struct nfs_write_data *wdata = (struct nfs_write_data *)data;
+ struct nfs_commit_data *wdata = data;
- nfs_commit_release_pages(wdata);
- if (atomic_dec_and_test(&NFS_I(wdata->inode)->commits_outstanding))
- nfs_commit_clear_lock(NFS_I(wdata->inode));
- nfs_commitdata_release(wdata);
+ /* Note this may cause RPC to be resent */
+ wdata->mds_ops->rpc_call_done(task, data);
}
-struct rpc_call_ops filelayout_read_call_ops = {
+static void filelayout_commit_count_stats(struct rpc_task *task, void *data)
+{
+ struct nfs_commit_data *cdata = data;
+
+ rpc_count_iostats(task, NFS_SERVER(cdata->inode)->client->cl_metrics);
+}
+
+static void filelayout_commit_release(void *calldata)
+{
+ struct nfs_commit_data *data = calldata;
+
+ data->completion_ops->completion(data);
+ put_lseg(data->lseg);
+ nfs_put_client(data->ds_clp);
+ nfs_commitdata_release(data);
+}
+
+static const struct rpc_call_ops filelayout_read_call_ops = {
.rpc_call_prepare = filelayout_read_prepare,
.rpc_call_done = filelayout_read_call_done,
+ .rpc_count_stats = filelayout_read_count_stats,
.rpc_release = filelayout_read_release,
};
-struct rpc_call_ops filelayout_write_call_ops = {
+static const struct rpc_call_ops filelayout_write_call_ops = {
.rpc_call_prepare = filelayout_write_prepare,
.rpc_call_done = filelayout_write_call_done,
+ .rpc_count_stats = filelayout_write_count_stats,
.rpc_release = filelayout_write_release,
};
-struct rpc_call_ops filelayout_commit_call_ops = {
- .rpc_call_prepare = filelayout_write_prepare,
- .rpc_call_done = filelayout_write_call_done,
+static const struct rpc_call_ops filelayout_commit_call_ops = {
+ .rpc_call_prepare = filelayout_commit_prepare,
+ .rpc_call_done = filelayout_write_commit_done,
+ .rpc_count_stats = filelayout_commit_count_stats,
.rpc_release = filelayout_commit_release,
};
static enum pnfs_try_status
filelayout_read_pagelist(struct nfs_read_data *data)
{
- struct pnfs_layout_segment *lseg = data->lseg;
+ struct nfs_pgio_header *hdr = data->header;
+ struct pnfs_layout_segment *lseg = hdr->lseg;
struct nfs4_pnfs_ds *ds;
loff_t offset = data->args.offset;
u32 j, idx;
@@ -313,25 +491,20 @@ filelayout_read_pagelist(struct nfs_read_data *data)
int status;
dprintk("--> %s ino %lu pgbase %u req %Zu@%llu\n",
- __func__, data->inode->i_ino,
+ __func__, hdr->inode->i_ino,
data->args.pgbase, (size_t)data->args.count, offset);
- if (test_bit(NFS_DEVICEID_INVALID, &FILELAYOUT_DEVID_NODE(lseg)->flags))
- return PNFS_NOT_ATTEMPTED;
-
/* Retrieve the correct rpc_client for the byte range */
j = nfs4_fl_calc_j_index(lseg, offset);
idx = nfs4_fl_calc_ds_index(lseg, j);
ds = nfs4_fl_prepare_ds(lseg, idx);
- if (!ds) {
- /* Either layout fh index faulty, or ds connect failed */
- set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags);
- set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags);
+ if (!ds)
return PNFS_NOT_ATTEMPTED;
- }
- dprintk("%s USE DS: %s\n", __func__, ds->ds_remotestr);
+ dprintk("%s USE DS: %s cl_count %d\n", __func__,
+ ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count));
/* No multipath support. Use first DS */
+ atomic_inc(&ds->ds_clp->cl_count);
data->ds_clp = ds->ds_clp;
fh = nfs4_fl_select_ds_fh(lseg, j);
if (fh)
@@ -341,8 +514,8 @@ filelayout_read_pagelist(struct nfs_read_data *data)
data->mds_offset = offset;
/* Perform an asynchronous read to ds */
- status = nfs_initiate_read(data, ds->ds_clp->cl_rpcclient,
- &filelayout_read_call_ops);
+ status = nfs_initiate_read(ds->ds_clp->cl_rpcclient, data,
+ &filelayout_read_call_ops, RPC_TASK_SOFTCONN);
BUG_ON(status != 0);
return PNFS_ATTEMPTED;
}
@@ -351,31 +524,26 @@ filelayout_read_pagelist(struct nfs_read_data *data)
static enum pnfs_try_status
filelayout_write_pagelist(struct nfs_write_data *data, int sync)
{
- struct pnfs_layout_segment *lseg = data->lseg;
+ struct nfs_pgio_header *hdr = data->header;
+ struct pnfs_layout_segment *lseg = hdr->lseg;
struct nfs4_pnfs_ds *ds;
loff_t offset = data->args.offset;
u32 j, idx;
struct nfs_fh *fh;
int status;
- if (test_bit(NFS_DEVICEID_INVALID, &FILELAYOUT_DEVID_NODE(lseg)->flags))
- return PNFS_NOT_ATTEMPTED;
-
/* Retrieve the correct rpc_client for the byte range */
j = nfs4_fl_calc_j_index(lseg, offset);
idx = nfs4_fl_calc_ds_index(lseg, j);
ds = nfs4_fl_prepare_ds(lseg, idx);
- if (!ds) {
- printk(KERN_ERR "%s: prepare_ds failed, use MDS\n", __func__);
- set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags);
- set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags);
+ if (!ds)
return PNFS_NOT_ATTEMPTED;
- }
- dprintk("%s ino %lu sync %d req %Zu@%llu DS: %s\n", __func__,
- data->inode->i_ino, sync, (size_t) data->args.count, offset,
- ds->ds_remotestr);
+ dprintk("%s ino %lu sync %d req %Zu@%llu DS: %s cl_count %d\n",
+ __func__, hdr->inode->i_ino, sync, (size_t) data->args.count,
+ offset, ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count));
data->write_done_cb = filelayout_write_done_cb;
+ atomic_inc(&ds->ds_clp->cl_count);
data->ds_clp = ds->ds_clp;
fh = nfs4_fl_select_ds_fh(lseg, j);
if (fh)
@@ -387,8 +555,9 @@ filelayout_write_pagelist(struct nfs_write_data *data, int sync)
data->args.offset = filelayout_get_dserver_offset(lseg, offset);
/* Perform an asynchronous write */
- status = nfs_initiate_write(data, ds->ds_clp->cl_rpcclient,
- &filelayout_write_call_ops, sync);
+ status = nfs_initiate_write(ds->ds_clp->cl_rpcclient, data,
+ &filelayout_write_call_ops, sync,
+ RPC_TASK_SOFTCONN);
BUG_ON(status != 0);
return PNFS_ATTEMPTED;
}
@@ -574,7 +743,7 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo,
goto out_err_free;
fl->fh_array[i]->size = be32_to_cpup(p++);
if (sizeof(struct nfs_fh) < fl->fh_array[i]->size) {
- printk(KERN_ERR "Too big fh %d received %d\n",
+ printk(KERN_ERR "NFS: Too big fh %d received %d\n",
i, fl->fh_array[i]->size);
goto out_err_free;
}
@@ -604,10 +773,65 @@ filelayout_free_lseg(struct pnfs_layout_segment *lseg)
dprintk("--> %s\n", __func__);
nfs4_fl_put_deviceid(fl->dsaddr);
- kfree(fl->commit_buckets);
+ /* This assumes a single RW lseg */
+ if (lseg->pls_range.iomode == IOMODE_RW) {
+ struct nfs4_filelayout *flo;
+
+ flo = FILELAYOUT_FROM_HDR(lseg->pls_layout);
+ flo->commit_info.nbuckets = 0;
+ kfree(flo->commit_info.buckets);
+ flo->commit_info.buckets = NULL;
+ }
_filelayout_free_lseg(fl);
}
+static int
+filelayout_alloc_commit_info(struct pnfs_layout_segment *lseg,
+ struct nfs_commit_info *cinfo,
+ gfp_t gfp_flags)
+{
+ struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg);
+ struct pnfs_commit_bucket *buckets;
+ int size;
+
+ if (fl->commit_through_mds)
+ return 0;
+ if (cinfo->ds->nbuckets != 0) {
+ /* This assumes there is only one IOMODE_RW lseg. What
+ * we really want to do is have a layout_hdr level
+ * dictionary of <multipath_list4, fh> keys, each
+ * associated with a struct list_head, populated by calls
+ * to filelayout_write_pagelist().
+ * */
+ return 0;
+ }
+
+ size = (fl->stripe_type == STRIPE_SPARSE) ?
+ fl->dsaddr->ds_num : fl->dsaddr->stripe_count;
+
+ buckets = kcalloc(size, sizeof(struct pnfs_commit_bucket),
+ gfp_flags);
+ if (!buckets)
+ return -ENOMEM;
+ else {
+ int i;
+
+ spin_lock(cinfo->lock);
+ if (cinfo->ds->nbuckets != 0)
+ kfree(buckets);
+ else {
+ cinfo->ds->buckets = buckets;
+ cinfo->ds->nbuckets = size;
+ for (i = 0; i < size; i++) {
+ INIT_LIST_HEAD(&buckets[i].written);
+ INIT_LIST_HEAD(&buckets[i].committing);
+ }
+ }
+ spin_unlock(cinfo->lock);
+ return 0;
+ }
+}
+
static struct pnfs_layout_segment *
filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid,
struct nfs4_layoutget_res *lgr,
@@ -627,27 +851,6 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid,
_filelayout_free_lseg(fl);
return NULL;
}
-
- /* This assumes there is only one IOMODE_RW lseg. What
- * we really want to do is have a layout_hdr level
- * dictionary of <multipath_list4, fh> keys, each
- * associated with a struct list_head, populated by calls
- * to filelayout_write_pagelist().
- * */
- if ((!fl->commit_through_mds) && (lgr->range.iomode == IOMODE_RW)) {
- int i;
- int size = (fl->stripe_type == STRIPE_SPARSE) ?
- fl->dsaddr->ds_num : fl->dsaddr->stripe_count;
-
- fl->commit_buckets = kcalloc(size, sizeof(struct list_head), gfp_flags);
- if (!fl->commit_buckets) {
- filelayout_free_lseg(&fl->generic_hdr);
- return NULL;
- }
- fl->number_of_buckets = size;
- for (i = 0; i < size; i++)
- INIT_LIST_HEAD(&fl->commit_buckets[i]);
- }
return &fl->generic_hdr;
}
@@ -668,8 +871,8 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
!nfs_generic_pg_test(pgio, prev, req))
return false;
- p_stripe = (u64)prev->wb_index << PAGE_CACHE_SHIFT;
- r_stripe = (u64)req->wb_index << PAGE_CACHE_SHIFT;
+ p_stripe = (u64)req_offset(prev);
+ r_stripe = (u64)req_offset(req);
stripe_unit = FILELAYOUT_LSEG(pgio->pg_lseg)->stripe_unit;
do_div(p_stripe, stripe_unit);
@@ -678,12 +881,22 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
return (p_stripe == r_stripe);
}
-void
+static void
filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio,
struct nfs_page *req)
{
BUG_ON(pgio->pg_lseg != NULL);
+ if (req->wb_offset != req->wb_pgbase) {
+ /*
+ * Handling unaligned pages is difficult, because we have to
+ * somehow split a req in two in certain cases in the
+ * pg.test code. Avoid this by just not using pnfs
+ * in this case.
+ */
+ nfs_pageio_reset_read_mds(pgio);
+ return;
+ }
pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
req->wb_context,
0,
@@ -695,12 +908,17 @@ filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio,
nfs_pageio_reset_read_mds(pgio);
}
-void
+static void
filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio,
struct nfs_page *req)
{
+ struct nfs_commit_info cinfo;
+ int status;
+
BUG_ON(pgio->pg_lseg != NULL);
+ if (req->wb_offset != req->wb_pgbase)
+ goto out_mds;
pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
req->wb_context,
0,
@@ -709,7 +927,17 @@ filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio,
GFP_NOFS);
/* If no lseg, fall back to write through mds */
if (pgio->pg_lseg == NULL)
- nfs_pageio_reset_write_mds(pgio);
+ goto out_mds;
+ nfs_init_cinfo(&cinfo, pgio->pg_inode, pgio->pg_dreq);
+ status = filelayout_alloc_commit_info(pgio->pg_lseg, &cinfo, GFP_NOFS);
+ if (status < 0) {
+ put_lseg(pgio->pg_lseg);
+ pgio->pg_lseg = NULL;
+ goto out_mds;
+ }
+ return;
+out_mds:
+ nfs_pageio_reset_write_mds(pgio);
}
static const struct nfs_pageio_ops filelayout_pg_read_ops = {
@@ -724,11 +952,6 @@ static const struct nfs_pageio_ops filelayout_pg_write_ops = {
.pg_doio = pnfs_generic_pg_writepages,
};
-static bool filelayout_mark_pnfs_commit(struct pnfs_layout_segment *lseg)
-{
- return !FILELAYOUT_LSEG(lseg)->commit_through_mds;
-}
-
static u32 select_bucket_index(struct nfs4_filelayout_segment *fl, u32 j)
{
if (fl->stripe_type == STRIPE_SPARSE)
@@ -737,12 +960,46 @@ static u32 select_bucket_index(struct nfs4_filelayout_segment *fl, u32 j)
return j;
}
-struct list_head *filelayout_choose_commit_list(struct nfs_page *req)
+/* The generic layer is about to remove the req from the commit list.
+ * If this will make the bucket empty, it will need to put the lseg reference.
+ */
+static void
+filelayout_clear_request_commit(struct nfs_page *req,
+ struct nfs_commit_info *cinfo)
+{
+ struct pnfs_layout_segment *freeme = NULL;
+
+ spin_lock(cinfo->lock);
+ if (!test_and_clear_bit(PG_COMMIT_TO_DS, &req->wb_flags))
+ goto out;
+ cinfo->ds->nwritten--;
+ if (list_is_singular(&req->wb_list)) {
+ struct pnfs_commit_bucket *bucket;
+
+ bucket = list_first_entry(&req->wb_list,
+ struct pnfs_commit_bucket,
+ written);
+ freeme = bucket->wlseg;
+ bucket->wlseg = NULL;
+ }
+out:
+ nfs_request_remove_commit_list(req, cinfo);
+ spin_unlock(cinfo->lock);
+ put_lseg(freeme);
+}
+
+static struct list_head *
+filelayout_choose_commit_list(struct nfs_page *req,
+ struct pnfs_layout_segment *lseg,
+ struct nfs_commit_info *cinfo)
{
- struct pnfs_layout_segment *lseg = req->wb_commit_lseg;
struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg);
u32 i, j;
struct list_head *list;
+ struct pnfs_commit_bucket *buckets;
+
+ if (fl->commit_through_mds)
+ return &cinfo->mds->list;
/* Note that we are calling nfs4_fl_calc_j_index on each page
* that ends up being committed to a data server. An attractive
@@ -750,17 +1007,35 @@ struct list_head *filelayout_choose_commit_list(struct nfs_page *req)
* to store the value calculated in filelayout_write_pagelist
* and just use that here.
*/
- j = nfs4_fl_calc_j_index(lseg,
- (loff_t)req->wb_index << PAGE_CACHE_SHIFT);
+ j = nfs4_fl_calc_j_index(lseg, req_offset(req));
i = select_bucket_index(fl, j);
- list = &fl->commit_buckets[i];
+ buckets = cinfo->ds->buckets;
+ list = &buckets[i].written;
if (list_empty(list)) {
- /* Non-empty buckets hold a reference on the lseg */
- get_lseg(lseg);
+ /* Non-empty buckets hold a reference on the lseg. That ref
+ * is normally transferred to the COMMIT call and released
+ * there. It could also be released if the last req is pulled
+ * off due to a rewrite, in which case it will be done in
+ * filelayout_clear_request_commit
+ */
+ buckets[i].wlseg = get_lseg(lseg);
}
+ set_bit(PG_COMMIT_TO_DS, &req->wb_flags);
+ cinfo->ds->nwritten++;
return list;
}
+static void
+filelayout_mark_request_commit(struct nfs_page *req,
+ struct pnfs_layout_segment *lseg,
+ struct nfs_commit_info *cinfo)
+{
+ struct list_head *list;
+
+ list = filelayout_choose_commit_list(req, lseg, cinfo);
+ nfs_request_add_commit_list(req, list, cinfo);
+}
+
static u32 calc_ds_index_from_commit(struct pnfs_layout_segment *lseg, u32 i)
{
struct nfs4_filelayout_segment *flseg = FILELAYOUT_LSEG(lseg);
@@ -786,7 +1061,7 @@ select_ds_fh_from_commit(struct pnfs_layout_segment *lseg, u32 i)
return flseg->fh_array[i];
}
-static int filelayout_initiate_commit(struct nfs_write_data *data, int how)
+static int filelayout_initiate_commit(struct nfs_commit_data *data, int how)
{
struct pnfs_layout_segment *lseg = data->lseg;
struct nfs4_pnfs_ds *ds;
@@ -796,117 +1071,187 @@ static int filelayout_initiate_commit(struct nfs_write_data *data, int how)
idx = calc_ds_index_from_commit(lseg, data->ds_commit_index);
ds = nfs4_fl_prepare_ds(lseg, idx);
if (!ds) {
- printk(KERN_ERR "%s: prepare_ds failed, use MDS\n", __func__);
- set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags);
- set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags);
prepare_to_resend_writes(data);
- data->mds_ops->rpc_release(data);
+ filelayout_commit_release(data);
return -EAGAIN;
}
- dprintk("%s ino %lu, how %d\n", __func__, data->inode->i_ino, how);
- data->write_done_cb = filelayout_commit_done_cb;
+ dprintk("%s ino %lu, how %d cl_count %d\n", __func__,
+ data->inode->i_ino, how, atomic_read(&ds->ds_clp->cl_count));
+ data->commit_done_cb = filelayout_commit_done_cb;
+ atomic_inc(&ds->ds_clp->cl_count);
data->ds_clp = ds->ds_clp;
fh = select_ds_fh_from_commit(lseg, data->ds_commit_index);
if (fh)
data->args.fh = fh;
- return nfs_initiate_commit(data, ds->ds_clp->cl_rpcclient,
- &filelayout_commit_call_ops, how);
+ return nfs_initiate_commit(ds->ds_clp->cl_rpcclient, data,
+ &filelayout_commit_call_ops, how,
+ RPC_TASK_SOFTCONN);
}
-/*
- * This is only useful while we are using whole file layouts.
+static int
+transfer_commit_list(struct list_head *src, struct list_head *dst,
+ struct nfs_commit_info *cinfo, int max)
+{
+ struct nfs_page *req, *tmp;
+ int ret = 0;
+
+ list_for_each_entry_safe(req, tmp, src, wb_list) {
+ if (!nfs_lock_request(req))
+ continue;
+ kref_get(&req->wb_kref);
+ if (cond_resched_lock(cinfo->lock))
+ list_safe_reset_next(req, tmp, wb_list);
+ nfs_request_remove_commit_list(req, cinfo);
+ clear_bit(PG_COMMIT_TO_DS, &req->wb_flags);
+ nfs_list_add_request(req, dst);
+ ret++;
+ if ((ret == max) && !cinfo->dreq)
+ break;
+ }
+ return ret;
+}
+
+static int
+filelayout_scan_ds_commit_list(struct pnfs_commit_bucket *bucket,
+ struct nfs_commit_info *cinfo,
+ int max)
+{
+ struct list_head *src = &bucket->written;
+ struct list_head *dst = &bucket->committing;
+ int ret;
+
+ ret = transfer_commit_list(src, dst, cinfo, max);
+ if (ret) {
+ cinfo->ds->nwritten -= ret;
+ cinfo->ds->ncommitting += ret;
+ bucket->clseg = bucket->wlseg;
+ if (list_empty(src))
+ bucket->wlseg = NULL;
+ else
+ get_lseg(bucket->clseg);
+ }
+ return ret;
+}
+
+/* Move reqs from written to committing lists, returning count of number moved.
+ * Note called with cinfo->lock held.
*/
-static struct pnfs_layout_segment *find_only_write_lseg(struct inode *inode)
+static int filelayout_scan_commit_lists(struct nfs_commit_info *cinfo,
+ int max)
{
- struct pnfs_layout_segment *lseg, *rv = NULL;
+ int i, rv = 0, cnt;
- spin_lock(&inode->i_lock);
- list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list)
- if (lseg->pls_range.iomode == IOMODE_RW)
- rv = get_lseg(lseg);
- spin_unlock(&inode->i_lock);
+ for (i = 0; i < cinfo->ds->nbuckets && max != 0; i++) {
+ cnt = filelayout_scan_ds_commit_list(&cinfo->ds->buckets[i],
+ cinfo, max);
+ max -= cnt;
+ rv += cnt;
+ }
return rv;
}
-static int alloc_ds_commits(struct inode *inode, struct list_head *list)
+/* Pull everything off the committing lists and dump into @dst */
+static void filelayout_recover_commit_reqs(struct list_head *dst,
+ struct nfs_commit_info *cinfo)
{
- struct pnfs_layout_segment *lseg;
- struct nfs4_filelayout_segment *fl;
- struct nfs_write_data *data;
+ struct pnfs_commit_bucket *b;
+ int i;
+
+ /* NOTE: cinfo->lock is NOT held, relying on the fact that this is
+ * only called on a single thread per dreq.
+ * Can't take the lock because we need to do put_lseg.
+ */
+ for (i = 0, b = cinfo->ds->buckets; i < cinfo->ds->nbuckets; i++, b++) {
+ if (transfer_commit_list(&b->written, dst, cinfo, 0)) {
+ BUG_ON(!list_empty(&b->written));
+ put_lseg(b->wlseg);
+ b->wlseg = NULL;
+ }
+ }
+ cinfo->ds->nwritten = 0;
+}
+
+static unsigned int
+alloc_ds_commits(struct nfs_commit_info *cinfo, struct list_head *list)
+{
+ struct pnfs_ds_commit_info *fl_cinfo;
+ struct pnfs_commit_bucket *bucket;
+ struct nfs_commit_data *data;
int i, j;
+ unsigned int nreq = 0;
- /* Won't need this when non-whole file layout segments are supported
- * instead we will use a pnfs_layout_hdr structure */
- lseg = find_only_write_lseg(inode);
- if (!lseg)
- return 0;
- fl = FILELAYOUT_LSEG(lseg);
- for (i = 0; i < fl->number_of_buckets; i++) {
- if (list_empty(&fl->commit_buckets[i]))
+ fl_cinfo = cinfo->ds;
+ bucket = fl_cinfo->buckets;
+ for (i = 0; i < fl_cinfo->nbuckets; i++, bucket++) {
+ if (list_empty(&bucket->committing))
continue;
data = nfs_commitdata_alloc();
if (!data)
- goto out_bad;
+ break;
data->ds_commit_index = i;
- data->lseg = lseg;
+ data->lseg = bucket->clseg;
+ bucket->clseg = NULL;
list_add(&data->pages, list);
+ nreq++;
}
- put_lseg(lseg);
- return 0;
-out_bad:
- for (j = i; j < fl->number_of_buckets; j++) {
- if (list_empty(&fl->commit_buckets[i]))
+ /* Clean up on error */
+ for (j = i; j < fl_cinfo->nbuckets; j++, bucket++) {
+ if (list_empty(&bucket->committing))
continue;
- nfs_retry_commit(&fl->commit_buckets[i], lseg);
- put_lseg(lseg); /* associated with emptying bucket */
+ nfs_retry_commit(&bucket->committing, bucket->clseg, cinfo);
+ put_lseg(bucket->clseg);
+ bucket->clseg = NULL;
}
- put_lseg(lseg);
/* Caller will clean up entries put on list */
- return -ENOMEM;
+ return nreq;
}
/* This follows nfs_commit_list pretty closely */
static int
filelayout_commit_pagelist(struct inode *inode, struct list_head *mds_pages,
- int how)
+ int how, struct nfs_commit_info *cinfo)
{
- struct nfs_write_data *data, *tmp;
+ struct nfs_commit_data *data, *tmp;
LIST_HEAD(list);
+ unsigned int nreq = 0;
if (!list_empty(mds_pages)) {
data = nfs_commitdata_alloc();
- if (!data)
- goto out_bad;
- data->lseg = NULL;
- list_add(&data->pages, &list);
+ if (data != NULL) {
+ data->lseg = NULL;
+ list_add(&data->pages, &list);
+ nreq++;
+ } else
+ nfs_retry_commit(mds_pages, NULL, cinfo);
}
- if (alloc_ds_commits(inode, &list))
- goto out_bad;
+ nreq += alloc_ds_commits(cinfo, &list);
+
+ if (nreq == 0) {
+ cinfo->completion_ops->error_cleanup(NFS_I(inode));
+ goto out;
+ }
+
+ atomic_add(nreq, &cinfo->mds->rpcs_out);
list_for_each_entry_safe(data, tmp, &list, pages) {
list_del_init(&data->pages);
- atomic_inc(&NFS_I(inode)->commits_outstanding);
if (!data->lseg) {
- nfs_init_commit(data, mds_pages, NULL);
- nfs_initiate_commit(data, NFS_CLIENT(inode),
- data->mds_ops, how);
+ nfs_init_commit(data, mds_pages, NULL, cinfo);
+ nfs_initiate_commit(NFS_CLIENT(inode), data,
+ data->mds_ops, how, 0);
} else {
- nfs_init_commit(data, &FILELAYOUT_LSEG(data->lseg)->commit_buckets[data->ds_commit_index], data->lseg);
+ struct pnfs_commit_bucket *buckets;
+
+ buckets = cinfo->ds->buckets;
+ nfs_init_commit(data, &buckets[data->ds_commit_index].committing, data->lseg, cinfo);
filelayout_initiate_commit(data, how);
}
}
- return 0;
- out_bad:
- list_for_each_entry_safe(data, tmp, &list, pages) {
- nfs_retry_commit(&data->pages, data->lseg);
- list_del_init(&data->pages);
- nfs_commit_free(data);
- }
- nfs_retry_commit(mds_pages, NULL);
- nfs_commit_clear_lock(NFS_I(inode));
- return -ENOMEM;
+out:
+ cinfo->ds->ncommitting = 0;
+ return PNFS_ATTEMPTED;
}
static void
@@ -915,16 +1260,47 @@ filelayout_free_deveiceid_node(struct nfs4_deviceid_node *d)
nfs4_fl_free_deviceid(container_of(d, struct nfs4_file_layout_dsaddr, id_node));
}
+static struct pnfs_layout_hdr *
+filelayout_alloc_layout_hdr(struct inode *inode, gfp_t gfp_flags)
+{
+ struct nfs4_filelayout *flo;
+
+ flo = kzalloc(sizeof(*flo), gfp_flags);
+ return &flo->generic_hdr;
+}
+
+static void
+filelayout_free_layout_hdr(struct pnfs_layout_hdr *lo)
+{
+ kfree(FILELAYOUT_FROM_HDR(lo));
+}
+
+static struct pnfs_ds_commit_info *
+filelayout_get_ds_info(struct inode *inode)
+{
+ struct pnfs_layout_hdr *layout = NFS_I(inode)->layout;
+
+ if (layout == NULL)
+ return NULL;
+ else
+ return &FILELAYOUT_FROM_HDR(layout)->commit_info;
+}
+
static struct pnfs_layoutdriver_type filelayout_type = {
.id = LAYOUT_NFSV4_1_FILES,
.name = "LAYOUT_NFSV4_1_FILES",
.owner = THIS_MODULE,
+ .alloc_layout_hdr = filelayout_alloc_layout_hdr,
+ .free_layout_hdr = filelayout_free_layout_hdr,
.alloc_lseg = filelayout_alloc_lseg,
.free_lseg = filelayout_free_lseg,
.pg_read_ops = &filelayout_pg_read_ops,
.pg_write_ops = &filelayout_pg_write_ops,
- .mark_pnfs_commit = filelayout_mark_pnfs_commit,
- .choose_commit_list = filelayout_choose_commit_list,
+ .get_ds_info = &filelayout_get_ds_info,
+ .mark_request_commit = filelayout_mark_request_commit,
+ .clear_request_commit = filelayout_clear_request_commit,
+ .scan_commit_lists = filelayout_scan_commit_lists,
+ .recover_commit_reqs = filelayout_recover_commit_reqs,
.commit_pagelist = filelayout_commit_pagelist,
.read_pagelist = filelayout_read_pagelist,
.write_pagelist = filelayout_write_pagelist,
diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h
index 2e42284253f..43fe802dd67 100644
--- a/fs/nfs/nfs4filelayout.h
+++ b/fs/nfs/nfs4filelayout.h
@@ -33,6 +33,13 @@
#include "pnfs.h"
/*
+ * Default data server connection timeout and retrans values.
+ * Set by module parameters dataserver_timeo and dataserver_retrans.
+ */
+#define NFS4_DEF_DS_TIMEO 60
+#define NFS4_DEF_DS_RETRANS 5
+
+/*
* Field testing shows we need to support up to 4096 stripe indices.
* We store each index as a u8 (u32 on the wire) to keep the memory footprint
* reasonable. This in turn means we support a maximum of 256
@@ -41,6 +48,9 @@
#define NFS4_PNFS_MAX_STRIPE_CNT 4096
#define NFS4_PNFS_MAX_MULTI_CNT 256 /* 256 fit into a u8 stripe_index */
+/* error codes for internal use */
+#define NFS4ERR_RESET_TO_MDS 12001
+
enum stripetype4 {
STRIPE_SPARSE = 1,
STRIPE_DENSE = 2
@@ -62,12 +72,8 @@ struct nfs4_pnfs_ds {
atomic_t ds_count;
};
-/* nfs4_file_layout_dsaddr flags */
-#define NFS4_DEVICE_ID_NEG_ENTRY 0x00000001
-
struct nfs4_file_layout_dsaddr {
struct nfs4_deviceid_node id_node;
- unsigned long flags;
u32 stripe_count;
u8 *stripe_indices;
u32 ds_num;
@@ -84,10 +90,19 @@ struct nfs4_filelayout_segment {
struct nfs4_file_layout_dsaddr *dsaddr; /* Point to GETDEVINFO data */
unsigned int num_fh;
struct nfs_fh **fh_array;
- struct list_head *commit_buckets; /* Sort commits to ds */
- int number_of_buckets;
};
+struct nfs4_filelayout {
+ struct pnfs_layout_hdr generic_hdr;
+ struct pnfs_ds_commit_info commit_info;
+};
+
+static inline struct nfs4_filelayout *
+FILELAYOUT_FROM_HDR(struct pnfs_layout_hdr *lo)
+{
+ return container_of(lo, struct nfs4_filelayout, generic_hdr);
+}
+
static inline struct nfs4_filelayout_segment *
FILELAYOUT_LSEG(struct pnfs_layout_segment *lseg)
{
@@ -102,6 +117,36 @@ FILELAYOUT_DEVID_NODE(struct pnfs_layout_segment *lseg)
return &FILELAYOUT_LSEG(lseg)->dsaddr->id_node;
}
+static inline void
+filelayout_mark_devid_invalid(struct nfs4_deviceid_node *node)
+{
+ u32 *p = (u32 *)&node->deviceid;
+
+ printk(KERN_WARNING "NFS: Deviceid [%x%x%x%x] marked out of use.\n",
+ p[0], p[1], p[2], p[3]);
+
+ set_bit(NFS_DEVICEID_INVALID, &node->flags);
+}
+
+static inline bool
+filelayout_test_layout_invalid(struct pnfs_layout_hdr *lo)
+{
+ return test_bit(NFS_LAYOUT_INVALID, &lo->plh_flags);
+}
+
+static inline bool
+filelayout_test_devid_invalid(struct nfs4_deviceid_node *node)
+{
+ return test_bit(NFS_DEVICEID_INVALID, &node->flags);
+}
+
+static inline bool
+filelayout_reset_to_mds(struct pnfs_layout_segment *lseg)
+{
+ return filelayout_test_devid_invalid(FILELAYOUT_DEVID_NODE(lseg)) ||
+ filelayout_test_layout_invalid(lseg->pls_layout);
+}
+
extern struct nfs_fh *
nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j);
@@ -114,5 +159,6 @@ extern void nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr);
extern void nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr);
struct nfs4_file_layout_dsaddr *
get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id, gfp_t gfp_flags);
+void nfs4_ds_disconnect(struct nfs_client *clp);
#endif /* FS_NFS_NFS4FILELAYOUT_H */
diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c
index ed388aae968..a1fab8da7f0 100644
--- a/fs/nfs/nfs4filelayoutdev.c
+++ b/fs/nfs/nfs4filelayoutdev.c
@@ -30,12 +30,16 @@
#include <linux/nfs_fs.h>
#include <linux/vmalloc.h>
+#include <linux/module.h>
#include "internal.h"
#include "nfs4filelayout.h"
#define NFSDBG_FACILITY NFSDBG_PNFS_LD
+static unsigned int dataserver_timeo = NFS4_DEF_DS_TIMEO;
+static unsigned int dataserver_retrans = NFS4_DEF_DS_RETRANS;
+
/*
* Data server cache
*
@@ -45,7 +49,7 @@
* - incremented when a device id maps a data server already in the cache.
* - decremented when deviceid is removed from the cache.
*/
-DEFINE_SPINLOCK(nfs4_ds_cache_lock);
+static DEFINE_SPINLOCK(nfs4_ds_cache_lock);
static LIST_HEAD(nfs4_data_server_cache);
/* Debug routines */
@@ -108,58 +112,62 @@ same_sockaddr(struct sockaddr *addr1, struct sockaddr *addr2)
return false;
}
+/*
+ * Compare two data server address lists element by element.
+ *
+ * Returns true only when both lists have the same length and every pair
+ * of entries at the same position satisfies same_sockaddr().  Two empty
+ * lists compare equal.
+ *
+ * The walk is done on the list_head pointers and stops when either one
+ * gets back to its list head.  list_entry() never yields NULL, so a
+ * NULL test on the containing structure cannot detect the end of a list.
+ */
+static bool
+_same_data_server_addrs_locked(const struct list_head *dsaddrs1,
+			       const struct list_head *dsaddrs2)
+{
+	const struct list_head *pos1 = dsaddrs1->next;
+	const struct list_head *pos2 = dsaddrs2->next;
+
+	/* step through both lists, comparing as we go */
+	while (pos1 != dsaddrs1 && pos2 != dsaddrs2) {
+		struct nfs4_pnfs_ds_addr *da1, *da2;
+
+		da1 = list_entry(pos1, struct nfs4_pnfs_ds_addr, da_node);
+		da2 = list_entry(pos2, struct nfs4_pnfs_ds_addr, da_node);
+		if (!same_sockaddr((struct sockaddr *)&da1->da_addr,
+				   (struct sockaddr *)&da2->da_addr))
+			return false;
+
+		pos1 = pos1->next;
+		pos2 = pos2->next;
+	}
+
+	/* equal only if both lists ran out at the same time */
+	return pos1 == dsaddrs1 && pos2 == dsaddrs2;
+}
+
/*
- * Lookup DS by addresses. The first matching address returns true.
- * nfs4_ds_cache_lock is held
+ * Lookup DS by addresses. nfs4_ds_cache_lock is held
*/
static struct nfs4_pnfs_ds *
-_data_server_lookup_locked(struct list_head *dsaddrs)
+_data_server_lookup_locked(const struct list_head *dsaddrs)
{
struct nfs4_pnfs_ds *ds;
- struct nfs4_pnfs_ds_addr *da1, *da2;
- list_for_each_entry(da1, dsaddrs, da_node) {
- list_for_each_entry(ds, &nfs4_data_server_cache, ds_node) {
- list_for_each_entry(da2, &ds->ds_addrs, da_node) {
- if (same_sockaddr(
- (struct sockaddr *)&da1->da_addr,
- (struct sockaddr *)&da2->da_addr))
- return ds;
- }
- }
- }
+ list_for_each_entry(ds, &nfs4_data_server_cache, ds_node)
+ if (_same_data_server_addrs_locked(&ds->ds_addrs, dsaddrs))
+ return ds;
return NULL;
}
/*
- * Compare two lists of addresses.
+ * Lookup DS by nfs_client pointer. Zero data server client pointer
*/
-static bool
-_data_server_match_all_addrs_locked(struct list_head *dsaddrs1,
- struct list_head *dsaddrs2)
+void nfs4_ds_disconnect(struct nfs_client *clp)
{
- struct nfs4_pnfs_ds_addr *da1, *da2;
- size_t count1 = 0,
- count2 = 0;
-
- list_for_each_entry(da1, dsaddrs1, da_node)
- count1++;
-
- list_for_each_entry(da2, dsaddrs2, da_node) {
- bool found = false;
- count2++;
- list_for_each_entry(da1, dsaddrs1, da_node) {
- if (same_sockaddr((struct sockaddr *)&da1->da_addr,
- (struct sockaddr *)&da2->da_addr)) {
- found = true;
- break;
- }
+ struct nfs4_pnfs_ds *ds;
+ struct nfs_client *found = NULL;
+
+ dprintk("%s clp %p\n", __func__, clp);
+ spin_lock(&nfs4_ds_cache_lock);
+ list_for_each_entry(ds, &nfs4_data_server_cache, ds_node)
+ if (ds->ds_clp && ds->ds_clp == clp) {
+ found = ds->ds_clp;
+ ds->ds_clp = NULL;
}
- if (!found)
- return false;
+ spin_unlock(&nfs4_ds_cache_lock);
+ if (found) {
+ set_bit(NFS_CS_STOP_RENEW, &clp->cl_res_state);
+ nfs_put_client(clp);
}
-
- return (count1 == count2);
}
/*
@@ -183,8 +191,9 @@ nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds)
__func__, ds->ds_remotestr, da->da_remotestr);
clp = nfs4_set_ds_client(mds_srv->nfs_client,
- (struct sockaddr *)&da->da_addr,
- da->da_addrlen, IPPROTO_TCP);
+ (struct sockaddr *)&da->da_addr,
+ da->da_addrlen, IPPROTO_TCP,
+ dataserver_timeo, dataserver_retrans);
if (!IS_ERR(clp))
break;
}
@@ -194,28 +203,7 @@ nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds)
goto out;
}
- if ((clp->cl_exchange_flags & EXCHGID4_FLAG_MASK_PNFS) != 0) {
- if (!is_ds_client(clp)) {
- status = -ENODEV;
- goto out_put;
- }
- ds->ds_clp = clp;
- dprintk("%s [existing] server=%s\n", __func__,
- ds->ds_remotestr);
- goto out;
- }
-
- /*
- * Do not set NFS_CS_CHECK_LEASE_TIME instead set the DS lease to
- * be equal to the MDS lease. Renewal is scheduled in create_session.
- */
- spin_lock(&mds_srv->nfs_client->cl_lock);
- clp->cl_lease_time = mds_srv->nfs_client->cl_lease_time;
- spin_unlock(&mds_srv->nfs_client->cl_lock);
- clp->cl_last_renewal = jiffies;
-
- /* New nfs_client */
- status = nfs4_init_ds_session(clp);
+ status = nfs4_init_ds_session(clp, mds_srv->nfs_client->cl_lease_time);
if (status)
goto out_put;
@@ -356,11 +344,6 @@ nfs4_pnfs_ds_add(struct list_head *dsaddrs, gfp_t gfp_flags)
dprintk("%s add new data server %s\n", __func__,
ds->ds_remotestr);
} else {
- if (!_data_server_match_all_addrs_locked(&tmp_ds->ds_addrs,
- dsaddrs)) {
- dprintk("%s: multipath address mismatch: %s != %s",
- __func__, tmp_ds->ds_remotestr, remotestr);
- }
kfree(remotestr);
kfree(ds);
atomic_inc(&tmp_ds->ds_count);
@@ -378,11 +361,11 @@ out:
* Currently only supports ipv4, ipv6 and one multi-path address.
*/
static struct nfs4_pnfs_ds_addr *
-decode_ds_addr(struct xdr_stream *streamp, gfp_t gfp_flags)
+decode_ds_addr(struct net *net, struct xdr_stream *streamp, gfp_t gfp_flags)
{
struct nfs4_pnfs_ds_addr *da = NULL;
char *buf, *portstr;
- u32 port;
+ __be16 port;
int nlen, rlen;
int tmp[2];
__be32 *p;
@@ -457,7 +440,7 @@ decode_ds_addr(struct xdr_stream *streamp, gfp_t gfp_flags)
INIT_LIST_HEAD(&da->da_node);
- if (!rpc_pton(buf, portstr-buf, (struct sockaddr *)&da->da_addr,
+ if (!rpc_pton(net, buf, portstr-buf, (struct sockaddr *)&da->da_addr,
sizeof(da->da_addr))) {
dprintk("%s: error parsing address %s\n", __func__, buf);
goto out_free_da;
@@ -554,7 +537,7 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags)
cnt = be32_to_cpup(p);
dprintk("%s stripe count %d\n", __func__, cnt);
if (cnt > NFS4_PNFS_MAX_STRIPE_CNT) {
- printk(KERN_WARNING "%s: stripe count %d greater than "
+ printk(KERN_WARNING "NFS: %s: stripe count %d greater than "
"supported maximum %d\n", __func__,
cnt, NFS4_PNFS_MAX_STRIPE_CNT);
goto out_err_free_scratch;
@@ -585,7 +568,7 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags)
num = be32_to_cpup(p);
dprintk("%s ds_num %u\n", __func__, num);
if (num > NFS4_PNFS_MAX_MULTI_CNT) {
- printk(KERN_WARNING "%s: multipath count %d greater than "
+ printk(KERN_WARNING "NFS: %s: multipath count %d greater than "
"supported maximum %d\n", __func__,
num, NFS4_PNFS_MAX_MULTI_CNT);
goto out_err_free_stripe_indices;
@@ -593,7 +576,7 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags)
/* validate stripe indices are all < num */
if (max_stripe_index >= num) {
- printk(KERN_WARNING "%s: stripe index %u >= num ds %u\n",
+ printk(KERN_WARNING "NFS: %s: stripe index %u >= num ds %u\n",
__func__, max_stripe_index, num);
goto out_err_free_stripe_indices;
}
@@ -625,7 +608,8 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags)
mp_count = be32_to_cpup(p); /* multipath count */
for (j = 0; j < mp_count; j++) {
- da = decode_ds_addr(&stream, gfp_flags);
+ da = decode_ds_addr(NFS_SERVER(ino)->nfs_client->cl_net,
+ &stream, gfp_flags);
if (da)
list_add_tail(&da->da_node, &dsaddrs);
}
@@ -686,7 +670,7 @@ decode_and_add_device(struct inode *inode, struct pnfs_device *dev, gfp_t gfp_fl
new = decode_device(inode, dev, gfp_flags);
if (!new) {
- printk(KERN_WARNING "%s: Could not decode or add device\n",
+ printk(KERN_WARNING "NFS: %s: Could not decode or add device\n",
__func__);
return NULL;
}
@@ -721,7 +705,7 @@ get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id, gfp_t gfp_fla
* GETDEVICEINFO's maxcount
*/
max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz;
- max_pages = max_resp_sz >> PAGE_SHIFT;
+ max_pages = nfs_page_array_len(0, max_resp_sz);
dprintk("%s inode %p max_resp_sz %u max_pages %d\n",
__func__, inode, max_resp_sz, max_pages);
@@ -813,48 +797,42 @@ nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j)
return flseg->fh_array[i];
}
-static void
-filelayout_mark_devid_negative(struct nfs4_file_layout_dsaddr *dsaddr,
- int err, const char *ds_remotestr)
-{
- u32 *p = (u32 *)&dsaddr->id_node.deviceid;
-
- printk(KERN_ERR "NFS: data server %s connection error %d."
- " Deviceid [%x%x%x%x] marked out of use.\n",
- ds_remotestr, err, p[0], p[1], p[2], p[3]);
-
- spin_lock(&nfs4_ds_cache_lock);
- dsaddr->flags |= NFS4_DEVICE_ID_NEG_ENTRY;
- spin_unlock(&nfs4_ds_cache_lock);
-}
-
struct nfs4_pnfs_ds *
nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx)
{
struct nfs4_file_layout_dsaddr *dsaddr = FILELAYOUT_LSEG(lseg)->dsaddr;
struct nfs4_pnfs_ds *ds = dsaddr->ds_list[ds_idx];
+ struct nfs4_deviceid_node *devid = FILELAYOUT_DEVID_NODE(lseg);
+
+ if (filelayout_test_devid_invalid(devid))
+ return NULL;
if (ds == NULL) {
- printk(KERN_ERR "%s: No data server for offset index %d\n",
+ printk(KERN_ERR "NFS: %s: No data server for offset index %d\n",
__func__, ds_idx);
- return NULL;
+ goto mark_dev_invalid;
}
if (!ds->ds_clp) {
struct nfs_server *s = NFS_SERVER(lseg->pls_layout->plh_inode);
int err;
- if (dsaddr->flags & NFS4_DEVICE_ID_NEG_ENTRY) {
- /* Already tried to connect, don't try again */
- dprintk("%s Deviceid marked out of use\n", __func__);
- return NULL;
- }
err = nfs4_ds_connect(s, ds);
- if (err) {
- filelayout_mark_devid_negative(dsaddr, err,
- ds->ds_remotestr);
- return NULL;
- }
+ if (err)
+ goto mark_dev_invalid;
}
return ds;
+
+mark_dev_invalid:
+ filelayout_mark_devid_invalid(devid);
+ return NULL;
}
+
+/*
+ * Module parameters for the data server transport settings.  Both
+ * variables are handed to nfs4_set_ds_client() when a data server
+ * connection is set up; their initial values are NFS4_DEF_DS_RETRANS
+ * and NFS4_DEF_DS_TIMEO.
+ */
+module_param(dataserver_retrans, uint, 0644);
+MODULE_PARM_DESC(dataserver_retrans, "The number of times the NFSv4.1 client "
+ "retries a request before it attempts further "
+ " recovery action.");
+module_param(dataserver_timeo, uint, 0644);
+MODULE_PARM_DESC(dataserver_timeo, "The time (in tenths of a second) the "
+ "NFSv4.1 client waits for a response from a "
+ " data server before it retries an NFS request.");
diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c
index bb80c49b653..017b4b01a69 100644
--- a/fs/nfs/nfs4namespace.c
+++ b/fs/nfs/nfs4namespace.c
@@ -52,6 +52,30 @@ Elong:
}
/*
+ * return the path component of "<server>:<path>"
+ * nfspath - the "<server>:<path>" string
+ * end - one past the last char that could contain "<server>:"
+ * returns NULL on failure
+ */
+static char *nfs_path_component(const char *nfspath, const char *end)
+{
+	char *sep;
+
+	if (*nfspath != '[') {
+		/* plain "<server>:<path>": the first colon is the separator */
+		sep = strchr(nfspath, ':');
+		if (sep == NULL || sep >= end)
+			return NULL;
+		return sep + 1;
+	}
+
+	/* "[<IPv6 addr>]:<path>": the colon must directly follow the ']' */
+	sep = strchr(nfspath, ']');
+	if (sep == NULL)
+		return NULL;
+	sep++;
+	if (sep >= end || *sep != ':')
+		return NULL;
+	return sep + 1;
+}
+
+/*
* Determine the mount path as a string
*/
static char *nfs4_path(struct dentry *dentry, char *buffer, ssize_t buflen)
@@ -59,9 +83,9 @@ static char *nfs4_path(struct dentry *dentry, char *buffer, ssize_t buflen)
char *limit;
char *path = nfs_path(&limit, dentry, buffer, buflen);
if (!IS_ERR(path)) {
- char *colon = strchr(path, ':');
- if (colon && colon < limit)
- path = colon + 1;
+ char *path_component = nfs_path_component(path, limit);
+ if (path_component)
+ return path_component;
}
return path;
}
@@ -94,19 +118,101 @@ static int nfs4_validate_fspath(struct dentry *dentry,
}
static size_t nfs_parse_server_name(char *string, size_t len,
- struct sockaddr *sa, size_t salen)
+ struct sockaddr *sa, size_t salen, struct nfs_server *server)
{
+ struct net *net = rpc_net_ns(server->client);
ssize_t ret;
- ret = rpc_pton(string, len, sa, salen);
+ ret = rpc_pton(net, string, len, sa, salen);
if (ret == 0) {
- ret = nfs_dns_resolve_name(string, len, sa, salen);
+ ret = nfs_dns_resolve_name(net, string, len, sa, salen);
if (ret < 0)
ret = 0;
}
return ret;
}
+/*
+ * Choose an RPC auth pseudoflavor from a list of security flavors.
+ *
+ * The list is scanned in order and the first usable entry wins:
+ * RPC_AUTH_NULL and RPC_AUTH_UNIX are taken as they are, RPC_AUTH_GSS is
+ * taken when gss_mech_get_by_OID() finds a mechanism for its OID.  Any
+ * other entry is skipped.  When no entry is usable the result is
+ * RPC_AUTH_UNIX.
+ */
+rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *flavors)
+{
+	int i;
+
+	for (i = 0; i < flavors->num_flavors; i++) {
+		struct nfs4_secinfo_flavor *cur = &flavors->flavors[i];
+		struct gss_api_mech *mech;
+		struct xdr_netobj oid;
+		rpc_authflavor_t gss_flavor;
+
+		switch (cur->flavor) {
+		case RPC_AUTH_NULL:
+		case RPC_AUTH_UNIX:
+			return cur->flavor;
+		case RPC_AUTH_GSS:
+			oid.len = cur->gss.sec_oid4.len;
+			oid.data = cur->gss.sec_oid4.data;
+			mech = gss_mech_get_by_OID(&oid);
+			if (!mech)
+				break;	/* no such mechanism: try the next entry */
+			gss_flavor = gss_svc_to_pseudoflavor(mech,
+							     cur->gss.service);
+			gss_mech_put(mech);
+			return gss_flavor;
+		default:
+			break;
+		}
+	}
+
+	return RPC_AUTH_UNIX;
+}
+
+/*
+ * Ask the server which security flavors apply to @name (via
+ * nfs4_proc_secinfo()) and pick one with nfs_find_best_sec().
+ *
+ * A temporary page is used as the buffer for the flavor list and is
+ * released before returning.  On failure a negative errno is returned
+ * in the rpc_authflavor_t; the caller detects it through an int cast.
+ */
+static rpc_authflavor_t nfs4_negotiate_security(struct inode *inode, struct qstr *name)
+{
+	struct nfs4_secinfo_flavors *flavors;
+	rpc_authflavor_t flavor;
+	struct page *page;
+	int err;
+
+	page = alloc_page(GFP_KERNEL);
+	if (page == NULL)
+		return -ENOMEM;
+	flavors = page_address(page);
+
+	err = nfs4_proc_secinfo(inode, name, flavors);
+	if (err < 0)
+		flavor = err;
+	else
+		flavor = nfs_find_best_sec(flavors);
+
+	put_page(page);
+	return flavor;
+}
+
+/*
+ * Clone @clnt and give the clone the security flavor obtained from
+ * nfs4_negotiate_security(@inode, @name).
+ *
+ * Returns the cloned client, or an ERR_PTR: the negotiation error, the
+ * rpc_clone_client() error, or -EIO when no rpc_auth could be created
+ * for the clone (the clone is shut down in that case).
+ *
+ * Please call rpc_shutdown_client() when you are done with this client.
+ */
+struct rpc_clnt *nfs4_create_sec_client(struct rpc_clnt *clnt, struct inode *inode,
+					struct qstr *name)
+{
+	struct rpc_clnt *clone;
+	struct rpc_auth *auth;
+	rpc_authflavor_t flavor;
+
+	flavor = nfs4_negotiate_security(inode, name);
+	/*
+	 * The flavor type is unsigned; convert to int before building the
+	 * error pointer so the negative errno is sign-extended and the
+	 * result is recognised by IS_ERR() on 64-bit as well.
+	 */
+	if ((int)flavor < 0)
+		return ERR_PTR((int)flavor);
+
+	clone = rpc_clone_client(clnt);
+	if (IS_ERR(clone))
+		return clone;
+
+	/* rpcauth_create() reports failure as an ERR_PTR, not only NULL */
+	auth = rpcauth_create(flavor, clone);
+	if (!auth || IS_ERR(auth)) {
+		rpc_shutdown_client(clone);
+		clone = ERR_PTR(-EIO);
+	}
+
+	return clone;
+}
+
static struct vfsmount *try_location(struct nfs_clone_mount *mountdata,
char *page, char *page2,
const struct nfs4_fs_location *location)
@@ -137,7 +243,8 @@ static struct vfsmount *try_location(struct nfs_clone_mount *mountdata,
continue;
mountdata->addrlen = nfs_parse_server_name(buf->data, buf->len,
- mountdata->addr, addr_bufsize);
+ mountdata->addr, addr_bufsize,
+ NFS_SB(mountdata->sb));
if (mountdata->addrlen == 0)
continue;
@@ -222,7 +329,7 @@ out:
* @dentry - dentry of referral
*
*/
-struct vfsmount *nfs_do_refmount(struct dentry *dentry)
+static struct vfsmount *nfs_do_refmount(struct rpc_clnt *client, struct dentry *dentry)
{
struct vfsmount *mnt = ERR_PTR(-ENOMEM);
struct dentry *parent;
@@ -248,7 +355,7 @@ struct vfsmount *nfs_do_refmount(struct dentry *dentry)
dprintk("%s: getting locations for %s/%s\n",
__func__, parent->d_name.name, dentry->d_name.name);
- err = nfs4_proc_fs_locations(parent->d_inode, &dentry->d_name, fs_locations, page);
+ err = nfs4_proc_fs_locations(client, parent->d_inode, &dentry->d_name, fs_locations, page);
dput(parent);
if (err != 0 ||
fs_locations->nlocations <= 0 ||
@@ -263,3 +370,25 @@ out:
dprintk("%s: done\n", __func__);
return mnt;
}
+
+/*
+ * Mount whatever lives at @dentry.
+ *
+ * The entry is looked up again in its parent directory, which fills in
+ * @fh and @fattr and yields an RPC client.  If the attributes mark the
+ * entry as a referral the mount goes through nfs_do_refmount(),
+ * otherwise through nfs_do_submount() with the auth flavor of that
+ * client.  The client is shut down before returning.  @server is not
+ * used here.
+ *
+ * Returns the new vfsmount, or an ERR_PTR carrying the lookup error.
+ */
+struct vfsmount *nfs4_submount(struct nfs_server *server, struct dentry *dentry,
+			       struct nfs_fh *fh, struct nfs_fattr *fattr)
+{
+	struct rpc_clnt *client;
+	struct vfsmount *mnt;
+	struct dentry *parent;
+
+	/* Look it up again to get its attributes and sec flavor */
+	parent = dget_parent(dentry);
+	client = nfs4_proc_lookup_mountpoint(parent->d_inode, &dentry->d_name,
+					     fh, fattr);
+	dput(parent);
+	if (IS_ERR(client))
+		return ERR_CAST(client);
+
+	if (!(fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL))
+		mnt = nfs_do_submount(dentry, fh, fattr,
+				      client->cl_auth->au_flavor);
+	else
+		mnt = nfs_do_refmount(client, dentry);
+
+	rpc_shutdown_client(client);
+	return mnt;
+}
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index d9f4d78c341..15fc7e4664e 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -52,9 +52,11 @@
#include <linux/namei.h>
#include <linux/mount.h>
#include <linux/module.h>
+#include <linux/nfs_idmap.h>
#include <linux/sunrpc/bc_xprt.h>
#include <linux/xattr.h>
#include <linux/utsname.h>
+#include <linux/freezer.h>
#include "nfs4_fs.h"
#include "delegation.h"
@@ -62,6 +64,7 @@
#include "iostat.h"
#include "callback.h"
#include "pnfs.h"
+#include "netns.h"
#define NFSDBG_FACILITY NFSDBG_PROC
@@ -70,18 +73,22 @@
#define NFS4_MAX_LOOP_ON_RECOVER (10)
+static unsigned short max_session_slots = NFS4_DEF_SLOT_TABLE_SIZE;
+
struct nfs4_opendata;
static int _nfs4_proc_open(struct nfs4_opendata *data);
static int _nfs4_recover_proc_open(struct nfs4_opendata *data);
static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *);
static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *, struct nfs4_state *);
+static void nfs_fixup_referral_attributes(struct nfs_fattr *fattr);
+static int nfs4_proc_getattr(struct nfs_server *, struct nfs_fh *, struct nfs_fattr *);
static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr);
static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred,
struct nfs_fattr *fattr, struct iattr *sattr,
struct nfs4_state *state);
#ifdef CONFIG_NFS_V4_1
-static int nfs41_test_stateid(struct nfs_server *, struct nfs4_state *);
-static int nfs41_free_stateid(struct nfs_server *, struct nfs4_state *);
+static int nfs41_test_stateid(struct nfs_server *, nfs4_stateid *);
+static int nfs41_free_stateid(struct nfs_server *, nfs4_stateid *);
#endif
/* Prevent leaks of NFSv4 errors into userland */
static int nfs4_map_errors(int err)
@@ -96,6 +103,10 @@ static int nfs4_map_errors(int err)
case -NFS4ERR_BADOWNER:
case -NFS4ERR_BADNAME:
return -EINVAL;
+ case -NFS4ERR_SHARE_DENIED:
+ return -EACCES;
+ case -NFS4ERR_MINOR_VERS_MISMATCH:
+ return -EPROTONOSUPPORT;
default:
dprintk("%s could not handle NFSv4 error %d\n",
__func__, -err);
@@ -107,7 +118,7 @@ static int nfs4_map_errors(int err)
/*
* This is our standard bitmap for GETATTR requests.
*/
-const u32 nfs4_fattr_bitmap[2] = {
+const u32 nfs4_fattr_bitmap[3] = {
FATTR4_WORD0_TYPE
| FATTR4_WORD0_CHANGE
| FATTR4_WORD0_SIZE
@@ -124,6 +135,24 @@ const u32 nfs4_fattr_bitmap[2] = {
| FATTR4_WORD1_TIME_MODIFY
};
+/*
+ * Three-word attribute bitmap: words 0 and 1 list the file attributes
+ * named below, word 2 adds FATTR4_WORD2_MDSTHRESHOLD.
+ * NOTE(review): going by the name this is presumably the bitmap sent
+ * with OPEN when pNFS is in use - confirm against the callers.
+ */
+static const u32 nfs4_pnfs_open_bitmap[3] = {
+ FATTR4_WORD0_TYPE
+ | FATTR4_WORD0_CHANGE
+ | FATTR4_WORD0_SIZE
+ | FATTR4_WORD0_FSID
+ | FATTR4_WORD0_FILEID,
+ FATTR4_WORD1_MODE
+ | FATTR4_WORD1_NUMLINKS
+ | FATTR4_WORD1_OWNER
+ | FATTR4_WORD1_OWNER_GROUP
+ | FATTR4_WORD1_RAWDEV
+ | FATTR4_WORD1_SPACE_USED
+ | FATTR4_WORD1_TIME_ACCESS
+ | FATTR4_WORD1_TIME_METADATA
+ | FATTR4_WORD1_TIME_MODIFY,
+ FATTR4_WORD2_MDSTHRESHOLD
+};
+
const u32 nfs4_statfs_bitmap[2] = {
FATTR4_WORD0_FILES_AVAIL
| FATTR4_WORD0_FILES_FREE
@@ -191,7 +220,7 @@ static void nfs4_setup_readdir(u64 cookie, __be32 *verifier, struct dentry *dent
* when talking to the server, we always send cookie 0
* instead of 1 or 2.
*/
- start = p = kmap_atomic(*readdir->pages, KM_USER0);
+ start = p = kmap_atomic(*readdir->pages);
if (cookie == 0) {
*p++ = xdr_one; /* next */
@@ -219,7 +248,7 @@ static void nfs4_setup_readdir(u64 cookie, __be32 *verifier, struct dentry *dent
readdir->pgbase = (char *)p - (char *)start;
readdir->count -= readdir->pgbase;
- kunmap_atomic(start, KM_USER0);
+ kunmap_atomic(start);
}
static int nfs4_wait_clnt_recover(struct nfs_client *clp)
@@ -243,7 +272,7 @@ static int nfs4_delay(struct rpc_clnt *clnt, long *timeout)
*timeout = NFS4_POLL_RETRY_MIN;
if (*timeout > NFS4_POLL_RETRY_MAX)
*timeout = NFS4_POLL_RETRY_MAX;
- schedule_timeout_killable(*timeout);
+ freezable_schedule_timeout_killable(*timeout);
if (fatal_signal_pending(current))
res = -ERESTARTSYS;
*timeout <<= 1;
@@ -257,17 +286,29 @@ static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struc
{
struct nfs_client *clp = server->nfs_client;
struct nfs4_state *state = exception->state;
+ struct inode *inode = exception->inode;
int ret = errorcode;
exception->retry = 0;
switch(errorcode) {
case 0:
return 0;
+ case -NFS4ERR_OPENMODE:
+ if (inode && nfs_have_delegation(inode, FMODE_READ)) {
+ nfs_inode_return_delegation(inode);
+ exception->retry = 1;
+ return 0;
+ }
+ if (state == NULL)
+ break;
+ nfs4_schedule_stateid_recovery(server, state);
+ goto wait_on_recovery;
+ case -NFS4ERR_DELEG_REVOKED:
case -NFS4ERR_ADMIN_REVOKED:
case -NFS4ERR_BAD_STATEID:
- case -NFS4ERR_OPENMODE:
if (state == NULL)
break;
+ nfs_remove_bad_delegation(state->inode);
nfs4_schedule_stateid_recovery(server, state);
goto wait_on_recovery;
case -NFS4ERR_EXPIRED:
@@ -287,7 +328,7 @@ static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struc
case -NFS4ERR_SEQ_MISORDERED:
dprintk("%s ERROR: %d Reset session\n", __func__,
errorcode);
- nfs4_schedule_session_recovery(clp->cl_session);
+ nfs4_schedule_session_recovery(clp->cl_session, errorcode);
exception->retry = 1;
break;
#endif /* defined(CONFIG_NFS_V4_1) */
@@ -358,17 +399,14 @@ static void renew_lease(const struct nfs_server *server, unsigned long timestamp
* When updating highest_used_slotid there may be "holes" in the bitmap
* so we need to scan down from highest_used_slotid to 0 looking for the now
* highest slotid in use.
- * If none found, highest_used_slotid is set to -1.
+ * If none found, highest_used_slotid is set to NFS4_NO_SLOT.
*
* Must be called while holding tbl->slot_tbl_lock
*/
static void
-nfs4_free_slot(struct nfs4_slot_table *tbl, struct nfs4_slot *free_slot)
+nfs4_free_slot(struct nfs4_slot_table *tbl, u32 slotid)
{
- int free_slotid = free_slot - tbl->slots;
- int slotid = free_slotid;
-
- BUG_ON(slotid < 0 || slotid >= NFS4_MAX_SLOT_TABLE);
+ BUG_ON(slotid >= NFS4_MAX_SLOT_TABLE);
/* clear used bit in bitmap */
__clear_bit(slotid, tbl->used_slots);
@@ -378,10 +416,16 @@ nfs4_free_slot(struct nfs4_slot_table *tbl, struct nfs4_slot *free_slot)
if (slotid < tbl->max_slots)
tbl->highest_used_slotid = slotid;
else
- tbl->highest_used_slotid = -1;
+ tbl->highest_used_slotid = NFS4_NO_SLOT;
}
- dprintk("%s: free_slotid %u highest_used_slotid %d\n", __func__,
- free_slotid, tbl->highest_used_slotid);
+ dprintk("%s: slotid %u highest_used_slotid %d\n", __func__,
+ slotid, tbl->highest_used_slotid);
+}
+
+/*
+ * Raise @task to RPC_PRIORITY_PRIVILEGED and always return true.
+ * @dummy is not used.  Passed as the callback to rpc_wake_up_first()
+ * when waking a task waiting on the fore channel slot table.
+ */
+bool nfs4_set_task_privileged(struct rpc_task *task, void *dummy)
+{
+ rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED);
+ return true;
}
/*
@@ -389,16 +433,13 @@ nfs4_free_slot(struct nfs4_slot_table *tbl, struct nfs4_slot *free_slot)
*/
static void nfs4_check_drain_fc_complete(struct nfs4_session *ses)
{
- struct rpc_task *task;
-
if (!test_bit(NFS4_SESSION_DRAINING, &ses->session_state)) {
- task = rpc_wake_up_next(&ses->fc_slot_table.slot_tbl_waitq);
- if (task)
- rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED);
+ rpc_wake_up_first(&ses->fc_slot_table.slot_tbl_waitq,
+ nfs4_set_task_privileged, NULL);
return;
}
- if (ses->fc_slot_table.highest_used_slotid != -1)
+ if (ses->fc_slot_table.highest_used_slotid != NFS4_NO_SLOT)
return;
dprintk("%s COMPLETE: Session Fore Channel Drained\n", __func__);
@@ -411,7 +452,7 @@ static void nfs4_check_drain_fc_complete(struct nfs4_session *ses)
void nfs4_check_drain_bc_complete(struct nfs4_session *ses)
{
if (!test_bit(NFS4_SESSION_DRAINING, &ses->session_state) ||
- ses->bc_slot_table.highest_used_slotid != -1)
+ ses->bc_slot_table.highest_used_slotid != NFS4_NO_SLOT)
return;
dprintk("%s COMPLETE: Session Back Channel Drained\n", __func__);
complete(&ses->bc_slot_table.complete);
@@ -430,7 +471,7 @@ static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res)
}
spin_lock(&tbl->slot_tbl_lock);
- nfs4_free_slot(tbl, res->sr_slot);
+ nfs4_free_slot(tbl, res->sr_slot - tbl->slots);
nfs4_check_drain_fc_complete(res->sr_session);
spin_unlock(&tbl->slot_tbl_lock);
res->sr_slot = NULL;
@@ -506,25 +547,25 @@ static int nfs4_sequence_done(struct rpc_task *task,
* nfs4_find_slot looks for an unset bit in the used_slots bitmap.
* If found, we mark the slot as used, update the highest_used_slotid,
* and respectively set up the sequence operation args.
- * The slot number is returned if found, or NFS4_MAX_SLOT_TABLE otherwise.
+ * The slot number is returned if found, or NFS4_NO_SLOT otherwise.
*
* Note: must be called with under the slot_tbl_lock.
*/
-static u8
+static u32
nfs4_find_slot(struct nfs4_slot_table *tbl)
{
- int slotid;
- u8 ret_id = NFS4_MAX_SLOT_TABLE;
- BUILD_BUG_ON((u8)NFS4_MAX_SLOT_TABLE != (int)NFS4_MAX_SLOT_TABLE);
+ u32 slotid;
+ u32 ret_id = NFS4_NO_SLOT;
- dprintk("--> %s used_slots=%04lx highest_used=%d max_slots=%d\n",
+ dprintk("--> %s used_slots=%04lx highest_used=%u max_slots=%u\n",
__func__, tbl->used_slots[0], tbl->highest_used_slotid,
tbl->max_slots);
slotid = find_first_zero_bit(tbl->used_slots, tbl->max_slots);
if (slotid >= tbl->max_slots)
goto out;
__set_bit(slotid, tbl->used_slots);
- if (slotid > tbl->highest_used_slotid)
+ if (slotid > tbl->highest_used_slotid ||
+ tbl->highest_used_slotid == NFS4_NO_SLOT)
tbl->highest_used_slotid = slotid;
ret_id = slotid;
out:
@@ -533,15 +574,25 @@ out:
return ret_id;
}
+/*
+ * Put the sequence arguments and result into their initial state: no
+ * session and no slot attached, and sa_cache_this set to 1 when
+ * @cache_reply is non-zero, 0 otherwise.
+ */
+static void nfs41_init_sequence(struct nfs4_sequence_args *args,
+		struct nfs4_sequence_res *res, int cache_reply)
+{
+	res->sr_slot = NULL;
+	res->sr_session = NULL;
+
+	args->sa_cache_this = cache_reply ? 1 : 0;
+	args->sa_session = NULL;
+}
+
int nfs41_setup_sequence(struct nfs4_session *session,
struct nfs4_sequence_args *args,
struct nfs4_sequence_res *res,
- int cache_reply,
struct rpc_task *task)
{
struct nfs4_slot *slot;
struct nfs4_slot_table *tbl;
- u8 slotid;
+ u32 slotid;
dprintk("--> %s\n", __func__);
/* slot already allocated? */
@@ -553,13 +604,10 @@ int nfs41_setup_sequence(struct nfs4_session *session,
spin_lock(&tbl->slot_tbl_lock);
if (test_bit(NFS4_SESSION_DRAINING, &session->session_state) &&
!rpc_task_has_priority(task, RPC_PRIORITY_PRIVILEGED)) {
- /*
- * The state manager will wait until the slot table is empty.
- * Schedule the reset thread
- */
+ /* The state manager will wait until the slot table is empty */
rpc_sleep_on(&tbl->slot_tbl_waitq, task, NULL);
spin_unlock(&tbl->slot_tbl_lock);
- dprintk("%s Schedule Session Reset\n", __func__);
+ dprintk("%s session is draining\n", __func__);
return -EAGAIN;
}
@@ -572,7 +620,7 @@ int nfs41_setup_sequence(struct nfs4_session *session,
}
slotid = nfs4_find_slot(tbl);
- if (slotid == NFS4_MAX_SLOT_TABLE) {
+ if (slotid == NFS4_NO_SLOT) {
rpc_sleep_on(&tbl->slot_tbl_waitq, task, NULL);
spin_unlock(&tbl->slot_tbl_lock);
dprintk("<-- %s: no free slots\n", __func__);
@@ -584,7 +632,6 @@ int nfs41_setup_sequence(struct nfs4_session *session,
slot = tbl->slots + slotid;
args->sa_session = session;
args->sa_slotid = slotid;
- args->sa_cache_this = cache_reply;
dprintk("<-- %s slotid=%d seqid=%d\n", __func__, slotid, slot->seq_nr);
@@ -604,24 +651,19 @@ EXPORT_SYMBOL_GPL(nfs41_setup_sequence);
int nfs4_setup_sequence(const struct nfs_server *server,
struct nfs4_sequence_args *args,
struct nfs4_sequence_res *res,
- int cache_reply,
struct rpc_task *task)
{
struct nfs4_session *session = nfs4_get_session(server);
int ret = 0;
- if (session == NULL) {
- args->sa_session = NULL;
- res->sr_session = NULL;
+ if (session == NULL)
goto out;
- }
dprintk("--> %s clp %p session %p sr_slot %td\n",
__func__, session->clp, session, res->sr_slot ?
res->sr_slot - session->fc_slot_table.slots : -1);
- ret = nfs41_setup_sequence(session, args, res, cache_reply,
- task);
+ ret = nfs41_setup_sequence(session, args, res, task);
out:
dprintk("<-- %s status=%d\n", __func__, ret);
return ret;
@@ -631,7 +673,6 @@ struct nfs41_call_sync_data {
const struct nfs_server *seq_server;
struct nfs4_sequence_args *seq_args;
struct nfs4_sequence_res *seq_res;
- int cache_reply;
};
static void nfs41_call_sync_prepare(struct rpc_task *task, void *calldata)
@@ -641,7 +682,7 @@ static void nfs41_call_sync_prepare(struct rpc_task *task, void *calldata)
dprintk("--> %s data->seq_server %p\n", __func__, data->seq_server);
if (nfs4_setup_sequence(data->seq_server, data->seq_args,
- data->seq_res, data->cache_reply, task))
+ data->seq_res, task))
return;
rpc_call_start(task);
}
@@ -659,12 +700,12 @@ static void nfs41_call_sync_done(struct rpc_task *task, void *calldata)
nfs41_sequence_done(task, data->seq_res);
}
-struct rpc_call_ops nfs41_call_sync_ops = {
+static const struct rpc_call_ops nfs41_call_sync_ops = {
.rpc_call_prepare = nfs41_call_sync_prepare,
.rpc_call_done = nfs41_call_sync_done,
};
-struct rpc_call_ops nfs41_call_priv_sync_ops = {
+static const struct rpc_call_ops nfs41_call_priv_sync_ops = {
.rpc_call_prepare = nfs41_call_priv_sync_prepare,
.rpc_call_done = nfs41_call_sync_done,
};
@@ -674,7 +715,6 @@ static int nfs4_call_sync_sequence(struct rpc_clnt *clnt,
struct rpc_message *msg,
struct nfs4_sequence_args *args,
struct nfs4_sequence_res *res,
- int cache_reply,
int privileged)
{
int ret;
@@ -683,7 +723,6 @@ static int nfs4_call_sync_sequence(struct rpc_clnt *clnt,
.seq_server = server,
.seq_args = args,
.seq_res = res,
- .cache_reply = cache_reply,
};
struct rpc_task_setup task_setup = {
.rpc_client = clnt,
@@ -692,7 +731,6 @@ static int nfs4_call_sync_sequence(struct rpc_clnt *clnt,
.callback_data = &data
};
- res->sr_slot = NULL;
if (privileged)
task_setup.callback_ops = &nfs41_call_priv_sync_ops;
task = rpc_run_task(&task_setup);
@@ -712,10 +750,17 @@ int _nfs4_call_sync_session(struct rpc_clnt *clnt,
struct nfs4_sequence_res *res,
int cache_reply)
{
- return nfs4_call_sync_sequence(clnt, server, msg, args, res, cache_reply, 0);
+ nfs41_init_sequence(args, res, cache_reply);
+ return nfs4_call_sync_sequence(clnt, server, msg, args, res, 0);
}
#else
+/*
+ * Empty stand-in for nfs41_init_sequence() on the #else side of the
+ * surrounding conditional; all three arguments are ignored.
+ */
+static inline
+void nfs41_init_sequence(struct nfs4_sequence_args *args,
+ struct nfs4_sequence_res *res, int cache_reply)
+{
+}
+
static int nfs4_sequence_done(struct rpc_task *task,
struct nfs4_sequence_res *res)
{
@@ -730,7 +775,7 @@ int _nfs4_call_sync(struct rpc_clnt *clnt,
struct nfs4_sequence_res *res,
int cache_reply)
{
- args->sa_session = res->sr_session = NULL;
+ nfs41_init_sequence(args, res, cache_reply);
return rpc_call_sync(clnt, msg, 0);
}
@@ -751,7 +796,7 @@ static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo)
struct nfs_inode *nfsi = NFS_I(dir);
spin_lock(&dir->i_lock);
- nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE|NFS_INO_INVALID_DATA;
+ nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
if (!cinfo->atomic || cinfo->before != dir->i_version)
nfs_force_lookup_revalidate(dir);
dir->i_version = cinfo->after;
@@ -764,8 +809,9 @@ struct nfs4_opendata {
struct nfs_openres o_res;
struct nfs_open_confirmargs c_arg;
struct nfs_open_confirmres c_res;
+ struct nfs4_string owner_name;
+ struct nfs4_string group_name;
struct nfs_fattr f_attr;
- struct nfs_fattr dir_attr;
struct dentry *dir;
struct dentry *dentry;
struct nfs4_state_owner *owner;
@@ -781,12 +827,11 @@ struct nfs4_opendata {
static void nfs4_init_opendata_res(struct nfs4_opendata *p)
{
p->o_res.f_attr = &p->f_attr;
- p->o_res.dir_attr = &p->dir_attr;
p->o_res.seqid = p->o_arg.seqid;
p->c_res.seqid = p->c_arg.seqid;
p->o_res.server = p->o_arg.server;
nfs_fattr_init(&p->f_attr);
- nfs_fattr_init(&p->dir_attr);
+ nfs_fattr_init_names(&p->f_attr, &p->owner_name, &p->group_name);
}
static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry,
@@ -814,19 +859,23 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry,
p->o_arg.open_flags = flags;
p->o_arg.fmode = fmode & (FMODE_READ|FMODE_WRITE);
p->o_arg.clientid = server->nfs_client->cl_clientid;
- p->o_arg.id = sp->so_owner_id.id;
+ p->o_arg.id.create_time = ktime_to_ns(sp->so_seqid.create_time);
+ p->o_arg.id.uniquifier = sp->so_seqid.owner_id;
p->o_arg.name = &dentry->d_name;
p->o_arg.server = server;
p->o_arg.bitmask = server->attr_bitmask;
+ p->o_arg.open_bitmap = &nfs4_fattr_bitmap[0];
p->o_arg.claim = NFS4_OPEN_CLAIM_NULL;
- if (flags & O_CREAT) {
- u32 *s;
+ if (attrs != NULL && attrs->ia_valid != 0) {
+ __be32 verf[2];
p->o_arg.u.attrs = &p->attrs;
memcpy(&p->attrs, attrs, sizeof(p->attrs));
- s = (u32 *) p->o_arg.u.verifier.data;
- s[0] = jiffies;
- s[1] = current->pid;
+
+ verf[0] = jiffies;
+ verf[1] = current->pid;
+ memcpy(p->o_arg.u.verifier.data, verf,
+ sizeof(p->o_arg.u.verifier.data));
}
p->c_arg.fh = &p->o_res.fh;
p->c_arg.stateid = &p->o_res.stateid;
@@ -854,6 +903,7 @@ static void nfs4_opendata_free(struct kref *kref)
dput(p->dir);
dput(p->dentry);
nfs_sb_deactive(sb);
+ nfs_fattr_free_names(&p->f_attr);
kfree(p);
}
@@ -875,7 +925,7 @@ static int can_open_cached(struct nfs4_state *state, fmode_t mode, int open_mode
{
int ret = 0;
- if (open_mode & O_EXCL)
+ if (open_mode & (O_EXCL|O_TRUNC))
goto out;
switch (mode & (FMODE_READ|FMODE_WRITE)) {
case FMODE_READ:
@@ -924,8 +974,8 @@ static void update_open_stateflags(struct nfs4_state *state, fmode_t fmode)
static void nfs_set_open_stateid_locked(struct nfs4_state *state, nfs4_stateid *stateid, fmode_t fmode)
{
if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0)
- memcpy(state->stateid.data, stateid->data, sizeof(state->stateid.data));
- memcpy(state->open_stateid.data, stateid->data, sizeof(state->open_stateid.data));
+ nfs4_stateid_copy(&state->stateid, stateid);
+ nfs4_stateid_copy(&state->open_stateid, stateid);
switch (fmode) {
case FMODE_READ:
set_bit(NFS_O_RDONLY_STATE, &state->flags);
@@ -953,7 +1003,7 @@ static void __update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_s
*/
write_seqlock(&state->seqlock);
if (deleg_stateid != NULL) {
- memcpy(state->stateid.data, deleg_stateid->data, sizeof(state->stateid.data));
+ nfs4_stateid_copy(&state->stateid, deleg_stateid);
set_bit(NFS_DELEGATED_STATE, &state->flags);
}
if (open_stateid != NULL)
@@ -984,7 +1034,7 @@ static int update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_stat
if (delegation == NULL)
delegation = &deleg_cur->stateid;
- else if (memcmp(deleg_cur->stateid.data, delegation->data, NFS4_STATEID_SIZE) != 0)
+ else if (!nfs4_stateid_match(&deleg_cur->stateid, delegation))
goto no_delegation_unlock;
nfs_mark_delegation_referenced(deleg_cur);
@@ -1023,7 +1073,7 @@ static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata)
struct nfs4_state *state = opendata->state;
struct nfs_inode *nfsi = NFS_I(state->inode);
struct nfs_delegation *delegation;
- int open_mode = opendata->o_arg.open_flags & O_EXCL;
+ int open_mode = opendata->o_arg.open_flags & (O_EXCL|O_TRUNC);
fmode_t fmode = opendata->o_arg.fmode;
nfs4_stateid stateid;
int ret = -EAGAIN;
@@ -1045,7 +1095,7 @@ static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata)
break;
}
/* Save the delegation */
- memcpy(stateid.data, delegation->stateid.data, sizeof(stateid.data));
+ nfs4_stateid_copy(&stateid, &delegation->stateid);
rcu_read_unlock();
ret = nfs_may_open(state->inode, state->owner->so_cred, open_mode);
if (ret != 0)
@@ -1087,6 +1137,7 @@ static struct nfs4_state *nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data
if (state == NULL)
goto err_put_inode;
if (data->o_res.delegation_type != 0) {
+ struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
int delegation_flags = 0;
rcu_read_lock();
@@ -1098,7 +1149,7 @@ static struct nfs4_state *nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data
pr_err_ratelimited("NFS: Broken NFSv4 server %s is "
"returning a delegation for "
"OPEN(CLAIM_DELEGATE_CUR)\n",
- NFS_CLIENT(inode)->cl_server);
+ clp->cl_hostname);
} else if ((delegation_flags & 1UL<<NFS_DELEGATION_NEED_RECLAIM) == 0)
nfs_inode_set_delegation(state->inode,
data->owner->so_cred,
@@ -1207,10 +1258,10 @@ static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state *
* Check if we need to update the current stateid.
*/
if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0 &&
- memcmp(state->stateid.data, state->open_stateid.data, sizeof(state->stateid.data)) != 0) {
+ !nfs4_stateid_match(&state->stateid, &state->open_stateid)) {
write_seqlock(&state->seqlock);
if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0)
- memcpy(state->stateid.data, state->open_stateid.data, sizeof(state->stateid.data));
+ nfs4_stateid_copy(&state->stateid, &state->open_stateid);
write_sequnlock(&state->seqlock);
}
return 0;
@@ -1279,8 +1330,7 @@ static int _nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs
if (IS_ERR(opendata))
return PTR_ERR(opendata);
opendata->o_arg.claim = NFS4_OPEN_CLAIM_DELEGATE_CUR;
- memcpy(opendata->o_arg.u.delegation.data, stateid->data,
- sizeof(opendata->o_arg.u.delegation.data));
+ nfs4_stateid_copy(&opendata->o_arg.u.delegation, stateid);
ret = nfs4_open_recover(opendata, state);
nfs4_opendata_put(opendata);
return ret;
@@ -1303,7 +1353,7 @@ int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state
case -NFS4ERR_BAD_HIGH_SLOT:
case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
case -NFS4ERR_DEADSESSION:
- nfs4_schedule_session_recovery(server->nfs_client->cl_session);
+ nfs4_schedule_session_recovery(server->nfs_client->cl_session, err);
goto out;
case -NFS4ERR_STALE_CLIENTID:
case -NFS4ERR_STALE_STATEID:
@@ -1316,8 +1366,11 @@ int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state
* The show must go on: exit, but mark the
* stateid as needing recovery.
*/
+ case -NFS4ERR_DELEG_REVOKED:
case -NFS4ERR_ADMIN_REVOKED:
case -NFS4ERR_BAD_STATEID:
+ nfs_inode_find_state_and_recover(state->inode,
+ stateid);
nfs4_schedule_stateid_recovery(server, state);
case -EKEYEXPIRED:
/*
@@ -1342,8 +1395,7 @@ static void nfs4_open_confirm_done(struct rpc_task *task, void *calldata)
data->rpc_status = task->tk_status;
if (data->rpc_status == 0) {
- memcpy(data->o_res.stateid.data, data->c_res.stateid.data,
- sizeof(data->o_res.stateid.data));
+ nfs4_stateid_copy(&data->o_res.stateid, &data->c_res.stateid);
nfs_confirm_seqid(&data->owner->so_seqid, 0);
renew_lease(data->o_res.server, data->timestamp);
data->rpc_done = 1;
@@ -1436,8 +1488,7 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata)
goto unlock_no_action;
rcu_read_unlock();
}
- /* Update sequence id. */
- data->o_arg.id = sp->so_owner_id.id;
+ /* Update client id. */
data->o_arg.clientid = sp->so_server->nfs_client->cl_clientid;
if (data->o_arg.claim == NFS4_OPEN_CLAIM_PREVIOUS) {
task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_NOATTR];
@@ -1446,7 +1497,7 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata)
data->timestamp = jiffies;
if (nfs4_setup_sequence(data->o_arg.server,
&data->o_arg.seq_args,
- &data->o_res.seq_res, 1, task))
+ &data->o_res.seq_res, task))
return;
rpc_call_start(task);
return;
@@ -1548,6 +1599,7 @@ static int nfs4_run_open_task(struct nfs4_opendata *data, int isrecover)
};
int status;
+ nfs41_init_sequence(&o_arg->seq_args, &o_res->seq_res, 1);
kref_get(&data->kref);
data->rpc_done = 0;
data->rpc_status = 0;
@@ -1578,7 +1630,7 @@ static int _nfs4_recover_proc_open(struct nfs4_opendata *data)
if (status != 0 || !data->rpc_done)
return status;
- nfs_refresh_inode(dir, o_res->dir_attr);
+ nfs_fattr_map_and_free_names(NFS_SERVER(dir), &data->f_attr);
if (o_res->rflags & NFS4_OPEN_RESULT_CONFIRM) {
status = _nfs4_proc_open_confirm(data);
@@ -1610,11 +1662,10 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
return status;
}
- if (o_arg->open_flags & O_CREAT) {
+ nfs_fattr_map_and_free_names(server, &data->f_attr);
+
+ if (o_arg->open_flags & O_CREAT)
update_changeattr(dir, &o_res->cinfo);
- nfs_post_op_update_inode(dir, o_res->dir_attr);
- } else
- nfs_refresh_inode(dir, o_res->dir_attr);
if ((o_res->rflags & NFS4_OPEN_RESULT_LOCKTYPE_POSIX) == 0)
server->caps &= ~NFS_CAP_POSIX_LOCK;
if(o_res->rflags & NFS4_OPEN_RESULT_CONFIRM) {
@@ -1705,15 +1756,32 @@ static int nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *sta
}
#if defined(CONFIG_NFS_V4_1)
-static int nfs41_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state)
+static int nfs41_check_expired_stateid(struct nfs4_state *state, nfs4_stateid *stateid, unsigned int flags)
{
- int status;
+ int status = NFS_OK;
struct nfs_server *server = NFS_SERVER(state->inode);
- status = nfs41_test_stateid(server, state);
- if (status == NFS_OK)
- return 0;
- nfs41_free_stateid(server, state);
+ if (state->flags & flags) {
+ status = nfs41_test_stateid(server, stateid);
+ if (status != NFS_OK) {
+ nfs41_free_stateid(server, stateid);
+ state->flags &= ~flags;
+ }
+ }
+ return status;
+}
+
+static int nfs41_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state)
+{
+ int deleg_status, open_status;
+ int deleg_flags = 1 << NFS_DELEGATED_STATE;
+ int open_flags = (1 << NFS_O_RDONLY_STATE) | (1 << NFS_O_WRONLY_STATE) | (1 << NFS_O_RDWR_STATE);
+
+ deleg_status = nfs41_check_expired_stateid(state, &state->stateid, deleg_flags);
+ open_status = nfs41_check_expired_stateid(state, &state->open_stateid, open_flags);
+
+ if ((deleg_status == NFS_OK) && (open_status == NFS_OK))
+ return NFS_OK;
return nfs4_open_expired(sp, state);
}
#endif
@@ -1737,7 +1805,14 @@ static inline void nfs4_exclusive_attrset(struct nfs4_opendata *opendata, struct
/*
* Returns a referenced nfs4_state
*/
-static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, fmode_t fmode, int flags, struct iattr *sattr, struct rpc_cred *cred, struct nfs4_state **res)
+static int _nfs4_do_open(struct inode *dir,
+ struct dentry *dentry,
+ fmode_t fmode,
+ int flags,
+ struct iattr *sattr,
+ struct rpc_cred *cred,
+ struct nfs4_state **res,
+ struct nfs4_threshold **ctx_th)
{
struct nfs4_state_owner *sp;
struct nfs4_state *state = NULL;
@@ -1747,7 +1822,8 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, fmode_t fmode
/* Protect against reboot recovery conflicts */
status = -ENOMEM;
- if (!(sp = nfs4_get_state_owner(server, cred))) {
+ sp = nfs4_get_state_owner(server, cred, GFP_KERNEL);
+ if (sp == NULL) {
dprintk("nfs4_do_open: nfs4_get_state_owner failed!\n");
goto out_err;
}
@@ -1761,6 +1837,12 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, fmode_t fmode
if (opendata == NULL)
goto err_put_state_owner;
+ if (ctx_th && server->attr_bitmask[2] & FATTR4_WORD2_MDSTHRESHOLD) {
+ opendata->f_attr.mdsthreshold = pnfs_mdsthreshold_alloc();
+ if (!opendata->f_attr.mdsthreshold)
+ goto err_opendata_put;
+ opendata->o_arg.open_bitmap = &nfs4_pnfs_open_bitmap[0];
+ }
if (dentry->d_inode != NULL)
opendata->state = nfs4_get_open_state(dentry->d_inode, sp);
@@ -1786,11 +1868,19 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, fmode_t fmode
nfs_setattr_update_inode(state->inode, sattr);
nfs_post_op_update_inode(state->inode, opendata->o_res.f_attr);
}
+
+ if (pnfs_use_threshold(ctx_th, opendata->f_attr.mdsthreshold, server))
+ *ctx_th = opendata->f_attr.mdsthreshold;
+ else
+ kfree(opendata->f_attr.mdsthreshold);
+ opendata->f_attr.mdsthreshold = NULL;
+
nfs4_opendata_put(opendata);
nfs4_put_state_owner(sp);
*res = state;
return 0;
err_opendata_put:
+ kfree(opendata->f_attr.mdsthreshold);
nfs4_opendata_put(opendata);
err_put_state_owner:
nfs4_put_state_owner(sp);
@@ -1800,14 +1890,22 @@ out_err:
}
-static struct nfs4_state *nfs4_do_open(struct inode *dir, struct dentry *dentry, fmode_t fmode, int flags, struct iattr *sattr, struct rpc_cred *cred)
+static struct nfs4_state *nfs4_do_open(struct inode *dir,
+ struct dentry *dentry,
+ fmode_t fmode,
+ int flags,
+ struct iattr *sattr,
+ struct rpc_cred *cred,
+ struct nfs4_threshold **ctx_th)
{
struct nfs4_exception exception = { };
struct nfs4_state *res;
int status;
+ fmode &= FMODE_READ|FMODE_WRITE;
do {
- status = _nfs4_do_open(dir, dentry, fmode, flags, sattr, cred, &res);
+ status = _nfs4_do_open(dir, dentry, fmode, flags, sattr, cred,
+ &res, ctx_th);
if (status == 0)
break;
/* NOTE: BAD_SEQID means the server and client disagree about the
@@ -1822,7 +1920,7 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir, struct dentry *dentry,
* the user though...
*/
if (status == -NFS4ERR_BAD_SEQID) {
- printk(KERN_WARNING "NFS: v4 server %s "
+ pr_warn_ratelimited("NFS: v4 server %s "
" returned a bad sequence-id error!\n",
NFS_SERVER(dir)->nfs_client->cl_hostname);
exception.retry = 1;
@@ -1875,12 +1973,14 @@ static int _nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred,
nfs_fattr_init(fattr);
- if (nfs4_copy_delegation_stateid(&arg.stateid, inode)) {
+ if (state != NULL) {
+ nfs4_select_rw_stateid(&arg.stateid, state, FMODE_WRITE,
+ current->files, current->tgid);
+ } else if (nfs4_copy_delegation_stateid(&arg.stateid, inode,
+ FMODE_WRITE)) {
/* Use that stateid */
- } else if (state != NULL) {
- nfs4_copy_stateid(&arg.stateid, state, current->files, current->tgid);
} else
- memcpy(&arg.stateid, &zero_stateid, sizeof(arg.stateid));
+ nfs4_stateid_copy(&arg.stateid, &zero_stateid);
status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1);
if (status == 0 && state != NULL)
@@ -1893,13 +1993,25 @@ static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred,
struct nfs4_state *state)
{
struct nfs_server *server = NFS_SERVER(inode);
- struct nfs4_exception exception = { };
+ struct nfs4_exception exception = {
+ .state = state,
+ .inode = inode,
+ };
int err;
do {
- err = nfs4_handle_exception(server,
- _nfs4_do_setattr(inode, cred, fattr, sattr, state),
- &exception);
+ err = _nfs4_do_setattr(inode, cred, fattr, sattr, state);
+ switch (err) {
+ case -NFS4ERR_OPENMODE:
+ if (state && !(state->state & FMODE_WRITE)) {
+ err = -EBADF;
+ if (sattr->ia_valid & ATTR_OPEN)
+ err = -EACCES;
+ goto out;
+ }
+ }
+ err = nfs4_handle_exception(server, err, &exception);
} while (exception.retry);
+out:
return err;
}
@@ -1947,6 +2059,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
struct nfs4_state *state = calldata->state;
struct nfs_server *server = NFS_SERVER(calldata->inode);
+ dprintk("%s: begin!\n", __func__);
if (!nfs4_sequence_done(task, &calldata->res.seq_res))
return;
/* hmm. we are done with the inode, and in the process of freeing
@@ -1974,6 +2087,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
}
nfs_release_seqid(calldata->arg.seqid);
nfs_refresh_inode(calldata->inode, calldata->res.fattr);
+ dprintk("%s: done, ret = %d!\n", __func__, task->tk_status);
}
static void nfs4_close_prepare(struct rpc_task *task, void *data)
@@ -1982,6 +2096,7 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
struct nfs4_state *state = calldata->state;
int call_close = 0;
+ dprintk("%s: begin!\n", __func__);
if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0)
return;
@@ -2006,7 +2121,7 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
if (!call_close) {
/* Note: exit _without_ calling nfs4_close_done */
task->tk_action = NULL;
- return;
+ goto out;
}
if (calldata->arg.fmode == 0) {
@@ -2015,17 +2130,20 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
pnfs_roc_drain(calldata->inode, &calldata->roc_barrier)) {
rpc_sleep_on(&NFS_SERVER(calldata->inode)->roc_rpcwaitq,
task, NULL);
- return;
+ goto out;
}
}
nfs_fattr_init(calldata->res.fattr);
calldata->timestamp = jiffies;
if (nfs4_setup_sequence(NFS_SERVER(calldata->inode),
- &calldata->arg.seq_args, &calldata->res.seq_res,
- 1, task))
- return;
+ &calldata->arg.seq_args,
+ &calldata->res.seq_res,
+ task))
+ goto out;
rpc_call_start(task);
+out:
+ dprintk("%s: done!\n", __func__);
}
static const struct rpc_call_ops nfs4_close_ops = {
@@ -2067,6 +2185,7 @@ int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait, bool roc)
calldata = kzalloc(sizeof(*calldata), gfp_mask);
if (calldata == NULL)
goto out;
+ nfs41_init_sequence(&calldata->arg.seq_args, &calldata->res.seq_res, 1);
calldata->inode = state->inode;
calldata->state = state;
calldata->arg.fh = NFS_FH(state->inode);
@@ -2110,7 +2229,8 @@ nfs4_atomic_open(struct inode *dir, struct nfs_open_context *ctx, int open_flags
struct nfs4_state *state;
/* Protect against concurrent sillydeletes */
- state = nfs4_do_open(dir, ctx->dentry, ctx->mode, open_flags, attr, ctx->cred);
+ state = nfs4_do_open(dir, ctx->dentry, ctx->mode, open_flags, attr,
+ ctx->cred, &ctx->mdsthreshold);
if (IS_ERR(state))
return ERR_CAST(state);
ctx->state = state;
@@ -2175,6 +2295,7 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f
server->cache_consistency_bitmask[0] &= FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE;
server->cache_consistency_bitmask[1] &= FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY;
server->acl_bitmask = res.acl_bitmask;
+ server->fh_expire_type = res.fh_expire_type;
}
return status;
@@ -2223,11 +2344,12 @@ static int nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle,
switch (err) {
case 0:
case -NFS4ERR_WRONGSEC:
- break;
+ goto out;
default:
err = nfs4_handle_exception(server, err, &exception);
}
} while (exception.retry);
+out:
return err;
}
@@ -2278,8 +2400,8 @@ static int nfs4_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle,
/*
* get the file handle for the "/" directory on the server
*/
-static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
- struct nfs_fsinfo *info)
+int nfs4_proc_get_rootfh(struct nfs_server *server, struct nfs_fh *fhandle,
+ struct nfs_fsinfo *info)
{
int minor_version = server->nfs_client->cl_minorversion;
int status = nfs4_lookup_root(server, fhandle, info);
@@ -2296,14 +2418,39 @@ static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
return nfs4_map_errors(status);
}
-static void nfs_fixup_referral_attributes(struct nfs_fattr *fattr);
+static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *mntfh,
+ struct nfs_fsinfo *info)
+{
+ int error;
+ struct nfs_fattr *fattr = info->fattr;
+
+ error = nfs4_server_capabilities(server, mntfh);
+ if (error < 0) {
+ dprintk("nfs4_get_root: getcaps error = %d\n", -error);
+ return error;
+ }
+
+ error = nfs4_proc_getattr(server, mntfh, fattr);
+ if (error < 0) {
+ dprintk("nfs4_get_root: getattr error = %d\n", -error);
+ return error;
+ }
+
+ if (fattr->valid & NFS_ATTR_FATTR_FSID &&
+ !nfs_fsid_equal(&server->fsid, &fattr->fsid))
+ memcpy(&server->fsid, &fattr->fsid, sizeof(server->fsid));
+
+ return error;
+}
+
/*
* Get locations and (maybe) other attributes of a referral.
* Note that we'll actually follow the referral later when
* we detect fsid mismatch in inode revalidation
*/
-static int nfs4_get_referral(struct inode *dir, const struct qstr *name,
- struct nfs_fattr *fattr, struct nfs_fh *fhandle)
+static int nfs4_get_referral(struct rpc_clnt *client, struct inode *dir,
+ const struct qstr *name, struct nfs_fattr *fattr,
+ struct nfs_fh *fhandle)
{
int status = -ENOMEM;
struct page *page = NULL;
@@ -2316,7 +2463,7 @@ static int nfs4_get_referral(struct inode *dir, const struct qstr *name,
if (locations == NULL)
goto out;
- status = nfs4_proc_fs_locations(dir, name, locations, page);
+ status = nfs4_proc_fs_locations(client, dir, name, locations, page);
if (status != 0)
goto out;
/* Make sure server returned a different fsid for the referral */
@@ -2402,6 +2549,14 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
nfs_fattr_init(fattr);
+ /* Deal with open(O_TRUNC) */
+ if (sattr->ia_valid & ATTR_OPEN)
+ sattr->ia_valid &= ~(ATTR_MTIME|ATTR_CTIME|ATTR_OPEN);
+
+ /* Optimization: if the end result is no change, don't RPC */
+ if ((sattr->ia_valid & ~(ATTR_FILE)) == 0)
+ return 0;
+
/* Search for an existing open(O_WRITE) file */
if (sattr->ia_valid & ATTR_FILE) {
struct nfs_open_context *ctx;
@@ -2449,45 +2604,90 @@ static int _nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir,
return status;
}
-void nfs_fixup_secinfo_attributes(struct nfs_fattr *fattr, struct nfs_fh *fh)
+static void nfs_fixup_secinfo_attributes(struct nfs_fattr *fattr)
{
- memset(fh, 0, sizeof(struct nfs_fh));
- fattr->fsid.major = 1;
fattr->valid |= NFS_ATTR_FATTR_TYPE | NFS_ATTR_FATTR_MODE |
- NFS_ATTR_FATTR_NLINK | NFS_ATTR_FATTR_FSID | NFS_ATTR_FATTR_MOUNTPOINT;
+ NFS_ATTR_FATTR_NLINK | NFS_ATTR_FATTR_MOUNTPOINT;
fattr->mode = S_IFDIR | S_IRUGO | S_IXUGO;
fattr->nlink = 2;
}
-static int nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, struct qstr *name,
- struct nfs_fh *fhandle, struct nfs_fattr *fattr)
+static int nfs4_proc_lookup_common(struct rpc_clnt **clnt, struct inode *dir,
+ struct qstr *name, struct nfs_fh *fhandle,
+ struct nfs_fattr *fattr)
{
struct nfs4_exception exception = { };
+ struct rpc_clnt *client = *clnt;
int err;
do {
- int status;
-
- status = _nfs4_proc_lookup(clnt, dir, name, fhandle, fattr);
- switch (status) {
+ err = _nfs4_proc_lookup(client, dir, name, fhandle, fattr);
+ switch (err) {
case -NFS4ERR_BADNAME:
- return -ENOENT;
+ err = -ENOENT;
+ goto out;
case -NFS4ERR_MOVED:
- return nfs4_get_referral(dir, name, fattr, fhandle);
+ err = nfs4_get_referral(client, dir, name, fattr, fhandle);
+ goto out;
case -NFS4ERR_WRONGSEC:
- nfs_fixup_secinfo_attributes(fattr, fhandle);
+ err = -EPERM;
+ if (client != *clnt)
+ goto out;
+
+ client = nfs4_create_sec_client(client, dir, name);
+ if (IS_ERR(client))
+ return PTR_ERR(client);
+
+ exception.retry = 1;
+ break;
+ default:
+ err = nfs4_handle_exception(NFS_SERVER(dir), err, &exception);
}
- err = nfs4_handle_exception(NFS_SERVER(dir),
- status, &exception);
} while (exception.retry);
+
+out:
+ if (err == 0)
+ *clnt = client;
+ else if (client != *clnt)
+ rpc_shutdown_client(client);
+
return err;
}
+static int nfs4_proc_lookup(struct inode *dir, struct qstr *name,
+ struct nfs_fh *fhandle, struct nfs_fattr *fattr)
+{
+ int status;
+ struct rpc_clnt *client = NFS_CLIENT(dir);
+
+ status = nfs4_proc_lookup_common(&client, dir, name, fhandle, fattr);
+ if (client != NFS_CLIENT(dir)) {
+ rpc_shutdown_client(client);
+ nfs_fixup_secinfo_attributes(fattr);
+ }
+ return status;
+}
+
+struct rpc_clnt *
+nfs4_proc_lookup_mountpoint(struct inode *dir, struct qstr *name,
+ struct nfs_fh *fhandle, struct nfs_fattr *fattr)
+{
+ int status;
+ struct rpc_clnt *client = rpc_clone_client(NFS_CLIENT(dir));
+
+ status = nfs4_proc_lookup_common(&client, dir, name, fhandle, fattr);
+ if (status < 0) {
+ rpc_shutdown_client(client);
+ return ERR_PTR(status);
+ }
+ return client;
+}
+
static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry)
{
struct nfs_server *server = NFS_SERVER(inode);
struct nfs4_accessargs args = {
.fh = NFS_FH(inode),
- .bitmask = server->attr_bitmask,
+ .bitmask = server->cache_consistency_bitmask,
};
struct nfs4_accessres res = {
.server = server,
@@ -2636,7 +2836,7 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
fmode = ctx->mode;
}
sattr->ia_mode &= ~current_umask();
- state = nfs4_do_open(dir, de, fmode, flags, sattr, cred);
+ state = nfs4_do_open(dir, de, fmode, flags, sattr, cred, NULL);
d_drop(dentry);
if (IS_ERR(state)) {
status = PTR_ERR(state);
@@ -2657,9 +2857,7 @@ static int _nfs4_proc_remove(struct inode *dir, struct qstr *name)
struct nfs_server *server = NFS_SERVER(dir);
struct nfs_removeargs args = {
.fh = NFS_FH(dir),
- .name.len = name->len,
- .name.name = name->name,
- .bitmask = server->attr_bitmask,
+ .name = *name,
};
struct nfs_removeres res = {
.server = server,
@@ -2669,19 +2867,11 @@ static int _nfs4_proc_remove(struct inode *dir, struct qstr *name)
.rpc_argp = &args,
.rpc_resp = &res,
};
- int status = -ENOMEM;
-
- res.dir_attr = nfs_alloc_fattr();
- if (res.dir_attr == NULL)
- goto out;
+ int status;
status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 1);
- if (status == 0) {
+ if (status == 0)
update_changeattr(dir, &res.cinfo);
- nfs_post_op_update_inode(dir, res.dir_attr);
- }
- nfs_free_fattr(res.dir_attr);
-out:
return status;
}
@@ -2703,10 +2893,19 @@ static void nfs4_proc_unlink_setup(struct rpc_message *msg, struct inode *dir)
struct nfs_removeargs *args = msg->rpc_argp;
struct nfs_removeres *res = msg->rpc_resp;
- args->bitmask = server->cache_consistency_bitmask;
res->server = server;
- res->seq_res.sr_slot = NULL;
msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_REMOVE];
+ nfs41_init_sequence(&args->seq_args, &res->seq_res, 1);
+}
+
+static void nfs4_proc_unlink_rpc_prepare(struct rpc_task *task, struct nfs_unlinkdata *data)
+{
+ if (nfs4_setup_sequence(NFS_SERVER(data->dir),
+ &data->args.seq_args,
+ &data->res.seq_res,
+ task))
+ return;
+ rpc_call_start(task);
}
static int nfs4_proc_unlink_done(struct rpc_task *task, struct inode *dir)
@@ -2718,7 +2917,6 @@ static int nfs4_proc_unlink_done(struct rpc_task *task, struct inode *dir)
if (nfs4_async_handle_error(task, res->server, NULL) == -EAGAIN)
return 0;
update_changeattr(dir, &res->cinfo);
- nfs_post_op_update_inode(dir, res->dir_attr);
return 1;
}
@@ -2729,8 +2927,18 @@ static void nfs4_proc_rename_setup(struct rpc_message *msg, struct inode *dir)
struct nfs_renameres *res = msg->rpc_resp;
msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENAME];
- arg->bitmask = server->attr_bitmask;
res->server = server;
+ nfs41_init_sequence(&arg->seq_args, &res->seq_res, 1);
+}
+
+static void nfs4_proc_rename_rpc_prepare(struct rpc_task *task, struct nfs_renamedata *data)
+{
+ if (nfs4_setup_sequence(NFS_SERVER(data->old_dir),
+ &data->args.seq_args,
+ &data->res.seq_res,
+ task))
+ return;
+ rpc_call_start(task);
}
static int nfs4_proc_rename_done(struct rpc_task *task, struct inode *old_dir,
@@ -2744,9 +2952,7 @@ static int nfs4_proc_rename_done(struct rpc_task *task, struct inode *old_dir,
return 0;
update_changeattr(old_dir, &res->old_cinfo);
- nfs_post_op_update_inode(old_dir, res->old_fattr);
update_changeattr(new_dir, &res->new_cinfo);
- nfs_post_op_update_inode(new_dir, res->new_fattr);
return 1;
}
@@ -2759,7 +2965,6 @@ static int _nfs4_proc_rename(struct inode *old_dir, struct qstr *old_name,
.new_dir = NFS_FH(new_dir),
.old_name = old_name,
.new_name = new_name,
- .bitmask = server->attr_bitmask,
};
struct nfs_renameres res = {
.server = server,
@@ -2771,21 +2976,11 @@ static int _nfs4_proc_rename(struct inode *old_dir, struct qstr *old_name,
};
int status = -ENOMEM;
- res.old_fattr = nfs_alloc_fattr();
- res.new_fattr = nfs_alloc_fattr();
- if (res.old_fattr == NULL || res.new_fattr == NULL)
- goto out;
-
status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1);
if (!status) {
update_changeattr(old_dir, &res.old_cinfo);
- nfs_post_op_update_inode(old_dir, res.old_fattr);
update_changeattr(new_dir, &res.new_cinfo);
- nfs_post_op_update_inode(new_dir, res.new_fattr);
}
-out:
- nfs_free_fattr(res.new_fattr);
- nfs_free_fattr(res.old_fattr);
return status;
}
@@ -2823,18 +3018,15 @@ static int _nfs4_proc_link(struct inode *inode, struct inode *dir, struct qstr *
int status = -ENOMEM;
res.fattr = nfs_alloc_fattr();
- res.dir_attr = nfs_alloc_fattr();
- if (res.fattr == NULL || res.dir_attr == NULL)
+ if (res.fattr == NULL)
goto out;
status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1);
if (!status) {
update_changeattr(dir, &res.cinfo);
- nfs_post_op_update_inode(dir, res.dir_attr);
nfs_post_op_update_inode(inode, res.fattr);
}
out:
- nfs_free_fattr(res.dir_attr);
nfs_free_fattr(res.fattr);
return status;
}
@@ -2857,7 +3049,6 @@ struct nfs4_createdata {
struct nfs4_create_res res;
struct nfs_fh fh;
struct nfs_fattr fattr;
- struct nfs_fattr dir_fattr;
};
static struct nfs4_createdata *nfs4_alloc_createdata(struct inode *dir,
@@ -2881,9 +3072,7 @@ static struct nfs4_createdata *nfs4_alloc_createdata(struct inode *dir,
data->res.server = server;
data->res.fh = &data->fh;
data->res.fattr = &data->fattr;
- data->res.dir_fattr = &data->dir_fattr;
nfs_fattr_init(data->res.fattr);
- nfs_fattr_init(data->res.dir_fattr);
}
return data;
}
@@ -2894,7 +3083,6 @@ static int nfs4_do_create(struct inode *dir, struct dentry *dentry, struct nfs4_
&data->arg.seq_args, &data->res.seq_res, 1);
if (status == 0) {
update_changeattr(dir, &data->res.dir_cinfo);
- nfs_post_op_update_inode(dir, data->res.dir_fattr);
status = nfs_instantiate(dentry, data->res.fh, data->res.fattr);
}
return status;
@@ -3190,12 +3378,12 @@ static int nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
void __nfs4_read_done_cb(struct nfs_read_data *data)
{
- nfs_invalidate_atime(data->inode);
+ nfs_invalidate_atime(data->header->inode);
}
static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_read_data *data)
{
- struct nfs_server *server = NFS_SERVER(data->inode);
+ struct nfs_server *server = NFS_SERVER(data->header->inode);
if (nfs4_async_handle_error(task, server, data->args.context->state) == -EAGAIN) {
rpc_restart_call_prepare(task);
@@ -3225,27 +3413,22 @@ static void nfs4_proc_read_setup(struct nfs_read_data *data, struct rpc_message
data->timestamp = jiffies;
data->read_done_cb = nfs4_read_done_cb;
msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ];
+ nfs41_init_sequence(&data->args.seq_args, &data->res.seq_res, 0);
}
-/* Reset the the nfs_read_data to send the read to the MDS. */
-void nfs4_reset_read(struct rpc_task *task, struct nfs_read_data *data)
+static void nfs4_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data)
{
- dprintk("%s Reset task for i/o through\n", __func__);
- put_lseg(data->lseg);
- data->lseg = NULL;
- /* offsets will differ in the dense stripe case */
- data->args.offset = data->mds_offset;
- data->ds_clp = NULL;
- data->args.fh = NFS_FH(data->inode);
- data->read_done_cb = nfs4_read_done_cb;
- task->tk_ops = data->mds_ops;
- rpc_task_reset_client(task, NFS_CLIENT(data->inode));
+ if (nfs4_setup_sequence(NFS_SERVER(data->header->inode),
+ &data->args.seq_args,
+ &data->res.seq_res,
+ task))
+ return;
+ rpc_call_start(task);
}
-EXPORT_SYMBOL_GPL(nfs4_reset_read);
static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_write_data *data)
{
- struct inode *inode = data->inode;
+ struct inode *inode = data->header->inode;
if (nfs4_async_handle_error(task, NFS_SERVER(inode), data->args.context->state) == -EAGAIN) {
rpc_restart_call_prepare(task);
@@ -3253,7 +3436,7 @@ static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_write_data *data
}
if (task->tk_status >= 0) {
renew_lease(NFS_SERVER(inode), data->timestamp);
- nfs_post_op_update_inode_force_wcc(inode, data->res.fattr);
+ nfs_post_op_update_inode_force_wcc(inode, &data->fattr);
}
return 0;
}
@@ -3266,41 +3449,60 @@ static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data)
nfs4_write_done_cb(task, data);
}
-/* Reset the the nfs_write_data to send the write to the MDS. */
-void nfs4_reset_write(struct rpc_task *task, struct nfs_write_data *data)
+static
+bool nfs4_write_need_cache_consistency_data(const struct nfs_write_data *data)
{
- dprintk("%s Reset task for i/o through\n", __func__);
- put_lseg(data->lseg);
- data->lseg = NULL;
- data->ds_clp = NULL;
- data->write_done_cb = nfs4_write_done_cb;
- data->args.fh = NFS_FH(data->inode);
- data->args.bitmask = data->res.server->cache_consistency_bitmask;
- data->args.offset = data->mds_offset;
- data->res.fattr = &data->fattr;
- task->tk_ops = data->mds_ops;
- rpc_task_reset_client(task, NFS_CLIENT(data->inode));
+ const struct nfs_pgio_header *hdr = data->header;
+
+ /* Don't request attributes for pNFS or O_DIRECT writes */
+ if (data->ds_clp != NULL || hdr->dreq != NULL)
+ return false;
+ /* Otherwise, request attributes if and only if we don't hold
+ * a delegation
+ */
+ return nfs_have_delegation(hdr->inode, FMODE_READ) == 0;
}
-EXPORT_SYMBOL_GPL(nfs4_reset_write);
static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_message *msg)
{
- struct nfs_server *server = NFS_SERVER(data->inode);
+ struct nfs_server *server = NFS_SERVER(data->header->inode);
- if (data->lseg) {
+ if (!nfs4_write_need_cache_consistency_data(data)) {
data->args.bitmask = NULL;
data->res.fattr = NULL;
} else
data->args.bitmask = server->cache_consistency_bitmask;
+
if (!data->write_done_cb)
data->write_done_cb = nfs4_write_done_cb;
data->res.server = server;
data->timestamp = jiffies;
msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_WRITE];
+ nfs41_init_sequence(&data->args.seq_args, &data->res.seq_res, 1);
}
-static int nfs4_commit_done_cb(struct rpc_task *task, struct nfs_write_data *data)
+/*
+ * Prepare step for a WRITE call: run nfs4_setup_sequence() for the server
+ * that owns the inode being written and start the RPC only when that setup
+ * reports success (zero).  On a non-zero result the call is not started here.
+ */
+static void nfs4_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data)
+{
+	struct nfs_server *server = NFS_SERVER(data->header->inode);
+
+	if (nfs4_setup_sequence(server, &data->args.seq_args,
+				&data->res.seq_res, task) == 0)
+		rpc_call_start(task);
+}
+
+/*
+ * Prepare step for a COMMIT call: run nfs4_setup_sequence() for the server
+ * that owns data->inode and start the RPC only when that setup reports
+ * success (zero).  On a non-zero result the call is not started here.
+ */
+static void nfs4_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data)
+{
+	struct nfs_server *server = NFS_SERVER(data->inode);
+
+	if (nfs4_setup_sequence(server, &data->args.seq_args,
+				&data->res.seq_res, task) == 0)
+		rpc_call_start(task);
+}
+
+static int nfs4_commit_done_cb(struct rpc_task *task, struct nfs_commit_data *data)
{
struct inode *inode = data->inode;
@@ -3308,30 +3510,25 @@ static int nfs4_commit_done_cb(struct rpc_task *task, struct nfs_write_data *dat
rpc_restart_call_prepare(task);
return -EAGAIN;
}
- nfs_refresh_inode(inode, data->res.fattr);
return 0;
}
-static int nfs4_commit_done(struct rpc_task *task, struct nfs_write_data *data)
+static int nfs4_commit_done(struct rpc_task *task, struct nfs_commit_data *data)
{
if (!nfs4_sequence_done(task, &data->res.seq_res))
return -EAGAIN;
- return data->write_done_cb(task, data);
+ return data->commit_done_cb(task, data);
}
-static void nfs4_proc_commit_setup(struct nfs_write_data *data, struct rpc_message *msg)
+static void nfs4_proc_commit_setup(struct nfs_commit_data *data, struct rpc_message *msg)
{
struct nfs_server *server = NFS_SERVER(data->inode);
- if (data->lseg) {
- data->args.bitmask = NULL;
- data->res.fattr = NULL;
- } else
- data->args.bitmask = server->cache_consistency_bitmask;
- if (!data->write_done_cb)
- data->write_done_cb = nfs4_commit_done_cb;
+ if (data->commit_done_cb == NULL)
+ data->commit_done_cb = nfs4_commit_done_cb;
data->res.server = server;
msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT];
+ nfs41_init_sequence(&data->args.seq_args, &data->res.seq_res, 1);
}
struct nfs4_renewdata {
@@ -3430,19 +3627,6 @@ static inline int nfs4_server_supports_acls(struct nfs_server *server)
*/
#define NFS4ACL_MAXPAGES (XATTR_SIZE_MAX >> PAGE_CACHE_SHIFT)
-static void buf_to_pages(const void *buf, size_t buflen,
- struct page **pages, unsigned int *pgbase)
-{
- const void *p = buf;
-
- *pgbase = offset_in_page(buf);
- p -= *pgbase;
- while (p < buf + buflen) {
- *(pages++) = virt_to_page(p);
- p += PAGE_CACHE_SIZE;
- }
-}
-
static int buf_to_pages_noslab(const void *buf, size_t buflen,
struct page **pages, unsigned int *pgbase)
{
@@ -3518,16 +3702,16 @@ out:
return ret;
}
-static void nfs4_write_cached_acl(struct inode *inode, const char *buf, size_t acl_len)
+static void nfs4_write_cached_acl(struct inode *inode, struct page **pages, size_t pgbase, size_t acl_len)
{
struct nfs4_cached_acl *acl;
- if (buf && acl_len <= PAGE_SIZE) {
+ if (pages && acl_len <= PAGE_SIZE) {
acl = kmalloc(sizeof(*acl) + acl_len, GFP_KERNEL);
if (acl == NULL)
goto out;
acl->cached = 1;
- memcpy(acl->data, buf, acl_len);
+ _copy_from_pages(acl->data, pages, pgbase, acl_len);
} else {
acl = kmalloc(sizeof(*acl), GFP_KERNEL);
if (acl == NULL)
@@ -3539,9 +3723,19 @@ out:
nfs4_set_cached_acl(inode, acl);
}
+/*
+ * The getxattr API returns the required buffer length when called with a
+ * NULL buf. The NFSv4 acl tool then calls getxattr again after allocating
+ * the required buf. On a NULL buf, we send a page of data to the server
+ * guessing that the ACL request can be serviced by a page. If so, we cache
+ * up to the page of ACL data, and the 2nd call to getxattr is serviced by
+ * the cache. If not so, we throw away the page, and cache the required
+ * length. The next getxattr call will then produce another round trip to
+ * the server, this time with the input buf of the required size.
+ */
static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t buflen)
{
- struct page *pages[NFS4ACL_MAXPAGES];
+ struct page *pages[NFS4ACL_MAXPAGES] = {NULL, };
struct nfs_getaclargs args = {
.fh = NFS_FH(inode),
.acl_pages = pages,
@@ -3550,47 +3744,68 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu
struct nfs_getaclres res = {
.acl_len = buflen,
};
- void *resp_buf;
struct rpc_message msg = {
.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_GETACL],
.rpc_argp = &args,
.rpc_resp = &res,
};
- struct page *localpage = NULL;
- int ret;
+ int ret = -ENOMEM, npages, i, acl_len = 0;
- if (buflen < PAGE_SIZE) {
- /* As long as we're doing a round trip to the server anyway,
- * let's be prepared for a page of acl data. */
- localpage = alloc_page(GFP_KERNEL);
- resp_buf = page_address(localpage);
- if (localpage == NULL)
- return -ENOMEM;
- args.acl_pages[0] = localpage;
- args.acl_pgbase = 0;
- args.acl_len = PAGE_SIZE;
- } else {
- resp_buf = buf;
- buf_to_pages(buf, buflen, args.acl_pages, &args.acl_pgbase);
+ npages = (buflen + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ /* As long as we're doing a round trip to the server anyway,
+ * let's be prepared for a page of acl data. */
+ if (npages == 0)
+ npages = 1;
+
+ /* Add an extra page to handle the bitmap returned */
+ npages++;
+
+ for (i = 0; i < npages; i++) {
+ pages[i] = alloc_page(GFP_KERNEL);
+ if (!pages[i])
+ goto out_free;
}
- ret = nfs4_call_sync(NFS_SERVER(inode)->client, NFS_SERVER(inode), &msg, &args.seq_args, &res.seq_res, 0);
+
+ /* for decoding across pages */
+ res.acl_scratch = alloc_page(GFP_KERNEL);
+ if (!res.acl_scratch)
+ goto out_free;
+
+ args.acl_len = npages * PAGE_SIZE;
+ args.acl_pgbase = 0;
+
+ /* Let decode_getfacl know not to fail if the ACL data is larger than
+ * the page we send as a guess */
+ if (buf == NULL)
+ res.acl_flags |= NFS4_ACL_LEN_REQUEST;
+
+ dprintk("%s buf %p buflen %zu npages %d args.acl_len %zu\n",
+ __func__, buf, buflen, npages, args.acl_len);
+ ret = nfs4_call_sync(NFS_SERVER(inode)->client, NFS_SERVER(inode),
+ &msg, &args.seq_args, &res.seq_res, 0);
if (ret)
goto out_free;
- if (res.acl_len > args.acl_len)
- nfs4_write_cached_acl(inode, NULL, res.acl_len);
+
+ acl_len = res.acl_len - res.acl_data_offset;
+ if (acl_len > args.acl_len)
+ nfs4_write_cached_acl(inode, NULL, 0, acl_len);
else
- nfs4_write_cached_acl(inode, resp_buf, res.acl_len);
+ nfs4_write_cached_acl(inode, pages, res.acl_data_offset,
+ acl_len);
if (buf) {
ret = -ERANGE;
- if (res.acl_len > buflen)
+ if (acl_len > buflen)
goto out_free;
- if (localpage)
- memcpy(buf, resp_buf, res.acl_len);
+ _copy_from_pages(buf, pages, res.acl_data_offset,
+ acl_len);
}
- ret = res.acl_len;
+ ret = acl_len;
out_free:
- if (localpage)
- __free_page(localpage);
+ for (i = 0; i < npages; i++)
+ if (pages[i])
+ __free_page(pages[i]);
+ if (res.acl_scratch)
+ __free_page(res.acl_scratch);
return ret;
}
@@ -3621,6 +3836,8 @@ static ssize_t nfs4_proc_get_acl(struct inode *inode, void *buf, size_t buflen)
nfs_zap_acl_cache(inode);
ret = nfs4_read_cached_acl(inode, buf, buflen);
if (ret != -ENOENT)
+ /* -ENOENT is returned if there is no ACL or if there is an ACL
+ * but no cached acl data, just the acl length */
return ret;
return nfs4_get_acl_uncached(inode, buf, buflen);
}
@@ -3689,8 +3906,12 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
if (task->tk_status >= 0)
return 0;
switch(task->tk_status) {
+ case -NFS4ERR_DELEG_REVOKED:
case -NFS4ERR_ADMIN_REVOKED:
case -NFS4ERR_BAD_STATEID:
+ if (state == NULL)
+ break;
+ nfs_remove_bad_delegation(state->inode);
case -NFS4ERR_OPENMODE:
if (state == NULL)
break;
@@ -3713,7 +3934,7 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
case -NFS4ERR_SEQ_MISORDERED:
dprintk("%s ERROR %d, Reset session\n", __func__,
task->tk_status);
- nfs4_schedule_session_recovery(clp->cl_session);
+ nfs4_schedule_session_recovery(clp->cl_session, task->tk_status);
task->tk_status = 0;
return -EAGAIN;
#endif /* CONFIG_NFS_V4_1 */
@@ -3739,6 +3960,24 @@ wait_on_recovery:
return -EAGAIN;
}
+/*
+ * Fill @bootverf with the client's boot verifier: two big-endian 32-bit
+ * words copied into bootverf->data.
+ *
+ * When NFS4CLNT_PURGE_STATE is set in clp->cl_state the words are
+ * { 0, NSEC_PER_SEC + 1 }; otherwise they are the seconds and nanoseconds
+ * of the per-net boot_time.
+ *
+ * The words are converted with htonl() rather than a bare (__be32) cast so
+ * that the bytes placed in the verifier do not depend on the byte order of
+ * the CPU, matching the htonl()-based code this helper replaces.
+ */
+static void nfs4_init_boot_verifier(const struct nfs_client *clp,
+				    nfs4_verifier *bootverf)
+{
+	__be32 verf[2];
+
+	if (test_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state)) {
+		/* An impossible timestamp guarantees this value
+		 * will never match a generated boot time. */
+		verf[0] = htonl(0);
+		verf[1] = htonl((u32)(NSEC_PER_SEC + 1));
+	} else {
+		struct nfs_net *nn = net_generic(clp->cl_net, nfs_net_id);
+
+		/* Explicit truncation to 32 bits, then host -> network order */
+		verf[0] = htonl((u32)nn->boot_time.tv_sec);
+		verf[1] = htonl((u32)nn->boot_time.tv_nsec);
+	}
+	memcpy(bootverf->data, verf, sizeof(bootverf->data));
+}
+
int nfs4_proc_setclientid(struct nfs_client *clp, u32 program,
unsigned short port, struct rpc_cred *cred,
struct nfs4_setclientid_res *res)
@@ -3755,15 +3994,13 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program,
.rpc_resp = res,
.rpc_cred = cred,
};
- __be32 *p;
int loop = 0;
int status;
- p = (__be32*)sc_verifier.data;
- *p++ = htonl((u32)clp->cl_boot_time.tv_sec);
- *p = htonl((u32)clp->cl_boot_time.tv_nsec);
+ nfs4_init_boot_verifier(clp, &sc_verifier);
for(;;) {
+ rcu_read_lock();
setclientid.sc_name_len = scnprintf(setclientid.sc_name,
sizeof(setclientid.sc_name), "%s/%s %s %s %u",
clp->cl_ipaddr,
@@ -3780,6 +4017,7 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program,
setclientid.sc_uaddr_len = scnprintf(setclientid.sc_uaddr,
sizeof(setclientid.sc_uaddr), "%s.%u.%u",
clp->cl_ipaddr, port >> 8, port & 255);
+ rcu_read_unlock();
status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
if (status != -NFS4ERR_CLID_INUSE)
@@ -3866,7 +4104,7 @@ static void nfs4_delegreturn_prepare(struct rpc_task *task, void *data)
if (nfs4_setup_sequence(d_data->res.server,
&d_data->args.seq_args,
- &d_data->res.seq_res, 1, task))
+ &d_data->res.seq_res, task))
return;
rpc_call_start(task);
}
@@ -3900,11 +4138,12 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co
data = kzalloc(sizeof(*data), GFP_NOFS);
if (data == NULL)
return -ENOMEM;
+ nfs41_init_sequence(&data->args.seq_args, &data->res.seq_res, 1);
data->args.fhandle = &data->fh;
data->args.stateid = &data->stateid;
- data->args.bitmask = server->attr_bitmask;
+ data->args.bitmask = server->cache_consistency_bitmask;
nfs_copy_fh(&data->fh, NFS_FH(inode));
- memcpy(&data->stateid, stateid, sizeof(data->stateid));
+ nfs4_stateid_copy(&data->stateid, stateid);
data->res.fattr = &data->fattr;
data->res.server = server;
nfs_fattr_init(data->res.fattr);
@@ -3923,9 +4162,10 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co
if (status != 0)
goto out;
status = data->rpc_status;
- if (status != 0)
- goto out;
- nfs_refresh_inode(inode, &data->fattr);
+ if (status == 0)
+ nfs_post_op_update_inode_force_wcc(inode, &data->fattr);
+ else
+ nfs_refresh_inode(inode, &data->fattr);
out:
rpc_put_task(task);
return status;
@@ -3958,7 +4198,7 @@ int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4
static unsigned long
nfs4_set_lock_task_retry(unsigned long timeout)
{
- schedule_timeout_killable(timeout);
+ freezable_schedule_timeout_killable(timeout);
timeout <<= 1;
if (timeout > NFS4_LOCK_MAXTIMEOUT)
return NFS4_LOCK_MAXTIMEOUT;
@@ -3991,7 +4231,7 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock
if (status != 0)
goto out;
lsp = request->fl_u.nfs4_fl.owner;
- arg.lock_owner.id = lsp->ls_id.id;
+ arg.lock_owner.id = lsp->ls_seqid.owner_id;
arg.lock_owner.s_dev = server->s_dev;
status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1);
switch (status) {
@@ -4087,9 +4327,8 @@ static void nfs4_locku_done(struct rpc_task *task, void *data)
return;
switch (task->tk_status) {
case 0:
- memcpy(calldata->lsp->ls_stateid.data,
- calldata->res.stateid.data,
- sizeof(calldata->lsp->ls_stateid.data));
+ nfs4_stateid_copy(&calldata->lsp->ls_stateid,
+ &calldata->res.stateid);
renew_lease(calldata->server, calldata->timestamp);
break;
case -NFS4ERR_BAD_STATEID:
@@ -4117,7 +4356,7 @@ static void nfs4_locku_prepare(struct rpc_task *task, void *data)
calldata->timestamp = jiffies;
if (nfs4_setup_sequence(calldata->server,
&calldata->arg.seq_args,
- &calldata->res.seq_res, 1, task))
+ &calldata->res.seq_res, task))
return;
rpc_call_start(task);
}
@@ -4157,6 +4396,7 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl,
return ERR_PTR(-ENOMEM);
}
+ nfs41_init_sequence(&data->arg.seq_args, &data->res.seq_res, 1);
msg.rpc_argp = &data->arg;
msg.rpc_resp = &data->res;
task_setup_data.callback_data = data;
@@ -4236,7 +4476,7 @@ static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl,
goto out_free_seqid;
p->arg.lock_stateid = &lsp->ls_stateid;
p->arg.lock_owner.clientid = server->nfs_client->cl_clientid;
- p->arg.lock_owner.id = lsp->ls_id.id;
+ p->arg.lock_owner.id = lsp->ls_seqid.owner_id;
p->arg.lock_owner.s_dev = server->s_dev;
p->res.lock_seqid = p->arg.lock_seqid;
p->lsp = lsp;
@@ -4272,7 +4512,7 @@ static void nfs4_lock_prepare(struct rpc_task *task, void *calldata)
data->timestamp = jiffies;
if (nfs4_setup_sequence(data->server,
&data->arg.seq_args,
- &data->res.seq_res, 1, task))
+ &data->res.seq_res, task))
return;
rpc_call_start(task);
dprintk("%s: done!, ret = %d\n", __func__, data->rpc_status);
@@ -4301,8 +4541,7 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata)
goto out;
}
if (data->rpc_status == 0) {
- memcpy(data->lsp->ls_stateid.data, data->res.stateid.data,
- sizeof(data->lsp->ls_stateid.data));
+ nfs4_stateid_copy(&data->lsp->ls_stateid, &data->res.stateid);
data->lsp->ls_flags |= NFS_LOCK_INITIALIZED;
renew_lease(NFS_SERVER(data->ctx->dentry->d_inode), data->timestamp);
}
@@ -4390,6 +4629,7 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f
data->arg.reclaim = NFS_LOCK_RECLAIM;
task_setup_data.callback_ops = &nfs4_recover_lock_ops;
}
+ nfs41_init_sequence(&data->arg.seq_args, &data->res.seq_res, 1);
msg.rpc_argp = &data->arg;
msg.rpc_resp = &data->res;
task_setup_data.callback_data = data;
@@ -4412,7 +4652,9 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f
static int nfs4_lock_reclaim(struct nfs4_state *state, struct file_lock *request)
{
struct nfs_server *server = NFS_SERVER(state->inode);
- struct nfs4_exception exception = { };
+ struct nfs4_exception exception = {
+ .inode = state->inode,
+ };
int err;
do {
@@ -4430,7 +4672,9 @@ static int nfs4_lock_reclaim(struct nfs4_state *state, struct file_lock *request
static int nfs4_lock_expired(struct nfs4_state *state, struct file_lock *request)
{
struct nfs_server *server = NFS_SERVER(state->inode);
- struct nfs4_exception exception = { };
+ struct nfs4_exception exception = {
+ .inode = state->inode,
+ };
int err;
err = nfs4_set_lock_state(state, request);
@@ -4454,15 +4698,34 @@ out:
}
#if defined(CONFIG_NFS_V4_1)
-static int nfs41_lock_expired(struct nfs4_state *state, struct file_lock *request)
+static int nfs41_check_expired_locks(struct nfs4_state *state)
{
- int status;
+ int status, ret = NFS_OK;
+ struct nfs4_lock_state *lsp;
struct nfs_server *server = NFS_SERVER(state->inode);
- status = nfs41_test_stateid(server, state);
+ list_for_each_entry(lsp, &state->lock_states, ls_locks) {
+ if (lsp->ls_flags & NFS_LOCK_INITIALIZED) {
+ status = nfs41_test_stateid(server, &lsp->ls_stateid);
+ if (status != NFS_OK) {
+ nfs41_free_stateid(server, &lsp->ls_stateid);
+ lsp->ls_flags &= ~NFS_LOCK_INITIALIZED;
+ ret = status;
+ }
+ }
+ };
+
+ return ret;
+}
+
+static int nfs41_lock_expired(struct nfs4_state *state, struct file_lock *request)
+{
+ int status = NFS_OK;
+
+ if (test_bit(LK_STATE_IN_USE, &state->flags))
+ status = nfs41_check_expired_locks(state);
if (status == NFS_OK)
- return 0;
- nfs41_free_stateid(server, state);
+ return status;
return nfs4_lock_expired(state, request);
}
#endif
@@ -4498,7 +4761,8 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock
/* Note: we always want to sleep here! */
request->fl_flags = fl_flags | FL_SLEEP;
if (do_vfs_lock(request->fl_file, request) < 0)
- printk(KERN_WARNING "%s: VFS is out of sync with lock manager!\n", __func__);
+ printk(KERN_WARNING "NFS: %s: VFS is out of sync with lock "
+ "manager!\n", __func__);
out_unlock:
up_read(&nfsi->rwsem);
out:
@@ -4508,7 +4772,10 @@ out:
static int nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request)
{
- struct nfs4_exception exception = { };
+ struct nfs4_exception exception = {
+ .state = state,
+ .inode = state->inode,
+ };
int err;
do {
@@ -4553,6 +4820,20 @@ nfs4_proc_lock(struct file *filp, int cmd, struct file_lock *request)
if (state == NULL)
return -ENOLCK;
+ /*
+ * Don't rely on the VFS having checked the file open mode,
+ * since it won't do this for flock() locks.
+ */
+ switch (request->fl_type & (F_RDLCK|F_WRLCK|F_UNLCK)) {
+ case F_RDLCK:
+ if (!(filp->f_mode & FMODE_READ))
+ return -EBADF;
+ break;
+ case F_WRLCK:
+ if (!(filp->f_mode & FMODE_WRITE))
+ return -EBADF;
+ }
+
do {
status = nfs4_proc_setlk(state, cmd, request);
if ((status != -EAGAIN) || IS_SETLK(cmd))
@@ -4578,8 +4859,8 @@ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl)
err = _nfs4_do_setlk(state, F_SETLK, fl, NFS_LOCK_NEW);
switch (err) {
default:
- printk(KERN_ERR "%s: unhandled error %d.\n",
- __func__, err);
+ printk(KERN_ERR "NFS: %s: unhandled error "
+ "%d.\n", __func__, err);
case 0:
case -ESTALE:
goto out;
@@ -4594,13 +4875,14 @@ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl)
case -NFS4ERR_BAD_HIGH_SLOT:
case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
case -NFS4ERR_DEADSESSION:
- nfs4_schedule_session_recovery(server->nfs_client->cl_session);
+ nfs4_schedule_session_recovery(server->nfs_client->cl_session, err);
goto out;
case -ERESTARTSYS:
/*
* The show must go on: exit, but mark the
* stateid as needing recovery.
*/
+ case -NFS4ERR_DELEG_REVOKED:
case -NFS4ERR_ADMIN_REVOKED:
case -NFS4ERR_BAD_STATEID:
case -NFS4ERR_OPENMODE:
@@ -4630,33 +4912,44 @@ out:
return err;
}
+struct nfs_release_lockowner_data { /* callback data for the async RELEASE_LOCKOWNER call */
+ struct nfs4_lock_state *lsp; /* lock state passed to nfs4_free_lock_state() on release */
+ struct nfs_server *server; /* server passed to nfs4_free_lock_state() on release */
+ struct nfs_release_lockowner_args args; /* RPC arguments, used as msg.rpc_argp */
+};
+
static void nfs4_release_lockowner_release(void *calldata)
{
+ struct nfs_release_lockowner_data *data = calldata;
+ nfs4_free_lock_state(data->server, data->lsp);
kfree(calldata);
}
-const struct rpc_call_ops nfs4_release_lockowner_ops = {
+static const struct rpc_call_ops nfs4_release_lockowner_ops = {
.rpc_release = nfs4_release_lockowner_release,
};
-void nfs4_release_lockowner(const struct nfs4_lock_state *lsp)
+int nfs4_release_lockowner(struct nfs4_lock_state *lsp)
{
struct nfs_server *server = lsp->ls_state->owner->so_server;
- struct nfs_release_lockowner_args *args;
+ struct nfs_release_lockowner_data *data;
struct rpc_message msg = {
.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RELEASE_LOCKOWNER],
};
if (server->nfs_client->cl_mvops->minor_version != 0)
- return;
- args = kmalloc(sizeof(*args), GFP_NOFS);
- if (!args)
- return;
- args->lock_owner.clientid = server->nfs_client->cl_clientid;
- args->lock_owner.id = lsp->ls_id.id;
- args->lock_owner.s_dev = server->s_dev;
- msg.rpc_argp = args;
- rpc_call_async(server->client, &msg, 0, &nfs4_release_lockowner_ops, args);
+ return -EINVAL;
+ data = kmalloc(sizeof(*data), GFP_NOFS);
+ if (!data)
+ return -ENOMEM;
+ data->lsp = lsp;
+ data->server = server;
+ data->args.lock_owner.clientid = server->nfs_client->cl_clientid;
+ data->args.lock_owner.id = lsp->ls_seqid.owner_id;
+ data->args.lock_owner.s_dev = server->s_dev;
+ msg.rpc_argp = &data->args;
+ rpc_call_async(server->client, &msg, 0, &nfs4_release_lockowner_ops, data);
+ return 0;
}
#define XATTR_NAME_NFSV4_ACL "system.nfs4_acl"
@@ -4702,17 +4995,19 @@ static void nfs_fixup_referral_attributes(struct nfs_fattr *fattr)
if (!(((fattr->valid & NFS_ATTR_FATTR_MOUNTED_ON_FILEID) ||
(fattr->valid & NFS_ATTR_FATTR_FILEID)) &&
(fattr->valid & NFS_ATTR_FATTR_FSID) &&
- (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL)))
+ (fattr->valid & NFS_ATTR_FATTR_V4_LOCATIONS)))
return;
fattr->valid |= NFS_ATTR_FATTR_TYPE | NFS_ATTR_FATTR_MODE |
- NFS_ATTR_FATTR_NLINK;
+ NFS_ATTR_FATTR_NLINK | NFS_ATTR_FATTR_V4_REFERRAL;
fattr->mode = S_IFDIR | S_IRUGO | S_IXUGO;
fattr->nlink = 2;
}
-int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name,
- struct nfs4_fs_locations *fs_locations, struct page *page)
+static int _nfs4_proc_fs_locations(struct rpc_clnt *client, struct inode *dir,
+ const struct qstr *name,
+ struct nfs4_fs_locations *fs_locations,
+ struct page *page)
{
struct nfs_server *server = NFS_SERVER(dir);
u32 bitmask[2] = {
@@ -4746,11 +5041,26 @@ int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name,
nfs_fattr_init(&fs_locations->fattr);
fs_locations->server = server;
fs_locations->nlocations = 0;
- status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0);
+ status = nfs4_call_sync(client, server, &msg, &args.seq_args, &res.seq_res, 0);
dprintk("%s: returned status = %d\n", __func__, status);
return status;
}
+/*
+ * Retrying front end for _nfs4_proc_fs_locations().
+ *
+ * Each attempt's status is passed through nfs4_handle_exception(); the
+ * request is reissued for as long as that handler sets exception.retry.
+ * Returns the value produced by nfs4_handle_exception() for the last attempt.
+ */
+int nfs4_proc_fs_locations(struct rpc_clnt *client, struct inode *dir,
+			   const struct qstr *name,
+			   struct nfs4_fs_locations *fs_locations,
+			   struct page *page)
+{
+	struct nfs_server *server = NFS_SERVER(dir);
+	struct nfs4_exception exception = { };
+	int status;
+	int err;
+
+	do {
+		status = _nfs4_proc_fs_locations(client, dir, name,
+						 fs_locations, page);
+		err = nfs4_handle_exception(server, status, &exception);
+	} while (exception.retry);
+
+	return err;
+}
+
static int _nfs4_proc_secinfo(struct inode *dir, const struct qstr *name, struct nfs4_secinfo_flavors *flavors)
{
int status;
@@ -4773,7 +5083,8 @@ static int _nfs4_proc_secinfo(struct inode *dir, const struct qstr *name, struct
return status;
}
-int nfs4_proc_secinfo(struct inode *dir, const struct qstr *name, struct nfs4_secinfo_flavors *flavors)
+int nfs4_proc_secinfo(struct inode *dir, const struct qstr *name,
+ struct nfs4_secinfo_flavors *flavors)
{
struct nfs4_exception exception = { };
int err;
@@ -4806,7 +5117,8 @@ out_inval:
}
static bool
-nfs41_same_server_scope(struct server_scope *a, struct server_scope *b)
+nfs41_same_server_scope(struct nfs41_server_scope *a,
+ struct nfs41_server_scope *b)
{
if (a->server_scope_sz == b->server_scope_sz &&
memcmp(a->server_scope, b->server_scope, a->server_scope_sz) == 0)
@@ -4816,6 +5128,61 @@ nfs41_same_server_scope(struct server_scope *a, struct server_scope *b)
}
/*
+ * nfs4_proc_bind_conn_to_session()
+ *
+ * The 4.1 client currently uses the same TCP connection for the
+ * fore and backchannel.
+ */
+int nfs4_proc_bind_conn_to_session(struct nfs_client *clp, struct rpc_cred *cred)
+{
+	struct nfs41_bind_conn_to_session_res res;
+	struct rpc_message msg = {
+		.rpc_proc =
+			&nfs4_procedures[NFSPROC4_CLNT_BIND_CONN_TO_SESSION],
+		.rpc_argp = clp,
+		.rpc_resp = &res,
+		.rpc_cred = cred,
+	};
+	int status;
+
+	dprintk("--> %s\n", __func__);
+	BUG_ON(clp == NULL);
+
+	/* Scratch session object for the reply; freed before returning */
+	res.session = kzalloc(sizeof(struct nfs4_session), GFP_NOFS);
+	if (unlikely(res.session == NULL)) {
+		status = -ENOMEM;
+		goto out;
+	}
+
+	status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
+	if (status != 0)
+		goto out_session;
+
+	/*
+	 * The call succeeded: validate the reply.  The checks run in order
+	 * and the first one that fails turns the result into -EIO.
+	 */
+	if (memcmp(res.session->sess_id.data,
+		   clp->cl_session->sess_id.data,
+		   NFS4_MAX_SESSIONID_LEN) != 0) {
+		dprintk("NFS: %s: Session ID mismatch\n", __func__);
+		status = -EIO;
+	} else if (res.dir != NFS4_CDFS4_BOTH) {
+		dprintk("NFS: %s: Unexpected direction from server\n",
+			__func__);
+		status = -EIO;
+	} else if (res.use_conn_in_rdma_mode) {
+		dprintk("NFS: %s: Server returned RDMA mode = true\n",
+			__func__);
+		status = -EIO;
+	}
+
+out_session:
+	kfree(res.session);
+out:
+	dprintk("<-- %s status= %d\n", __func__, status);
+	return status;
+}
+
+/*
* nfs4_proc_exchange_id()
*
* Since the clientid has expired, all compounds using sessions
@@ -4827,11 +5194,12 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred)
{
nfs4_verifier verifier;
struct nfs41_exchange_id_args args = {
+ .verifier = &verifier,
.client = clp,
.flags = EXCHGID4_FLAG_SUPP_MOVED_REFER,
};
struct nfs41_exchange_id_res res = {
- .client = clp,
+ 0
};
int status;
struct rpc_message msg = {
@@ -4840,52 +5208,147 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred)
.rpc_resp = &res,
.rpc_cred = cred,
};
- __be32 *p;
dprintk("--> %s\n", __func__);
BUG_ON(clp == NULL);
- p = (u32 *)verifier.data;
- *p++ = htonl((u32)clp->cl_boot_time.tv_sec);
- *p = htonl((u32)clp->cl_boot_time.tv_nsec);
- args.verifier = &verifier;
+ nfs4_init_boot_verifier(clp, &verifier);
args.id_len = scnprintf(args.id, sizeof(args.id),
- "%s/%s.%s/%u",
+ "%s/%s/%u",
clp->cl_ipaddr,
- init_utsname()->nodename,
- init_utsname()->domainname,
+ clp->cl_rpcclient->cl_nodename,
clp->cl_rpcclient->cl_auth->au_flavor);
- res.server_scope = kzalloc(sizeof(struct server_scope), GFP_KERNEL);
- if (unlikely(!res.server_scope))
- return -ENOMEM;
+ res.server_owner = kzalloc(sizeof(struct nfs41_server_owner),
+ GFP_NOFS);
+ if (unlikely(res.server_owner == NULL)) {
+ status = -ENOMEM;
+ goto out;
+ }
+
+ res.server_scope = kzalloc(sizeof(struct nfs41_server_scope),
+ GFP_NOFS);
+ if (unlikely(res.server_scope == NULL)) {
+ status = -ENOMEM;
+ goto out_server_owner;
+ }
+
+ res.impl_id = kzalloc(sizeof(struct nfs41_impl_id), GFP_NOFS);
+ if (unlikely(res.impl_id == NULL)) {
+ status = -ENOMEM;
+ goto out_server_scope;
+ }
status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
- if (!status)
- status = nfs4_check_cl_exchange_flags(clp->cl_exchange_flags);
+ if (status == 0)
+ status = nfs4_check_cl_exchange_flags(res.flags);
- if (!status) {
- if (clp->server_scope &&
- !nfs41_same_server_scope(clp->server_scope,
+ if (status == 0) {
+ clp->cl_clientid = res.clientid;
+ clp->cl_exchange_flags = (res.flags & ~EXCHGID4_FLAG_CONFIRMED_R);
+ if (!(res.flags & EXCHGID4_FLAG_CONFIRMED_R))
+ clp->cl_seqid = res.seqid;
+
+ kfree(clp->cl_serverowner);
+ clp->cl_serverowner = res.server_owner;
+ res.server_owner = NULL;
+
+ /* use the most recent implementation id */
+ kfree(clp->cl_implid);
+ clp->cl_implid = res.impl_id;
+
+ if (clp->cl_serverscope != NULL &&
+ !nfs41_same_server_scope(clp->cl_serverscope,
res.server_scope)) {
dprintk("%s: server_scope mismatch detected\n",
__func__);
set_bit(NFS4CLNT_SERVER_SCOPE_MISMATCH, &clp->cl_state);
- kfree(clp->server_scope);
- clp->server_scope = NULL;
+ kfree(clp->cl_serverscope);
+ clp->cl_serverscope = NULL;
}
- if (!clp->server_scope)
- clp->server_scope = res.server_scope;
- else
- kfree(res.server_scope);
- }
+ if (clp->cl_serverscope == NULL) {
+ clp->cl_serverscope = res.server_scope;
+ goto out;
+ }
+ } else
+ kfree(res.impl_id);
+out_server_owner:
+ kfree(res.server_owner);
+out_server_scope:
+ kfree(res.server_scope);
+out:
+ if (clp->cl_implid != NULL)
+ dprintk("%s: Server Implementation ID: "
+ "domain: %s, name: %s, date: %llu,%u\n",
+ __func__, clp->cl_implid->domain, clp->cl_implid->name,
+ clp->cl_implid->date.seconds,
+ clp->cl_implid->date.nseconds);
dprintk("<-- %s status= %d\n", __func__, status);
return status;
}
+/*
+ * Issue one synchronous DESTROY_CLIENTID call for @clp using @cred.
+ *
+ * Returns the status from rpc_call_sync(); a non-zero status is also
+ * reported through dprintk() together with the server's hostname.
+ */
+static int _nfs4_proc_destroy_clientid(struct nfs_client *clp,
+		struct rpc_cred *cred)
+{
+	struct rpc_message msg = {
+		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_DESTROY_CLIENTID],
+		.rpc_argp = clp,
+		.rpc_cred = cred,
+	};
+	int status;
+
+	status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
+	if (status)
+		/* Terminate the message so it does not run into the next log line */
+		dprintk("NFS: Got error %d from the server %s on "
+			"DESTROY_CLIENTID.\n", status, clp->cl_hostname);
+	return status;
+}
+
+/*
+ * Call _nfs4_proc_destroy_clientid() up to NFS4_MAX_LOOP_ON_RECOVER times.
+ *
+ * -NFS4ERR_DELAY and -NFS4ERR_CLIENTID_BUSY cause a one second sleep
+ * followed by another attempt; any other result is returned immediately.
+ * Returns 0 if every attempt ended in one of those two retryable errors.
+ */
+static int nfs4_proc_destroy_clientid(struct nfs_client *clp,
+		struct rpc_cred *cred)
+{
+	unsigned int attempts_left = NFS4_MAX_LOOP_ON_RECOVER;
+	int ret;
+
+	while (attempts_left != 0) {
+		ret = _nfs4_proc_destroy_clientid(clp, cred);
+		if (ret != -NFS4ERR_DELAY && ret != -NFS4ERR_CLIENTID_BUSY)
+			return ret;
+		ssleep(1);
+		attempts_left--;
+	}
+	return 0;
+}
+
+/*
+ * Destroy the client ID held by @clp.
+ *
+ * Nothing is done (and 0 is returned) when the minor version is below 1
+ * or when cl_exchange_flags is already zero.  Otherwise the request is
+ * sent with the credential from nfs4_get_exchange_id_cred(), and
+ * cl_exchange_flags is cleared when the result is 0 or
+ * -NFS4ERR_STALE_CLIENTID.  Returns the result of
+ * nfs4_proc_destroy_clientid().
+ */
+int nfs4_destroy_clientid(struct nfs_client *clp)
+{
+	struct rpc_cred *cred;
+	int ret;
+
+	if (clp->cl_mvops->minor_version < 1 || clp->cl_exchange_flags == 0)
+		return 0;
+
+	cred = nfs4_get_exchange_id_cred(clp);
+	ret = nfs4_proc_destroy_clientid(clp, cred);
+	if (cred)
+		put_rpccred(cred);
+
+	if (ret == 0 || ret == -NFS4ERR_STALE_CLIENTID)
+		clp->cl_exchange_flags = 0;
+
+	return ret;
+}
+
struct nfs4_get_lease_time_data {
struct nfs4_get_lease_time_args *args;
struct nfs4_get_lease_time_res *res;
@@ -4905,7 +5368,7 @@ static void nfs4_get_lease_time_prepare(struct rpc_task *task,
since we're invoked within one */
ret = nfs41_setup_sequence(data->clp->cl_session,
&data->args->la_seq_args,
- &data->res->lr_seq_res, 0, task);
+ &data->res->lr_seq_res, task);
BUG_ON(ret == -EAGAIN);
rpc_call_start(task);
@@ -4938,7 +5401,7 @@ static void nfs4_get_lease_time_done(struct rpc_task *task, void *calldata)
dprintk("<-- %s\n", __func__);
}
-struct rpc_call_ops nfs4_get_lease_time_ops = {
+static const struct rpc_call_ops nfs4_get_lease_time_ops = {
.rpc_call_prepare = nfs4_get_lease_time_prepare,
.rpc_call_done = nfs4_get_lease_time_done,
};
@@ -4969,6 +5432,7 @@ int nfs4_proc_get_lease_time(struct nfs_client *clp, struct nfs_fsinfo *fsinfo)
};
int status;
+ nfs41_init_sequence(&args.la_seq_args, &res.lr_seq_res, 0);
dprintk("--> %s\n", __func__);
task = rpc_run_task(&task_setup);
@@ -4983,37 +5447,53 @@ int nfs4_proc_get_lease_time(struct nfs_client *clp, struct nfs_fsinfo *fsinfo)
return status;
}
+/*
+ * Allocate a zeroed array of @max_slots slot structures with @gfp_flags.
+ * Returns whatever kcalloc() returns (NULL on allocation failure).
+ */
+static struct nfs4_slot *nfs4_alloc_slots(u32 max_slots, gfp_t gfp_flags)
+{
+	struct nfs4_slot *slots;
+
+	slots = kcalloc(max_slots, sizeof(*slots), gfp_flags);
+	return slots;
+}
+
+/*
+ * Optionally install a new slot array in @tbl and reset every slot.
+ *
+ * With tbl->slot_tbl_lock held: if @new is non-NULL it replaces tbl->slots
+ * and tbl->max_slots becomes @max_slots; highest_used_slotid is reset to -1
+ * and each slot's seq_nr is set to @ivalue.  A replaced array is freed only
+ * after the lock has been dropped.
+ */
+static void nfs4_add_and_init_slots(struct nfs4_slot_table *tbl,
+		struct nfs4_slot *new,
+		u32 max_slots,
+		u32 ivalue)
+{
+	struct nfs4_slot *stale = NULL;
+	u32 slotid;
+
+	spin_lock(&tbl->slot_tbl_lock);
+	if (new != NULL) {
+		stale = tbl->slots;
+		tbl->slots = new;
+		tbl->max_slots = max_slots;
+	}
+	/* no slot is currently used */
+	tbl->highest_used_slotid = -1;
+	for (slotid = 0; slotid < tbl->max_slots; slotid++)
+		tbl->slots[slotid].seq_nr = ivalue;
+	spin_unlock(&tbl->slot_tbl_lock);
+
+	kfree(stale);
+}
+
/*
- * Reset a slot table
+ * (re)Initialise a slot table
*/
-static int nfs4_reset_slot_table(struct nfs4_slot_table *tbl, u32 max_reqs,
- int ivalue)
+static int nfs4_realloc_slot_table(struct nfs4_slot_table *tbl, u32 max_reqs,
+ u32 ivalue)
{
struct nfs4_slot *new = NULL;
- int i;
- int ret = 0;
+ int ret = -ENOMEM;
dprintk("--> %s: max_reqs=%u, tbl->max_slots %d\n", __func__,
max_reqs, tbl->max_slots);
/* Does the newly negotiated max_reqs match the existing slot table? */
if (max_reqs != tbl->max_slots) {
- ret = -ENOMEM;
- new = kmalloc(max_reqs * sizeof(struct nfs4_slot),
- GFP_NOFS);
+ new = nfs4_alloc_slots(max_reqs, GFP_NOFS);
if (!new)
goto out;
- ret = 0;
- kfree(tbl->slots);
}
- spin_lock(&tbl->slot_tbl_lock);
- if (new) {
- tbl->slots = new;
- tbl->max_slots = max_reqs;
- }
- for (i = 0; i < tbl->max_slots; ++i)
- tbl->slots[i].seq_nr = ivalue;
- spin_unlock(&tbl->slot_tbl_lock);
+ ret = 0;
+
+ nfs4_add_and_init_slots(tbl, new, max_reqs, ivalue);
dprintk("%s: tbl=%p slots=%p max_slots=%d\n", __func__,
tbl, tbl->slots, tbl->max_slots);
out:
@@ -5021,23 +5501,6 @@ out:
return ret;
}
-/*
- * Reset the forechannel and backchannel slot tables
- */
-static int nfs4_reset_slot_tables(struct nfs4_session *session)
-{
- int status;
-
- status = nfs4_reset_slot_table(&session->fc_slot_table,
- session->fc_attrs.max_reqs, 1);
- if (status)
- return status;
-
- status = nfs4_reset_slot_table(&session->bc_slot_table,
- session->bc_attrs.max_reqs, 0);
- return status;
-}
-
/* Destroy the slot table */
static void nfs4_destroy_slot_tables(struct nfs4_session *session)
{
@@ -5053,59 +5516,26 @@ static void nfs4_destroy_slot_tables(struct nfs4_session *session)
}
/*
- * Initialize slot table
- */
-static int nfs4_init_slot_table(struct nfs4_slot_table *tbl,
- int max_slots, int ivalue)
-{
- struct nfs4_slot *slot;
- int ret = -ENOMEM;
-
- BUG_ON(max_slots > NFS4_MAX_SLOT_TABLE);
-
- dprintk("--> %s: max_reqs=%u\n", __func__, max_slots);
-
- slot = kcalloc(max_slots, sizeof(struct nfs4_slot), GFP_NOFS);
- if (!slot)
- goto out;
- ret = 0;
-
- spin_lock(&tbl->slot_tbl_lock);
- tbl->max_slots = max_slots;
- tbl->slots = slot;
- tbl->highest_used_slotid = -1; /* no slot is currently used */
- spin_unlock(&tbl->slot_tbl_lock);
- dprintk("%s: tbl=%p slots=%p max_slots=%d\n", __func__,
- tbl, tbl->slots, tbl->max_slots);
-out:
- dprintk("<-- %s: return %d\n", __func__, ret);
- return ret;
-}
-
-/*
- * Initialize the forechannel and backchannel tables
+ * Initialize or reset the forechannel and backchannel tables
*/
-static int nfs4_init_slot_tables(struct nfs4_session *session)
+static int nfs4_setup_session_slot_tables(struct nfs4_session *ses)
{
struct nfs4_slot_table *tbl;
- int status = 0;
-
- tbl = &session->fc_slot_table;
- if (tbl->slots == NULL) {
- status = nfs4_init_slot_table(tbl,
- session->fc_attrs.max_reqs, 1);
- if (status)
- return status;
- }
-
- tbl = &session->bc_slot_table;
- if (tbl->slots == NULL) {
- status = nfs4_init_slot_table(tbl,
- session->bc_attrs.max_reqs, 0);
- if (status)
- nfs4_destroy_slot_tables(session);
- }
+ int status;
+ dprintk("--> %s\n", __func__);
+ /* Fore channel */
+ tbl = &ses->fc_slot_table;
+ status = nfs4_realloc_slot_table(tbl, ses->fc_attrs.max_reqs, 1);
+ if (status) /* -ENOMEM */
+ return status;
+ /* Back channel */
+ tbl = &ses->bc_slot_table;
+ status = nfs4_realloc_slot_table(tbl, ses->bc_attrs.max_reqs, 0);
+ if (status && tbl->slots == NULL)
+ /* Fore and back channel share a connection so get
+ * both slot tables or neither */
+ nfs4_destroy_slot_tables(ses);
return status;
}
@@ -5119,13 +5549,13 @@ struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp)
return NULL;
tbl = &session->fc_slot_table;
- tbl->highest_used_slotid = -1;
+ tbl->highest_used_slotid = NFS4_NO_SLOT;
spin_lock_init(&tbl->slot_tbl_lock);
rpc_init_priority_wait_queue(&tbl->slot_tbl_waitq, "ForeChannel Slot table");
init_completion(&tbl->complete);
tbl = &session->bc_slot_table;
- tbl->highest_used_slotid = -1;
+ tbl->highest_used_slotid = NFS4_NO_SLOT;
spin_lock_init(&tbl->slot_tbl_lock);
rpc_init_wait_queue(&tbl->slot_tbl_waitq, "BackChannel Slot table");
init_completion(&tbl->complete);
@@ -5138,11 +5568,20 @@ struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp)
void nfs4_destroy_session(struct nfs4_session *session)
{
- nfs4_proc_destroy_session(session);
+ struct rpc_xprt *xprt;
+ struct rpc_cred *cred;
+
+ cred = nfs4_get_exchange_id_cred(session->clp);
+ nfs4_proc_destroy_session(session, cred);
+ if (cred)
+ put_rpccred(cred);
+
+ rcu_read_lock();
+ xprt = rcu_dereference(session->clp->cl_rpcclient->cl_xprt);
+ rcu_read_unlock();
dprintk("%s Destroy backchannel for xprt %p\n",
- __func__, session->clp->cl_rpcclient->cl_xprt);
- xprt_destroy_backchannel(session->clp->cl_rpcclient->cl_xprt,
- NFS41_BC_MIN_CALLBACKS);
+ __func__, xprt);
+ xprt_destroy_backchannel(xprt, NFS41_BC_MIN_CALLBACKS);
nfs4_destroy_slot_tables(session);
kfree(session);
}
@@ -5170,7 +5609,7 @@ static void nfs4_init_channel_attrs(struct nfs41_create_session_args *args)
args->fc_attrs.max_rqst_sz = mxrqst_sz;
args->fc_attrs.max_resp_sz = mxresp_sz;
args->fc_attrs.max_ops = NFS4_MAX_OPS;
- args->fc_attrs.max_reqs = session->clp->cl_rpcclient->cl_xprt->max_reqs;
+ args->fc_attrs.max_reqs = max_session_slots;
dprintk("%s: Fore Channel : max_rqst_sz=%u max_resp_sz=%u "
"max_ops=%u max_reqs=%u\n",
@@ -5210,6 +5649,8 @@ static int nfs4_verify_fore_channel_attrs(struct nfs41_create_session_args *args
return -EINVAL;
if (rcvd->max_reqs == 0)
return -EINVAL;
+ if (rcvd->max_reqs > NFS4_MAX_SLOT_TABLE)
+ rcvd->max_reqs = NFS4_MAX_SLOT_TABLE;
return 0;
}
@@ -5225,9 +5666,9 @@ static int nfs4_verify_back_channel_attrs(struct nfs41_create_session_args *args
if (rcvd->max_resp_sz_cached > sent->max_resp_sz_cached)
return -EINVAL;
/* These would render the backchannel useless: */
- if (rcvd->max_ops == 0)
+ if (rcvd->max_ops != sent->max_ops)
return -EINVAL;
- if (rcvd->max_reqs == 0)
+ if (rcvd->max_reqs != sent->max_reqs)
return -EINVAL;
return 0;
}
@@ -5243,7 +5684,8 @@ static int nfs4_verify_channel_attrs(struct nfs41_create_session_args *args,
return nfs4_verify_back_channel_attrs(args, session);
}
-static int _nfs4_proc_create_session(struct nfs_client *clp)
+static int _nfs4_proc_create_session(struct nfs_client *clp,
+ struct rpc_cred *cred)
{
struct nfs4_session *session = clp->cl_session;
struct nfs41_create_session_args args = {
@@ -5257,6 +5699,7 @@ static int _nfs4_proc_create_session(struct nfs_client *clp)
.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CREATE_SESSION],
.rpc_argp = &args,
.rpc_resp = &res,
+ .rpc_cred = cred,
};
int status;
@@ -5281,7 +5724,7 @@ static int _nfs4_proc_create_session(struct nfs_client *clp)
* It is the responsibility of the caller to verify the session is
* expired before calling this routine.
*/
-int nfs4_proc_create_session(struct nfs_client *clp)
+int nfs4_proc_create_session(struct nfs_client *clp, struct rpc_cred *cred)
{
int status;
unsigned *ptr;
@@ -5289,17 +5732,13 @@ int nfs4_proc_create_session(struct nfs_client *clp)
dprintk("--> %s clp=%p session=%p\n", __func__, clp, session);
- status = _nfs4_proc_create_session(clp);
+ status = _nfs4_proc_create_session(clp, cred);
if (status)
goto out;
- /* Init and reset the fore channel */
- status = nfs4_init_slot_tables(session);
- dprintk("slot table initialization returned %d\n", status);
- if (status)
- goto out;
- status = nfs4_reset_slot_tables(session);
- dprintk("slot table reset returned %d\n", status);
+ /* Init or reset the session slot tables */
+ status = nfs4_setup_session_slot_tables(session);
+ dprintk("slot table setup returned %d\n", status);
if (status)
goto out;
@@ -5315,10 +5754,15 @@ out:
* Issue the over-the-wire RPC DESTROY_SESSION.
* The caller must serialize access to this routine.
*/
-int nfs4_proc_destroy_session(struct nfs4_session *session)
+int nfs4_proc_destroy_session(struct nfs4_session *session,
+ struct rpc_cred *cred)
{
+ struct rpc_message msg = {
+ .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_DESTROY_SESSION],
+ .rpc_argp = session,
+ .rpc_cred = cred,
+ };
int status = 0;
- struct rpc_message msg;
dprintk("--> nfs4_proc_destroy_session\n");
@@ -5326,68 +5770,89 @@ int nfs4_proc_destroy_session(struct nfs4_session *session)
if (session->clp->cl_cons_state != NFS_CS_READY)
return status;
- msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_DESTROY_SESSION];
- msg.rpc_argp = session;
- msg.rpc_resp = NULL;
- msg.rpc_cred = NULL;
status = rpc_call_sync(session->clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
if (status)
- printk(KERN_WARNING
- "Got error %d from the server on DESTROY_SESSION. "
+ dprintk("NFS: Got error %d from the server on DESTROY_SESSION. "
"Session has been destroyed regardless...\n", status);
dprintk("<-- nfs4_proc_destroy_session\n");
return status;
}
+/*
+ * With sessions, the client is not marked ready until after a
+ * successful EXCHANGE_ID and CREATE_SESSION.
+ *
+ * Map cl_cons_state errors to EPROTONOSUPPORT to indicate
+ * other versions of NFS can be tried.
+ */
+static int nfs41_check_session_ready(struct nfs_client *clp)
+{
+ int ret;
+
+ if (clp->cl_cons_state == NFS_CS_SESSION_INITING) {
+ ret = nfs4_client_recover_expired_lease(clp);
+ if (ret)
+ return ret;
+ }
+ if (clp->cl_cons_state < NFS_CS_READY)
+ return -EPROTONOSUPPORT;
+ smp_rmb();
+ return 0;
+}
+
int nfs4_init_session(struct nfs_server *server)
{
struct nfs_client *clp = server->nfs_client;
struct nfs4_session *session;
unsigned int rsize, wsize;
- int ret;
if (!nfs4_has_session(clp))
return 0;
session = clp->cl_session;
- if (!test_and_clear_bit(NFS4_SESSION_INITING, &session->session_state))
- return 0;
+ spin_lock(&clp->cl_lock);
+ if (test_and_clear_bit(NFS4_SESSION_INITING, &session->session_state)) {
- rsize = server->rsize;
- if (rsize == 0)
- rsize = NFS_MAX_FILE_IO_SIZE;
- wsize = server->wsize;
- if (wsize == 0)
- wsize = NFS_MAX_FILE_IO_SIZE;
+ rsize = server->rsize;
+ if (rsize == 0)
+ rsize = NFS_MAX_FILE_IO_SIZE;
+ wsize = server->wsize;
+ if (wsize == 0)
+ wsize = NFS_MAX_FILE_IO_SIZE;
- session->fc_attrs.max_rqst_sz = wsize + nfs41_maxwrite_overhead;
- session->fc_attrs.max_resp_sz = rsize + nfs41_maxread_overhead;
+ session->fc_attrs.max_rqst_sz = wsize + nfs41_maxwrite_overhead;
+ session->fc_attrs.max_resp_sz = rsize + nfs41_maxread_overhead;
+ }
+ spin_unlock(&clp->cl_lock);
- ret = nfs4_recover_expired_lease(server);
- if (!ret)
- ret = nfs4_check_client_ready(clp);
- return ret;
+ return nfs41_check_session_ready(clp);
}
-int nfs4_init_ds_session(struct nfs_client *clp)
+int nfs4_init_ds_session(struct nfs_client *clp, unsigned long lease_time)
{
struct nfs4_session *session = clp->cl_session;
int ret;
- if (!test_and_clear_bit(NFS4_SESSION_INITING, &session->session_state))
- return 0;
-
- ret = nfs4_client_recover_expired_lease(clp);
- if (!ret)
- /* Test for the DS role */
- if (!is_ds_client(clp))
- ret = -ENODEV;
- if (!ret)
- ret = nfs4_check_client_ready(clp);
- return ret;
+ spin_lock(&clp->cl_lock);
+ if (test_and_clear_bit(NFS4_SESSION_INITING, &session->session_state)) {
+ /*
+ * Do not set NFS_CS_CHECK_LEASE_TIME; instead set the
+ * DS lease to be equal to the MDS lease.
+ */
+ clp->cl_lease_time = lease_time;
+ clp->cl_last_renewal = jiffies;
+ }
+ spin_unlock(&clp->cl_lock);
+ ret = nfs41_check_session_ready(clp);
+ if (ret)
+ return ret;
+ /* Test for the DS role */
+ if (!is_ds_client(clp))
+ return -ENODEV;
+ return 0;
}
EXPORT_SYMBOL_GPL(nfs4_init_ds_session);
@@ -5457,7 +5922,7 @@ static void nfs41_sequence_prepare(struct rpc_task *task, void *data)
args = task->tk_msg.rpc_argp;
res = task->tk_msg.rpc_resp;
- if (nfs41_setup_sequence(clp->cl_session, args, res, 0, task))
+ if (nfs41_setup_sequence(clp->cl_session, args, res, task))
return;
rpc_call_start(task);
}
@@ -5489,6 +5954,7 @@ static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, struct rpc_
nfs_put_client(clp);
return ERR_PTR(-ENOMEM);
}
+ nfs41_init_sequence(&calldata->args, &calldata->res, 0);
msg.rpc_argp = &calldata->args;
msg.rpc_resp = &calldata->res;
calldata->clp = clp;
@@ -5550,7 +6016,7 @@ static void nfs4_reclaim_complete_prepare(struct rpc_task *task, void *data)
rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED);
if (nfs41_setup_sequence(calldata->clp->cl_session,
&calldata->arg.seq_args,
- &calldata->res.seq_res, 0, task))
+ &calldata->res.seq_res, task))
return;
rpc_call_start(task);
@@ -5629,6 +6095,7 @@ static int nfs41_proc_reclaim_complete(struct nfs_client *clp)
calldata->clp = clp;
calldata->arg.one_fs = 0;
+ nfs41_init_sequence(&calldata->arg.seq_args, &calldata->res.seq_res, 0);
msg.rpc_argp = &calldata->arg;
msg.rpc_resp = &calldata->res;
task_setup_data.callback_data = calldata;
@@ -5660,7 +6127,7 @@ nfs4_layoutget_prepare(struct rpc_task *task, void *calldata)
* to be no way to prevent it completely.
*/
if (nfs4_setup_sequence(server, &lgp->args.seq_args,
- &lgp->res.seq_res, 0, task))
+ &lgp->res.seq_res, task))
return;
if (pnfs_choose_layoutget_stateid(&lgp->args.stateid,
NFS_I(lgp->args.inode)->layout,
@@ -5735,6 +6202,7 @@ int nfs4_proc_layoutget(struct nfs4_layoutget *lgp)
lgp->res.layoutp = &lgp->args.layout;
lgp->res.seq_res.sr_slot = NULL;
+ nfs41_init_sequence(&lgp->args.seq_args, &lgp->res.seq_res, 0);
task = rpc_run_task(&task_setup_data);
if (IS_ERR(task))
return PTR_ERR(task);
@@ -5755,7 +6223,7 @@ nfs4_layoutreturn_prepare(struct rpc_task *task, void *calldata)
dprintk("--> %s\n", __func__);
if (nfs41_setup_sequence(lrp->clp->cl_session, &lrp->args.seq_args,
- &lrp->res.seq_res, 0, task))
+ &lrp->res.seq_res, task))
return;
rpc_call_start(task);
}
@@ -5821,6 +6289,7 @@ int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp)
int status;
dprintk("--> %s\n", __func__);
+ nfs41_init_sequence(&lrp->args.seq_args, &lrp->res.seq_res, 1);
task = rpc_run_task(&task_setup_data);
if (IS_ERR(task))
return PTR_ERR(task);
@@ -5921,7 +6390,7 @@ static void nfs4_layoutcommit_prepare(struct rpc_task *task, void *calldata)
struct nfs_server *server = NFS_SERVER(data->args.inode);
if (nfs4_setup_sequence(server, &data->args.seq_args,
- &data->res.seq_res, 1, task))
+ &data->res.seq_res, task))
return;
rpc_call_start(task);
}
@@ -5936,21 +6405,22 @@ nfs4_layoutcommit_done(struct rpc_task *task, void *calldata)
return;
switch (task->tk_status) { /* Just ignore these failures */
- case NFS4ERR_DELEG_REVOKED: /* layout was recalled */
- case NFS4ERR_BADIOMODE: /* no IOMODE_RW layout for range */
- case NFS4ERR_BADLAYOUT: /* no layout */
- case NFS4ERR_GRACE: /* loca_recalim always false */
+ case -NFS4ERR_DELEG_REVOKED: /* layout was recalled */
+ case -NFS4ERR_BADIOMODE: /* no IOMODE_RW layout for range */
+ case -NFS4ERR_BADLAYOUT: /* no layout */
+ case -NFS4ERR_GRACE: /* loca_recalim always false */
task->tk_status = 0;
- }
-
- if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN) {
- rpc_restart_call_prepare(task);
- return;
- }
-
- if (task->tk_status == 0)
+ break;
+ case 0:
nfs_post_op_update_inode_force_wcc(data->args.inode,
data->res.fattr);
+ break;
+ default:
+ if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN) {
+ rpc_restart_call_prepare(task);
+ return;
+ }
+ }
}
static void nfs4_layoutcommit_release(void *calldata)
@@ -6008,6 +6478,7 @@ nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data, bool sync)
data->args.lastbytewritten,
data->args.inode->i_ino);
+ nfs41_init_sequence(&data->args.seq_args, &data->res.seq_res, 1);
task = rpc_run_task(&task_setup_data);
if (IS_ERR(task))
return PTR_ERR(task);
@@ -6053,11 +6524,12 @@ nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle,
case 0:
case -NFS4ERR_WRONGSEC:
case -NFS4ERR_NOTSUPP:
- break;
+ goto out;
default:
err = nfs4_handle_exception(server, err, &exception);
}
} while (exception.retry);
+out:
return err;
}
@@ -6101,11 +6573,12 @@ out_freepage:
out:
return err;
}
-static int _nfs41_test_stateid(struct nfs_server *server, struct nfs4_state *state)
+
+static int _nfs41_test_stateid(struct nfs_server *server, nfs4_stateid *stateid)
{
int status;
struct nfs41_test_stateid_args args = {
- .stateid = &state->stateid,
+ .stateid = stateid,
};
struct nfs41_test_stateid_res res;
struct rpc_message msg = {
@@ -6113,28 +6586,31 @@ static int _nfs41_test_stateid(struct nfs_server *server, struct nfs4_state *sta
.rpc_argp = &args,
.rpc_resp = &res,
};
- args.seq_args.sa_session = res.seq_res.sr_session = NULL;
- status = nfs4_call_sync_sequence(server->client, server, &msg, &args.seq_args, &res.seq_res, 0, 1);
+
+ nfs41_init_sequence(&args.seq_args, &res.seq_res, 0);
+ status = nfs4_call_sync_sequence(server->client, server, &msg, &args.seq_args, &res.seq_res, 1);
+
+ if (status == NFS_OK)
+ return res.status;
return status;
}
-static int nfs41_test_stateid(struct nfs_server *server, struct nfs4_state *state)
+static int nfs41_test_stateid(struct nfs_server *server, nfs4_stateid *stateid)
{
struct nfs4_exception exception = { };
int err;
do {
err = nfs4_handle_exception(server,
- _nfs41_test_stateid(server, state),
+ _nfs41_test_stateid(server, stateid),
&exception);
} while (exception.retry);
return err;
}
-static int _nfs4_free_stateid(struct nfs_server *server, struct nfs4_state *state)
+static int _nfs4_free_stateid(struct nfs_server *server, nfs4_stateid *stateid)
{
- int status;
struct nfs41_free_stateid_args args = {
- .stateid = &state->stateid,
+ .stateid = stateid,
};
struct nfs41_free_stateid_res res;
struct rpc_message msg = {
@@ -6143,25 +6619,46 @@ static int _nfs4_free_stateid(struct nfs_server *server, struct nfs4_state *stat
.rpc_resp = &res,
};
- args.seq_args.sa_session = res.seq_res.sr_session = NULL;
- status = nfs4_call_sync_sequence(server->client, server, &msg, &args.seq_args, &res.seq_res, 0, 1);
- return status;
+ nfs41_init_sequence(&args.seq_args, &res.seq_res, 0);
+ return nfs4_call_sync_sequence(server->client, server, &msg, &args.seq_args, &res.seq_res, 1);
}
-static int nfs41_free_stateid(struct nfs_server *server, struct nfs4_state *state)
+static int nfs41_free_stateid(struct nfs_server *server, nfs4_stateid *stateid)
{
struct nfs4_exception exception = { };
int err;
do {
err = nfs4_handle_exception(server,
- _nfs4_free_stateid(server, state),
+ _nfs4_free_stateid(server, stateid),
&exception);
} while (exception.retry);
return err;
}
+
+static bool nfs41_match_stateid(const nfs4_stateid *s1,
+ const nfs4_stateid *s2)
+{
+ if (memcmp(s1->other, s2->other, sizeof(s1->other)) != 0)
+ return false;
+
+ if (s1->seqid == s2->seqid)
+ return true;
+ if (s1->seqid == 0 || s2->seqid == 0)
+ return true;
+
+ return false;
+}
+
#endif /* CONFIG_NFS_V4_1 */
-struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = {
+static bool nfs4_match_stateid(const nfs4_stateid *s1,
+ const nfs4_stateid *s2)
+{
+ return nfs4_stateid_match(s1, s2);
+}
+
+
+static const struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = {
.owner_flag_bit = NFS_OWNER_RECLAIM_REBOOT,
.state_flag_bit = NFS_STATE_RECLAIM_REBOOT,
.recover_open = nfs4_open_reclaim,
@@ -6171,7 +6668,7 @@ struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = {
};
#if defined(CONFIG_NFS_V4_1)
-struct nfs4_state_recovery_ops nfs41_reboot_recovery_ops = {
+static const struct nfs4_state_recovery_ops nfs41_reboot_recovery_ops = {
.owner_flag_bit = NFS_OWNER_RECLAIM_REBOOT,
.state_flag_bit = NFS_STATE_RECLAIM_REBOOT,
.recover_open = nfs4_open_reclaim,
@@ -6182,7 +6679,7 @@ struct nfs4_state_recovery_ops nfs41_reboot_recovery_ops = {
};
#endif /* CONFIG_NFS_V4_1 */
-struct nfs4_state_recovery_ops nfs40_nograce_recovery_ops = {
+static const struct nfs4_state_recovery_ops nfs40_nograce_recovery_ops = {
.owner_flag_bit = NFS_OWNER_RECLAIM_NOGRACE,
.state_flag_bit = NFS_STATE_RECLAIM_NOGRACE,
.recover_open = nfs4_open_expired,
@@ -6192,7 +6689,7 @@ struct nfs4_state_recovery_ops nfs40_nograce_recovery_ops = {
};
#if defined(CONFIG_NFS_V4_1)
-struct nfs4_state_recovery_ops nfs41_nograce_recovery_ops = {
+static const struct nfs4_state_recovery_ops nfs41_nograce_recovery_ops = {
.owner_flag_bit = NFS_OWNER_RECLAIM_NOGRACE,
.state_flag_bit = NFS_STATE_RECLAIM_NOGRACE,
.recover_open = nfs41_open_expired,
@@ -6202,14 +6699,14 @@ struct nfs4_state_recovery_ops nfs41_nograce_recovery_ops = {
};
#endif /* CONFIG_NFS_V4_1 */
-struct nfs4_state_maintenance_ops nfs40_state_renewal_ops = {
+static const struct nfs4_state_maintenance_ops nfs40_state_renewal_ops = {
.sched_state_renewal = nfs4_proc_async_renew,
.get_state_renewal_cred_locked = nfs4_get_renew_cred_locked,
.renew_lease = nfs4_proc_renew,
};
#if defined(CONFIG_NFS_V4_1)
-struct nfs4_state_maintenance_ops nfs41_state_renewal_ops = {
+static const struct nfs4_state_maintenance_ops nfs41_state_renewal_ops = {
.sched_state_renewal = nfs41_proc_async_sequence,
.get_state_renewal_cred_locked = nfs4_get_machine_cred_locked,
.renew_lease = nfs4_proc_sequence,
@@ -6219,7 +6716,7 @@ struct nfs4_state_maintenance_ops nfs41_state_renewal_ops = {
static const struct nfs4_minor_version_ops nfs_v4_0_minor_ops = {
.minor_version = 0,
.call_sync = _nfs4_call_sync,
- .validate_stateid = nfs4_validate_delegation_stateid,
+ .match_stateid = nfs4_match_stateid,
.find_root_sec = nfs4_find_root_sec,
.reboot_recovery_ops = &nfs40_reboot_recovery_ops,
.nograce_recovery_ops = &nfs40_nograce_recovery_ops,
@@ -6230,7 +6727,7 @@ static const struct nfs4_minor_version_ops nfs_v4_0_minor_ops = {
static const struct nfs4_minor_version_ops nfs_v4_1_minor_ops = {
.minor_version = 1,
.call_sync = _nfs4_call_sync_session,
- .validate_stateid = nfs41_validate_delegation_stateid,
+ .match_stateid = nfs41_match_stateid,
.find_root_sec = nfs41_find_root_sec,
.reboot_recovery_ops = &nfs41_reboot_recovery_ops,
.nograce_recovery_ops = &nfs41_nograce_recovery_ops,
@@ -6262,6 +6759,7 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
.file_inode_ops = &nfs4_file_inode_operations,
.file_ops = &nfs4_file_operations,
.getroot = nfs4_proc_get_root,
+ .submount = nfs4_submount,
.getattr = nfs4_proc_getattr,
.setattr = nfs4_proc_setattr,
.lookup = nfs4_proc_lookup,
@@ -6270,9 +6768,11 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
.create = nfs4_proc_create,
.remove = nfs4_proc_remove,
.unlink_setup = nfs4_proc_unlink_setup,
+ .unlink_rpc_prepare = nfs4_proc_unlink_rpc_prepare,
.unlink_done = nfs4_proc_unlink_done,
.rename = nfs4_proc_rename,
.rename_setup = nfs4_proc_rename_setup,
+ .rename_rpc_prepare = nfs4_proc_rename_rpc_prepare,
.rename_done = nfs4_proc_rename_done,
.link = nfs4_proc_link,
.symlink = nfs4_proc_symlink,
@@ -6286,17 +6786,19 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
.set_capabilities = nfs4_server_capabilities,
.decode_dirent = nfs4_decode_dirent,
.read_setup = nfs4_proc_read_setup,
+ .read_rpc_prepare = nfs4_proc_read_rpc_prepare,
.read_done = nfs4_read_done,
.write_setup = nfs4_proc_write_setup,
+ .write_rpc_prepare = nfs4_proc_write_rpc_prepare,
.write_done = nfs4_write_done,
.commit_setup = nfs4_proc_commit_setup,
+ .commit_rpc_prepare = nfs4_proc_commit_rpc_prepare,
.commit_done = nfs4_commit_done,
.lock = nfs4_proc_lock,
.clear_acl_cache = nfs4_zap_acl_attr,
.close_context = nfs4_close_context,
.open_context = nfs4_atomic_open,
.init_client = nfs4_init_client,
- .secinfo = nfs4_proc_secinfo,
};
static const struct xattr_handler nfs4_xattr_nfs4_acl_handler = {
@@ -6311,6 +6813,10 @@ const struct xattr_handler *nfs4_xattr_handlers[] = {
NULL
};
+module_param(max_session_slots, ushort, 0644);
+MODULE_PARM_DESC(max_session_slots, "Maximum number of outstanding NFSv4.1 "
+ "requests the client will negotiate");
+
/*
* Local variables:
* c-basic-offset: 8
diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c
index dc484c0eae7..6930bec91bc 100644
--- a/fs/nfs/nfs4renewd.c
+++ b/fs/nfs/nfs4renewd.c
@@ -49,7 +49,7 @@
#include "nfs4_fs.h"
#include "delegation.h"
-#define NFSDBG_FACILITY NFSDBG_PROC
+#define NFSDBG_FACILITY NFSDBG_STATE
void
nfs4_renew_state(struct work_struct *work)
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 6a7107ae6b7..f38300e9f17 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -49,6 +49,7 @@
#include <linux/ratelimit.h>
#include <linux/workqueue.h>
#include <linux/bitops.h>
+#include <linux/jiffies.h>
#include "nfs4_fs.h"
#include "callback.h"
@@ -56,6 +57,8 @@
#include "internal.h"
#include "pnfs.h"
+#define NFSDBG_FACILITY NFSDBG_STATE
+
#define OPENOWNER_POOL_SIZE 8
const nfs4_stateid zero_stateid;
@@ -145,6 +148,11 @@ struct rpc_cred *nfs4_get_renew_cred_locked(struct nfs_client *clp)
struct rpc_cred *cred = NULL;
struct nfs_server *server;
+ /* Use machine credentials if available */
+ cred = nfs4_get_machine_cred_locked(clp);
+ if (cred != NULL)
+ goto out;
+
rcu_read_lock();
list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
cred = nfs4_get_renew_cred_server_locked(server);
@@ -152,6 +160,8 @@ struct rpc_cred *nfs4_get_renew_cred_locked(struct nfs_client *clp)
break;
}
rcu_read_unlock();
+
+out:
return cred;
}
@@ -189,30 +199,29 @@ static int nfs41_setup_state_renewal(struct nfs_client *clp)
static void nfs4_end_drain_session(struct nfs_client *clp)
{
struct nfs4_session *ses = clp->cl_session;
+ struct nfs4_slot_table *tbl;
int max_slots;
if (ses == NULL)
return;
+ tbl = &ses->fc_slot_table;
if (test_and_clear_bit(NFS4_SESSION_DRAINING, &ses->session_state)) {
- spin_lock(&ses->fc_slot_table.slot_tbl_lock);
- max_slots = ses->fc_slot_table.max_slots;
+ spin_lock(&tbl->slot_tbl_lock);
+ max_slots = tbl->max_slots;
while (max_slots--) {
- struct rpc_task *task;
-
- task = rpc_wake_up_next(&ses->fc_slot_table.
- slot_tbl_waitq);
- if (!task)
+ if (rpc_wake_up_first(&tbl->slot_tbl_waitq,
+ nfs4_set_task_privileged,
+ NULL) == NULL)
break;
- rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED);
}
- spin_unlock(&ses->fc_slot_table.slot_tbl_lock);
+ spin_unlock(&tbl->slot_tbl_lock);
}
}
static int nfs4_wait_on_slot_tbl(struct nfs4_slot_table *tbl)
{
spin_lock(&tbl->slot_tbl_lock);
- if (tbl->highest_used_slotid != -1) {
+ if (tbl->highest_used_slotid != NFS4_NO_SLOT) {
INIT_COMPLETION(tbl->complete);
spin_unlock(&tbl->slot_tbl_lock);
return wait_for_completion_interruptible(&tbl->complete);
@@ -235,6 +244,16 @@ static int nfs4_begin_drain_session(struct nfs_client *clp)
return nfs4_wait_on_slot_tbl(&ses->fc_slot_table);
}
+static void nfs41_finish_session_reset(struct nfs_client *clp)
+{
+ clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
+ clear_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state);
+ /* create_session negotiated new slot table */
+ clear_bit(NFS4CLNT_RECALL_SLOT, &clp->cl_state);
+ clear_bit(NFS4CLNT_BIND_CONN_TO_SESSION, &clp->cl_state);
+ nfs41_setup_state_renewal(clp);
+}
+
int nfs41_init_clientid(struct nfs_client *clp, struct rpc_cred *cred)
{
int status;
@@ -247,11 +266,10 @@ int nfs41_init_clientid(struct nfs_client *clp, struct rpc_cred *cred)
goto out;
set_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
do_confirm:
- status = nfs4_proc_create_session(clp);
+ status = nfs4_proc_create_session(clp, cred);
if (status != 0)
goto out;
- clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
- nfs41_setup_state_renewal(clp);
+ nfs41_finish_session_reset(clp);
nfs_mark_client_ready(clp, NFS_CS_READY);
out:
return status;
@@ -316,92 +334,29 @@ out:
return cred;
}
-static void nfs_alloc_unique_id_locked(struct rb_root *root,
- struct nfs_unique_id *new,
- __u64 minval, int maxbits)
-{
- struct rb_node **p, *parent;
- struct nfs_unique_id *pos;
- __u64 mask = ~0ULL;
-
- if (maxbits < 64)
- mask = (1ULL << maxbits) - 1ULL;
-
- /* Ensure distribution is more or less flat */
- get_random_bytes(&new->id, sizeof(new->id));
- new->id &= mask;
- if (new->id < minval)
- new->id += minval;
-retry:
- p = &root->rb_node;
- parent = NULL;
-
- while (*p != NULL) {
- parent = *p;
- pos = rb_entry(parent, struct nfs_unique_id, rb_node);
-
- if (new->id < pos->id)
- p = &(*p)->rb_left;
- else if (new->id > pos->id)
- p = &(*p)->rb_right;
- else
- goto id_exists;
- }
- rb_link_node(&new->rb_node, parent, p);
- rb_insert_color(&new->rb_node, root);
- return;
-id_exists:
- for (;;) {
- new->id++;
- if (new->id < minval || (new->id & mask) != new->id) {
- new->id = minval;
- break;
- }
- parent = rb_next(parent);
- if (parent == NULL)
- break;
- pos = rb_entry(parent, struct nfs_unique_id, rb_node);
- if (new->id < pos->id)
- break;
- }
- goto retry;
-}
-
-static void nfs_free_unique_id(struct rb_root *root, struct nfs_unique_id *id)
-{
- rb_erase(&id->rb_node, root);
-}
-
static struct nfs4_state_owner *
nfs4_find_state_owner_locked(struct nfs_server *server, struct rpc_cred *cred)
{
struct rb_node **p = &server->state_owners.rb_node,
*parent = NULL;
- struct nfs4_state_owner *sp, *res = NULL;
+ struct nfs4_state_owner *sp;
while (*p != NULL) {
parent = *p;
sp = rb_entry(parent, struct nfs4_state_owner, so_server_node);
- if (server < sp->so_server) {
- p = &parent->rb_left;
- continue;
- }
- if (server > sp->so_server) {
- p = &parent->rb_right;
- continue;
- }
if (cred < sp->so_cred)
p = &parent->rb_left;
else if (cred > sp->so_cred)
p = &parent->rb_right;
else {
+ if (!list_empty(&sp->so_lru))
+ list_del_init(&sp->so_lru);
atomic_inc(&sp->so_count);
- res = sp;
- break;
+ return sp;
}
}
- return res;
+ return NULL;
}
static struct nfs4_state_owner *
@@ -411,6 +366,7 @@ nfs4_insert_state_owner_locked(struct nfs4_state_owner *new)
struct rb_node **p = &server->state_owners.rb_node,
*parent = NULL;
struct nfs4_state_owner *sp;
+ int err;
while (*p != NULL) {
parent = *p;
@@ -421,12 +377,15 @@ nfs4_insert_state_owner_locked(struct nfs4_state_owner *new)
else if (new->so_cred > sp->so_cred)
p = &parent->rb_right;
else {
+ if (!list_empty(&sp->so_lru))
+ list_del_init(&sp->so_lru);
atomic_inc(&sp->so_count);
return sp;
}
}
- nfs_alloc_unique_id_locked(&server->openowner_id,
- &new->so_owner_id, 1, 64);
+ err = ida_get_new(&server->openowner_id, &new->so_seqid.owner_id);
+ if (err)
+ return ERR_PTR(err);
rb_link_node(&new->so_server_node, parent, p);
rb_insert_color(&new->so_server_node, &server->state_owners);
return new;
@@ -439,7 +398,24 @@ nfs4_remove_state_owner_locked(struct nfs4_state_owner *sp)
if (!RB_EMPTY_NODE(&sp->so_server_node))
rb_erase(&sp->so_server_node, &server->state_owners);
- nfs_free_unique_id(&server->openowner_id, &sp->so_owner_id);
+ ida_remove(&server->openowner_id, sp->so_seqid.owner_id);
+}
+
+static void
+nfs4_init_seqid_counter(struct nfs_seqid_counter *sc)
+{
+ sc->create_time = ktime_get();
+ sc->flags = 0;
+ sc->counter = 0;
+ spin_lock_init(&sc->lock);
+ INIT_LIST_HEAD(&sc->list);
+ rpc_init_wait_queue(&sc->wait, "Seqid_waitqueue");
+}
+
+static void
+nfs4_destroy_seqid_counter(struct nfs_seqid_counter *sc)
+{
+ rpc_destroy_wait_queue(&sc->wait);
}
/*
@@ -448,37 +424,75 @@ nfs4_remove_state_owner_locked(struct nfs4_state_owner *sp)
*
*/
static struct nfs4_state_owner *
-nfs4_alloc_state_owner(void)
+nfs4_alloc_state_owner(struct nfs_server *server,
+ struct rpc_cred *cred,
+ gfp_t gfp_flags)
{
struct nfs4_state_owner *sp;
- sp = kzalloc(sizeof(*sp),GFP_NOFS);
+ sp = kzalloc(sizeof(*sp), gfp_flags);
if (!sp)
return NULL;
+ sp->so_server = server;
+ sp->so_cred = get_rpccred(cred);
spin_lock_init(&sp->so_lock);
INIT_LIST_HEAD(&sp->so_states);
- rpc_init_wait_queue(&sp->so_sequence.wait, "Seqid_waitqueue");
- sp->so_seqid.sequence = &sp->so_sequence;
- spin_lock_init(&sp->so_sequence.lock);
- INIT_LIST_HEAD(&sp->so_sequence.list);
+ nfs4_init_seqid_counter(&sp->so_seqid);
atomic_set(&sp->so_count, 1);
+ INIT_LIST_HEAD(&sp->so_lru);
return sp;
}
static void
nfs4_drop_state_owner(struct nfs4_state_owner *sp)
{
- if (!RB_EMPTY_NODE(&sp->so_server_node)) {
+ struct rb_node *rb_node = &sp->so_server_node;
+
+ if (!RB_EMPTY_NODE(rb_node)) {
struct nfs_server *server = sp->so_server;
struct nfs_client *clp = server->nfs_client;
spin_lock(&clp->cl_lock);
- rb_erase(&sp->so_server_node, &server->state_owners);
- RB_CLEAR_NODE(&sp->so_server_node);
+ if (!RB_EMPTY_NODE(rb_node)) {
+ rb_erase(rb_node, &server->state_owners);
+ RB_CLEAR_NODE(rb_node);
+ }
spin_unlock(&clp->cl_lock);
}
}
+static void nfs4_free_state_owner(struct nfs4_state_owner *sp)
+{
+ nfs4_destroy_seqid_counter(&sp->so_seqid);
+ put_rpccred(sp->so_cred);
+ kfree(sp);
+}
+
+static void nfs4_gc_state_owners(struct nfs_server *server)
+{
+ struct nfs_client *clp = server->nfs_client;
+ struct nfs4_state_owner *sp, *tmp;
+ unsigned long time_min, time_max;
+ LIST_HEAD(doomed);
+
+ spin_lock(&clp->cl_lock);
+ time_max = jiffies;
+ time_min = (long)time_max - (long)clp->cl_lease_time;
+ list_for_each_entry_safe(sp, tmp, &server->state_owners_lru, so_lru) {
+ /* NB: LRU is sorted so that oldest is at the head */
+ if (time_in_range(sp->so_expires, time_min, time_max))
+ break;
+ list_move(&sp->so_lru, &doomed);
+ nfs4_remove_state_owner_locked(sp);
+ }
+ spin_unlock(&clp->cl_lock);
+
+ list_for_each_entry_safe(sp, tmp, &doomed, so_lru) {
+ list_del(&sp->so_lru);
+ nfs4_free_state_owner(sp);
+ }
+}
+
/**
* nfs4_get_state_owner - Look up a state owner given a credential
* @server: nfs_server to search
@@ -487,7 +501,8 @@ nfs4_drop_state_owner(struct nfs4_state_owner *sp)
* Returns a pointer to an instantiated nfs4_state_owner struct, or NULL.
*/
struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server,
- struct rpc_cred *cred)
+ struct rpc_cred *cred,
+ gfp_t gfp_flags)
{
struct nfs_client *clp = server->nfs_client;
struct nfs4_state_owner *sp, *new;
@@ -496,21 +511,21 @@ struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server,
sp = nfs4_find_state_owner_locked(server, cred);
spin_unlock(&clp->cl_lock);
if (sp != NULL)
- return sp;
- new = nfs4_alloc_state_owner();
+ goto out;
+ new = nfs4_alloc_state_owner(server, cred, gfp_flags);
if (new == NULL)
- return NULL;
- new->so_server = server;
- new->so_cred = cred;
- spin_lock(&clp->cl_lock);
- sp = nfs4_insert_state_owner_locked(new);
- spin_unlock(&clp->cl_lock);
- if (sp == new)
- get_rpccred(cred);
- else {
- rpc_destroy_wait_queue(&new->so_sequence.wait);
- kfree(new);
- }
+ goto out;
+ do {
+ if (ida_pre_get(&server->openowner_id, gfp_flags) == 0)
+ break;
+ spin_lock(&clp->cl_lock);
+ sp = nfs4_insert_state_owner_locked(new);
+ spin_unlock(&clp->cl_lock);
+ } while (sp == ERR_PTR(-EAGAIN));
+ if (sp != new)
+ nfs4_free_state_owner(new);
+out:
+ nfs4_gc_state_owners(server);
return sp;
}
@@ -518,19 +533,51 @@ struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server,
* nfs4_put_state_owner - Release a nfs4_state_owner
* @sp: state owner data to release
*
+ * Note that we keep released state owners on an LRU
+ * list.
+ * This caches valid state owners so that they can be
+ * reused, to avoid the OPEN_CONFIRM on minor version 0.
+ * It also pins the uniquifier of dropped state owners for
+ * a while, to ensure that those state owner names are
+ * never reused.
*/
void nfs4_put_state_owner(struct nfs4_state_owner *sp)
{
- struct nfs_client *clp = sp->so_server->nfs_client;
- struct rpc_cred *cred = sp->so_cred;
+ struct nfs_server *server = sp->so_server;
+ struct nfs_client *clp = server->nfs_client;
if (!atomic_dec_and_lock(&sp->so_count, &clp->cl_lock))
return;
- nfs4_remove_state_owner_locked(sp);
+
+ sp->so_expires = jiffies;
+ list_add_tail(&sp->so_lru, &server->state_owners_lru);
spin_unlock(&clp->cl_lock);
- rpc_destroy_wait_queue(&sp->so_sequence.wait);
- put_rpccred(cred);
- kfree(sp);
+}
+
+/**
+ * nfs4_purge_state_owners - Release all cached state owners
+ * @server: nfs_server with cached state owners to release
+ *
+ * Called at umount time. Remaining state owners will be on
+ * the LRU with ref count of zero.
+ */
+void nfs4_purge_state_owners(struct nfs_server *server)
+{
+ struct nfs_client *clp = server->nfs_client;
+ struct nfs4_state_owner *sp, *tmp;
+ LIST_HEAD(doomed);
+
+ spin_lock(&clp->cl_lock);
+ list_for_each_entry_safe(sp, tmp, &server->state_owners_lru, so_lru) {
+ list_move(&sp->so_lru, &doomed);
+ nfs4_remove_state_owner_locked(sp);
+ }
+ spin_unlock(&clp->cl_lock);
+
+ list_for_each_entry_safe(sp, tmp, &doomed, so_lru) {
+ list_del(&sp->so_lru);
+ nfs4_free_state_owner(sp);
+ }
}
static struct nfs4_state *
@@ -734,15 +781,11 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f
{
struct nfs4_lock_state *lsp;
struct nfs_server *server = state->owner->so_server;
- struct nfs_client *clp = server->nfs_client;
lsp = kzalloc(sizeof(*lsp), GFP_NOFS);
if (lsp == NULL)
return NULL;
- rpc_init_wait_queue(&lsp->ls_sequence.wait, "lock_seqid_waitqueue");
- spin_lock_init(&lsp->ls_sequence.lock);
- INIT_LIST_HEAD(&lsp->ls_sequence.list);
- lsp->ls_seqid.sequence = &lsp->ls_sequence;
+ nfs4_init_seqid_counter(&lsp->ls_seqid);
atomic_set(&lsp->ls_count, 1);
lsp->ls_state = state;
lsp->ls_owner.lo_type = type;
@@ -754,25 +797,22 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f
lsp->ls_owner.lo_u.posix_owner = fl_owner;
break;
default:
- kfree(lsp);
- return NULL;
+ goto out_free;
}
- spin_lock(&clp->cl_lock);
- nfs_alloc_unique_id_locked(&server->lockowner_id, &lsp->ls_id, 1, 64);
- spin_unlock(&clp->cl_lock);
+ lsp->ls_seqid.owner_id = ida_simple_get(&server->lockowner_id, 0, 0, GFP_NOFS);
+ if (lsp->ls_seqid.owner_id < 0)
+ goto out_free;
INIT_LIST_HEAD(&lsp->ls_locks);
return lsp;
+out_free:
+ kfree(lsp);
+ return NULL;
}
-static void nfs4_free_lock_state(struct nfs4_lock_state *lsp)
+void nfs4_free_lock_state(struct nfs_server *server, struct nfs4_lock_state *lsp)
{
- struct nfs_server *server = lsp->ls_state->owner->so_server;
- struct nfs_client *clp = server->nfs_client;
-
- spin_lock(&clp->cl_lock);
- nfs_free_unique_id(&server->lockowner_id, &lsp->ls_id);
- spin_unlock(&clp->cl_lock);
- rpc_destroy_wait_queue(&lsp->ls_sequence.wait);
+ ida_simple_remove(&server->lockowner_id, lsp->ls_seqid.owner_id);
+ nfs4_destroy_seqid_counter(&lsp->ls_seqid);
kfree(lsp);
}
@@ -804,7 +844,7 @@ static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_
}
spin_unlock(&state->state_lock);
if (new != NULL)
- nfs4_free_lock_state(new);
+ nfs4_free_lock_state(state->owner->so_server, new);
return lsp;
}
@@ -825,9 +865,11 @@ void nfs4_put_lock_state(struct nfs4_lock_state *lsp)
if (list_empty(&state->lock_states))
clear_bit(LK_STATE_IN_USE, &state->flags);
spin_unlock(&state->state_lock);
- if (lsp->ls_flags & NFS_LOCK_INITIALIZED)
- nfs4_release_lockowner(lsp);
- nfs4_free_lock_state(lsp);
+ if (lsp->ls_flags & NFS_LOCK_INITIALIZED) {
+ if (nfs4_release_lockowner(lsp) == 0)
+ return;
+ }
+ nfs4_free_lock_state(lsp->ls_state->owner->so_server, lsp);
}
static void nfs4_fl_copy_lock(struct file_lock *dst, struct file_lock *src)
@@ -857,7 +899,8 @@ int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl)
if (fl->fl_flags & FL_POSIX)
lsp = nfs4_get_lock_state(state, fl->fl_owner, 0, NFS4_POSIX_LOCK_TYPE);
else if (fl->fl_flags & FL_FLOCK)
- lsp = nfs4_get_lock_state(state, 0, fl->fl_pid, NFS4_FLOCK_LOCK_TYPE);
+ lsp = nfs4_get_lock_state(state, NULL, fl->fl_pid,
+ NFS4_FLOCK_LOCK_TYPE);
else
return -EINVAL;
if (lsp == NULL)
@@ -867,28 +910,49 @@ int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl)
return 0;
}
-/*
- * Byte-range lock aware utility to initialize the stateid of read/write
- * requests.
- */
-void nfs4_copy_stateid(nfs4_stateid *dst, struct nfs4_state *state, fl_owner_t fl_owner, pid_t fl_pid)
+static bool nfs4_copy_lock_stateid(nfs4_stateid *dst, struct nfs4_state *state,
+ fl_owner_t fl_owner, pid_t fl_pid)
{
struct nfs4_lock_state *lsp;
- int seq;
+ bool ret = false;
- do {
- seq = read_seqbegin(&state->seqlock);
- memcpy(dst, &state->stateid, sizeof(*dst));
- } while (read_seqretry(&state->seqlock, seq));
if (test_bit(LK_STATE_IN_USE, &state->flags) == 0)
- return;
+ goto out;
spin_lock(&state->state_lock);
lsp = __nfs4_find_lock_state(state, fl_owner, fl_pid, NFS4_ANY_LOCK_TYPE);
- if (lsp != NULL && (lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0)
- memcpy(dst, &lsp->ls_stateid, sizeof(*dst));
+ if (lsp != NULL && (lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0) {
+ nfs4_stateid_copy(dst, &lsp->ls_stateid);
+ ret = true;
+ }
spin_unlock(&state->state_lock);
nfs4_put_lock_state(lsp);
+out:
+ return ret;
+}
+
+static void nfs4_copy_open_stateid(nfs4_stateid *dst, struct nfs4_state *state)
+{
+ int seq;
+
+ do {
+ seq = read_seqbegin(&state->seqlock);
+ nfs4_stateid_copy(dst, &state->stateid);
+ } while (read_seqretry(&state->seqlock, seq));
+}
+
+/*
+ * Byte-range lock aware utility to initialize the stateid of read/write
+ * requests.
+ */
+void nfs4_select_rw_stateid(nfs4_stateid *dst, struct nfs4_state *state,
+ fmode_t fmode, fl_owner_t fl_owner, pid_t fl_pid)
+{
+ if (nfs4_copy_delegation_stateid(dst, state->inode, fmode))
+ return;
+ if (nfs4_copy_lock_stateid(dst, state, fl_owner, fl_pid))
+ return;
+ nfs4_copy_open_stateid(dst, state);
}
struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter, gfp_t gfp_mask)
@@ -899,20 +963,28 @@ struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter, gfp_t gfp_m
if (new != NULL) {
new->sequence = counter;
INIT_LIST_HEAD(&new->list);
+ new->task = NULL;
}
return new;
}
void nfs_release_seqid(struct nfs_seqid *seqid)
{
- if (!list_empty(&seqid->list)) {
- struct rpc_sequence *sequence = seqid->sequence->sequence;
+ struct nfs_seqid_counter *sequence;
- spin_lock(&sequence->lock);
- list_del_init(&seqid->list);
- spin_unlock(&sequence->lock);
- rpc_wake_up(&sequence->wait);
+ if (list_empty(&seqid->list))
+ return;
+ sequence = seqid->sequence;
+ spin_lock(&sequence->lock);
+ list_del_init(&seqid->list);
+ if (!list_empty(&sequence->list)) {
+ struct nfs_seqid *next;
+
+ next = list_first_entry(&sequence->list,
+ struct nfs_seqid, list);
+ rpc_wake_up_queued_task(&sequence->wait, next->task);
}
+ spin_unlock(&sequence->lock);
}
void nfs_free_seqid(struct nfs_seqid *seqid)
@@ -928,14 +1000,14 @@ void nfs_free_seqid(struct nfs_seqid *seqid)
*/
static void nfs_increment_seqid(int status, struct nfs_seqid *seqid)
{
- BUG_ON(list_first_entry(&seqid->sequence->sequence->list, struct nfs_seqid, list) != seqid);
+ BUG_ON(list_first_entry(&seqid->sequence->list, struct nfs_seqid, list) != seqid);
switch (status) {
case 0:
break;
case -NFS4ERR_BAD_SEQID:
if (seqid->sequence->flags & NFS_SEQID_CONFIRMED)
return;
- printk(KERN_WARNING "NFS: v4 server returned a bad"
+ pr_warn_ratelimited("NFS: v4 server returned a bad"
" sequence-id error on an"
" unconfirmed sequence %p!\n",
seqid->sequence);
@@ -979,10 +1051,11 @@ void nfs_increment_lock_seqid(int status, struct nfs_seqid *seqid)
int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task)
{
- struct rpc_sequence *sequence = seqid->sequence->sequence;
+ struct nfs_seqid_counter *sequence = seqid->sequence;
int status = 0;
spin_lock(&sequence->lock);
+ seqid->task = task;
if (list_empty(&seqid->list))
list_add_tail(&seqid->list, &sequence->list);
if (list_first_entry(&sequence->list, struct nfs_seqid, list) == seqid)
@@ -1011,19 +1084,28 @@ static void nfs4_clear_state_manager_bit(struct nfs_client *clp)
void nfs4_schedule_state_manager(struct nfs_client *clp)
{
struct task_struct *task;
+ char buf[INET6_ADDRSTRLEN + sizeof("-manager") + 1];
if (test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) != 0)
return;
__module_get(THIS_MODULE);
atomic_inc(&clp->cl_count);
- task = kthread_run(nfs4_run_state_manager, clp, "%s-manager",
- rpc_peeraddr2str(clp->cl_rpcclient,
- RPC_DISPLAY_ADDR));
- if (!IS_ERR(task))
- return;
- nfs4_clear_state_manager_bit(clp);
- nfs_put_client(clp);
- module_put(THIS_MODULE);
+
+ /* The rcu_read_lock() is not strictly necessary, as the state
+ * manager is the only thread that ever changes the rpc_xprt
+ * after it's initialized. At this point, we're single threaded. */
+ rcu_read_lock();
+ snprintf(buf, sizeof(buf), "%s-manager",
+ rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR));
+ rcu_read_unlock();
+ task = kthread_run(nfs4_run_state_manager, clp, "%s", buf);
+ if (IS_ERR(task)) {
+ printk(KERN_ERR "%s: kthread_run: %ld\n",
+ __func__, PTR_ERR(task));
+ nfs4_clear_state_manager_bit(clp);
+ nfs_put_client(clp);
+ module_put(THIS_MODULE);
+ }
}
/*
@@ -1035,12 +1117,31 @@ void nfs4_schedule_lease_recovery(struct nfs_client *clp)
return;
if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state))
set_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state);
+ dprintk("%s: scheduling lease recovery for server %s\n", __func__,
+ clp->cl_hostname);
nfs4_schedule_state_manager(clp);
}
+EXPORT_SYMBOL_GPL(nfs4_schedule_lease_recovery);
+
+/**
+ * nfs40_handle_cb_pathdown - return all delegations after NFS4ERR_CB_PATH_DOWN
+ * @clp: client to process
+ *
+ * Set the NFS4CLNT_LEASE_EXPIRED state in order to force a
+ * resend of the SETCLIENTID and hence re-establish the
+ * callback channel. Then return all existing delegations.
+ */
+static void nfs40_handle_cb_pathdown(struct nfs_client *clp)
+{
+ set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
+ nfs_expire_all_delegations(clp);
+ dprintk("%s: handling CB_PATHDOWN recovery for server %s\n", __func__,
+ clp->cl_hostname);
+}
void nfs4_schedule_path_down_recovery(struct nfs_client *clp)
{
- nfs_handle_cb_pathdown(clp);
+ nfs40_handle_cb_pathdown(clp);
nfs4_schedule_state_manager(clp);
}
@@ -1072,8 +1173,38 @@ void nfs4_schedule_stateid_recovery(const struct nfs_server *server, struct nfs4
struct nfs_client *clp = server->nfs_client;
nfs4_state_mark_reclaim_nograce(clp, state);
+ dprintk("%s: scheduling stateid recovery for server %s\n", __func__,
+ clp->cl_hostname);
nfs4_schedule_state_manager(clp);
}
+EXPORT_SYMBOL_GPL(nfs4_schedule_stateid_recovery);
+
+void nfs_inode_find_state_and_recover(struct inode *inode,
+ const nfs4_stateid *stateid)
+{
+ struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
+ struct nfs_inode *nfsi = NFS_I(inode);
+ struct nfs_open_context *ctx;
+ struct nfs4_state *state;
+ bool found = false;
+
+ spin_lock(&inode->i_lock);
+ list_for_each_entry(ctx, &nfsi->open_files, list) {
+ state = ctx->state;
+ if (state == NULL)
+ continue;
+ if (!test_bit(NFS_DELEGATED_STATE, &state->flags))
+ continue;
+ if (!nfs4_stateid_match(&state->stateid, stateid))
+ continue;
+ nfs4_state_mark_reclaim_nograce(clp, state);
+ found = true;
+ }
+ spin_unlock(&inode->i_lock);
+ if (found)
+ nfs4_schedule_state_manager(clp);
+}
+
static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_recovery_ops *ops)
{
@@ -1112,8 +1243,8 @@ static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_
case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
goto out;
default:
- printk(KERN_ERR "%s: unhandled error %d. Zeroing state\n",
- __func__, status);
+ printk(KERN_ERR "NFS: %s: unhandled error %d. "
+ "Zeroing state\n", __func__, status);
case -ENOMEM:
case -NFS4ERR_DENIED:
case -NFS4ERR_RECLAIM_BAD:
@@ -1159,8 +1290,9 @@ restart:
spin_lock(&state->state_lock);
list_for_each_entry(lock, &state->lock_states, ls_locks) {
if (!(lock->ls_flags & NFS_LOCK_INITIALIZED))
- printk("%s: Lock reclaim failed!\n",
- __func__);
+ pr_warn_ratelimited("NFS: "
+ "%s: Lock reclaim "
+ "failed!\n", __func__);
}
spin_unlock(&state->state_lock);
nfs4_put_open_state(state);
@@ -1169,8 +1301,8 @@ restart:
}
switch (status) {
default:
- printk(KERN_ERR "%s: unhandled error %d. Zeroing state\n",
- __func__, status);
+ printk(KERN_ERR "NFS: %s: unhandled error %d. "
+ "Zeroing state\n", __func__, status);
case -ENOENT:
case -ENOMEM:
case -ESTALE:
@@ -1178,8 +1310,8 @@ restart:
* Open state on this file cannot be recovered
* All we can do is revert to using the zero stateid.
*/
- memset(state->stateid.data, 0,
- sizeof(state->stateid.data));
+ memset(&state->stateid, 0,
+ sizeof(state->stateid));
/* Mark the file as being 'closed' */
state->state = 0;
break;
@@ -1357,7 +1489,7 @@ static int nfs4_recovery_handle_error(struct nfs_client *clp, int error)
case 0:
break;
case -NFS4ERR_CB_PATH_DOWN:
- nfs_handle_cb_pathdown(clp);
+ nfs40_handle_cb_pathdown(clp);
break;
case -NFS4ERR_NO_GRACE:
nfs4_state_end_reclaim_reboot(clp);
@@ -1376,19 +1508,25 @@ static int nfs4_recovery_handle_error(struct nfs_client *clp, int error)
case -NFS4ERR_BADSLOT:
case -NFS4ERR_BAD_HIGH_SLOT:
case -NFS4ERR_DEADSESSION:
- case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
case -NFS4ERR_SEQ_FALSE_RETRY:
case -NFS4ERR_SEQ_MISORDERED:
set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state);
/* Zero session reset errors */
break;
+ case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
+ set_bit(NFS4CLNT_BIND_CONN_TO_SESSION, &clp->cl_state);
+ break;
case -EKEYEXPIRED:
/* Nothing we can do */
nfs4_warn_keyexpired(clp->cl_hostname);
break;
default:
+ dprintk("%s: failed to handle error %d for server %s\n",
+ __func__, error, clp->cl_hostname);
return error;
}
+ dprintk("%s: handled error %d for server %s\n", __func__, error,
+ clp->cl_hostname);
return 0;
}
@@ -1402,6 +1540,7 @@ static int nfs4_do_reclaim(struct nfs_client *clp, const struct nfs4_state_recov
restart:
rcu_read_lock();
list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
+ nfs4_purge_state_owners(server);
spin_lock(&clp->cl_lock);
for (pos = rb_first(&server->state_owners);
pos != NULL;
@@ -1456,34 +1595,82 @@ out:
return nfs4_recovery_handle_error(clp, status);
}
+/* Set NFS4CLNT_LEASE_EXPIRED for all v4.0 errors and for recoverable errors
+ * on EXCHANGE_ID for v4.1
+ */
+static int nfs4_handle_reclaim_lease_error(struct nfs_client *clp, int status)
+{
+ switch (status) {
+ case -NFS4ERR_SEQ_MISORDERED:
+ if (test_and_set_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state))
+ return -ESERVERFAULT;
+ /* Lease confirmation error: retry after purging the lease */
+ ssleep(1);
+ case -NFS4ERR_CLID_INUSE:
+ case -NFS4ERR_STALE_CLIENTID:
+ clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
+ break;
+ case -EACCES:
+ if (clp->cl_machine_cred == NULL)
+ return -EACCES;
+ /* Handle case where the user hasn't set up machine creds */
+ nfs4_clear_machine_cred(clp);
+ case -NFS4ERR_DELAY:
+ case -ETIMEDOUT:
+ case -EAGAIN:
+ ssleep(1);
+ break;
+
+ case -NFS4ERR_MINOR_VERS_MISMATCH:
+ if (clp->cl_cons_state == NFS_CS_SESSION_INITING)
+ nfs_mark_client_ready(clp, -EPROTONOSUPPORT);
+ dprintk("%s: exit with error %d for server %s\n",
+ __func__, -EPROTONOSUPPORT, clp->cl_hostname);
+ return -EPROTONOSUPPORT;
+ case -EKEYEXPIRED:
+ nfs4_warn_keyexpired(clp->cl_hostname);
+ case -NFS4ERR_NOT_SAME: /* FixMe: implement recovery
+ * in nfs4_exchange_id */
+ default:
+ dprintk("%s: exit with error %d for server %s\n", __func__,
+ status, clp->cl_hostname);
+ return status;
+ }
+ set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
+ dprintk("%s: handled error %d for server %s\n", __func__, status,
+ clp->cl_hostname);
+ return 0;
+}
+
static int nfs4_reclaim_lease(struct nfs_client *clp)
{
struct rpc_cred *cred;
const struct nfs4_state_recovery_ops *ops =
clp->cl_mvops->reboot_recovery_ops;
- int status = -ENOENT;
+ int status;
cred = ops->get_clid_cred(clp);
- if (cred != NULL) {
- status = ops->establish_clid(clp, cred);
- put_rpccred(cred);
- /* Handle case where the user hasn't set up machine creds */
- if (status == -EACCES && cred == clp->cl_machine_cred) {
- nfs4_clear_machine_cred(clp);
- status = -EAGAIN;
- }
- if (status == -NFS4ERR_MINOR_VERS_MISMATCH)
- status = -EPROTONOSUPPORT;
- }
- return status;
+ if (cred == NULL)
+ return -ENOENT;
+ status = ops->establish_clid(clp, cred);
+ put_rpccred(cred);
+ if (status != 0)
+ return nfs4_handle_reclaim_lease_error(clp, status);
+ return 0;
}
#ifdef CONFIG_NFS_V4_1
-void nfs4_schedule_session_recovery(struct nfs4_session *session)
+void nfs4_schedule_session_recovery(struct nfs4_session *session, int err)
{
struct nfs_client *clp = session->clp;
- set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state);
+ switch (err) {
+ default:
+ set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state);
+ break;
+ case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
+ set_bit(NFS4CLNT_BIND_CONN_TO_SESSION, &clp->cl_state);
+ }
nfs4_schedule_lease_recovery(clp);
}
EXPORT_SYMBOL_GPL(nfs4_schedule_session_recovery);
@@ -1491,14 +1678,19 @@ EXPORT_SYMBOL_GPL(nfs4_schedule_session_recovery);
void nfs41_handle_recall_slot(struct nfs_client *clp)
{
set_bit(NFS4CLNT_RECALL_SLOT, &clp->cl_state);
+ dprintk("%s: scheduling slot recall for server %s\n", __func__,
+ clp->cl_hostname);
nfs4_schedule_state_manager(clp);
}
static void nfs4_reset_all_state(struct nfs_client *clp)
{
if (test_and_set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0) {
- clp->cl_boot_time = CURRENT_TIME;
+ set_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state);
+ clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
nfs4_state_start_reclaim_nograce(clp);
+ dprintk("%s: scheduling reset of all state for server %s!\n",
+ __func__, clp->cl_hostname);
nfs4_schedule_state_manager(clp);
}
}
@@ -1507,33 +1699,50 @@ static void nfs41_handle_server_reboot(struct nfs_client *clp)
{
if (test_and_set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0) {
nfs4_state_start_reclaim_reboot(clp);
+ dprintk("%s: server %s rebooted!\n", __func__,
+ clp->cl_hostname);
nfs4_schedule_state_manager(clp);
}
}
static void nfs41_handle_state_revoked(struct nfs_client *clp)
{
- /* Temporary */
nfs4_reset_all_state(clp);
+ dprintk("%s: state revoked on server %s\n", __func__, clp->cl_hostname);
}
static void nfs41_handle_recallable_state_revoked(struct nfs_client *clp)
{
/* This will need to handle layouts too */
nfs_expire_all_delegations(clp);
+ dprintk("%s: Recallable state revoked on server %s!\n", __func__,
+ clp->cl_hostname);
}
-static void nfs41_handle_cb_path_down(struct nfs_client *clp)
+static void nfs41_handle_backchannel_fault(struct nfs_client *clp)
{
nfs_expire_all_delegations(clp);
if (test_and_set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state) == 0)
nfs4_schedule_state_manager(clp);
+ dprintk("%s: server %s declared a backchannel fault\n", __func__,
+ clp->cl_hostname);
+}
+
+static void nfs41_handle_cb_path_down(struct nfs_client *clp)
+{
+ if (test_and_set_bit(NFS4CLNT_BIND_CONN_TO_SESSION,
+ &clp->cl_state) == 0)
+ nfs4_schedule_state_manager(clp);
}
void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags)
{
if (!flags)
return;
+
+ dprintk("%s: \"%s\" (client ID %llx) flags=0x%08x\n",
+ __func__, clp->cl_hostname, clp->cl_clientid, flags);
+
if (flags & SEQ4_STATUS_RESTART_RECLAIM_NEEDED)
nfs41_handle_server_reboot(clp);
if (flags & (SEQ4_STATUS_EXPIRED_ALL_STATE_REVOKED |
@@ -1543,18 +1752,21 @@ void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags)
nfs41_handle_state_revoked(clp);
if (flags & SEQ4_STATUS_RECALLABLE_STATE_REVOKED)
nfs41_handle_recallable_state_revoked(clp);
- if (flags & (SEQ4_STATUS_CB_PATH_DOWN |
- SEQ4_STATUS_BACKCHANNEL_FAULT |
- SEQ4_STATUS_CB_PATH_DOWN_SESSION))
+ if (flags & SEQ4_STATUS_BACKCHANNEL_FAULT)
+ nfs41_handle_backchannel_fault(clp);
+ else if (flags & (SEQ4_STATUS_CB_PATH_DOWN |
+ SEQ4_STATUS_CB_PATH_DOWN_SESSION))
nfs41_handle_cb_path_down(clp);
}
static int nfs4_reset_session(struct nfs_client *clp)
{
+ struct rpc_cred *cred;
int status;
nfs4_begin_drain_session(clp);
- status = nfs4_proc_destroy_session(clp->cl_session);
+ cred = nfs4_get_exchange_id_cred(clp);
+ status = nfs4_proc_destroy_session(clp->cl_session, cred);
if (status && status != -NFS4ERR_BADSESSION &&
status != -NFS4ERR_DEADSESSION) {
status = nfs4_recovery_handle_error(clp, status);
@@ -1562,19 +1774,19 @@ static int nfs4_reset_session(struct nfs_client *clp)
}
memset(clp->cl_session->sess_id.data, 0, NFS4_MAX_SESSIONID_LEN);
- status = nfs4_proc_create_session(clp);
+ status = nfs4_proc_create_session(clp, cred);
if (status) {
- status = nfs4_recovery_handle_error(clp, status);
+ dprintk("%s: session reset failed with status %d for server %s!\n",
+ __func__, status, clp->cl_hostname);
+ status = nfs4_handle_reclaim_lease_error(clp, status);
goto out;
}
- clear_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state);
- /* create_session negotiated new slot table */
- clear_bit(NFS4CLNT_RECALL_SLOT, &clp->cl_state);
-
- /* Let the state manager reestablish state */
- if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state))
- nfs41_setup_state_renewal(clp);
+ nfs41_finish_session_reset(clp);
+ dprintk("%s: session reset was successful for server %s!\n",
+ __func__, clp->cl_hostname);
out:
+ if (cred)
+ put_rpccred(cred);
return status;
}
@@ -1606,37 +1818,41 @@ static int nfs4_recall_slot(struct nfs_client *clp)
return 0;
}
-#else /* CONFIG_NFS_V4_1 */
-static int nfs4_reset_session(struct nfs_client *clp) { return 0; }
-static int nfs4_end_drain_session(struct nfs_client *clp) { return 0; }
-static int nfs4_recall_slot(struct nfs_client *clp) { return 0; }
-#endif /* CONFIG_NFS_V4_1 */
-
-/* Set NFS4CLNT_LEASE_EXPIRED for all v4.0 errors and for recoverable errors
- * on EXCHANGE_ID for v4.1
- */
-static void nfs4_set_lease_expired(struct nfs_client *clp, int status)
+static int nfs4_bind_conn_to_session(struct nfs_client *clp)
{
- switch (status) {
- case -NFS4ERR_CLID_INUSE:
- case -NFS4ERR_STALE_CLIENTID:
- clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
+ struct rpc_cred *cred;
+ int ret;
+
+ nfs4_begin_drain_session(clp);
+ cred = nfs4_get_exchange_id_cred(clp);
+ ret = nfs4_proc_bind_conn_to_session(clp, cred);
+ if (cred)
+ put_rpccred(cred);
+ clear_bit(NFS4CLNT_BIND_CONN_TO_SESSION, &clp->cl_state);
+ switch (ret) {
+ case 0:
+ dprintk("%s: bind_conn_to_session was successful for server %s!\n",
+ __func__, clp->cl_hostname);
break;
case -NFS4ERR_DELAY:
- case -ETIMEDOUT:
- case -EAGAIN:
ssleep(1);
+ set_bit(NFS4CLNT_BIND_CONN_TO_SESSION, &clp->cl_state);
break;
-
- case -EKEYEXPIRED:
- nfs4_warn_keyexpired(clp->cl_hostname);
- case -NFS4ERR_NOT_SAME: /* FixMe: implement recovery
- * in nfs4_exchange_id */
default:
- return;
+ return nfs4_recovery_handle_error(clp, ret);
}
- set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
+ return 0;
+}
+#else /* CONFIG_NFS_V4_1 */
+static int nfs4_reset_session(struct nfs_client *clp) { return 0; }
+static int nfs4_end_drain_session(struct nfs_client *clp) { return 0; }
+static int nfs4_recall_slot(struct nfs_client *clp) { return 0; }
+
+static int nfs4_bind_conn_to_session(struct nfs_client *clp)
+{
+ return 0;
}
+#endif /* CONFIG_NFS_V4_1 */
static void nfs4_state_manager(struct nfs_client *clp)
{
@@ -1644,19 +1860,21 @@ static void nfs4_state_manager(struct nfs_client *clp)
/* Ensure exclusive access to NFSv4 state */
do {
+ if (test_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state)) {
+ status = nfs4_reclaim_lease(clp);
+ if (status < 0)
+ goto out_error;
+ clear_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state);
+ set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
+ }
+
if (test_and_clear_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) {
/* We're going to have to re-establish a clientid */
status = nfs4_reclaim_lease(clp);
- if (status) {
- nfs4_set_lease_expired(clp, status);
- if (test_bit(NFS4CLNT_LEASE_EXPIRED,
- &clp->cl_state))
- continue;
- if (clp->cl_cons_state ==
- NFS_CS_SESSION_INITING)
- nfs_mark_client_ready(clp, status);
+ if (status < 0)
goto out_error;
- }
+ if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state))
+ continue;
clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state);
if (test_and_clear_bit(NFS4CLNT_SERVER_SCOPE_MISMATCH,
@@ -1687,6 +1905,15 @@ static void nfs4_state_manager(struct nfs_client *clp)
goto out_error;
}
+ /* Send BIND_CONN_TO_SESSION */
+ if (test_and_clear_bit(NFS4CLNT_BIND_CONN_TO_SESSION,
+ &clp->cl_state) && nfs4_has_session(clp)) {
+ status = nfs4_bind_conn_to_session(clp);
+ if (status < 0)
+ goto out_error;
+ continue;
+ }
+
/* First recover reboot state... */
if (test_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state)) {
status = nfs4_do_reclaim(clp,
@@ -1737,7 +1964,7 @@ static void nfs4_state_manager(struct nfs_client *clp)
} while (atomic_read(&clp->cl_count) > 1);
return;
out_error:
- printk(KERN_WARNING "Error: state manager failed on NFSv4 server %s"
+ pr_warn_ratelimited("NFS: state manager failed on NFSv4 server %s"
" with error %d\n", clp->cl_hostname, -status);
nfs4_end_drain_session(clp);
nfs4_clear_state_manager_bit(clp);
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index e6161b213ed..18fae29b030 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -44,6 +44,8 @@
#include <linux/pagemap.h>
#include <linux/proc_fs.h>
#include <linux/kdev_t.h>
+#include <linux/module.h>
+#include <linux/utsname.h>
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/msg_prot.h>
#include <linux/sunrpc/gss_api.h>
@@ -51,9 +53,11 @@
#include <linux/nfs4.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_idmap.h>
+
#include "nfs4_fs.h"
#include "internal.h"
#include "pnfs.h"
+#include "netns.h"
#define NFSDBG_FACILITY NFSDBG_XDR
@@ -72,7 +76,7 @@ static int nfs4_stat_to_errno(int);
/* lock,open owner id:
* we currently use size 2 (u64) out of (NFS4_OPAQUE_LIMIT >> 2)
*/
-#define open_owner_id_maxsz (1 + 1 + 4)
+#define open_owner_id_maxsz (1 + 2 + 1 + 1 + 2)
#define lock_owner_id_maxsz (1 + 1 + 4)
#define decode_lockowner_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ))
#define compound_encode_hdr_maxsz (3 + (NFS4_MAXTAGLEN >> 2))
@@ -97,9 +101,12 @@ static int nfs4_stat_to_errno(int);
#define nfs4_path_maxsz (1 + ((3 + NFS4_MAXPATHLEN) >> 2))
#define nfs4_owner_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ))
#define nfs4_group_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ))
+/* We support only one layout type per file system */
+#define decode_mdsthreshold_maxsz (1 + 1 + nfs4_fattr_bitmap_maxsz + 1 + 8)
/* This is based on getfattr, which uses the most attributes: */
#define nfs4_fattr_value_maxsz (1 + (1 + 2 + 2 + 4 + 2 + 1 + 1 + 2 + 2 + \
- 3 + 3 + 3 + nfs4_owner_maxsz + nfs4_group_maxsz))
+ 3 + 3 + 3 + nfs4_owner_maxsz + \
+ nfs4_group_maxsz + decode_mdsthreshold_maxsz))
#define nfs4_fattr_maxsz (nfs4_fattr_bitmap_maxsz + \
nfs4_fattr_value_maxsz)
#define decode_getattr_maxsz (op_decode_hdr_maxsz + nfs4_fattr_maxsz)
@@ -271,7 +278,12 @@ static int nfs4_stat_to_errno(int);
1 /* flags */ + \
1 /* spa_how */ + \
0 /* SP4_NONE (for now) */ + \
- 1 /* zero implemetation id array */)
+ 1 /* implementation id array of size 1 */ + \
+ 1 /* nii_domain */ + \
+ XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + \
+ 1 /* nii_name */ + \
+ XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + \
+ 3 /* nii_date */)
#define decode_exchange_id_maxsz (op_decode_hdr_maxsz + \
2 /* eir_clientid */ + \
1 /* eir_sequenceid */ + \
@@ -284,7 +296,11 @@ static int nfs4_stat_to_errno(int);
/* eir_server_scope<> */ \
XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + 1 + \
1 /* eir_server_impl_id array length */ + \
- 0 /* ignored eir_server_impl_id contents */)
+ 1 /* nii_domain */ + \
+ XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + \
+ 1 /* nii_name */ + \
+ XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + \
+ 3 /* nii_date */)
#define encode_channel_attrs_maxsz (6 + 1 /* ca_rdma_ird.len (0) */)
#define decode_channel_attrs_maxsz (6 + \
1 /* ca_rdma_ird.len */ + \
@@ -310,8 +326,20 @@ static int nfs4_stat_to_errno(int);
1 /* csr_flags */ + \
decode_channel_attrs_maxsz + \
decode_channel_attrs_maxsz)
+#define encode_bind_conn_to_session_maxsz (op_encode_hdr_maxsz + \
+ /* bctsa_sessid */ \
+ XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + \
+ 1 /* bctsa_dir */ + \
+ 1 /* bctsa_use_conn_in_rdma_mode */)
+#define decode_bind_conn_to_session_maxsz (op_decode_hdr_maxsz + \
+ /* bctsr_sessid */ \
+ XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + \
+ 1 /* bctsr_dir */ + \
+ 1 /* bctsr_use_conn_in_rdma_mode */)
#define encode_destroy_session_maxsz (op_encode_hdr_maxsz + 4)
#define decode_destroy_session_maxsz (op_decode_hdr_maxsz)
+#define encode_destroy_clientid_maxsz (op_encode_hdr_maxsz + 2)
+#define decode_destroy_clientid_maxsz (op_decode_hdr_maxsz)
#define encode_sequence_maxsz (op_encode_hdr_maxsz + \
XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + 4)
#define decode_sequence_maxsz (op_decode_hdr_maxsz + \
@@ -410,30 +438,22 @@ static int nfs4_stat_to_errno(int);
#define NFS4_enc_commit_sz (compound_encode_hdr_maxsz + \
encode_sequence_maxsz + \
encode_putfh_maxsz + \
- encode_commit_maxsz + \
- encode_getattr_maxsz)
+ encode_commit_maxsz)
#define NFS4_dec_commit_sz (compound_decode_hdr_maxsz + \
decode_sequence_maxsz + \
decode_putfh_maxsz + \
- decode_commit_maxsz + \
- decode_getattr_maxsz)
+ decode_commit_maxsz)
#define NFS4_enc_open_sz (compound_encode_hdr_maxsz + \
encode_sequence_maxsz + \
encode_putfh_maxsz + \
- encode_savefh_maxsz + \
encode_open_maxsz + \
encode_getfh_maxsz + \
- encode_getattr_maxsz + \
- encode_restorefh_maxsz + \
encode_getattr_maxsz)
#define NFS4_dec_open_sz (compound_decode_hdr_maxsz + \
decode_sequence_maxsz + \
decode_putfh_maxsz + \
- decode_savefh_maxsz + \
decode_open_maxsz + \
decode_getfh_maxsz + \
- decode_getattr_maxsz + \
- decode_restorefh_maxsz + \
decode_getattr_maxsz)
#define NFS4_enc_open_confirm_sz \
(compound_encode_hdr_maxsz + \
@@ -584,47 +604,37 @@ static int nfs4_stat_to_errno(int);
#define NFS4_enc_remove_sz (compound_encode_hdr_maxsz + \
encode_sequence_maxsz + \
encode_putfh_maxsz + \
- encode_remove_maxsz + \
- encode_getattr_maxsz)
+ encode_remove_maxsz)
#define NFS4_dec_remove_sz (compound_decode_hdr_maxsz + \
decode_sequence_maxsz + \
decode_putfh_maxsz + \
- decode_remove_maxsz + \
- decode_getattr_maxsz)
+ decode_remove_maxsz)
#define NFS4_enc_rename_sz (compound_encode_hdr_maxsz + \
encode_sequence_maxsz + \
encode_putfh_maxsz + \
encode_savefh_maxsz + \
encode_putfh_maxsz + \
- encode_rename_maxsz + \
- encode_getattr_maxsz + \
- encode_restorefh_maxsz + \
- encode_getattr_maxsz)
+ encode_rename_maxsz)
#define NFS4_dec_rename_sz (compound_decode_hdr_maxsz + \
decode_sequence_maxsz + \
decode_putfh_maxsz + \
decode_savefh_maxsz + \
decode_putfh_maxsz + \
- decode_rename_maxsz + \
- decode_getattr_maxsz + \
- decode_restorefh_maxsz + \
- decode_getattr_maxsz)
+ decode_rename_maxsz)
#define NFS4_enc_link_sz (compound_encode_hdr_maxsz + \
encode_sequence_maxsz + \
encode_putfh_maxsz + \
encode_savefh_maxsz + \
encode_putfh_maxsz + \
encode_link_maxsz + \
- decode_getattr_maxsz + \
encode_restorefh_maxsz + \
- decode_getattr_maxsz)
+ encode_getattr_maxsz)
#define NFS4_dec_link_sz (compound_decode_hdr_maxsz + \
decode_sequence_maxsz + \
decode_putfh_maxsz + \
decode_savefh_maxsz + \
decode_putfh_maxsz + \
decode_link_maxsz + \
- decode_getattr_maxsz + \
decode_restorefh_maxsz + \
decode_getattr_maxsz)
#define NFS4_enc_symlink_sz (compound_encode_hdr_maxsz + \
@@ -642,20 +652,14 @@ static int nfs4_stat_to_errno(int);
#define NFS4_enc_create_sz (compound_encode_hdr_maxsz + \
encode_sequence_maxsz + \
encode_putfh_maxsz + \
- encode_savefh_maxsz + \
encode_create_maxsz + \
encode_getfh_maxsz + \
- encode_getattr_maxsz + \
- encode_restorefh_maxsz + \
encode_getattr_maxsz)
#define NFS4_dec_create_sz (compound_decode_hdr_maxsz + \
decode_sequence_maxsz + \
decode_putfh_maxsz + \
- decode_savefh_maxsz + \
decode_create_maxsz + \
decode_getfh_maxsz + \
- decode_getattr_maxsz + \
- decode_restorefh_maxsz + \
decode_getattr_maxsz)
#define NFS4_enc_pathconf_sz (compound_encode_hdr_maxsz + \
encode_sequence_maxsz + \
@@ -727,6 +731,12 @@ static int nfs4_stat_to_errno(int);
decode_putfh_maxsz + \
decode_secinfo_maxsz)
#if defined(CONFIG_NFS_V4_1)
+#define NFS4_enc_bind_conn_to_session_sz \
+ (compound_encode_hdr_maxsz + \
+ encode_bind_conn_to_session_maxsz)
+#define NFS4_dec_bind_conn_to_session_sz \
+ (compound_decode_hdr_maxsz + \
+ decode_bind_conn_to_session_maxsz)
#define NFS4_enc_exchange_id_sz \
(compound_encode_hdr_maxsz + \
encode_exchange_id_maxsz)
@@ -743,6 +753,10 @@ static int nfs4_stat_to_errno(int);
encode_destroy_session_maxsz)
#define NFS4_dec_destroy_session_sz (compound_decode_hdr_maxsz + \
decode_destroy_session_maxsz)
+#define NFS4_enc_destroy_clientid_sz (compound_encode_hdr_maxsz + \
+ encode_destroy_clientid_maxsz)
+#define NFS4_dec_destroy_clientid_sz (compound_decode_hdr_maxsz + \
+ decode_destroy_clientid_maxsz)
#define NFS4_enc_sequence_sz \
(compound_decode_hdr_maxsz + \
encode_sequence_maxsz)
@@ -838,6 +852,12 @@ const u32 nfs41_maxread_overhead = ((RPC_MAX_HEADER_WITH_AUTH +
XDR_UNIT);
#endif /* CONFIG_NFS_V4_1 */
+static unsigned short send_implementation_id = 1;
+
+module_param(send_implementation_id, ushort, 0644);
+MODULE_PARM_DESC(send_implementation_id,
+ "Send implementation ID with NFSv4.1 exchange_id");
+
static const umode_t nfs_type2fmt[] = {
[NF4BAD] = 0,
[NF4REG] = S_IFREG,
@@ -868,15 +888,44 @@ static __be32 *reserve_space(struct xdr_stream *xdr, size_t nbytes)
return p;
}
+static void encode_opaque_fixed(struct xdr_stream *xdr, const void *buf, size_t len)
+{
+ __be32 *p;
+
+ p = xdr_reserve_space(xdr, len);
+ xdr_encode_opaque_fixed(p, buf, len);
+}
+
static void encode_string(struct xdr_stream *xdr, unsigned int len, const char *str)
{
__be32 *p;
- p = xdr_reserve_space(xdr, 4 + len);
- BUG_ON(p == NULL);
+ p = reserve_space(xdr, 4 + len);
xdr_encode_opaque(p, str, len);
}
+static void encode_uint32(struct xdr_stream *xdr, u32 n)
+{
+ __be32 *p;
+
+ p = reserve_space(xdr, 4);
+ *p = cpu_to_be32(n);
+}
+
+static void encode_uint64(struct xdr_stream *xdr, u64 n)
+{
+ __be32 *p;
+
+ p = reserve_space(xdr, 8);
+ xdr_encode_hyper(p, n);
+}
+
+static void encode_nfs4_seqid(struct xdr_stream *xdr,
+ const struct nfs_seqid *seqid)
+{
+ encode_uint32(xdr, seqid->sequence->counter);
+}
+
static void encode_compound_hdr(struct xdr_stream *xdr,
struct rpc_rqst *req,
struct compound_hdr *hdr)
@@ -889,28 +938,37 @@ static void encode_compound_hdr(struct xdr_stream *xdr,
* but this is not required as a MUST for the server to do so. */
hdr->replen = RPC_REPHDRSIZE + auth->au_rslack + 3 + hdr->taglen;
- dprintk("encode_compound: tag=%.*s\n", (int)hdr->taglen, hdr->tag);
BUG_ON(hdr->taglen > NFS4_MAXTAGLEN);
- p = reserve_space(xdr, 4 + hdr->taglen + 8);
- p = xdr_encode_opaque(p, hdr->tag, hdr->taglen);
+ encode_string(xdr, hdr->taglen, hdr->tag);
+ p = reserve_space(xdr, 8);
*p++ = cpu_to_be32(hdr->minorversion);
hdr->nops_p = p;
*p = cpu_to_be32(hdr->nops);
}
+static void encode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 op,
+ uint32_t replen,
+ struct compound_hdr *hdr)
+{
+ encode_uint32(xdr, op);
+ hdr->nops++;
+ hdr->replen += replen;
+}
+
static void encode_nops(struct compound_hdr *hdr)
{
BUG_ON(hdr->nops > NFS4_MAX_OPS);
*hdr->nops_p = htonl(hdr->nops);
}
-static void encode_nfs4_verifier(struct xdr_stream *xdr, const nfs4_verifier *verf)
+static void encode_nfs4_stateid(struct xdr_stream *xdr, const nfs4_stateid *stateid)
{
- __be32 *p;
+ encode_opaque_fixed(xdr, stateid, NFS4_STATEID_SIZE);
+}
- p = xdr_reserve_space(xdr, NFS4_VERIFIER_SIZE);
- BUG_ON(p == NULL);
- xdr_encode_opaque_fixed(p, verf->data, NFS4_VERIFIER_SIZE);
+static void encode_nfs4_verifier(struct xdr_stream *xdr, const nfs4_verifier *verf)
+{
+ encode_opaque_fixed(xdr, verf->data, NFS4_VERIFIER_SIZE);
}
static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const struct nfs_server *server)
@@ -1023,7 +1081,7 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const
* Now we backfill the bitmap and the attribute buffer length.
*/
if (len != ((char *)p - (char *)q) + 4) {
- printk(KERN_ERR "nfs: Attr length error, %u != %Zu\n",
+ printk(KERN_ERR "NFS: Attr length error, %u != %Zu\n",
len, ((char *)p - (char *)q) + 4);
BUG();
}
@@ -1037,46 +1095,33 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const
static void encode_access(struct xdr_stream *xdr, u32 access, struct compound_hdr *hdr)
{
- __be32 *p;
-
- p = reserve_space(xdr, 8);
- *p++ = cpu_to_be32(OP_ACCESS);
- *p = cpu_to_be32(access);
- hdr->nops++;
- hdr->replen += decode_access_maxsz;
+ encode_op_hdr(xdr, OP_ACCESS, decode_access_maxsz, hdr);
+ encode_uint32(xdr, access);
}
static void encode_close(struct xdr_stream *xdr, const struct nfs_closeargs *arg, struct compound_hdr *hdr)
{
- __be32 *p;
-
- p = reserve_space(xdr, 8+NFS4_STATEID_SIZE);
- *p++ = cpu_to_be32(OP_CLOSE);
- *p++ = cpu_to_be32(arg->seqid->sequence->counter);
- xdr_encode_opaque_fixed(p, arg->stateid->data, NFS4_STATEID_SIZE);
- hdr->nops++;
- hdr->replen += decode_close_maxsz;
+ encode_op_hdr(xdr, OP_CLOSE, decode_close_maxsz, hdr);
+ encode_nfs4_seqid(xdr, arg->seqid);
+ encode_nfs4_stateid(xdr, arg->stateid);
}
-static void encode_commit(struct xdr_stream *xdr, const struct nfs_writeargs *args, struct compound_hdr *hdr)
+static void encode_commit(struct xdr_stream *xdr, const struct nfs_commitargs *args, struct compound_hdr *hdr)
{
__be32 *p;
- p = reserve_space(xdr, 16);
- *p++ = cpu_to_be32(OP_COMMIT);
+ encode_op_hdr(xdr, OP_COMMIT, decode_commit_maxsz, hdr);
+ p = reserve_space(xdr, 12);
p = xdr_encode_hyper(p, args->offset);
*p = cpu_to_be32(args->count);
- hdr->nops++;
- hdr->replen += decode_commit_maxsz;
}
static void encode_create(struct xdr_stream *xdr, const struct nfs4_create_arg *create, struct compound_hdr *hdr)
{
__be32 *p;
- p = reserve_space(xdr, 8);
- *p++ = cpu_to_be32(OP_CREATE);
- *p = cpu_to_be32(create->ftype);
+ encode_op_hdr(xdr, OP_CREATE, decode_create_maxsz, hdr);
+ encode_uint32(xdr, create->ftype);
switch (create->ftype) {
case NF4LNK:
@@ -1096,9 +1141,6 @@ static void encode_create(struct xdr_stream *xdr, const struct nfs4_create_arg *
}
encode_string(xdr, create->name->len, create->name->name);
- hdr->nops++;
- hdr->replen += decode_create_maxsz;
-
encode_attrs(xdr, create->attrs, create->server);
}
@@ -1106,25 +1148,21 @@ static void encode_getattr_one(struct xdr_stream *xdr, uint32_t bitmap, struct c
{
__be32 *p;
- p = reserve_space(xdr, 12);
- *p++ = cpu_to_be32(OP_GETATTR);
+ encode_op_hdr(xdr, OP_GETATTR, decode_getattr_maxsz, hdr);
+ p = reserve_space(xdr, 8);
*p++ = cpu_to_be32(1);
*p = cpu_to_be32(bitmap);
- hdr->nops++;
- hdr->replen += decode_getattr_maxsz;
}
static void encode_getattr_two(struct xdr_stream *xdr, uint32_t bm0, uint32_t bm1, struct compound_hdr *hdr)
{
__be32 *p;
- p = reserve_space(xdr, 16);
- *p++ = cpu_to_be32(OP_GETATTR);
+ encode_op_hdr(xdr, OP_GETATTR, decode_getattr_maxsz, hdr);
+ p = reserve_space(xdr, 12);
*p++ = cpu_to_be32(2);
*p++ = cpu_to_be32(bm0);
*p = cpu_to_be32(bm1);
- hdr->nops++;
- hdr->replen += decode_getattr_maxsz;
}
static void
@@ -1134,8 +1172,7 @@ encode_getattr_three(struct xdr_stream *xdr,
{
__be32 *p;
- p = reserve_space(xdr, 4);
- *p = cpu_to_be32(OP_GETATTR);
+ encode_op_hdr(xdr, OP_GETATTR, decode_getattr_maxsz, hdr);
if (bm2) {
p = reserve_space(xdr, 16);
*p++ = cpu_to_be32(3);
@@ -1152,8 +1189,6 @@ encode_getattr_three(struct xdr_stream *xdr,
*p++ = cpu_to_be32(1);
*p = cpu_to_be32(bm0);
}
- hdr->nops++;
- hdr->replen += decode_getattr_maxsz;
}
static void encode_getfattr(struct xdr_stream *xdr, const u32* bitmask, struct compound_hdr *hdr)
@@ -1162,6 +1197,17 @@ static void encode_getfattr(struct xdr_stream *xdr, const u32* bitmask, struct c
bitmask[1] & nfs4_fattr_bitmap[1], hdr);
}
+static void encode_getfattr_open(struct xdr_stream *xdr, const u32 *bitmask,
+ const u32 *open_bitmap,
+ struct compound_hdr *hdr)
+{
+ encode_getattr_three(xdr,
+ bitmask[0] & open_bitmap[0],
+ bitmask[1] & open_bitmap[1],
+ bitmask[2] & open_bitmap[2],
+ hdr);
+}
+
static void encode_fsinfo(struct xdr_stream *xdr, const u32* bitmask, struct compound_hdr *hdr)
{
encode_getattr_three(xdr,
@@ -1179,23 +1225,13 @@ static void encode_fs_locations(struct xdr_stream *xdr, const u32* bitmask, stru
static void encode_getfh(struct xdr_stream *xdr, struct compound_hdr *hdr)
{
- __be32 *p;
-
- p = reserve_space(xdr, 4);
- *p = cpu_to_be32(OP_GETFH);
- hdr->nops++;
- hdr->replen += decode_getfh_maxsz;
+ encode_op_hdr(xdr, OP_GETFH, decode_getfh_maxsz, hdr);
}
static void encode_link(struct xdr_stream *xdr, const struct qstr *name, struct compound_hdr *hdr)
{
- __be32 *p;
-
- p = reserve_space(xdr, 8 + name->len);
- *p++ = cpu_to_be32(OP_LINK);
- xdr_encode_opaque(p, name->name, name->len);
- hdr->nops++;
- hdr->replen += decode_link_maxsz;
+ encode_op_hdr(xdr, OP_LINK, decode_link_maxsz, hdr);
+ encode_string(xdr, name->len, name->name);
}
static inline int nfs4_lock_type(struct file_lock *fl, int block)
@@ -1232,79 +1268,60 @@ static void encode_lock(struct xdr_stream *xdr, const struct nfs_lock_args *args
{
__be32 *p;
- p = reserve_space(xdr, 32);
- *p++ = cpu_to_be32(OP_LOCK);
+ encode_op_hdr(xdr, OP_LOCK, decode_lock_maxsz, hdr);
+ p = reserve_space(xdr, 28);
*p++ = cpu_to_be32(nfs4_lock_type(args->fl, args->block));
*p++ = cpu_to_be32(args->reclaim);
p = xdr_encode_hyper(p, args->fl->fl_start);
p = xdr_encode_hyper(p, nfs4_lock_length(args->fl));
*p = cpu_to_be32(args->new_lock_owner);
if (args->new_lock_owner){
- p = reserve_space(xdr, 4+NFS4_STATEID_SIZE+4);
- *p++ = cpu_to_be32(args->open_seqid->sequence->counter);
- p = xdr_encode_opaque_fixed(p, args->open_stateid->data, NFS4_STATEID_SIZE);
- *p++ = cpu_to_be32(args->lock_seqid->sequence->counter);
+ encode_nfs4_seqid(xdr, args->open_seqid);
+ encode_nfs4_stateid(xdr, args->open_stateid);
+ encode_nfs4_seqid(xdr, args->lock_seqid);
encode_lockowner(xdr, &args->lock_owner);
}
else {
- p = reserve_space(xdr, NFS4_STATEID_SIZE+4);
- p = xdr_encode_opaque_fixed(p, args->lock_stateid->data, NFS4_STATEID_SIZE);
- *p = cpu_to_be32(args->lock_seqid->sequence->counter);
+ encode_nfs4_stateid(xdr, args->lock_stateid);
+ encode_nfs4_seqid(xdr, args->lock_seqid);
}
- hdr->nops++;
- hdr->replen += decode_lock_maxsz;
}
static void encode_lockt(struct xdr_stream *xdr, const struct nfs_lockt_args *args, struct compound_hdr *hdr)
{
__be32 *p;
- p = reserve_space(xdr, 24);
- *p++ = cpu_to_be32(OP_LOCKT);
+ encode_op_hdr(xdr, OP_LOCKT, decode_lockt_maxsz, hdr);
+ p = reserve_space(xdr, 20);
*p++ = cpu_to_be32(nfs4_lock_type(args->fl, 0));
p = xdr_encode_hyper(p, args->fl->fl_start);
p = xdr_encode_hyper(p, nfs4_lock_length(args->fl));
encode_lockowner(xdr, &args->lock_owner);
- hdr->nops++;
- hdr->replen += decode_lockt_maxsz;
}
static void encode_locku(struct xdr_stream *xdr, const struct nfs_locku_args *args, struct compound_hdr *hdr)
{
__be32 *p;
- p = reserve_space(xdr, 12+NFS4_STATEID_SIZE+16);
- *p++ = cpu_to_be32(OP_LOCKU);
- *p++ = cpu_to_be32(nfs4_lock_type(args->fl, 0));
- *p++ = cpu_to_be32(args->seqid->sequence->counter);
- p = xdr_encode_opaque_fixed(p, args->stateid->data, NFS4_STATEID_SIZE);
+ encode_op_hdr(xdr, OP_LOCKU, decode_locku_maxsz, hdr);
+ encode_uint32(xdr, nfs4_lock_type(args->fl, 0));
+ encode_nfs4_seqid(xdr, args->seqid);
+ encode_nfs4_stateid(xdr, args->stateid);
+ p = reserve_space(xdr, 16);
p = xdr_encode_hyper(p, args->fl->fl_start);
xdr_encode_hyper(p, nfs4_lock_length(args->fl));
- hdr->nops++;
- hdr->replen += decode_locku_maxsz;
}
static void encode_release_lockowner(struct xdr_stream *xdr, const struct nfs_lowner *lowner, struct compound_hdr *hdr)
{
- __be32 *p;
-
- p = reserve_space(xdr, 4);
- *p = cpu_to_be32(OP_RELEASE_LOCKOWNER);
+ encode_op_hdr(xdr, OP_RELEASE_LOCKOWNER, decode_release_lockowner_maxsz, hdr);
encode_lockowner(xdr, lowner);
- hdr->nops++;
- hdr->replen += decode_release_lockowner_maxsz;
}
static void encode_lookup(struct xdr_stream *xdr, const struct qstr *name, struct compound_hdr *hdr)
{
- int len = name->len;
- __be32 *p;
-
- p = reserve_space(xdr, 8 + len);
- *p++ = cpu_to_be32(OP_LOOKUP);
- xdr_encode_opaque(p, name->name, len);
- hdr->nops++;
- hdr->replen += decode_lookup_maxsz;
+ encode_op_hdr(xdr, OP_LOOKUP, decode_lookup_maxsz, hdr);
+ encode_string(xdr, name->len, name->name);
}
static void encode_share_access(struct xdr_stream *xdr, fmode_t fmode)
@@ -1335,16 +1352,15 @@ static inline void encode_openhdr(struct xdr_stream *xdr, const struct nfs_opena
* opcode 4, seqid 4, share_access 4, share_deny 4, clientid 8, ownerlen 4,
* owner 4 = 32
*/
- p = reserve_space(xdr, 8);
- *p++ = cpu_to_be32(OP_OPEN);
- *p = cpu_to_be32(arg->seqid->sequence->counter);
+ encode_nfs4_seqid(xdr, arg->seqid);
encode_share_access(xdr, arg->fmode);
- p = reserve_space(xdr, 32);
+ p = reserve_space(xdr, 36);
p = xdr_encode_hyper(p, arg->clientid);
- *p++ = cpu_to_be32(20);
+ *p++ = cpu_to_be32(24);
p = xdr_encode_opaque_fixed(p, "open id:", 8);
*p++ = cpu_to_be32(arg->server->s_dev);
- xdr_encode_hyper(p, arg->id);
+ *p++ = cpu_to_be32(arg->id.uniquifier);
+ xdr_encode_hyper(p, arg->id.create_time);
}
static inline void encode_createmode(struct xdr_stream *xdr, const struct nfs_openargs *arg)
@@ -1437,14 +1453,15 @@ static inline void encode_claim_delegate_cur(struct xdr_stream *xdr, const struc
{
__be32 *p;
- p = reserve_space(xdr, 4+NFS4_STATEID_SIZE);
- *p++ = cpu_to_be32(NFS4_OPEN_CLAIM_DELEGATE_CUR);
- xdr_encode_opaque_fixed(p, stateid->data, NFS4_STATEID_SIZE);
+ p = reserve_space(xdr, 4);
+ *p = cpu_to_be32(NFS4_OPEN_CLAIM_DELEGATE_CUR);
+ encode_nfs4_stateid(xdr, stateid);
encode_string(xdr, name->len, name->name);
}
static void encode_open(struct xdr_stream *xdr, const struct nfs_openargs *arg, struct compound_hdr *hdr)
{
+ encode_op_hdr(xdr, OP_OPEN, decode_open_maxsz, hdr);
encode_openhdr(xdr, arg);
encode_opentype(xdr, arg);
switch (arg->claim) {
@@ -1460,88 +1477,64 @@ static void encode_open(struct xdr_stream *xdr, const struct nfs_openargs *arg,
default:
BUG();
}
- hdr->nops++;
- hdr->replen += decode_open_maxsz;
}
static void encode_open_confirm(struct xdr_stream *xdr, const struct nfs_open_confirmargs *arg, struct compound_hdr *hdr)
{
- __be32 *p;
-
- p = reserve_space(xdr, 4+NFS4_STATEID_SIZE+4);
- *p++ = cpu_to_be32(OP_OPEN_CONFIRM);
- p = xdr_encode_opaque_fixed(p, arg->stateid->data, NFS4_STATEID_SIZE);
- *p = cpu_to_be32(arg->seqid->sequence->counter);
- hdr->nops++;
- hdr->replen += decode_open_confirm_maxsz;
+ encode_op_hdr(xdr, OP_OPEN_CONFIRM, decode_open_confirm_maxsz, hdr);
+ encode_nfs4_stateid(xdr, arg->stateid);
+ encode_nfs4_seqid(xdr, arg->seqid);
}
static void encode_open_downgrade(struct xdr_stream *xdr, const struct nfs_closeargs *arg, struct compound_hdr *hdr)
{
- __be32 *p;
-
- p = reserve_space(xdr, 4+NFS4_STATEID_SIZE+4);
- *p++ = cpu_to_be32(OP_OPEN_DOWNGRADE);
- p = xdr_encode_opaque_fixed(p, arg->stateid->data, NFS4_STATEID_SIZE);
- *p = cpu_to_be32(arg->seqid->sequence->counter);
+ encode_op_hdr(xdr, OP_OPEN_DOWNGRADE, decode_open_downgrade_maxsz, hdr);
+ encode_nfs4_stateid(xdr, arg->stateid);
+ encode_nfs4_seqid(xdr, arg->seqid);
encode_share_access(xdr, arg->fmode);
- hdr->nops++;
- hdr->replen += decode_open_downgrade_maxsz;
}
static void
encode_putfh(struct xdr_stream *xdr, const struct nfs_fh *fh, struct compound_hdr *hdr)
{
- int len = fh->size;
- __be32 *p;
-
- p = reserve_space(xdr, 8 + len);
- *p++ = cpu_to_be32(OP_PUTFH);
- xdr_encode_opaque(p, fh->data, len);
- hdr->nops++;
- hdr->replen += decode_putfh_maxsz;
+ encode_op_hdr(xdr, OP_PUTFH, decode_putfh_maxsz, hdr);
+ encode_string(xdr, fh->size, fh->data);
}
static void encode_putrootfh(struct xdr_stream *xdr, struct compound_hdr *hdr)
{
- __be32 *p;
-
- p = reserve_space(xdr, 4);
- *p = cpu_to_be32(OP_PUTROOTFH);
- hdr->nops++;
- hdr->replen += decode_putrootfh_maxsz;
+ encode_op_hdr(xdr, OP_PUTROOTFH, decode_putrootfh_maxsz, hdr);
}
-static void encode_stateid(struct xdr_stream *xdr, const struct nfs_open_context *ctx, const struct nfs_lock_context *l_ctx, int zero_seqid)
+static void encode_open_stateid(struct xdr_stream *xdr,
+ const struct nfs_open_context *ctx,
+ const struct nfs_lock_context *l_ctx,
+ fmode_t fmode,
+ int zero_seqid)
{
nfs4_stateid stateid;
- __be32 *p;
- p = reserve_space(xdr, NFS4_STATEID_SIZE);
if (ctx->state != NULL) {
- nfs4_copy_stateid(&stateid, ctx->state, l_ctx->lockowner, l_ctx->pid);
+ nfs4_select_rw_stateid(&stateid, ctx->state,
+ fmode, l_ctx->lockowner, l_ctx->pid);
if (zero_seqid)
- stateid.stateid.seqid = 0;
- xdr_encode_opaque_fixed(p, stateid.data, NFS4_STATEID_SIZE);
+ stateid.seqid = 0;
+ encode_nfs4_stateid(xdr, &stateid);
} else
- xdr_encode_opaque_fixed(p, zero_stateid.data, NFS4_STATEID_SIZE);
+ encode_nfs4_stateid(xdr, &zero_stateid);
}
static void encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args, struct compound_hdr *hdr)
{
__be32 *p;
- p = reserve_space(xdr, 4);
- *p = cpu_to_be32(OP_READ);
-
- encode_stateid(xdr, args->context, args->lock_context,
- hdr->minorversion);
+ encode_op_hdr(xdr, OP_READ, decode_read_maxsz, hdr);
+ encode_open_stateid(xdr, args->context, args->lock_context,
+ FMODE_READ, hdr->minorversion);
p = reserve_space(xdr, 12);
p = xdr_encode_hyper(p, args->offset);
*p = cpu_to_be32(args->count);
- hdr->nops++;
- hdr->replen += decode_read_maxsz;
}
static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg *readdir, struct rpc_rqst *req, struct compound_hdr *hdr)
@@ -1551,7 +1544,7 @@ static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg
FATTR4_WORD1_MOUNTED_ON_FILEID,
};
uint32_t dircount = readdir->count >> 1;
- __be32 *p;
+ __be32 *p, verf[2];
if (readdir->plus) {
attrs[0] |= FATTR4_WORD0_TYPE|FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE|
@@ -1566,80 +1559,54 @@ static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg
if (!(readdir->bitmask[1] & FATTR4_WORD1_MOUNTED_ON_FILEID))
attrs[0] |= FATTR4_WORD0_FILEID;
- p = reserve_space(xdr, 12+NFS4_VERIFIER_SIZE+20);
- *p++ = cpu_to_be32(OP_READDIR);
- p = xdr_encode_hyper(p, readdir->cookie);
- p = xdr_encode_opaque_fixed(p, readdir->verifier.data, NFS4_VERIFIER_SIZE);
+ encode_op_hdr(xdr, OP_READDIR, decode_readdir_maxsz, hdr);
+ encode_uint64(xdr, readdir->cookie);
+ encode_nfs4_verifier(xdr, &readdir->verifier);
+ p = reserve_space(xdr, 20);
*p++ = cpu_to_be32(dircount);
*p++ = cpu_to_be32(readdir->count);
*p++ = cpu_to_be32(2);
*p++ = cpu_to_be32(attrs[0] & readdir->bitmask[0]);
*p = cpu_to_be32(attrs[1] & readdir->bitmask[1]);
- hdr->nops++;
- hdr->replen += decode_readdir_maxsz;
+ memcpy(verf, readdir->verifier.data, sizeof(verf));
dprintk("%s: cookie = %Lu, verifier = %08x:%08x, bitmap = %08x:%08x\n",
__func__,
(unsigned long long)readdir->cookie,
- ((u32 *)readdir->verifier.data)[0],
- ((u32 *)readdir->verifier.data)[1],
+ verf[0], verf[1],
attrs[0] & readdir->bitmask[0],
attrs[1] & readdir->bitmask[1]);
}
static void encode_readlink(struct xdr_stream *xdr, const struct nfs4_readlink *readlink, struct rpc_rqst *req, struct compound_hdr *hdr)
{
- __be32 *p;
-
- p = reserve_space(xdr, 4);
- *p = cpu_to_be32(OP_READLINK);
- hdr->nops++;
- hdr->replen += decode_readlink_maxsz;
+ encode_op_hdr(xdr, OP_READLINK, decode_readlink_maxsz, hdr);
}
static void encode_remove(struct xdr_stream *xdr, const struct qstr *name, struct compound_hdr *hdr)
{
- __be32 *p;
-
- p = reserve_space(xdr, 8 + name->len);
- *p++ = cpu_to_be32(OP_REMOVE);
- xdr_encode_opaque(p, name->name, name->len);
- hdr->nops++;
- hdr->replen += decode_remove_maxsz;
+ encode_op_hdr(xdr, OP_REMOVE, decode_remove_maxsz, hdr);
+ encode_string(xdr, name->len, name->name);
}
static void encode_rename(struct xdr_stream *xdr, const struct qstr *oldname, const struct qstr *newname, struct compound_hdr *hdr)
{
- __be32 *p;
-
- p = reserve_space(xdr, 4);
- *p = cpu_to_be32(OP_RENAME);
+ encode_op_hdr(xdr, OP_RENAME, decode_rename_maxsz, hdr);
encode_string(xdr, oldname->len, oldname->name);
encode_string(xdr, newname->len, newname->name);
- hdr->nops++;
- hdr->replen += decode_rename_maxsz;
}
-static void encode_renew(struct xdr_stream *xdr, const struct nfs_client *client_stateid, struct compound_hdr *hdr)
+static void encode_renew(struct xdr_stream *xdr, clientid4 clid,
+ struct compound_hdr *hdr)
{
- __be32 *p;
-
- p = reserve_space(xdr, 12);
- *p++ = cpu_to_be32(OP_RENEW);
- xdr_encode_hyper(p, client_stateid->cl_clientid);
- hdr->nops++;
- hdr->replen += decode_renew_maxsz;
+ encode_op_hdr(xdr, OP_RENEW, decode_renew_maxsz, hdr);
+ encode_uint64(xdr, clid);
}
static void
encode_restorefh(struct xdr_stream *xdr, struct compound_hdr *hdr)
{
- __be32 *p;
-
- p = reserve_space(xdr, 4);
- *p = cpu_to_be32(OP_RESTOREFH);
- hdr->nops++;
- hdr->replen += decode_restorefh_maxsz;
+ encode_op_hdr(xdr, OP_RESTOREFH, decode_restorefh_maxsz, hdr);
}
static void
@@ -1647,9 +1614,8 @@ encode_setacl(struct xdr_stream *xdr, struct nfs_setaclargs *arg, struct compoun
{
__be32 *p;
- p = reserve_space(xdr, 4+NFS4_STATEID_SIZE);
- *p++ = cpu_to_be32(OP_SETATTR);
- xdr_encode_opaque_fixed(p, zero_stateid.data, NFS4_STATEID_SIZE);
+ encode_op_hdr(xdr, OP_SETATTR, decode_setacl_maxsz, hdr);
+ encode_nfs4_stateid(xdr, &zero_stateid);
p = reserve_space(xdr, 2*4);
*p++ = cpu_to_be32(1);
*p = cpu_to_be32(FATTR4_WORD0_ACL);
@@ -1657,30 +1623,18 @@ encode_setacl(struct xdr_stream *xdr, struct nfs_setaclargs *arg, struct compoun
p = reserve_space(xdr, 4);
*p = cpu_to_be32(arg->acl_len);
xdr_write_pages(xdr, arg->acl_pages, arg->acl_pgbase, arg->acl_len);
- hdr->nops++;
- hdr->replen += decode_setacl_maxsz;
}
static void
encode_savefh(struct xdr_stream *xdr, struct compound_hdr *hdr)
{
- __be32 *p;
-
- p = reserve_space(xdr, 4);
- *p = cpu_to_be32(OP_SAVEFH);
- hdr->nops++;
- hdr->replen += decode_savefh_maxsz;
+ encode_op_hdr(xdr, OP_SAVEFH, decode_savefh_maxsz, hdr);
}
static void encode_setattr(struct xdr_stream *xdr, const struct nfs_setattrargs *arg, const struct nfs_server *server, struct compound_hdr *hdr)
{
- __be32 *p;
-
- p = reserve_space(xdr, 4+NFS4_STATEID_SIZE);
- *p++ = cpu_to_be32(OP_SETATTR);
- xdr_encode_opaque_fixed(p, arg->stateid.data, NFS4_STATEID_SIZE);
- hdr->nops++;
- hdr->replen += decode_setattr_maxsz;
+ encode_op_hdr(xdr, OP_SETATTR, decode_setattr_maxsz, hdr);
+ encode_nfs4_stateid(xdr, &arg->stateid);
encode_attrs(xdr, arg->iap, server);
}
@@ -1688,9 +1642,8 @@ static void encode_setclientid(struct xdr_stream *xdr, const struct nfs4_setclie
{
__be32 *p;
- p = reserve_space(xdr, 4 + NFS4_VERIFIER_SIZE);
- *p++ = cpu_to_be32(OP_SETCLIENTID);
- xdr_encode_opaque_fixed(p, setclientid->sc_verifier->data, NFS4_VERIFIER_SIZE);
+ encode_op_hdr(xdr, OP_SETCLIENTID, decode_setclientid_maxsz, hdr);
+ encode_nfs4_verifier(xdr, setclientid->sc_verifier);
encode_string(xdr, setclientid->sc_name_len, setclientid->sc_name);
p = reserve_space(xdr, 4);
@@ -1699,31 +1652,23 @@ static void encode_setclientid(struct xdr_stream *xdr, const struct nfs4_setclie
encode_string(xdr, setclientid->sc_uaddr_len, setclientid->sc_uaddr);
p = reserve_space(xdr, 4);
*p = cpu_to_be32(setclientid->sc_cb_ident);
- hdr->nops++;
- hdr->replen += decode_setclientid_maxsz;
}
static void encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs4_setclientid_res *arg, struct compound_hdr *hdr)
{
- __be32 *p;
-
- p = reserve_space(xdr, 12 + NFS4_VERIFIER_SIZE);
- *p++ = cpu_to_be32(OP_SETCLIENTID_CONFIRM);
- p = xdr_encode_hyper(p, arg->clientid);
- xdr_encode_opaque_fixed(p, arg->confirm.data, NFS4_VERIFIER_SIZE);
- hdr->nops++;
- hdr->replen += decode_setclientid_confirm_maxsz;
+ encode_op_hdr(xdr, OP_SETCLIENTID_CONFIRM,
+ decode_setclientid_confirm_maxsz, hdr);
+ encode_uint64(xdr, arg->clientid);
+ encode_nfs4_verifier(xdr, &arg->confirm);
}
static void encode_write(struct xdr_stream *xdr, const struct nfs_writeargs *args, struct compound_hdr *hdr)
{
__be32 *p;
- p = reserve_space(xdr, 4);
- *p = cpu_to_be32(OP_WRITE);
-
- encode_stateid(xdr, args->context, args->lock_context,
- hdr->minorversion);
+ encode_op_hdr(xdr, OP_WRITE, decode_write_maxsz, hdr);
+ encode_open_stateid(xdr, args->context, args->lock_context,
+ FMODE_WRITE, hdr->minorversion);
p = reserve_space(xdr, 16);
p = xdr_encode_hyper(p, args->offset);
@@ -1731,54 +1676,74 @@ static void encode_write(struct xdr_stream *xdr, const struct nfs_writeargs *arg
*p = cpu_to_be32(args->count);
xdr_write_pages(xdr, args->pages, args->pgbase, args->count);
- hdr->nops++;
- hdr->replen += decode_write_maxsz;
}
static void encode_delegreturn(struct xdr_stream *xdr, const nfs4_stateid *stateid, struct compound_hdr *hdr)
{
- __be32 *p;
-
- p = reserve_space(xdr, 4+NFS4_STATEID_SIZE);
-
- *p++ = cpu_to_be32(OP_DELEGRETURN);
- xdr_encode_opaque_fixed(p, stateid->data, NFS4_STATEID_SIZE);
- hdr->nops++;
- hdr->replen += decode_delegreturn_maxsz;
+ encode_op_hdr(xdr, OP_DELEGRETURN, decode_delegreturn_maxsz, hdr);
+ encode_nfs4_stateid(xdr, stateid);
}
static void encode_secinfo(struct xdr_stream *xdr, const struct qstr *name, struct compound_hdr *hdr)
{
- int len = name->len;
- __be32 *p;
-
- p = reserve_space(xdr, 8 + len);
- *p++ = cpu_to_be32(OP_SECINFO);
- xdr_encode_opaque(p, name->name, len);
- hdr->nops++;
- hdr->replen += decode_secinfo_maxsz;
+ encode_op_hdr(xdr, OP_SECINFO, decode_secinfo_maxsz, hdr);
+ encode_string(xdr, name->len, name->name);
}
#if defined(CONFIG_NFS_V4_1)
/* NFSv4.1 operations */
+static void encode_bind_conn_to_session(struct xdr_stream *xdr,
+ struct nfs4_session *session,
+ struct compound_hdr *hdr)
+{
+ __be32 *p;
+
+ encode_op_hdr(xdr, OP_BIND_CONN_TO_SESSION,
+ decode_bind_conn_to_session_maxsz, hdr);
+ encode_opaque_fixed(xdr, session->sess_id.data, NFS4_MAX_SESSIONID_LEN);
+ p = xdr_reserve_space(xdr, 8);
+ *p++ = cpu_to_be32(NFS4_CDFC4_BACK_OR_BOTH);
+ *p = 0; /* use_conn_in_rdma_mode = False */
+}
+
static void encode_exchange_id(struct xdr_stream *xdr,
struct nfs41_exchange_id_args *args,
struct compound_hdr *hdr)
{
__be32 *p;
+ char impl_name[NFS4_OPAQUE_LIMIT];
+ int len = 0;
- p = reserve_space(xdr, 4 + sizeof(args->verifier->data));
- *p++ = cpu_to_be32(OP_EXCHANGE_ID);
- xdr_encode_opaque_fixed(p, args->verifier->data, sizeof(args->verifier->data));
+ encode_op_hdr(xdr, OP_EXCHANGE_ID, decode_exchange_id_maxsz, hdr);
+ encode_nfs4_verifier(xdr, args->verifier);
encode_string(xdr, args->id_len, args->id);
p = reserve_space(xdr, 12);
*p++ = cpu_to_be32(args->flags);
*p++ = cpu_to_be32(0); /* zero length state_protect4_a */
- *p = cpu_to_be32(0); /* zero length implementation id array */
- hdr->nops++;
- hdr->replen += decode_exchange_id_maxsz;
+
+ if (send_implementation_id &&
+ sizeof(CONFIG_NFS_V4_1_IMPLEMENTATION_ID_DOMAIN) > 1 &&
+ sizeof(CONFIG_NFS_V4_1_IMPLEMENTATION_ID_DOMAIN)
+ <= NFS4_OPAQUE_LIMIT + 1)
+ len = snprintf(impl_name, sizeof(impl_name), "%s %s %s %s",
+ utsname()->sysname, utsname()->release,
+ utsname()->version, utsname()->machine);
+
+ if (len > 0) {
+ *p = cpu_to_be32(1); /* implementation id array length=1 */
+
+ encode_string(xdr,
+ sizeof(CONFIG_NFS_V4_1_IMPLEMENTATION_ID_DOMAIN) - 1,
+ CONFIG_NFS_V4_1_IMPLEMENTATION_ID_DOMAIN);
+ encode_string(xdr, len, impl_name);
+ /* just send zeros for nii_date - the date is in nii_name */
+ p = reserve_space(xdr, 12);
+ p = xdr_encode_hyper(p, 0);
+ *p = cpu_to_be32(0);
+ } else
+ *p = cpu_to_be32(0); /* implementation id array length=0 */
}
static void encode_create_session(struct xdr_stream *xdr,
@@ -1789,6 +1754,7 @@ static void encode_create_session(struct xdr_stream *xdr,
char machine_name[NFS4_MAX_MACHINE_NAME_LEN];
uint32_t len;
struct nfs_client *clp = args->client;
+ struct nfs_net *nn = net_generic(clp->cl_net, nfs_net_id);
u32 max_resp_sz_cached;
/*
@@ -1801,8 +1767,8 @@ static void encode_create_session(struct xdr_stream *xdr,
len = scnprintf(machine_name, sizeof(machine_name), "%s",
clp->cl_ipaddr);
- p = reserve_space(xdr, 20 + 2*28 + 20 + len + 12);
- *p++ = cpu_to_be32(OP_CREATE_SESSION);
+ encode_op_hdr(xdr, OP_CREATE_SESSION, decode_create_session_maxsz, hdr);
+ p = reserve_space(xdr, 16 + 2*28 + 20 + len + 12);
p = xdr_encode_hyper(p, clp->cl_clientid);
*p++ = cpu_to_be32(clp->cl_seqid); /*Sequence id */
*p++ = cpu_to_be32(args->flags); /*flags */
@@ -1830,38 +1796,35 @@ static void encode_create_session(struct xdr_stream *xdr,
*p++ = cpu_to_be32(RPC_AUTH_UNIX); /* auth_sys */
/* authsys_parms rfc1831 */
- *p++ = cpu_to_be32((u32)clp->cl_boot_time.tv_nsec); /* stamp */
+ *p++ = (__be32)nn->boot_time.tv_nsec; /* stamp */
p = xdr_encode_opaque(p, machine_name, len);
*p++ = cpu_to_be32(0); /* UID */
*p++ = cpu_to_be32(0); /* GID */
*p = cpu_to_be32(0); /* No more gids */
- hdr->nops++;
- hdr->replen += decode_create_session_maxsz;
}
static void encode_destroy_session(struct xdr_stream *xdr,
struct nfs4_session *session,
struct compound_hdr *hdr)
{
- __be32 *p;
- p = reserve_space(xdr, 4 + NFS4_MAX_SESSIONID_LEN);
- *p++ = cpu_to_be32(OP_DESTROY_SESSION);
- xdr_encode_opaque_fixed(p, session->sess_id.data, NFS4_MAX_SESSIONID_LEN);
- hdr->nops++;
- hdr->replen += decode_destroy_session_maxsz;
+ encode_op_hdr(xdr, OP_DESTROY_SESSION, decode_destroy_session_maxsz, hdr);
+ encode_opaque_fixed(xdr, session->sess_id.data, NFS4_MAX_SESSIONID_LEN);
+}
+
+static void encode_destroy_clientid(struct xdr_stream *xdr,
+ uint64_t clientid,
+ struct compound_hdr *hdr)
+{
+ encode_op_hdr(xdr, OP_DESTROY_CLIENTID, decode_destroy_clientid_maxsz, hdr);
+ encode_uint64(xdr, clientid);
}
static void encode_reclaim_complete(struct xdr_stream *xdr,
struct nfs41_reclaim_complete_args *args,
struct compound_hdr *hdr)
{
- __be32 *p;
-
- p = reserve_space(xdr, 8);
- *p++ = cpu_to_be32(OP_RECLAIM_COMPLETE);
- *p++ = cpu_to_be32(args->one_fs);
- hdr->nops++;
- hdr->replen += decode_reclaim_complete_maxsz;
+ encode_op_hdr(xdr, OP_RECLAIM_COMPLETE, decode_reclaim_complete_maxsz, hdr);
+ encode_uint32(xdr, args->one_fs);
}
#endif /* CONFIG_NFS_V4_1 */
@@ -1883,8 +1846,7 @@ static void encode_sequence(struct xdr_stream *xdr,
WARN_ON(args->sa_slotid == NFS4_MAX_SLOT_TABLE);
slot = tp->slots + args->sa_slotid;
- p = reserve_space(xdr, 4 + NFS4_MAX_SESSIONID_LEN + 16);
- *p++ = cpu_to_be32(OP_SEQUENCE);
+ encode_op_hdr(xdr, OP_SEQUENCE, decode_sequence_maxsz, hdr);
/*
* Sessionid + seqid + slotid + max slotid + cache_this
@@ -1898,13 +1860,12 @@ static void encode_sequence(struct xdr_stream *xdr,
((u32 *)session->sess_id.data)[3],
slot->seq_nr, args->sa_slotid,
tp->highest_used_slotid, args->sa_cache_this);
+ p = reserve_space(xdr, NFS4_MAX_SESSIONID_LEN + 16);
p = xdr_encode_opaque_fixed(p, session->sess_id.data, NFS4_MAX_SESSIONID_LEN);
*p++ = cpu_to_be32(slot->seq_nr);
*p++ = cpu_to_be32(args->sa_slotid);
*p++ = cpu_to_be32(tp->highest_used_slotid);
*p = cpu_to_be32(args->sa_cache_this);
- hdr->nops++;
- hdr->replen += decode_sequence_maxsz;
#endif /* CONFIG_NFS_V4_1 */
}
@@ -1919,14 +1880,12 @@ encode_getdevicelist(struct xdr_stream *xdr,
.data = "dummmmmy",
};
- p = reserve_space(xdr, 20);
- *p++ = cpu_to_be32(OP_GETDEVICELIST);
+ encode_op_hdr(xdr, OP_GETDEVICELIST, decode_getdevicelist_maxsz, hdr);
+ p = reserve_space(xdr, 16);
*p++ = cpu_to_be32(args->layoutclass);
*p++ = cpu_to_be32(NFS4_PNFS_GETDEVLIST_MAXNUM);
xdr_encode_hyper(p, 0ULL); /* cookie */
encode_nfs4_verifier(xdr, &dummy);
- hdr->nops++;
- hdr->replen += decode_getdevicelist_maxsz;
}
static void
@@ -1936,15 +1895,13 @@ encode_getdeviceinfo(struct xdr_stream *xdr,
{
__be32 *p;
- p = reserve_space(xdr, 16 + NFS4_DEVICEID4_SIZE);
- *p++ = cpu_to_be32(OP_GETDEVICEINFO);
+ encode_op_hdr(xdr, OP_GETDEVICEINFO, decode_getdeviceinfo_maxsz, hdr);
+ p = reserve_space(xdr, 12 + NFS4_DEVICEID4_SIZE);
p = xdr_encode_opaque_fixed(p, args->pdev->dev_id.data,
NFS4_DEVICEID4_SIZE);
*p++ = cpu_to_be32(args->pdev->layout_type);
*p++ = cpu_to_be32(args->pdev->pglen); /* gdia_maxcount */
*p++ = cpu_to_be32(0); /* bitmap length 0 */
- hdr->nops++;
- hdr->replen += decode_getdeviceinfo_maxsz;
}
static void
@@ -1954,16 +1911,16 @@ encode_layoutget(struct xdr_stream *xdr,
{
__be32 *p;
- p = reserve_space(xdr, 44 + NFS4_STATEID_SIZE);
- *p++ = cpu_to_be32(OP_LAYOUTGET);
+ encode_op_hdr(xdr, OP_LAYOUTGET, decode_layoutget_maxsz, hdr);
+ p = reserve_space(xdr, 36);
*p++ = cpu_to_be32(0); /* Signal layout available */
*p++ = cpu_to_be32(args->type);
*p++ = cpu_to_be32(args->range.iomode);
p = xdr_encode_hyper(p, args->range.offset);
p = xdr_encode_hyper(p, args->range.length);
p = xdr_encode_hyper(p, args->minlength);
- p = xdr_encode_opaque_fixed(p, &args->stateid.data, NFS4_STATEID_SIZE);
- *p = cpu_to_be32(args->maxcount);
+ encode_nfs4_stateid(xdr, &args->stateid);
+ encode_uint32(xdr, args->maxcount);
dprintk("%s: 1st type:0x%x iomode:%d off:%lu len:%lu mc:%d\n",
__func__,
@@ -1972,8 +1929,6 @@ encode_layoutget(struct xdr_stream *xdr,
(unsigned long)args->range.offset,
(unsigned long)args->range.length,
args->maxcount);
- hdr->nops++;
- hdr->replen += decode_layoutget_maxsz;
}
static int
@@ -1987,13 +1942,14 @@ encode_layoutcommit(struct xdr_stream *xdr,
dprintk("%s: lbw: %llu type: %d\n", __func__, args->lastbytewritten,
NFS_SERVER(args->inode)->pnfs_curr_ld->id);
- p = reserve_space(xdr, 44 + NFS4_STATEID_SIZE);
- *p++ = cpu_to_be32(OP_LAYOUTCOMMIT);
+ encode_op_hdr(xdr, OP_LAYOUTCOMMIT, decode_layoutcommit_maxsz, hdr);
+ p = reserve_space(xdr, 20);
/* Only whole file layouts */
p = xdr_encode_hyper(p, 0); /* offset */
p = xdr_encode_hyper(p, args->lastbytewritten + 1); /* length */
- *p++ = cpu_to_be32(0); /* reclaim */
- p = xdr_encode_opaque_fixed(p, args->stateid.data, NFS4_STATEID_SIZE);
+ *p = cpu_to_be32(0); /* reclaim */
+ encode_nfs4_stateid(xdr, &args->stateid);
+ p = reserve_space(xdr, 20);
*p++ = cpu_to_be32(1); /* newoffset = TRUE */
p = xdr_encode_hyper(p, args->lastbytewritten);
*p++ = cpu_to_be32(0); /* Never send time_modify_changed */
@@ -2002,13 +1958,9 @@ encode_layoutcommit(struct xdr_stream *xdr,
if (NFS_SERVER(inode)->pnfs_curr_ld->encode_layoutcommit)
NFS_SERVER(inode)->pnfs_curr_ld->encode_layoutcommit(
NFS_I(inode)->layout, xdr, args);
- else {
- p = reserve_space(xdr, 4);
- *p = cpu_to_be32(0); /* no layout-type payload */
- }
+ else
+ encode_uint32(xdr, 0); /* no layout-type payload */
- hdr->nops++;
- hdr->replen += decode_layoutcommit_maxsz;
return 0;
}
@@ -2019,27 +1971,23 @@ encode_layoutreturn(struct xdr_stream *xdr,
{
__be32 *p;
- p = reserve_space(xdr, 20);
- *p++ = cpu_to_be32(OP_LAYOUTRETURN);
+ encode_op_hdr(xdr, OP_LAYOUTRETURN, decode_layoutreturn_maxsz, hdr);
+ p = reserve_space(xdr, 16);
*p++ = cpu_to_be32(0); /* reclaim. always 0 for now */
*p++ = cpu_to_be32(args->layout_type);
*p++ = cpu_to_be32(IOMODE_ANY);
*p = cpu_to_be32(RETURN_FILE);
- p = reserve_space(xdr, 16 + NFS4_STATEID_SIZE);
+ p = reserve_space(xdr, 16);
p = xdr_encode_hyper(p, 0);
p = xdr_encode_hyper(p, NFS4_MAX_UINT64);
spin_lock(&args->inode->i_lock);
- xdr_encode_opaque_fixed(p, &args->stateid.data, NFS4_STATEID_SIZE);
+ encode_nfs4_stateid(xdr, &args->stateid);
spin_unlock(&args->inode->i_lock);
if (NFS_SERVER(args->inode)->pnfs_curr_ld->encode_layoutreturn) {
NFS_SERVER(args->inode)->pnfs_curr_ld->encode_layoutreturn(
NFS_I(args->inode)->layout, xdr, args);
- } else {
- p = reserve_space(xdr, 4);
- *p = cpu_to_be32(0);
- }
- hdr->nops++;
- hdr->replen += decode_layoutreturn_maxsz;
+ } else
+ encode_uint32(xdr, 0);
}
static int
@@ -2047,12 +1995,8 @@ encode_secinfo_no_name(struct xdr_stream *xdr,
const struct nfs41_secinfo_no_name_args *args,
struct compound_hdr *hdr)
{
- __be32 *p;
- p = reserve_space(xdr, 8);
- *p++ = cpu_to_be32(OP_SECINFO_NO_NAME);
- *p++ = cpu_to_be32(args->style);
- hdr->nops++;
- hdr->replen += decode_secinfo_no_name_maxsz;
+ encode_op_hdr(xdr, OP_SECINFO_NO_NAME, decode_secinfo_no_name_maxsz, hdr);
+ encode_uint32(xdr, args->style);
return 0;
}
@@ -2060,26 +2004,17 @@ static void encode_test_stateid(struct xdr_stream *xdr,
struct nfs41_test_stateid_args *args,
struct compound_hdr *hdr)
{
- __be32 *p;
-
- p = reserve_space(xdr, 8 + NFS4_STATEID_SIZE);
- *p++ = cpu_to_be32(OP_TEST_STATEID);
- *p++ = cpu_to_be32(1);
- xdr_encode_opaque_fixed(p, args->stateid->data, NFS4_STATEID_SIZE);
- hdr->nops++;
- hdr->replen += decode_test_stateid_maxsz;
+ encode_op_hdr(xdr, OP_TEST_STATEID, decode_test_stateid_maxsz, hdr);
+ encode_uint32(xdr, 1);
+ encode_nfs4_stateid(xdr, args->stateid);
}
static void encode_free_stateid(struct xdr_stream *xdr,
struct nfs41_free_stateid_args *args,
struct compound_hdr *hdr)
{
- __be32 *p;
- p = reserve_space(xdr, 4 + NFS4_STATEID_SIZE);
- *p++ = cpu_to_be32(OP_FREE_STATEID);
- xdr_encode_opaque_fixed(p, args->stateid->data, NFS4_STATEID_SIZE);
- hdr->nops++;
- hdr->replen += decode_free_stateid_maxsz;
+ encode_op_hdr(xdr, OP_FREE_STATEID, decode_free_stateid_maxsz, hdr);
+ encode_nfs4_stateid(xdr, args->stateid);
}
#endif /* CONFIG_NFS_V4_1 */
@@ -2166,7 +2101,6 @@ static void nfs4_xdr_enc_remove(struct rpc_rqst *req, struct xdr_stream *xdr,
encode_sequence(xdr, &args->seq_args, &hdr);
encode_putfh(xdr, args->fh, &hdr);
encode_remove(xdr, &args->name, &hdr);
- encode_getfattr(xdr, args->bitmask, &hdr);
encode_nops(&hdr);
}
@@ -2186,9 +2120,6 @@ static void nfs4_xdr_enc_rename(struct rpc_rqst *req, struct xdr_stream *xdr,
encode_savefh(xdr, &hdr);
encode_putfh(xdr, args->new_dir, &hdr);
encode_rename(xdr, args->old_name, args->new_name, &hdr);
- encode_getfattr(xdr, args->bitmask, &hdr);
- encode_restorefh(xdr, &hdr);
- encode_getfattr(xdr, args->bitmask, &hdr);
encode_nops(&hdr);
}
@@ -2208,7 +2139,6 @@ static void nfs4_xdr_enc_link(struct rpc_rqst *req, struct xdr_stream *xdr,
encode_savefh(xdr, &hdr);
encode_putfh(xdr, args->dir_fh, &hdr);
encode_link(xdr, args->name, &hdr);
- encode_getfattr(xdr, args->bitmask, &hdr);
encode_restorefh(xdr, &hdr);
encode_getfattr(xdr, args->bitmask, &hdr);
encode_nops(&hdr);
@@ -2227,12 +2157,9 @@ static void nfs4_xdr_enc_create(struct rpc_rqst *req, struct xdr_stream *xdr,
encode_compound_hdr(xdr, req, &hdr);
encode_sequence(xdr, &args->seq_args, &hdr);
encode_putfh(xdr, args->dir_fh, &hdr);
- encode_savefh(xdr, &hdr);
encode_create(xdr, args, &hdr);
encode_getfh(xdr, &hdr);
encode_getfattr(xdr, args->bitmask, &hdr);
- encode_restorefh(xdr, &hdr);
- encode_getfattr(xdr, args->bitmask, &hdr);
encode_nops(&hdr);
}
@@ -2293,12 +2220,9 @@ static void nfs4_xdr_enc_open(struct rpc_rqst *req, struct xdr_stream *xdr,
encode_compound_hdr(xdr, req, &hdr);
encode_sequence(xdr, &args->seq_args, &hdr);
encode_putfh(xdr, args->fh, &hdr);
- encode_savefh(xdr, &hdr);
encode_open(xdr, args, &hdr);
encode_getfh(xdr, &hdr);
- encode_getfattr(xdr, args->bitmask, &hdr);
- encode_restorefh(xdr, &hdr);
- encode_getfattr(xdr, args->bitmask, &hdr);
+ encode_getfattr_open(xdr, args->bitmask, args->open_bitmap, &hdr);
encode_nops(&hdr);
}
@@ -2517,11 +2441,12 @@ static void nfs4_xdr_enc_getacl(struct rpc_rqst *req, struct xdr_stream *xdr,
encode_compound_hdr(xdr, req, &hdr);
encode_sequence(xdr, &args->seq_args, &hdr);
encode_putfh(xdr, args->fh, &hdr);
- replen = hdr.replen + op_decode_hdr_maxsz + nfs4_fattr_bitmap_maxsz + 1;
+ replen = hdr.replen + op_decode_hdr_maxsz + 1;
encode_getattr_two(xdr, FATTR4_WORD0_ACL, 0, &hdr);
xdr_inline_pages(&req->rq_rcv_buf, replen << 2,
args->acl_pages, args->acl_pgbase, args->acl_len);
+
encode_nops(&hdr);
}
@@ -2549,7 +2474,7 @@ static void nfs4_xdr_enc_write(struct rpc_rqst *req, struct xdr_stream *xdr,
* a COMMIT request
*/
static void nfs4_xdr_enc_commit(struct rpc_rqst *req, struct xdr_stream *xdr,
- struct nfs_writeargs *args)
+ struct nfs_commitargs *args)
{
struct compound_hdr hdr = {
.minorversion = nfs4_xdr_minorversion(&args->seq_args),
@@ -2559,8 +2484,6 @@ static void nfs4_xdr_enc_commit(struct rpc_rqst *req, struct xdr_stream *xdr,
encode_sequence(xdr, &args->seq_args, &hdr);
encode_putfh(xdr, args->fh, &hdr);
encode_commit(xdr, args, &hdr);
- if (args->bitmask)
- encode_getfattr(xdr, args->bitmask, &hdr);
encode_nops(&hdr);
}
@@ -2632,6 +2555,7 @@ static void nfs4_xdr_enc_server_caps(struct rpc_rqst *req,
encode_sequence(xdr, &args->seq_args, &hdr);
encode_putfh(xdr, args->fhandle, &hdr);
encode_getattr_one(xdr, FATTR4_WORD0_SUPPORTED_ATTRS|
+ FATTR4_WORD0_FH_EXPIRE_TYPE|
FATTR4_WORD0_LINK_SUPPORT|
FATTR4_WORD0_SYMLINK_SUPPORT|
FATTR4_WORD0_ACLSUPPORT, &hdr);
@@ -2649,7 +2573,7 @@ static void nfs4_xdr_enc_renew(struct rpc_rqst *req, struct xdr_stream *xdr,
};
encode_compound_hdr(xdr, req, &hdr);
- encode_renew(xdr, clp, &hdr);
+ encode_renew(xdr, clp->cl_clientid, &hdr);
encode_nops(&hdr);
}
@@ -2702,8 +2626,8 @@ static void nfs4_xdr_enc_delegreturn(struct rpc_rqst *req,
encode_compound_hdr(xdr, req, &hdr);
encode_sequence(xdr, &args->seq_args, &hdr);
encode_putfh(xdr, args->fhandle, &hdr);
- encode_delegreturn(xdr, args->stateid, &hdr);
encode_getfattr(xdr, args->bitmask, &hdr);
+ encode_delegreturn(xdr, args->stateid, &hdr);
encode_nops(&hdr);
}
@@ -2751,6 +2675,22 @@ static void nfs4_xdr_enc_secinfo(struct rpc_rqst *req,
#if defined(CONFIG_NFS_V4_1)
/*
+ * BIND_CONN_TO_SESSION request: a compound that carries only this one operation, built for clp->cl_session.
+ */
+static void nfs4_xdr_enc_bind_conn_to_session(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ struct nfs_client *clp)
+{
+ struct compound_hdr hdr = {
+ .minorversion = clp->cl_mvops->minor_version, /* minor version is taken from the client's cl_mvops */
+ };
+
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_bind_conn_to_session(xdr, clp->cl_session, &hdr); /* no SEQUENCE op is encoded ahead of it */
+ encode_nops(&hdr); /* NOTE(review): presumably finalises the operation count in the header -- confirm */
+}
+
+/*
* EXCHANGE_ID request
*/
static void nfs4_xdr_enc_exchange_id(struct rpc_rqst *req,
@@ -2799,6 +2739,22 @@ static void nfs4_xdr_enc_destroy_session(struct rpc_rqst *req,
}
/*
+ * a DESTROY_CLIENTID request: a compound that carries only this one operation, for clp->cl_clientid.
+ */
+static void nfs4_xdr_enc_destroy_clientid(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ struct nfs_client *clp)
+{
+ struct compound_hdr hdr = {
+ .minorversion = clp->cl_mvops->minor_version, /* minor version is taken from the client's cl_mvops */
+ };
+
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_destroy_clientid(xdr, clp->cl_clientid, &hdr); /* no SEQUENCE op is encoded ahead of it */
+ encode_nops(&hdr); /* NOTE(review): presumably finalises the operation count in the header -- confirm */
+}
+
+/*
* a SEQUENCE request
*/
static void nfs4_xdr_enc_sequence(struct rpc_rqst *req, struct xdr_stream *xdr,
@@ -3179,6 +3135,28 @@ out_overflow:
return -EIO;
}
+static int decode_attr_fh_expire_type(struct xdr_stream *xdr,
+ uint32_t *bitmap, uint32_t *type)
+{ /* Decode the fh_expire_type attribute into *type when its bit is set in bitmap[0]; returns 0 or -EIO. */
+ __be32 *p;
+
+ *type = 0; /* value reported when the attribute is absent */
+ if (unlikely(bitmap[0] & (FATTR4_WORD0_FH_EXPIRE_TYPE - 1U))) /* fail if any lower-numbered bit of word 0 is still set */
+ return -EIO;
+ if (likely(bitmap[0] & FATTR4_WORD0_FH_EXPIRE_TYPE)) {
+ p = xdr_inline_decode(xdr, 4); /* the value is one 4-byte big-endian word */
+ if (unlikely(!p))
+ goto out_overflow;
+ *type = be32_to_cpup(p);
+ bitmap[0] &= ~FATTR4_WORD0_FH_EXPIRE_TYPE; /* clear the bit so later decoders see it as consumed */
+ }
+ dprintk("%s: expire type=0x%x\n", __func__, *type);
+ return 0;
+out_overflow: /* reached only when the 4-byte read above fails */
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
+}
+
static int decode_attr_change(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *change)
{
__be32 *p;
@@ -3512,16 +3490,17 @@ static int decode_pathname(struct xdr_stream *xdr, struct nfs4_pathname *path)
n = be32_to_cpup(p);
if (n == 0)
goto root_path;
- dprintk("path ");
+ dprintk("pathname4: ");
path->ncomponents = 0;
while (path->ncomponents < n) {
struct nfs4_string *component = &path->components[path->ncomponents];
status = decode_opaque_inline(xdr, &component->len, &component->data);
if (unlikely(status != 0))
goto out_eio;
- if (path->ncomponents != n)
- dprintk("/");
- dprintk("%s", component->data);
+ ifdebug (XDR)
+ pr_cont("%s%.*s ",
+ (path->ncomponents != n ? "/ " : ""),
+ component->len, component->data);
if (path->ncomponents < NFS4_PATHNAME_MAXCOMPONENTS)
path->ncomponents++;
else {
@@ -3530,14 +3509,13 @@ static int decode_pathname(struct xdr_stream *xdr, struct nfs4_pathname *path)
}
}
out:
- dprintk("\n");
return status;
root_path:
/* a root pathname is sent as a zero component4 */
path->ncomponents = 1;
path->components[0].len=0;
path->components[0].data=NULL;
- dprintk("path /\n");
+ dprintk("pathname4: /\n");
goto out;
out_eio:
dprintk(" status %d", status);
@@ -3559,7 +3537,11 @@ static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, st
status = 0;
if (unlikely(!(bitmap[0] & FATTR4_WORD0_FS_LOCATIONS)))
goto out;
- dprintk("%s: fsroot ", __func__);
+ status = -EIO;
+ /* Ignore borken servers that return unrequested attrs */
+ if (unlikely(res == NULL))
+ goto out;
+ dprintk("%s: fsroot:\n", __func__);
status = decode_pathname(xdr, &res->fs_path);
if (unlikely(status != 0))
goto out;
@@ -3580,7 +3562,7 @@ static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, st
m = be32_to_cpup(p);
loc->nservers = 0;
- dprintk("%s: servers ", __func__);
+ dprintk("%s: servers:\n", __func__);
while (loc->nservers < m) {
struct nfs4_string *server = &loc->servers[loc->nservers];
status = decode_opaque_inline(xdr, &server->len, &server->data);
@@ -3612,7 +3594,7 @@ static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, st
res->nlocations++;
}
if (res->nlocations != 0)
- status = NFS_ATTR_FATTR_V4_REFERRAL;
+ status = NFS_ATTR_FATTR_V4_LOCATIONS;
out:
dprintk("%s: fs_locations done, error = %d\n", __func__, status);
return status;
@@ -3790,7 +3772,8 @@ out_overflow:
}
static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap,
- const struct nfs_server *server, uint32_t *uid, int may_sleep)
+ const struct nfs_server *server, uint32_t *uid,
+ struct nfs4_string *owner_name)
{
uint32_t len;
__be32 *p;
@@ -3807,8 +3790,12 @@ static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap,
p = xdr_inline_decode(xdr, len);
if (unlikely(!p))
goto out_overflow;
- if (!may_sleep) {
- /* do nothing */
+ if (owner_name != NULL) {
+ owner_name->data = kmemdup(p, len, GFP_NOWAIT);
+ if (owner_name->data != NULL) {
+ owner_name->len = len;
+ ret = NFS_ATTR_FATTR_OWNER_NAME;
+ }
} else if (len < XDR_MAX_NETOBJ) {
if (nfs_map_name_to_uid(server, (char *)p, len, uid) == 0)
ret = NFS_ATTR_FATTR_OWNER;
@@ -3828,7 +3815,8 @@ out_overflow:
}
static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap,
- const struct nfs_server *server, uint32_t *gid, int may_sleep)
+ const struct nfs_server *server, uint32_t *gid,
+ struct nfs4_string *group_name)
{
uint32_t len;
__be32 *p;
@@ -3845,8 +3833,12 @@ static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap,
p = xdr_inline_decode(xdr, len);
if (unlikely(!p))
goto out_overflow;
- if (!may_sleep) {
- /* do nothing */
+ if (group_name != NULL) {
+ group_name->data = kmemdup(p, len, GFP_NOWAIT);
+ if (group_name->data != NULL) {
+ group_name->len = len;
+ ret = NFS_ATTR_FATTR_GROUP_NAME;
+ }
} else if (len < XDR_MAX_NETOBJ) {
if (nfs_map_group_to_gid(server, (char *)p, len, gid) == 0)
ret = NFS_ATTR_FATTR_GROUP;
@@ -4146,7 +4138,7 @@ static int decode_opaque_fixed(struct xdr_stream *xdr, void *buf, size_t len)
static int decode_stateid(struct xdr_stream *xdr, nfs4_stateid *stateid)
{
- return decode_opaque_fixed(xdr, stateid->data, NFS4_STATEID_SIZE);
+ return decode_opaque_fixed(xdr, stateid, NFS4_STATEID_SIZE);
}
static int decode_close(struct xdr_stream *xdr, struct nfs_closeres *res)
@@ -4163,10 +4155,10 @@ static int decode_close(struct xdr_stream *xdr, struct nfs_closeres *res)
static int decode_verifier(struct xdr_stream *xdr, void *verifier)
{
- return decode_opaque_fixed(xdr, verifier, 8);
+ return decode_opaque_fixed(xdr, verifier, NFS4_VERIFIER_SIZE);
}
-static int decode_commit(struct xdr_stream *xdr, struct nfs_writeres *res)
+static int decode_commit(struct xdr_stream *xdr, struct nfs_commitres *res)
{
int status;
@@ -4213,6 +4205,9 @@ static int decode_server_caps(struct xdr_stream *xdr, struct nfs4_server_caps_re
goto xdr_error;
if ((status = decode_attr_supported(xdr, bitmap, res->attr_bitmask)) != 0)
goto xdr_error;
+ if ((status = decode_attr_fh_expire_type(xdr, bitmap,
+ &res->fh_expire_type)) != 0)
+ goto xdr_error;
if ((status = decode_attr_link_support(xdr, bitmap, &res->has_links)) != 0)
goto xdr_error;
if ((status = decode_attr_symlink_support(xdr, bitmap, &res->has_symlinks)) != 0)
@@ -4281,9 +4276,118 @@ xdr_error:
return status;
}
+static int decode_threshold_hint(struct xdr_stream *xdr,
+ uint32_t *bitmap,
+ uint64_t *res,
+ uint32_t hint_bit)
+{ /* Decode one 64-bit threshold hint into *res if hint_bit is set in bitmap[0]; returns 0 or -EIO. */
+ __be32 *p;
+
+ *res = 0; /* value reported when the hint is absent */
+ if (likely(bitmap[0] & hint_bit)) { /* note: hint_bit is tested but never cleared from bitmap[0] here */
+ p = xdr_inline_decode(xdr, 8); /* each hint occupies 8 bytes on the wire */
+ if (unlikely(!p))
+ goto out_overflow;
+ xdr_decode_hyper(p, res);
+ }
+ return 0;
+out_overflow: /* reached only when the 8-byte read above fails */
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
+}
+
+static int decode_first_threshold_item4(struct xdr_stream *xdr,
+ struct nfs4_threshold *res)
+{ /* Decode one threshold item (layout type, hint bitmap, hint values) from xdr into *res. */
+ __be32 *p, *savep;
+ uint32_t bitmap[3] = {0,}, attrlen;
+ int status;
+
+ /* layout type: one 4-byte word, stored in res->l_type */
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(!p)) {
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
+ }
+ res->l_type = be32_to_cpup(p);
+
+ /* thi_hintset bitmap: selects which of the four hints follow */
+ status = decode_attr_bitmap(xdr, bitmap);
+ if (status < 0)
+ goto xdr_error;
+
+ /* thi_hintlist length; savep and attrlen are checked by verify_attr_len() below */
+ status = decode_attr_length(xdr, &attrlen, &savep);
+ if (status < 0)
+ goto xdr_error;
+ /* thi_hintlist: hints are read in the order RD, WR, RD_IO, WR_IO; each is read only if its bit is set in bitmap[0] */
+ status = decode_threshold_hint(xdr, bitmap, &res->rd_sz, THRESHOLD_RD);
+ if (status < 0)
+ goto xdr_error;
+ status = decode_threshold_hint(xdr, bitmap, &res->wr_sz, THRESHOLD_WR);
+ if (status < 0)
+ goto xdr_error;
+ status = decode_threshold_hint(xdr, bitmap, &res->rd_io_sz,
+ THRESHOLD_RD_IO);
+ if (status < 0)
+ goto xdr_error;
+ status = decode_threshold_hint(xdr, bitmap, &res->wr_io_sz,
+ THRESHOLD_WR_IO);
+ if (status < 0)
+ goto xdr_error;
+
+ status = verify_attr_len(xdr, savep, attrlen); /* its result becomes the function's return value */
+ res->bm = bitmap[0]; /* record the hint bitmap word for the caller */
+
+ dprintk("%s bm=0x%x rd_sz=%llu wr_sz=%llu rd_io=%llu wr_io=%llu\n",
+ __func__, res->bm, res->rd_sz, res->wr_sz, res->rd_io_sz,
+ res->wr_io_sz);
+xdr_error: /* the success path falls through to here too, so the status is always logged */
+ dprintk("%s ret=%d!\n", __func__, status);
+ return status;
+}
+
+/*
+ * Thresholds on pNFS direct I/O vs. MDS I/O: decode the mdsthreshold attribute (bitmap[2]) into *res.
+ */
+static int decode_attr_mdsthreshold(struct xdr_stream *xdr,
+ uint32_t *bitmap,
+ struct nfs4_threshold *res)
+{
+ __be32 *p;
+ int status = 0; /* returned unchanged when the attribute bit is not set */
+ uint32_t num;
+
+ if (unlikely(bitmap[2] & (FATTR4_WORD2_MDSTHRESHOLD - 1U))) /* fail if any lower-numbered bit of word 2 is still set */
+ return -EIO;
+ if (bitmap[2] & FATTR4_WORD2_MDSTHRESHOLD) {
+ /* Did the server return an unrequested attribute? */
+ if (unlikely(res == NULL))
+ return -EREMOTEIO;
+ p = xdr_inline_decode(xdr, 4); /* 4-byte count of threshold items */
+ if (unlikely(!p))
+ goto out_overflow;
+ num = be32_to_cpup(p);
+ if (num == 0) /* NOTE(review): this early return leaves FATTR4_WORD2_MDSTHRESHOLD set in bitmap[2] */
+ return 0;
+ if (num > 1)
+ printk(KERN_INFO "%s: Warning: Multiple pNFS layout "
+ "drivers per filesystem not supported\n",
+ __func__);
+
+ status = decode_first_threshold_item4(xdr, res); /* only one item is decoded, whatever num says */
+ bitmap[2] &= ~FATTR4_WORD2_MDSTHRESHOLD; /* cleared even if the item failed to decode */
+ }
+ return status;
+out_overflow: /* reached only when the count read above fails */
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
+}
+
static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap,
struct nfs_fattr *fattr, struct nfs_fh *fh,
- const struct nfs_server *server, int may_sleep)
+ struct nfs4_fs_locations *fs_loc,
+ const struct nfs_server *server)
{
int status;
umode_t fmode = 0;
@@ -4318,8 +4422,6 @@ static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap,
status = decode_attr_error(xdr, bitmap, &err);
if (status < 0)
goto xdr_error;
- if (err == -NFS4ERR_WRONGSEC)
- nfs_fixup_secinfo_attributes(fattr, fh);
status = decode_attr_filehandle(xdr, bitmap, fh);
if (status < 0)
@@ -4330,9 +4432,7 @@ static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap,
goto xdr_error;
fattr->valid |= status;
- status = decode_attr_fs_locations(xdr, bitmap, container_of(fattr,
- struct nfs4_fs_locations,
- fattr));
+ status = decode_attr_fs_locations(xdr, bitmap, fs_loc);
if (status < 0)
goto xdr_error;
fattr->valid |= status;
@@ -4350,12 +4450,12 @@ static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap,
goto xdr_error;
fattr->valid |= status;
- status = decode_attr_owner(xdr, bitmap, server, &fattr->uid, may_sleep);
+ status = decode_attr_owner(xdr, bitmap, server, &fattr->uid, fattr->owner_name);
if (status < 0)
goto xdr_error;
fattr->valid |= status;
- status = decode_attr_group(xdr, bitmap, server, &fattr->gid, may_sleep);
+ status = decode_attr_group(xdr, bitmap, server, &fattr->gid, fattr->group_name);
if (status < 0)
goto xdr_error;
fattr->valid |= status;
@@ -4390,13 +4490,18 @@ static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap,
goto xdr_error;
fattr->valid |= status;
+ status = decode_attr_mdsthreshold(xdr, bitmap, fattr->mdsthreshold);
+ if (status < 0)
+ goto xdr_error;
+
xdr_error:
dprintk("%s: xdr returned %d\n", __func__, -status);
return status;
}
static int decode_getfattr_generic(struct xdr_stream *xdr, struct nfs_fattr *fattr,
- struct nfs_fh *fh, const struct nfs_server *server, int may_sleep)
+ struct nfs_fh *fh, struct nfs4_fs_locations *fs_loc,
+ const struct nfs_server *server)
{
__be32 *savep;
uint32_t attrlen,
@@ -4415,7 +4520,7 @@ static int decode_getfattr_generic(struct xdr_stream *xdr, struct nfs_fattr *fat
if (status < 0)
goto xdr_error;
- status = decode_getfattr_attrs(xdr, bitmap, fattr, fh, server, may_sleep);
+ status = decode_getfattr_attrs(xdr, bitmap, fattr, fh, fs_loc, server);
if (status < 0)
goto xdr_error;
@@ -4426,9 +4531,9 @@ xdr_error:
}
static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr,
- const struct nfs_server *server, int may_sleep)
+ const struct nfs_server *server)
{
- return decode_getfattr_generic(xdr, fattr, NULL, server, may_sleep);
+ return decode_getfattr_generic(xdr, fattr, NULL, NULL, server);
}
/*
@@ -4452,8 +4557,8 @@ static int decode_first_pnfs_layout_type(struct xdr_stream *xdr,
return 0;
}
if (num > 1)
- printk(KERN_INFO "%s: Warning: Multiple pNFS layout drivers "
- "per filesystem not supported\n", __func__);
+ printk(KERN_INFO "NFS: %s: Warning: Multiple pNFS layout "
+ "drivers per filesystem not supported\n", __func__);
/* Decode and set first layout type, move xdr->p past unused types */
p = xdr_inline_decode(xdr, num * 4);
@@ -4852,17 +4957,16 @@ static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct n
size_t hdrlen;
u32 recvd, pglen = rcvbuf->page_len;
int status;
+ __be32 verf[2];
status = decode_op_hdr(xdr, OP_READDIR);
if (!status)
status = decode_verifier(xdr, readdir->verifier.data);
if (unlikely(status))
return status;
+ memcpy(verf, readdir->verifier.data, sizeof(verf));
dprintk("%s: verifier = %08x:%08x\n",
- __func__,
- ((u32 *)readdir->verifier.data)[0],
- ((u32 *)readdir->verifier.data)[1]);
-
+ __func__, verf[0], verf[1]);
hdrlen = (char *) xdr->p - (char *) iov->iov_base;
recvd = rcvbuf->len - hdrlen;
@@ -4957,17 +5061,26 @@ decode_restorefh(struct xdr_stream *xdr)
}
static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req,
- size_t *acl_len)
+ struct nfs_getaclres *res)
{
- __be32 *savep;
+ __be32 *savep, *bm_p;
uint32_t attrlen,
bitmap[3] = {0};
struct kvec *iov = req->rq_rcv_buf.head;
int status;
+ size_t page_len = xdr->buf->page_len;
- *acl_len = 0;
+ res->acl_len = 0;
if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0)
goto out;
+
+ bm_p = xdr->p;
+ res->acl_data_offset = be32_to_cpup(bm_p) + 2;
+ res->acl_data_offset <<= 2;
+ /* Check if the acl data starts beyond the allocated buffer */
+ if (res->acl_data_offset > page_len)
+ return -ERANGE;
+
if ((status = decode_attr_bitmap(xdr, bitmap)) != 0)
goto out;
if ((status = decode_attr_length(xdr, &attrlen, &savep)) != 0)
@@ -4977,20 +5090,28 @@ static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req,
return -EIO;
if (likely(bitmap[0] & FATTR4_WORD0_ACL)) {
size_t hdrlen;
- u32 recvd;
+
+ /* The bitmap (xdr len + bitmaps) and the attr xdr len words
+ * are stored with the acl data to handle the problem of
+ * variable length bitmaps.*/
+ xdr->p = bm_p;
/* We ignore &savep and don't do consistency checks on
* the attr length. Let userspace figure it out.... */
hdrlen = (u8 *)xdr->p - (u8 *)iov->iov_base;
- recvd = req->rq_rcv_buf.len - hdrlen;
- if (attrlen > recvd) {
- dprintk("NFS: server cheating in getattr"
- " acl reply: attrlen %u > recvd %u\n",
- attrlen, recvd);
+ attrlen += res->acl_data_offset;
+ if (attrlen > page_len) {
+ if (res->acl_flags & NFS4_ACL_LEN_REQUEST) {
+ /* getxattr interface called with a NULL buf */
+ res->acl_len = attrlen;
+ goto out;
+ }
+ dprintk("NFS: acl reply: attrlen %u > page_len %zu\n",
+ attrlen, page_len);
return -EINVAL;
}
xdr_read_pages(xdr, attrlen);
- *acl_len = attrlen;
+ res->acl_len = attrlen;
} else
status = -EOPNOTSUPP;
@@ -5096,7 +5217,7 @@ static int decode_write(struct xdr_stream *xdr, struct nfs_writeres *res)
goto out_overflow;
res->count = be32_to_cpup(p++);
res->verf->committed = be32_to_cpup(p++);
- memcpy(res->verf->verifier, p, 8);
+ memcpy(res->verf->verifier, p, NFS4_VERIFIER_SIZE);
return 0;
out_overflow:
print_overflow_msg(__func__, xdr);
@@ -5139,16 +5260,13 @@ out_err:
return -EINVAL;
}
-static int decode_secinfo(struct xdr_stream *xdr, struct nfs4_secinfo_res *res)
+static int decode_secinfo_common(struct xdr_stream *xdr, struct nfs4_secinfo_res *res)
{
struct nfs4_secinfo_flavor *sec_flavor;
int status;
__be32 *p;
int i, num_flavors;
- status = decode_op_hdr(xdr, OP_SECINFO);
- if (status)
- goto out;
p = xdr_inline_decode(xdr, 4);
if (unlikely(!p))
goto out_overflow;
@@ -5174,6 +5292,7 @@ static int decode_secinfo(struct xdr_stream *xdr, struct nfs4_secinfo_res *res)
res->flavors->num_flavors++;
}
+ status = 0;
out:
return status;
out_overflow:
@@ -5181,7 +5300,23 @@ out_overflow:
return -EIO;
}
+static int decode_secinfo(struct xdr_stream *xdr, struct nfs4_secinfo_res *res)
+{ /* Decode a SECINFO reply: check the op header, then hand the body to decode_secinfo_common(). */
+ int status = decode_op_hdr(xdr, OP_SECINFO);
+ if (status) /* a non-zero header status is returned as-is and the body is not decoded */
+ return status;
+ return decode_secinfo_common(xdr, res);
+}
+
#if defined(CONFIG_NFS_V4_1)
+static int decode_secinfo_no_name(struct xdr_stream *xdr, struct nfs4_secinfo_res *res)
+{ /* Decode a SECINFO_NO_NAME reply: check the op header, then hand the body to decode_secinfo_common(). */
+ int status = decode_op_hdr(xdr, OP_SECINFO_NO_NAME);
+ if (status) /* a non-zero header status is returned as-is and the body is not decoded */
+ return status;
+ return decode_secinfo_common(xdr, res);
+}
+
static int decode_exchange_id(struct xdr_stream *xdr,
struct nfs41_exchange_id_res *res)
{
@@ -5189,7 +5324,7 @@ static int decode_exchange_id(struct xdr_stream *xdr,
uint32_t dummy;
char *dummy_str;
int status;
- struct nfs_client *clp = res->client;
+ uint32_t impl_id_count;
status = decode_op_hdr(xdr, OP_EXCHANGE_ID);
if (status)
@@ -5198,44 +5333,74 @@ static int decode_exchange_id(struct xdr_stream *xdr,
p = xdr_inline_decode(xdr, 8);
if (unlikely(!p))
goto out_overflow;
- xdr_decode_hyper(p, &clp->cl_clientid);
+ xdr_decode_hyper(p, &res->clientid);
p = xdr_inline_decode(xdr, 12);
if (unlikely(!p))
goto out_overflow;
- clp->cl_seqid = be32_to_cpup(p++);
- clp->cl_exchange_flags = be32_to_cpup(p++);
+ res->seqid = be32_to_cpup(p++);
+ res->flags = be32_to_cpup(p++);
/* We ask for SP4_NONE */
dummy = be32_to_cpup(p);
if (dummy != SP4_NONE)
return -EIO;
- /* Throw away minor_id */
+ /* server_owner4.so_minor_id */
p = xdr_inline_decode(xdr, 8);
if (unlikely(!p))
goto out_overflow;
+ p = xdr_decode_hyper(p, &res->server_owner->minor_id);
- /* Throw away Major id */
+ /* server_owner4.so_major_id */
status = decode_opaque_inline(xdr, &dummy, &dummy_str);
if (unlikely(status))
return status;
+ if (unlikely(dummy > NFS4_OPAQUE_LIMIT))
+ return -EIO;
+ memcpy(res->server_owner->major_id, dummy_str, dummy);
+ res->server_owner->major_id_sz = dummy;
- /* Save server_scope */
+ /* server_scope4 */
status = decode_opaque_inline(xdr, &dummy, &dummy_str);
if (unlikely(status))
return status;
-
if (unlikely(dummy > NFS4_OPAQUE_LIMIT))
return -EIO;
-
memcpy(res->server_scope->server_scope, dummy_str, dummy);
res->server_scope->server_scope_sz = dummy;
- /* Throw away Implementation id array */
- status = decode_opaque_inline(xdr, &dummy, &dummy_str);
- if (unlikely(status))
- return status;
+ /* Implementation Id */
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(!p))
+ goto out_overflow;
+ impl_id_count = be32_to_cpup(p++);
+ if (impl_id_count) {
+ /* nii_domain */
+ status = decode_opaque_inline(xdr, &dummy, &dummy_str);
+ if (unlikely(status))
+ return status;
+ if (unlikely(dummy > NFS4_OPAQUE_LIMIT))
+ return -EIO;
+ memcpy(res->impl_id->domain, dummy_str, dummy);
+
+ /* nii_name */
+ status = decode_opaque_inline(xdr, &dummy, &dummy_str);
+ if (unlikely(status))
+ return status;
+ if (unlikely(dummy > NFS4_OPAQUE_LIMIT))
+ return -EIO;
+ memcpy(res->impl_id->name, dummy_str, dummy);
+
+ /* nii_date */
+ p = xdr_inline_decode(xdr, 12);
+ if (unlikely(!p))
+ goto out_overflow;
+ p = xdr_decode_hyper(p, &res->impl_id->date.seconds);
+ res->impl_id->date.nseconds = be32_to_cpup(p);
+
+ /* if there's more than one entry, ignore the rest */
+ }
return 0;
out_overflow:
print_overflow_msg(__func__, xdr);
@@ -5261,8 +5426,8 @@ static int decode_chan_attrs(struct xdr_stream *xdr,
attrs->max_reqs = be32_to_cpup(p++);
nr_attrs = be32_to_cpup(p);
if (unlikely(nr_attrs > 1)) {
- printk(KERN_WARNING "%s: Invalid rdma channel attrs count %u\n",
- __func__, nr_attrs);
+ printk(KERN_WARNING "NFS: %s: Invalid rdma channel attrs "
+ "count %u\n", __func__, nr_attrs);
return -EINVAL;
}
if (nr_attrs == 1) {
@@ -5281,6 +5446,37 @@ static int decode_sessionid(struct xdr_stream *xdr, struct nfs4_sessionid *sid)
return decode_opaque_fixed(xdr, sid->data, NFS4_MAX_SESSIONID_LEN);
}
+static int decode_bind_conn_to_session(struct xdr_stream *xdr,
+ struct nfs41_bind_conn_to_session_res *res)
+{ /* Decode a BIND_CONN_TO_SESSION reply into *res; returns 0, the header/sessionid decode status, or -EIO. */
+ __be32 *p;
+ int status;
+
+ status = decode_op_hdr(xdr, OP_BIND_CONN_TO_SESSION);
+ if (!status)
+ status = decode_sessionid(xdr, &res->session->sess_id); /* the reply's session id is written into res->session->sess_id */
+ if (unlikely(status))
+ return status;
+
+ /* dir flags, rdma mode bool: two consecutive 4-byte words */
+ p = xdr_inline_decode(xdr, 8);
+ if (unlikely(!p))
+ goto out_overflow;
+
+ res->dir = be32_to_cpup(p++);
+ if (res->dir == 0 || res->dir > NFS4_CDFS4_BOTH) /* reject values outside 1..NFS4_CDFS4_BOTH */
+ return -EIO;
+ if (be32_to_cpup(p) == 0) /* any non-zero word is treated as true */
+ res->use_conn_in_rdma_mode = false;
+ else
+ res->use_conn_in_rdma_mode = true;
+
+ return 0;
+out_overflow: /* reached only when the 8-byte read above fails */
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
+}
+
static int decode_create_session(struct xdr_stream *xdr,
struct nfs41_create_session_res *res)
{
@@ -5317,6 +5513,11 @@ static int decode_destroy_session(struct xdr_stream *xdr, void *dummy)
return decode_op_hdr(xdr, OP_DESTROY_SESSION);
}
+static int decode_destroy_clientid(struct xdr_stream *xdr, void *dummy)
+{ /* Decode a DESTROY_CLIENTID reply: only the op header is read; the dummy argument is unused. */
+ return decode_op_hdr(xdr, OP_DESTROY_CLIENTID);
+}
+
static int decode_reclaim_complete(struct xdr_stream *xdr, void *dummy)
{
return decode_op_hdr(xdr, OP_RECLAIM_COMPLETE);
@@ -5412,14 +5613,14 @@ static int decode_getdevicelist(struct xdr_stream *xdr,
p += 2;
/* Read verifier */
- p = xdr_decode_opaque_fixed(p, verftemp.verifier, 8);
+ p = xdr_decode_opaque_fixed(p, verftemp.verifier, NFS4_VERIFIER_SIZE);
res->num_devs = be32_to_cpup(p);
dprintk("%s: num_dev %d\n", __func__, res->num_devs);
if (res->num_devs > NFS4_PNFS_GETDEVLIST_MAXNUM) {
- printk(KERN_ERR "%s too many result dev_num %u\n",
+ printk(KERN_ERR "NFS: %s too many result dev_num %u\n",
__func__, res->num_devs);
return -EIO;
}
@@ -5513,11 +5714,14 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
status = decode_op_hdr(xdr, OP_LAYOUTGET);
if (status)
return status;
- p = xdr_inline_decode(xdr, 8 + NFS4_STATEID_SIZE);
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(!p))
+ goto out_overflow;
+ res->return_on_close = be32_to_cpup(p);
+ decode_stateid(xdr, &res->stateid);
+ p = xdr_inline_decode(xdr, 4);
if (unlikely(!p))
goto out_overflow;
- res->return_on_close = be32_to_cpup(p++);
- p = xdr_decode_opaque_fixed(p, res->stateid.data, NFS4_STATEID_SIZE);
layout_count = be32_to_cpup(p);
if (!layout_count) {
dprintk("%s: server responded with empty layout array\n",
@@ -5642,7 +5846,8 @@ static int decode_test_stateid(struct xdr_stream *xdr,
if (unlikely(!p))
goto out_overflow;
res->status = be32_to_cpup(p++);
- return res->status;
+
+ return status;
out_overflow:
print_overflow_msg(__func__, xdr);
out:
@@ -5696,8 +5901,7 @@ static int nfs4_xdr_dec_open_downgrade(struct rpc_rqst *rqstp,
status = decode_open_downgrade(xdr, res);
if (status != 0)
goto out;
- decode_getfattr(xdr, res->fattr, res->server,
- !RPC_IS_ASYNC(rqstp->rq_task));
+ decode_getfattr(xdr, res->fattr, res->server);
out:
return status;
}
@@ -5723,8 +5927,7 @@ static int nfs4_xdr_dec_access(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
status = decode_access(xdr, res);
if (status != 0)
goto out;
- decode_getfattr(xdr, res->fattr, res->server,
- !RPC_IS_ASYNC(rqstp->rq_task));
+ decode_getfattr(xdr, res->fattr, res->server);
out:
return status;
}
@@ -5753,8 +5956,7 @@ static int nfs4_xdr_dec_lookup(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
status = decode_getfh(xdr, res->fh);
if (status)
goto out;
- status = decode_getfattr(xdr, res->fattr, res->server
- ,!RPC_IS_ASYNC(rqstp->rq_task));
+ status = decode_getfattr(xdr, res->fattr, res->server);
out:
return status;
}
@@ -5780,8 +5982,7 @@ static int nfs4_xdr_dec_lookup_root(struct rpc_rqst *rqstp,
goto out;
status = decode_getfh(xdr, res->fh);
if (status == 0)
- status = decode_getfattr(xdr, res->fattr, res->server,
- !RPC_IS_ASYNC(rqstp->rq_task));
+ status = decode_getfattr(xdr, res->fattr, res->server);
out:
return status;
}
@@ -5805,10 +6006,6 @@ static int nfs4_xdr_dec_remove(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
if (status)
goto out;
status = decode_remove(xdr, &res->cinfo);
- if (status)
- goto out;
- decode_getfattr(xdr, res->dir_attr, res->server,
- !RPC_IS_ASYNC(rqstp->rq_task));
out:
return status;
}
@@ -5838,17 +6035,6 @@ static int nfs4_xdr_dec_rename(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
if (status)
goto out;
status = decode_rename(xdr, &res->old_cinfo, &res->new_cinfo);
- if (status)
- goto out;
- /* Current FH is target directory */
- if (decode_getfattr(xdr, res->new_fattr, res->server,
- !RPC_IS_ASYNC(rqstp->rq_task)) != 0)
- goto out;
- status = decode_restorefh(xdr);
- if (status)
- goto out;
- decode_getfattr(xdr, res->old_fattr, res->server,
- !RPC_IS_ASYNC(rqstp->rq_task));
out:
return status;
}
@@ -5884,14 +6070,10 @@ static int nfs4_xdr_dec_link(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
* Note order: OP_LINK leaves the directory as the current
* filehandle.
*/
- if (decode_getfattr(xdr, res->dir_attr, res->server,
- !RPC_IS_ASYNC(rqstp->rq_task)) != 0)
- goto out;
status = decode_restorefh(xdr);
if (status)
goto out;
- decode_getfattr(xdr, res->fattr, res->server,
- !RPC_IS_ASYNC(rqstp->rq_task));
+ decode_getfattr(xdr, res->fattr, res->server);
out:
return status;
}
@@ -5914,23 +6096,13 @@ static int nfs4_xdr_dec_create(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
status = decode_putfh(xdr);
if (status)
goto out;
- status = decode_savefh(xdr);
- if (status)
- goto out;
status = decode_create(xdr, &res->dir_cinfo);
if (status)
goto out;
status = decode_getfh(xdr, res->fh);
if (status)
goto out;
- if (decode_getfattr(xdr, res->fattr, res->server,
- !RPC_IS_ASYNC(rqstp->rq_task)) != 0)
- goto out;
- status = decode_restorefh(xdr);
- if (status)
- goto out;
- decode_getfattr(xdr, res->dir_fattr, res->server,
- !RPC_IS_ASYNC(rqstp->rq_task));
+ decode_getfattr(xdr, res->fattr, res->server);
out:
return status;
}
@@ -5962,8 +6134,7 @@ static int nfs4_xdr_dec_getattr(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
status = decode_putfh(xdr);
if (status)
goto out;
- status = decode_getfattr(xdr, res->fattr, res->server,
- !RPC_IS_ASYNC(rqstp->rq_task));
+ status = decode_getfattr(xdr, res->fattr, res->server);
out:
return status;
}
@@ -6019,6 +6190,10 @@ nfs4_xdr_dec_getacl(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
struct compound_hdr hdr;
int status;
+ if (res->acl_scratch != NULL) {
+ void *p = page_address(res->acl_scratch);
+ xdr_set_scratch_buffer(xdr, p, PAGE_SIZE);
+ }
status = decode_compound_hdr(xdr, &hdr);
if (status)
goto out;
@@ -6028,7 +6203,7 @@ nfs4_xdr_dec_getacl(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
status = decode_putfh(xdr);
if (status)
goto out;
- status = decode_getacl(xdr, rqstp, &res->acl_len);
+ status = decode_getacl(xdr, rqstp, res);
out:
return status;
@@ -6061,8 +6236,7 @@ static int nfs4_xdr_dec_close(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
* an ESTALE error. Shouldn't be a problem,
* though, since fattr->valid will remain unset.
*/
- decode_getfattr(xdr, res->fattr, res->server,
- !RPC_IS_ASYNC(rqstp->rq_task));
+ decode_getfattr(xdr, res->fattr, res->server);
out:
return status;
}
@@ -6085,21 +6259,12 @@ static int nfs4_xdr_dec_open(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
status = decode_putfh(xdr);
if (status)
goto out;
- status = decode_savefh(xdr);
- if (status)
- goto out;
status = decode_open(xdr, res);
if (status)
goto out;
if (decode_getfh(xdr, &res->fh) != 0)
goto out;
- if (decode_getfattr(xdr, res->f_attr, res->server,
- !RPC_IS_ASYNC(rqstp->rq_task)) != 0)
- goto out;
- if (decode_restorefh(xdr) != 0)
- goto out;
- decode_getfattr(xdr, res->dir_attr, res->server,
- !RPC_IS_ASYNC(rqstp->rq_task));
+ decode_getfattr(xdr, res->f_attr, res->server);
out:
return status;
}
@@ -6147,8 +6312,7 @@ static int nfs4_xdr_dec_open_noattr(struct rpc_rqst *rqstp,
status = decode_open(xdr, res);
if (status)
goto out;
- decode_getfattr(xdr, res->f_attr, res->server,
- !RPC_IS_ASYNC(rqstp->rq_task));
+ decode_getfattr(xdr, res->f_attr, res->server);
out:
return status;
}
@@ -6175,8 +6339,7 @@ static int nfs4_xdr_dec_setattr(struct rpc_rqst *rqstp,
status = decode_setattr(xdr);
if (status)
goto out;
- decode_getfattr(xdr, res->fattr, res->server,
- !RPC_IS_ASYNC(rqstp->rq_task));
+ decode_getfattr(xdr, res->fattr, res->server);
out:
return status;
}
@@ -6356,8 +6519,7 @@ static int nfs4_xdr_dec_write(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
if (status)
goto out;
if (res->fattr)
- decode_getfattr(xdr, res->fattr, res->server,
- !RPC_IS_ASYNC(rqstp->rq_task));
+ decode_getfattr(xdr, res->fattr, res->server);
if (!status)
status = res->count;
out:
@@ -6368,7 +6530,7 @@ out:
* Decode COMMIT response
*/
static int nfs4_xdr_dec_commit(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
- struct nfs_writeres *res)
+ struct nfs_commitres *res)
{
struct compound_hdr hdr;
int status;
@@ -6383,11 +6545,6 @@ static int nfs4_xdr_dec_commit(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
if (status)
goto out;
status = decode_commit(xdr, res);
- if (status)
- goto out;
- if (res->fattr)
- decode_getfattr(xdr, res->fattr, res->server,
- !RPC_IS_ASYNC(rqstp->rq_task));
out:
return status;
}
@@ -6543,11 +6700,10 @@ static int nfs4_xdr_dec_delegreturn(struct rpc_rqst *rqstp,
status = decode_putfh(xdr);
if (status != 0)
goto out;
- status = decode_delegreturn(xdr);
+ status = decode_getfattr(xdr, res->fattr, res->server);
if (status != 0)
goto out;
- decode_getfattr(xdr, res->fattr, res->server,
- !RPC_IS_ASYNC(rqstp->rq_task));
+ status = decode_delegreturn(xdr);
out:
return status;
}
@@ -6575,9 +6731,9 @@ static int nfs4_xdr_dec_fs_locations(struct rpc_rqst *req,
if (status)
goto out;
xdr_enter_page(xdr, PAGE_SIZE);
- status = decode_getfattr(xdr, &res->fs_locations->fattr,
- res->fs_locations->server,
- !RPC_IS_ASYNC(req->rq_task));
+ status = decode_getfattr_generic(xdr, &res->fs_locations->fattr,
+ NULL, res->fs_locations,
+ res->fs_locations->server);
out:
return status;
}
@@ -6608,6 +6764,22 @@ out:
#if defined(CONFIG_NFS_V4_1)
/*
+ * Decode BIND_CONN_TO_SESSION response
+ */
+static int nfs4_xdr_dec_bind_conn_to_session(struct rpc_rqst *rqstp,
+					struct xdr_stream *xdr,
+					void *res)
+{
+	struct compound_hdr hdr;
+	int status = decode_compound_hdr(xdr, &hdr);
+
+	/* the operation result is only decoded once the compound header is */
+	if (status)
+		return status;
+	return decode_bind_conn_to_session(xdr, res);
+}
+
+/*
* Decode EXCHANGE_ID response
*/
static int nfs4_xdr_dec_exchange_id(struct rpc_rqst *rqstp,
@@ -6656,6 +6828,22 @@ static int nfs4_xdr_dec_destroy_session(struct rpc_rqst *rqstp,
}
/*
+ * Decode DESTROY_CLIENTID response
+ */
+static int nfs4_xdr_dec_destroy_clientid(struct rpc_rqst *rqstp,
+					struct xdr_stream *xdr,
+					void *res)
+{
+	struct compound_hdr hdr;
+	int status = decode_compound_hdr(xdr, &hdr);
+
+	/* the operation result is only decoded once the compound header is */
+	if (status)
+		return status;
+	return decode_destroy_clientid(xdr, res);
+}
+
+/*
* Decode SEQUENCE response
*/
static int nfs4_xdr_dec_sequence(struct rpc_rqst *rqstp,
@@ -6826,8 +7014,7 @@ static int nfs4_xdr_dec_layoutcommit(struct rpc_rqst *rqstp,
status = decode_layoutcommit(xdr, rqstp, res);
if (status)
goto out;
- decode_getfattr(xdr, res->fattr, res->server,
- !RPC_IS_ASYNC(rqstp->rq_task));
+ decode_getfattr(xdr, res->fattr, res->server);
out:
return status;
}
@@ -6851,7 +7038,7 @@ static int nfs4_xdr_dec_secinfo_no_name(struct rpc_rqst *rqstp,
status = decode_putrootfh(xdr);
if (status)
goto out;
- status = decode_secinfo(xdr, res);
+ status = decode_secinfo_no_name(xdr, res);
out:
return status;
}
@@ -6958,7 +7145,7 @@ int nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
goto out_overflow;
if (decode_getfattr_attrs(xdr, bitmap, entry->fattr, entry->fh,
- entry->server, 1) < 0)
+ NULL, entry->server) < 0)
goto out_overflow;
if (entry->fattr->valid & NFS_ATTR_FATTR_MOUNTED_ON_FILEID)
entry->ino = entry->fattr->mounted_on_fileid;
@@ -7103,10 +7290,13 @@ struct rpc_procinfo nfs4_procedures[] = {
PROC(TEST_STATEID, enc_test_stateid, dec_test_stateid),
PROC(FREE_STATEID, enc_free_stateid, dec_free_stateid),
PROC(GETDEVICELIST, enc_getdevicelist, dec_getdevicelist),
+ PROC(BIND_CONN_TO_SESSION,
+ enc_bind_conn_to_session, dec_bind_conn_to_session),
+ PROC(DESTROY_CLIENTID, enc_destroy_clientid, dec_destroy_clientid),
#endif /* CONFIG_NFS_V4_1 */
};
-struct rpc_version nfs_version4 = {
+const struct rpc_version nfs_version4 = {
.number = 4,
.nrprocs = ARRAY_SIZE(nfs4_procedures),
.procs = nfs4_procedures
diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c
index c4744e1d513..cd3c910d2d1 100644
--- a/fs/nfs/nfsroot.c
+++ b/fs/nfs/nfsroot.c
@@ -104,7 +104,7 @@ static char nfs_export_path[NFS_MAXPATHLEN + 1] __initdata = "";
/* server:export path string passed to super.c */
static char nfs_root_device[NFS_MAXPATHLEN + 1] __initdata = "";
-#ifdef RPC_DEBUG
+#ifdef NFS_DEBUG
/*
* When the "nfsrootdebug" kernel command line option is specified,
* enable debugging messages for NFSROOT.
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
index c807ab93140..f50d3e8d6f2 100644
--- a/fs/nfs/objlayout/objio_osd.c
+++ b/fs/nfs/objlayout/objio_osd.c
@@ -137,6 +137,7 @@ static int objio_devices_lookup(struct pnfs_layout_hdr *pnfslay,
struct objio_dev_ent *ode;
struct osd_dev *od;
struct osd_dev_info odi;
+ bool retry_flag = true;
int err;
ode = _dev_list_find(NFS_SERVER(pnfslay->plh_inode), d_id);
@@ -171,10 +172,18 @@ static int objio_devices_lookup(struct pnfs_layout_hdr *pnfslay,
goto out;
}
+retry_lookup:
od = osduld_info_lookup(&odi);
if (unlikely(IS_ERR(od))) {
err = PTR_ERR(od);
dprintk("%s: osduld_info_lookup => %d\n", __func__, err);
+ if (err == -ENODEV && retry_flag) {
+ err = objlayout_autologin(deviceaddr);
+ if (likely(!err)) {
+ retry_flag = false;
+ goto retry_lookup;
+ }
+ }
goto out;
}
@@ -202,28 +211,39 @@ static void copy_single_comp(struct ore_components *oc, unsigned c,
memcpy(ocomp->cred, src_comp->oc_cap.cred, sizeof(ocomp->cred));
}
-int __alloc_objio_seg(unsigned numdevs, gfp_t gfp_flags,
+static int __alloc_objio_seg(unsigned numdevs, gfp_t gfp_flags,
struct objio_segment **pseg)
{
- struct __alloc_objio_segment {
- struct objio_segment olseg;
- struct ore_dev *ods[numdevs];
- struct ore_comp comps[numdevs];
- } *aolseg;
-
- aolseg = kzalloc(sizeof(*aolseg), gfp_flags);
- if (unlikely(!aolseg)) {
+/* This is the in memory structure of the objio_segment
+ *
+ * struct __alloc_objio_segment {
+ * struct objio_segment olseg;
+ * struct ore_dev *ods[numdevs];
+ * struct ore_comp comps[numdevs];
+ * } *aolseg;
+ * NOTE: The code as above compiles and runs perfectly. It is elegant,
+ * type safe and compact. At some Past time Linus has decided he does not
+ * like variable length arrays, For the sake of this principal we uglify
+ * the code as below.
+ */
+ struct objio_segment *lseg;
+ size_t lseg_size = sizeof(*lseg) +
+ numdevs * sizeof(lseg->oc.ods[0]) +
+ numdevs * sizeof(*lseg->oc.comps);
+
+ lseg = kzalloc(lseg_size, gfp_flags);
+ if (unlikely(!lseg)) {
dprintk("%s: Faild allocation numdevs=%d size=%zd\n", __func__,
- numdevs, sizeof(*aolseg));
+ numdevs, lseg_size);
return -ENOMEM;
}
- aolseg->olseg.oc.numdevs = numdevs;
- aolseg->olseg.oc.single_comp = EC_MULTPLE_COMPS;
- aolseg->olseg.oc.comps = aolseg->comps;
- aolseg->olseg.oc.ods = aolseg->ods;
+ lseg->oc.numdevs = numdevs;
+ lseg->oc.single_comp = EC_MULTPLE_COMPS;
+ lseg->oc.ods = (void *)(lseg + 1);
+ lseg->oc.comps = (void *)(lseg->oc.ods + numdevs);
- *pseg = &aolseg->olseg;
+ *pseg = lseg;
return 0;
}
@@ -420,11 +440,12 @@ static void _read_done(struct ore_io_state *ios, void *private)
int objio_read_pagelist(struct nfs_read_data *rdata)
{
+ struct nfs_pgio_header *hdr = rdata->header;
struct objio_state *objios;
int ret;
- ret = objio_alloc_io_state(NFS_I(rdata->inode)->layout, true,
- rdata->lseg, rdata->args.pages, rdata->args.pgbase,
+ ret = objio_alloc_io_state(NFS_I(hdr->inode)->layout, true,
+ hdr->lseg, rdata->args.pages, rdata->args.pgbase,
rdata->args.offset, rdata->args.count, rdata,
GFP_KERNEL, &objios);
if (unlikely(ret))
@@ -433,7 +454,10 @@ int objio_read_pagelist(struct nfs_read_data *rdata)
objios->ios->done = _read_done;
dprintk("%s: offset=0x%llx length=0x%x\n", __func__,
rdata->args.offset, rdata->args.count);
- return ore_read(objios->ios);
+ ret = ore_read(objios->ios);
+ if (unlikely(ret))
+ objio_free_result(&objios->oir);
+ return ret;
}
/*
@@ -463,12 +487,20 @@ static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate)
{
struct objio_state *objios = priv;
struct nfs_write_data *wdata = objios->oir.rpcdata;
+ struct address_space *mapping = wdata->header->inode->i_mapping;
pgoff_t index = offset / PAGE_SIZE;
- struct page *page = find_get_page(wdata->inode->i_mapping, index);
+ struct page *page;
+ loff_t i_size = i_size_read(wdata->header->inode);
+ if (offset >= i_size) {
+ *uptodate = true;
+ dprintk("%s: g_zero_page index=0x%lx\n", __func__, index);
+ return ZERO_PAGE(0);
+ }
+
+ page = find_get_page(mapping, index);
if (!page) {
- page = find_or_create_page(wdata->inode->i_mapping,
- index, GFP_NOFS);
+ page = find_or_create_page(mapping, index, GFP_NOFS);
if (unlikely(!page)) {
dprintk("%s: grab_cache_page Failed index=0x%lx\n",
__func__, index);
@@ -486,8 +518,10 @@ static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate)
static void __r4w_put_page(void *priv, struct page *page)
{
- dprintk("%s: index=0x%lx\n", __func__, page->index);
- page_cache_release(page);
+ dprintk("%s: index=0x%lx\n", __func__,
+ (page == ZERO_PAGE(0)) ? -1UL : page->index);
+ if (ZERO_PAGE(0) != page)
+ page_cache_release(page);
return;
}
@@ -498,11 +532,12 @@ static const struct _ore_r4w_op _r4w_op = {
int objio_write_pagelist(struct nfs_write_data *wdata, int how)
{
+ struct nfs_pgio_header *hdr = wdata->header;
struct objio_state *objios;
int ret;
- ret = objio_alloc_io_state(NFS_I(wdata->inode)->layout, false,
- wdata->lseg, wdata->args.pages, wdata->args.pgbase,
+ ret = objio_alloc_io_state(NFS_I(hdr->inode)->layout, false,
+ hdr->lseg, wdata->args.pages, wdata->args.pgbase,
wdata->args.offset, wdata->args.count, wdata, GFP_NOFS,
&objios);
if (unlikely(ret))
@@ -517,8 +552,10 @@ int objio_write_pagelist(struct nfs_write_data *wdata, int how)
dprintk("%s: offset=0x%llx length=0x%x\n", __func__,
wdata->args.offset, wdata->args.count);
ret = ore_write(objios->ios);
- if (unlikely(ret))
+ if (unlikely(ret)) {
+ objio_free_result(&objios->oir);
return ret;
+ }
if (objios->sync)
_write_done(objios->ios, objios);
@@ -551,7 +588,8 @@ static const struct nfs_pageio_ops objio_pg_write_ops = {
static struct pnfs_layoutdriver_type objlayout_type = {
.id = LAYOUT_OSD2_OBJECTS,
.name = "LAYOUT_OSD2_OBJECTS",
- .flags = PNFS_LAYOUTRET_ON_SETATTR,
+ .flags = PNFS_LAYOUTRET_ON_SETATTR |
+ PNFS_LAYOUTRET_ON_ERROR,
.alloc_layout_hdr = objlayout_alloc_layout_hdr,
.free_layout_hdr = objlayout_free_layout_hdr,
@@ -581,10 +619,10 @@ objlayout_init(void)
if (ret)
printk(KERN_INFO
- "%s: Registering OSD pNFS Layout Driver failed: error=%d\n",
+ "NFS: %s: Registering OSD pNFS Layout Driver failed: error=%d\n",
__func__, ret);
else
- printk(KERN_INFO "%s: Registered OSD pNFS Layout Driver\n",
+ printk(KERN_INFO "NFS: %s: Registered OSD pNFS Layout Driver\n",
__func__);
return ret;
}
@@ -593,7 +631,7 @@ static void __exit
objlayout_exit(void)
{
pnfs_unregister_layoutdriver(&objlayout_type);
- printk(KERN_INFO "%s: Unregistered OSD pNFS Layout Driver\n",
+ printk(KERN_INFO "NFS: %s: Unregistered OSD pNFS Layout Driver\n",
__func__);
}
diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c
index 72074e3a04f..87461354530 100644
--- a/fs/nfs/objlayout/objlayout.c
+++ b/fs/nfs/objlayout/objlayout.c
@@ -37,6 +37,9 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
+#include <linux/kmod.h>
+#include <linux/moduleparam.h>
+#include <linux/ratelimit.h>
#include <scsi/osd_initiator.h>
#include "objlayout.h"
@@ -156,7 +159,7 @@ last_byte_offset(u64 start, u64 len)
return end > start ? end - 1 : NFS4_MAX_UINT64;
}
-void _fix_verify_io_params(struct pnfs_layout_segment *lseg,
+static void _fix_verify_io_params(struct pnfs_layout_segment *lseg,
struct page ***p_pages, unsigned *p_pgbase,
u64 offset, unsigned long count)
{
@@ -254,6 +257,8 @@ objlayout_read_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
oir->status = rdata->task.tk_status = status;
if (status >= 0)
rdata->res.count = status;
+ else
+ rdata->header->pnfs_error = status;
objlayout_iodone(oir);
/* must not use oir after this point */
@@ -274,12 +279,14 @@ objlayout_read_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
enum pnfs_try_status
objlayout_read_pagelist(struct nfs_read_data *rdata)
{
+ struct nfs_pgio_header *hdr = rdata->header;
+ struct inode *inode = hdr->inode;
loff_t offset = rdata->args.offset;
size_t count = rdata->args.count;
int err;
loff_t eof;
- eof = i_size_read(rdata->inode);
+ eof = i_size_read(inode);
if (unlikely(offset + count > eof)) {
if (offset >= eof) {
err = 0;
@@ -292,17 +299,17 @@ objlayout_read_pagelist(struct nfs_read_data *rdata)
}
rdata->res.eof = (offset + count) >= eof;
- _fix_verify_io_params(rdata->lseg, &rdata->args.pages,
+ _fix_verify_io_params(hdr->lseg, &rdata->args.pages,
&rdata->args.pgbase,
rdata->args.offset, rdata->args.count);
dprintk("%s: inode(%lx) offset 0x%llx count 0x%Zx eof=%d\n",
- __func__, rdata->inode->i_ino, offset, count, rdata->res.eof);
+ __func__, inode->i_ino, offset, count, rdata->res.eof);
err = objio_read_pagelist(rdata);
out:
if (unlikely(err)) {
- rdata->pnfs_error = err;
+ hdr->pnfs_error = err;
dprintk("%s: Returned Error %d\n", __func__, err);
return PNFS_NOT_ATTEMPTED;
}
@@ -334,6 +341,8 @@ objlayout_write_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
if (status >= 0) {
wdata->res.count = status;
wdata->verf.committed = oir->committed;
+ } else {
+ wdata->header->pnfs_error = status;
}
objlayout_iodone(oir);
/* must not use oir after this point */
@@ -356,15 +365,16 @@ enum pnfs_try_status
objlayout_write_pagelist(struct nfs_write_data *wdata,
int how)
{
+ struct nfs_pgio_header *hdr = wdata->header;
int err;
- _fix_verify_io_params(wdata->lseg, &wdata->args.pages,
+ _fix_verify_io_params(hdr->lseg, &wdata->args.pages,
&wdata->args.pgbase,
wdata->args.offset, wdata->args.count);
err = objio_write_pagelist(wdata, how);
if (unlikely(err)) {
- wdata->pnfs_error = err;
+ hdr->pnfs_error = err;
dprintk("%s: Returned Error %d\n", __func__, err);
return PNFS_NOT_ATTEMPTED;
}
@@ -486,9 +496,9 @@ encode_accumulated_error(struct objlayout *objlay, __be32 *p)
if (!ioerr->oer_errno)
continue;
- printk(KERN_ERR "%s: err[%d]: errno=%d is_write=%d "
- "dev(%llx:%llx) par=0x%llx obj=0x%llx "
- "offset=0x%llx length=0x%llx\n",
+ printk(KERN_ERR "NFS: %s: err[%d]: errno=%d "
+ "is_write=%d dev(%llx:%llx) par=0x%llx "
+ "obj=0x%llx offset=0x%llx length=0x%llx\n",
__func__, i, ioerr->oer_errno,
ioerr->oer_iswrite,
_DEVID_LO(&ioerr->oer_component.oid_device_id),
@@ -597,7 +607,6 @@ int objlayout_get_deviceinfo(struct pnfs_layout_hdr *pnfslay,
{
struct objlayout_deviceinfo *odi;
struct pnfs_device pd;
- struct super_block *sb;
struct page *page, **pages;
u32 *p;
int err;
@@ -616,7 +625,6 @@ int objlayout_get_deviceinfo(struct pnfs_layout_hdr *pnfslay,
pd.pglen = PAGE_SIZE;
pd.mincount = 0;
- sb = pnfslay->plh_inode->i_sb;
err = nfs4_proc_getdeviceinfo(NFS_SERVER(pnfslay->plh_inode), &pd);
dprintk("%s nfs_getdeviceinfo returned %d\n", __func__, err);
if (err)
@@ -647,3 +655,134 @@ void objlayout_put_deviceinfo(struct pnfs_osd_deviceaddr *deviceaddr)
__free_page(odi->page);
kfree(odi);
}
+
+/* Fixed buffer sizes for the strings handed to the osd_login upcall. */
+enum {
+	OBJLAYOUT_MAX_URI_LEN = 256,
+	OBJLAYOUT_MAX_OSDNAME_LEN = 64,
+	/* two characters per system-id byte, plus one spare byte */
+	OBJLAYOUT_MAX_SYSID_HEX_LEN = OSD_SYSTEMID_LEN * 2 + 1,
+	OSD_LOGIN_UPCALL_PATHLEN = 256
+};
+
+/*
+ * Path of the upcall program, exposed as a module parameter.
+ * __objlayout_upcall() treats an empty string as "upcall disabled".
+ */
+static char osd_login_prog[OSD_LOGIN_UPCALL_PATHLEN] = "/sbin/osd_login";
+
+module_param_string(osd_login_prog, osd_login_prog, sizeof(osd_login_prog),
+		    0600);
+MODULE_PARM_DESC(osd_login_prog, "Path to the osd_login upcall program");
+
+/* Strings passed to the upcall as its -u, -o and -s arguments. */
+struct __auto_login {
+	char uri[OBJLAYOUT_MAX_URI_LEN];
+	char osdname[OBJLAYOUT_MAX_OSDNAME_LEN];
+	char systemid_hex[OBJLAYOUT_MAX_SYSID_HEX_LEN];
+};
+
+/*
+ * Run the osd_login_prog helper with the URI, OSD name and hex system id
+ * from @login and wait for it to finish.
+ *
+ * Returns -EACCES without running anything when osd_login_prog is empty,
+ * otherwise whatever call_usermodehelper() returned.
+ */
+static int __objlayout_upcall(struct __auto_login *login)
+{
+	static char *envp[] = { "HOME=/",
+		"TERM=linux",
+		"PATH=/sbin:/usr/sbin:/bin:/usr/bin",
+		NULL
+	};
+	char *argv[8];
+	int ret;
+
+	if (unlikely(!osd_login_prog[0])) {
+		dprintk("%s: osd_login_prog is disabled\n", __func__);
+		return -EACCES;
+	}
+
+	dprintk("%s uri: %s\n", __func__, login->uri);
+	dprintk("%s osdname %s\n", __func__, login->osdname);
+	dprintk("%s systemid_hex %s\n", __func__, login->systemid_hex);
+
+	argv[0] = osd_login_prog;
+	argv[1] = "-u";
+	argv[2] = login->uri;
+	argv[3] = "-o";
+	argv[4] = login->osdname;
+	argv[5] = "-s";
+	argv[6] = login->systemid_hex;
+	argv[7] = NULL;
+
+	ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC);
+	/*
+	 * Trace the result now: the path may be cleared just below, and
+	 * the trace would then show an empty program name.
+	 */
+	dprintk("%s %s return value: %d\n", __func__, osd_login_prog, ret);
+	/*
+	 * Disable the upcall mechanism if we're getting an ENOENT or
+	 * EACCES error. The admin can re-enable it on the fly by using
+	 * sysfs to set the objlayoutdriver.osd_login_prog module parameter once
+	 * the problem has been fixed.
+	 */
+	if (ret == -ENOENT || ret == -EACCES) {
+		printk(KERN_ERR "PNFS-OBJ: %s was not found please set "
+			"objlayoutdriver.osd_login_prog kernel parameter!\n",
+			osd_login_prog);
+		osd_login_prog[0] = '\0';
+	}
+
+	return ret;
+}
+
+/*
+ * Copy the bytes of @s into @dest, keeping at most @max_len - 1 of them.
+ * An over-long string is truncated and reported through a rate-limited
+ * warning that names it with @var_name.
+ *
+ * Assume dest is all zeros: no terminator is written here, so the zero
+ * byte after the copied data must already be in place.
+ */
+static void __copy_nfsS_and_zero_terminate(struct nfs4_string s,
+					   char *dest, int max_len,
+					   const char *var_name)
+{
+	if (!s.len)
+		return;
+
+	if (s.len >= max_len) {
+		/* end the message with a newline so it forms a complete log line */
+		pr_warn_ratelimited(
+			"objlayout_autologin: %s: s.len(%d) >= max_len(%d)\n",
+			var_name, s.len, max_len);
+		s.len = max_len - 1; /* space for null terminator */
+	}
+
+	memcpy(dest, s.data, s.len);
+}
+
+/*
+ * Hex-encode the bytes of @s into @sysid with hex_byte_pack(), using at
+ * most OSD_SYSTEMID_LEN input bytes. A length other than OSD_SYSTEMID_LEN
+ * is reported through a rate-limited warning.
+ *
+ * Assume sysid is all zeros: no terminator is written here.
+ */
+static void _sysid_2_hex(struct nfs4_string s,
+			 char sysid[OBJLAYOUT_MAX_SYSID_HEX_LEN])
+{
+	int i;
+	char *cur;
+
+	if (!s.len)
+		return;
+
+	if (s.len != OSD_SYSTEMID_LEN) {
+		/* end the message with a newline so it forms a complete log line */
+		pr_warn_ratelimited(
+			"objlayout_autologin: systemid_len(%d) != OSD_SYSTEMID_LEN\n",
+			s.len);
+		/* shorter ids are encoded as they are, longer ones are cut */
+		if (s.len > OSD_SYSTEMID_LEN)
+			s.len = OSD_SYSTEMID_LEN;
+	}
+
+	cur = sysid;
+	for (i = 0; i < s.len; i++)
+		cur = hex_byte_pack(cur, s.data[i]);
+}
+
+/*
+ * Build the upcall arguments (URI, OSD name, hex system id) from
+ * @deviceaddr and run the login upcall.
+ *
+ * Returns -ENODEV when the device has no network address or when the
+ * upcall returns a positive value; otherwise returns the upcall's result.
+ */
+int objlayout_autologin(struct pnfs_osd_deviceaddr *deviceaddr)
+{
+	struct __auto_login login;
+	int rc;
+
+	if (deviceaddr->oda_targetaddr.ota_netaddr.r_addr.len == 0)
+		return -ENODEV;
+
+	/* the copy helpers below expect zero-filled destination buffers */
+	memset(&login, 0, sizeof(login));
+
+	__copy_nfsS_and_zero_terminate(
+		deviceaddr->oda_targetaddr.ota_netaddr.r_addr,
+		login.uri, sizeof(login.uri), "URI");
+	__copy_nfsS_and_zero_terminate(
+		deviceaddr->oda_osdname,
+		login.osdname, sizeof(login.osdname), "OSDNAME");
+	_sysid_2_hex(deviceaddr->oda_systemid, login.systemid_hex);
+
+	rc = __objlayout_upcall(&login);
+
+	/* script returns positive values */
+	return rc > 0 ? -ENODEV : rc;
+}
diff --git a/fs/nfs/objlayout/objlayout.h b/fs/nfs/objlayout/objlayout.h
index 8ec34727ed2..880ba086be9 100644
--- a/fs/nfs/objlayout/objlayout.h
+++ b/fs/nfs/objlayout/objlayout.h
@@ -184,4 +184,6 @@ extern void objlayout_encode_layoutreturn(
struct xdr_stream *,
const struct nfs4_layoutreturn_args *);
+extern int objlayout_autologin(struct pnfs_osd_deviceaddr *deviceaddr);
+
#endif /* _OBJLAYOUT_H */
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 5668f7c54c4..aed913c833f 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -13,6 +13,7 @@
#include <linux/file.h>
#include <linux/sched.h>
#include <linux/sunrpc/clnt.h>
+#include <linux/nfs.h>
#include <linux/nfs3.h>
#include <linux/nfs4.h>
#include <linux/nfs_page.h>
@@ -25,6 +26,47 @@
static struct kmem_cache *nfs_page_cachep;
+/*
+ * Point @p->pagevec at storage for @pagecount page pointers: the embedded
+ * page_array when it is large enough, otherwise a kcalloc()ed array.
+ *
+ * Returns true when pagevec is usable. On allocation failure npages is
+ * reset to 0 and false is returned.
+ */
+bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount)
+{
+	p->npages = pagecount;
+
+	/* small requests fit in the embedded array */
+	if (pagecount <= ARRAY_SIZE(p->page_array)) {
+		p->pagevec = p->page_array;
+		return true;
+	}
+
+	p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_KERNEL);
+	if (p->pagevec)
+		return true;
+
+	p->npages = 0;
+	return false;
+}
+
+/*
+ * Fill in @hdr from the I/O descriptor @desc. The first request on
+ * desc->pg_list supplies the credential and the starting offset, and
+ * @release is stored as the header's release callback. The completion
+ * ops' init_hdr hook is called last, if one is set.
+ */
+void nfs_pgheader_init(struct nfs_pageio_descriptor *desc,
+		       struct nfs_pgio_header *hdr,
+		       void (*release)(struct nfs_pgio_header *hdr))
+{
+	struct nfs_page *first = nfs_list_entry(desc->pg_list.next);
+
+	hdr->req = first;
+	hdr->inode = desc->pg_inode;
+	hdr->cred = first->wb_context->cred;
+	hdr->io_start = req_offset(first);
+	hdr->good_bytes = desc->pg_count;
+	hdr->dreq = desc->pg_dreq;
+	hdr->release = release;
+	hdr->completion_ops = desc->pg_completion_ops;
+
+	/* init_hdr is optional */
+	if (hdr->completion_ops->init_hdr != NULL)
+		hdr->completion_ops->init_hdr(hdr);
+}
+
+/*
+ * Record @error at file position @pos in @hdr, under hdr->lock.
+ *
+ * Nothing changes unless @pos is below io_start + good_bytes. When it is,
+ * the ERROR flag is set, the EOF flag is cleared, good_bytes is cut back
+ * to end at @pos and @error is stored.
+ */
+void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos)
+{
+	spin_lock(&hdr->lock);
+	if (pos >= hdr->io_start + hdr->good_bytes)
+		goto out_unlock;
+
+	set_bit(NFS_IOHDR_ERROR, &hdr->flags);
+	clear_bit(NFS_IOHDR_EOF, &hdr->flags);
+	hdr->good_bytes = pos - hdr->io_start;
+	hdr->error = error;
+out_unlock:
+	spin_unlock(&hdr->lock);
+}
+
static inline struct nfs_page *
nfs_page_alloc(void)
{
@@ -75,12 +117,8 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode,
* long write-back delay. This will be adjusted in
* update_nfs_request below if the region is not locked. */
req->wb_page = page;
- atomic_set(&req->wb_complete, 0);
req->wb_index = page->index;
page_cache_get(page);
- BUG_ON(PagePrivate(page));
- BUG_ON(!PageLocked(page));
- BUG_ON(page->mapping->host != inode);
req->wb_offset = offset;
req->wb_pgbase = offset;
req->wb_bytes = count;
@@ -103,37 +141,16 @@ void nfs_unlock_request(struct nfs_page *req)
clear_bit(PG_BUSY, &req->wb_flags);
smp_mb__after_clear_bit();
wake_up_bit(&req->wb_flags, PG_BUSY);
- nfs_release_request(req);
}
/**
- * nfs_set_page_tag_locked - Tag a request as locked
+ * nfs_unlock_and_release_request - Unlock request and release the nfs_page
* @req:
*/
-int nfs_set_page_tag_locked(struct nfs_page *req)
+void nfs_unlock_and_release_request(struct nfs_page *req)
{
- if (!nfs_lock_request_dontget(req))
- return 0;
- if (test_bit(PG_MAPPED, &req->wb_flags))
- radix_tree_tag_set(&NFS_I(req->wb_context->dentry->d_inode)->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
- return 1;
-}
-
-/**
- * nfs_clear_page_tag_locked - Clear request tag and wake up sleepers
- */
-void nfs_clear_page_tag_locked(struct nfs_page *req)
-{
- if (test_bit(PG_MAPPED, &req->wb_flags)) {
- struct inode *inode = req->wb_context->dentry->d_inode;
- struct nfs_inode *nfsi = NFS_I(inode);
-
- spin_lock(&inode->i_lock);
- radix_tree_tag_clear(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
- nfs_unlock_request(req);
- spin_unlock(&inode->i_lock);
- } else
- nfs_unlock_request(req);
+ nfs_unlock_request(req);
+ nfs_release_request(req);
}
/*
@@ -232,6 +249,7 @@ EXPORT_SYMBOL_GPL(nfs_generic_pg_test);
void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
struct inode *inode,
const struct nfs_pageio_ops *pg_ops,
+ const struct nfs_pgio_completion_ops *compl_ops,
size_t bsize,
int io_flags)
{
@@ -244,9 +262,11 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
desc->pg_recoalesce = 0;
desc->pg_inode = inode;
desc->pg_ops = pg_ops;
+ desc->pg_completion_ops = compl_ops;
desc->pg_ioflags = io_flags;
desc->pg_error = 0;
desc->pg_lseg = NULL;
+ desc->pg_dreq = NULL;
}
/**
@@ -270,12 +290,12 @@ static bool nfs_can_coalesce_requests(struct nfs_page *prev,
return false;
if (req->wb_context->state != prev->wb_context->state)
return false;
- if (req->wb_index != (prev->wb_index + 1))
- return false;
if (req->wb_pgbase != 0)
return false;
if (prev->wb_pgbase + prev->wb_bytes != PAGE_CACHE_SIZE)
return false;
+ if (req_offset(req) != req_offset(prev) + prev->wb_bytes)
+ return false;
return pgio->pg_ops->pg_test(pgio, prev, req);
}
@@ -425,67 +445,6 @@ void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *desc, pgoff_t index)
}
}
-#define NFS_SCAN_MAXENTRIES 16
-/**
- * nfs_scan_list - Scan a list for matching requests
- * @nfsi: NFS inode
- * @dst: Destination list
- * @idx_start: lower bound of page->index to scan
- * @npages: idx_start + npages sets the upper bound to scan.
- * @tag: tag to scan for
- *
- * Moves elements from one of the inode request lists.
- * If the number of requests is set to 0, the entire address_space
- * starting at index idx_start, is scanned.
- * The requests are *not* checked to ensure that they form a contiguous set.
- * You must be holding the inode's i_lock when calling this function
- */
-int nfs_scan_list(struct nfs_inode *nfsi,
- struct list_head *dst, pgoff_t idx_start,
- unsigned int npages, int tag)
-{
- struct nfs_page *pgvec[NFS_SCAN_MAXENTRIES];
- struct nfs_page *req;
- pgoff_t idx_end;
- int found, i;
- int res;
- struct list_head *list;
-
- res = 0;
- if (npages == 0)
- idx_end = ~0;
- else
- idx_end = idx_start + npages - 1;
-
- for (;;) {
- found = radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree,
- (void **)&pgvec[0], idx_start,
- NFS_SCAN_MAXENTRIES, tag);
- if (found <= 0)
- break;
- for (i = 0; i < found; i++) {
- req = pgvec[i];
- if (req->wb_index > idx_end)
- goto out;
- idx_start = req->wb_index + 1;
- if (nfs_set_page_tag_locked(req)) {
- kref_get(&req->wb_kref);
- radix_tree_tag_clear(&nfsi->nfs_page_tree,
- req->wb_index, tag);
- list = pnfs_choose_commit_list(req, dst);
- nfs_list_add_request(req, list);
- res++;
- if (res == INT_MAX)
- goto out;
- }
- }
- /* for latency reduction */
- cond_resched_lock(&nfsi->vfs_inode.i_lock);
- }
-out:
- return res;
-}
-
int __init nfs_init_nfspagecache(void)
{
nfs_page_cachep = kmem_cache_create("nfs_page",
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 8e672a2b2d6..bbc49caa7a8 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -70,6 +70,10 @@ find_pnfs_driver(u32 id)
spin_lock(&pnfs_spinlock);
local = find_pnfs_driver_locked(id);
+ if (local != NULL && !try_module_get(local->owner)) {
+ dprintk("%s: Could not grab reference on module\n", __func__);
+ local = NULL;
+ }
spin_unlock(&pnfs_spinlock);
return local;
}
@@ -80,6 +84,9 @@ unset_pnfs_layoutdriver(struct nfs_server *nfss)
if (nfss->pnfs_curr_ld) {
if (nfss->pnfs_curr_ld->clear_layoutdriver)
nfss->pnfs_curr_ld->clear_layoutdriver(nfss);
+ /* Decrement the MDS count. Purge the deviceid cache if zero */
+ if (atomic_dec_and_test(&nfss->nfs_client->cl_mds_count))
+ nfs4_deviceid_purge_client(nfss->nfs_client);
module_put(nfss->pnfs_curr_ld->owner);
}
nfss->pnfs_curr_ld = NULL;
@@ -101,8 +108,8 @@ set_pnfs_layoutdriver(struct nfs_server *server, const struct nfs_fh *mntfh,
goto out_no_driver;
if (!(server->nfs_client->cl_exchange_flags &
(EXCHGID4_FLAG_USE_NON_PNFS | EXCHGID4_FLAG_USE_PNFS_MDS))) {
- printk(KERN_ERR "%s: id %u cl_exchange_flags 0x%x\n", __func__,
- id, server->nfs_client->cl_exchange_flags);
+ printk(KERN_ERR "NFS: %s: id %u cl_exchange_flags 0x%x\n",
+ __func__, id, server->nfs_client->cl_exchange_flags);
goto out_no_driver;
}
ld_type = find_pnfs_driver(id);
@@ -115,18 +122,16 @@ set_pnfs_layoutdriver(struct nfs_server *server, const struct nfs_fh *mntfh,
goto out_no_driver;
}
}
- if (!try_module_get(ld_type->owner)) {
- dprintk("%s: Could not grab reference on module\n", __func__);
- goto out_no_driver;
- }
server->pnfs_curr_ld = ld_type;
if (ld_type->set_layoutdriver
&& ld_type->set_layoutdriver(server, mntfh)) {
- printk(KERN_ERR "%s: Error initializing pNFS layout driver %u.\n",
- __func__, id);
+ printk(KERN_ERR "NFS: %s: Error initializing pNFS layout "
+ "driver %u.\n", __func__, id);
module_put(ld_type->owner);
goto out_no_driver;
}
+ /* Bump the MDS count */
+ atomic_inc(&server->nfs_client->cl_mds_count);
dprintk("%s: pNFS module for %u set\n", __func__, id);
return;
@@ -143,11 +148,11 @@ pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *ld_type)
struct pnfs_layoutdriver_type *tmp;
if (ld_type->id == 0) {
- printk(KERN_ERR "%s id 0 is reserved\n", __func__);
+ printk(KERN_ERR "NFS: %s id 0 is reserved\n", __func__);
return status;
}
if (!ld_type->alloc_lseg || !ld_type->free_lseg) {
- printk(KERN_ERR "%s Layout driver must provide "
+ printk(KERN_ERR "NFS: %s Layout driver must provide "
"alloc_lseg and free_lseg.\n", __func__);
return status;
}
@@ -160,7 +165,7 @@ pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *ld_type)
dprintk("%s Registering id:%u name:%s\n", __func__, ld_type->id,
ld_type->name);
} else {
- printk(KERN_ERR "%s Module with id %d already loaded!\n",
+ printk(KERN_ERR "NFS: %s Module with id %d already loaded!\n",
__func__, ld_type->id);
}
spin_unlock(&pnfs_spinlock);
@@ -395,6 +400,9 @@ mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
dprintk("%s:Begin lo %p\n", __func__, lo);
if (list_empty(&lo->plh_segs)) {
+ /* Reset MDS Threshold I/O counters */
+ NFS_I(lo->plh_inode)->write_io = 0;
+ NFS_I(lo->plh_inode)->read_io = 0;
if (!test_and_set_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags))
put_layout_hdr_locked(lo);
return 0;
@@ -455,6 +463,7 @@ pnfs_destroy_layout(struct nfs_inode *nfsi)
spin_unlock(&nfsi->vfs_inode.i_lock);
pnfs_free_lseg_list(&tmp_list);
}
+EXPORT_SYMBOL_GPL(pnfs_destroy_layout);
/*
* Called by the state manger to remove all layouts established under an
@@ -496,12 +505,12 @@ pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new,
{
u32 oldseq, newseq;
- oldseq = be32_to_cpu(lo->plh_stateid.stateid.seqid);
- newseq = be32_to_cpu(new->stateid.seqid);
+ oldseq = be32_to_cpu(lo->plh_stateid.seqid);
+ newseq = be32_to_cpu(new->seqid);
if ((int)(newseq - oldseq) > 0) {
- memcpy(&lo->plh_stateid, &new->stateid, sizeof(new->stateid));
+ nfs4_stateid_copy(&lo->plh_stateid, new);
if (update_barrier) {
- u32 new_barrier = be32_to_cpu(new->stateid.seqid);
+ u32 new_barrier = be32_to_cpu(new->seqid);
if ((int)(new_barrier - lo->plh_barrier))
lo->plh_barrier = new_barrier;
@@ -525,7 +534,7 @@ pnfs_layoutgets_blocked(struct pnfs_layout_hdr *lo, nfs4_stateid *stateid,
int lget)
{
if ((stateid) &&
- (int)(lo->plh_barrier - be32_to_cpu(stateid->stateid.seqid)) >= 0)
+ (int)(lo->plh_barrier - be32_to_cpu(stateid->seqid)) >= 0)
return true;
return lo->plh_block_lgets ||
test_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags) ||
@@ -549,11 +558,10 @@ pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
do {
seq = read_seqbegin(&open_state->seqlock);
- memcpy(dst->data, open_state->stateid.data,
- sizeof(open_state->stateid.data));
+ nfs4_stateid_copy(dst, &open_state->stateid);
} while (read_seqretry(&open_state->seqlock, seq));
} else
- memcpy(dst->data, lo->plh_stateid.data, sizeof(lo->plh_stateid.data));
+ nfs4_stateid_copy(dst, &lo->plh_stateid);
spin_unlock(&lo->plh_inode->i_lock);
dprintk("<-- %s\n", __func__);
return status;
@@ -588,9 +596,9 @@ send_layoutget(struct pnfs_layout_hdr *lo,
/* allocate pages for xdr post processing */
max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz;
- max_pages = max_resp_sz >> PAGE_SHIFT;
+ max_pages = nfs_page_array_len(0, max_resp_sz);
- pages = kzalloc(max_pages * sizeof(struct page *), gfp_flags);
+ pages = kcalloc(max_pages, sizeof(struct page *), gfp_flags);
if (!pages)
goto out_err_free;
@@ -693,6 +701,7 @@ out:
dprintk("<-- %s status: %d\n", __func__, status);
return status;
}
+EXPORT_SYMBOL_GPL(_pnfs_return_layout);
bool pnfs_roc(struct inode *ino)
{
@@ -760,7 +769,7 @@ bool pnfs_roc_drain(struct inode *ino, u32 *barrier)
}
if (!found) {
struct pnfs_layout_hdr *lo = nfsi->layout;
- u32 current_seqid = be32_to_cpu(lo->plh_stateid.stateid.seqid);
+ u32 current_seqid = be32_to_cpu(lo->plh_stateid.seqid);
/* Since close does not return a layout stateid for use as
* a barrier, we choose the worst-case barrier.
@@ -932,6 +941,81 @@ pnfs_find_lseg(struct pnfs_layout_hdr *lo,
}
/*
+ * Use mdsthreshold hints set at each OPEN to determine if I/O should go
+ * to the MDS or over pNFS
+ *
+ * The nfs_inode read_io and write_io fields are cumulative counters reset
+ * when there are no layout segments. Note that in pnfs_update_layout iomode
+ * is set to IOMODE_READ for a READ request, and set to IOMODE_RW for a
+ * WRITE request.
+ *
+ * A return of true means use MDS I/O.
+ *
+ * From rfc 5661:
+ * If a file's size is smaller than the file size threshold, data accesses
+ * SHOULD be sent to the metadata server. If an I/O request has a length that
+ * is below the I/O size threshold, the I/O SHOULD be sent to the metadata
+ * server. If both file size and I/O size are provided, the client SHOULD
+ * reach or exceed both thresholds before sending its read or write
+ * requests to the data server.
+ */
+static bool pnfs_within_mdsthreshold(struct nfs_open_context *ctx,
+ struct inode *ino, int iomode)
+{
+ struct nfs4_threshold *t = ctx->mdsthreshold;
+ struct nfs_inode *nfsi = NFS_I(ino);
+ loff_t fsize = i_size_read(ino);
+ bool size = false, size_set = false, io = false, io_set = false, ret = false;
+
+ if (t == NULL)
+ return ret;
+
+ dprintk("%s bm=0x%x rd_sz=%llu wr_sz=%llu rd_io=%llu wr_io=%llu\n",
+ __func__, t->bm, t->rd_sz, t->wr_sz, t->rd_io_sz, t->wr_io_sz);
+
+ switch (iomode) {
+ case IOMODE_READ:
+ if (t->bm & THRESHOLD_RD) {
+ dprintk("%s fsize %llu\n", __func__, fsize);
+ size_set = true;
+ if (fsize < t->rd_sz)
+ size = true;
+ }
+ if (t->bm & THRESHOLD_RD_IO) {
+ dprintk("%s nfsi->read_io %llu\n", __func__,
+ nfsi->read_io);
+ io_set = true;
+ if (nfsi->read_io < t->rd_io_sz)
+ io = true;
+ }
+ break;
+ case IOMODE_RW:
+ if (t->bm & THRESHOLD_WR) {
+ dprintk("%s fsize %llu\n", __func__, fsize);
+ size_set = true;
+ if (fsize < t->wr_sz)
+ size = true;
+ }
+ if (t->bm & THRESHOLD_WR_IO) {
+ dprintk("%s nfsi->write_io %llu\n", __func__,
+ nfsi->write_io);
+ io_set = true;
+ if (nfsi->write_io < t->wr_io_sz)
+ io = true;
+ }
+ break;
+ }
+ if (size_set && io_set) {
+ if (size && io)
+ ret = true;
+ } else if (size || io)
+ ret = true;
+
+ dprintk("<-- %s size %d io %d ret %d\n", __func__, size, io, ret);
+ return ret;
+}
+
+/*
* Layout segment is retreived from the server if not cached.
* The appropriate layout segment is referenced and returned to the caller.
*/
@@ -958,6 +1042,10 @@ pnfs_update_layout(struct inode *ino,
if (!pnfs_enabled_sb(NFS_SERVER(ino)))
return NULL;
+
+ if (pnfs_within_mdsthreshold(ctx, ino, iomode))
+ return NULL;
+
spin_lock(&ino->i_lock);
lo = pnfs_find_alloc_layout(ino, ctx, gfp_flags);
if (lo == NULL) {
@@ -966,8 +1054,7 @@ pnfs_update_layout(struct inode *ino,
}
/* Do we even need to bother with this? */
- if (test_bit(NFS4CLNT_LAYOUTRECALL, &clp->cl_state) ||
- test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) {
+ if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) {
dprintk("%s matches recall, use MDS\n", __func__);
goto out_unlock;
}
@@ -1032,7 +1119,6 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
struct nfs4_layoutget_res *res = &lgp->res;
struct pnfs_layout_segment *lseg;
struct inode *ino = lo->plh_inode;
- struct nfs_client *clp = NFS_SERVER(ino)->nfs_client;
int status = 0;
/* Inject layout blob into I/O device driver */
@@ -1048,8 +1134,7 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
}
spin_lock(&ino->i_lock);
- if (test_bit(NFS4CLNT_LAYOUTRECALL, &clp->cl_state) ||
- test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) {
+ if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) {
dprintk("%s forget reply due to recall\n", __func__);
goto out_forget_reply;
}
@@ -1086,6 +1171,10 @@ pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *r
{
BUG_ON(pgio->pg_lseg != NULL);
+ if (req->wb_offset != req->wb_pgbase) {
+ nfs_pageio_reset_read_mds(pgio);
+ return;
+ }
pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
req->wb_context,
req_offset(req),
@@ -1104,6 +1193,10 @@ pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *
{
BUG_ON(pgio->pg_lseg != NULL);
+ if (req->wb_offset != req->wb_pgbase) {
+ nfs_pageio_reset_write_mds(pgio);
+ return;
+ }
pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
req->wb_context,
req_offset(req),
@@ -1117,26 +1210,31 @@ pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *
EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write);
bool
-pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode)
+pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode,
+ const struct nfs_pgio_completion_ops *compl_ops)
{
struct nfs_server *server = NFS_SERVER(inode);
struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld;
if (ld == NULL)
return false;
- nfs_pageio_init(pgio, inode, ld->pg_read_ops, server->rsize, 0);
+ nfs_pageio_init(pgio, inode, ld->pg_read_ops, compl_ops,
+ server->rsize, 0);
return true;
}
bool
-pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags)
+pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode,
+ int ioflags,
+ const struct nfs_pgio_completion_ops *compl_ops)
{
struct nfs_server *server = NFS_SERVER(inode);
struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld;
if (ld == NULL)
return false;
- nfs_pageio_init(pgio, inode, ld->pg_write_ops, server->wsize, ioflags);
+ nfs_pageio_init(pgio, inode, ld->pg_write_ops, compl_ops,
+ server->wsize, ioflags);
return true;
}
@@ -1166,20 +1264,65 @@ pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
}
EXPORT_SYMBOL_GPL(pnfs_generic_pg_test);
+int pnfs_write_done_resend_to_mds(struct inode *inode,
+ struct list_head *head,
+ const struct nfs_pgio_completion_ops *compl_ops)
+{
+ struct nfs_pageio_descriptor pgio;
+ LIST_HEAD(failed);
+
+ /* Resend all requests through the MDS */
+ nfs_pageio_init_write_mds(&pgio, inode, FLUSH_STABLE, compl_ops);
+ while (!list_empty(head)) {
+ struct nfs_page *req = nfs_list_entry(head->next);
+
+ nfs_list_remove_request(req);
+ if (!nfs_pageio_add_request(&pgio, req))
+ nfs_list_add_request(req, &failed);
+ }
+ nfs_pageio_complete(&pgio);
+
+ if (!list_empty(&failed)) {
+ /* For some reason our attempt to resend pages. Mark the
+ * overall send request as having failed, and let
+ * nfs_writeback_release_full deal with the error.
+ */
+ list_move(&failed, head);
+ return -EIO;
+ }
+ return 0;
+}
+EXPORT_SYMBOL_GPL(pnfs_write_done_resend_to_mds);
+
+static void pnfs_ld_handle_write_error(struct nfs_write_data *data)
+{
+ struct nfs_pgio_header *hdr = data->header;
+
+ dprintk("pnfs write error = %d\n", hdr->pnfs_error);
+ if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags &
+ PNFS_LAYOUTRET_ON_ERROR) {
+ clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(hdr->inode)->flags);
+ pnfs_return_layout(hdr->inode);
+ }
+ if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags))
+ data->task.tk_status = pnfs_write_done_resend_to_mds(hdr->inode,
+ &hdr->pages,
+ hdr->completion_ops);
+}
+
/*
* Called by non rpc-based layout drivers
*/
void pnfs_ld_write_done(struct nfs_write_data *data)
{
- if (likely(!data->pnfs_error)) {
+ struct nfs_pgio_header *hdr = data->header;
+
+ if (!hdr->pnfs_error) {
pnfs_set_layoutcommit(data);
- data->mds_ops->rpc_call_done(&data->task, data);
- } else {
- put_lseg(data->lseg);
- data->lseg = NULL;
- dprintk("pnfs write error = %d\n", data->pnfs_error);
- }
- data->mds_ops->rpc_release(data);
+ hdr->mds_ops->rpc_call_done(&data->task, data);
+ } else
+ pnfs_ld_handle_write_error(data);
+ hdr->mds_ops->rpc_release(data);
}
EXPORT_SYMBOL_GPL(pnfs_ld_write_done);
@@ -1187,11 +1330,13 @@ static void
pnfs_write_through_mds(struct nfs_pageio_descriptor *desc,
struct nfs_write_data *data)
{
- list_splice_tail_init(&data->pages, &desc->pg_list);
- if (data->req && list_empty(&data->req->wb_list))
- nfs_list_add_request(data->req, &desc->pg_list);
- nfs_pageio_reset_write_mds(desc);
- desc->pg_recoalesce = 1;
+ struct nfs_pgio_header *hdr = data->header;
+
+ if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
+ list_splice_tail_init(&hdr->pages, &desc->pg_list);
+ nfs_pageio_reset_write_mds(desc);
+ desc->pg_recoalesce = 1;
+ }
nfs_writedata_release(data);
}
@@ -1201,23 +1346,18 @@ pnfs_try_to_write_data(struct nfs_write_data *wdata,
struct pnfs_layout_segment *lseg,
int how)
{
- struct inode *inode = wdata->inode;
+ struct nfs_pgio_header *hdr = wdata->header;
+ struct inode *inode = hdr->inode;
enum pnfs_try_status trypnfs;
struct nfs_server *nfss = NFS_SERVER(inode);
- wdata->mds_ops = call_ops;
- wdata->lseg = get_lseg(lseg);
+ hdr->mds_ops = call_ops;
dprintk("%s: Writing ino:%lu %u@%llu (how %d)\n", __func__,
inode->i_ino, wdata->args.count, wdata->args.offset, how);
-
trypnfs = nfss->pnfs_curr_ld->write_pagelist(wdata, how);
- if (trypnfs == PNFS_NOT_ATTEMPTED) {
- put_lseg(wdata->lseg);
- wdata->lseg = NULL;
- } else
+ if (trypnfs != PNFS_NOT_ATTEMPTED)
nfs_inc_stats(inode, NFSIOS_PNFS_WRITE);
-
dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs);
return trypnfs;
}
@@ -1233,7 +1373,7 @@ pnfs_do_multiple_writes(struct nfs_pageio_descriptor *desc, struct list_head *he
while (!list_empty(head)) {
enum pnfs_try_status trypnfs;
- data = list_entry(head->next, struct nfs_write_data, list);
+ data = list_first_entry(head, struct nfs_write_data, list);
list_del_init(&data->list);
trypnfs = pnfs_try_to_write_data(data, call_ops, lseg, how);
@@ -1243,40 +1383,82 @@ pnfs_do_multiple_writes(struct nfs_pageio_descriptor *desc, struct list_head *he
put_lseg(lseg);
}
+static void pnfs_writehdr_free(struct nfs_pgio_header *hdr)