From 59948db3be76099b14c7103d0f46c5454b173c3a Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Wed, 18 Jul 2012 14:20:49 -0400 Subject: NFS: fix pnfs regression with directio reads Commit 1abb50886af "NFS: Create an read_pageio_init() function" did not modify the call in direct.c, preventing direct io from using pnfs. This reintroduces that capability. Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/nfs/direct.c') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 48253372ab1..69f20c73220 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -393,7 +393,7 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq, size_t requested_bytes = 0; unsigned long seg; - nfs_pageio_init_read(&desc, dreq->inode, + NFS_PROTO(dreq->inode)->read_pageio_init(&desc, dreq->inode, &nfs_direct_read_completion_ops); get_dreq(dreq); desc.pg_dreq = dreq; -- cgit v1.2.3 From c95908e4c50d218f016e3866f5abf786055df635 Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Wed, 18 Jul 2012 14:20:50 -0400 Subject: NFS: fix pnfs regression with directio writes Commit 57208fa7e51 "NFS: Create an write_pageio_init() function" did not modify the calls in direct.c, preventing direct io from using pnfs. This reintroduces that capability. Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/nfs/direct.c') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 69f20c73220..42dce909ec7 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -478,7 +478,7 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) dreq->count = 0; get_dreq(dreq); - nfs_pageio_init_write(&desc, dreq->inode, FLUSH_STABLE, + NFS_PROTO(dreq->inode)->write_pageio_init(&desc, dreq->inode, FLUSH_STABLE, &nfs_direct_write_completion_ops); desc.pg_dreq = dreq; @@ -782,7 +782,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, size_t requested_bytes = 0; unsigned long seg; - nfs_pageio_init_write(&desc, inode, FLUSH_COND_STABLE, + NFS_PROTO(inode)->write_pageio_init(&desc, inode, FLUSH_COND_STABLE, &nfs_direct_write_completion_ops); desc.pg_dreq = dreq; get_dreq(dreq); -- cgit v1.2.3 From 1c606fb74c758beafd98cbad9a9133eadeec2371 Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Mon, 30 Jul 2012 16:05:24 -0400 Subject: NFS: Convert v3 into a module This patch exports symbols and moves over the final structures needed by the v3 module. In addition, I also switch over to using IS_ENABLED() to check if CONFIG_NFS_V3 or CONFIG_NFS_V3_MODULE are set. The module (nfs3.ko) will be created in the same directory as nfs.ko and will be automatically loaded the first time you try to mount over NFS v3. Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/nfs/direct.c') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 42dce909ec7..899238156b1 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -460,7 +460,7 @@ static void nfs_inode_dio_write_done(struct inode *inode) inode_dio_done(inode); } -#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) +#if IS_ENABLED(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) { struct nfs_pageio_descriptor desc; -- cgit v1.2.3 From 89d77c8fa8e6d1cb7e2cce95b428be30ddcc6f23 Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Mon, 30 Jul 2012 16:05:25 -0400 Subject: NFS: Convert v4 into a module This patch exports symbols needed by the v4 module. In addition, I also switch over to using IS_ENABLED() to check if CONFIG_NFS_V4 or CONFIG_NFS_V4_MODULE are set. The module (nfs4.ko) will be created in the same directory as nfs.ko and will be automatically loaded the first time you try to mount over NFS v4. Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/nfs/direct.c') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 899238156b1..b7b4f80968b 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -460,7 +460,7 @@ static void nfs_inode_dio_write_done(struct inode *inode) inode_dio_done(inode); } -#if IS_ENABLED(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) +#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4) static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) { struct nfs_pageio_descriptor desc; -- cgit v1.2.3 From a564b8f0398636ba30b07c0eaebdef7ff7837249 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Tue, 31 Jul 2012 16:45:12 -0700 Subject: nfs: enable swap on NFS Implement the new swapfile a_ops for NFS and hook up ->direct_IO. This will set the NFS socket to SOCK_MEMALLOC and run socket reconnect under PF_MEMALLOC as well as reset SOCK_MEMALLOC before engaging the protocol ->connect() method. PF_MEMALLOC should allow the allocation of struct socket and related objects and the early (re)setting of SOCK_MEMALLOC should allow us to receive the packets required for the TCP connection buildup. [jlayton@redhat.com: Restore PF_MEMALLOC task flags in all cases] [dfeng@redhat.com: Fix handling of multiple swap files] [a.p.zijlstra@chello.nl: Original patch] Signed-off-by: Mel Gorman Acked-by: Rik van Riel Cc: Christoph Hellwig Cc: David S. Miller Cc: Eric B Munson Cc: Eric Paris Cc: James Morris Cc: Mel Gorman Cc: Mike Christie Cc: Neil Brown Cc: Sebastian Andrzej Siewior Cc: Trond Myklebust Cc: Xiaotian Feng Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nfs/direct.c | 82 +++++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 54 insertions(+), 28 deletions(-) (limited to 'fs/nfs/direct.c') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 42dce909ec7..bf9c8d0ec16 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -115,17 +115,28 @@ static inline int put_dreq(struct nfs_direct_req *dreq) * @nr_segs: size of iovec array * * The presence of this routine in the address space ops vector means - * the NFS client supports direct I/O. However, we shunt off direct - * read and write requests before the VFS gets them, so this method - * should never be called. + * the NFS client supports direct I/O. However, for most direct IO, we + * shunt off direct read and write requests before the VFS gets them, + * so this method is only ever called for swap. */ ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t pos, unsigned long nr_segs) { +#ifndef CONFIG_NFS_SWAP dprintk("NFS: nfs_direct_IO (%s) off/no(%Ld/%lu) EINVAL\n", iocb->ki_filp->f_path.dentry->d_name.name, (long long) pos, nr_segs); return -EINVAL; +#else + VM_BUG_ON(iocb->ki_left != PAGE_SIZE); + VM_BUG_ON(iocb->ki_nbytes != PAGE_SIZE); + + if (rw == READ || rw == KERNEL_READ) + return nfs_file_direct_read(iocb, iov, nr_segs, pos, + rw == READ ? true : false); + return nfs_file_direct_write(iocb, iov, nr_segs, pos, + rw == WRITE ? true : false); +#endif /* CONFIG_NFS_SWAP */ } static void nfs_direct_release_pages(struct page **pages, unsigned int npages) @@ -303,7 +314,7 @@ static const struct nfs_pgio_completion_ops nfs_direct_read_completion_ops = { */ static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *desc, const struct iovec *iov, - loff_t pos) + loff_t pos, bool uio) { struct nfs_direct_req *dreq = desc->pg_dreq; struct nfs_open_context *ctx = dreq->ctx; @@ -331,12 +342,20 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *de GFP_KERNEL); if (!pagevec) break; - down_read(¤t->mm->mmap_sem); - result = get_user_pages(current, current->mm, user_addr, + if (uio) { + down_read(¤t->mm->mmap_sem); + result = get_user_pages(current, current->mm, user_addr, npages, 1, 0, pagevec, NULL); - up_read(¤t->mm->mmap_sem); - if (result < 0) - break; + up_read(¤t->mm->mmap_sem); + if (result < 0) + break; + } else { + WARN_ON(npages != 1); + result = get_kernel_page(user_addr, 1, pagevec); + if (WARN_ON(result != 1)) + break; + } + if ((unsigned)result < npages) { bytes = result * PAGE_SIZE; if (bytes <= pgbase) { @@ -386,7 +405,7 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *de static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq, const struct iovec *iov, unsigned long nr_segs, - loff_t pos) + loff_t pos, bool uio) { struct nfs_pageio_descriptor desc; ssize_t result = -EINVAL; @@ -400,7 +419,7 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq, for (seg = 0; seg < nr_segs; seg++) { const struct iovec *vec = &iov[seg]; - result = nfs_direct_read_schedule_segment(&desc, vec, pos); + result = nfs_direct_read_schedule_segment(&desc, vec, pos, uio); if (result < 0) break; requested_bytes += result; @@ -426,7 +445,7 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq, } static ssize_t nfs_direct_read(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) + unsigned long nr_segs, loff_t pos, bool uio) { ssize_t result = -ENOMEM; struct inode *inode = iocb->ki_filp->f_mapping->host; @@ -444,7 +463,7 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, const struct iovec *iov, if (!is_sync_kiocb(iocb)) dreq->iocb = iocb; - result = nfs_direct_read_schedule_iovec(dreq, iov, nr_segs, pos); + result = nfs_direct_read_schedule_iovec(dreq, iov, nr_segs, pos, uio); if (!result) result = nfs_direct_wait(dreq); NFS_I(inode)->read_io += result; @@ -610,7 +629,7 @@ static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode */ static ssize_t nfs_direct_write_schedule_segment(struct nfs_pageio_descriptor *desc, const struct iovec *iov, - loff_t pos) + loff_t pos, bool uio) { struct nfs_direct_req *dreq = desc->pg_dreq; struct nfs_open_context *ctx = dreq->ctx; @@ -638,12 +657,19 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_pageio_descriptor *d if (!pagevec) break; - down_read(¤t->mm->mmap_sem); - result = get_user_pages(current, current->mm, user_addr, - npages, 0, 0, pagevec, NULL); - up_read(¤t->mm->mmap_sem); - if (result < 0) - break; + if (uio) { + down_read(¤t->mm->mmap_sem); + result = get_user_pages(current, current->mm, user_addr, + npages, 0, 0, pagevec, NULL); + up_read(¤t->mm->mmap_sem); + if (result < 0) + break; + } else { + WARN_ON(npages != 1); + result = get_kernel_page(user_addr, 0, pagevec); + if (WARN_ON(result != 1)) + break; + } if ((unsigned)result < npages) { bytes = result * PAGE_SIZE; @@ -774,7 +800,7 @@ static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops = { static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, const struct iovec *iov, unsigned long nr_segs, - loff_t pos) + loff_t pos, bool uio) { struct nfs_pageio_descriptor desc; struct inode *inode = dreq->inode; @@ -790,7 +816,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, for (seg = 0; seg < nr_segs; seg++) { const struct iovec *vec = &iov[seg]; - result = nfs_direct_write_schedule_segment(&desc, vec, pos); + result = nfs_direct_write_schedule_segment(&desc, vec, pos, uio); if (result < 0) break; requested_bytes += result; @@ -818,7 +844,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos, - size_t count) + size_t count, bool uio) { ssize_t result = -ENOMEM; struct inode *inode = iocb->ki_filp->f_mapping->host; @@ -836,7 +862,7 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov, if (!is_sync_kiocb(iocb)) dreq->iocb = iocb; - result = nfs_direct_write_schedule_iovec(dreq, iov, nr_segs, pos); + result = nfs_direct_write_schedule_iovec(dreq, iov, nr_segs, pos, uio); if (!result) result = nfs_direct_wait(dreq); out_release: @@ -867,7 +893,7 @@ out: * cache. */ ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) + unsigned long nr_segs, loff_t pos, bool uio) { ssize_t retval = -EINVAL; struct file *file = iocb->ki_filp; @@ -892,7 +918,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov, task_io_account_read(count); - retval = nfs_direct_read(iocb, iov, nr_segs, pos); + retval = nfs_direct_read(iocb, iov, nr_segs, pos, uio); if (retval > 0) iocb->ki_pos = pos + retval; @@ -923,7 +949,7 @@ out: * is no atomic O_APPEND write facility in the NFS protocol. */ ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) + unsigned long nr_segs, loff_t pos, bool uio) { ssize_t retval = -EINVAL; struct file *file = iocb->ki_filp; @@ -955,7 +981,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov, task_io_account_write(count); - retval = nfs_direct_write(iocb, iov, nr_segs, pos, count); + retval = nfs_direct_write(iocb, iov, nr_segs, pos, count, uio); if (retval > 0) { struct inode *inode = mapping->host; -- cgit v1.2.3 From b3c54de6f82d01637796bcc1f667a45f3b32e814 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 13 Aug 2012 17:15:50 -0400 Subject: NFS: Convert nfs_get_lock_context to return an ERR_PTR on failure We want to be able to distinguish between allocation failures, and the case where the lock context is not needed (because there are no locks). Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'fs/nfs/direct.c') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 1ba385b7c90..22130df1621 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -450,6 +450,7 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, const struct iovec *iov, ssize_t result = -ENOMEM; struct inode *inode = iocb->ki_filp->f_mapping->host; struct nfs_direct_req *dreq; + struct nfs_lock_context *l_ctx; dreq = nfs_direct_req_alloc(); if (dreq == NULL) @@ -457,9 +458,12 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, const struct iovec *iov, dreq->inode = inode; dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp)); - dreq->l_ctx = nfs_get_lock_context(dreq->ctx); - if (dreq->l_ctx == NULL) + l_ctx = nfs_get_lock_context(dreq->ctx); + if (IS_ERR(l_ctx)) { + result = PTR_ERR(l_ctx); goto out_release; + } + dreq->l_ctx = l_ctx; if (!is_sync_kiocb(iocb)) dreq->iocb = iocb; @@ -849,6 +853,7 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov, ssize_t result = -ENOMEM; struct inode *inode = iocb->ki_filp->f_mapping->host; struct nfs_direct_req *dreq; + struct nfs_lock_context *l_ctx; dreq = nfs_direct_req_alloc(); if (!dreq) @@ -856,9 +861,12 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov, dreq->inode = inode; dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp)); - dreq->l_ctx = nfs_get_lock_context(dreq->ctx); - if (dreq->l_ctx == NULL) + l_ctx = nfs_get_lock_context(dreq->ctx); + if (IS_ERR(l_ctx)) { + result = PTR_ERR(l_ctx); goto out_release; + } + dreq->l_ctx = l_ctx; if (!is_sync_kiocb(iocb)) dreq->iocb = iocb; -- cgit v1.2.3 From 7acdb026818455638543b04b68d4a580c367fba8 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Fri, 24 Aug 2012 00:27:48 +0800 Subject: NFSv41: fix DIO write_io calculation pnfs_within_mdsthreshold() is called inside pg_init. We need to set read_io/write_io before that. Otherwise we fail pnfs_within_mdsthreshold() and IO goes to MDS. A simple test case: dd if=foo of=/mnt/pnfs/bar bs=10M count=1 oflag=direct Signed-off-by: Peng Tao Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/nfs/direct.c') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 22130df1621..253d397780b 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -467,10 +467,10 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, const struct iovec *iov, if (!is_sync_kiocb(iocb)) dreq->iocb = iocb; + NFS_I(inode)->read_io += iov_length(iov, nr_segs); result = nfs_direct_read_schedule_iovec(dreq, iov, nr_segs, pos, uio); if (!result) result = nfs_direct_wait(dreq); - NFS_I(inode)->read_io += result; out_release: nfs_direct_req_release(dreq); out: @@ -818,6 +818,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, get_dreq(dreq); atomic_inc(&inode->i_dio_count); + NFS_I(dreq->inode)->write_io += iov_length(iov, nr_segs); for (seg = 0; seg < nr_segs; seg++) { const struct iovec *vec = &iov[seg]; result = nfs_direct_write_schedule_segment(&desc, vec, pos, uio); @@ -829,7 +830,6 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, pos += vec->iov_len; } nfs_pageio_complete(&desc); - NFS_I(dreq->inode)->write_io += desc.pg_bytes_written; /* * If no bytes were started, return the error, and let the -- cgit v1.2.3 From 35754bc00e94e598c432ad02f7a3d3063c4402e3 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Tue, 25 Sep 2012 14:55:57 +0800 Subject: NFS: track direct IO left bytes Signed-off-by: Peng Tao Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'fs/nfs/direct.c') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 253d397780b..4be8673ee18 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -78,6 +78,7 @@ struct nfs_direct_req { atomic_t io_count; /* i/os we're waiting for */ spinlock_t lock; /* protect completion state */ ssize_t count, /* bytes actually processed */ + bytes_left, /* bytes left to be sent */ error; /* any reported error */ struct completion completion; /* wait for i/o completion */ @@ -390,6 +391,7 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *de user_addr += req_len; pos += req_len; count -= req_len; + dreq->bytes_left -= req_len; } /* The nfs_page now hold references to these pages */ nfs_direct_release_pages(pagevec, npages); @@ -457,6 +459,7 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, const struct iovec *iov, goto out; dreq->inode = inode; + dreq->bytes_left = iov_length(iov, nr_segs); dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp)); l_ctx = nfs_get_lock_context(dreq->ctx); if (IS_ERR(l_ctx)) { @@ -710,6 +713,7 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_pageio_descriptor *d user_addr += req_len; pos += req_len; count -= req_len; + dreq->bytes_left -= req_len; } /* The nfs_page now hold references to these pages */ nfs_direct_release_pages(pagevec, npages); @@ -860,6 +864,7 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov, goto out; dreq->inode = inode; + dreq->bytes_left = count; dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp)); l_ctx = nfs_get_lock_context(dreq->ctx); if (IS_ERR(l_ctx)) { -- cgit v1.2.3 From 6296556f0b31eaff29f2a3aee2c17b7eae895b98 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Tue, 25 Sep 2012 14:55:57 +0800 Subject: NFS41: send real write size in layoutget For buffer write, block layout client scan inode mapping to find next hole and use offset-to-hole as layoutget length. Object layout client uses offset-to-isize as layoutget length. For direct write, both block layout and object layout use dreq->bytes_left. Signed-off-by: Peng Tao Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'fs/nfs/direct.c') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 4be8673ee18..cae26cbd59e 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -46,6 +46,7 @@ #include #include #include +#include #include #include @@ -191,6 +192,12 @@ static void nfs_direct_req_release(struct nfs_direct_req *dreq) kref_put(&dreq->kref, nfs_direct_req_free); } +ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq) +{ + return dreq->bytes_left; +} +EXPORT_SYMBOL_GPL(nfs_dreq_bytes_left); + /* * Collects and returns the final error value/byte-count. */ -- cgit v1.2.3 From 67fad106a219e083c91c79695bd1807dde1bf7b9 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 12 Dec 2012 11:38:44 -0500 Subject: nfs: don't zero out the rest of the page if we hit the EOF on a DIO READ Eryu provided a test program that would segfault when attempting to read past the EOF on file that was opened O_DIRECT. The buffer given to the read() call was on the stack, and when he attempted to read past it it would scribble over the rest of the stack page. If we hit the end of the file on a DIO READ request, then we don't want to zero out the rest of the buffer. These aren't pagecache pages after all, and there's no guarantee that the buffers that were passed in represent entire pages. Cc: # v3.5+ Cc: Fred Isaman Reported-by: Eryu Guan Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 8 -------- 1 file changed, 8 deletions(-) (limited to 'fs/nfs/direct.c') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index cae26cbd59e..594f4e7e0b9 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -266,14 +266,6 @@ static void nfs_direct_read_completion(struct nfs_pgio_header *hdr) struct nfs_page *req = nfs_list_entry(hdr->pages.next); struct page *page = req->wb_page; - if (test_bit(NFS_IOHDR_EOF, &hdr->flags)) { - if (bytes > hdr->good_bytes) - zero_user(page, 0, PAGE_SIZE); - else if (hdr->good_bytes - bytes < PAGE_SIZE) - zero_user_segment(page, - hdr->good_bytes & ~PAGE_MASK, - PAGE_SIZE); - } if (!PageCompound(page)) { if (test_bit(NFS_IOHDR_ERROR, &hdr->flags)) { if (bytes < hdr->good_bytes) -- cgit v1.2.3 From be7e985804c610fcdcee8730cf42718b8a4e1c41 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 12 Dec 2012 12:36:31 -0500 Subject: nfs: fix page dirtying in NFS DIO read codepath The NFS DIO code will dirty pages that catch read responses in order to handle the case where someone is doing DIO reads into an mmapped buffer. The existing code doesn't really do the right thing though since it doesn't take into account the case where we might be attempting to read past the EOF. Fix the logic in that code to only dirty pages that ended up receiving data from the read. Note too that it really doesn't matter if NFS_IOHDR_ERROR is set or not. All that matters is if the page was altered by the read. Cc: Fred Isaman Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'fs/nfs/direct.c') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 594f4e7e0b9..0bd7a55a5f0 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -266,13 +266,8 @@ static void nfs_direct_read_completion(struct nfs_pgio_header *hdr) struct nfs_page *req = nfs_list_entry(hdr->pages.next); struct page *page = req->wb_page; - if (!PageCompound(page)) { - if (test_bit(NFS_IOHDR_ERROR, &hdr->flags)) { - if (bytes < hdr->good_bytes) - set_page_dirty(page); - } else - set_page_dirty(page); - } + if (!PageCompound(page) && bytes < hdr->good_bytes) + set_page_dirty(page); bytes += req->wb_bytes; nfs_list_remove_request(req); nfs_direct_readpage_release(req); -- cgit v1.2.3