From db3a3bcf08d5c79290b369d70cc0b80c369d8ad9 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 20 Jun 2012 15:50:14 -0400 Subject: NFSv2/v3: Remove incorrect dprintks from the readdir reply code The actual size of the directory is unknown to the client, so it is always requesting the maximum number it can handle. If the server is replying with fewer entries than was requested, then that will usually reflect the fact that we've hit the end of the directory. Flagging it as an error is therefore incorrect. Signed-off-by: Trond Myklebust --- fs/nfs/nfs2xdr.c | 10 ++-------- fs/nfs/nfs3xdr.c | 10 ++-------- 2 files changed, 4 insertions(+), 16 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c index baf759bccd0..db81166182c 100644 --- a/fs/nfs/nfs2xdr.c +++ b/fs/nfs/nfs2xdr.c @@ -978,16 +978,10 @@ static int decode_readdirok(struct xdr_stream *xdr) pglen = xdr->buf->page_len; hdrlen = (u8 *)xdr->p - (u8 *)xdr->iov->iov_base; recvd = xdr->buf->len - hdrlen; - if (unlikely(pglen > recvd)) - goto out_cheating; -out: + if (pglen > recvd) + pglen = recvd; xdr_read_pages(xdr, pglen); return pglen; -out_cheating: - dprintk("NFS: server cheating in readdir result: " - "pglen %u > recvd %u\n", pglen, recvd); - pglen = recvd; - goto out; } static int nfs2_xdr_dec_readdirres(struct rpc_rqst *req, diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index 902de489ec9..3c61c7f80a4 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -2045,16 +2045,10 @@ static int decode_dirlist3(struct xdr_stream *xdr) pglen = xdr->buf->page_len; hdrlen = (u8 *)xdr->p - (u8 *)xdr->iov->iov_base; recvd = xdr->buf->len - hdrlen; - if (unlikely(pglen > recvd)) - goto out_cheating; -out: + if (pglen > recvd) + pglen = recvd; xdr_read_pages(xdr, pglen); return pglen; -out_cheating: - dprintk("NFS: server cheating in readdir result: " - "pglen %u > recvd %u\n", pglen, recvd); - pglen = recvd; - goto out; } static int decode_readdir3resok(struct xdr_stream *xdr, -- 
cgit v1.2.3 From 64bd577ea0021f5903505de061b3b7d8a785ee94 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 20 Jun 2012 22:35:05 -0400 Subject: NFS: Let xdr_read_pages() check for buffer overflows xdr_read_pages will already do all of the buffer overflow checks that are currently being open-coded in the various callers. This patch simplifies the existing code by replacing the open coded checks. Signed-off-by: Trond Myklebust --- fs/nfs/nfs2xdr.c | 22 +++------------------- fs/nfs/nfs3xdr.c | 23 +++-------------------- fs/nfs/nfs4xdr.c | 39 ++++++--------------------------------- 3 files changed, 12 insertions(+), 72 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c index db81166182c..d04f0df7be5 100644 --- a/fs/nfs/nfs2xdr.c +++ b/fs/nfs/nfs2xdr.c @@ -106,19 +106,16 @@ static void print_overflow_msg(const char *func, const struct xdr_stream *xdr) static int decode_nfsdata(struct xdr_stream *xdr, struct nfs_readres *result) { u32 recvd, count; - size_t hdrlen; __be32 *p; p = xdr_inline_decode(xdr, 4); if (unlikely(p == NULL)) goto out_overflow; count = be32_to_cpup(p); - hdrlen = (u8 *)xdr->p - (u8 *)xdr->iov->iov_base; - recvd = xdr->buf->len - hdrlen; + recvd = xdr_read_pages(xdr, count); if (unlikely(count > recvd)) goto out_cheating; out: - xdr_read_pages(xdr, count); result->eof = 0; /* NFSv2 does not pass EOF flag on the wire. 
*/ result->count = count; return count; @@ -440,7 +437,6 @@ static void encode_path(struct xdr_stream *xdr, struct page **pages, u32 length) static int decode_path(struct xdr_stream *xdr) { u32 length, recvd; - size_t hdrlen; __be32 *p; p = xdr_inline_decode(xdr, 4); @@ -449,12 +445,9 @@ static int decode_path(struct xdr_stream *xdr) length = be32_to_cpup(p); if (unlikely(length >= xdr->buf->page_len || length > NFS_MAXPATHLEN)) goto out_size; - hdrlen = (u8 *)xdr->p - (u8 *)xdr->iov->iov_base; - recvd = xdr->buf->len - hdrlen; + recvd = xdr_read_pages(xdr, length); if (unlikely(length > recvd)) goto out_cheating; - - xdr_read_pages(xdr, length); xdr_terminate_string(xdr->buf, length); return 0; out_size: @@ -972,16 +965,7 @@ out_overflow: */ static int decode_readdirok(struct xdr_stream *xdr) { - u32 recvd, pglen; - size_t hdrlen; - - pglen = xdr->buf->page_len; - hdrlen = (u8 *)xdr->p - (u8 *)xdr->iov->iov_base; - recvd = xdr->buf->len - hdrlen; - if (pglen > recvd) - pglen = recvd; - xdr_read_pages(xdr, pglen); - return pglen; + return xdr_read_pages(xdr, xdr->buf->page_len); } static int nfs2_xdr_dec_readdirres(struct rpc_rqst *req, diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index 3c61c7f80a4..d64a00ff5a1 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -246,7 +246,6 @@ static void encode_nfspath3(struct xdr_stream *xdr, struct page **pages, static int decode_nfspath3(struct xdr_stream *xdr) { u32 recvd, count; - size_t hdrlen; __be32 *p; p = xdr_inline_decode(xdr, 4); @@ -255,12 +254,9 @@ static int decode_nfspath3(struct xdr_stream *xdr) count = be32_to_cpup(p); if (unlikely(count >= xdr->buf->page_len || count > NFS3_MAXPATHLEN)) goto out_nametoolong; - hdrlen = (u8 *)xdr->p - (u8 *)xdr->iov->iov_base; - recvd = xdr->buf->len - hdrlen; + recvd = xdr_read_pages(xdr, count); if (unlikely(count > recvd)) goto out_cheating; - - xdr_read_pages(xdr, count); xdr_terminate_string(xdr->buf, count); return 0; @@ -1587,7 +1583,6 @@ static int 
decode_read3resok(struct xdr_stream *xdr, struct nfs_readres *result) { u32 eof, count, ocount, recvd; - size_t hdrlen; __be32 *p; p = xdr_inline_decode(xdr, 4 + 4 + 4); @@ -1598,13 +1593,10 @@ static int decode_read3resok(struct xdr_stream *xdr, ocount = be32_to_cpup(p++); if (unlikely(ocount != count)) goto out_mismatch; - hdrlen = (u8 *)xdr->p - (u8 *)xdr->iov->iov_base; - recvd = xdr->buf->len - hdrlen; + recvd = xdr_read_pages(xdr, count); if (unlikely(count > recvd)) goto out_cheating; - out: - xdr_read_pages(xdr, count); result->eof = eof; result->count = count; return count; @@ -2039,16 +2031,7 @@ out_truncated: */ static int decode_dirlist3(struct xdr_stream *xdr) { - u32 recvd, pglen; - size_t hdrlen; - - pglen = xdr->buf->page_len; - hdrlen = (u8 *)xdr->p - (u8 *)xdr->iov->iov_base; - recvd = xdr->buf->len - hdrlen; - if (pglen > recvd) - pglen = recvd; - xdr_read_pages(xdr, pglen); - return pglen; + return xdr_read_pages(xdr, xdr->buf->page_len); } static int decode_readdir3resok(struct xdr_stream *xdr, diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 18fae29b030..2754f7268c1 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -4920,9 +4920,8 @@ static int decode_putrootfh(struct xdr_stream *xdr) static int decode_read(struct xdr_stream *xdr, struct rpc_rqst *req, struct nfs_readres *res) { - struct kvec *iov = req->rq_rcv_buf.head; __be32 *p; - uint32_t count, eof, recvd, hdrlen; + uint32_t count, eof, recvd; int status; status = decode_op_hdr(xdr, OP_READ); @@ -4933,15 +4932,13 @@ static int decode_read(struct xdr_stream *xdr, struct rpc_rqst *req, struct nfs_ goto out_overflow; eof = be32_to_cpup(p++); count = be32_to_cpup(p); - hdrlen = (u8 *) xdr->p - (u8 *) iov->iov_base; - recvd = req->rq_rcv_buf.len - hdrlen; + recvd = xdr_read_pages(xdr, count); if (count > recvd) { dprintk("NFS: server cheating in read reply: " "count %u > recvd %u\n", count, recvd); count = recvd; eof = 0; } - xdr_read_pages(xdr, count); res->eof = eof; res->count = 
count; return 0; @@ -4952,10 +4949,6 @@ out_overflow: static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct nfs4_readdir_res *readdir) { - struct xdr_buf *rcvbuf = &req->rq_rcv_buf; - struct kvec *iov = rcvbuf->head; - size_t hdrlen; - u32 recvd, pglen = rcvbuf->page_len; int status; __be32 verf[2]; @@ -4967,22 +4960,12 @@ static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct n memcpy(verf, readdir->verifier.data, sizeof(verf)); dprintk("%s: verifier = %08x:%08x\n", __func__, verf[0], verf[1]); - - hdrlen = (char *) xdr->p - (char *) iov->iov_base; - recvd = rcvbuf->len - hdrlen; - if (pglen > recvd) - pglen = recvd; - xdr_read_pages(xdr, pglen); - - - return pglen; + return xdr_read_pages(xdr, xdr->buf->page_len); } static int decode_readlink(struct xdr_stream *xdr, struct rpc_rqst *req) { struct xdr_buf *rcvbuf = &req->rq_rcv_buf; - struct kvec *iov = rcvbuf->head; - size_t hdrlen; u32 len, recvd; __be32 *p; int status; @@ -5000,14 +4983,12 @@ static int decode_readlink(struct xdr_stream *xdr, struct rpc_rqst *req) dprintk("nfs: server returned giant symlink!\n"); return -ENAMETOOLONG; } - hdrlen = (char *) xdr->p - (char *) iov->iov_base; - recvd = req->rq_rcv_buf.len - hdrlen; + recvd = xdr_read_pages(xdr, len); if (recvd < len) { dprintk("NFS: server cheating in readlink reply: " "count %u > recvd %u\n", len, recvd); return -EIO; } - xdr_read_pages(xdr, len); /* * The XDR encode routine has set things up so that * the link text will be copied directly into the @@ -5066,7 +5047,6 @@ static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req, __be32 *savep, *bm_p; uint32_t attrlen, bitmap[3] = {0}; - struct kvec *iov = req->rq_rcv_buf.head; int status; size_t page_len = xdr->buf->page_len; @@ -5089,7 +5069,6 @@ static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req, if (unlikely(bitmap[0] & (FATTR4_WORD0_ACL - 1U))) return -EIO; if (likely(bitmap[0] & FATTR4_WORD0_ACL)) { - size_t 
hdrlen; /* The bitmap (xdr len + bitmaps) and the attr xdr len words * are stored with the acl data to handle the problem of @@ -5098,7 +5077,6 @@ static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req, /* We ignore &savep and don't do consistency checks on * the attr length. Let userspace figure it out.... */ - hdrlen = (u8 *)xdr->p - (u8 *)iov->iov_base; attrlen += res->acl_data_offset; if (attrlen > page_len) { if (res->acl_flags & NFS4_ACL_LEN_REQUEST) { @@ -5707,9 +5685,7 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req, __be32 *p; int status; u32 layout_count; - struct xdr_buf *rcvbuf = &req->rq_rcv_buf; - struct kvec *iov = rcvbuf->head; - u32 hdrlen, recvd; + u32 recvd; status = decode_op_hdr(xdr, OP_LAYOUTGET); if (status) @@ -5746,8 +5722,7 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req, res->type, res->layoutp->len); - hdrlen = (u8 *) xdr->p - (u8 *) iov->iov_base; - recvd = req->rq_rcv_buf.len - hdrlen; + recvd = xdr_read_pages(xdr, res->layoutp->len); if (res->layoutp->len > recvd) { dprintk("NFS: server cheating in layoutget reply: " "layout len %u > recvd %u\n", @@ -5755,8 +5730,6 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req, return -EINVAL; } - xdr_read_pages(xdr, res->layoutp->len); - if (layout_count > 1) { /* We only handle a length one array at the moment. Any * further entries are just ignored. Note that this means -- cgit v1.2.3 From 256e48bb473b631fbb5aa03d6ed38c652ad3caa7 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 21 Jun 2012 11:18:13 -0400 Subject: NFSv4: Simplify the GETATTR attribute length calculation Use the xdr_stream position counter as the basis for the calculation instead of assuming that we can calculate an offset to the start of the iovec. 
Signed-off-by: Trond Myklebust --- fs/nfs/nfs4xdr.c | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 2754f7268c1..93f8bec9f4f 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -3078,7 +3078,7 @@ out_overflow: return -EIO; } -static inline int decode_attr_length(struct xdr_stream *xdr, uint32_t *attrlen, __be32 **savep) +static int decode_attr_length(struct xdr_stream *xdr, uint32_t *attrlen, unsigned int *savep) { __be32 *p; @@ -3086,7 +3086,7 @@ static inline int decode_attr_length(struct xdr_stream *xdr, uint32_t *attrlen, if (unlikely(!p)) goto out_overflow; *attrlen = be32_to_cpup(p); - *savep = xdr->p; + *savep = xdr_stream_pos(xdr); return 0; out_overflow: print_overflow_msg(__func__, xdr); @@ -4068,10 +4068,10 @@ static int decode_attr_time_modify(struct xdr_stream *xdr, uint32_t *bitmap, str return status; } -static int verify_attr_len(struct xdr_stream *xdr, __be32 *savep, uint32_t attrlen) +static int verify_attr_len(struct xdr_stream *xdr, unsigned int savep, uint32_t attrlen) { unsigned int attrwords = XDR_QUADLEN(attrlen); - unsigned int nwords = xdr->p - savep; + unsigned int nwords = (xdr_stream_pos(xdr) - savep) >> 2; if (unlikely(attrwords != nwords)) { dprintk("%s: server returned incorrect attribute length: " @@ -4193,7 +4193,7 @@ out_overflow: static int decode_server_caps(struct xdr_stream *xdr, struct nfs4_server_caps_res *res) { - __be32 *savep; + unsigned int savep; uint32_t attrlen, bitmap[3] = {0}; int status; @@ -4222,7 +4222,7 @@ xdr_error: static int decode_statfs(struct xdr_stream *xdr, struct nfs_fsstat *fsstat) { - __be32 *savep; + unsigned int savep; uint32_t attrlen, bitmap[3] = {0}; int status; @@ -4254,7 +4254,7 @@ xdr_error: static int decode_pathconf(struct xdr_stream *xdr, struct nfs_pathconf *pathconf) { - __be32 *savep; + unsigned int savep; uint32_t attrlen, bitmap[3] = {0}; int status; @@ -4299,7 +4299,8 @@ 
out_overflow: static int decode_first_threshold_item4(struct xdr_stream *xdr, struct nfs4_threshold *res) { - __be32 *p, *savep; + __be32 *p; + unsigned int savep; uint32_t bitmap[3] = {0,}, attrlen; int status; @@ -4503,7 +4504,7 @@ static int decode_getfattr_generic(struct xdr_stream *xdr, struct nfs_fattr *fat struct nfs_fh *fh, struct nfs4_fs_locations *fs_loc, const struct nfs_server *server) { - __be32 *savep; + unsigned int savep; uint32_t attrlen, bitmap[3] = {0}; int status; @@ -4615,7 +4616,7 @@ static int decode_attr_layout_blksize(struct xdr_stream *xdr, uint32_t *bitmap, static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo) { - __be32 *savep; + unsigned int savep; uint32_t attrlen, bitmap[3]; int status; @@ -5044,7 +5045,8 @@ decode_restorefh(struct xdr_stream *xdr) static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req, struct nfs_getaclres *res) { - __be32 *savep, *bm_p; + unsigned int savep; + __be32 *bm_p; uint32_t attrlen, bitmap[3] = {0}; int status; @@ -7076,6 +7078,7 @@ out: int nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, int plus) { + unsigned int savep; uint32_t bitmap[3] = {0}; uint32_t len; __be32 *p = xdr_inline_decode(xdr, 4); @@ -7114,7 +7117,7 @@ int nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, if (decode_attr_bitmap(xdr, bitmap) < 0) goto out_overflow; - if (decode_attr_length(xdr, &len, &p) < 0) + if (decode_attr_length(xdr, &len, &savep) < 0) goto out_overflow; if (decode_getfattr_attrs(xdr, bitmap, entry->fattr, entry->fh, -- cgit v1.2.3 From 1aecca3e83e5da981ade916920d3d2a6b9644cc3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 21 Jun 2012 11:41:29 -0400 Subject: NFSv3: Don't open code stream position calculation in decode_getacl3resok Use the new xdr_stream_pos() helper instead. 
Signed-off-by: Trond Myklebust --- fs/nfs/nfs3xdr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index d64a00ff5a1..5013bdd85ab 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -2341,7 +2341,7 @@ static inline int decode_getacl3resok(struct xdr_stream *xdr, if (result->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT)) goto out; - hdrlen = (u8 *)xdr->p - (u8 *)xdr->iov->iov_base; + hdrlen = xdr_stream_pos(xdr); acl = NULL; if (result->mask & NFS_ACL) -- cgit v1.2.3 From 8ed27d4fb1ce95e65f5a3b26b02d3b77135cc7a1 Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Tue, 29 May 2012 15:57:59 -0400 Subject: NFS: add more context to state manager error mesgs Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust --- fs/nfs/nfs4state.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index f38300e9f17..76bbac36788 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1857,10 +1857,12 @@ static int nfs4_bind_conn_to_session(struct nfs_client *clp) static void nfs4_state_manager(struct nfs_client *clp) { int status = 0; + const char *section = "", *section_sep = ""; /* Ensure exclusive access to NFSv4 state */ do { if (test_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state)) { + section = "purge state"; status = nfs4_reclaim_lease(clp); if (status < 0) goto out_error; @@ -1869,6 +1871,7 @@ static void nfs4_state_manager(struct nfs_client *clp) } if (test_and_clear_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) { + section = "lease expired"; /* We're going to have to re-establish a clientid */ status = nfs4_reclaim_lease(clp); if (status < 0) @@ -1888,6 +1891,7 @@ static void nfs4_state_manager(struct nfs_client *clp) } if (test_and_clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state)) { + section = "check lease"; status = nfs4_check_lease(clp); if (status < 0) goto out_error; @@ 
-1898,6 +1902,7 @@ static void nfs4_state_manager(struct nfs_client *clp) /* Initialize or reset the session */ if (test_and_clear_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state) && nfs4_has_session(clp)) { + section = "reset session"; status = nfs4_reset_session(clp); if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) continue; @@ -1908,6 +1913,7 @@ static void nfs4_state_manager(struct nfs_client *clp) /* Send BIND_CONN_TO_SESSION */ if (test_and_clear_bit(NFS4CLNT_BIND_CONN_TO_SESSION, &clp->cl_state) && nfs4_has_session(clp)) { + section = "bind conn to session"; status = nfs4_bind_conn_to_session(clp); if (status < 0) goto out_error; @@ -1916,6 +1922,7 @@ static void nfs4_state_manager(struct nfs_client *clp) /* First recover reboot state... */ if (test_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state)) { + section = "reclaim reboot"; status = nfs4_do_reclaim(clp, clp->cl_mvops->reboot_recovery_ops); if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) || @@ -1930,6 +1937,7 @@ static void nfs4_state_manager(struct nfs_client *clp) /* Now recover expired state... 
*/ if (test_and_clear_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state)) { + section = "reclaim nograce"; status = nfs4_do_reclaim(clp, clp->cl_mvops->nograce_recovery_ops); if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) || @@ -1948,6 +1956,7 @@ static void nfs4_state_manager(struct nfs_client *clp) /* Recall session slots */ if (test_and_clear_bit(NFS4CLNT_RECALL_SLOT, &clp->cl_state) && nfs4_has_session(clp)) { + section = "recall slot"; status = nfs4_recall_slot(clp); if (status < 0) goto out_error; @@ -1964,8 +1973,11 @@ static void nfs4_state_manager(struct nfs_client *clp) } while (atomic_read(&clp->cl_count) > 1); return; out_error: - pr_warn_ratelimited("NFS: state manager failed on NFSv4 server %s" - " with error %d\n", clp->cl_hostname, -status); + if (strlen(section)) + section_sep = ": "; + pr_warn_ratelimited("NFS: state manager%s%s failed on NFSv4 server %s" + " with error %d\n", section_sep, section, + clp->cl_hostname, -status); nfs4_end_drain_session(clp); nfs4_clear_state_manager_bit(clp); } -- cgit v1.2.3 From 1a2dd948e2b1e27476982bc7dd6961585823aec5 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 5 Jun 2012 10:40:47 -0400 Subject: NFSv4.1: Handle slot recalls before doing state recovery Handling a slot recall situation should always take precedence over state recovery to allow the server to manage its resources. 
Signed-off-by: Trond Myklebust --- fs/nfs/nfs4state.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 76bbac36788..da62f66a85a 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1814,7 +1814,6 @@ static int nfs4_recall_slot(struct nfs_client *clp) spin_unlock(&fc_tbl->slot_tbl_lock); kfree(old); - nfs4_end_drain_session(clp); return 0; } @@ -1920,6 +1919,16 @@ static void nfs4_state_manager(struct nfs_client *clp) continue; } + /* Recall session slots */ + if (test_and_clear_bit(NFS4CLNT_RECALL_SLOT, &clp->cl_state) + && nfs4_has_session(clp)) { + section = "recall slot"; + status = nfs4_recall_slot(clp); + if (status < 0) + goto out_error; + continue; + } + /* First recover reboot state... */ if (test_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state)) { section = "reclaim reboot"; @@ -1953,16 +1962,6 @@ static void nfs4_state_manager(struct nfs_client *clp) nfs_client_return_marked_delegations(clp); continue; } - /* Recall session slots */ - if (test_and_clear_bit(NFS4CLNT_RECALL_SLOT, &clp->cl_state) - && nfs4_has_session(clp)) { - section = "recall slot"; - status = nfs4_recall_slot(clp); - if (status < 0) - goto out_error; - continue; - } - nfs4_clear_state_manager_bit(clp); /* Did we race with an attempt to give us more work? 
*/ -- cgit v1.2.3 From 60f00153d93e0bea872f1a9f5b01423247649083 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 5 Jun 2012 10:51:00 -0400 Subject: NFSv4.1: Clean up nfs4_recall_slot() Move the test for nfs4_has_session out of the nfs4_state_manager() Signed-off-by: Trond Myklebust --- fs/nfs/nfs4state.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index da62f66a85a..338a12acdc3 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1792,12 +1792,14 @@ out: static int nfs4_recall_slot(struct nfs_client *clp) { - struct nfs4_slot_table *fc_tbl = &clp->cl_session->fc_slot_table; - struct nfs4_channel_attrs *fc_attrs = &clp->cl_session->fc_attrs; + struct nfs4_slot_table *fc_tbl; struct nfs4_slot *new, *old; int i; + if (!nfs4_has_session(clp)) + return 0; nfs4_begin_drain_session(clp); + fc_tbl = &clp->cl_session->fc_slot_table; new = kmalloc(fc_tbl->target_max_slots * sizeof(struct nfs4_slot), GFP_NOFS); if (!new) @@ -1810,7 +1812,7 @@ static int nfs4_recall_slot(struct nfs_client *clp) fc_tbl->slots = new; fc_tbl->max_slots = fc_tbl->target_max_slots; fc_tbl->target_max_slots = 0; - fc_attrs->max_reqs = fc_tbl->max_slots; + clp->cl_session->fc_attrs.max_reqs = fc_tbl->max_slots; spin_unlock(&fc_tbl->slot_tbl_lock); kfree(old); @@ -1920,8 +1922,7 @@ static void nfs4_state_manager(struct nfs_client *clp) } /* Recall session slots */ - if (test_and_clear_bit(NFS4CLNT_RECALL_SLOT, &clp->cl_state) - && nfs4_has_session(clp)) { + if (test_and_clear_bit(NFS4CLNT_RECALL_SLOT, &clp->cl_state)) { section = "recall slot"; status = nfs4_recall_slot(clp); if (status < 0) -- cgit v1.2.3 From 1a47e7a6662f155c8118d64737086a72cf34edf1 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 5 Jun 2012 10:53:38 -0400 Subject: NFSv4.1: Cleanup - move nfs4_has_session tests out of state manager loop Signed-off-by: Trond Myklebust --- fs/nfs/nfs4state.c | 9 ++++++--- 1 file 
changed, 6 insertions(+), 3 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 338a12acdc3..d04e0a1c023 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1764,6 +1764,8 @@ static int nfs4_reset_session(struct nfs_client *clp) struct rpc_cred *cred; int status; + if (!nfs4_has_session(clp)) + return 0; nfs4_begin_drain_session(clp); cred = nfs4_get_exchange_id_cred(clp); status = nfs4_proc_destroy_session(clp->cl_session, cred); @@ -1824,6 +1826,8 @@ static int nfs4_bind_conn_to_session(struct nfs_client *clp) struct rpc_cred *cred; int ret; + if (!nfs4_has_session(clp)) + return 0; nfs4_begin_drain_session(clp); cred = nfs4_get_exchange_id_cred(clp); ret = nfs4_proc_bind_conn_to_session(clp, cred); @@ -1901,8 +1905,7 @@ static void nfs4_state_manager(struct nfs_client *clp) } /* Initialize or reset the session */ - if (test_and_clear_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state) - && nfs4_has_session(clp)) { + if (test_and_clear_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state)) { section = "reset session"; status = nfs4_reset_session(clp); if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) @@ -1913,7 +1916,7 @@ static void nfs4_state_manager(struct nfs_client *clp) /* Send BIND_CONN_TO_SESSION */ if (test_and_clear_bit(NFS4CLNT_BIND_CONN_TO_SESSION, - &clp->cl_state) && nfs4_has_session(clp)) { + &clp->cl_state)) { section = "bind conn to session"; status = nfs4_bind_conn_to_session(clp); if (status < 0) -- cgit v1.2.3 From b42353ff8d346a2f6afac3e3983b7286ed4238d7 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 5 Jun 2012 11:19:47 -0400 Subject: NFSv4.1: Clean up nfs4_reclaim_lease Signed-off-by: Trond Myklebust --- fs/nfs/nfs4state.c | 52 +++++++++++++++++++++++++++++++++++----------------- 1 file changed, 35 insertions(+), 17 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index d04e0a1c023..1cfc4603fd9 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ 
-1642,7 +1642,7 @@ static int nfs4_handle_reclaim_lease_error(struct nfs_client *clp, int status) return 0; } -static int nfs4_reclaim_lease(struct nfs_client *clp) +static int nfs4_establish_lease(struct nfs_client *clp) { struct rpc_cred *cred; const struct nfs4_state_recovery_ops *ops = @@ -1655,7 +1655,37 @@ static int nfs4_reclaim_lease(struct nfs_client *clp) status = ops->establish_clid(clp, cred); put_rpccred(cred); if (status != 0) + return status; + pnfs_destroy_all_layouts(clp); + return 0; +} + +static int nfs4_reclaim_lease(struct nfs_client *clp) +{ + int status; + + status = nfs4_establish_lease(clp); + if (status < 0) + return nfs4_handle_reclaim_lease_error(clp, status); + if (test_and_clear_bit(NFS4CLNT_SERVER_SCOPE_MISMATCH, &clp->cl_state)) + nfs4_state_start_reclaim_nograce(clp); + if (!test_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state)) + set_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state); + clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state); + clear_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); + return 0; +} + +static int nfs4_purge_lease(struct nfs_client *clp) +{ + int status; + + status = nfs4_establish_lease(clp); + if (status < 0) return nfs4_handle_reclaim_lease_error(clp, status); + clear_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state); + set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); + nfs4_state_start_reclaim_nograce(clp); return 0; } @@ -1868,31 +1898,19 @@ static void nfs4_state_manager(struct nfs_client *clp) do { if (test_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state)) { section = "purge state"; - status = nfs4_reclaim_lease(clp); + status = nfs4_purge_lease(clp); if (status < 0) goto out_error; - clear_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state); - set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); + continue; } - if (test_and_clear_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) { + if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) { section = "lease expired"; /* We're going to have to re-establish a clientid */ status = 
nfs4_reclaim_lease(clp); if (status < 0) goto out_error; - if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) - continue; - clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state); - - if (test_and_clear_bit(NFS4CLNT_SERVER_SCOPE_MISMATCH, - &clp->cl_state)) - nfs4_state_start_reclaim_nograce(clp); - else - set_bit(NFS4CLNT_RECLAIM_REBOOT, - &clp->cl_state); - - pnfs_destroy_all_layouts(clp); + continue; } if (test_and_clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state)) { -- cgit v1.2.3 From 140150dbb1f9cf3ef963fb55505f994d74ff3276 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 5 Jun 2012 15:20:25 -0400 Subject: SUNRPC: Remove unused function xdr_encode_pages Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 15fc7e4664e..5a7b3723cc6 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2766,9 +2766,7 @@ static int nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry) * * In the case of WRITE, we also want to put the GETATTR after * the operation -- in this case because we want to make sure - * we get the post-operation mtime and size. This means that - * we can't use xdr_encode_pages() as written: we need a variant - * of it which would leave room in the 'tail' iovec. + * we get the post-operation mtime and size. * * Both of these changes to the XDR layer would in fact be quite * minor, but I decided to leave them for a subsequent patch. -- cgit v1.2.3 From 98d9452448122486f81030c6c70f29471f65e1ce Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 8 Jun 2012 12:01:14 -0400 Subject: NFSv4: Decode getdevicelist should use nfs4_verifier The verifier returned by the GETDEVICELIST operation is not a write verifier, but a nfs4_verifier. 
Signed-off-by: Trond Myklebust --- fs/nfs/nfs4xdr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 93f8bec9f4f..1e2c47b3889 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -5579,7 +5579,7 @@ static int decode_getdevicelist(struct xdr_stream *xdr, { __be32 *p; int status, i; - struct nfs_writeverf verftemp; + nfs4_verifier verftemp; status = decode_op_hdr(xdr, OP_GETDEVICELIST); if (status) @@ -5593,7 +5593,7 @@ static int decode_getdevicelist(struct xdr_stream *xdr, p += 2; /* Read verifier */ - p = xdr_decode_opaque_fixed(p, verftemp.verifier, NFS4_VERIFIER_SIZE); + p = xdr_decode_opaque_fixed(p, verftemp.data, NFS4_VERIFIER_SIZE); res->num_devs = be32_to_cpup(p); -- cgit v1.2.3 From 2f2c63bc221c5fcded24de2704575d0abf96b910 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 8 Jun 2012 11:56:09 -0400 Subject: NFS: Cleanup - only store the write verifier in struct nfs_page The 'committed' field is not needed once we have put the struct nfs_page on the right list. Also correct the type of the verifier: it is not an array of __be32, but simply an 8 byte long opaque array. 
Signed-off-by: Trond Myklebust --- fs/nfs/nfs3xdr.c | 12 +++++++----- fs/nfs/nfs4filelayout.c | 6 +++--- fs/nfs/nfs4xdr.c | 12 ++++++++---- fs/nfs/write.c | 4 ++-- 4 files changed, 20 insertions(+), 14 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index 5013bdd85ab..6cbe89400df 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -325,14 +325,14 @@ static void encode_createverf3(struct xdr_stream *xdr, const __be32 *verifier) memcpy(p, verifier, NFS3_CREATEVERFSIZE); } -static int decode_writeverf3(struct xdr_stream *xdr, __be32 *verifier) +static int decode_writeverf3(struct xdr_stream *xdr, struct nfs_write_verifier *verifier) { __be32 *p; p = xdr_inline_decode(xdr, NFS3_WRITEVERFSIZE); if (unlikely(p == NULL)) goto out_overflow; - memcpy(verifier, p, NFS3_WRITEVERFSIZE); + memcpy(verifier->data, p, NFS3_WRITEVERFSIZE); return 0; out_overflow: print_overflow_msg(__func__, xdr); @@ -1668,20 +1668,22 @@ static int decode_write3resok(struct xdr_stream *xdr, { __be32 *p; - p = xdr_inline_decode(xdr, 4 + 4 + NFS3_WRITEVERFSIZE); + p = xdr_inline_decode(xdr, 4 + 4); if (unlikely(p == NULL)) goto out_overflow; result->count = be32_to_cpup(p++); result->verf->committed = be32_to_cpup(p++); if (unlikely(result->verf->committed > NFS_FILE_SYNC)) goto out_badvalue; - memcpy(result->verf->verifier, p, NFS3_WRITEVERFSIZE); + if (decode_writeverf3(xdr, &result->verf->verifier)) + goto out_eio; return result->count; out_badvalue: dprintk("NFS: bad stable_how value: %u\n", result->verf->committed); return -EIO; out_overflow: print_overflow_msg(__func__, xdr); +out_eio: return -EIO; } @@ -2314,7 +2316,7 @@ static int nfs3_xdr_dec_commit3res(struct rpc_rqst *req, goto out; if (status != NFS3_OK) goto out_status; - error = decode_writeverf3(xdr, result->verf->verifier); + error = decode_writeverf3(xdr, &result->verf->verifier); out: return error; out_status: diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 
e1340293872..85b70639921 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -351,9 +351,9 @@ static void prepare_to_resend_writes(struct nfs_commit_data *data) struct nfs_page *first = nfs_list_entry(data->pages.next); data->task.tk_status = 0; - memcpy(data->verf.verifier, first->wb_verf.verifier, - sizeof(first->wb_verf.verifier)); - data->verf.verifier[0]++; /* ensure verifier mismatch */ + memcpy(&data->verf.verifier, &first->wb_verf, + sizeof(data->verf.verifier)); + data->verf.verifier.data[0]++; /* ensure verifier mismatch */ } static int filelayout_commit_done_cb(struct rpc_task *task, diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 1e2c47b3889..610ebccbde5 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -4158,13 +4158,18 @@ static int decode_verifier(struct xdr_stream *xdr, void *verifier) return decode_opaque_fixed(xdr, verifier, NFS4_VERIFIER_SIZE); } +static int decode_write_verifier(struct xdr_stream *xdr, struct nfs_write_verifier *verifier) +{ + return decode_opaque_fixed(xdr, verifier->data, NFS4_VERIFIER_SIZE); +} + static int decode_commit(struct xdr_stream *xdr, struct nfs_commitres *res) { int status; status = decode_op_hdr(xdr, OP_COMMIT); if (!status) - status = decode_verifier(xdr, res->verf->verifier); + status = decode_write_verifier(xdr, &res->verf->verifier); return status; } @@ -5192,13 +5197,12 @@ static int decode_write(struct xdr_stream *xdr, struct nfs_writeres *res) if (status) return status; - p = xdr_inline_decode(xdr, 16); + p = xdr_inline_decode(xdr, 8); if (unlikely(!p)) goto out_overflow; res->count = be32_to_cpup(p++); res->verf->committed = be32_to_cpup(p++); - memcpy(res->verf->verifier, p, NFS4_VERIFIER_SIZE); - return 0; + return decode_write_verifier(xdr, &res->verf->verifier); out_overflow: print_overflow_msg(__func__, xdr); return -EIO; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 4d6861c0dc1..ee929e5e1f7 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -620,7 +620,7 @@ static 
void nfs_write_completion(struct nfs_pgio_header *hdr) goto next; } if (test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) { - memcpy(&req->wb_verf, hdr->verf, sizeof(req->wb_verf)); + memcpy(&req->wb_verf, &hdr->verf->verifier, sizeof(req->wb_verf)); nfs_mark_request_commit(req, hdr->lseg, &cinfo); goto next; } @@ -1547,7 +1547,7 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data) /* Okay, COMMIT succeeded, apparently. Check the verifier * returned by the server against all stored verfs. */ - if (!memcmp(req->wb_verf.verifier, data->verf.verifier, sizeof(data->verf.verifier))) { + if (!memcmp(&req->wb_verf, &data->verf.verifier, sizeof(req->wb_verf))) { /* We have a match */ nfs_inode_remove_request(req); dprintk(" OK\n"); -- cgit v1.2.3 From 05bf14adcac188f573e22f72734fd0e2fab71aec Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Wed, 6 Jun 2012 23:57:13 -0400 Subject: NFSv4.1: Use session max response size for GETDEVICEINFO gdia_maxcount We prepare for the largest possible GETDEVICEINFO response, which can not be greater than the negotiated session maximum response size. 
Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust --- fs/nfs/nfs4filelayoutdev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c index a1fab8da7f0..f81231f30d9 100644 --- a/fs/nfs/nfs4filelayoutdev.c +++ b/fs/nfs/nfs4filelayoutdev.c @@ -728,7 +728,7 @@ get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id, gfp_t gfp_fla pdev->layout_type = LAYOUT_NFSV4_1_FILES; pdev->pages = pages; pdev->pgbase = 0; - pdev->pglen = PAGE_SIZE * max_pages; + pdev->pglen = max_resp_sz; pdev->mincount = 0; rc = nfs4_proc_getdeviceinfo(server, pdev); -- cgit v1.2.3 From e3074507d93a0b7f1430dec7c6addb307d4f30da Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 21 May 2012 15:30:41 -0400 Subject: NFS: Simplify NFSv4.1 Kconfig Convert the pNFS file layout to use the same system as the object and block layout. Remove unnecessary dependencies on NFS_FS Signed-off-by: Trond Myklebust --- fs/nfs/Kconfig | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index f90f4f5cd42..404c6a8ac39 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig @@ -88,9 +88,8 @@ config NFS_V4 config NFS_V4_1 bool "NFS client support for NFSv4.1 (EXPERIMENTAL)" - depends on NFS_FS && NFS_V4 && EXPERIMENTAL + depends on NFS_V4 && EXPERIMENTAL select SUNRPC_BACKCHANNEL - select PNFS_FILE_LAYOUT help This option enables support for minor version 1 of the NFSv4 protocol (RFC 5661) in the kernel's NFS client. 
@@ -99,15 +98,17 @@ config NFS_V4_1 config PNFS_FILE_LAYOUT tristate + depends on NFS_V4_1 + default m config PNFS_BLOCK tristate - depends on NFS_FS && NFS_V4_1 && BLK_DEV_DM + depends on NFS_V4_1 && BLK_DEV_DM default m config PNFS_OBJLAYOUT tristate - depends on NFS_FS && NFS_V4_1 && SCSI_OSD_ULD + depends on NFS_V4_1 && SCSI_OSD_ULD default m config NFS_V4_1_IMPLEMENTATION_ID_DOMAIN -- cgit v1.2.3 From 6e5b587d2f4271a1a4a47e3169db7157aefc31ed Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Thu, 31 May 2012 15:16:03 -0400 Subject: NFSv4.1 handle OPEN O_CREATE mdsthreshold Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 5a7b3723cc6..c84c93c4cd3 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2825,6 +2825,7 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, struct dentry *de = dentry; struct nfs4_state *state; struct rpc_cred *cred = NULL; + struct nfs4_threshold **thp = NULL; fmode_t fmode = 0; int status = 0; @@ -2832,9 +2833,10 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, cred = ctx->cred; de = ctx->dentry; fmode = ctx->mode; + thp = &ctx->mdsthreshold; } sattr->ia_mode &= ~current_umask(); - state = nfs4_do_open(dir, de, fmode, flags, sattr, cred, NULL); + state = nfs4_do_open(dir, de, fmode, flags, sattr, cred, thp); d_drop(dentry); if (IS_ERR(state)) { status = PTR_ERR(state); -- cgit v1.2.3 From e38eb6506ff426a2bb93433fecfcc863a95fcd03 Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Wed, 20 Jun 2012 15:53:40 -0400 Subject: NFS: set_pnfs_layoutdriver() from nfs4_proc_fsinfo() The generic client doesn't need to know about pnfs layout drivers, so this should be done in the v4 code. 
Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 1 - fs/nfs/nfs4proc.c | 8 +++++++- 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/client.c b/fs/nfs/client.c index f005b5bebdc..e646b14024c 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -975,7 +975,6 @@ static void nfs_server_set_fsinfo(struct nfs_server *server, server->wsize = NFS_MAX_FILE_IO_SIZE; server->wpages = (server->wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; server->pnfs_blksize = fsinfo->blksize; - set_pnfs_layoutdriver(server, mntfh, fsinfo->layouttype); server->wtmult = nfs_block_bits(fsinfo->wtmult, NULL); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index c84c93c4cd3..e9a8ad2df7a 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3332,8 +3332,14 @@ static int nfs4_do_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, str static int nfs4_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsinfo *fsinfo) { + int error; + nfs_fattr_init(fsinfo->fattr); - return nfs4_do_fsinfo(server, fhandle, fsinfo); + error = nfs4_do_fsinfo(server, fhandle, fsinfo); + if (error == 0) + set_pnfs_layoutdriver(server, fhandle, fsinfo->layouttype); + + return error; } static int _nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, -- cgit v1.2.3 From eeebf91675421b730448489ebf4720e5c419beec Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Wed, 20 Jun 2012 15:53:41 -0400 Subject: NFS: Use nfs4_destroy_server() to clean up NFS v4 I can use this function to return delegations and unset the pnfs layout driver rather than continuing to do these things in the generic client. With this change, we no longer need an nfs4_kill_super(). 
Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 3 ++- fs/nfs/delegation.c | 3 +-- fs/nfs/delegation.h | 2 +- fs/nfs/super.c | 23 +++++------------------ 4 files changed, 9 insertions(+), 22 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/client.c b/fs/nfs/client.c index e646b14024c..bf0f896284a 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -266,6 +266,8 @@ static void pnfs_init_server(struct nfs_server *server) static void nfs4_destroy_server(struct nfs_server *server) { + nfs_server_return_all_delegations(server); + unset_pnfs_layoutdriver(server); nfs4_purge_state_owners(server); } @@ -1137,7 +1139,6 @@ void nfs_free_server(struct nfs_server *server) dprintk("--> nfs_free_server()\n"); nfs_server_remove_lists(server); - unset_pnfs_layoutdriver(server); if (server->destroy != NULL) server->destroy(server); diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index bd3a9601d32..9a7a1b488af 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -417,9 +417,8 @@ static void nfs_mark_return_delegation(struct nfs_server *server, * @sb: sb to process * */ -void nfs_super_return_all_delegations(struct super_block *sb) +void nfs_server_return_all_delegations(struct nfs_server *server) { - struct nfs_server *server = NFS_SB(sb); struct nfs_client *clp = server->nfs_client; struct nfs_delegation *delegation; diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h index 72709c4193f..206db567999 100644 --- a/fs/nfs/delegation.h +++ b/fs/nfs/delegation.h @@ -38,7 +38,7 @@ int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *s void nfs_inode_return_delegation_noreclaim(struct inode *inode); struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs_fh *fhandle); -void nfs_super_return_all_delegations(struct super_block *sb); +void nfs_server_return_all_delegations(struct nfs_server *); void nfs_expire_all_delegations(struct nfs_client *clp); void 
nfs_expire_all_delegation_types(struct nfs_client *clp, fmode_t flags); void nfs_expire_unreferenced_delegations(struct nfs_client *clp); diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 906f09c7d84..5a1c860743c 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -347,13 +347,12 @@ static struct dentry *nfs4_referral_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data); static struct dentry *nfs4_remote_referral_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data); -static void nfs4_kill_super(struct super_block *sb); static struct file_system_type nfs4_fs_type = { .owner = THIS_MODULE, .name = "nfs4", .mount = nfs_fs_mount, - .kill_sb = nfs4_kill_super, + .kill_sb = nfs_kill_super, .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, }; @@ -361,7 +360,7 @@ static struct file_system_type nfs4_remote_fs_type = { .owner = THIS_MODULE, .name = "nfs4", .mount = nfs4_remote_mount, - .kill_sb = nfs4_kill_super, + .kill_sb = nfs_kill_super, .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, }; @@ -369,7 +368,7 @@ struct file_system_type nfs4_xdev_fs_type = { .owner = THIS_MODULE, .name = "nfs4", .mount = nfs4_xdev_mount, - .kill_sb = nfs4_kill_super, + .kill_sb = nfs_kill_super, .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, }; @@ -377,7 +376,7 @@ static struct file_system_type nfs4_remote_referral_fs_type = { .owner = THIS_MODULE, .name = "nfs4", .mount = nfs4_remote_referral_mount, - .kill_sb = nfs4_kill_super, + .kill_sb = nfs_kill_super, .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, }; @@ -385,7 +384,7 @@ struct file_system_type nfs4_referral_fs_type = { .owner = THIS_MODULE, .name = "nfs4", .mount = nfs4_referral_mount, - .kill_sb = nfs4_kill_super, + .kill_sb = nfs_kill_super, .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, }; @@ -2874,18 +2873,6 @@ static struct dentry *nfs4_try_mount(int flags, 
const char *dev_name, return res; } -static void nfs4_kill_super(struct super_block *sb) -{ - struct nfs_server *server = NFS_SB(sb); - - dprintk("--> %s\n", __func__); - nfs_super_return_all_delegations(sb); - kill_anon_super(sb); - nfs_fscache_release_super_cookie(sb); - nfs_free_server(server); - dprintk("<-- %s\n", __func__); -} - /* * Clone an NFS4 server record on xdev traversal (FSID-change) */ -- cgit v1.2.3 From a5c58892b427a2752e3ec44b0aad4ce9221dc63b Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Wed, 20 Jun 2012 15:53:42 -0400 Subject: NFS: Create a v4-specific fsync function v2 and v3 don't need to worry about doing a pnfs layoutcommit. Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/file.c | 40 ++++++++++++++++++++++++++++++++-------- 1 file changed, 32 insertions(+), 8 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/file.c b/fs/nfs/file.c index a6708e6b438..8941ac41c59 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -265,7 +265,7 @@ nfs_file_mmap(struct file * file, struct vm_area_struct * vma) * fall back to doing a synchronous write. 
*/ static int -nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) +nfs_file_fsync_commit(struct file *file, loff_t start, loff_t end, int datasync) { struct dentry *dentry = file->f_path.dentry; struct nfs_open_context *ctx = nfs_file_open_context(file); @@ -277,9 +277,6 @@ nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) dentry->d_parent->d_name.name, dentry->d_name.name, datasync); - ret = filemap_write_and_wait_range(inode->i_mapping, start, end); - mutex_lock(&inode->i_mutex); - nfs_inc_stats(inode, NFSIOS_VFSFSYNC); have_error = test_and_clear_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags); status = nfs_commit_inode(inode, FLUSH_SYNC); @@ -290,10 +287,20 @@ nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) ret = xchg(&ctx->error, 0); if (!ret && status < 0) ret = status; - if (!ret && !datasync) - /* application has asked for meta-data sync */ - ret = pnfs_layoutcommit_inode(inode, true); + return ret; +} + +static int +nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) +{ + int ret; + struct inode *inode = file->f_path.dentry->d_inode; + + ret = filemap_write_and_wait_range(inode->i_mapping, start, end); + mutex_lock(&inode->i_mutex); + ret = nfs_file_fsync_commit(file, start, end, datasync); mutex_unlock(&inode->i_mutex); + return ret; } @@ -956,6 +963,23 @@ out_drop: goto out_put_ctx; } +static int +nfs4_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) +{ + int ret; + struct inode *inode = file->f_path.dentry->d_inode; + + ret = filemap_write_and_wait_range(inode->i_mapping, start, end); + mutex_lock(&inode->i_mutex); + ret = nfs_file_fsync_commit(file, start, end, datasync); + if (!ret && !datasync) + /* application has asked for meta-data sync */ + ret = pnfs_layoutcommit_inode(inode, true); + mutex_unlock(&inode->i_mutex); + + return ret; +} + const struct file_operations nfs4_file_operations = { .llseek = nfs_file_llseek, .read = do_sync_read, 
@@ -966,7 +990,7 @@ const struct file_operations nfs4_file_operations = { .open = nfs4_file_open, .flush = nfs_file_flush, .release = nfs_file_release, - .fsync = nfs_file_fsync, + .fsync = nfs4_file_fsync, .lock = nfs_lock, .flock = nfs_flock, .splice_read = nfs_file_splice_read, -- cgit v1.2.3 From 011e2a7fd5e9e0c2fdba6b9466d53fc437f8bfaf Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Wed, 20 Jun 2012 15:53:43 -0400 Subject: NFS: Create a have_delegation rpc_op Delegations are a v4 feature, so push them out of the generic code. Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/delegation.c | 2 +- fs/nfs/delegation.h | 9 ++------- fs/nfs/dir.c | 2 +- fs/nfs/file.c | 6 +++--- fs/nfs/inode.c | 2 +- fs/nfs/nfs3proc.c | 6 ++++++ fs/nfs/nfs4proc.c | 5 +++-- fs/nfs/proc.c | 6 ++++++ fs/nfs/write.c | 2 +- 9 files changed, 24 insertions(+), 16 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 9a7a1b488af..36c7c647a1d 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -47,7 +47,7 @@ void nfs_mark_delegation_referenced(struct nfs_delegation *delegation) * * Returns one if inode has the indicated delegation, otherwise zero. 
*/ -int nfs_have_delegation(struct inode *inode, fmode_t flags) +int nfs4_have_delegation(struct inode *inode, fmode_t flags) { struct nfs_delegation *delegation; int ret = 0; diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h index 206db567999..d134fc5fda7 100644 --- a/fs/nfs/delegation.h +++ b/fs/nfs/delegation.h @@ -56,14 +56,9 @@ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl); bool nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode, fmode_t flags); void nfs_mark_delegation_referenced(struct nfs_delegation *delegation); -int nfs_have_delegation(struct inode *inode, fmode_t flags); +int nfs4_have_delegation(struct inode *inode, fmode_t flags); #else -static inline int nfs_have_delegation(struct inode *inode, fmode_t flags) -{ - return 0; -} - static inline int nfs_inode_return_delegation(struct inode *inode) { nfs_wb_all(inode); @@ -73,7 +68,7 @@ static inline int nfs_inode_return_delegation(struct inode *inode) static inline int nfs_have_delegated_attributes(struct inode *inode) { - return nfs_have_delegation(inode, FMODE_READ) && + return NFS_PROTO(inode)->have_delegation(inode, FMODE_READ) && !(NFS_I(inode)->cache_validity & NFS_INO_REVAL_FORCED); } diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index f430057ff3b..4a3e23aea14 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1144,7 +1144,7 @@ static int nfs_lookup_revalidate(struct dentry *dentry, struct nameidata *nd) goto out_bad; } - if (nfs_have_delegation(inode, FMODE_READ)) + if (NFS_PROTO(dir)->have_delegation(inode, FMODE_READ)) goto out_set_verifier; /* Force a full look up iff the parent directory has changed */ diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 8941ac41c59..57a22a1533e 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -178,7 +178,7 @@ nfs_file_flush(struct file *file, fl_owner_t id) * If we're holding a write delegation, then just start the i/o * but don't wait for completion (or send a commit). 
*/ - if (nfs_have_delegation(inode, FMODE_WRITE)) + if (NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE)) return filemap_fdatawrite(file->f_mapping); /* Flush writes to the server and return any errors */ @@ -677,7 +677,7 @@ do_getlk(struct file *filp, int cmd, struct file_lock *fl, int is_local) } fl->fl_type = saved_type; - if (nfs_have_delegation(inode, FMODE_READ)) + if (NFS_PROTO(inode)->have_delegation(inode, FMODE_READ)) goto out_noconflict; if (is_local) @@ -772,7 +772,7 @@ do_setlk(struct file *filp, int cmd, struct file_lock *fl, int is_local) * This makes locking act as a cache coherency point. */ nfs_sync_mapping(filp->f_mapping); - if (!nfs_have_delegation(inode, FMODE_READ)) { + if (!NFS_PROTO(inode)->have_delegation(inode, FMODE_READ)) { if (is_time_granular(&NFS_SERVER(inode)->time_delta)) __nfs_revalidate_inode(NFS_SERVER(inode), inode); else diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index f7296983eba..0f0b928ef25 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1457,7 +1457,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))) invalid &= ~NFS_INO_INVALID_DATA; - if (!nfs_have_delegation(inode, FMODE_READ) || + if (!NFS_PROTO(inode)->have_delegation(inode, FMODE_READ) || (save_cache_validity & NFS_INO_REVAL_FORCED)) nfsi->cache_validity |= invalid; diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 2292a0fd2bf..08f832634ef 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -877,6 +877,11 @@ nfs3_proc_lock(struct file *filp, int cmd, struct file_lock *fl) return nlmclnt_proc(NFS_SERVER(inode)->nlm_host, cmd, fl); } +static int nfs3_have_delegation(struct inode *inode, fmode_t flags) +{ + return 0; +} + const struct nfs_rpc_ops nfs_v3_clientops = { .version = 3, /* protocol version */ .dentry_ops = &nfs_dentry_operations, @@ -921,5 +926,6 @@ const struct nfs_rpc_ops nfs_v3_clientops = { .lock = nfs3_proc_lock, 
.clear_acl_cache = nfs3_forget_cached_acls, .close_context = nfs_close_context, + .have_delegation = nfs3_have_delegation, .init_client = nfs_init_client, }; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index e9a8ad2df7a..86f428bb5e0 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -294,7 +294,7 @@ static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struc case 0: return 0; case -NFS4ERR_OPENMODE: - if (inode && nfs_have_delegation(inode, FMODE_READ)) { + if (inode && nfs4_have_delegation(inode, FMODE_READ)) { nfs_inode_return_delegation(inode); exception->retry = 1; return 0; @@ -3466,7 +3466,7 @@ bool nfs4_write_need_cache_consistency_data(const struct nfs_write_data *data) /* Otherwise, request attributes if and only if we don't hold * a delegation */ - return nfs_have_delegation(hdr->inode, FMODE_READ) == 0; + return nfs4_have_delegation(hdr->inode, FMODE_READ) == 0; } static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_message *msg) @@ -6804,6 +6804,7 @@ const struct nfs_rpc_ops nfs_v4_clientops = { .clear_acl_cache = nfs4_zap_acl_attr, .close_context = nfs4_close_context, .open_context = nfs4_atomic_open, + .have_delegation = nfs4_have_delegation, .init_client = nfs4_init_client, }; diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index 617c7419a08..4aed3ddf9bb 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -734,6 +734,11 @@ out_einval: return -EINVAL; } +static int nfs_have_delegation(struct inode *inode, fmode_t flags) +{ + return 0; +} + const struct nfs_rpc_ops nfs_v2_clientops = { .version = 2, /* protocol version */ .dentry_ops = &nfs_dentry_operations, @@ -777,5 +782,6 @@ const struct nfs_rpc_ops nfs_v2_clientops = { .lock = nfs_proc_lock, .lock_check_bounds = nfs_lock_check_bounds, .close_context = nfs_close_context, + .have_delegation = nfs_have_delegation, .init_client = nfs_init_client, }; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index ee929e5e1f7..f163355b961 100644 --- a/fs/nfs/write.c 
+++ b/fs/nfs/write.c @@ -410,7 +410,7 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req) nfs_lock_request(req); spin_lock(&inode->i_lock); - if (!nfsi->npages && nfs_have_delegation(inode, FMODE_WRITE)) + if (!nfsi->npages && NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE)) inode->i_version++; set_bit(PG_MAPPED, &req->wb_flags); SetPagePrivate(req->wb_page); -- cgit v1.2.3 From 57ec14c55dee2733330327499d16e40f8c23219e Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Wed, 20 Jun 2012 15:53:44 -0400 Subject: NFS: Create a return_delegation rpc op Delegations are a v4 feature, so push return_delegation out of the generic client by creating a new rpc_op and renaming the old function to be in the nfs v4 "namespace" Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/delegation.c | 2 +- fs/nfs/delegation.h | 8 +------- fs/nfs/dir.c | 8 ++++---- fs/nfs/inode.c | 2 +- fs/nfs/nfs3proc.c | 7 +++++++ fs/nfs/nfs4proc.c | 7 ++++--- fs/nfs/proc.c | 7 +++++++ fs/nfs/unlink.c | 2 +- 8 files changed, 26 insertions(+), 17 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 36c7c647a1d..81c5eec3cf3 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -388,7 +388,7 @@ void nfs_inode_return_delegation_noreclaim(struct inode *inode) * * Returns zero on success, or a negative errno value. 
*/ -int nfs_inode_return_delegation(struct inode *inode) +int nfs4_inode_return_delegation(struct inode *inode) { struct nfs_server *server = NFS_SERVER(inode); struct nfs_inode *nfsi = NFS_I(inode); diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h index d134fc5fda7..1f3ccd93463 100644 --- a/fs/nfs/delegation.h +++ b/fs/nfs/delegation.h @@ -33,7 +33,7 @@ enum { int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res); void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res); -int nfs_inode_return_delegation(struct inode *inode); +int nfs4_inode_return_delegation(struct inode *inode); int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid); void nfs_inode_return_delegation_noreclaim(struct inode *inode); @@ -58,12 +58,6 @@ bool nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode, fmode_ void nfs_mark_delegation_referenced(struct nfs_delegation *delegation); int nfs4_have_delegation(struct inode *inode, fmode_t flags); -#else -static inline int nfs_inode_return_delegation(struct inode *inode) -{ - nfs_wb_all(inode); - return 0; -} #endif static inline int nfs_have_delegated_attributes(struct inode *inode) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 4a3e23aea14..68e451f5930 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1778,7 +1778,7 @@ static int nfs_safe_remove(struct dentry *dentry) } if (inode != NULL) { - nfs_inode_return_delegation(inode); + NFS_PROTO(inode)->return_delegation(inode); error = NFS_PROTO(dir)->remove(dir, &dentry->d_name); /* The VFS may want to delete this inode */ if (error == 0) @@ -1906,7 +1906,7 @@ nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) old_dentry->d_parent->d_name.name, old_dentry->d_name.name, dentry->d_parent->d_name.name, dentry->d_name.name); - nfs_inode_return_delegation(inode); + NFS_PROTO(inode)->return_delegation(inode); d_drop(dentry); error = 
NFS_PROTO(dir)->link(inode, dir, &dentry->d_name); @@ -1990,9 +1990,9 @@ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, } } - nfs_inode_return_delegation(old_inode); + NFS_PROTO(old_inode)->return_delegation(old_inode); if (new_inode != NULL) - nfs_inode_return_delegation(new_inode); + NFS_PROTO(new_inode)->return_delegation(new_inode); error = NFS_PROTO(old_dir)->rename(old_dir, &old_dentry->d_name, new_dir, &new_dentry->d_name); diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 0f0b928ef25..28c9ebbe78a 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -430,7 +430,7 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr) * Return any delegations if we're going to change ACLs */ if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0) - nfs_inode_return_delegation(inode); + NFS_PROTO(inode)->return_delegation(inode); error = NFS_PROTO(inode)->setattr(dentry, fattr, attr); if (error == 0) nfs_refresh_inode(inode, fattr); diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 08f832634ef..4749a32e54b 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -882,6 +882,12 @@ static int nfs3_have_delegation(struct inode *inode, fmode_t flags) return 0; } +static int nfs3_return_delegation(struct inode *inode) +{ + nfs_wb_all(inode); + return 0; +} + const struct nfs_rpc_ops nfs_v3_clientops = { .version = 3, /* protocol version */ .dentry_ops = &nfs_dentry_operations, @@ -927,5 +933,6 @@ const struct nfs_rpc_ops nfs_v3_clientops = { .clear_acl_cache = nfs3_forget_cached_acls, .close_context = nfs_close_context, .have_delegation = nfs3_have_delegation, + .return_delegation = nfs3_return_delegation, .init_client = nfs_init_client, }; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 86f428bb5e0..035f7a0829e 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -295,7 +295,7 @@ static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struc return 0; case -NFS4ERR_OPENMODE: if (inode && 
nfs4_have_delegation(inode, FMODE_READ)) { - nfs_inode_return_delegation(inode); + nfs4_inode_return_delegation(inode); exception->retry = 1; return 0; } @@ -1065,7 +1065,7 @@ static void nfs4_return_incompatible_delegation(struct inode *inode, fmode_t fmo return; } rcu_read_unlock(); - nfs_inode_return_delegation(inode); + nfs4_inode_return_delegation(inode); } static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata) @@ -3870,7 +3870,7 @@ static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl i = buf_to_pages_noslab(buf, buflen, arg.acl_pages, &arg.acl_pgbase); if (i < 0) return i; - nfs_inode_return_delegation(inode); + nfs4_inode_return_delegation(inode); ret = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1); /* @@ -6805,6 +6805,7 @@ const struct nfs_rpc_ops nfs_v4_clientops = { .close_context = nfs4_close_context, .open_context = nfs4_atomic_open, .have_delegation = nfs4_have_delegation, + .return_delegation = nfs4_inode_return_delegation, .init_client = nfs4_init_client, }; diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index 4aed3ddf9bb..16632930abd 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -739,6 +739,12 @@ static int nfs_have_delegation(struct inode *inode, fmode_t flags) return 0; } +static int nfs_return_delegation(struct inode *inode) +{ + nfs_wb_all(inode); + return 0; +} + const struct nfs_rpc_ops nfs_v2_clientops = { .version = 2, /* protocol version */ .dentry_ops = &nfs_dentry_operations, @@ -783,5 +789,6 @@ const struct nfs_rpc_ops nfs_v2_clientops = { .lock_check_bounds = nfs_lock_check_bounds, .close_context = nfs_close_context, .have_delegation = nfs_have_delegation, + .return_delegation = nfs_return_delegation, .init_client = nfs_init_client, }; diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c index 3210a03342f..13cea637eff 100644 --- a/fs/nfs/unlink.c +++ b/fs/nfs/unlink.c @@ -501,7 +501,7 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry) (unsigned long 
long)NFS_FILEID(dentry->d_inode)); /* Return delegation in anticipation of the rename */ - nfs_inode_return_delegation(dentry->d_inode); + NFS_PROTO(dentry->d_inode)->return_delegation(dentry->d_inode); sdentry = NULL; do { -- cgit v1.2.3 From cdb7ecedec766861e7c4cc35a203518f92023bff Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Wed, 20 Jun 2012 15:53:45 -0400 Subject: NFS: Create a free_client rpc_op NFS v4 needs a way to shut down callbacks and sessions, but v2 and v3 don't. Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 18 +++++++++--------- fs/nfs/internal.h | 1 + fs/nfs/nfs3proc.c | 1 + fs/nfs/nfs4_fs.h | 2 ++ fs/nfs/nfs4proc.c | 1 + fs/nfs/proc.c | 1 + 6 files changed, 15 insertions(+), 9 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/client.c b/fs/nfs/client.c index bf0f896284a..82cb8a386a8 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -242,6 +242,12 @@ static void nfs4_shutdown_client(struct nfs_client *clp) kfree(clp->cl_implid); } +void nfs4_free_client(struct nfs_client *clp) +{ + nfs4_shutdown_client(clp); + nfs_free_client(clp); +} + /* idr_remove_all is not needed as all id's are removed by nfs_put_client */ void nfs_cleanup_cb_ident_idr(struct net *net) { @@ -272,10 +278,6 @@ static void nfs4_destroy_server(struct nfs_server *server) } #else -static void nfs4_shutdown_client(struct nfs_client *clp) -{ -} - void nfs_cleanup_cb_ident_idr(struct net *net) { } @@ -293,12 +295,10 @@ static void pnfs_init_server(struct nfs_server *server) /* * Destroy a shared client record */ -static void nfs_free_client(struct nfs_client *clp) +void nfs_free_client(struct nfs_client *clp) { dprintk("--> nfs_free_client(%u)\n", clp->rpc_ops->version); - nfs4_shutdown_client(clp); - nfs_fscache_release_client_cookie(clp); /* -EIO all pending I/O */ @@ -335,7 +335,7 @@ void nfs_put_client(struct nfs_client *clp) BUG_ON(!list_empty(&clp->cl_superblocks)); - nfs_free_client(clp); + clp->rpc_ops->free_client(clp); } } 
EXPORT_SYMBOL_GPL(nfs_put_client); @@ -574,7 +574,7 @@ nfs_get_client(const struct nfs_client_initdata *cl_init, if (clp) { spin_unlock(&nn->nfs_client_lock); if (new) - nfs_free_client(new); + new->rpc_ops->free_client(new); return nfs_found_client(cl_init, clp); } if (new) { diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 18f99ef7134..93b73252334 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -151,6 +151,7 @@ extern void nfs_clients_init(struct net *net); extern void nfs_cleanup_cb_ident_idr(struct net *); extern void nfs_put_client(struct nfs_client *); +extern void nfs_free_client(struct nfs_client *); extern struct nfs_client *nfs4_find_client_ident(struct net *, int); extern struct nfs_client * nfs4_find_client_sessionid(struct net *, const struct sockaddr *, diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 4749a32e54b..4ccb34bf173 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -935,4 +935,5 @@ const struct nfs_rpc_ops nfs_v3_clientops = { .have_delegation = nfs3_have_delegation, .return_delegation = nfs3_return_delegation, .init_client = nfs_init_client, + .free_client = nfs_free_client, }; diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index cc5900ac61b..9889ee476e3 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -301,6 +301,8 @@ extern const u32 nfs4_pathconf_bitmap[2]; extern const u32 nfs4_fsinfo_bitmap[3]; extern const u32 nfs4_fs_locations_bitmap[2]; +void nfs4_free_client(struct nfs_client *); + /* nfs4renewd.c */ extern void nfs4_schedule_state_renewal(struct nfs_client *); extern void nfs4_renewd_prepare_shutdown(struct nfs_server *); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 035f7a0829e..f301c53926b 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -6807,6 +6807,7 @@ const struct nfs_rpc_ops nfs_v4_clientops = { .have_delegation = nfs4_have_delegation, .return_delegation = nfs4_inode_return_delegation, .init_client = nfs4_init_client, + .free_client = nfs4_free_client, }; 
static const struct xattr_handler nfs4_xattr_nfs4_acl_handler = { diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index 16632930abd..53620bf1096 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -791,4 +791,5 @@ const struct nfs_rpc_ops nfs_v2_clientops = { .have_delegation = nfs_have_delegation, .return_delegation = nfs_return_delegation, .init_client = nfs_init_client, + .free_client = nfs_free_client, }; -- cgit v1.2.3 From 6663ee7f8187708143255c057bc132bbc84c1894 Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Wed, 20 Jun 2012 15:53:46 -0400 Subject: NFS: Create an alloc_client rpc_op This gives NFS v4 a way to set up callbacks and sessions without v2 or v3 having to do them as well. Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 40 ++++++++++++++++++++++++++-------------- fs/nfs/internal.h | 1 + fs/nfs/nfs3proc.c | 1 + fs/nfs/nfs4_fs.h | 2 ++ fs/nfs/nfs4proc.c | 1 + fs/nfs/proc.c | 1 + 6 files changed, 32 insertions(+), 14 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 82cb8a386a8..254719c4a57 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -147,7 +147,7 @@ struct nfs_client_initdata { * Since these are allocated/deallocated very rarely, we don't * bother putting them in a slab cache... 
*/ -static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init) +struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init) { struct nfs_client *clp; struct rpc_cred *cred; @@ -177,18 +177,6 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_ clp->cl_proto = cl_init->proto; clp->cl_net = get_net(cl_init->net); -#ifdef CONFIG_NFS_V4 - err = nfs_get_cb_ident_idr(clp, cl_init->minorversion); - if (err) - goto error_cleanup; - - spin_lock_init(&clp->cl_lock); - INIT_DELAYED_WORK(&clp->cl_renewd, nfs4_renew_state); - rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS client"); - clp->cl_state = 1 << NFS4CLNT_LEASE_EXPIRED; - clp->cl_minorversion = cl_init->minorversion; - clp->cl_mvops = nfs_v4_minor_ops[cl_init->minorversion]; -#endif cred = rpc_lookup_machine_cred("*"); if (!IS_ERR(cred)) clp->cl_machine_cred = cred; @@ -218,6 +206,30 @@ static void nfs4_shutdown_session(struct nfs_client *clp) } #endif /* CONFIG_NFS_V4_1 */ +struct nfs_client *nfs4_alloc_client(const struct nfs_client_initdata *cl_init) +{ + int err; + struct nfs_client *clp = nfs_alloc_client(cl_init); + if (IS_ERR(clp)) + return clp; + + err = nfs_get_cb_ident_idr(clp, cl_init->minorversion); + if (err) + goto error; + + spin_lock_init(&clp->cl_lock); + INIT_DELAYED_WORK(&clp->cl_renewd, nfs4_renew_state); + rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS client"); + clp->cl_state = 1 << NFS4CLNT_LEASE_EXPIRED; + clp->cl_minorversion = cl_init->minorversion; + clp->cl_mvops = nfs_v4_minor_ops[cl_init->minorversion]; + return clp; + +error: + kfree(clp); + return ERR_PTR(err); +} + /* * Destroy the NFS4 callback service */ @@ -588,7 +600,7 @@ nfs_get_client(const struct nfs_client_initdata *cl_init, spin_unlock(&nn->nfs_client_lock); - new = nfs_alloc_client(cl_init); + new = cl_init->rpc_ops->alloc_client(cl_init); } while (!IS_ERR(new)); dprintk("<-- nfs_get_client() Failed to find %s (%ld)\n", diff --git a/fs/nfs/internal.h 
b/fs/nfs/internal.h index 93b73252334..633af813984 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -148,6 +148,7 @@ extern void nfs_umount(const struct nfs_mount_request *info); /* client.c */ extern const struct rpc_program nfs_program; extern void nfs_clients_init(struct net *net); +extern struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *); extern void nfs_cleanup_cb_ident_idr(struct net *); extern void nfs_put_client(struct nfs_client *); diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 4ccb34bf173..77c7aac228b 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -934,6 +934,7 @@ const struct nfs_rpc_ops nfs_v3_clientops = { .close_context = nfs_close_context, .have_delegation = nfs3_have_delegation, .return_delegation = nfs3_return_delegation, + .alloc_client = nfs_alloc_client, .init_client = nfs_init_client, .free_client = nfs_free_client, }; diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 9889ee476e3..a0be2d1af04 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -303,6 +303,8 @@ extern const u32 nfs4_fs_locations_bitmap[2]; void nfs4_free_client(struct nfs_client *); +struct nfs_client *nfs4_alloc_client(const struct nfs_client_initdata *); + /* nfs4renewd.c */ extern void nfs4_schedule_state_renewal(struct nfs_client *); extern void nfs4_renewd_prepare_shutdown(struct nfs_server *); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index f301c53926b..7f39e7ecde6 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -6806,6 +6806,7 @@ const struct nfs_rpc_ops nfs_v4_clientops = { .open_context = nfs4_atomic_open, .have_delegation = nfs4_have_delegation, .return_delegation = nfs4_inode_return_delegation, + .alloc_client = nfs4_alloc_client, .init_client = nfs4_init_client, .free_client = nfs4_free_client, }; diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index 53620bf1096..99a002515df 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -790,6 +790,7 @@ const struct nfs_rpc_ops nfs_v2_clientops = { 
.close_context = nfs_close_context, .have_delegation = nfs_have_delegation, .return_delegation = nfs_return_delegation, + .alloc_client = nfs_alloc_client, .init_client = nfs_init_client, .free_client = nfs_free_client, }; -- cgit v1.2.3 From 1abb50886afe8a126705c93dab2b50c1252a9c19 Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Wed, 20 Jun 2012 15:53:47 -0400 Subject: NFS: Create an read_pageio_init() function pNFS needs to select a read function based on the layout driver currently in use, so I let each NFS version decide how to best handle initializing reads. Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/internal.h | 2 +- fs/nfs/nfs3proc.c | 1 + fs/nfs/nfs4proc.c | 1 + fs/nfs/pnfs.c | 11 +++++------ fs/nfs/pnfs.h | 6 +++--- fs/nfs/proc.c | 1 + fs/nfs/read.c | 16 +++------------- 7 files changed, 15 insertions(+), 23 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 633af813984..b3121123b40 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -306,7 +306,7 @@ extern int nfs_initiate_read(struct rpc_clnt *clnt, extern void nfs_read_prepare(struct rpc_task *task, void *calldata); extern int nfs_generic_pagein(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr); -extern void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio, +extern void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode, const struct nfs_pgio_completion_ops *compl_ops); extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio); diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 77c7aac228b..9864d05432d 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -921,6 +921,7 @@ const struct nfs_rpc_ops nfs_v3_clientops = { .pathconf = nfs3_proc_pathconf, .decode_dirent = nfs3_decode_dirent, .read_setup = nfs3_proc_read_setup, + .read_pageio_init = nfs_pageio_init_read, .read_rpc_prepare = nfs3_proc_read_rpc_prepare, .read_done = nfs3_read_done, 
.write_setup = nfs3_proc_write_setup, diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 7f39e7ecde6..f99cf71f4e3 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -6792,6 +6792,7 @@ const struct nfs_rpc_ops nfs_v4_clientops = { .set_capabilities = nfs4_server_capabilities, .decode_dirent = nfs4_decode_dirent, .read_setup = nfs4_proc_read_setup, + .read_pageio_init = pnfs_pageio_init_read, .read_rpc_prepare = nfs4_proc_read_rpc_prepare, .read_done = nfs4_read_done, .write_setup = nfs4_proc_write_setup, diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index bbc49caa7a8..9c830603a16 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1209,7 +1209,7 @@ pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page * } EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write); -bool +void pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode, const struct nfs_pgio_completion_ops *compl_ops) { @@ -1217,10 +1217,9 @@ pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode, struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld; if (ld == NULL) - return false; - nfs_pageio_init(pgio, inode, ld->pg_read_ops, compl_ops, - server->rsize, 0); - return true; + nfs_pageio_init_read(pgio, inode, compl_ops); + else + nfs_pageio_init(pgio, inode, ld->pg_read_ops, compl_ops, server->rsize, 0); } bool @@ -1427,7 +1426,7 @@ int pnfs_read_done_resend_to_mds(struct inode *inode, LIST_HEAD(failed); /* Resend all requests through the MDS */ - nfs_pageio_init_read_mds(&pgio, inode, compl_ops); + nfs_pageio_init_read(&pgio, inode, compl_ops); while (!list_empty(head)) { struct nfs_page *req = nfs_list_entry(head->next); diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 64f90d845f6..80ee8919dd5 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -178,7 +178,7 @@ extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp); void get_layout_hdr(struct pnfs_layout_hdr *lo); void put_lseg(struct pnfs_layout_segment *lseg); -bool 
pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *, +void pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *, const struct nfs_pgio_completion_ops *); bool pnfs_pageio_init_write(struct nfs_pageio_descriptor *, struct inode *, int, const struct nfs_pgio_completion_ops *); @@ -438,10 +438,10 @@ static inline void unset_pnfs_layoutdriver(struct nfs_server *s) { } -static inline bool pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode, +static inline void pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode, const struct nfs_pgio_completion_ops *compl_ops) { - return false; + nfs_pageio_init_read(pgio, inode, compl_ops); } static inline bool pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags, diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index 99a002515df..6fea6e107bc 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -778,6 +778,7 @@ const struct nfs_rpc_ops nfs_v2_clientops = { .pathconf = nfs_proc_pathconf, .decode_dirent = nfs2_decode_dirent, .read_setup = nfs_proc_read_setup, + .read_pageio_init = nfs_pageio_init_read, .read_rpc_prepare = nfs_proc_read_rpc_prepare, .read_done = nfs_read_done, .write_setup = nfs_proc_write_setup, diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 86ced783621..6267b873bbc 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -20,8 +20,6 @@ #include #include -#include "pnfs.h" - #include "nfs4_fs.h" #include "internal.h" #include "iostat.h" @@ -108,7 +106,7 @@ int nfs_return_empty_page(struct page *page) return 0; } -void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio, +void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode, const struct nfs_pgio_completion_ops *compl_ops) { @@ -123,14 +121,6 @@ void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio) } EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds); -void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, - 
struct inode *inode, - const struct nfs_pgio_completion_ops *compl_ops) -{ - if (!pnfs_pageio_init_read(pgio, inode, compl_ops)) - nfs_pageio_init_read_mds(pgio, inode, compl_ops); -} - int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, struct page *page) { @@ -149,7 +139,7 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, if (len < PAGE_CACHE_SIZE) zero_user_segment(page, len, PAGE_CACHE_SIZE); - nfs_pageio_init_read(&pgio, inode, &nfs_async_read_completion_ops); + NFS_PROTO(inode)->read_pageio_init(&pgio, inode, &nfs_async_read_completion_ops); nfs_pageio_add_request(&pgio, new); nfs_pageio_complete(&pgio); NFS_I(inode)->read_io += pgio.pg_bytes_written; @@ -652,7 +642,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping, if (ret == 0) goto read_complete; /* all pages were read */ - nfs_pageio_init_read(&pgio, inode, &nfs_async_read_completion_ops); + NFS_PROTO(inode)->read_pageio_init(&pgio, inode, &nfs_async_read_completion_ops); ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc); -- cgit v1.2.3 From 57208fa7e51ca16cd68de8e8bf482f16b06d3ea1 Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Wed, 20 Jun 2012 15:53:48 -0400 Subject: NFS: Create an write_pageio_init() function pNFS needs to select a write function based on the layout driver currently in use, so I let each NFS version decide how to best handle initializing writes. 
Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/internal.h | 2 +- fs/nfs/nfs3proc.c | 1 + fs/nfs/nfs4proc.c | 1 + fs/nfs/pnfs.c | 11 +++++------ fs/nfs/pnfs.h | 6 +++--- fs/nfs/proc.c | 1 + fs/nfs/write.c | 18 ++++++------------ 7 files changed, 18 insertions(+), 22 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index b3121123b40..7edc172c371 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -320,7 +320,7 @@ extern struct nfs_write_header *nfs_writehdr_alloc(void); extern void nfs_writehdr_free(struct nfs_pgio_header *hdr); extern int nfs_generic_flush(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr); -extern void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio, +extern void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags, const struct nfs_pgio_completion_ops *compl_ops); extern void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio); diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 9864d05432d..f3344f7f46a 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -925,6 +925,7 @@ const struct nfs_rpc_ops nfs_v3_clientops = { .read_rpc_prepare = nfs3_proc_read_rpc_prepare, .read_done = nfs3_read_done, .write_setup = nfs3_proc_write_setup, + .write_pageio_init = nfs_pageio_init_write, .write_rpc_prepare = nfs3_proc_write_rpc_prepare, .write_done = nfs3_write_done, .commit_setup = nfs3_proc_commit_setup, diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index f99cf71f4e3..7d387cb8ceb 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -6796,6 +6796,7 @@ const struct nfs_rpc_ops nfs_v4_clientops = { .read_rpc_prepare = nfs4_proc_read_rpc_prepare, .read_done = nfs4_read_done, .write_setup = nfs4_proc_write_setup, + .write_pageio_init = pnfs_pageio_init_write, .write_rpc_prepare = nfs4_proc_write_rpc_prepare, .write_done = nfs4_write_done, .commit_setup = nfs4_proc_commit_setup, diff --git 
a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 9c830603a16..2617831afd3 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1222,7 +1222,7 @@ pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode, nfs_pageio_init(pgio, inode, ld->pg_read_ops, compl_ops, server->rsize, 0); } -bool +void pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags, const struct nfs_pgio_completion_ops *compl_ops) @@ -1231,10 +1231,9 @@ pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld; if (ld == NULL) - return false; - nfs_pageio_init(pgio, inode, ld->pg_write_ops, compl_ops, - server->wsize, ioflags); - return true; + nfs_pageio_init_write(pgio, inode, ioflags, compl_ops); + else + nfs_pageio_init(pgio, inode, ld->pg_write_ops, compl_ops, server->wsize, ioflags); } bool @@ -1271,7 +1270,7 @@ int pnfs_write_done_resend_to_mds(struct inode *inode, LIST_HEAD(failed); /* Resend all requests through the MDS */ - nfs_pageio_init_write_mds(&pgio, inode, FLUSH_STABLE, compl_ops); + nfs_pageio_init_write(&pgio, inode, FLUSH_STABLE, compl_ops); while (!list_empty(head)) { struct nfs_page *req = nfs_list_entry(head->next); diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 80ee8919dd5..592beb02c95 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -180,7 +180,7 @@ void put_lseg(struct pnfs_layout_segment *lseg); void pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *, const struct nfs_pgio_completion_ops *); -bool pnfs_pageio_init_write(struct nfs_pageio_descriptor *, struct inode *, +void pnfs_pageio_init_write(struct nfs_pageio_descriptor *, struct inode *, int, const struct nfs_pgio_completion_ops *); void set_pnfs_layoutdriver(struct nfs_server *, const struct nfs_fh *, u32); @@ -444,10 +444,10 @@ static inline void pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, str nfs_pageio_init_read(pgio, inode, compl_ops); } -static inline bool 
pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags, +static inline void pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags, const struct nfs_pgio_completion_ops *compl_ops) { - return false; + nfs_pageio_init_write(pgio, inode, ioflags, compl_ops); } static inline int diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index 6fea6e107bc..cf6499742b1 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -782,6 +782,7 @@ const struct nfs_rpc_ops nfs_v2_clientops = { .read_rpc_prepare = nfs_proc_read_rpc_prepare, .read_done = nfs_read_done, .write_setup = nfs_proc_write_setup, + .write_pageio_init = nfs_pageio_init_write, .write_rpc_prepare = nfs_proc_write_rpc_prepare, .write_done = nfs_write_done, .commit_setup = nfs_proc_commit_setup, diff --git a/fs/nfs/write.c b/fs/nfs/write.c index f163355b961..c11fb0025f0 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -336,8 +336,10 @@ static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc struct nfs_pageio_descriptor pgio; int err; - nfs_pageio_init_write(&pgio, page->mapping->host, wb_priority(wbc), - &nfs_async_write_completion_ops); + NFS_PROTO(page->mapping->host)->write_pageio_init(&pgio, + page->mapping->host, + wb_priority(wbc), + &nfs_async_write_completion_ops); err = nfs_do_writepage(page, wbc, &pgio); nfs_pageio_complete(&pgio); if (err < 0) @@ -380,8 +382,7 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc) nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES); - nfs_pageio_init_write(&pgio, inode, wb_priority(wbc), - &nfs_async_write_completion_ops); + NFS_PROTO(inode)->write_pageio_init(&pgio, inode, wb_priority(wbc), &nfs_async_write_completion_ops); err = write_cache_pages(mapping, wbc, nfs_writepages_callback, &pgio); nfs_pageio_complete(&pgio); @@ -1202,7 +1203,7 @@ static const struct nfs_pageio_ops nfs_pageio_write_ops = { .pg_doio = nfs_generic_pg_writepages, }; -void 
nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio, +void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags, const struct nfs_pgio_completion_ops *compl_ops) { @@ -1217,13 +1218,6 @@ void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio) } EXPORT_SYMBOL_GPL(nfs_pageio_reset_write_mds); -void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, - struct inode *inode, int ioflags, - const struct nfs_pgio_completion_ops *compl_ops) -{ - if (!pnfs_pageio_init_write(pgio, inode, ioflags, compl_ops)) - nfs_pageio_init_write_mds(pgio, inode, ioflags, compl_ops); -} void nfs_write_prepare(struct rpc_task *task, void *calldata) { -- cgit v1.2.3 From a8d8f02cf0c379693762107afe812b9e52090e39 Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Wed, 20 Jun 2012 15:53:49 -0400 Subject: NFS: Create custom NFS v4 write_inode() function This gives pnfs a chance to do a layout commit inside the v4 code. Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/nfs4_fs.h | 3 +++ fs/nfs/super.c | 2 +- fs/nfs/write.c | 10 ++++++++-- 3 files changed, 12 insertions(+), 3 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index a0be2d1af04..3696ca7f5f4 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -202,6 +202,9 @@ struct nfs4_state_maintenance_ops { extern const struct dentry_operations nfs4_dentry_operations; extern const struct inode_operations nfs4_dir_inode_operations; +/* write.c */ +int nfs4_write_inode(struct inode *, struct writeback_control *); + /* nfs4namespace.c */ rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *); struct rpc_clnt *nfs4_create_sec_client(struct rpc_clnt *, struct inode *, struct qstr *); diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 5a1c860743c..9d33fb22f28 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -391,7 +391,7 @@ struct file_system_type nfs4_referral_fs_type = { static const struct super_operations 
nfs4_sops = { .alloc_inode = nfs_alloc_inode, .destroy_inode = nfs_destroy_inode, - .write_inode = nfs_write_inode, + .write_inode = nfs4_write_inode, .put_super = nfs_put_super, .statfs = nfs_statfs, .evict_inode = nfs4_evict_inode, diff --git a/fs/nfs/write.c b/fs/nfs/write.c index c11fb0025f0..f312860c15d 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1671,9 +1671,14 @@ static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_contr int nfs_write_inode(struct inode *inode, struct writeback_control *wbc) { - int ret; + return nfs_commit_unstable_pages(inode, wbc); +} + +#ifdef CONFIG_NFS_V4 +int nfs4_write_inode(struct inode *inode, struct writeback_control *wbc) +{ + int ret = nfs_write_inode(inode, wbc); - ret = nfs_commit_unstable_pages(inode, wbc); if (ret >= 0 && test_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(inode)->flags)) { int status; bool sync = true; @@ -1687,6 +1692,7 @@ int nfs_write_inode(struct inode *inode, struct writeback_control *wbc) } return ret; } +#endif /* * flush the inode to disk. 
-- cgit v1.2.3 From b3d9b7a3c752dc4b6976a4ff7b8298887a5b734d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 9 Jun 2012 13:51:19 -0400 Subject: vfs: switch i_dentry/d_alias to hlist Signed-off-by: Al Viro --- fs/nfs/getroot.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c index 8abfb19bd3a..a67990f90bd 100644 --- a/fs/nfs/getroot.c +++ b/fs/nfs/getroot.c @@ -62,7 +62,7 @@ static int nfs_superblock_set_dummy_root(struct super_block *sb, struct inode *i */ spin_lock(&sb->s_root->d_inode->i_lock); spin_lock(&sb->s_root->d_lock); - list_del_init(&sb->s_root->d_alias); + hlist_del_init(&sb->s_root->d_alias); spin_unlock(&sb->s_root->d_lock); spin_unlock(&sb->s_root->d_inode->i_lock); } -- cgit v1.2.3 From 0dd2b474d0b69d58859399b1df7fdc699ea005d4 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 5 Jun 2012 15:10:18 +0200 Subject: nfs: implement i_op->atomic_open() Replace NFS4 specific ->lookup implementation with ->atomic_open implementation and use the generic nfs_lookup for other lookups. 
Signed-off-by: Miklos Szeredi CC: Trond Myklebust Signed-off-by: Al Viro --- fs/nfs/dir.c | 183 +++++++++++++++++++++++++++++++---------------------------- 1 file changed, 97 insertions(+), 86 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index f430057ff3b..0d8c71271d1 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -111,11 +111,15 @@ const struct inode_operations nfs3_dir_inode_operations = { #ifdef CONFIG_NFS_V4 -static struct dentry *nfs_atomic_lookup(struct inode *, struct dentry *, struct nameidata *); -static int nfs_open_create(struct inode *dir, struct dentry *dentry, umode_t mode, struct nameidata *nd); +static struct file *nfs_atomic_open(struct inode *, struct dentry *, + struct opendata *, unsigned, umode_t, + bool *); +static int nfs4_create(struct inode *dir, struct dentry *dentry, + umode_t mode, struct nameidata *nd); const struct inode_operations nfs4_dir_inode_operations = { - .create = nfs_open_create, - .lookup = nfs_atomic_lookup, + .create = nfs4_create, + .lookup = nfs_lookup, + .atomic_open = nfs_atomic_open, .link = nfs_link, .unlink = nfs_unlink, .symlink = nfs_symlink, @@ -1403,120 +1407,132 @@ static int do_open(struct inode *inode, struct file *filp) return 0; } -static int nfs_intent_set_file(struct nameidata *nd, struct nfs_open_context *ctx) +static struct file *nfs_finish_open(struct nfs_open_context *ctx, + struct dentry *dentry, + struct opendata *od, unsigned open_flags) { struct file *filp; - int ret = 0; + int err; + + if (ctx->dentry != dentry) { + dput(ctx->dentry); + ctx->dentry = dget(dentry); + } /* If the open_intent is for execute, we have an extra check to make */ if (ctx->mode & FMODE_EXEC) { - ret = nfs_may_open(ctx->dentry->d_inode, - ctx->cred, - nd->intent.open.flags); - if (ret < 0) + err = nfs_may_open(dentry->d_inode, ctx->cred, open_flags); + if (err < 0) { + filp = ERR_PTR(err); goto out; + } } - filp = lookup_instantiate_filp(nd, ctx->dentry, do_open); - if (IS_ERR(filp)) - ret = 
PTR_ERR(filp); - else + + filp = finish_open(od, dentry, do_open); + if (!IS_ERR(filp)) nfs_file_set_open_context(filp, ctx); + out: put_nfs_open_context(ctx); - return ret; + return filp; } -static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) +static struct file *nfs_atomic_open(struct inode *dir, struct dentry *dentry, + struct opendata *od, unsigned open_flags, + umode_t mode, bool *created) { struct nfs_open_context *ctx; - struct iattr attr; - struct dentry *res = NULL; + struct dentry *res; + struct iattr attr = { .ia_valid = ATTR_OPEN }; struct inode *inode; - int open_flags; + struct file *filp; int err; - dfprintk(VFS, "NFS: atomic_lookup(%s/%ld), %s\n", + /* Expect a negative dentry */ + BUG_ON(dentry->d_inode); + + dfprintk(VFS, "NFS: atomic_open(%s/%ld), %s\n", dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); - /* Check that we are indeed trying to open this file */ - if (!is_atomic_open(nd)) + /* NFS only supports OPEN on regular files */ + if ((open_flags & O_DIRECTORY)) { + err = -ENOENT; + if (!d_unhashed(dentry)) { + /* + * Hashed negative dentry with O_DIRECTORY: dentry was + * revalidated and is fine, no need to perform lookup + * again + */ + goto out_err; + } goto no_open; - - if (dentry->d_name.len > NFS_SERVER(dir)->namelen) { - res = ERR_PTR(-ENAMETOOLONG); - goto out; } - /* Let vfs_create() deal with O_EXCL. Instantiate, but don't hash - * the dentry. 
*/ - if (nd->flags & LOOKUP_EXCL) { - d_instantiate(dentry, NULL); - goto out; - } - - open_flags = nd->intent.open.flags; - attr.ia_valid = ATTR_OPEN; - - ctx = create_nfs_open_context(dentry, open_flags); - res = ERR_CAST(ctx); - if (IS_ERR(ctx)) - goto out; + err = -ENAMETOOLONG; + if (dentry->d_name.len > NFS_SERVER(dir)->namelen) + goto out_err; - if (nd->flags & LOOKUP_CREATE) { - attr.ia_mode = nd->intent.open.create_mode; + if (open_flags & O_CREAT) { attr.ia_valid |= ATTR_MODE; - attr.ia_mode &= ~current_umask(); - } else - open_flags &= ~(O_EXCL | O_CREAT); - + attr.ia_mode = mode & ~current_umask(); + } if (open_flags & O_TRUNC) { attr.ia_valid |= ATTR_SIZE; attr.ia_size = 0; } - /* Open the file on the server */ + ctx = create_nfs_open_context(dentry, open_flags); + err = PTR_ERR(ctx); + if (IS_ERR(ctx)) + goto out_err; + nfs_block_sillyrename(dentry->d_parent); inode = NFS_PROTO(dir)->open_context(dir, ctx, open_flags, &attr); + d_drop(dentry); if (IS_ERR(inode)) { nfs_unblock_sillyrename(dentry->d_parent); put_nfs_open_context(ctx); - switch (PTR_ERR(inode)) { - /* Make a negative dentry */ - case -ENOENT: - d_add(dentry, NULL); - res = NULL; - goto out; - /* This turned out not to be a regular file */ - case -EISDIR: - case -ENOTDIR: + err = PTR_ERR(inode); + switch (err) { + case -ENOENT: + d_add(dentry, NULL); + break; + case -EISDIR: + case -ENOTDIR: + goto no_open; + case -ELOOP: + if (!(open_flags & O_NOFOLLOW)) goto no_open; - case -ELOOP: - if (!(nd->intent.open.flags & O_NOFOLLOW)) - goto no_open; + break; /* case -EINVAL: */ - default: - res = ERR_CAST(inode); - goto out; + default: + break; } + goto out_err; } res = d_add_unique(dentry, inode); - nfs_unblock_sillyrename(dentry->d_parent); - if (res != NULL) { - dput(ctx->dentry); - ctx->dentry = dget(res); + if (res != NULL) dentry = res; - } - err = nfs_intent_set_file(nd, ctx); - if (err < 0) { - if (res != NULL) - dput(res); - return ERR_PTR(err); - } -out: + + 
nfs_unblock_sillyrename(dentry->d_parent); nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); - return res; + + filp = nfs_finish_open(ctx, dentry, od, open_flags); + + dput(res); + return filp; + +out_err: + return ERR_PTR(err); + no_open: - return nfs_lookup(dir, dentry, nd); + res = nfs_lookup(dir, dentry, NULL); + err = PTR_ERR(res); + if (IS_ERR(res)) + goto out_err; + + finish_no_open(od, res); + return NULL; } static int nfs4_lookup_revalidate(struct dentry *dentry, struct nameidata *nd) @@ -1566,8 +1582,8 @@ no_open: return nfs_lookup_revalidate(dentry, nd); } -static int nfs_open_create(struct inode *dir, struct dentry *dentry, - umode_t mode, struct nameidata *nd) +static int nfs4_create(struct inode *dir, struct dentry *dentry, + umode_t mode, struct nameidata *nd) { struct nfs_open_context *ctx = NULL; struct iattr attr; @@ -1591,19 +1607,14 @@ static int nfs_open_create(struct inode *dir, struct dentry *dentry, error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags, ctx); if (error != 0) goto out_put_ctx; - if (nd) { - error = nfs_intent_set_file(nd, ctx); - if (error < 0) - goto out_err; - } else { - put_nfs_open_context(ctx); - } + + put_nfs_open_context(ctx); + return 0; out_put_ctx: put_nfs_open_context(ctx); out_err_drop: d_drop(dentry); -out_err: return error; } -- cgit v1.2.3 From 8867fe5899010a0c0ac36dadfdacf1072b1c990c Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 5 Jun 2012 15:10:19 +0200 Subject: nfs: clean up ->create in nfs_rpc_ops Don't pass nfs_open_context() to ->create(). Only the NFS4 implementation needed that and only because it wanted to return an open file using open intents. That task has been replaced by ->atomic_open so it is not necessary anymore to pass the context to the create rpc operation. Despite nfs4_proc_create apparently being okay with a NULL context it Oopses somewhere down the call chain. So allocate a context here. 
Signed-off-by: Miklos Szeredi CC: Trond Myklebust Signed-off-by: Al Viro --- fs/nfs/dir.c | 42 ++---------------------------------------- fs/nfs/nfs3proc.c | 2 +- fs/nfs/nfs4proc.c | 37 ++++++++++--------------------------- fs/nfs/proc.c | 2 +- 4 files changed, 14 insertions(+), 69 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 0d8c71271d1..45015d32a86 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -114,10 +114,8 @@ const struct inode_operations nfs3_dir_inode_operations = { static struct file *nfs_atomic_open(struct inode *, struct dentry *, struct opendata *, unsigned, umode_t, bool *); -static int nfs4_create(struct inode *dir, struct dentry *dentry, - umode_t mode, struct nameidata *nd); const struct inode_operations nfs4_dir_inode_operations = { - .create = nfs4_create, + .create = nfs_create, .lookup = nfs_lookup, .atomic_open = nfs_atomic_open, .link = nfs_link, @@ -1582,42 +1580,6 @@ no_open: return nfs_lookup_revalidate(dentry, nd); } -static int nfs4_create(struct inode *dir, struct dentry *dentry, - umode_t mode, struct nameidata *nd) -{ - struct nfs_open_context *ctx = NULL; - struct iattr attr; - int error; - int open_flags = O_CREAT|O_EXCL; - - dfprintk(VFS, "NFS: create(%s/%ld), %s\n", - dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); - - attr.ia_mode = mode; - attr.ia_valid = ATTR_MODE; - - if (nd) - open_flags = nd->intent.open.flags; - - ctx = create_nfs_open_context(dentry, open_flags); - error = PTR_ERR(ctx); - if (IS_ERR(ctx)) - goto out_err_drop; - - error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags, ctx); - if (error != 0) - goto out_put_ctx; - - put_nfs_open_context(ctx); - - return 0; -out_put_ctx: - put_nfs_open_context(ctx); -out_err_drop: - d_drop(dentry); - return error; -} - #endif /* CONFIG_NFSV4 */ /* @@ -1684,7 +1646,7 @@ static int nfs_create(struct inode *dir, struct dentry *dentry, if (nd) open_flags = nd->intent.open.flags; - error = NFS_PROTO(dir)->create(dir, dentry, &attr, 
open_flags, NULL); + error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags); if (error != 0) goto out_err; return 0; diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 2292a0fd2bf..3187e24e8f7 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -314,7 +314,7 @@ static void nfs3_free_createdata(struct nfs3_createdata *data) */ static int nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, - int flags, struct nfs_open_context *ctx) + int flags) { struct nfs3_createdata *data; umode_t mode = sattr->ia_mode; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 15fc7e4664e..c157b2089b4 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2806,37 +2806,22 @@ static int nfs4_proc_readlink(struct inode *inode, struct page *page, } /* - * Got race? - * We will need to arrange for the VFS layer to provide an atomic open. - * Until then, this create/open method is prone to inefficiency and race - * conditions due to the lookup, create, and open VFS calls from sys_open() - * placed on the wire. - * - * Given the above sorry state of affairs, I'm simply sending an OPEN. - * The file will be opened again in the subsequent VFS open call - * (nfs4_proc_file_open). - * - * The open for read will just hang around to be used by any process that - * opens the file O_RDONLY. This will all be resolved with the VFS changes. + * This is just for mknod. open(O_CREAT) will always do ->open_context(). 
*/ - static int nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, - int flags, struct nfs_open_context *ctx) + int flags) { - struct dentry *de = dentry; + struct nfs_open_context *ctx; struct nfs4_state *state; - struct rpc_cred *cred = NULL; - fmode_t fmode = 0; int status = 0; - if (ctx != NULL) { - cred = ctx->cred; - de = ctx->dentry; - fmode = ctx->mode; - } + ctx = alloc_nfs_open_context(dentry, FMODE_READ); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + sattr->ia_mode &= ~current_umask(); - state = nfs4_do_open(dir, de, fmode, flags, sattr, cred, NULL); + state = nfs4_do_open(dir, dentry, ctx->mode, flags, sattr, ctx->cred, NULL); d_drop(dentry); if (IS_ERR(state)) { status = PTR_ERR(state); @@ -2844,11 +2829,9 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, } d_add(dentry, igrab(state->inode)); nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); - if (ctx != NULL) - ctx->state = state; - else - nfs4_close_sync(state, fmode); + ctx->state = state; out: + put_nfs_open_context(ctx); return status; } diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index 617c7419a08..4433806e116 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -259,7 +259,7 @@ static void nfs_free_createdata(const struct nfs_createdata *data) static int nfs_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, - int flags, struct nfs_open_context *ctx) + int flags) { struct nfs_createdata *data; struct rpc_message msg = { -- cgit v1.2.3 From 50de348c3604f7684a89ce64180666d4dd74623f Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 5 Jun 2012 15:10:20 +0200 Subject: nfs: don't use nd->intent.open.flags Instead check LOOKUP_EXCL in nd->flags, which is basically what the open intent flags were used for. 
Signed-off-by: Miklos Szeredi CC: Trond Myklebust Signed-off-by: Al Viro --- fs/nfs/dir.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 45015d32a86..0432f474771 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1538,7 +1538,7 @@ static int nfs4_lookup_revalidate(struct dentry *dentry, struct nameidata *nd) struct dentry *parent = NULL; struct inode *inode; struct inode *dir; - int openflags, ret = 0; + int ret = 0; if (nd->flags & LOOKUP_RCU) return -ECHILD; @@ -1562,9 +1562,8 @@ static int nfs4_lookup_revalidate(struct dentry *dentry, struct nameidata *nd) /* NFS only supports OPEN on regular files */ if (!S_ISREG(inode->i_mode)) goto no_open_dput; - openflags = nd->intent.open.flags; /* We cannot do exclusive creation on a positive dentry */ - if ((openflags & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL)) + if (nd && nd->flags & LOOKUP_EXCL) goto no_open_dput; /* Let f_op->open() actually open (and revalidate) the file */ @@ -1643,8 +1642,8 @@ static int nfs_create(struct inode *dir, struct dentry *dentry, attr.ia_mode = mode; attr.ia_valid = ATTR_MODE; - if (nd) - open_flags = nd->intent.open.flags; + if (nd && !(nd->flags & LOOKUP_EXCL)) + open_flags = O_CREAT; error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags); if (error != 0) -- cgit v1.2.3 From eda72afb9ef9f45941fb09260c0f268ff81ec40d Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 5 Jun 2012 15:10:21 +0200 Subject: nfs: don't use intents for checking atomic open is_atomic_open() is now only used by nfs4_lookup_revalidate() to check whether it's okay to skip normal revalidation. It does a racy check for mount read-onlyness and falls back to normal revalidation if the open would fail. This makes little sense now that this function isn't used for determining whether to actually open the file or not. 
The d_mountpoint() check still makes sense since it is an indication that we might be following a mount and so open may not revalidate the dentry. Signed-off-by: Miklos Szeredi CC: Trond Myklebust Signed-off-by: Al Viro --- fs/nfs/dir.c | 24 ++++-------------------- 1 file changed, 4 insertions(+), 20 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 0432f474771..e6d55dc93ff 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1366,24 +1366,6 @@ const struct dentry_operations nfs4_dentry_operations = { .d_release = nfs_d_release, }; -/* - * Use intent information to determine whether we need to substitute - * the NFSv4-style stateful OPEN for the LOOKUP call - */ -static int is_atomic_open(struct nameidata *nd) -{ - if (nd == NULL || nfs_lookup_check_intent(nd, LOOKUP_OPEN) == 0) - return 0; - /* NFS does not (yet) have a stateful open for directories */ - if (nd->flags & LOOKUP_DIRECTORY) - return 0; - /* Are we trying to write to a read only partition? */ - if (__mnt_is_readonly(nd->path.mnt) && - (nd->intent.open.flags & (O_CREAT|O_TRUNC|O_ACCMODE))) - return 0; - return 1; -} - static fmode_t flags_to_mode(int flags) { fmode_t res = (__force fmode_t)flags & FMODE_EXEC; @@ -1543,10 +1525,12 @@ static int nfs4_lookup_revalidate(struct dentry *dentry, struct nameidata *nd) if (nd->flags & LOOKUP_RCU) return -ECHILD; - inode = dentry->d_inode; - if (!is_atomic_open(nd) || d_mountpoint(dentry)) + if (!(nd->flags & LOOKUP_OPEN) || (nd->flags & LOOKUP_DIRECTORY)) + goto no_open; + if (d_mountpoint(dentry)) goto no_open; + inode = dentry->d_inode; parent = dget_parent(dentry); dir = parent->d_inode; -- cgit v1.2.3 From 47237687d73cbeae1dd7a133c3fc3d7239094568 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 10 Jun 2012 05:01:45 -0400 Subject: ->atomic_open() prototype change - pass int * instead of bool * ... and let finish_open() report having opened the file via that sucker. Next step: don't modify od->filp at all. 
[AV: FILE_CREATE was already used by cifs; Miklos' fix folded] Signed-off-by: Al Viro --- fs/nfs/dir.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index e6d55dc93ff..6deb2549ead 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -113,7 +113,7 @@ const struct inode_operations nfs3_dir_inode_operations = { static struct file *nfs_atomic_open(struct inode *, struct dentry *, struct opendata *, unsigned, umode_t, - bool *); + int *); const struct inode_operations nfs4_dir_inode_operations = { .create = nfs_create, .lookup = nfs_lookup, @@ -1389,7 +1389,8 @@ static int do_open(struct inode *inode, struct file *filp) static struct file *nfs_finish_open(struct nfs_open_context *ctx, struct dentry *dentry, - struct opendata *od, unsigned open_flags) + struct opendata *od, unsigned open_flags, + int *opened) { struct file *filp; int err; @@ -1408,7 +1409,7 @@ static struct file *nfs_finish_open(struct nfs_open_context *ctx, } } - filp = finish_open(od, dentry, do_open); + filp = finish_open(od, dentry, do_open, opened); if (!IS_ERR(filp)) nfs_file_set_open_context(filp, ctx); @@ -1419,7 +1420,7 @@ out: static struct file *nfs_atomic_open(struct inode *dir, struct dentry *dentry, struct opendata *od, unsigned open_flags, - umode_t mode, bool *created) + umode_t mode, int *opened) { struct nfs_open_context *ctx; struct dentry *res; @@ -1497,7 +1498,7 @@ static struct file *nfs_atomic_open(struct inode *dir, struct dentry *dentry, nfs_unblock_sillyrename(dentry->d_parent); nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); - filp = nfs_finish_open(ctx, dentry, od, open_flags); + filp = nfs_finish_open(ctx, dentry, od, open_flags, opened); dput(res); return filp; -- cgit v1.2.3 From d95852777bc8ba6b3ad3397d495c5f9dd8ca8383 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 22 Jun 2012 12:39:14 +0400 Subject: make ->atomic_open() return int Change of calling conventions: old new NULL 1 file 0 
ERR_PTR(-ve) -ve Caller *knows* that struct file *; no need to return it. Signed-off-by: Al Viro --- fs/nfs/dir.c | 57 +++++++++++++++++++++++++++------------------------------ 1 file changed, 27 insertions(+), 30 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 6deb2549ead..b56f4b36ed4 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -111,9 +111,9 @@ const struct inode_operations nfs3_dir_inode_operations = { #ifdef CONFIG_NFS_V4 -static struct file *nfs_atomic_open(struct inode *, struct dentry *, - struct opendata *, unsigned, umode_t, - int *); +static int nfs_atomic_open(struct inode *, struct dentry *, + struct opendata *, unsigned, umode_t, + int *); const struct inode_operations nfs4_dir_inode_operations = { .create = nfs_create, .lookup = nfs_lookup, @@ -1387,10 +1387,10 @@ static int do_open(struct inode *inode, struct file *filp) return 0; } -static struct file *nfs_finish_open(struct nfs_open_context *ctx, - struct dentry *dentry, - struct opendata *od, unsigned open_flags, - int *opened) +static int nfs_finish_open(struct nfs_open_context *ctx, + struct dentry *dentry, + struct opendata *od, unsigned open_flags, + int *opened) { struct file *filp; int err; @@ -1403,30 +1403,31 @@ static struct file *nfs_finish_open(struct nfs_open_context *ctx, /* If the open_intent is for execute, we have an extra check to make */ if (ctx->mode & FMODE_EXEC) { err = nfs_may_open(dentry->d_inode, ctx->cred, open_flags); - if (err < 0) { - filp = ERR_PTR(err); + if (err < 0) goto out; - } } filp = finish_open(od, dentry, do_open, opened); - if (!IS_ERR(filp)) - nfs_file_set_open_context(filp, ctx); + if (IS_ERR(filp)) { + err = PTR_ERR(filp); + goto out; + } + nfs_file_set_open_context(filp, ctx); + err = 0; out: put_nfs_open_context(ctx); - return filp; + return err; } -static struct file *nfs_atomic_open(struct inode *dir, struct dentry *dentry, - struct opendata *od, unsigned open_flags, - umode_t mode, int *opened) +static int 
nfs_atomic_open(struct inode *dir, struct dentry *dentry, + struct opendata *od, unsigned open_flags, + umode_t mode, int *opened) { struct nfs_open_context *ctx; struct dentry *res; struct iattr attr = { .ia_valid = ATTR_OPEN }; struct inode *inode; - struct file *filp; int err; /* Expect a negative dentry */ @@ -1437,21 +1438,19 @@ static struct file *nfs_atomic_open(struct inode *dir, struct dentry *dentry, /* NFS only supports OPEN on regular files */ if ((open_flags & O_DIRECTORY)) { - err = -ENOENT; if (!d_unhashed(dentry)) { /* * Hashed negative dentry with O_DIRECTORY: dentry was * revalidated and is fine, no need to perform lookup * again */ - goto out_err; + return -ENOENT; } goto no_open; } - err = -ENAMETOOLONG; if (dentry->d_name.len > NFS_SERVER(dir)->namelen) - goto out_err; + return -ENAMETOOLONG; if (open_flags & O_CREAT) { attr.ia_valid |= ATTR_MODE; @@ -1465,7 +1464,7 @@ static struct file *nfs_atomic_open(struct inode *dir, struct dentry *dentry, ctx = create_nfs_open_context(dentry, open_flags); err = PTR_ERR(ctx); if (IS_ERR(ctx)) - goto out_err; + goto out; nfs_block_sillyrename(dentry->d_parent); inode = NFS_PROTO(dir)->open_context(dir, ctx, open_flags, &attr); @@ -1489,7 +1488,7 @@ static struct file *nfs_atomic_open(struct inode *dir, struct dentry *dentry, default: break; } - goto out_err; + goto out; } res = d_add_unique(dentry, inode); if (res != NULL) @@ -1498,22 +1497,20 @@ static struct file *nfs_atomic_open(struct inode *dir, struct dentry *dentry, nfs_unblock_sillyrename(dentry->d_parent); nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); - filp = nfs_finish_open(ctx, dentry, od, open_flags, opened); + err = nfs_finish_open(ctx, dentry, od, open_flags, opened); dput(res); - return filp; - -out_err: - return ERR_PTR(err); +out: + return err; no_open: res = nfs_lookup(dir, dentry, NULL); err = PTR_ERR(res); if (IS_ERR(res)) - goto out_err; + goto out; finish_no_open(od, res); - return NULL; + return 1; } static int 
nfs4_lookup_revalidate(struct dentry *dentry, struct nameidata *nd) -- cgit v1.2.3 From 30d904947459cca2beb69e0110716f5248b31f2a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 22 Jun 2012 12:40:19 +0400 Subject: kill struct opendata Just pass struct file *. Methods are happier that way... There's no need to return struct file * from finish_open() now, so let it return int. Next: saner prototypes for parts in namei.c Signed-off-by: Al Viro --- fs/nfs/dir.c | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index b56f4b36ed4..dafc86c1c35 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -112,7 +112,7 @@ const struct inode_operations nfs3_dir_inode_operations = { #ifdef CONFIG_NFS_V4 static int nfs_atomic_open(struct inode *, struct dentry *, - struct opendata *, unsigned, umode_t, + struct file *, unsigned, umode_t, int *); const struct inode_operations nfs4_dir_inode_operations = { .create = nfs_create, @@ -1389,10 +1389,9 @@ static int do_open(struct inode *inode, struct file *filp) static int nfs_finish_open(struct nfs_open_context *ctx, struct dentry *dentry, - struct opendata *od, unsigned open_flags, + struct file *file, unsigned open_flags, int *opened) { - struct file *filp; int err; if (ctx->dentry != dentry) { @@ -1407,13 +1406,10 @@ static int nfs_finish_open(struct nfs_open_context *ctx, goto out; } - filp = finish_open(od, dentry, do_open, opened); - if (IS_ERR(filp)) { - err = PTR_ERR(filp); + err = finish_open(file, dentry, do_open, opened); + if (err) goto out; - } - nfs_file_set_open_context(filp, ctx); - err = 0; + nfs_file_set_open_context(file, ctx); out: put_nfs_open_context(ctx); @@ -1421,7 +1417,7 @@ out: } static int nfs_atomic_open(struct inode *dir, struct dentry *dentry, - struct opendata *od, unsigned open_flags, + struct file *file, unsigned open_flags, umode_t mode, int *opened) { struct nfs_open_context *ctx; @@ -1497,7 +1493,7 @@ static int 
nfs_atomic_open(struct inode *dir, struct dentry *dentry, nfs_unblock_sillyrename(dentry->d_parent); nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); - err = nfs_finish_open(ctx, dentry, od, open_flags, opened); + err = nfs_finish_open(ctx, dentry, file, open_flags, opened); dput(res); out: @@ -1509,7 +1505,7 @@ no_open: if (IS_ERR(res)) goto out; - finish_no_open(od, res); + finish_no_open(file, res); return 1; } -- cgit v1.2.3 From e45198a6ac24bd2c4ad4a43b670c2f1a23dd2df3 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 10 Jun 2012 06:48:09 -0400 Subject: make finish_no_open() return int namely, 1 ;-) That's what we want to return from ->atomic_open() instances after finish_no_open(). Signed-off-by: Al Viro --- fs/nfs/dir.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index dafc86c1c35..f167c7a1d67 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1505,8 +1505,7 @@ no_open: if (IS_ERR(res)) goto out; - finish_no_open(file, res); - return 1; + return finish_no_open(file, res); } static int nfs4_lookup_revalidate(struct dentry *dentry, struct nameidata *nd) -- cgit v1.2.3 From 93420b40bb19433c3bc01c37c6c908ae7ce13228 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 10 Jun 2012 15:18:15 -0400 Subject: switch nfs_lookup_check_intent() away from nameidata just pass the flags Signed-off-by: Al Viro --- fs/nfs/dir.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index f167c7a1d67..48485f1f0bd 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1037,10 +1037,10 @@ static int nfs_check_verifier(struct inode *dir, struct dentry *dentry) * component of the path and none of them is set before that last * component. 
*/ -static inline unsigned int nfs_lookup_check_intent(struct nameidata *nd, +static inline unsigned int nfs_lookup_check_intent(unsigned int flags, unsigned int mask) { - return nd->flags & mask; + return flags & mask; } /* @@ -1051,7 +1051,7 @@ static int nfs_is_exclusive_create(struct inode *dir, struct nameidata *nd) { if (NFS_PROTO(dir)->version == 2) return 0; - return nd && nfs_lookup_check_intent(nd, LOOKUP_EXCL); + return nd && nfs_lookup_check_intent(nd->flags, LOOKUP_EXCL); } /* @@ -1074,7 +1074,7 @@ int nfs_lookup_verify_inode(struct inode *inode, struct nameidata *nd) if (nd->flags & LOOKUP_REVAL) goto out_force; /* This is an open(2) */ - if (nfs_lookup_check_intent(nd, LOOKUP_OPEN) != 0 && + if (nfs_lookup_check_intent(nd->flags, LOOKUP_OPEN) != 0 && !(server->flags & NFS_MOUNT_NOCTO) && (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode))) @@ -1098,7 +1098,7 @@ int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry, struct nameidata *nd) { /* Don't revalidate a negative dentry if we're creating a new file */ - if (nd != NULL && nfs_lookup_check_intent(nd, LOOKUP_CREATE) != 0) + if (nd != NULL && nfs_lookup_check_intent(nd->flags, LOOKUP_CREATE) != 0) return 0; if (NFS_SERVER(dir)->flags & NFS_MOUNT_LOOKUP_CACHE_NONEG) return 1; -- cgit v1.2.3 From facc3530fb5c89a40bc83045422add392b8db4a1 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 10 Jun 2012 15:33:51 -0400 Subject: nfs_lookup_verify_inode() - nd is *always* non-NULL here Signed-off-by: Al Viro --- fs/nfs/dir.c | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 48485f1f0bd..ad5aef4995a 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1069,19 +1069,16 @@ int nfs_lookup_verify_inode(struct inode *inode, struct nameidata *nd) if (IS_AUTOMOUNT(inode)) return 0; - if (nd != NULL) { - /* VFS wants an on-the-wire revalidation */ - if (nd->flags & LOOKUP_REVAL) - goto out_force; - /* This is an 
open(2) */ - if (nfs_lookup_check_intent(nd->flags, LOOKUP_OPEN) != 0 && - !(server->flags & NFS_MOUNT_NOCTO) && - (S_ISREG(inode->i_mode) || - S_ISDIR(inode->i_mode))) - goto out_force; - return 0; - } - return nfs_revalidate_inode(server, inode); + /* VFS wants an on-the-wire revalidation */ + if (nd->flags & LOOKUP_REVAL) + goto out_force; + /* This is an open(2) */ + if (nfs_lookup_check_intent(nd->flags, LOOKUP_OPEN) != 0 && + !(server->flags & NFS_MOUNT_NOCTO) && + (S_ISREG(inode->i_mode) || + S_ISDIR(inode->i_mode))) + goto out_force; + return 0; out_force: return __nfs_revalidate_inode(server, inode); } -- cgit v1.2.3 From fa3c56bbda6c2ac2a57d96ba501dbe85cccd312b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 10 Jun 2012 15:36:40 -0400 Subject: fs/nfs/dir.c: switch to passing nd->flags instead of nd wherever possible Signed-off-by: Al Viro --- fs/nfs/dir.c | 51 +++++++++++++++++++-------------------------------- 1 file changed, 19 insertions(+), 32 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index ad5aef4995a..71a199435ca 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1030,28 +1030,15 @@ static int nfs_check_verifier(struct inode *dir, struct dentry *dentry) return 1; } -/* - * Return the intent data that applies to this particular path component - * - * Note that the current set of intents only apply to the very last - * component of the path and none of them is set before that last - * component. - */ -static inline unsigned int nfs_lookup_check_intent(unsigned int flags, - unsigned int mask) -{ - return flags & mask; -} - /* * Use intent information to check whether or not we're going to do * an O_EXCL create using this path component. 
*/ -static int nfs_is_exclusive_create(struct inode *dir, struct nameidata *nd) +static int nfs_is_exclusive_create(struct inode *dir, unsigned int flags) { if (NFS_PROTO(dir)->version == 2) return 0; - return nd && nfs_lookup_check_intent(nd->flags, LOOKUP_EXCL); + return flags & LOOKUP_EXCL; } /* @@ -1063,20 +1050,18 @@ static int nfs_is_exclusive_create(struct inode *dir, struct nameidata *nd) * */ static inline -int nfs_lookup_verify_inode(struct inode *inode, struct nameidata *nd) +int nfs_lookup_verify_inode(struct inode *inode, unsigned int flags) { struct nfs_server *server = NFS_SERVER(inode); if (IS_AUTOMOUNT(inode)) return 0; /* VFS wants an on-the-wire revalidation */ - if (nd->flags & LOOKUP_REVAL) + if (flags & LOOKUP_REVAL) goto out_force; /* This is an open(2) */ - if (nfs_lookup_check_intent(nd->flags, LOOKUP_OPEN) != 0 && - !(server->flags & NFS_MOUNT_NOCTO) && - (S_ISREG(inode->i_mode) || - S_ISDIR(inode->i_mode))) + if ((flags & LOOKUP_OPEN) && !(server->flags & NFS_MOUNT_NOCTO) && + (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode))) goto out_force; return 0; out_force: @@ -1092,10 +1077,10 @@ out_force: */ static inline int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry, - struct nameidata *nd) + unsigned int flags) { /* Don't revalidate a negative dentry if we're creating a new file */ - if (nd != NULL && nfs_lookup_check_intent(nd->flags, LOOKUP_CREATE) != 0) + if (flags & LOOKUP_CREATE) return 0; if (NFS_SERVER(dir)->flags & NFS_MOUNT_LOOKUP_CACHE_NONEG) return 1; @@ -1115,6 +1100,7 @@ int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry, */ static int nfs_lookup_revalidate(struct dentry *dentry, struct nameidata *nd) { + unsigned int flags = nd->flags; struct inode *dir; struct inode *inode; struct dentry *parent; @@ -1122,7 +1108,7 @@ static int nfs_lookup_revalidate(struct dentry *dentry, struct nameidata *nd) struct nfs_fattr *fattr = NULL; int error; - if (nd->flags & LOOKUP_RCU) + if (flags & LOOKUP_RCU) return 
-ECHILD; parent = dget_parent(dentry); @@ -1131,7 +1117,7 @@ static int nfs_lookup_revalidate(struct dentry *dentry, struct nameidata *nd) inode = dentry->d_inode; if (!inode) { - if (nfs_neg_need_reval(dir, dentry, nd)) + if (nfs_neg_need_reval(dir, dentry, flags)) goto out_bad; goto out_valid_noent; } @@ -1147,8 +1133,8 @@ static int nfs_lookup_revalidate(struct dentry *dentry, struct nameidata *nd) goto out_set_verifier; /* Force a full look up iff the parent directory has changed */ - if (!nfs_is_exclusive_create(dir, nd) && nfs_check_verifier(dir, dentry)) { - if (nfs_lookup_verify_inode(inode, nd)) + if (!nfs_is_exclusive_create(dir, flags) && nfs_check_verifier(dir, dentry)) { + if (nfs_lookup_verify_inode(inode, flags)) goto out_zap_parent; goto out_valid; } @@ -1306,7 +1292,7 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru * If we're doing an exclusive create, optimize away the lookup * but don't hash the dentry. */ - if (nfs_is_exclusive_create(dir, nd)) { + if (nd && nfs_is_exclusive_create(dir, nd->flags)) { d_instantiate(dentry, NULL); res = NULL; goto out; @@ -1507,15 +1493,16 @@ no_open: static int nfs4_lookup_revalidate(struct dentry *dentry, struct nameidata *nd) { + unsigned int flags = nd->flags; struct dentry *parent = NULL; struct inode *inode; struct inode *dir; int ret = 0; - if (nd->flags & LOOKUP_RCU) + if (flags & LOOKUP_RCU) return -ECHILD; - if (!(nd->flags & LOOKUP_OPEN) || (nd->flags & LOOKUP_DIRECTORY)) + if (!(flags & LOOKUP_OPEN) || (flags & LOOKUP_DIRECTORY)) goto no_open; if (d_mountpoint(dentry)) goto no_open; @@ -1528,7 +1515,7 @@ static int nfs4_lookup_revalidate(struct dentry *dentry, struct nameidata *nd) * optimize away revalidation of negative dentries. 
*/ if (inode == NULL) { - if (!nfs_neg_need_reval(dir, dentry, nd)) + if (!nfs_neg_need_reval(dir, dentry, flags)) ret = 1; goto out; } @@ -1537,7 +1524,7 @@ static int nfs4_lookup_revalidate(struct dentry *dentry, struct nameidata *nd) if (!S_ISREG(inode->i_mode)) goto no_open_dput; /* We cannot do exclusive creation on a positive dentry */ - if (nd && nd->flags & LOOKUP_EXCL) + if (flags & LOOKUP_EXCL) goto no_open_dput; /* Let f_op->open() actually open (and revalidate) the file */ -- cgit v1.2.3 From 0b728e1911cbe6e24020727c3870628b9653f32a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 10 Jun 2012 16:03:43 -0400 Subject: stop passing nameidata * to ->d_revalidate() Just the lookup flags. Die, bastard, die... Signed-off-by: Al Viro --- fs/nfs/dir.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 71a199435ca..656f52e9aa2 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1098,9 +1098,8 @@ int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry, * If the parent directory is seen to have changed, we throw out the * cached dentry and do a new lookup. 
*/ -static int nfs_lookup_revalidate(struct dentry *dentry, struct nameidata *nd) +static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags) { - unsigned int flags = nd->flags; struct inode *dir; struct inode *inode; struct dentry *parent; @@ -1339,7 +1338,7 @@ out: } #ifdef CONFIG_NFS_V4 -static int nfs4_lookup_revalidate(struct dentry *, struct nameidata *); +static int nfs4_lookup_revalidate(struct dentry *, unsigned int); const struct dentry_operations nfs4_dentry_operations = { .d_revalidate = nfs4_lookup_revalidate, @@ -1491,9 +1490,8 @@ no_open: return finish_no_open(file, res); } -static int nfs4_lookup_revalidate(struct dentry *dentry, struct nameidata *nd) +static int nfs4_lookup_revalidate(struct dentry *dentry, unsigned int flags) { - unsigned int flags = nd->flags; struct dentry *parent = NULL; struct inode *inode; struct inode *dir; @@ -1537,7 +1535,7 @@ out: no_open_dput: dput(parent); no_open: - return nfs_lookup_revalidate(dentry, nd); + return nfs_lookup_revalidate(dentry, flags); } #endif /* CONFIG_NFSV4 */ -- cgit v1.2.3 From 00cd8dd3bf95f2cc8435b4cac01d9995635c6d0b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 10 Jun 2012 17:13:09 -0400 Subject: stop passing nameidata to ->lookup() Just the flags; only NFS cares even about that, but there are legitimate uses for such argument. And getting rid of that completely would require splitting ->lookup() into a couple of methods (at least), so let's leave that alone for now... 
Signed-off-by: Al Viro --- fs/nfs/dir.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 656f52e9aa2..8f21205c589 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -46,7 +46,7 @@ static int nfs_opendir(struct inode *, struct file *); static int nfs_closedir(struct inode *, struct file *); static int nfs_readdir(struct file *, void *, filldir_t); -static struct dentry *nfs_lookup(struct inode *, struct dentry *, struct nameidata *); +static struct dentry *nfs_lookup(struct inode *, struct dentry *, unsigned int); static int nfs_create(struct inode *, struct dentry *, umode_t, struct nameidata *); static int nfs_mkdir(struct inode *, struct dentry *, umode_t); static int nfs_rmdir(struct inode *, struct dentry *); @@ -1270,7 +1270,7 @@ const struct dentry_operations nfs_dentry_operations = { .d_release = nfs_d_release, }; -static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) +static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags) { struct dentry *res; struct dentry *parent; @@ -1291,7 +1291,7 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru * If we're doing an exclusive create, optimize away the lookup * but don't hash the dentry. */ - if (nd && nfs_is_exclusive_create(dir, nd->flags)) { + if (nfs_is_exclusive_create(dir, flags)) { d_instantiate(dentry, NULL); res = NULL; goto out; @@ -1482,7 +1482,7 @@ out: return err; no_open: - res = nfs_lookup(dir, dentry, NULL); + res = nfs_lookup(dir, dentry, 0); err = PTR_ERR(res); if (IS_ERR(res)) goto out; -- cgit v1.2.3 From ebfc3b49a7ac25920cb5be5445f602e51d2ea559 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 10 Jun 2012 18:05:36 -0400 Subject: don't pass nameidata to ->create() boolean "does it have to be exclusive?" 
flag is passed instead; Local filesystem should just ignore it - the object is guaranteed not to be there yet. Signed-off-by: Al Viro --- fs/nfs/dir.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 8f21205c589..a6b1c7fb823 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -47,7 +47,7 @@ static int nfs_opendir(struct inode *, struct file *); static int nfs_closedir(struct inode *, struct file *); static int nfs_readdir(struct file *, void *, filldir_t); static struct dentry *nfs_lookup(struct inode *, struct dentry *, unsigned int); -static int nfs_create(struct inode *, struct dentry *, umode_t, struct nameidata *); +static int nfs_create(struct inode *, struct dentry *, umode_t, bool); static int nfs_mkdir(struct inode *, struct dentry *, umode_t); static int nfs_rmdir(struct inode *, struct dentry *); static int nfs_unlink(struct inode *, struct dentry *); @@ -1589,11 +1589,11 @@ out_error: * reply path made it appear to have failed. */ static int nfs_create(struct inode *dir, struct dentry *dentry, - umode_t mode, struct nameidata *nd) + umode_t mode, bool excl) { struct iattr attr; + int open_flags = excl ? O_CREAT | O_EXCL : O_CREAT; int error; - int open_flags = O_CREAT|O_EXCL; dfprintk(VFS, "NFS: create(%s/%ld), %s\n", dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); @@ -1601,9 +1601,6 @@ static int nfs_create(struct inode *dir, struct dentry *dentry, attr.ia_mode = mode; attr.ia_valid = ATTR_MODE; - if (nd && !(nd->flags & LOOKUP_EXCL)) - open_flags = O_CREAT; - error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags); if (error != 0) goto out_err; -- cgit v1.2.3 From 9249e17fe094d853d1ef7475dd559a2cc7e23d42 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 25 Jun 2012 12:55:37 +0100 Subject: VFS: Pass mount flags to sget() Pass mount flags to sget() so that it can use them in initialising a new superblock before the set function is called. 
They could also be passed to the compare function. Signed-off-by: David Howells Signed-off-by: Al Viro --- fs/nfs/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 06228192f64..8b2a2977b72 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -2419,7 +2419,7 @@ static struct dentry *nfs_fs_mount_common(struct file_system_type *fs_type, sb_mntdata.mntflags |= MS_SYNCHRONOUS; /* Get a superblock - note that we may end up sharing one that already exists */ - s = sget(fs_type, compare_super, nfs_set_super, &sb_mntdata); + s = sget(fs_type, compare_super, nfs_set_super, flags, &sb_mntdata); if (IS_ERR(s)) { mntroot = ERR_CAST(s); goto out_err_nosb; -- cgit v1.2.3 From 82c7c7a5a9270b13380a588dc57b7541a5e4f541 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Wed, 20 Jun 2012 15:03:31 -0400 Subject: NFSv4.1 return the LAYOUT for each file with failed DS connection I/O First mark the deviceid invalid to prevent any future use. Then fence all files involved in I/O to a DS with a connection error by sending a LAYOUTRETURN. 
Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust --- fs/nfs/nfs4filelayout.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 85b70639921..26b96de831e 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -205,9 +205,8 @@ static int filelayout_async_handle_error(struct rpc_task *task, case -EPIPE: dprintk("%s DS connection error %d\n", __func__, task->tk_status); - if (!filelayout_test_devid_invalid(devid)) - _pnfs_return_layout(inode); filelayout_mark_devid_invalid(devid); + _pnfs_return_layout(inode); rpc_wake_up(&tbl->slot_tbl_waitq); nfs4_ds_disconnect(clp); /* fall through */ -- cgit v1.2.3 From baf6c2a44af02cf6f7cec1ff177189c78fc30f9a Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Wed, 20 Jun 2012 15:03:32 -0400 Subject: NFSv4.1 don't send LAYOUTCOMMIT if data resent through MDS Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust --- fs/nfs/nfs4filelayout.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 26b96de831e..53f94d915bd 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -206,6 +206,7 @@ static int filelayout_async_handle_error(struct rpc_task *task, dprintk("%s DS connection error %d\n", __func__, task->tk_status); filelayout_mark_devid_invalid(devid); + clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(inode)->flags); _pnfs_return_layout(inode); rpc_wake_up(&tbl->slot_tbl_waitq); nfs4_ds_disconnect(clp); -- cgit v1.2.3 From 366d50521c57939e61e25dc27f009367447563e6 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Wed, 20 Jun 2012 15:03:33 -0400 Subject: NFSv4.1 mark layout when already returned When the file layout driver is fencing a DS, _pnfs_return_layout can be called multiple times per inode due to in-flight i/o referencing lsegs on its plh_segs list. Remember that LAYOUTRETURN has been called, and do not call it again. 
Allow LAYOUTRETURNs after a subsequent LAYOUTGET. Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 10 ++++++++-- fs/nfs/pnfs.h | 19 +++++++++++++++++++ 2 files changed, 27 insertions(+), 2 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 2617831afd3..3ad768f2cef 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -662,11 +662,11 @@ _pnfs_return_layout(struct inode *ino) nfs4_stateid stateid; int status = 0; - dprintk("--> %s\n", __func__); + dprintk("NFS: %s for inode %lu\n", __func__, ino->i_ino); spin_lock(&ino->i_lock); lo = nfsi->layout; - if (!lo) { + if (!lo || pnfs_test_layout_returned(lo)) { spin_unlock(&ino->i_lock); dprintk("%s: no layout to return\n", __func__); return status; @@ -676,6 +676,7 @@ _pnfs_return_layout(struct inode *ino) get_layout_hdr(lo); mark_matching_lsegs_invalid(lo, &tmp_list, NULL); lo->plh_block_lgets++; + pnfs_mark_layout_returned(lo); spin_unlock(&ino->i_lock); pnfs_free_lseg_list(&tmp_list); @@ -686,6 +687,7 @@ _pnfs_return_layout(struct inode *ino) status = -ENOMEM; set_bit(NFS_LAYOUT_RW_FAILED, &lo->plh_flags); set_bit(NFS_LAYOUT_RO_FAILED, &lo->plh_flags); + pnfs_clear_layout_returned(lo); put_layout_hdr(lo); goto out; } @@ -1075,6 +1077,10 @@ pnfs_update_layout(struct inode *ino, get_layout_hdr(lo); if (list_empty(&lo->plh_segs)) first = true; + + /* Enable LAYOUTRETURNs */ + pnfs_clear_layout_returned(lo); + spin_unlock(&ino->i_lock); if (first) { /* The lo must be on the clp list if there is any diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 592beb02c95..2c6c80503ba 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -64,6 +64,7 @@ enum { NFS_LAYOUT_ROC, /* some lseg had roc bit set */ NFS_LAYOUT_DESTROYED, /* no new use of layout allowed */ NFS_LAYOUT_INVALID, /* layout is being destroyed */ + NFS_LAYOUT_RETURNED, /* layout has already been returned */ }; enum layoutdriver_policy_flags { @@ -255,6 +256,24 @@ struct nfs4_deviceid_node 
*nfs4_insert_deviceid_node(struct nfs4_deviceid_node * bool nfs4_put_deviceid_node(struct nfs4_deviceid_node *); void nfs4_deviceid_purge_client(const struct nfs_client *); +static inline void +pnfs_mark_layout_returned(struct pnfs_layout_hdr *lo) +{ + set_bit(NFS_LAYOUT_RETURNED, &lo->plh_flags); +} + +static inline void +pnfs_clear_layout_returned(struct pnfs_layout_hdr *lo) +{ + clear_bit(NFS_LAYOUT_RETURNED, &lo->plh_flags); +} + +static inline bool +pnfs_test_layout_returned(struct pnfs_layout_hdr *lo) +{ + return test_bit(NFS_LAYOUT_RETURNED, &lo->plh_flags); +} + static inline int lo_fail_bit(u32 iomode) { return iomode == IOMODE_RW ? -- cgit v1.2.3 From 293b3b065c5ec5d15c3087ca42a52c991d7d8235 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Wed, 20 Jun 2012 15:03:34 -0400 Subject: NFSv4.1 do not send LAYOUTRETURN on empty plh_segs list mark_matching_lsegs_invalid() resets the mds_threshold counters and can dereference the layout hdr on an initial empty plh_segs list. It returns 0 both in the case of an initial empty list and in a non-empty list that was cleared by calls to mark_lseg_invalid. Don't send a LAYOUTRETURN if the list was initially empty. Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 3ad768f2cef..7fbd25afe41 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -651,7 +651,14 @@ out_err_free: return NULL; } -/* Initiates a LAYOUTRETURN(FILE) */ +/* + * Initiates a LAYOUTRETURN(FILE), and removes the pnfs_layout_hdr + * when the layout segment list is empty. + * + * Note that a pnfs_layout_hdr can exist with an empty layout segment + * list when LAYOUTGET has failed, or when LAYOUTGET succeeded, but the + * deviceid is marked invalid. 
+ */ int _pnfs_return_layout(struct inode *ino) { @@ -660,7 +667,7 @@ _pnfs_return_layout(struct inode *ino) LIST_HEAD(tmp_list); struct nfs4_layoutreturn *lrp; nfs4_stateid stateid; - int status = 0; + int status = 0, empty; dprintk("NFS: %s for inode %lu\n", __func__, ino->i_ino); @@ -668,13 +675,21 @@ _pnfs_return_layout(struct inode *ino) lo = nfsi->layout; if (!lo || pnfs_test_layout_returned(lo)) { spin_unlock(&ino->i_lock); - dprintk("%s: no layout to return\n", __func__); - return status; + dprintk("NFS: %s no layout to return\n", __func__); + goto out; } stateid = nfsi->layout->plh_stateid; /* Reference matched in nfs4_layoutreturn_release */ get_layout_hdr(lo); + empty = list_empty(&lo->plh_segs); mark_matching_lsegs_invalid(lo, &tmp_list, NULL); + /* Don't send a LAYOUTRETURN if list was initially empty */ + if (empty) { + spin_unlock(&ino->i_lock); + put_layout_hdr(lo); + dprintk("NFS: %s no layout segments to return\n", __func__); + goto out; + } lo->plh_block_lgets++; pnfs_mark_layout_returned(lo); spin_unlock(&ino->i_lock); -- cgit v1.2.3 From 377e507d1572eca6372c862483f4ce4680ad310a Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 11 Jul 2012 16:29:45 -0400 Subject: NFS: Fix up TEST_STATEID and FREE_STATEID return code handling The TEST_STATEID and FREE_STATEID operations can return -NFS4ERR_BAD_STATEID, -NFS4ERR_OLD_STATEID, or -NFS4ERR_DEADSESSION. nfs41_{test,free}_stateid() should not pass these errors to nfs4_handle_exception() during state recovery, since that will recursively kick off state recovery again, resulting in a deadlock. In particular, when the TEST_STATEID operation returns NFS4_OK, res.status can contain one of these errors. _nfs41_test_stateid() replaces NFS4_OK with the value in res.status, which is then returned to callers. But res.status is not passed through nfs4_stat_to_errno(), and thus is a positive NFS4ERR value. Currently callers are only interested in !NFS4_OK, and nfs4_handle_exception() ignores positive values. 
Thus the res.status values are currently ignored by nfs4_handle_exception() and won't cause the deadlock above. Thanks to this missing negative, it is only when these operations fail (which is very rare) that a deadlock can occur. Bryan agrees the original intent was to return res.status as a negative NFS4ERR value to callers of nfs41_test_stateid(). Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 006e98da730..af3abf957f2 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -6578,10 +6578,9 @@ static int _nfs41_test_stateid(struct nfs_server *server, nfs4_stateid *stateid) nfs41_init_sequence(&args.seq_args, &res.seq_res, 0); status = nfs4_call_sync_sequence(server->client, server, &msg, &args.seq_args, &res.seq_res, 1); - - if (status == NFS_OK) - return res.status; - return status; + if (status != NFS_OK) + return status; + return -res.status; } static int nfs41_test_stateid(struct nfs_server *server, nfs4_stateid *stateid) @@ -6589,9 +6588,10 @@ static int nfs41_test_stateid(struct nfs_server *server, nfs4_stateid *stateid) struct nfs4_exception exception = { }; int err; do { - err = nfs4_handle_exception(server, - _nfs41_test_stateid(server, stateid), - &exception); + err = _nfs41_test_stateid(server, stateid); + if (err != -NFS4ERR_DELAY) + break; + nfs4_handle_exception(server, err, &exception); } while (exception.retry); return err; } @@ -6609,7 +6609,8 @@ static int _nfs4_free_stateid(struct nfs_server *server, nfs4_stateid *stateid) }; nfs41_init_sequence(&args.seq_args, &res.seq_res, 0); - return nfs4_call_sync_sequence(server->client, server, &msg, &args.seq_args, &res.seq_res, 1); + return nfs4_call_sync_sequence(server->client, server, &msg, + &args.seq_args, &res.seq_res, 1); } static int nfs41_free_stateid(struct nfs_server *server, nfs4_stateid *stateid) @@ 
-6617,9 +6618,10 @@ static int nfs41_free_stateid(struct nfs_server *server, nfs4_stateid *stateid) struct nfs4_exception exception = { }; int err; do { - err = nfs4_handle_exception(server, - _nfs4_free_stateid(server, stateid), - &exception); + err = _nfs4_free_stateid(server, stateid); + if (err != -NFS4ERR_DELAY) + break; + nfs4_handle_exception(server, err, &exception); } while (exception.retry); return err; } -- cgit v1.2.3 From 89af2739589365bf0dd2023c6a076b22ccd530f9 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 11 Jul 2012 16:29:56 -0400 Subject: NFS: Don't free a state ID the server does not recognize The result of a TEST_STATEID operation can indicate a few different things: o If NFS_OK is returned, then the client can continue using the state ID under test, and skip recovery. o RFC 5661 says that if the state ID was revoked, then the client must perform an explicit FREE_STATEID before trying to re-open. o If the server doesn't recognize the state ID at all, then no FREE_STATEID is needed, and the client can immediately continue with open recovery. Let's err on the side of caution: if the server clearly tells us the state ID is unknown, we skip the FREE_STATEID. For any other error, we issue a FREE_STATEID. Sometimes that FREE_STATEID will be unnecessary, but leaving unused state IDs on the server needlessly ties up resources. 
Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index af3abf957f2..afd61d72837 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1764,7 +1764,8 @@ static int nfs41_check_expired_stateid(struct nfs4_state *state, nfs4_stateid *s if (state->flags & flags) { status = nfs41_test_stateid(server, stateid); if (status != NFS_OK) { - nfs41_free_stateid(server, stateid); + if (status != -NFS4ERR_BAD_STATEID) + nfs41_free_stateid(server, stateid); state->flags &= ~flags; } } @@ -4697,7 +4698,9 @@ static int nfs41_check_expired_locks(struct nfs4_state *state) if (lsp->ls_flags & NFS_LOCK_INITIALIZED) { status = nfs41_test_stateid(server, &lsp->ls_stateid); if (status != NFS_OK) { - nfs41_free_stateid(server, &lsp->ls_stateid); + if (status != -NFS4ERR_BAD_STATEID) + nfs41_free_stateid(server, + &lsp->ls_stateid); lsp->ls_flags &= ~NFS_LOCK_INITIALIZED; ret = status; } -- cgit v1.2.3 From eb64cf964d453f8b559a8c0c2625952dbbcb5838 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 11 Jul 2012 16:30:05 -0400 Subject: NFS: State reclaim clears OPEN and LOCK state The "state->flags & flags" test in nfs41_check_expired_stateid() allows the state manager to squelch a TEST_STATEID operation when it is known for sure that a state ID is no longer valid. If the lease was purged, for example, the client already knows that state ID is now defunct. But open recovery is still needed for that inode. To force a call to nfs4_open_expired(), change the default return value for nfs41_check_expired_stateid() to force open recovery, and the default return value for nfs41_check_locks() to force lock recovery, if the requested flags are clear. Fix suggested by Bryan Schumaker. Also, the presence of a delegation state ID must not prevent normal open recovery. 
The delegation state ID must be cleared if it was revoked, but once cleared I don't think its presence or absence has any bearing on whether open recovery is still needed. So the logic is adjusted to ignore the TEST_STATEID result for the delegation state ID. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index afd61d72837..d1c1016cd50 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1758,8 +1758,8 @@ static int nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *sta #if defined(CONFIG_NFS_V4_1) static int nfs41_check_expired_stateid(struct nfs4_state *state, nfs4_stateid *stateid, unsigned int flags) { - int status = NFS_OK; struct nfs_server *server = NFS_SERVER(state->inode); + int status = -NFS4ERR_BAD_STATEID; if (state->flags & flags) { status = nfs41_test_stateid(server, stateid); @@ -1774,16 +1774,17 @@ static int nfs41_check_expired_stateid(struct nfs4_state *state, nfs4_stateid *s static int nfs41_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state) { - int deleg_status, open_status; int deleg_flags = 1 << NFS_DELEGATED_STATE; int open_flags = (1 << NFS_O_RDONLY_STATE) | (1 << NFS_O_WRONLY_STATE) | (1 << NFS_O_RDWR_STATE); + int status; - deleg_status = nfs41_check_expired_stateid(state, &state->stateid, deleg_flags); - open_status = nfs41_check_expired_stateid(state, &state->open_stateid, open_flags); + nfs41_check_expired_stateid(state, &state->stateid, deleg_flags); + status = nfs41_check_expired_stateid(state, &state->open_stateid, + open_flags); - if ((deleg_status == NFS_OK) && (open_status == NFS_OK)) - return NFS_OK; - return nfs4_open_expired(sp, state); + if (status != NFS_OK) + status = nfs4_open_expired(sp, state); + return status; } #endif @@ -4690,7 +4691,7 @@ out: #if defined(CONFIG_NFS_V4_1) static int 
nfs41_check_expired_locks(struct nfs4_state *state) { - int status, ret = NFS_OK; + int status, ret = -NFS4ERR_BAD_STATEID; struct nfs4_lock_state *lsp; struct nfs_server *server = NFS_SERVER(state->inode); @@ -4716,9 +4717,9 @@ static int nfs41_lock_expired(struct nfs4_state *state, struct file_lock *reques if (test_bit(LK_STATE_IN_USE, &state->flags)) status = nfs41_check_expired_locks(state); - if (status == NFS_OK) - return status; - return nfs4_lock_expired(state, request); + if (status != NFS_OK) + status = nfs4_lock_expired(state, request); + return status; } #endif -- cgit v1.2.3 From 3e60ffdd36fa518cc1822941dbb011e7a9adf513 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 11 Jul 2012 16:30:14 -0400 Subject: NFS: Clean up nfs41_check_expired_stateid() Clean up: Instead of open-coded flag manipulation, use test_bit() and clear_bit() just like all other accessors of the state->flag field. This also eliminates several unnecessary implicit integer type conversions. To make it absolutely clear what is going on, a number of comments are introduced. 
Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 77 +++++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 61 insertions(+), 16 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index d1c1016cd50..1364569f1d1 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1756,32 +1756,67 @@ static int nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *sta } #if defined(CONFIG_NFS_V4_1) -static int nfs41_check_expired_stateid(struct nfs4_state *state, nfs4_stateid *stateid, unsigned int flags) +static void nfs41_clear_delegation_stateid(struct nfs4_state *state) { struct nfs_server *server = NFS_SERVER(state->inode); - int status = -NFS4ERR_BAD_STATEID; - - if (state->flags & flags) { - status = nfs41_test_stateid(server, stateid); - if (status != NFS_OK) { - if (status != -NFS4ERR_BAD_STATEID) - nfs41_free_stateid(server, stateid); - state->flags &= ~flags; - } + nfs4_stateid *stateid = &state->stateid; + int status; + + /* If a state reset has been done, test_stateid is unneeded */ + if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0) + return; + + status = nfs41_test_stateid(server, stateid); + if (status != NFS_OK) { + /* Free the stateid unless the server explicitly + * informs us the stateid is unrecognized. */ + if (status != -NFS4ERR_BAD_STATEID) + nfs41_free_stateid(server, stateid); + + clear_bit(NFS_DELEGATED_STATE, &state->flags); + } +} + +/** + * nfs41_check_open_stateid - possibly free an open stateid + * + * @state: NFSv4 state for an inode + * + * Returns NFS_OK if recovery for this stateid is now finished. + * Otherwise a negative NFS4ERR value is returned. 
+ */ +static int nfs41_check_open_stateid(struct nfs4_state *state) +{ + struct nfs_server *server = NFS_SERVER(state->inode); + nfs4_stateid *stateid = &state->stateid; + int status; + + /* If a state reset has been done, test_stateid is unneeded */ + if ((test_bit(NFS_O_RDONLY_STATE, &state->flags) == 0) && + (test_bit(NFS_O_WRONLY_STATE, &state->flags) == 0) && + (test_bit(NFS_O_RDWR_STATE, &state->flags) == 0)) + return -NFS4ERR_BAD_STATEID; + + status = nfs41_test_stateid(server, stateid); + if (status != NFS_OK) { + /* Free the stateid unless the server explicitly + * informs us the stateid is unrecognized. */ + if (status != -NFS4ERR_BAD_STATEID) + nfs41_free_stateid(server, stateid); + + clear_bit(NFS_O_RDONLY_STATE, &state->flags); + clear_bit(NFS_O_WRONLY_STATE, &state->flags); + clear_bit(NFS_O_RDWR_STATE, &state->flags); } return status; } static int nfs41_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state) { - int deleg_flags = 1 << NFS_DELEGATED_STATE; - int open_flags = (1 << NFS_O_RDONLY_STATE) | (1 << NFS_O_WRONLY_STATE) | (1 << NFS_O_RDWR_STATE); int status; - nfs41_check_expired_stateid(state, &state->stateid, deleg_flags); - status = nfs41_check_expired_stateid(state, &state->open_stateid, - open_flags); - + nfs41_clear_delegation_stateid(state); + status = nfs41_check_open_stateid(state); if (status != NFS_OK) status = nfs4_open_expired(sp, state); return status; @@ -4689,6 +4724,14 @@ out: } #if defined(CONFIG_NFS_V4_1) +/** + * nfs41_check_expired_locks - possibly free a lock stateid + * + * @state: NFSv4 state for an inode + * + * Returns NFS_OK if recovery for this stateid is now finished. + * Otherwise a negative NFS4ERR value is returned. 
+ */ static int nfs41_check_expired_locks(struct nfs4_state *state) { int status, ret = -NFS4ERR_BAD_STATEID; @@ -4699,6 +4742,8 @@ static int nfs41_check_expired_locks(struct nfs4_state *state) if (lsp->ls_flags & NFS_LOCK_INITIALIZED) { status = nfs41_test_stateid(server, &lsp->ls_stateid); if (status != NFS_OK) { + /* Free the stateid unless the server + * informs us the stateid is unrecognized. */ if (status != -NFS4ERR_BAD_STATEID) nfs41_free_stateid(server, &lsp->ls_stateid); -- cgit v1.2.3 From 38527b153a7b43e5c8103f0c2d901d11cfa26d30 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 11 Jul 2012 16:30:23 -0400 Subject: NFS: Clean up TEST_STATEID and FREE_STATEID error reporting As a finishing touch, add appropriate documenting comments and some debugging printk's. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 1364569f1d1..0cb87664078 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -6625,13 +6625,27 @@ static int _nfs41_test_stateid(struct nfs_server *server, nfs4_stateid *stateid) .rpc_resp = &res, }; + dprintk("NFS call test_stateid %p\n", stateid); nfs41_init_sequence(&args.seq_args, &res.seq_res, 0); status = nfs4_call_sync_sequence(server->client, server, &msg, &args.seq_args, &res.seq_res, 1); - if (status != NFS_OK) + if (status != NFS_OK) { + dprintk("NFS reply test_stateid: failed, %d\n", status); return status; + } + dprintk("NFS reply test_stateid: succeeded, %d\n", -res.status); return -res.status; } +/** + * nfs41_test_stateid - perform a TEST_STATEID operation + * + * @server: server / transport on which to perform the operation + * @stateid: state ID to test + * + * Returns NFS_OK if the server recognizes that "stateid" is valid. 
+ * Otherwise a negative NFS4ERR value is returned if the operation + * failed or the state ID is not currently valid. + */ static int nfs41_test_stateid(struct nfs_server *server, nfs4_stateid *stateid) { struct nfs4_exception exception = { }; @@ -6656,12 +6670,25 @@ static int _nfs4_free_stateid(struct nfs_server *server, nfs4_stateid *stateid) .rpc_argp = &args, .rpc_resp = &res, }; + int status; + dprintk("NFS call free_stateid %p\n", stateid); nfs41_init_sequence(&args.seq_args, &res.seq_res, 0); - return nfs4_call_sync_sequence(server->client, server, &msg, + status = nfs4_call_sync_sequence(server->client, server, &msg, &args.seq_args, &res.seq_res, 1); + dprintk("NFS reply free_stateid: %d\n", status); + return status; } +/** + * nfs41_free_stateid - perform a FREE_STATEID operation + * + * @server: server / transport on which to perform the operation + * @stateid: state ID to release + * + * Returns NFS_OK if the server freed "stateid". Otherwise a + * negative NFS4ERR value is returned. + */ static int nfs41_free_stateid(struct nfs_server *server, nfs4_stateid *stateid) { struct nfs4_exception exception = { }; -- cgit v1.2.3 From 56d08fef2369d5ca9ad2e1fc697f5379fd8af751 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 11 Jul 2012 16:30:32 -0400 Subject: NFS: nfs_getaclargs.acl_len is a size_t MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Squelch compiler warnings: fs/nfs/nfs4proc.c: In function ‘__nfs4_get_acl_uncached’: fs/nfs/nfs4proc.c:3811:14: warning: comparison between signed and unsigned integer expressions [-Wsign-compare] fs/nfs/nfs4proc.c:3818:15: warning: comparison between signed and unsigned integer expressions [-Wsign-compare] Introduced by commit bf118a34 "NFSv4: include bitmap in nfsv4 get acl data", Dec 7, 2011. 
Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 0cb87664078..31369e9b5b0 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3775,7 +3775,8 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu .rpc_argp = &args, .rpc_resp = &res, }; - int ret = -ENOMEM, npages, i, acl_len = 0; + int ret = -ENOMEM, npages, i; + size_t acl_len = 0; npages = (buflen + PAGE_SIZE - 1) >> PAGE_SHIFT; /* As long as we're doing a round trip to the server anyway, -- cgit v1.2.3 From 6a1a1e34dc55f17e7bd260809207442dbb7a0296 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 11 Jul 2012 16:31:08 -0400 Subject: SUNRPC: Add rpcauth_list_flavors() The gss_mech_list_pseudoflavors() function provides a list of currently registered GSS pseudoflavors. This list does not include any non-GSS flavors that have been registered with the RPC client. nfs4_find_root_sec() currently adds these extra flavors by hand. Instead, nfs4_find_root_sec() should be looking at the set of flavors that have been explicitly registered via rpcauth_register(). And, other areas of code will soon need the same kind of list that contains all flavors the kernel currently knows about (see below). Rather than cloning the open-coded logic in nfs4_find_root_sec() to those new places, introduce a generic RPC function that generates a full list of registered auth flavors and pseudoflavors. A new rpc_authops method is added that lists a flavor's pseudoflavors, if it has any. I encountered an interesting module loader loop when I tried to get the RPC client to invoke gss_mech_list_pseudoflavors() by name. This patch is a pre-requisite for server trunking discovery, and a pre-requisite for fixing up the in-kernel mount client to do better automatic security flavor selection. 
Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 31369e9b5b0..80bb5055d0b 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -43,7 +43,6 @@ #include #include #include -#include #include #include #include @@ -2412,11 +2411,15 @@ static int nfs4_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle, int i, len, status = 0; rpc_authflavor_t flav_array[NFS_MAX_SECFLAVORS]; - len = gss_mech_list_pseudoflavors(&flav_array[0]); - flav_array[len] = RPC_AUTH_NULL; - len += 1; + len = rpcauth_list_flavors(flav_array, ARRAY_SIZE(flav_array)); + BUG_ON(len < 0); for (i = 0; i < len; i++) { + /* AUTH_UNIX is the default flavor if none was specified, + * thus has already been tried. */ + if (flav_array[i] == RPC_AUTH_UNIX) + continue; + status = nfs4_lookup_root_sec(server, fhandle, info, flav_array[i]); if (status == -NFS4ERR_WRONGSEC || status == -EACCES) continue; -- cgit v1.2.3 From 46a87b8a7b939900d779042da7097bf330ab787f Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 11 Jul 2012 16:30:41 -0400 Subject: NFS: When state recovery fails, waiting tasks should exit NFSv4 state recovery is not always successful. Failure is signalled by setting the nfs_client.cl_cons_state to a negative (errno) value, then waking waiters. Currently this can happen only during mount processing. I'm about to add an explicit case where state recovery failure during normal operation should force all NFS requests waiting on that state recovery to exit. 
Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 80bb5055d0b..74dcd85f0a1 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -258,7 +258,12 @@ static int nfs4_wait_clnt_recover(struct nfs_client *clp) res = wait_on_bit(&clp->cl_state, NFS4CLNT_MANAGER_RUNNING, nfs_wait_bit_killable, TASK_KILLABLE); - return res; + if (res) + return res; + + if (clp->cl_cons_state < 0) + return clp->cl_cons_state; + return 0; } static int nfs4_delay(struct rpc_clnt *clnt, long *timeout) -- cgit v1.2.3 From de734831224e74fcaf8917386e33644c4243db95 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 11 Jul 2012 16:30:50 -0400 Subject: NFS: Treat NFS4ERR_CLID_INUSE as a fatal error For NFSv4 minor version 0, currently the cl_id_uniquifier allows the Linux client to generate a unique nfs_client_id4 string whenever a server replies with NFS4ERR_CLID_INUSE. This implementation seems to be based on a flawed reading of RFC 3530. NFS4ERR_CLID_INUSE actually means that the client has presented this nfs_client_id4 string with a different principal at some time in the past, and that lease is still in use on the server. For a Linux client this might be rather difficult to achieve: the authentication flavor is named right in the nfs_client_id4.id string. If we change flavors, we change strings automatically. So, practically speaking, NFS4ERR_CLID_INUSE means there is some other client using our string. There is not much that can be done to recover automatically. Let's make it a permanent error. Remove the recovery logic in nfs4_proc_setclientid(), and remove the cl_id_uniquifier field from the nfs_client data structure. And, remove the authentication flavor from the nfs_client_id4 string. 
Keeping the authentication flavor in the nfs_client_id4.id string means that we could have a separate lease for each authentication flavor used by mounts on the client. But we want just one lease for all the mounts on this client. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 47 ++++++++++++++++------------------------------- fs/nfs/nfs4state.c | 7 ++++++- 2 files changed, 22 insertions(+), 32 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 74dcd85f0a1..1148081e1a5 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4029,42 +4029,28 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, .rpc_resp = res, .rpc_cred = cred, }; - int loop = 0; - int status; + /* nfs_client_id4 */ nfs4_init_boot_verifier(clp, &sc_verifier); - - for(;;) { - rcu_read_lock(); - setclientid.sc_name_len = scnprintf(setclientid.sc_name, - sizeof(setclientid.sc_name), "%s/%s %s %s %u", - clp->cl_ipaddr, - rpc_peeraddr2str(clp->cl_rpcclient, - RPC_DISPLAY_ADDR), - rpc_peeraddr2str(clp->cl_rpcclient, - RPC_DISPLAY_PROTO), - clp->cl_rpcclient->cl_auth->au_ops->au_name, - clp->cl_id_uniquifier); - setclientid.sc_netid_len = scnprintf(setclientid.sc_netid, + rcu_read_lock(); + setclientid.sc_name_len = scnprintf(setclientid.sc_name, + sizeof(setclientid.sc_name), "%s/%s %s", + clp->cl_ipaddr, + rpc_peeraddr2str(clp->cl_rpcclient, + RPC_DISPLAY_ADDR), + rpc_peeraddr2str(clp->cl_rpcclient, + RPC_DISPLAY_PROTO)); + /* cb_client4 */ + setclientid.sc_netid_len = scnprintf(setclientid.sc_netid, sizeof(setclientid.sc_netid), rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_NETID)); - setclientid.sc_uaddr_len = scnprintf(setclientid.sc_uaddr, + rcu_read_unlock(); + setclientid.sc_uaddr_len = scnprintf(setclientid.sc_uaddr, sizeof(setclientid.sc_uaddr), "%s.%u.%u", clp->cl_ipaddr, port >> 8, port & 255); - rcu_read_unlock(); - status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); - if (status != 
-NFS4ERR_CLID_INUSE) - break; - if (loop != 0) { - ++clp->cl_id_uniquifier; - break; - } - ++loop; - ssleep(clp->cl_lease_time / HZ + 1); - } - return status; + return rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); } int nfs4_proc_setclientid_confirm(struct nfs_client *clp, @@ -5262,10 +5248,9 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred) nfs4_init_boot_verifier(clp, &verifier); args.id_len = scnprintf(args.id, sizeof(args.id), - "%s/%s/%u", + "%s/%s", clp->cl_ipaddr, - clp->cl_rpcclient->cl_nodename, - clp->cl_rpcclient->cl_auth->au_flavor); + clp->cl_rpcclient->cl_nodename); res.server_owner = kzalloc(sizeof(struct nfs41_server_owner), GFP_NOFS); diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 1cfc4603fd9..81eabcdad0e 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1606,10 +1606,15 @@ static int nfs4_handle_reclaim_lease_error(struct nfs_client *clp, int status) return -ESERVERFAULT; /* Lease confirmation error: retry after purging the lease */ ssleep(1); - case -NFS4ERR_CLID_INUSE: case -NFS4ERR_STALE_CLIENTID: clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state); break; + case -NFS4ERR_CLID_INUSE: + pr_err("NFS: Server %s reports our clientid is in use\n", + clp->cl_hostname); + nfs_mark_client_ready(clp, -EPERM); + clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state); + return -EPERM; case -EACCES: if (clp->cl_machine_cred == NULL) return -EACCES; -- cgit v1.2.3 From 6bbb4ae8ffc4eef825c8742eff1fefae69a82e41 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 11 Jul 2012 16:30:59 -0400 Subject: NFS: Clean up nfs4_proc_setclientid() and friends Add documenting comments and appropriate debugging messages. 
Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 45 +++++++++++++++++++++++++++++++++++++-------- fs/nfs/nfs4state.c | 4 ++++ 2 files changed, 41 insertions(+), 8 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 1148081e1a5..05801be4a18 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4013,6 +4013,16 @@ static void nfs4_init_boot_verifier(const struct nfs_client *clp, memcpy(bootverf->data, verf, sizeof(bootverf->data)); } +/** + * nfs4_proc_setclientid - Negotiate client ID + * @clp: state data structure + * @program: RPC program for NFSv4 callback service + * @port: IP port number for NFS4 callback service + * @cred: RPC credential to use for this call + * @res: where to place the result + * + * Returns zero, a negative errno, or a negative NFS4ERR status code. + */ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, unsigned short port, struct rpc_cred *cred, struct nfs4_setclientid_res *res) @@ -4029,6 +4039,7 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, .rpc_resp = res, .rpc_cred = cred, }; + int status; /* nfs_client_id4 */ nfs4_init_boot_verifier(clp, &sc_verifier); @@ -4050,9 +4061,22 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, sizeof(setclientid.sc_uaddr), "%s.%u.%u", clp->cl_ipaddr, port >> 8, port & 255); - return rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); + dprintk("NFS call setclientid auth=%s, '%.*s'\n", + clp->cl_rpcclient->cl_auth->au_ops->au_name, + setclientid.sc_name_len, setclientid.sc_name); + status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); + dprintk("NFS reply setclientid: %d\n", status); + return status; } +/** + * nfs4_proc_setclientid_confirm - Confirm client ID + * @clp: state data structure + * @res: result of a previous SETCLIENTID + * @cred: RPC credential to use for this call + * + * Returns zero, a negative errno, or a negative NFS4ERR status code. 
+ */ int nfs4_proc_setclientid_confirm(struct nfs_client *clp, struct nfs4_setclientid_res *arg, struct rpc_cred *cred) @@ -4067,6 +4091,9 @@ int nfs4_proc_setclientid_confirm(struct nfs_client *clp, unsigned long now; int status; + dprintk("NFS call setclientid_confirm auth=%s, (client ID %llx)\n", + clp->cl_rpcclient->cl_auth->au_ops->au_name, + clp->cl_clientid); now = jiffies; status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); if (status == 0) { @@ -4075,6 +4102,7 @@ int nfs4_proc_setclientid_confirm(struct nfs_client *clp, clp->cl_last_renewal = now; spin_unlock(&clp->cl_lock); } + dprintk("NFS reply setclientid_confirm: %d\n", status); return status; } @@ -5218,6 +5246,8 @@ out: /* * nfs4_proc_exchange_id() * + * Returns zero, a negative errno, or a negative NFS4ERR status code. + * * Since the clientid has expired, all compounds using sessions * associated with the stale clientid will be returning * NFS4ERR_BADSESSION in the sequence operation, and will therefore @@ -5242,15 +5272,14 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred) .rpc_cred = cred, }; - dprintk("--> %s\n", __func__); - BUG_ON(clp == NULL); - nfs4_init_boot_verifier(clp, &verifier); - args.id_len = scnprintf(args.id, sizeof(args.id), "%s/%s", clp->cl_ipaddr, clp->cl_rpcclient->cl_nodename); + dprintk("NFS call exchange_id auth=%s, '%.*s'\n", + clp->cl_rpcclient->cl_auth->au_ops->au_name, + args.id_len, args.id); res.server_owner = kzalloc(sizeof(struct nfs41_server_owner), GFP_NOFS); @@ -5313,12 +5342,12 @@ out_server_scope: kfree(res.server_scope); out: if (clp->cl_implid != NULL) - dprintk("%s: Server Implementation ID: " + dprintk("NFS reply exchange_id: Server Implementation ID: " "domain: %s, name: %s, date: %llu,%u\n", - __func__, clp->cl_implid->domain, clp->cl_implid->name, + clp->cl_implid->domain, clp->cl_implid->name, clp->cl_implid->date.seconds, clp->cl_implid->date.nseconds); - dprintk("<-- %s status= %d\n", __func__, status); + 
dprintk("NFS reply exchange_id: %d\n", status); return status; } diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 81eabcdad0e..55148def554 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1665,6 +1665,10 @@ static int nfs4_establish_lease(struct nfs_client *clp) return 0; } +/* + * Returns zero or a negative errno. NFS4ERR values are converted + * to local errno values. + */ static int nfs4_reclaim_lease(struct nfs_client *clp) { int status; -- cgit v1.2.3 From 597d92891b8859b4b4949fd08e25e60fc80ddaaf Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Mon, 16 Jul 2012 16:39:10 -0400 Subject: NFS: Split out NFS v2 inode operations This patch moves the NFS v2 file and directory inode functions into files that are only compiled when CONFIG_NFS_V2 is enabled. Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 43 +++++++++---------------------------------- fs/nfs/file.c | 6 ------ fs/nfs/internal.h | 9 +++++++++ fs/nfs/proc.c | 21 +++++++++++++++++++++ 4 files changed, 39 insertions(+), 40 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index b7136853ca9..9ae329d6234 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -46,16 +46,6 @@ static int nfs_opendir(struct inode *, struct file *); static int nfs_closedir(struct inode *, struct file *); static int nfs_readdir(struct file *, void *, filldir_t); -static struct dentry *nfs_lookup(struct inode *, struct dentry *, unsigned int); -static int nfs_create(struct inode *, struct dentry *, umode_t, bool); -static int nfs_mkdir(struct inode *, struct dentry *, umode_t); -static int nfs_rmdir(struct inode *, struct dentry *); -static int nfs_unlink(struct inode *, struct dentry *); -static int nfs_symlink(struct inode *, struct dentry *, const char *); -static int nfs_link(struct dentry *, struct inode *, struct dentry *); -static int nfs_mknod(struct inode *, struct dentry *, umode_t, dev_t); -static int nfs_rename(struct inode *, struct dentry *, - 
struct inode *, struct dentry *); static int nfs_fsync_dir(struct file *, loff_t, loff_t, int); static loff_t nfs_llseek_dir(struct file *, loff_t, int); static void nfs_readdir_clear_array(struct page*); @@ -69,21 +59,6 @@ const struct file_operations nfs_dir_operations = { .fsync = nfs_fsync_dir, }; -const struct inode_operations nfs_dir_inode_operations = { - .create = nfs_create, - .lookup = nfs_lookup, - .link = nfs_link, - .unlink = nfs_unlink, - .symlink = nfs_symlink, - .mkdir = nfs_mkdir, - .rmdir = nfs_rmdir, - .mknod = nfs_mknod, - .rename = nfs_rename, - .permission = nfs_permission, - .getattr = nfs_getattr, - .setattr = nfs_setattr, -}; - const struct address_space_operations nfs_dir_aops = { .freepage = nfs_readdir_clear_array, }; @@ -1270,7 +1245,7 @@ const struct dentry_operations nfs_dentry_operations = { .d_release = nfs_d_release, }; -static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags) +struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags) { struct dentry *res; struct dentry *parent; @@ -1588,7 +1563,7 @@ out_error: * that the operation succeeded on the server, but an error in the * reply path made it appear to have failed. */ -static int nfs_create(struct inode *dir, struct dentry *dentry, +int nfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool excl) { struct iattr attr; @@ -1613,7 +1588,7 @@ out_err: /* * See comments for nfs_proc_create regarding failed operations. */ -static int +int nfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev) { struct iattr attr; @@ -1640,7 +1615,7 @@ out_err: /* * See comments for nfs_proc_create regarding failed operations. 
*/ -static int nfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) +int nfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) { struct iattr attr; int error; @@ -1666,7 +1641,7 @@ static void nfs_dentry_handle_enoent(struct dentry *dentry) d_delete(dentry); } -static int nfs_rmdir(struct inode *dir, struct dentry *dentry) +int nfs_rmdir(struct inode *dir, struct dentry *dentry) { int error; @@ -1725,7 +1700,7 @@ out: * * If sillyrename() returns 0, we do nothing, otherwise we unlink. */ -static int nfs_unlink(struct inode *dir, struct dentry *dentry) +int nfs_unlink(struct inode *dir, struct dentry *dentry) { int error; int need_rehash = 0; @@ -1769,7 +1744,7 @@ static int nfs_unlink(struct inode *dir, struct dentry *dentry) * now have a new file handle and can instantiate an in-core NFS inode * and move the raw page into its mapping. */ -static int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname) +int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname) { struct pagevec lru_pvec; struct page *page; @@ -1824,7 +1799,7 @@ static int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *sym return 0; } -static int +int nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) { struct inode *inode = old_dentry->d_inode; @@ -1869,7 +1844,7 @@ nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) * If these conditions are met, we can drop the dentries before doing * the rename. 
*/ -static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, +int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry) { struct inode *old_inode = old_dentry->d_inode; diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 57a22a1533e..7da8745e22a 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -41,12 +41,6 @@ static const struct vm_operations_struct nfs_file_vm_ops; -const struct inode_operations nfs_file_inode_operations = { - .permission = nfs_permission, - .getattr = nfs_getattr, - .setattr = nfs_setattr, -}; - #ifdef CONFIG_NFS_V3 const struct inode_operations nfs3_file_inode_operations = { .permission = nfs_permission, diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 7edc172c371..35a8ffec69f 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -247,6 +247,15 @@ extern struct nfs_client *nfs_init_client(struct nfs_client *clp, /* dir.c */ extern int nfs_access_cache_shrinker(struct shrinker *shrink, struct shrink_control *sc); +struct dentry *nfs_lookup(struct inode *, struct dentry *, unsigned int); +int nfs_create(struct inode *, struct dentry *, umode_t, bool); +int nfs_mkdir(struct inode *, struct dentry *, umode_t); +int nfs_rmdir(struct inode *, struct dentry *); +int nfs_unlink(struct inode *, struct dentry *); +int nfs_symlink(struct inode *, struct dentry *, const char *); +int nfs_link(struct dentry *, struct inode *, struct dentry *); +int nfs_mknod(struct inode *, struct dentry *, umode_t, dev_t); +int nfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *); /* inode.c */ extern struct workqueue_struct *nfsiod_workqueue; diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index c5ed1c0a8ab..4d3356af330 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -745,6 +745,27 @@ static int nfs_return_delegation(struct inode *inode) return 0; } +static const struct inode_operations nfs_dir_inode_operations = { + .create = nfs_create, + .lookup = nfs_lookup, + .link = 
nfs_link, + .unlink = nfs_unlink, + .symlink = nfs_symlink, + .mkdir = nfs_mkdir, + .rmdir = nfs_rmdir, + .mknod = nfs_mknod, + .rename = nfs_rename, + .permission = nfs_permission, + .getattr = nfs_getattr, + .setattr = nfs_setattr, +}; + +static const struct inode_operations nfs_file_inode_operations = { + .permission = nfs_permission, + .getattr = nfs_getattr, + .setattr = nfs_setattr, +}; + const struct nfs_rpc_ops nfs_v2_clientops = { .version = 2, /* protocol version */ .dentry_ops = &nfs_dentry_operations, -- cgit v1.2.3 From ab96291ea16b6b9c76bfac35ccbb26a15ecb01ce Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Mon, 16 Jul 2012 16:39:11 -0400 Subject: NFS: Split out NFS v3 inode operations This patch moves the NFS v3 file and directory inode functions into files that are only compiled when CONFIG_NFS_V3 is enabled. Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 21 --------------------- fs/nfs/file.c | 12 ------------ fs/nfs/nfs3proc.c | 29 +++++++++++++++++++++++++++++ 3 files changed, 29 insertions(+), 33 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 9ae329d6234..e75f2aaafad 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -63,27 +63,6 @@ const struct address_space_operations nfs_dir_aops = { .freepage = nfs_readdir_clear_array, }; -#ifdef CONFIG_NFS_V3 -const struct inode_operations nfs3_dir_inode_operations = { - .create = nfs_create, - .lookup = nfs_lookup, - .link = nfs_link, - .unlink = nfs_unlink, - .symlink = nfs_symlink, - .mkdir = nfs_mkdir, - .rmdir = nfs_rmdir, - .mknod = nfs_mknod, - .rename = nfs_rename, - .permission = nfs_permission, - .getattr = nfs_getattr, - .setattr = nfs_setattr, - .listxattr = nfs3_listxattr, - .getxattr = nfs3_getxattr, - .setxattr = nfs3_setxattr, - .removexattr = nfs3_removexattr, -}; -#endif /* CONFIG_NFS_V3 */ - #ifdef CONFIG_NFS_V4 static int nfs_atomic_open(struct inode *, struct dentry *, diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 
7da8745e22a..76239178e95 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -41,18 +41,6 @@ static const struct vm_operations_struct nfs_file_vm_ops; -#ifdef CONFIG_NFS_V3 -const struct inode_operations nfs3_file_inode_operations = { - .permission = nfs_permission, - .getattr = nfs_getattr, - .setattr = nfs_setattr, - .listxattr = nfs3_listxattr, - .getxattr = nfs3_getxattr, - .setxattr = nfs3_setxattr, - .removexattr = nfs3_removexattr, -}; -#endif /* CONFIG_NFS_v3 */ - /* Hack for future NFS swap support */ #ifndef IS_SWAPFILE # define IS_SWAPFILE(inode) (0) diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index f580358cad6..65d23eb92fe 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -888,6 +888,35 @@ static int nfs3_return_delegation(struct inode *inode) return 0; } +static const struct inode_operations nfs3_dir_inode_operations = { + .create = nfs_create, + .lookup = nfs_lookup, + .link = nfs_link, + .unlink = nfs_unlink, + .symlink = nfs_symlink, + .mkdir = nfs_mkdir, + .rmdir = nfs_rmdir, + .mknod = nfs_mknod, + .rename = nfs_rename, + .permission = nfs_permission, + .getattr = nfs_getattr, + .setattr = nfs_setattr, + .listxattr = nfs3_listxattr, + .getxattr = nfs3_getxattr, + .setxattr = nfs3_setxattr, + .removexattr = nfs3_removexattr, +}; + +static const struct inode_operations nfs3_file_inode_operations = { + .permission = nfs_permission, + .getattr = nfs_getattr, + .setattr = nfs_setattr, + .listxattr = nfs3_listxattr, + .getxattr = nfs3_getxattr, + .setxattr = nfs3_setxattr, + .removexattr = nfs3_removexattr, +}; + const struct nfs_rpc_ops nfs_v3_clientops = { .version = 3, /* protocol version */ .dentry_ops = &nfs_dentry_operations, -- cgit v1.2.3 From 73a79706d7f197a428a43fbf335bbe75cdbc221f Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Mon, 16 Jul 2012 16:39:12 -0400 Subject: NFS: Split out NFS v4 inode operations The NFS v4 file inode operations are already in nfs4proc.c, so this patch just needs to move the directory 
operations to the same file. Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 33 +++------------------------------ fs/nfs/nfs4_fs.h | 5 ++++- fs/nfs/nfs4proc.c | 20 ++++++++++++++++++++ 3 files changed, 27 insertions(+), 31 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index e75f2aaafad..d49f1b9cd3f 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -63,33 +63,6 @@ const struct address_space_operations nfs_dir_aops = { .freepage = nfs_readdir_clear_array, }; -#ifdef CONFIG_NFS_V4 - -static int nfs_atomic_open(struct inode *, struct dentry *, - struct file *, unsigned, umode_t, - int *); -const struct inode_operations nfs4_dir_inode_operations = { - .create = nfs_create, - .lookup = nfs_lookup, - .atomic_open = nfs_atomic_open, - .link = nfs_link, - .unlink = nfs_unlink, - .symlink = nfs_symlink, - .mkdir = nfs_mkdir, - .rmdir = nfs_rmdir, - .mknod = nfs_mknod, - .rename = nfs_rename, - .permission = nfs_permission, - .getattr = nfs_getattr, - .setattr = nfs_setattr, - .getxattr = generic_getxattr, - .setxattr = generic_setxattr, - .listxattr = generic_listxattr, - .removexattr = generic_removexattr, -}; - -#endif /* CONFIG_NFS_V4 */ - static struct nfs_open_dir_context *alloc_nfs_open_dir_context(struct inode *dir, struct rpc_cred *cred) { struct nfs_open_dir_context *ctx; @@ -1352,9 +1325,9 @@ out: return err; } -static int nfs_atomic_open(struct inode *dir, struct dentry *dentry, - struct file *file, unsigned open_flags, - umode_t mode, int *opened) +int nfs_atomic_open(struct inode *dir, struct dentry *dentry, + struct file *file, unsigned open_flags, + umode_t mode, int *opened) { struct nfs_open_context *ctx; struct dentry *res; diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 3696ca7f5f4..e2c4c72d386 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -200,7 +200,10 @@ struct nfs4_state_maintenance_ops { }; extern const struct dentry_operations nfs4_dentry_operations; -extern const struct 
inode_operations nfs4_dir_inode_operations; + +/* dir.c */ +int nfs_atomic_open(struct inode *, struct dentry *, struct file *, + unsigned, umode_t, int *); /* write.c */ int nfs4_write_inode(struct inode *, struct writeback_control *); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 05801be4a18..5e373c30e8d 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -6832,6 +6832,26 @@ const struct nfs4_minor_version_ops *nfs_v4_minor_ops[] = { #endif }; +const struct inode_operations nfs4_dir_inode_operations = { + .create = nfs_create, + .lookup = nfs_lookup, + .atomic_open = nfs_atomic_open, + .link = nfs_link, + .unlink = nfs_unlink, + .symlink = nfs_symlink, + .mkdir = nfs_mkdir, + .rmdir = nfs_rmdir, + .mknod = nfs_mknod, + .rename = nfs_rename, + .permission = nfs_permission, + .getattr = nfs_getattr, + .setattr = nfs_setattr, + .getxattr = generic_getxattr, + .setxattr = generic_setxattr, + .listxattr = generic_listxattr, + .removexattr = generic_removexattr, +}; + static const struct inode_operations nfs4_file_inode_operations = { .permission = nfs_permission, .getattr = nfs_getattr, -- cgit v1.2.3 From 129d1977ed39cbb4f091a518e4a12498c04f45ba Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Mon, 16 Jul 2012 16:39:13 -0400 Subject: NFS: Create an init_nfs_v4() function I want to initialize all of NFS v4 in a single function that will eventually be used as the v4 module init function. 
Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/Makefile | 2 +- fs/nfs/inode.c | 67 ++++++++++++++++++++++++++++++------------------------ fs/nfs/nfs4_fs.h | 4 ++++ fs/nfs/nfs4super.c | 23 +++++++++++++++++++ 4 files changed, 65 insertions(+), 31 deletions(-) create mode 100644 fs/nfs/nfs4super.c (limited to 'fs/nfs') diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile index 7ddd45d9f17..162a699134c 100644 --- a/fs/nfs/Makefile +++ b/fs/nfs/Makefile @@ -13,7 +13,7 @@ nfs-$(CONFIG_NFS_V2) += proc.o nfs2xdr.o nfs-$(CONFIG_NFS_V3) += nfs3proc.o nfs3xdr.o nfs-$(CONFIG_NFS_V3_ACL) += nfs3acl.o nfs-$(CONFIG_NFS_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \ - delegation.o idmap.o \ + nfs4super.o delegation.o idmap.o \ callback.o callback_xdr.o callback_proc.o \ nfs4namespace.o nfs-$(CONFIG_NFS_V4_1) += pnfs.o pnfs_dev.o diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 28c9ebbe78a..35f7e4bc680 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -32,7 +32,6 @@ #include #include #include -#include #include #include #include @@ -1628,87 +1627,96 @@ static int __init init_nfs_fs(void) { int err; - err = nfs_idmap_init(); - if (err < 0) - goto out10; - err = nfs_dns_resolver_init(); if (err < 0) - goto out9; + goto out11; err = register_pernet_subsys(&nfs_net_ops); if (err < 0) - goto out8; + goto out10; err = nfs_fscache_register(); if (err < 0) - goto out7; + goto out9; err = nfsiod_start(); if (err) - goto out6; + goto out8; err = nfs_fs_proc_init(); if (err) - goto out5; + goto out7; err = nfs_init_nfspagecache(); if (err) - goto out4; + goto out6; err = nfs_init_inodecache(); if (err) - goto out3; + goto out5; err = nfs_init_readpagecache(); if (err) - goto out2; + goto out4; err = nfs_init_writepagecache(); if (err) - goto out1; + goto out3; err = nfs_init_directcache(); if (err) - goto out0; + goto out2; #ifdef CONFIG_PROC_FS rpc_proc_register(&init_net, &nfs_rpcstat); #endif + +#ifdef CONFIG_NFS_V4 + err = init_nfs_v4(); + if (err) + goto 
out1; +#endif + if ((err = register_nfs_fs()) != 0) - goto out; + goto out0; + return 0; -out: +out0: +#ifdef CONFIG_NFS_V4 + exit_nfs_v4(); +out1: +#endif #ifdef CONFIG_PROC_FS rpc_proc_unregister(&init_net, "nfs"); #endif nfs_destroy_directcache(); -out0: - nfs_destroy_writepagecache(); -out1: - nfs_destroy_readpagecache(); out2: - nfs_destroy_inodecache(); + nfs_destroy_writepagecache(); out3: - nfs_destroy_nfspagecache(); + nfs_destroy_readpagecache(); out4: - nfs_fs_proc_exit(); + nfs_destroy_inodecache(); out5: - nfsiod_stop(); + nfs_destroy_nfspagecache(); out6: - nfs_fscache_unregister(); + nfs_fs_proc_exit(); out7: - unregister_pernet_subsys(&nfs_net_ops); + nfsiod_stop(); out8: - nfs_dns_resolver_destroy(); + nfs_fscache_unregister(); out9: - nfs_idmap_quit(); + unregister_pernet_subsys(&nfs_net_ops); out10: + nfs_dns_resolver_destroy(); +out11: return err; } static void __exit exit_nfs_fs(void) { +#ifdef CONFIG_NFS_V4 + exit_nfs_v4(); +#endif nfs_destroy_directcache(); nfs_destroy_writepagecache(); nfs_destroy_readpagecache(); @@ -1717,7 +1725,6 @@ static void __exit exit_nfs_fs(void) nfs_fscache_unregister(); unregister_pernet_subsys(&nfs_net_ops); nfs_dns_resolver_destroy(); - nfs_idmap_quit(); #ifdef CONFIG_PROC_FS rpc_proc_unregister(&init_net, "nfs"); #endif diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index e2c4c72d386..1a6ed3f9a32 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -364,6 +364,10 @@ extern void nfs4_free_lock_state(struct nfs_server *server, struct nfs4_lock_sta extern const nfs4_stateid zero_stateid; +/* nfs4super.c */ +int init_nfs_v4(void); +void exit_nfs_v4(void); + /* nfs4xdr.c */ extern struct rpc_procinfo nfs4_procedures[]; diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c new file mode 100644 index 00000000000..366e4145969 --- /dev/null +++ b/fs/nfs/nfs4super.c @@ -0,0 +1,23 @@ +/* + * Copyright (c) 2012 Bryan Schumaker + */ +#include +#include + +int __init init_nfs_v4(void) +{ + int err; + + err = 
nfs_idmap_init(); + if (err) + goto out; + + return 0; +out: + return err; +} + +void __exit exit_nfs_v4(void) +{ + nfs_idmap_quit(); +} -- cgit v1.2.3 From 466bfe7f4a5bee4cdd73d3f6bd290173a8c75a40 Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Mon, 16 Jul 2012 16:39:14 -0400 Subject: NFS: Initialize v4 sysctls from nfs_init_v4() And split them out of the generic client into their own file. Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/Makefile | 7 +++++- fs/nfs/nfs4_fs.h | 15 ++++++++++++ fs/nfs/nfs4super.c | 9 +++++++ fs/nfs/nfs4sysctl.c | 68 +++++++++++++++++++++++++++++++++++++++++++++++++++++ fs/nfs/sysctl.c | 26 -------------------- 5 files changed, 98 insertions(+), 27 deletions(-) create mode 100644 fs/nfs/nfs4sysctl.c (limited to 'fs/nfs') diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile index 162a699134c..4a78e76440f 100644 --- a/fs/nfs/Makefile +++ b/fs/nfs/Makefile @@ -17,7 +17,12 @@ nfs-$(CONFIG_NFS_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \ callback.o callback_xdr.o callback_proc.o \ nfs4namespace.o nfs-$(CONFIG_NFS_V4_1) += pnfs.o pnfs_dev.o -nfs-$(CONFIG_SYSCTL) += sysctl.o + +ifeq ($(CONFIG_SYSCTL), y) +nfs-y += sysctl.o +nfs-$(CONFIG_NFS_V4) += nfs4sysctl.o +endif + nfs-$(CONFIG_NFS_FSCACHE) += fscache.o fscache-index.o obj-$(CONFIG_PNFS_FILE_LAYOUT) += nfs_layout_nfsv41_files.o diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 1a6ed3f9a32..b508fef1a32 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -368,6 +368,21 @@ extern const nfs4_stateid zero_stateid; int init_nfs_v4(void); void exit_nfs_v4(void); +/* nfs4sysctl.c */ +#ifdef CONFIG_SYSCTL +int nfs4_register_sysctl(void); +void nfs4_unregister_sysctl(void); +#else +static inline int nfs4_register_sysctl(void) +{ + return 0; +} + +static inline int nfs4_unregister_sysctl(void) +{ +} +#endif + /* nfs4xdr.c */ extern struct rpc_procinfo nfs4_procedures[]; diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c index 366e4145969..70c394e75ca 
100644 --- a/fs/nfs/nfs4super.c +++ b/fs/nfs/nfs4super.c @@ -3,6 +3,8 @@ */ #include #include +#include +#include "nfs4_fs.h" int __init init_nfs_v4(void) { @@ -12,12 +14,19 @@ int __init init_nfs_v4(void) if (err) goto out; + err = nfs4_register_sysctl(); + if (err) + goto out1; + return 0; +out1: + nfs_idmap_quit(); out: return err; } void __exit exit_nfs_v4(void) { + nfs4_unregister_sysctl(); nfs_idmap_quit(); } diff --git a/fs/nfs/nfs4sysctl.c b/fs/nfs/nfs4sysctl.c new file mode 100644 index 00000000000..5729bc8aa75 --- /dev/null +++ b/fs/nfs/nfs4sysctl.c @@ -0,0 +1,68 @@ +/* + * linux/fs/nfs/nfs4sysctl.c + * + * Sysctl interface to NFS v4 parameters + * + * Copyright (c) 2006 Trond Myklebust + */ +#include +#include +#include + +#include "callback.h" + +static const int nfs_set_port_min = 0; +static const int nfs_set_port_max = 65535; +static struct ctl_table_header *nfs4_callback_sysctl_table; + +static ctl_table nfs4_cb_sysctls[] = { + { + .procname = "nfs_callback_tcpport", + .data = &nfs_callback_set_tcpport, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = (int *)&nfs_set_port_min, + .extra2 = (int *)&nfs_set_port_max, + }, + { + .procname = "idmap_cache_timeout", + .data = &nfs_idmap_cache_timeout, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + { } +}; + +static ctl_table nfs4_cb_sysctl_dir[] = { + { + .procname = "nfs", + .mode = 0555, + .child = nfs4_cb_sysctls, + }, + { } +}; + +static ctl_table nfs4_cb_sysctl_root[] = { + { + .procname = "fs", + .mode = 0555, + .child = nfs4_cb_sysctl_dir, + }, + { } +}; + +int nfs4_register_sysctl(void) +{ + nfs4_callback_sysctl_table = register_sysctl_table(nfs4_cb_sysctl_root); + if (nfs4_callback_sysctl_table == NULL) + return -ENOMEM; + return 0; +} + +void nfs4_unregister_sysctl(void) +{ + unregister_sysctl_table(nfs4_callback_sysctl_table); + nfs4_callback_sysctl_table = NULL; +} diff --git a/fs/nfs/sysctl.c b/fs/nfs/sysctl.c 
index ad4d2e787b2..6b3f2535a3e 100644 --- a/fs/nfs/sysctl.c +++ b/fs/nfs/sysctl.c @@ -9,37 +9,11 @@ #include #include #include -#include -#include #include -#include "callback.h" - -#ifdef CONFIG_NFS_V4 -static const int nfs_set_port_min = 0; -static const int nfs_set_port_max = 65535; -#endif static struct ctl_table_header *nfs_callback_sysctl_table; static ctl_table nfs_cb_sysctls[] = { -#ifdef CONFIG_NFS_V4 - { - .procname = "nfs_callback_tcpport", - .data = &nfs_callback_set_tcpport, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = (int *)&nfs_set_port_min, - .extra2 = (int *)&nfs_set_port_max, - }, - { - .procname = "idmap_cache_timeout", - .data = &nfs_idmap_cache_timeout, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, -#endif { .procname = "nfs_mountpoint_timeout", .data = &nfs_mountpoint_expiry_timeout, -- cgit v1.2.3 From ce4ef7c0a8a0594d7b9d088d73866a4389402a7e Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Mon, 16 Jul 2012 16:39:15 -0400 Subject: NFS: Split out NFS v4 file operations This patch moves the NFS v4 file functions into a new file that is only compiled when CONFIG_NFS_V4 is enabled. 
Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/Makefile | 2 +- fs/nfs/file.c | 151 ++++++------------------------------------------------ fs/nfs/internal.h | 17 ++++++ fs/nfs/nfs4file.c | 126 +++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 160 insertions(+), 136 deletions(-) create mode 100644 fs/nfs/nfs4file.c (limited to 'fs/nfs') diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile index 4a78e76440f..e882a389b2e 100644 --- a/fs/nfs/Makefile +++ b/fs/nfs/Makefile @@ -13,7 +13,7 @@ nfs-$(CONFIG_NFS_V2) += proc.o nfs2xdr.o nfs-$(CONFIG_NFS_V3) += nfs3proc.o nfs3xdr.o nfs-$(CONFIG_NFS_V3_ACL) += nfs3acl.o nfs-$(CONFIG_NFS_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \ - nfs4super.o delegation.o idmap.o \ + nfs4super.o nfs4file.o delegation.o idmap.o \ callback.o callback_xdr.o callback_proc.o \ nfs4namespace.o nfs-$(CONFIG_NFS_V4_1) += pnfs.o pnfs_dev.o diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 76239178e95..70d124a61b9 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -35,7 +35,6 @@ #include "internal.h" #include "iostat.h" #include "fscache.h" -#include "pnfs.h" #define NFSDBG_FACILITY NFSDBG_FILE @@ -46,7 +45,7 @@ static const struct vm_operations_struct nfs_file_vm_ops; # define IS_SWAPFILE(inode) (0) #endif -static int nfs_check_flags(int flags) +int nfs_check_flags(int flags) { if ((flags & (O_APPEND | O_DIRECT)) == (O_APPEND | O_DIRECT)) return -EINVAL; @@ -75,7 +74,7 @@ nfs_file_open(struct inode *inode, struct file *filp) return res; } -static int +int nfs_file_release(struct inode *inode, struct file *filp) { dprintk("NFS: release(%s/%s)\n", @@ -117,7 +116,7 @@ force_reval: return __nfs_revalidate_inode(server, inode); } -static loff_t nfs_file_llseek(struct file *filp, loff_t offset, int origin) +loff_t nfs_file_llseek(struct file *filp, loff_t offset, int origin) { dprintk("NFS: llseek file(%s/%s, %lld, %d)\n", filp->f_path.dentry->d_parent->d_name.name, @@ -142,7 +141,7 @@ static loff_t 
nfs_file_llseek(struct file *filp, loff_t offset, int origin) /* * Flush all dirty pages, and check for write errors. */ -static int +int nfs_file_flush(struct file *file, fl_owner_t id) { struct dentry *dentry = file->f_path.dentry; @@ -167,7 +166,7 @@ nfs_file_flush(struct file *file, fl_owner_t id) return vfs_fsync(file, 0); } -static ssize_t +ssize_t nfs_file_read(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos) { @@ -191,7 +190,7 @@ nfs_file_read(struct kiocb *iocb, const struct iovec *iov, return result; } -static ssize_t +ssize_t nfs_file_splice_read(struct file *filp, loff_t *ppos, struct pipe_inode_info *pipe, size_t count, unsigned int flags) @@ -213,7 +212,7 @@ nfs_file_splice_read(struct file *filp, loff_t *ppos, return res; } -static int +int nfs_file_mmap(struct file * file, struct vm_area_struct * vma) { struct dentry *dentry = file->f_path.dentry; @@ -246,7 +245,7 @@ nfs_file_mmap(struct file * file, struct vm_area_struct * vma) * nfs_file_write() that a write error occurred, and hence cause it to * fall back to doing a synchronous write. 
*/ -static int +int nfs_file_fsync_commit(struct file *file, loff_t start, loff_t end, int datasync) { struct dentry *dentry = file->f_path.dentry; @@ -561,8 +560,8 @@ static int nfs_need_sync_write(struct file *filp, struct inode *inode) return 0; } -static ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) +ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) { struct dentry * dentry = iocb->ki_filp->f_path.dentry; struct inode * inode = dentry->d_inode; @@ -613,9 +612,9 @@ out_swapfile: goto out; } -static ssize_t nfs_file_splice_write(struct pipe_inode_info *pipe, - struct file *filp, loff_t *ppos, - size_t count, unsigned int flags) +ssize_t nfs_file_splice_write(struct pipe_inode_info *pipe, + struct file *filp, loff_t *ppos, + size_t count, unsigned int flags) { struct dentry *dentry = filp->f_path.dentry; struct inode *inode = dentry->d_inode; @@ -767,7 +766,7 @@ out: /* * Lock a (portion of) a file */ -static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl) +int nfs_lock(struct file *filp, int cmd, struct file_lock *fl) { struct inode *inode = filp->f_mapping->host; int ret = -ENOLCK; @@ -807,7 +806,7 @@ out_err: /* * Lock a (portion of) a file */ -static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl) +int nfs_flock(struct file *filp, int cmd, struct file_lock *fl) { struct inode *inode = filp->f_mapping->host; int is_local = 0; @@ -837,7 +836,7 @@ static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl) * There is no protocol support for leases, so we have no way to implement * them correctly in the face of opens by other clients. 
*/ -static int nfs_setlease(struct file *file, long arg, struct file_lock **fl) +int nfs_setlease(struct file *file, long arg, struct file_lock **fl) { dprintk("NFS: setlease(%s/%s, arg=%ld)\n", file->f_path.dentry->d_parent->d_name.name, @@ -863,121 +862,3 @@ const struct file_operations nfs_file_operations = { .check_flags = nfs_check_flags, .setlease = nfs_setlease, }; - -#ifdef CONFIG_NFS_V4 -static int -nfs4_file_open(struct inode *inode, struct file *filp) -{ - struct nfs_open_context *ctx; - struct dentry *dentry = filp->f_path.dentry; - struct dentry *parent = NULL; - struct inode *dir; - unsigned openflags = filp->f_flags; - struct iattr attr; - int err; - - BUG_ON(inode != dentry->d_inode); - /* - * If no cached dentry exists or if it's negative, NFSv4 handled the - * opens in ->lookup() or ->create(). - * - * We only get this far for a cached positive dentry. We skipped - * revalidation, so handle it here by dropping the dentry and returning - * -EOPENSTALE. The VFS will retry the lookup/create/open. 
- */ - - dprintk("NFS: open file(%s/%s)\n", - dentry->d_parent->d_name.name, - dentry->d_name.name); - - if ((openflags & O_ACCMODE) == 3) - openflags--; - - /* We can't create new files here */ - openflags &= ~(O_CREAT|O_EXCL); - - parent = dget_parent(dentry); - dir = parent->d_inode; - - ctx = alloc_nfs_open_context(filp->f_path.dentry, filp->f_mode); - err = PTR_ERR(ctx); - if (IS_ERR(ctx)) - goto out; - - attr.ia_valid = ATTR_OPEN; - if (openflags & O_TRUNC) { - attr.ia_valid |= ATTR_SIZE; - attr.ia_size = 0; - nfs_wb_all(inode); - } - - inode = NFS_PROTO(dir)->open_context(dir, ctx, openflags, &attr); - if (IS_ERR(inode)) { - err = PTR_ERR(inode); - switch (err) { - case -EPERM: - case -EACCES: - case -EDQUOT: - case -ENOSPC: - case -EROFS: - goto out_put_ctx; - default: - goto out_drop; - } - } - iput(inode); - if (inode != dentry->d_inode) - goto out_drop; - - nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); - nfs_file_set_open_context(filp, ctx); - err = 0; - -out_put_ctx: - put_nfs_open_context(ctx); -out: - dput(parent); - return err; - -out_drop: - d_drop(dentry); - err = -EOPENSTALE; - goto out_put_ctx; -} - -static int -nfs4_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) -{ - int ret; - struct inode *inode = file->f_path.dentry->d_inode; - - ret = filemap_write_and_wait_range(inode->i_mapping, start, end); - mutex_lock(&inode->i_mutex); - ret = nfs_file_fsync_commit(file, start, end, datasync); - if (!ret && !datasync) - /* application has asked for meta-data sync */ - ret = pnfs_layoutcommit_inode(inode, true); - mutex_unlock(&inode->i_mutex); - - return ret; -} - -const struct file_operations nfs4_file_operations = { - .llseek = nfs_file_llseek, - .read = do_sync_read, - .write = do_sync_write, - .aio_read = nfs_file_read, - .aio_write = nfs_file_write, - .mmap = nfs_file_mmap, - .open = nfs4_file_open, - .flush = nfs_file_flush, - .release = nfs_file_release, - .fsync = nfs4_file_fsync, - .lock = nfs_lock, - .flock = 
nfs_flock, - .splice_read = nfs_file_splice_read, - .splice_write = nfs_file_splice_write, - .check_flags = nfs_check_flags, - .setlease = nfs_setlease, -}; -#endif /* CONFIG_NFS_V4 */ diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 35a8ffec69f..ca7200a53ca 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -257,6 +257,23 @@ int nfs_link(struct dentry *, struct inode *, struct dentry *); int nfs_mknod(struct inode *, struct dentry *, umode_t, dev_t); int nfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *); +/* file.c */ +int nfs_file_fsync_commit(struct file *, loff_t, loff_t, int); +loff_t nfs_file_llseek(struct file *, loff_t, int); +int nfs_file_flush(struct file *, fl_owner_t); +ssize_t nfs_file_read(struct kiocb *, const struct iovec *, unsigned long, loff_t); +ssize_t nfs_file_splice_read(struct file *, loff_t *, struct pipe_inode_info *, + size_t, unsigned int); +int nfs_file_mmap(struct file *, struct vm_area_struct *); +ssize_t nfs_file_write(struct kiocb *, const struct iovec *, unsigned long, loff_t); +int nfs_file_release(struct inode *, struct file *); +int nfs_lock(struct file *, int, struct file_lock *); +int nfs_flock(struct file *, int, struct file_lock *); +ssize_t nfs_file_splice_write(struct pipe_inode_info *, struct file *, loff_t *, + size_t, unsigned int); +int nfs_check_flags(int); +int nfs_setlease(struct file *, long, struct file_lock **); + /* inode.c */ extern struct workqueue_struct *nfsiod_workqueue; extern struct inode *nfs_alloc_inode(struct super_block *sb); diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c new file mode 100644 index 00000000000..acb65e7887f --- /dev/null +++ b/fs/nfs/nfs4file.c @@ -0,0 +1,126 @@ +/* + * linux/fs/nfs/file.c + * + * Copyright (C) 1992 Rick Sladkey + */ +#include +#include "internal.h" +#include "pnfs.h" + +#define NFSDBG_FACILITY NFSDBG_FILE + +static int +nfs4_file_open(struct inode *inode, struct file *filp) +{ + struct nfs_open_context *ctx; + struct 
dentry *dentry = filp->f_path.dentry; + struct dentry *parent = NULL; + struct inode *dir; + unsigned openflags = filp->f_flags; + struct iattr attr; + int err; + + BUG_ON(inode != dentry->d_inode); + /* + * If no cached dentry exists or if it's negative, NFSv4 handled the + * opens in ->lookup() or ->create(). + * + * We only get this far for a cached positive dentry. We skipped + * revalidation, so handle it here by dropping the dentry and returning + * -EOPENSTALE. The VFS will retry the lookup/create/open. + */ + + dprintk("NFS: open file(%s/%s)\n", + dentry->d_parent->d_name.name, + dentry->d_name.name); + + if ((openflags & O_ACCMODE) == 3) + openflags--; + + /* We can't create new files here */ + openflags &= ~(O_CREAT|O_EXCL); + + parent = dget_parent(dentry); + dir = parent->d_inode; + + ctx = alloc_nfs_open_context(filp->f_path.dentry, filp->f_mode); + err = PTR_ERR(ctx); + if (IS_ERR(ctx)) + goto out; + + attr.ia_valid = ATTR_OPEN; + if (openflags & O_TRUNC) { + attr.ia_valid |= ATTR_SIZE; + attr.ia_size = 0; + nfs_wb_all(inode); + } + + inode = NFS_PROTO(dir)->open_context(dir, ctx, openflags, &attr); + if (IS_ERR(inode)) { + err = PTR_ERR(inode); + switch (err) { + case -EPERM: + case -EACCES: + case -EDQUOT: + case -ENOSPC: + case -EROFS: + goto out_put_ctx; + default: + goto out_drop; + } + } + iput(inode); + if (inode != dentry->d_inode) + goto out_drop; + + nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); + nfs_file_set_open_context(filp, ctx); + err = 0; + +out_put_ctx: + put_nfs_open_context(ctx); +out: + dput(parent); + return err; + +out_drop: + d_drop(dentry); + err = -EOPENSTALE; + goto out_put_ctx; +} + +static int +nfs4_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) +{ + int ret; + struct inode *inode = file->f_path.dentry->d_inode; + + ret = filemap_write_and_wait_range(inode->i_mapping, start, end); + mutex_lock(&inode->i_mutex); + ret = nfs_file_fsync_commit(file, start, end, datasync); + if (!ret && 
!datasync) + /* application has asked for meta-data sync */ + ret = pnfs_layoutcommit_inode(inode, true); + mutex_unlock(&inode->i_mutex); + + return ret; +} + +const struct file_operations nfs4_file_operations = { + .llseek = nfs_file_llseek, + .read = do_sync_read, + .write = do_sync_write, + .aio_read = nfs_file_read, + .aio_write = nfs_file_write, + .mmap = nfs_file_mmap, + .open = nfs4_file_open, + .flush = nfs_file_flush, + .release = nfs_file_release, + .fsync = nfs4_file_fsync, + .lock = nfs_lock, + .flock = nfs_flock, + .splice_read = nfs_file_splice_read, + .splice_write = nfs_file_splice_write, + .check_flags = nfs_check_flags, + .setlease = nfs_setlease, +}; -- cgit v1.2.3 From a38a9eac75f0d09f1941a6e85e291c8e96bc8375 Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Mon, 16 Jul 2012 16:39:16 -0400 Subject: NFS: Move the v4 getroot code to nfs4getroot.c Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/Makefile | 2 +- fs/nfs/getroot.c | 50 -------------------------------------------------- fs/nfs/nfs4getroot.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 50 insertions(+), 51 deletions(-) create mode 100644 fs/nfs/nfs4getroot.c (limited to 'fs/nfs') diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile index e882a389b2e..ec13afe2619 100644 --- a/fs/nfs/Makefile +++ b/fs/nfs/Makefile @@ -15,7 +15,7 @@ nfs-$(CONFIG_NFS_V3_ACL) += nfs3acl.o nfs-$(CONFIG_NFS_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \ nfs4super.o nfs4file.o delegation.o idmap.o \ callback.o callback_xdr.o callback_proc.o \ - nfs4namespace.o + nfs4namespace.o nfs4getroot.o nfs-$(CONFIG_NFS_V4_1) += pnfs.o pnfs_dev.o ifeq ($(CONFIG_SYSCTL), y) diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c index a67990f90bd..4654ced096a 100644 --- a/fs/nfs/getroot.c +++ b/fs/nfs/getroot.c @@ -23,21 +23,15 @@ #include #include #include -#include #include #include #include -#include #include #include #include #include -#include "nfs4_fs.h" -#include 
"delegation.h" -#include "internal.h" - #define NFSDBG_FACILITY NFSDBG_CLIENT /* @@ -135,47 +129,3 @@ out: nfs_free_fattr(fsinfo.fattr); return ret; } - -#ifdef CONFIG_NFS_V4 - -int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh) -{ - struct nfs_fsinfo fsinfo; - int ret = -ENOMEM; - - dprintk("--> nfs4_get_rootfh()\n"); - - fsinfo.fattr = nfs_alloc_fattr(); - if (fsinfo.fattr == NULL) - goto out; - - /* Start by getting the root filehandle from the server */ - ret = nfs4_proc_get_rootfh(server, mntfh, &fsinfo); - if (ret < 0) { - dprintk("nfs4_get_rootfh: getroot error = %d\n", -ret); - goto out; - } - - if (!(fsinfo.fattr->valid & NFS_ATTR_FATTR_TYPE) - || !S_ISDIR(fsinfo.fattr->mode)) { - printk(KERN_ERR "nfs4_get_rootfh:" - " getroot encountered non-directory\n"); - ret = -ENOTDIR; - goto out; - } - - if (fsinfo.fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) { - printk(KERN_ERR "nfs4_get_rootfh:" - " getroot obtained referral\n"); - ret = -EREMOTE; - goto out; - } - - memcpy(&server->fsid, &fsinfo.fattr->fsid, sizeof(server->fsid)); -out: - nfs_free_fattr(fsinfo.fattr); - dprintk("<-- nfs4_get_rootfh() = %d\n", ret); - return ret; -} - -#endif /* CONFIG_NFS_V4 */ diff --git a/fs/nfs/nfs4getroot.c b/fs/nfs/nfs4getroot.c new file mode 100644 index 00000000000..6a83780e0ce --- /dev/null +++ b/fs/nfs/nfs4getroot.c @@ -0,0 +1,49 @@ +/* +* Copyright (C) 2006 Red Hat, Inc. All Rights Reserved. 
+* Written by David Howells (dhowells@redhat.com) +*/ + +#include +#include "nfs4_fs.h" + +#define NFSDBG_FACILITY NFSDBG_CLIENT + +int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh) +{ + struct nfs_fsinfo fsinfo; + int ret = -ENOMEM; + + dprintk("--> nfs4_get_rootfh()\n"); + + fsinfo.fattr = nfs_alloc_fattr(); + if (fsinfo.fattr == NULL) + goto out; + + /* Start by getting the root filehandle from the server */ + ret = nfs4_proc_get_rootfh(server, mntfh, &fsinfo); + if (ret < 0) { + dprintk("nfs4_get_rootfh: getroot error = %d\n", -ret); + goto out; + } + + if (!(fsinfo.fattr->valid & NFS_ATTR_FATTR_TYPE) + || !S_ISDIR(fsinfo.fattr->mode)) { + printk(KERN_ERR "nfs4_get_rootfh:" + " getroot encountered non-directory\n"); + ret = -ENOTDIR; + goto out; + } + + if (fsinfo.fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) { + printk(KERN_ERR "nfs4_get_rootfh:" + " getroot obtained referral\n"); + ret = -EREMOTE; + goto out; + } + + memcpy(&server->fsid, &fsinfo.fattr->fsid, sizeof(server->fsid)); +out: + nfs_free_fattr(fsinfo.fattr); + dprintk("<-- nfs4_get_rootfh() = %d\n", ret); + return ret; +} -- cgit v1.2.3 From 428360d77c801932e4b28f15160aebbdb5f5a03e Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Mon, 16 Jul 2012 16:39:17 -0400 Subject: NFS: Initialize the NFS v4 client from init_nfs_v4() And split these functions out of the generic client into a v4 specific file. 
Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/Makefile | 2 +- fs/nfs/client.c | 137 ++----------------------------------------------- fs/nfs/internal.h | 1 + fs/nfs/nfs4client.c | 144 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 149 insertions(+), 135 deletions(-) create mode 100644 fs/nfs/nfs4client.c (limited to 'fs/nfs') diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile index ec13afe2619..0b96c203834 100644 --- a/fs/nfs/Makefile +++ b/fs/nfs/Makefile @@ -15,7 +15,7 @@ nfs-$(CONFIG_NFS_V3_ACL) += nfs3acl.o nfs-$(CONFIG_NFS_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \ nfs4super.o nfs4file.o delegation.o idmap.o \ callback.o callback_xdr.o callback_proc.o \ - nfs4namespace.o nfs4getroot.o + nfs4namespace.o nfs4getroot.o nfs4client.o nfs-$(CONFIG_NFS_V4_1) += pnfs.o pnfs_dev.o ifeq ($(CONFIG_SYSCTL), y) diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 254719c4a57..5664c7bbe50 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -662,9 +662,9 @@ static void nfs_init_timeout_values(struct rpc_timeout *to, int proto, /* * Create an RPC client handle */ -static int nfs_create_rpc_client(struct nfs_client *clp, - const struct rpc_timeout *timeparms, - rpc_authflavor_t flavor) +int nfs_create_rpc_client(struct nfs_client *clp, + const struct rpc_timeout *timeparms, + rpc_authflavor_t flavor) { struct rpc_clnt *clnt = NULL; struct rpc_create_args args = { @@ -1304,137 +1304,6 @@ nfs4_find_client_sessionid(struct net *net, const struct sockaddr *addr, } #endif /* CONFIG_NFS_V4_1 */ -/* - * Initialize the NFS4 callback service - */ -static int nfs4_init_callback(struct nfs_client *clp) -{ - int error; - - if (clp->rpc_ops->version == 4) { - struct rpc_xprt *xprt; - - xprt = rcu_dereference_raw(clp->cl_rpcclient->cl_xprt); - - if (nfs4_has_session(clp)) { - error = xprt_setup_backchannel(xprt, - NFS41_BC_MIN_CALLBACKS); - if (error < 0) - return error; - } - - error = nfs_callback_up(clp->cl_mvops->minor_version, 
xprt); - if (error < 0) { - dprintk("%s: failed to start callback. Error = %d\n", - __func__, error); - return error; - } - __set_bit(NFS_CS_CALLBACK, &clp->cl_res_state); - } - return 0; -} - -/* - * Initialize the minor version specific parts of an NFS4 client record - */ -static int nfs4_init_client_minor_version(struct nfs_client *clp) -{ -#if defined(CONFIG_NFS_V4_1) - if (clp->cl_mvops->minor_version) { - struct nfs4_session *session = NULL; - /* - * Create the session and mark it expired. - * When a SEQUENCE operation encounters the expired session - * it will do session recovery to initialize it. - */ - session = nfs4_alloc_session(clp); - if (!session) - return -ENOMEM; - - clp->cl_session = session; - /* - * The create session reply races with the server back - * channel probe. Mark the client NFS_CS_SESSION_INITING - * so that the client back channel can find the - * nfs_client struct - */ - nfs_mark_client_ready(clp, NFS_CS_SESSION_INITING); - } -#endif /* CONFIG_NFS_V4_1 */ - - return nfs4_init_callback(clp); -} - -/** - * nfs4_init_client - Initialise an NFS4 client record - * - * @clp: nfs_client to initialise - * @timeparms: timeout parameters for underlying RPC transport - * @ip_addr: callback IP address in presentation format - * @authflavor: authentication flavor for underlying RPC transport - * - * Returns pointer to an NFS client, or an ERR_PTR value. 
- */ -struct nfs_client *nfs4_init_client(struct nfs_client *clp, - const struct rpc_timeout *timeparms, - const char *ip_addr, - rpc_authflavor_t authflavour) -{ - char buf[INET6_ADDRSTRLEN + 1]; - int error; - - if (clp->cl_cons_state == NFS_CS_READY) { - /* the client is initialised already */ - dprintk("<-- nfs4_init_client() = 0 [already %p]\n", clp); - return clp; - } - - /* Check NFS protocol revision and initialize RPC op vector */ - clp->rpc_ops = &nfs_v4_clientops; - - __set_bit(NFS_CS_DISCRTRY, &clp->cl_flags); - error = nfs_create_rpc_client(clp, timeparms, authflavour); - if (error < 0) - goto error; - - /* If no clientaddr= option was specified, find a usable cb address */ - if (ip_addr == NULL) { - struct sockaddr_storage cb_addr; - struct sockaddr *sap = (struct sockaddr *)&cb_addr; - - error = rpc_localaddr(clp->cl_rpcclient, sap, sizeof(cb_addr)); - if (error < 0) - goto error; - error = rpc_ntop(sap, buf, sizeof(buf)); - if (error < 0) - goto error; - ip_addr = (const char *)buf; - } - strlcpy(clp->cl_ipaddr, ip_addr, sizeof(clp->cl_ipaddr)); - - error = nfs_idmap_new(clp); - if (error < 0) { - dprintk("%s: failed to create idmapper. 
Error = %d\n", - __func__, error); - goto error; - } - __set_bit(NFS_CS_IDMAP, &clp->cl_res_state); - - error = nfs4_init_client_minor_version(clp); - if (error < 0) - goto error; - - if (!nfs4_has_session(clp)) - nfs_mark_client_ready(clp, NFS_CS_READY); - return clp; - -error: - nfs_mark_client_ready(clp, error); - nfs_put_client(clp); - dprintk("<-- nfs4_init_client() = xerror %d\n", error); - return ERR_PTR(error); -} - /* * Set up an NFS4 client */ diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index ca7200a53ca..10df28d14f8 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -149,6 +149,7 @@ extern void nfs_umount(const struct nfs_mount_request *info); extern const struct rpc_program nfs_program; extern void nfs_clients_init(struct net *net); extern struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *); +int nfs_create_rpc_client(struct nfs_client *, const struct rpc_timeout *, rpc_authflavor_t); extern void nfs_cleanup_cb_ident_idr(struct net *); extern void nfs_put_client(struct nfs_client *); diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c new file mode 100644 index 00000000000..c5234b58990 --- /dev/null +++ b/fs/nfs/nfs4client.c @@ -0,0 +1,144 @@ +/* + * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ +#include +#include +#include +#include +#include +#include "internal.h" +#include "callback.h" + +#define NFSDBG_FACILITY NFSDBG_CLIENT + +/* + * Initialize the NFS4 callback service + */ +static int nfs4_init_callback(struct nfs_client *clp) +{ + int error; + + if (clp->rpc_ops->version == 4) { + struct rpc_xprt *xprt; + + xprt = rcu_dereference_raw(clp->cl_rpcclient->cl_xprt); + + if (nfs4_has_session(clp)) { + error = xprt_setup_backchannel(xprt, + NFS41_BC_MIN_CALLBACKS); + if (error < 0) + return error; + } + + error = nfs_callback_up(clp->cl_mvops->minor_version, xprt); + if (error < 0) { + dprintk("%s: failed to start callback. 
Error = %d\n", + __func__, error); + return error; + } + __set_bit(NFS_CS_CALLBACK, &clp->cl_res_state); + } + return 0; +} + +/* + * Initialize the minor version specific parts of an NFS4 client record + */ +static int nfs4_init_client_minor_version(struct nfs_client *clp) +{ +#if defined(CONFIG_NFS_V4_1) + if (clp->cl_mvops->minor_version) { + struct nfs4_session *session = NULL; + /* + * Create the session and mark it expired. + * When a SEQUENCE operation encounters the expired session + * it will do session recovery to initialize it. + */ + session = nfs4_alloc_session(clp); + if (!session) + return -ENOMEM; + + clp->cl_session = session; + /* + * The create session reply races with the server back + * channel probe. Mark the client NFS_CS_SESSION_INITING + * so that the client back channel can find the + * nfs_client struct + */ + nfs_mark_client_ready(clp, NFS_CS_SESSION_INITING); + } +#endif /* CONFIG_NFS_V4_1 */ + + return nfs4_init_callback(clp); +} + +/** + * nfs4_init_client - Initialise an NFS4 client record + * + * @clp: nfs_client to initialise + * @timeparms: timeout parameters for underlying RPC transport + * @ip_addr: callback IP address in presentation format + * @authflavor: authentication flavor for underlying RPC transport + * + * Returns pointer to an NFS client, or an ERR_PTR value. 
+ */ +struct nfs_client *nfs4_init_client(struct nfs_client *clp, + const struct rpc_timeout *timeparms, + const char *ip_addr, + rpc_authflavor_t authflavour) +{ + char buf[INET6_ADDRSTRLEN + 1]; + int error; + + if (clp->cl_cons_state == NFS_CS_READY) { + /* the client is initialised already */ + dprintk("<-- nfs4_init_client() = 0 [already %p]\n", clp); + return clp; + } + + /* Check NFS protocol revision and initialize RPC op vector */ + clp->rpc_ops = &nfs_v4_clientops; + + __set_bit(NFS_CS_DISCRTRY, &clp->cl_flags); + error = nfs_create_rpc_client(clp, timeparms, authflavour); + if (error < 0) + goto error; + + /* If no clientaddr= option was specified, find a usable cb address */ + if (ip_addr == NULL) { + struct sockaddr_storage cb_addr; + struct sockaddr *sap = (struct sockaddr *)&cb_addr; + + error = rpc_localaddr(clp->cl_rpcclient, sap, sizeof(cb_addr)); + if (error < 0) + goto error; + error = rpc_ntop(sap, buf, sizeof(buf)); + if (error < 0) + goto error; + ip_addr = (const char *)buf; + } + strlcpy(clp->cl_ipaddr, ip_addr, sizeof(clp->cl_ipaddr)); + + error = nfs_idmap_new(clp); + if (error < 0) { + dprintk("%s: failed to create idmapper. Error = %d\n", + __func__, error); + goto error; + } + __set_bit(NFS_CS_IDMAP, &clp->cl_res_state); + + error = nfs4_init_client_minor_version(clp); + if (error < 0) + goto error; + + if (!nfs4_has_session(clp)) + nfs_mark_client_ready(clp, NFS_CS_READY); + return clp; + +error: + nfs_mark_client_ready(clp, error); + nfs_put_client(clp); + dprintk("<-- nfs4_init_client() = xerror %d\n", error); + return ERR_PTR(error); +} -- cgit v1.2.3 From fcf10398f641c4450119f8a4cc27e9e584edb010 Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Mon, 16 Jul 2012 16:39:18 -0400 Subject: NFS: Split out NFS v4 server creating code These functions are specific to NFS v4 and can be moved to nfs4client.c to keep them out of the generic client. 
Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 457 ++-------------------------------------------------- fs/nfs/internal.h | 25 +++ fs/nfs/nfs4client.c | 428 ++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 462 insertions(+), 448 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 5664c7bbe50..0d50629d9e2 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -81,11 +81,6 @@ retry: } #endif /* CONFIG_NFS_V4 */ -/* - * Turn off NFSv4 uid/gid mapping when using AUTH_SYS - */ -static bool nfs4_disable_idmapping = true; - /* * RPC cruft for NFS */ @@ -130,17 +125,6 @@ const struct rpc_program nfsacl_program = { }; #endif /* CONFIG_NFS_V3_ACL */ -struct nfs_client_initdata { - unsigned long init_flags; - const char *hostname; - const struct sockaddr *addr; - size_t addrlen; - const struct nfs_rpc_ops *rpc_ops; - int proto; - u32 minorversion; - struct net *net; -}; - /* * Allocate a shared client record * @@ -282,13 +266,6 @@ static void pnfs_init_server(struct nfs_server *server) rpc_init_wait_queue(&server->roc_rpcwaitq, "pNFS ROC"); } -static void nfs4_destroy_server(struct nfs_server *server) -{ - nfs_server_return_all_delegations(server); - unset_pnfs_layoutdriver(server); - nfs4_purge_state_owners(server); -} - #else void nfs_cleanup_cb_ident_idr(struct net *net) { @@ -426,8 +403,8 @@ static int nfs_sockaddr_cmp_ip4(const struct sockaddr *sa1, * Test if two socket addresses represent the same actual socket, * by comparing (only) relevant fields, excluding the port number. 
*/ -static int nfs_sockaddr_match_ipaddr(const struct sockaddr *sa1, - const struct sockaddr *sa2) +int nfs_sockaddr_match_ipaddr(const struct sockaddr *sa1, + const struct sockaddr *sa2) { if (sa1->sa_family != sa2->sa_family) return 0; @@ -461,33 +438,6 @@ static int nfs_sockaddr_cmp(const struct sockaddr *sa1, return 0; } -#if defined(CONFIG_NFS_V4_1) -/* Common match routine for v4.0 and v4.1 callback services */ -static bool nfs4_cb_match_client(const struct sockaddr *addr, - struct nfs_client *clp, u32 minorversion) -{ - struct sockaddr *clap = (struct sockaddr *)&clp->cl_addr; - - /* Don't match clients that failed to initialise */ - if (!(clp->cl_cons_state == NFS_CS_READY || - clp->cl_cons_state == NFS_CS_SESSION_INITING)) - return false; - - smp_rmb(); - - /* Match the version and minorversion */ - if (clp->rpc_ops->version != 4 || - clp->cl_minorversion != minorversion) - return false; - - /* Match only the IP address, not the port number */ - if (!nfs_sockaddr_match_ipaddr(addr, clap)) - return false; - - return true; -} -#endif /* CONFIG_NFS_V4_1 */ - /* * Find an nfs_client on the list that matches the initialisation data * that is supplied. 
@@ -566,7 +516,7 @@ nfs_found_client(const struct nfs_client_initdata *cl_init, * Look up a client by IP address and protocol version * - creates a new record if one doesn't yet exist */ -static struct nfs_client * +struct nfs_client * nfs_get_client(const struct nfs_client_initdata *cl_init, const struct rpc_timeout *timeparms, const char *ip_addr, @@ -621,7 +571,7 @@ void nfs_mark_client_ready(struct nfs_client *clp, int state) /* * Initialise the timeout values for a connection */ -static void nfs_init_timeout_values(struct rpc_timeout *to, int proto, +void nfs_init_timeout_values(struct rpc_timeout *to, int proto, unsigned int timeo, unsigned int retrans) { to->to_initval = timeo * HZ / 10; @@ -781,7 +731,7 @@ static inline void nfs_init_server_aclclient(struct nfs_server *server) /* * Create a general RPC client */ -static int nfs_init_server_rpcclient(struct nfs_server *server, +int nfs_init_server_rpcclient(struct nfs_server *server, const struct rpc_timeout *timeo, rpc_authflavor_t pseudoflavour) { @@ -1014,7 +964,7 @@ static void nfs_server_set_fsinfo(struct nfs_server *server, /* * Probe filesystem information, including the FSID on v2/v3 */ -static int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, struct nfs_fattr *fattr) +int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, struct nfs_fattr *fattr) { struct nfs_fsinfo fsinfo; struct nfs_client *clp = server->nfs_client; @@ -1058,7 +1008,7 @@ out_error: /* * Copy useful information when duplicating a server record */ -static void nfs_server_copy_userdata(struct nfs_server *target, struct nfs_server *source) +void nfs_server_copy_userdata(struct nfs_server *target, struct nfs_server *source) { target->flags = source->flags; target->rsize = source->rsize; @@ -1071,7 +1021,7 @@ static void nfs_server_copy_userdata(struct nfs_server *target, struct nfs_serve target->options = source->options; } -static void nfs_server_insert_lists(struct nfs_server *server) +void 
nfs_server_insert_lists(struct nfs_server *server) { struct nfs_client *clp = server->nfs_client; struct nfs_net *nn = net_generic(clp->cl_net, nfs_net_id); @@ -1105,7 +1055,7 @@ static void nfs_server_remove_lists(struct nfs_server *server) /* * Allocate and initialise a server record */ -static struct nfs_server *nfs_alloc_server(void) +struct nfs_server *nfs_alloc_server(void) { struct nfs_server *server; @@ -1238,391 +1188,6 @@ error: return ERR_PTR(error); } -#ifdef CONFIG_NFS_V4 -/* - * NFSv4.0 callback thread helper - * - * Find a client by callback identifier - */ -struct nfs_client * -nfs4_find_client_ident(struct net *net, int cb_ident) -{ - struct nfs_client *clp; - struct nfs_net *nn = net_generic(net, nfs_net_id); - - spin_lock(&nn->nfs_client_lock); - clp = idr_find(&nn->cb_ident_idr, cb_ident); - if (clp) - atomic_inc(&clp->cl_count); - spin_unlock(&nn->nfs_client_lock); - return clp; -} - -#if defined(CONFIG_NFS_V4_1) -/* - * NFSv4.1 callback thread helper - * For CB_COMPOUND calls, find a client by IP address, protocol version, - * minorversion, and sessionID - * - * Returns NULL if no such client - */ -struct nfs_client * -nfs4_find_client_sessionid(struct net *net, const struct sockaddr *addr, - struct nfs4_sessionid *sid) -{ - struct nfs_client *clp; - struct nfs_net *nn = net_generic(net, nfs_net_id); - - spin_lock(&nn->nfs_client_lock); - list_for_each_entry(clp, &nn->nfs_client_list, cl_share_link) { - if (nfs4_cb_match_client(addr, clp, 1) == false) - continue; - - if (!nfs4_has_session(clp)) - continue; - - /* Match sessionid*/ - if (memcmp(clp->cl_session->sess_id.data, - sid->data, NFS4_MAX_SESSIONID_LEN) != 0) - continue; - - atomic_inc(&clp->cl_count); - spin_unlock(&nn->nfs_client_lock); - return clp; - } - spin_unlock(&nn->nfs_client_lock); - return NULL; -} - -#else /* CONFIG_NFS_V4_1 */ - -struct nfs_client * -nfs4_find_client_sessionid(struct net *net, const struct sockaddr *addr, - struct nfs4_sessionid *sid) -{ - return NULL; -} 
-#endif /* CONFIG_NFS_V4_1 */ - -/* - * Set up an NFS4 client - */ -static int nfs4_set_client(struct nfs_server *server, - const char *hostname, - const struct sockaddr *addr, - const size_t addrlen, - const char *ip_addr, - rpc_authflavor_t authflavour, - int proto, const struct rpc_timeout *timeparms, - u32 minorversion, struct net *net) -{ - struct nfs_client_initdata cl_init = { - .hostname = hostname, - .addr = addr, - .addrlen = addrlen, - .rpc_ops = &nfs_v4_clientops, - .proto = proto, - .minorversion = minorversion, - .net = net, - }; - struct nfs_client *clp; - int error; - - dprintk("--> nfs4_set_client()\n"); - - if (server->flags & NFS_MOUNT_NORESVPORT) - set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags); - - /* Allocate or find a client reference we can use */ - clp = nfs_get_client(&cl_init, timeparms, ip_addr, authflavour); - if (IS_ERR(clp)) { - error = PTR_ERR(clp); - goto error; - } - - /* - * Query for the lease time on clientid setup or renewal - * - * Note that this will be set on nfs_clients that were created - * only for the DS role and did not set this bit, but now will - * serve a dual role. - */ - set_bit(NFS_CS_CHECK_LEASE_TIME, &clp->cl_res_state); - - server->nfs_client = clp; - dprintk("<-- nfs4_set_client() = 0 [new %p]\n", clp); - return 0; -error: - dprintk("<-- nfs4_set_client() = xerror %d\n", error); - return error; -} - -/* - * Set up a pNFS Data Server client. - * - * Return any existing nfs_client that matches server address,port,version - * and minorversion. - * - * For a new nfs_client, use a soft mount (default), a low retrans and a - * low timeout interval so that if a connection is lost, we retry through - * the MDS. 
- */ -struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp, - const struct sockaddr *ds_addr, int ds_addrlen, - int ds_proto, unsigned int ds_timeo, unsigned int ds_retrans) -{ - struct nfs_client_initdata cl_init = { - .addr = ds_addr, - .addrlen = ds_addrlen, - .rpc_ops = &nfs_v4_clientops, - .proto = ds_proto, - .minorversion = mds_clp->cl_minorversion, - .net = mds_clp->cl_net, - }; - struct rpc_timeout ds_timeout; - struct nfs_client *clp; - - /* - * Set an authflavor equual to the MDS value. Use the MDS nfs_client - * cl_ipaddr so as to use the same EXCHANGE_ID co_ownerid as the MDS - * (section 13.1 RFC 5661). - */ - nfs_init_timeout_values(&ds_timeout, ds_proto, ds_timeo, ds_retrans); - clp = nfs_get_client(&cl_init, &ds_timeout, mds_clp->cl_ipaddr, - mds_clp->cl_rpcclient->cl_auth->au_flavor); - - dprintk("<-- %s %p\n", __func__, clp); - return clp; -} -EXPORT_SYMBOL_GPL(nfs4_set_ds_client); - -/* - * Session has been established, and the client marked ready. - * Set the mount rsize and wsize with negotiated fore channel - * attributes which will be bound checked in nfs_server_set_fsinfo. 
- */ -static void nfs4_session_set_rwsize(struct nfs_server *server) -{ -#ifdef CONFIG_NFS_V4_1 - struct nfs4_session *sess; - u32 server_resp_sz; - u32 server_rqst_sz; - - if (!nfs4_has_session(server->nfs_client)) - return; - sess = server->nfs_client->cl_session; - server_resp_sz = sess->fc_attrs.max_resp_sz - nfs41_maxread_overhead; - server_rqst_sz = sess->fc_attrs.max_rqst_sz - nfs41_maxwrite_overhead; - - if (server->rsize > server_resp_sz) - server->rsize = server_resp_sz; - if (server->wsize > server_rqst_sz) - server->wsize = server_rqst_sz; -#endif /* CONFIG_NFS_V4_1 */ -} - -static int nfs4_server_common_setup(struct nfs_server *server, - struct nfs_fh *mntfh) -{ - struct nfs_fattr *fattr; - int error; - - BUG_ON(!server->nfs_client); - BUG_ON(!server->nfs_client->rpc_ops); - BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops); - - /* data servers support only a subset of NFSv4.1 */ - if (is_ds_only_client(server->nfs_client)) - return -EPROTONOSUPPORT; - - fattr = nfs_alloc_fattr(); - if (fattr == NULL) - return -ENOMEM; - - /* We must ensure the session is initialised first */ - error = nfs4_init_session(server); - if (error < 0) - goto out; - - /* Probe the root fh to retrieve its FSID and filehandle */ - error = nfs4_get_rootfh(server, mntfh); - if (error < 0) - goto out; - - dprintk("Server FSID: %llx:%llx\n", - (unsigned long long) server->fsid.major, - (unsigned long long) server->fsid.minor); - dprintk("Mount FH: %d\n", mntfh->size); - - nfs4_session_set_rwsize(server); - - error = nfs_probe_fsinfo(server, mntfh, fattr); - if (error < 0) - goto out; - - if (server->namelen == 0 || server->namelen > NFS4_MAXNAMLEN) - server->namelen = NFS4_MAXNAMLEN; - - nfs_server_insert_lists(server); - server->mount_time = jiffies; - server->destroy = nfs4_destroy_server; -out: - nfs_free_fattr(fattr); - return error; -} - -/* - * Create a version 4 volume record - */ -static int nfs4_init_server(struct nfs_server *server, - const struct nfs_parsed_mount_data 
*data) -{ - struct rpc_timeout timeparms; - int error; - - dprintk("--> nfs4_init_server()\n"); - - nfs_init_timeout_values(&timeparms, data->nfs_server.protocol, - data->timeo, data->retrans); - - /* Initialise the client representation from the mount data */ - server->flags = data->flags; - server->caps |= NFS_CAP_ATOMIC_OPEN|NFS_CAP_CHANGE_ATTR|NFS_CAP_POSIX_LOCK; - if (!(data->flags & NFS_MOUNT_NORDIRPLUS)) - server->caps |= NFS_CAP_READDIRPLUS; - server->options = data->options; - - /* Get a client record */ - error = nfs4_set_client(server, - data->nfs_server.hostname, - (const struct sockaddr *)&data->nfs_server.address, - data->nfs_server.addrlen, - data->client_address, - data->auth_flavors[0], - data->nfs_server.protocol, - &timeparms, - data->minorversion, - data->net); - if (error < 0) - goto error; - - /* - * Don't use NFS uid/gid mapping if we're using AUTH_SYS or lower - * authentication. - */ - if (nfs4_disable_idmapping && data->auth_flavors[0] == RPC_AUTH_UNIX) - server->caps |= NFS_CAP_UIDGID_NOMAP; - - if (data->rsize) - server->rsize = nfs_block_size(data->rsize, NULL); - if (data->wsize) - server->wsize = nfs_block_size(data->wsize, NULL); - - server->acregmin = data->acregmin * HZ; - server->acregmax = data->acregmax * HZ; - server->acdirmin = data->acdirmin * HZ; - server->acdirmax = data->acdirmax * HZ; - - server->port = data->nfs_server.port; - - error = nfs_init_server_rpcclient(server, &timeparms, data->auth_flavors[0]); - -error: - /* Done */ - dprintk("<-- nfs4_init_server() = %d\n", error); - return error; -} - -/* - * Create a version 4 volume record - * - keyed on server and FSID - */ -struct nfs_server *nfs4_create_server(const struct nfs_parsed_mount_data *data, - struct nfs_fh *mntfh) -{ - struct nfs_server *server; - int error; - - dprintk("--> nfs4_create_server()\n"); - - server = nfs_alloc_server(); - if (!server) - return ERR_PTR(-ENOMEM); - - /* set up the general RPC client */ - error = nfs4_init_server(server, data); - 
if (error < 0) - goto error; - - error = nfs4_server_common_setup(server, mntfh); - if (error < 0) - goto error; - - dprintk("<-- nfs4_create_server() = %p\n", server); - return server; - -error: - nfs_free_server(server); - dprintk("<-- nfs4_create_server() = error %d\n", error); - return ERR_PTR(error); -} - -/* - * Create an NFS4 referral server record - */ -struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data, - struct nfs_fh *mntfh) -{ - struct nfs_client *parent_client; - struct nfs_server *server, *parent_server; - int error; - - dprintk("--> nfs4_create_referral_server()\n"); - - server = nfs_alloc_server(); - if (!server) - return ERR_PTR(-ENOMEM); - - parent_server = NFS_SB(data->sb); - parent_client = parent_server->nfs_client; - - /* Initialise the client representation from the parent server */ - nfs_server_copy_userdata(server, parent_server); - server->caps |= NFS_CAP_ATOMIC_OPEN|NFS_CAP_CHANGE_ATTR; - - /* Get a client representation. - * Note: NFSv4 always uses TCP, */ - error = nfs4_set_client(server, data->hostname, - data->addr, - data->addrlen, - parent_client->cl_ipaddr, - data->authflavor, - rpc_protocol(parent_server->client), - parent_server->client->cl_timeout, - parent_client->cl_mvops->minor_version, - parent_client->cl_net); - if (error < 0) - goto error; - - error = nfs_init_server_rpcclient(server, parent_server->client->cl_timeout, data->authflavor); - if (error < 0) - goto error; - - error = nfs4_server_common_setup(server, mntfh); - if (error < 0) - goto error; - - dprintk("<-- nfs_create_referral_server() = %p\n", server); - return server; - -error: - nfs_free_server(server); - dprintk("<-- nfs4_create_referral_server() = error %d\n", error); - return ERR_PTR(error); -} - -#endif /* CONFIG_NFS_V4 */ - /* * Clone an NFS2, NFS3 or NFS4 server record */ @@ -1972,7 +1537,3 @@ void nfs_fs_proc_exit(void) } #endif /* CONFIG_PROC_FS */ - -module_param(nfs4_disable_idmapping, bool, 0644); 
-MODULE_PARM_DESC(nfs4_disable_idmapping, - "Turn off NFSv4 idmapping when using 'sec=sys'"); diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 10df28d14f8..b4a35705246 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -85,6 +85,17 @@ struct nfs_clone_mount { */ #define NFS_MAX_READDIR_PAGES 8 +struct nfs_client_initdata { + unsigned long init_flags; + const char *hostname; + const struct sockaddr *addr; + size_t addrlen; + const struct nfs_rpc_ops *rpc_ops; + int proto; + u32 minorversion; + struct net *net; +}; + /* * In-kernel mount arguments */ @@ -150,6 +161,16 @@ extern const struct rpc_program nfs_program; extern void nfs_clients_init(struct net *net); extern struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *); int nfs_create_rpc_client(struct nfs_client *, const struct rpc_timeout *, rpc_authflavor_t); +struct nfs_client *nfs_get_client(const struct nfs_client_initdata *, + const struct rpc_timeout *, const char *, + rpc_authflavor_t); +int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *, struct nfs_fattr *); +void nfs_server_insert_lists(struct nfs_server *); +void nfs_init_timeout_values(struct rpc_timeout *, int, unsigned int, unsigned int); +int nfs_init_server_rpcclient(struct nfs_server *, const struct rpc_timeout *t, + rpc_authflavor_t); +struct nfs_server *nfs_alloc_server(void); +void nfs_server_copy_userdata(struct nfs_server *, struct nfs_server *); extern void nfs_cleanup_cb_ident_idr(struct net *); extern void nfs_put_client(struct nfs_client *); @@ -191,6 +212,10 @@ static inline void nfs_fs_proc_exit(void) } #endif +#ifdef CONFIG_NFS_V4_1 +int nfs_sockaddr_match_ipaddr(const struct sockaddr *, const struct sockaddr *); +#endif + /* callback_xdr.c */ extern struct svc_version nfs4_callback_version1; extern struct svc_version nfs4_callback_version4; diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index c5234b58990..a71d95ecbea 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ 
-2,16 +2,26 @@ * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) */ +#include #include #include +#include #include #include #include #include "internal.h" #include "callback.h" +#include "delegation.h" +#include "pnfs.h" +#include "netns.h" #define NFSDBG_FACILITY NFSDBG_CLIENT +/* + * Turn off NFSv4 uid/gid mapping when using AUTH_SYS + */ +static bool nfs4_disable_idmapping = true; + /* * Initialize the NFS4 callback service */ @@ -142,3 +152,421 @@ error: dprintk("<-- nfs4_init_client() = xerror %d\n", error); return ERR_PTR(error); } + +static void nfs4_destroy_server(struct nfs_server *server) +{ + nfs_server_return_all_delegations(server); + unset_pnfs_layoutdriver(server); + nfs4_purge_state_owners(server); +} + +/* + * NFSv4.0 callback thread helper + * + * Find a client by callback identifier + */ +struct nfs_client * +nfs4_find_client_ident(struct net *net, int cb_ident) +{ + struct nfs_client *clp; + struct nfs_net *nn = net_generic(net, nfs_net_id); + + spin_lock(&nn->nfs_client_lock); + clp = idr_find(&nn->cb_ident_idr, cb_ident); + if (clp) + atomic_inc(&clp->cl_count); + spin_unlock(&nn->nfs_client_lock); + return clp; +} + +#if defined(CONFIG_NFS_V4_1) +/* Common match routine for v4.0 and v4.1 callback services */ +static bool nfs4_cb_match_client(const struct sockaddr *addr, + struct nfs_client *clp, u32 minorversion) +{ + struct sockaddr *clap = (struct sockaddr *)&clp->cl_addr; + + /* Don't match clients that failed to initialise */ + if (!(clp->cl_cons_state == NFS_CS_READY || + clp->cl_cons_state == NFS_CS_SESSION_INITING)) + return false; + + smp_rmb(); + + /* Match the version and minorversion */ + if (clp->rpc_ops->version != 4 || + clp->cl_minorversion != minorversion) + return false; + + /* Match only the IP address, not the port number */ + if (!nfs_sockaddr_match_ipaddr(addr, clap)) + return false; + + return true; +} + +/* + * NFSv4.1 callback thread helper + * For CB_COMPOUND 
calls, find a client by IP address, protocol version, + * minorversion, and sessionID + * + * Returns NULL if no such client + */ +struct nfs_client * +nfs4_find_client_sessionid(struct net *net, const struct sockaddr *addr, + struct nfs4_sessionid *sid) +{ + struct nfs_client *clp; + struct nfs_net *nn = net_generic(net, nfs_net_id); + + spin_lock(&nn->nfs_client_lock); + list_for_each_entry(clp, &nn->nfs_client_list, cl_share_link) { + if (nfs4_cb_match_client(addr, clp, 1) == false) + continue; + + if (!nfs4_has_session(clp)) + continue; + + /* Match sessionid*/ + if (memcmp(clp->cl_session->sess_id.data, + sid->data, NFS4_MAX_SESSIONID_LEN) != 0) + continue; + + atomic_inc(&clp->cl_count); + spin_unlock(&nn->nfs_client_lock); + return clp; + } + spin_unlock(&nn->nfs_client_lock); + return NULL; +} + +#else /* CONFIG_NFS_V4_1 */ + +struct nfs_client * +nfs4_find_client_sessionid(struct net *net, const struct sockaddr *addr, + struct nfs4_sessionid *sid) +{ + return NULL; +} +#endif /* CONFIG_NFS_V4_1 */ + +/* + * Set up an NFS4 client + */ +static int nfs4_set_client(struct nfs_server *server, + const char *hostname, + const struct sockaddr *addr, + const size_t addrlen, + const char *ip_addr, + rpc_authflavor_t authflavour, + int proto, const struct rpc_timeout *timeparms, + u32 minorversion, struct net *net) +{ + struct nfs_client_initdata cl_init = { + .hostname = hostname, + .addr = addr, + .addrlen = addrlen, + .rpc_ops = &nfs_v4_clientops, + .proto = proto, + .minorversion = minorversion, + .net = net, + }; + struct nfs_client *clp; + int error; + + dprintk("--> nfs4_set_client()\n"); + + if (server->flags & NFS_MOUNT_NORESVPORT) + set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags); + + /* Allocate or find a client reference we can use */ + clp = nfs_get_client(&cl_init, timeparms, ip_addr, authflavour); + if (IS_ERR(clp)) { + error = PTR_ERR(clp); + goto error; + } + + /* + * Query for the lease time on clientid setup or renewal + * + * Note that this will 
be set on nfs_clients that were created + * only for the DS role and did not set this bit, but now will + * serve a dual role. + */ + set_bit(NFS_CS_CHECK_LEASE_TIME, &clp->cl_res_state); + + server->nfs_client = clp; + dprintk("<-- nfs4_set_client() = 0 [new %p]\n", clp); + return 0; +error: + dprintk("<-- nfs4_set_client() = xerror %d\n", error); + return error; +} + +/* + * Set up a pNFS Data Server client. + * + * Return any existing nfs_client that matches server address,port,version + * and minorversion. + * + * For a new nfs_client, use a soft mount (default), a low retrans and a + * low timeout interval so that if a connection is lost, we retry through + * the MDS. + */ +struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp, + const struct sockaddr *ds_addr, int ds_addrlen, + int ds_proto, unsigned int ds_timeo, unsigned int ds_retrans) +{ + struct nfs_client_initdata cl_init = { + .addr = ds_addr, + .addrlen = ds_addrlen, + .rpc_ops = &nfs_v4_clientops, + .proto = ds_proto, + .minorversion = mds_clp->cl_minorversion, + .net = mds_clp->cl_net, + }; + struct rpc_timeout ds_timeout; + struct nfs_client *clp; + + /* + * Set an authflavor equal to the MDS value. Use the MDS nfs_client + * cl_ipaddr so as to use the same EXCHANGE_ID co_ownerid as the MDS + * (section 13.1 RFC 5661). + */ + nfs_init_timeout_values(&ds_timeout, ds_proto, ds_timeo, ds_retrans); + clp = nfs_get_client(&cl_init, &ds_timeout, mds_clp->cl_ipaddr, + mds_clp->cl_rpcclient->cl_auth->au_flavor); + + dprintk("<-- %s %p\n", __func__, clp); + return clp; +} +EXPORT_SYMBOL_GPL(nfs4_set_ds_client); + +/* + * Session has been established, and the client marked ready. + * Set the mount rsize and wsize with negotiated fore channel + * attributes which will be bound checked in nfs_server_set_fsinfo. 
+ */ +static void nfs4_session_set_rwsize(struct nfs_server *server) +{ +#ifdef CONFIG_NFS_V4_1 + struct nfs4_session *sess; + u32 server_resp_sz; + u32 server_rqst_sz; + + if (!nfs4_has_session(server->nfs_client)) + return; + sess = server->nfs_client->cl_session; + server_resp_sz = sess->fc_attrs.max_resp_sz - nfs41_maxread_overhead; + server_rqst_sz = sess->fc_attrs.max_rqst_sz - nfs41_maxwrite_overhead; + + if (server->rsize > server_resp_sz) + server->rsize = server_resp_sz; + if (server->wsize > server_rqst_sz) + server->wsize = server_rqst_sz; +#endif /* CONFIG_NFS_V4_1 */ +} + +static int nfs4_server_common_setup(struct nfs_server *server, + struct nfs_fh *mntfh) +{ + struct nfs_fattr *fattr; + int error; + + BUG_ON(!server->nfs_client); + BUG_ON(!server->nfs_client->rpc_ops); + BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops); + + /* data servers support only a subset of NFSv4.1 */ + if (is_ds_only_client(server->nfs_client)) + return -EPROTONOSUPPORT; + + fattr = nfs_alloc_fattr(); + if (fattr == NULL) + return -ENOMEM; + + /* We must ensure the session is initialised first */ + error = nfs4_init_session(server); + if (error < 0) + goto out; + + /* Probe the root fh to retrieve its FSID and filehandle */ + error = nfs4_get_rootfh(server, mntfh); + if (error < 0) + goto out; + + dprintk("Server FSID: %llx:%llx\n", + (unsigned long long) server->fsid.major, + (unsigned long long) server->fsid.minor); + dprintk("Mount FH: %d\n", mntfh->size); + + nfs4_session_set_rwsize(server); + + error = nfs_probe_fsinfo(server, mntfh, fattr); + if (error < 0) + goto out; + + if (server->namelen == 0 || server->namelen > NFS4_MAXNAMLEN) + server->namelen = NFS4_MAXNAMLEN; + + nfs_server_insert_lists(server); + server->mount_time = jiffies; + server->destroy = nfs4_destroy_server; +out: + nfs_free_fattr(fattr); + return error; +} + +/* + * Create a version 4 volume record + */ +static int nfs4_init_server(struct nfs_server *server, + const struct nfs_parsed_mount_data 
*data) +{ + struct rpc_timeout timeparms; + int error; + + dprintk("--> nfs4_init_server()\n"); + + nfs_init_timeout_values(&timeparms, data->nfs_server.protocol, + data->timeo, data->retrans); + + /* Initialise the client representation from the mount data */ + server->flags = data->flags; + server->caps |= NFS_CAP_ATOMIC_OPEN|NFS_CAP_CHANGE_ATTR|NFS_CAP_POSIX_LOCK; + if (!(data->flags & NFS_MOUNT_NORDIRPLUS)) + server->caps |= NFS_CAP_READDIRPLUS; + server->options = data->options; + + /* Get a client record */ + error = nfs4_set_client(server, + data->nfs_server.hostname, + (const struct sockaddr *)&data->nfs_server.address, + data->nfs_server.addrlen, + data->client_address, + data->auth_flavors[0], + data->nfs_server.protocol, + &timeparms, + data->minorversion, + data->net); + if (error < 0) + goto error; + + /* + * Don't use NFS uid/gid mapping if we're using AUTH_SYS or lower + * authentication. + */ + if (nfs4_disable_idmapping && data->auth_flavors[0] == RPC_AUTH_UNIX) + server->caps |= NFS_CAP_UIDGID_NOMAP; + + if (data->rsize) + server->rsize = nfs_block_size(data->rsize, NULL); + if (data->wsize) + server->wsize = nfs_block_size(data->wsize, NULL); + + server->acregmin = data->acregmin * HZ; + server->acregmax = data->acregmax * HZ; + server->acdirmin = data->acdirmin * HZ; + server->acdirmax = data->acdirmax * HZ; + + server->port = data->nfs_server.port; + + error = nfs_init_server_rpcclient(server, &timeparms, data->auth_flavors[0]); + +error: + /* Done */ + dprintk("<-- nfs4_init_server() = %d\n", error); + return error; +} + +/* + * Create a version 4 volume record + * - keyed on server and FSID + */ +struct nfs_server *nfs4_create_server(const struct nfs_parsed_mount_data *data, + struct nfs_fh *mntfh) +{ + struct nfs_server *server; + int error; + + dprintk("--> nfs4_create_server()\n"); + + server = nfs_alloc_server(); + if (!server) + return ERR_PTR(-ENOMEM); + + /* set up the general RPC client */ + error = nfs4_init_server(server, data); + 
if (error < 0) + goto error; + + error = nfs4_server_common_setup(server, mntfh); + if (error < 0) + goto error; + + dprintk("<-- nfs4_create_server() = %p\n", server); + return server; + +error: + nfs_free_server(server); + dprintk("<-- nfs4_create_server() = error %d\n", error); + return ERR_PTR(error); +} + +/* + * Create an NFS4 referral server record + */ +struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data, + struct nfs_fh *mntfh) +{ + struct nfs_client *parent_client; + struct nfs_server *server, *parent_server; + int error; + + dprintk("--> nfs4_create_referral_server()\n"); + + server = nfs_alloc_server(); + if (!server) + return ERR_PTR(-ENOMEM); + + parent_server = NFS_SB(data->sb); + parent_client = parent_server->nfs_client; + + /* Initialise the client representation from the parent server */ + nfs_server_copy_userdata(server, parent_server); + server->caps |= NFS_CAP_ATOMIC_OPEN|NFS_CAP_CHANGE_ATTR; + + /* Get a client representation. + * Note: NFSv4 always uses TCP, */ + error = nfs4_set_client(server, data->hostname, + data->addr, + data->addrlen, + parent_client->cl_ipaddr, + data->authflavor, + rpc_protocol(parent_server->client), + parent_server->client->cl_timeout, + parent_client->cl_mvops->minor_version, + parent_client->cl_net); + if (error < 0) + goto error; + + error = nfs_init_server_rpcclient(server, parent_server->client->cl_timeout, data->authflavor); + if (error < 0) + goto error; + + error = nfs4_server_common_setup(server, mntfh); + if (error < 0) + goto error; + + dprintk("<-- nfs_create_referral_server() = %p\n", server); + return server; + +error: + nfs_free_server(server); + dprintk("<-- nfs4_create_referral_server() = error %d\n", error); + return ERR_PTR(error); +} + +module_param(nfs4_disable_idmapping, bool, 0644); +MODULE_PARM_DESC(nfs4_disable_idmapping, + "Turn off NFSv4 idmapping when using 'sec=sys'"); -- cgit v1.2.3 From 3cadf4b864cab9d19b935289c004799d1065cd03 Mon Sep 17 00:00:00 2001 From: 
Bryan Schumaker Date: Mon, 16 Jul 2012 16:39:19 -0400 Subject: NFS: Create a single nfs_clone_super() function v2 and v3 shared a function for this, but v4 implemented something only slightly different. Might as well share code whenever possible... Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/super.c | 32 ++++++-------------------------- 1 file changed, 6 insertions(+), 26 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 9bad4e75306..ca3c0e8cf77 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -2218,7 +2218,7 @@ static void nfs_fill_super(struct super_block *sb, } /* - * Finish setting up a cloned NFS2/3 superblock + * Finish setting up a cloned NFS2/3/4 superblock */ static void nfs_clone_super(struct super_block *sb, struct nfs_mount_info *mount_info) @@ -2229,16 +2229,17 @@ static void nfs_clone_super(struct super_block *sb, sb->s_blocksize_bits = old_sb->s_blocksize_bits; sb->s_blocksize = old_sb->s_blocksize; sb->s_maxbytes = old_sb->s_maxbytes; + sb->s_xattr = old_sb->s_xattr; + sb->s_op = old_sb->s_op; + sb->s_time_gran = 1; - if (server->nfs_client->rpc_ops->version == 3) { + if (server->nfs_client->rpc_ops->version != 2) { /* The VFS shouldn't apply the umask to mode bits. We will do * so ourselves when necessary. */ sb->s_flags |= MS_POSIXACL; - sb->s_time_gran = 1; } - sb->s_op = old_sb->s_op; nfs_initialise_sb(sb); } @@ -2579,27 +2580,6 @@ nfs_xdev_mount(struct file_system_type *fs_type, int flags, #ifdef CONFIG_NFS_V4 -/* - * Finish setting up a cloned NFS4 superblock - */ -static void nfs4_clone_super(struct super_block *sb, - struct nfs_mount_info *mount_info) -{ - const struct super_block *old_sb = mount_info->cloned->sb; - sb->s_blocksize_bits = old_sb->s_blocksize_bits; - sb->s_blocksize = old_sb->s_blocksize; - sb->s_maxbytes = old_sb->s_maxbytes; - sb->s_time_gran = 1; - sb->s_op = old_sb->s_op; - /* - * The VFS shouldn't apply the umask to mode bits. 
We will do - * so ourselves when necessary. - */ - sb->s_flags |= MS_POSIXACL; - sb->s_xattr = old_sb->s_xattr; - nfs_initialise_sb(sb); -} - /* * Set up an NFS4 superblock */ @@ -2883,7 +2863,7 @@ nfs4_xdev_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data) { struct nfs_mount_info mount_info = { - .fill_super = nfs4_clone_super, + .fill_super = nfs_clone_super, .set_security = nfs_clone_sb_security, .cloned = raw_data, }; -- cgit v1.2.3 From fbdefd6442811392e857721573b63a51d1149cc8 Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Mon, 16 Jul 2012 16:39:20 -0400 Subject: NFS: Split out the NFS v4 filesystem types This allows me to move the v4 mounting and unmounting functions out of the generic client and into a file that is only compiled when CONFIG_NFS_V4 is enabled. Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/internal.h | 29 ++++ fs/nfs/nfs4_fs.h | 2 + fs/nfs/nfs4super.c | 328 ++++++++++++++++++++++++++++++++++++++++++++ fs/nfs/super.c | 395 +++-------------------------------------------------- 4 files changed, 381 insertions(+), 373 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index b4a35705246..cfafd13b6fe 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -153,6 +153,14 @@ struct nfs_mount_request { struct net *net; }; +struct nfs_mount_info { + void (*fill_super)(struct super_block *, struct nfs_mount_info *); + int (*set_security)(struct super_block *, struct dentry *, struct nfs_mount_info *); + struct nfs_parsed_mount_data *parsed; + struct nfs_clone_mount *cloned; + struct nfs_fh *mntfh; +}; + extern int nfs_mount(struct nfs_mount_request *info); extern void nfs_umount(const struct nfs_mount_request *info); @@ -318,6 +326,16 @@ extern struct file_system_type nfs_xdev_fs_type; extern struct file_system_type nfs4_xdev_fs_type; extern struct file_system_type nfs4_referral_fs_type; #endif +void nfs_initialise_sb(struct super_block *); +int 
nfs_set_sb_security(struct super_block *, struct dentry *, struct nfs_mount_info *); +int nfs_clone_sb_security(struct super_block *, struct dentry *, struct nfs_mount_info *); +struct dentry *nfs_fs_mount_common(struct file_system_type *, struct nfs_server *, + int, const char *, struct nfs_mount_info *); +struct dentry *nfs_fs_mount(struct file_system_type *, int, const char *, void *); +struct dentry * nfs_xdev_mount_common(struct file_system_type *, int, + const char *, struct nfs_mount_info *); +void nfs_kill_super(struct super_block *); +void nfs_fill_super(struct super_block *, struct nfs_mount_info *); extern struct rpc_stat nfs_rpcstat; @@ -364,6 +382,17 @@ extern void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio); extern void nfs_readdata_release(struct nfs_read_data *rdata); +/* super.c */ +void nfs_clone_super(struct super_block *, struct nfs_mount_info *); +void nfs_umount_begin(struct super_block *); +int nfs_statfs(struct dentry *, struct kstatfs *); +int nfs_show_options(struct seq_file *, struct dentry *); +int nfs_show_devname(struct seq_file *, struct dentry *); +int nfs_show_path(struct seq_file *, struct dentry *); +int nfs_show_stats(struct seq_file *, struct dentry *); +void nfs_put_super(struct super_block *); +int nfs_remount(struct super_block *sb, int *flags, char *raw_data); + /* write.c */ extern void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags, diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index b508fef1a32..b1ecacd8784 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -365,6 +365,8 @@ extern void nfs4_free_lock_state(struct nfs_server *server, struct nfs4_lock_sta extern const nfs4_stateid zero_stateid; /* nfs4super.c */ +struct nfs_mount_info; +struct dentry *nfs4_try_mount(int, const char *, struct nfs_mount_info *); int init_nfs_v4(void); void exit_nfs_v4(void); diff --git a/fs/nfs/nfs4super.c 
b/fs/nfs/nfs4super.c index 70c394e75ca..2af26913884 100644 --- a/fs/nfs/nfs4super.c +++ b/fs/nfs/nfs4super.c @@ -2,10 +2,331 @@ * Copyright (c) 2012 Bryan Schumaker */ #include +#include #include +#include #include +#include "internal.h" #include "nfs4_fs.h" +#define NFSDBG_FACILITY NFSDBG_VFS + +static struct dentry *nfs4_remote_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *raw_data); +static struct dentry *nfs4_xdev_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *raw_data); +static struct dentry *nfs4_referral_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *raw_data); +static struct dentry *nfs4_remote_referral_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *raw_data); + +static struct file_system_type nfs4_fs_type = { + .owner = THIS_MODULE, + .name = "nfs4", + .mount = nfs_fs_mount, + .kill_sb = nfs_kill_super, + .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, +}; + +static struct file_system_type nfs4_remote_fs_type = { + .owner = THIS_MODULE, + .name = "nfs4", + .mount = nfs4_remote_mount, + .kill_sb = nfs_kill_super, + .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, +}; + +struct file_system_type nfs4_xdev_fs_type = { + .owner = THIS_MODULE, + .name = "nfs4", + .mount = nfs4_xdev_mount, + .kill_sb = nfs_kill_super, + .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, +}; + +static struct file_system_type nfs4_remote_referral_fs_type = { + .owner = THIS_MODULE, + .name = "nfs4", + .mount = nfs4_remote_referral_mount, + .kill_sb = nfs_kill_super, + .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, +}; + +struct file_system_type nfs4_referral_fs_type = { + .owner = THIS_MODULE, + .name = "nfs4", + .mount = nfs4_referral_mount, + .kill_sb = nfs_kill_super, + .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, +}; + +static const struct 
super_operations nfs4_sops = { + .alloc_inode = nfs_alloc_inode, + .destroy_inode = nfs_destroy_inode, + .write_inode = nfs4_write_inode, + .put_super = nfs_put_super, + .statfs = nfs_statfs, + .evict_inode = nfs4_evict_inode, + .umount_begin = nfs_umount_begin, + .show_options = nfs_show_options, + .show_devname = nfs_show_devname, + .show_path = nfs_show_path, + .show_stats = nfs_show_stats, + .remount_fs = nfs_remount, +}; + +/* + * Set up an NFS4 superblock + */ +static void nfs4_fill_super(struct super_block *sb, + struct nfs_mount_info *mount_info) +{ + sb->s_time_gran = 1; + sb->s_op = &nfs4_sops; + /* + * The VFS shouldn't apply the umask to mode bits. We will do + * so ourselves when necessary. + */ + sb->s_flags |= MS_POSIXACL; + sb->s_xattr = nfs4_xattr_handlers; + nfs_initialise_sb(sb); +} + +/* + * Get the superblock for the NFS4 root partition + */ +static struct dentry * +nfs4_remote_mount(struct file_system_type *fs_type, int flags, + const char *dev_name, void *info) +{ + struct nfs_mount_info *mount_info = info; + struct nfs_server *server; + struct dentry *mntroot = ERR_PTR(-ENOMEM); + + mount_info->fill_super = nfs4_fill_super; + mount_info->set_security = nfs_set_sb_security; + + /* Get a volume representation */ + server = nfs4_create_server(mount_info->parsed, mount_info->mntfh); + if (IS_ERR(server)) { + mntroot = ERR_CAST(server); + goto out; + } + + mntroot = nfs_fs_mount_common(fs_type, server, flags, dev_name, mount_info); + +out: + return mntroot; +} + +static struct vfsmount *nfs_do_root_mount(struct file_system_type *fs_type, + int flags, void *data, const char *hostname) +{ + struct vfsmount *root_mnt; + char *root_devname; + size_t len; + + len = strlen(hostname) + 5; + root_devname = kmalloc(len, GFP_KERNEL); + if (root_devname == NULL) + return ERR_PTR(-ENOMEM); + /* Does hostname need to be enclosed in brackets? 
*/ + if (strchr(hostname, ':')) + snprintf(root_devname, len, "[%s]:/", hostname); + else + snprintf(root_devname, len, "%s:/", hostname); + root_mnt = vfs_kern_mount(fs_type, flags, root_devname, data); + kfree(root_devname); + return root_mnt; +} + +struct nfs_referral_count { + struct list_head list; + const struct task_struct *task; + unsigned int referral_count; +}; + +static LIST_HEAD(nfs_referral_count_list); +static DEFINE_SPINLOCK(nfs_referral_count_list_lock); + +static struct nfs_referral_count *nfs_find_referral_count(void) +{ + struct nfs_referral_count *p; + + list_for_each_entry(p, &nfs_referral_count_list, list) { + if (p->task == current) + return p; + } + return NULL; +} + +#define NFS_MAX_NESTED_REFERRALS 2 + +static int nfs_referral_loop_protect(void) +{ + struct nfs_referral_count *p, *new; + int ret = -ENOMEM; + + new = kmalloc(sizeof(*new), GFP_KERNEL); + if (!new) + goto out; + new->task = current; + new->referral_count = 1; + + ret = 0; + spin_lock(&nfs_referral_count_list_lock); + p = nfs_find_referral_count(); + if (p != NULL) { + if (p->referral_count >= NFS_MAX_NESTED_REFERRALS) + ret = -ELOOP; + else + p->referral_count++; + } else { + list_add(&new->list, &nfs_referral_count_list); + new = NULL; + } + spin_unlock(&nfs_referral_count_list_lock); + kfree(new); +out: + return ret; +} + +static void nfs_referral_loop_unprotect(void) +{ + struct nfs_referral_count *p; + + spin_lock(&nfs_referral_count_list_lock); + p = nfs_find_referral_count(); + p->referral_count--; + if (p->referral_count == 0) + list_del(&p->list); + else + p = NULL; + spin_unlock(&nfs_referral_count_list_lock); + kfree(p); +} + +static struct dentry *nfs_follow_remote_path(struct vfsmount *root_mnt, + const char *export_path) +{ + struct dentry *dentry; + int err; + + if (IS_ERR(root_mnt)) + return ERR_CAST(root_mnt); + + err = nfs_referral_loop_protect(); + if (err) { + mntput(root_mnt); + return ERR_PTR(err); + } + + dentry = mount_subtree(root_mnt, export_path); + 
nfs_referral_loop_unprotect(); + + return dentry; +} + +struct dentry *nfs4_try_mount(int flags, const char *dev_name, + struct nfs_mount_info *mount_info) +{ + char *export_path; + struct vfsmount *root_mnt; + struct dentry *res; + struct nfs_parsed_mount_data *data = mount_info->parsed; + + dfprintk(MOUNT, "--> nfs4_try_mount()\n"); + + mount_info->fill_super = nfs4_fill_super; + + export_path = data->nfs_server.export_path; + data->nfs_server.export_path = "/"; + root_mnt = nfs_do_root_mount(&nfs4_remote_fs_type, flags, mount_info, + data->nfs_server.hostname); + data->nfs_server.export_path = export_path; + + res = nfs_follow_remote_path(root_mnt, export_path); + + dfprintk(MOUNT, "<-- nfs4_try_mount() = %ld%s\n", + IS_ERR(res) ? PTR_ERR(res) : 0, + IS_ERR(res) ? " [error]" : ""); + return res; +} + +/* + * Clone an NFS4 server record on xdev traversal (FSID-change) + */ +static struct dentry * +nfs4_xdev_mount(struct file_system_type *fs_type, int flags, + const char *dev_name, void *raw_data) +{ + struct nfs_mount_info mount_info = { + .fill_super = nfs_clone_super, + .set_security = nfs_clone_sb_security, + .cloned = raw_data, + }; + return nfs_xdev_mount_common(&nfs4_fs_type, flags, dev_name, &mount_info); +} + +static struct dentry * +nfs4_remote_referral_mount(struct file_system_type *fs_type, int flags, + const char *dev_name, void *raw_data) +{ + struct nfs_mount_info mount_info = { + .fill_super = nfs4_fill_super, + .set_security = nfs_clone_sb_security, + .cloned = raw_data, + }; + struct nfs_server *server; + struct dentry *mntroot = ERR_PTR(-ENOMEM); + + dprintk("--> nfs4_referral_get_sb()\n"); + + mount_info.mntfh = nfs_alloc_fhandle(); + if (mount_info.cloned == NULL || mount_info.mntfh == NULL) + goto out; + + /* create a new volume representation */ + server = nfs4_create_referral_server(mount_info.cloned, mount_info.mntfh); + if (IS_ERR(server)) { + mntroot = ERR_CAST(server); + goto out; + } + + mntroot = nfs_fs_mount_common(&nfs4_fs_type, 
server, flags, dev_name, &mount_info); +out: + nfs_free_fhandle(mount_info.mntfh); + return mntroot; +} + +/* + * Create an NFS4 server record on referral traversal + */ +static struct dentry *nfs4_referral_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *raw_data) +{ + struct nfs_clone_mount *data = raw_data; + char *export_path; + struct vfsmount *root_mnt; + struct dentry *res; + + dprintk("--> nfs4_referral_mount()\n"); + + export_path = data->mnt_path; + data->mnt_path = "/"; + + root_mnt = nfs_do_root_mount(&nfs4_remote_referral_fs_type, + flags, data, data->hostname); + data->mnt_path = export_path; + + res = nfs_follow_remote_path(root_mnt, export_path); + dprintk("<-- nfs4_referral_mount() = %ld%s\n", + IS_ERR(res) ? PTR_ERR(res) : 0, + IS_ERR(res) ? " [error]" : ""); + return res; +} + + int __init init_nfs_v4(void) { int err; @@ -18,7 +339,13 @@ int __init init_nfs_v4(void) if (err) goto out1; + err = register_filesystem(&nfs4_fs_type); + if (err < 0) + goto out2; + return 0; +out2: + nfs4_unregister_sysctl(); out1: nfs_idmap_quit(); out: @@ -27,6 +354,7 @@ out: void __exit exit_nfs_v4(void) { + unregister_filesystem(&nfs4_fs_type); nfs4_unregister_sysctl(); nfs_idmap_quit(); } diff --git a/fs/nfs/super.c b/fs/nfs/super.c index ca3c0e8cf77..95866a8c21b 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -278,29 +278,8 @@ static match_table_t nfs_vers_tokens = { { Opt_vers_err, NULL } }; -struct nfs_mount_info { - void (*fill_super)(struct super_block *, struct nfs_mount_info *); - int (*set_security)(struct super_block *, struct dentry *, struct nfs_mount_info *); - struct nfs_parsed_mount_data *parsed; - struct nfs_clone_mount *cloned; - struct nfs_fh *mntfh; -}; - -static void nfs_umount_begin(struct super_block *); -static int nfs_statfs(struct dentry *, struct kstatfs *); -static int nfs_show_options(struct seq_file *, struct dentry *); -static int nfs_show_devname(struct seq_file *, struct dentry *); -static int 
nfs_show_path(struct seq_file *, struct dentry *); -static int nfs_show_stats(struct seq_file *, struct dentry *); -static struct dentry *nfs_fs_mount_common(struct file_system_type *, - struct nfs_server *, int, const char *, struct nfs_mount_info *); -static struct dentry *nfs_fs_mount(struct file_system_type *, - int, const char *, void *); static struct dentry *nfs_xdev_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data); -static void nfs_put_super(struct super_block *); -static void nfs_kill_super(struct super_block *); -static int nfs_remount(struct super_block *sb, int *flags, char *raw_data); static struct file_system_type nfs_fs_type = { .owner = THIS_MODULE, @@ -337,71 +316,6 @@ static const struct super_operations nfs_sops = { static void nfs4_validate_mount_flags(struct nfs_parsed_mount_data *); static int nfs4_validate_mount_data(void *options, struct nfs_parsed_mount_data *args, const char *dev_name); -static struct dentry *nfs4_try_mount(int flags, const char *dev_name, - struct nfs_mount_info *mount_info); -static struct dentry *nfs4_remote_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *raw_data); -static struct dentry *nfs4_xdev_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *raw_data); -static struct dentry *nfs4_referral_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *raw_data); -static struct dentry *nfs4_remote_referral_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *raw_data); - -static struct file_system_type nfs4_fs_type = { - .owner = THIS_MODULE, - .name = "nfs4", - .mount = nfs_fs_mount, - .kill_sb = nfs_kill_super, - .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, -}; - -static struct file_system_type nfs4_remote_fs_type = { - .owner = THIS_MODULE, - .name = "nfs4", - .mount = nfs4_remote_mount, - .kill_sb = nfs_kill_super, - .fs_flags = 
FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, -}; - -struct file_system_type nfs4_xdev_fs_type = { - .owner = THIS_MODULE, - .name = "nfs4", - .mount = nfs4_xdev_mount, - .kill_sb = nfs_kill_super, - .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, -}; - -static struct file_system_type nfs4_remote_referral_fs_type = { - .owner = THIS_MODULE, - .name = "nfs4", - .mount = nfs4_remote_referral_mount, - .kill_sb = nfs_kill_super, - .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, -}; - -struct file_system_type nfs4_referral_fs_type = { - .owner = THIS_MODULE, - .name = "nfs4", - .mount = nfs4_referral_mount, - .kill_sb = nfs_kill_super, - .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, -}; - -static const struct super_operations nfs4_sops = { - .alloc_inode = nfs_alloc_inode, - .destroy_inode = nfs_destroy_inode, - .write_inode = nfs4_write_inode, - .put_super = nfs_put_super, - .statfs = nfs_statfs, - .evict_inode = nfs4_evict_inode, - .umount_begin = nfs_umount_begin, - .show_options = nfs_show_options, - .show_devname = nfs_show_devname, - .show_path = nfs_show_path, - .show_stats = nfs_show_stats, - .remount_fs = nfs_remount, -}; #endif static struct shrinker acl_shrinker = { @@ -423,18 +337,9 @@ int __init register_nfs_fs(void) ret = nfs_register_sysctl(); if (ret < 0) goto error_1; -#ifdef CONFIG_NFS_V4 - ret = register_filesystem(&nfs4_fs_type); - if (ret < 0) - goto error_2; -#endif register_shrinker(&acl_shrinker); return 0; -#ifdef CONFIG_NFS_V4 -error_2: - nfs_unregister_sysctl(); -#endif error_1: unregister_filesystem(&nfs_fs_type); error_0: @@ -447,9 +352,6 @@ error_0: void __exit unregister_nfs_fs(void) { unregister_shrinker(&acl_shrinker); -#ifdef CONFIG_NFS_V4 - unregister_filesystem(&nfs4_fs_type); -#endif nfs_unregister_sysctl(); unregister_filesystem(&nfs_fs_type); } @@ -473,7 +375,7 @@ void nfs_sb_deactive(struct super_block *sb) /* * Deliver file system statistics to userspace */ 
-static int nfs_statfs(struct dentry *dentry, struct kstatfs *buf) +int nfs_statfs(struct dentry *dentry, struct kstatfs *buf) { struct nfs_server *server = NFS_SB(dentry->d_sb); unsigned char blockbits; @@ -756,7 +658,7 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, /* * Describe the mount options on this VFS mountpoint */ -static int nfs_show_options(struct seq_file *m, struct dentry *root) +int nfs_show_options(struct seq_file *m, struct dentry *root) { struct nfs_server *nfss = NFS_SB(root->d_sb); @@ -814,7 +716,7 @@ static void show_implementation_id(struct seq_file *m, struct nfs_server *nfss) } #endif -static int nfs_show_devname(struct seq_file *m, struct dentry *root) +int nfs_show_devname(struct seq_file *m, struct dentry *root) { char *page = (char *) __get_free_page(GFP_KERNEL); char *devname, *dummy; @@ -830,7 +732,7 @@ static int nfs_show_devname(struct seq_file *m, struct dentry *root) return err; } -static int nfs_show_path(struct seq_file *m, struct dentry *dentry) +int nfs_show_path(struct seq_file *m, struct dentry *dentry) { seq_puts(m, "/"); return 0; @@ -839,7 +741,7 @@ static int nfs_show_path(struct seq_file *m, struct dentry *dentry) /* * Present statistical information for this VFS mountpoint */ -static int nfs_show_stats(struct seq_file *m, struct dentry *root) +int nfs_show_stats(struct seq_file *m, struct dentry *root) { int i, cpu; struct nfs_server *nfss = NFS_SB(root->d_sb); @@ -932,7 +834,7 @@ static int nfs_show_stats(struct seq_file *m, struct dentry *root) * Begin unmount by attempting to remove all automounted mountpoints we added * in response to xdev traversals and referrals */ -static void nfs_umount_begin(struct super_block *sb) +void nfs_umount_begin(struct super_block *sb) { struct nfs_server *server; struct rpc_clnt *rpc; @@ -2107,7 +2009,7 @@ nfs_compare_remount_data(struct nfs_server *nfss, return 0; } -static int +int nfs_remount(struct super_block *sb, int *flags, char *raw_data) { 
int error; @@ -2172,7 +2074,7 @@ out: /* * Initialise the common bits of the superblock */ -static inline void nfs_initialise_sb(struct super_block *sb) +inline void nfs_initialise_sb(struct super_block *sb) { struct nfs_server *server = NFS_SB(sb); @@ -2194,8 +2096,7 @@ static inline void nfs_initialise_sb(struct super_block *sb) /* * Finish setting up an NFS2/3 superblock */ -static void nfs_fill_super(struct super_block *sb, - struct nfs_mount_info *mount_info) +void nfs_fill_super(struct super_block *sb, struct nfs_mount_info *mount_info) { struct nfs_parsed_mount_data *data = mount_info->parsed; struct nfs_server *server = NFS_SB(sb); @@ -2220,8 +2121,7 @@ static void nfs_fill_super(struct super_block *sb, /* * Finish setting up a cloned NFS2/3/4 superblock */ -static void nfs_clone_super(struct super_block *sb, - struct nfs_mount_info *mount_info) +void nfs_clone_super(struct super_block *sb, struct nfs_mount_info *mount_info) { const struct super_block *old_sb = mount_info->cloned->sb; struct nfs_server *server = NFS_SB(sb); @@ -2381,14 +2281,14 @@ static int nfs_bdi_register(struct nfs_server *server) return bdi_register_dev(&server->backing_dev_info, server->s_dev); } -static int nfs_set_sb_security(struct super_block *s, struct dentry *mntroot, - struct nfs_mount_info *mount_info) +int nfs_set_sb_security(struct super_block *s, struct dentry *mntroot, + struct nfs_mount_info *mount_info) { return security_sb_set_mnt_opts(s, &mount_info->parsed->lsm_opts); } -static int nfs_clone_sb_security(struct super_block *s, struct dentry *mntroot, - struct nfs_mount_info *mount_info) +int nfs_clone_sb_security(struct super_block *s, struct dentry *mntroot, + struct nfs_mount_info *mount_info) { /* clone any lsm security options from the parent to the new sb */ security_sb_clone_mnt_opts(mount_info->cloned->sb, s); @@ -2397,10 +2297,10 @@ static int nfs_clone_sb_security(struct super_block *s, struct dentry *mntroot, return 0; } -static struct dentry 
*nfs_fs_mount_common(struct file_system_type *fs_type, - struct nfs_server *server, - int flags, const char *dev_name, - struct nfs_mount_info *mount_info) +struct dentry *nfs_fs_mount_common(struct file_system_type *fs_type, + struct nfs_server *server, + int flags, const char *dev_name, + struct nfs_mount_info *mount_info) { struct super_block *s; struct dentry *mntroot = ERR_PTR(-ENOMEM); @@ -2470,7 +2370,7 @@ error_splat_bdi: goto out; } -static struct dentry *nfs_fs_mount(struct file_system_type *fs_type, +struct dentry *nfs_fs_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data) { struct nfs_mount_info mount_info = { @@ -2511,7 +2411,7 @@ out: * Ensure that we unregister the bdi before kill_anon_super * releases the device name */ -static void nfs_put_super(struct super_block *s) +void nfs_put_super(struct super_block *s) { struct nfs_server *server = NFS_SB(s); @@ -2521,7 +2421,7 @@ static void nfs_put_super(struct super_block *s) /* * Destroy an NFS2/3 superblock */ -static void nfs_kill_super(struct super_block *s) +void nfs_kill_super(struct super_block *s) { struct nfs_server *server = NFS_SB(s); @@ -2533,7 +2433,7 @@ static void nfs_kill_super(struct super_block *s) /* * Clone an NFS2/3/4 server record on xdev traversal (FSID-change) */ -static struct dentry * +struct dentry * nfs_xdev_mount_common(struct file_system_type *fs_type, int flags, const char *dev_name, struct nfs_mount_info *mount_info) { @@ -2580,23 +2480,6 @@ nfs_xdev_mount(struct file_system_type *fs_type, int flags, #ifdef CONFIG_NFS_V4 -/* - * Set up an NFS4 superblock - */ -static void nfs4_fill_super(struct super_block *sb, - struct nfs_mount_info *mount_info) -{ - sb->s_time_gran = 1; - sb->s_op = &nfs4_sops; - /* - * The VFS shouldn't apply the umask to mode bits. We will do - * so ourselves when necessary. 
- */ - sb->s_flags |= MS_POSIXACL; - sb->s_xattr = nfs4_xattr_handlers; - nfs_initialise_sb(sb); -} - static void nfs4_validate_mount_flags(struct nfs_parsed_mount_data *args) { args->flags &= ~(NFS_MOUNT_NONLM|NFS_MOUNT_NOACL|NFS_MOUNT_VER3| @@ -2694,238 +2577,4 @@ out_no_address: return -EINVAL; } -/* - * Get the superblock for the NFS4 root partition - */ -static struct dentry * -nfs4_remote_mount(struct file_system_type *fs_type, int flags, - const char *dev_name, void *info) -{ - struct nfs_mount_info *mount_info = info; - struct nfs_server *server; - struct dentry *mntroot = ERR_PTR(-ENOMEM); - - mount_info->fill_super = nfs4_fill_super; - mount_info->set_security = nfs_set_sb_security; - - /* Get a volume representation */ - server = nfs4_create_server(mount_info->parsed, mount_info->mntfh); - if (IS_ERR(server)) { - mntroot = ERR_CAST(server); - goto out; - } - - mntroot = nfs_fs_mount_common(fs_type, server, flags, dev_name, mount_info); - -out: - return mntroot; -} - -static struct vfsmount *nfs_do_root_mount(struct file_system_type *fs_type, - int flags, void *data, const char *hostname) -{ - struct vfsmount *root_mnt; - char *root_devname; - size_t len; - - len = strlen(hostname) + 5; - root_devname = kmalloc(len, GFP_KERNEL); - if (root_devname == NULL) - return ERR_PTR(-ENOMEM); - /* Does hostname needs to be enclosed in brackets? 
*/ - if (strchr(hostname, ':')) - snprintf(root_devname, len, "[%s]:/", hostname); - else - snprintf(root_devname, len, "%s:/", hostname); - root_mnt = vfs_kern_mount(fs_type, flags, root_devname, data); - kfree(root_devname); - return root_mnt; -} - -struct nfs_referral_count { - struct list_head list; - const struct task_struct *task; - unsigned int referral_count; -}; - -static LIST_HEAD(nfs_referral_count_list); -static DEFINE_SPINLOCK(nfs_referral_count_list_lock); - -static struct nfs_referral_count *nfs_find_referral_count(void) -{ - struct nfs_referral_count *p; - - list_for_each_entry(p, &nfs_referral_count_list, list) { - if (p->task == current) - return p; - } - return NULL; -} - -#define NFS_MAX_NESTED_REFERRALS 2 - -static int nfs_referral_loop_protect(void) -{ - struct nfs_referral_count *p, *new; - int ret = -ENOMEM; - - new = kmalloc(sizeof(*new), GFP_KERNEL); - if (!new) - goto out; - new->task = current; - new->referral_count = 1; - - ret = 0; - spin_lock(&nfs_referral_count_list_lock); - p = nfs_find_referral_count(); - if (p != NULL) { - if (p->referral_count >= NFS_MAX_NESTED_REFERRALS) - ret = -ELOOP; - else - p->referral_count++; - } else { - list_add(&new->list, &nfs_referral_count_list); - new = NULL; - } - spin_unlock(&nfs_referral_count_list_lock); - kfree(new); -out: - return ret; -} - -static void nfs_referral_loop_unprotect(void) -{ - struct nfs_referral_count *p; - - spin_lock(&nfs_referral_count_list_lock); - p = nfs_find_referral_count(); - p->referral_count--; - if (p->referral_count == 0) - list_del(&p->list); - else - p = NULL; - spin_unlock(&nfs_referral_count_list_lock); - kfree(p); -} - -static struct dentry *nfs_follow_remote_path(struct vfsmount *root_mnt, - const char *export_path) -{ - struct dentry *dentry; - int err; - - if (IS_ERR(root_mnt)) - return ERR_CAST(root_mnt); - - err = nfs_referral_loop_protect(); - if (err) { - mntput(root_mnt); - return ERR_PTR(err); - } - - dentry = mount_subtree(root_mnt, export_path); - 
nfs_referral_loop_unprotect(); - - return dentry; -} - -static struct dentry *nfs4_try_mount(int flags, const char *dev_name, - struct nfs_mount_info *mount_info) -{ - char *export_path; - struct vfsmount *root_mnt; - struct dentry *res; - struct nfs_parsed_mount_data *data = mount_info->parsed; - - dfprintk(MOUNT, "--> nfs4_try_mount()\n"); - - mount_info->fill_super = nfs4_fill_super; - - export_path = data->nfs_server.export_path; - data->nfs_server.export_path = "/"; - root_mnt = nfs_do_root_mount(&nfs4_remote_fs_type, flags, mount_info, - data->nfs_server.hostname); - data->nfs_server.export_path = export_path; - - res = nfs_follow_remote_path(root_mnt, export_path); - - dfprintk(MOUNT, "<-- nfs4_try_mount() = %ld%s\n", - IS_ERR(res) ? PTR_ERR(res) : 0, - IS_ERR(res) ? " [error]" : ""); - return res; -} - -/* - * Clone an NFS4 server record on xdev traversal (FSID-change) - */ -static struct dentry * -nfs4_xdev_mount(struct file_system_type *fs_type, int flags, - const char *dev_name, void *raw_data) -{ - struct nfs_mount_info mount_info = { - .fill_super = nfs_clone_super, - .set_security = nfs_clone_sb_security, - .cloned = raw_data, - }; - return nfs_xdev_mount_common(&nfs4_fs_type, flags, dev_name, &mount_info); -} - -static struct dentry * -nfs4_remote_referral_mount(struct file_system_type *fs_type, int flags, - const char *dev_name, void *raw_data) -{ - struct nfs_mount_info mount_info = { - .fill_super = nfs4_fill_super, - .set_security = nfs_clone_sb_security, - .cloned = raw_data, - }; - struct nfs_server *server; - struct dentry *mntroot = ERR_PTR(-ENOMEM); - - dprintk("--> nfs4_referral_get_sb()\n"); - - mount_info.mntfh = nfs_alloc_fhandle(); - if (mount_info.cloned == NULL || mount_info.mntfh == NULL) - goto out; - - /* create a new volume representation */ - server = nfs4_create_referral_server(mount_info.cloned, mount_info.mntfh); - if (IS_ERR(server)) { - mntroot = ERR_CAST(server); - goto out; - } - - mntroot = 
nfs_fs_mount_common(&nfs4_fs_type, server, flags, dev_name, &mount_info); -out: - nfs_free_fhandle(mount_info.mntfh); - return mntroot; -} - -/* - * Create an NFS4 server record on referral traversal - */ -static struct dentry *nfs4_referral_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *raw_data) -{ - struct nfs_clone_mount *data = raw_data; - char *export_path; - struct vfsmount *root_mnt; - struct dentry *res; - - dprintk("--> nfs4_referral_mount()\n"); - - export_path = data->mnt_path; - data->mnt_path = "/"; - - root_mnt = nfs_do_root_mount(&nfs4_remote_referral_fs_type, - flags, data, data->hostname); - data->mnt_path = export_path; - - res = nfs_follow_remote_path(root_mnt, export_path); - dprintk("<-- nfs4_referral_mount() = %ld%s\n", - IS_ERR(res) ? PTR_ERR(res) : 0, - IS_ERR(res) ? " [error]" : ""); - return res; -} - #endif /* CONFIG_NFS_V4 */ -- cgit v1.2.3 From ec409897e7c71596cc080135ef5f86b81a0e9813 Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Mon, 16 Jul 2012 16:39:21 -0400 Subject: NFS: Split out NFS v4 client functions These functions are only needed by NFS v4, so they can be moved into a v4 specific file. 
Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 93 ----------------------------------------------------- fs/nfs/nfs4client.c | 91 +++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 91 insertions(+), 93 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 0d50629d9e2..65afa382c5e 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -56,30 +56,6 @@ #define NFSDBG_FACILITY NFSDBG_CLIENT static DECLARE_WAIT_QUEUE_HEAD(nfs_client_active_wq); -#ifdef CONFIG_NFS_V4 - -/* - * Get a unique NFSv4.0 callback identifier which will be used - * by the V4.0 callback service to lookup the nfs_client struct - */ -static int nfs_get_cb_ident_idr(struct nfs_client *clp, int minorversion) -{ - int ret = 0; - struct nfs_net *nn = net_generic(clp->cl_net, nfs_net_id); - - if (clp->rpc_ops->version != 4 || minorversion != 0) - return ret; -retry: - if (!idr_pre_get(&nn->cb_ident_idr, GFP_KERNEL)) - return -ENOMEM; - spin_lock(&nn->nfs_client_lock); - ret = idr_get_new(&nn->cb_ident_idr, clp, &clp->cl_cb_ident); - spin_unlock(&nn->nfs_client_lock); - if (ret == -EAGAIN) - goto retry; - return ret; -} -#endif /* CONFIG_NFS_V4 */ /* * RPC cruft for NFS @@ -175,75 +151,6 @@ error_0: } #ifdef CONFIG_NFS_V4 -#ifdef CONFIG_NFS_V4_1 -static void nfs4_shutdown_session(struct nfs_client *clp) -{ - if (nfs4_has_session(clp)) { - nfs4_destroy_session(clp->cl_session); - nfs4_destroy_clientid(clp); - } - -} -#else /* CONFIG_NFS_V4_1 */ -static void nfs4_shutdown_session(struct nfs_client *clp) -{ -} -#endif /* CONFIG_NFS_V4_1 */ - -struct nfs_client *nfs4_alloc_client(const struct nfs_client_initdata *cl_init) -{ - int err; - struct nfs_client *clp = nfs_alloc_client(cl_init); - if (IS_ERR(clp)) - return clp; - - err = nfs_get_cb_ident_idr(clp, cl_init->minorversion); - if (err) - goto error; - - spin_lock_init(&clp->cl_lock); - INIT_DELAYED_WORK(&clp->cl_renewd, nfs4_renew_state); - 
rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS client"); - clp->cl_state = 1 << NFS4CLNT_LEASE_EXPIRED; - clp->cl_minorversion = cl_init->minorversion; - clp->cl_mvops = nfs_v4_minor_ops[cl_init->minorversion]; - return clp; - -error: - kfree(clp); - return ERR_PTR(err); -} - -/* - * Destroy the NFS4 callback service - */ -static void nfs4_destroy_callback(struct nfs_client *clp) -{ - if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state)) - nfs_callback_down(clp->cl_mvops->minor_version); -} - -static void nfs4_shutdown_client(struct nfs_client *clp) -{ - if (__test_and_clear_bit(NFS_CS_RENEWD, &clp->cl_res_state)) - nfs4_kill_renewd(clp); - nfs4_shutdown_session(clp); - nfs4_destroy_callback(clp); - if (__test_and_clear_bit(NFS_CS_IDMAP, &clp->cl_res_state)) - nfs_idmap_delete(clp); - - rpc_destroy_wait_queue(&clp->cl_rpcwaitq); - kfree(clp->cl_serverowner); - kfree(clp->cl_serverscope); - kfree(clp->cl_implid); -} - -void nfs4_free_client(struct nfs_client *clp) -{ - nfs4_shutdown_client(clp); - nfs_free_client(clp); -} - /* idr_remove_all is not needed as all id's are removed by nfs_put_client */ void nfs_cleanup_cb_ident_idr(struct net *net) { diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index a71d95ecbea..1c3f13c8e47 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -22,6 +22,97 @@ */ static bool nfs4_disable_idmapping = true; +/* + * Get a unique NFSv4.0 callback identifier which will be used + * by the V4.0 callback service to lookup the nfs_client struct + */ +static int nfs_get_cb_ident_idr(struct nfs_client *clp, int minorversion) +{ + int ret = 0; + struct nfs_net *nn = net_generic(clp->cl_net, nfs_net_id); + + if (clp->rpc_ops->version != 4 || minorversion != 0) + return ret; +retry: + if (!idr_pre_get(&nn->cb_ident_idr, GFP_KERNEL)) + return -ENOMEM; + spin_lock(&nn->nfs_client_lock); + ret = idr_get_new(&nn->cb_ident_idr, clp, &clp->cl_cb_ident); + spin_unlock(&nn->nfs_client_lock); + if (ret == -EAGAIN) + goto retry; + 
return ret; +} + +#ifdef CONFIG_NFS_V4_1 +static void nfs4_shutdown_session(struct nfs_client *clp) +{ + if (nfs4_has_session(clp)) { + nfs4_destroy_session(clp->cl_session); + nfs4_destroy_clientid(clp); + } + +} +#else /* CONFIG_NFS_V4_1 */ +static void nfs4_shutdown_session(struct nfs_client *clp) +{ +} +#endif /* CONFIG_NFS_V4_1 */ + +struct nfs_client *nfs4_alloc_client(const struct nfs_client_initdata *cl_init) +{ + int err; + struct nfs_client *clp = nfs_alloc_client(cl_init); + if (IS_ERR(clp)) + return clp; + + err = nfs_get_cb_ident_idr(clp, cl_init->minorversion); + if (err) + goto error; + + spin_lock_init(&clp->cl_lock); + INIT_DELAYED_WORK(&clp->cl_renewd, nfs4_renew_state); + rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS client"); + clp->cl_state = 1 << NFS4CLNT_LEASE_EXPIRED; + clp->cl_minorversion = cl_init->minorversion; + clp->cl_mvops = nfs_v4_minor_ops[cl_init->minorversion]; + return clp; + +error: + kfree(clp); + return ERR_PTR(err); +} + +/* + * Destroy the NFS4 callback service + */ +static void nfs4_destroy_callback(struct nfs_client *clp) +{ + if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state)) + nfs_callback_down(clp->cl_mvops->minor_version); +} + +static void nfs4_shutdown_client(struct nfs_client *clp) +{ + if (__test_and_clear_bit(NFS_CS_RENEWD, &clp->cl_res_state)) + nfs4_kill_renewd(clp); + nfs4_shutdown_session(clp); + nfs4_destroy_callback(clp); + if (__test_and_clear_bit(NFS_CS_IDMAP, &clp->cl_res_state)) + nfs_idmap_delete(clp); + + rpc_destroy_wait_queue(&clp->cl_rpcwaitq); + kfree(clp->cl_serverowner); + kfree(clp->cl_serverscope); + kfree(clp->cl_implid); +} + +void nfs4_free_client(struct nfs_client *clp) +{ + nfs4_shutdown_client(clp); + nfs_free_client(clp); +} + /* * Initialize the NFS4 callback service */ -- cgit v1.2.3 From bb6e071f845d32545e3e943058012f328a2e95ad Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Tue, 17 Jul 2012 15:18:30 -0400 Subject: NFS: exit_nfs_v4() shouldn't be an __exit function ... 
yet. Right now, init_nfs() is calling this function if an error is encountered when loading the nfs module. An __exit function can't be called from one declared as __init. Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/nfs4super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c index 2af26913884..59264fb335c 100644 --- a/fs/nfs/nfs4super.c +++ b/fs/nfs/nfs4super.c @@ -352,7 +352,7 @@ out: return err; } -void __exit exit_nfs_v4(void) +void exit_nfs_v4(void) { unregister_filesystem(&nfs4_fs_type); nfs4_unregister_sysctl(); -- cgit v1.2.3 From 9695c7057f4887ed54dc1e6c2ef22f72a2be1175 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Wed, 25 Jul 2012 16:57:06 +0400 Subject: SUNRPC: service request network namespace helper introduced This is a cleanup patch - it makes the code look simpler. It replaces the widely used rqstp->rq_xprt->xpt_net with the newly introduced SVC_NET(rqstp). Signed-off-by: Stanislav Kinsbursky Signed-off-by: J. 
Bruce Fields --- fs/nfs/callback_xdr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index e64b01d2a33..742ff4ffced 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -863,7 +863,7 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r .drc_status = 0, .clp = NULL, .slotid = NFS4_NO_SLOT, - .net = rqstp->rq_xprt->xpt_net, + .net = SVC_NET(rqstp), }; unsigned int nops = 0; @@ -879,7 +879,7 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r return rpc_garbage_args; if (hdr_arg.minorversion == 0) { - cps.clp = nfs4_find_client_ident(rqstp->rq_xprt->xpt_net, hdr_arg.cb_ident); + cps.clp = nfs4_find_client_ident(SVC_NET(rqstp), hdr_arg.cb_ident); if (!cps.clp || !check_gss_callback_principal(cps.clp, rqstp)) return rpc_drop_reply; } -- cgit v1.2.3 From 0add3e8567a42b8137e26c0595a59f893d8592e0 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 27 Jul 2012 11:49:26 -0700 Subject: nfs: fix stub return type warnings Fix numerous repeated warnings by making the stub function void instead of non-void: fs/nfs/nfs4_fs.h: In function 'nfs4_unregister_sysctl': fs/nfs/nfs4_fs.h:385:1: warning: no return statement in function returning non-void Signed-off-by: Randy Dunlap Cc: Trond Myklebust Signed-off-by: Trond Myklebust --- fs/nfs/nfs4_fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index b1ecacd8784..5511690de8a 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -380,7 +380,7 @@ static inline int nfs4_register_sysctl(void) return 0; } -static inline int nfs4_unregister_sysctl(void) +static inline void nfs4_unregister_sysctl(void) { } #endif -- cgit v1.2.3 From 59948db3be76099b14c7103d0f46c5454b173c3a Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Wed, 18 Jul 2012 14:20:49 -0400 Subject: NFS: fix pnfs regression with 
directio reads Commit 1abb50886af "NFS: Create an read_pageio_init() function" did not modify the call in direct.c, preventing direct io from using pnfs. This reintroduces that capability. Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 48253372ab1..69f20c73220 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -393,7 +393,7 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq, size_t requested_bytes = 0; unsigned long seg; - nfs_pageio_init_read(&desc, dreq->inode, + NFS_PROTO(dreq->inode)->read_pageio_init(&desc, dreq->inode, &nfs_direct_read_completion_ops); get_dreq(dreq); desc.pg_dreq = dreq; -- cgit v1.2.3 From c95908e4c50d218f016e3866f5abf786055df635 Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Wed, 18 Jul 2012 14:20:50 -0400 Subject: NFS: fix pnfs regression with directio writes Commit 57208fa7e51 "NFS: Create an write_pageio_init() function" did not modify the calls in direct.c, preventing direct io from using pnfs. This reintroduces that capability. 
Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 69f20c73220..42dce909ec7 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -478,7 +478,7 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) dreq->count = 0; get_dreq(dreq); - nfs_pageio_init_write(&desc, dreq->inode, FLUSH_STABLE, + NFS_PROTO(dreq->inode)->write_pageio_init(&desc, dreq->inode, FLUSH_STABLE, &nfs_direct_write_completion_ops); desc.pg_dreq = dreq; @@ -782,7 +782,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, size_t requested_bytes = 0; unsigned long seg; - nfs_pageio_init_write(&desc, inode, FLUSH_COND_STABLE, + NFS_PROTO(inode)->write_pageio_init(&desc, inode, FLUSH_COND_STABLE, &nfs_direct_write_completion_ops); desc.pg_dreq = dreq; get_dreq(dreq); -- cgit v1.2.3 From f44106e2173f08ccb1c9195d85a6c22388b461c1 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 23 Jul 2012 15:49:56 -0400 Subject: nfs: fix fl_type tests in NFSv4 code fl_type is not a bitmap. Reported-by: Al Viro Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 2 +- fs/nfs/nfs4xdr.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 5e373c30e8d..6843e0a37de 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4885,7 +4885,7 @@ nfs4_proc_lock(struct file *filp, int cmd, struct file_lock *request) * Don't rely on the VFS having checked the file open mode, * since it won't do this for flock() locks. 
*/ - switch (request->fl_type & (F_RDLCK|F_WRLCK|F_UNLCK)) { + switch (request->fl_type) { case F_RDLCK: if (!(filp->f_mode & FMODE_READ)) return -EBADF; diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 610ebccbde5..6cbd602e26d 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -1236,7 +1236,7 @@ static void encode_link(struct xdr_stream *xdr, const struct qstr *name, struct static inline int nfs4_lock_type(struct file_lock *fl, int block) { - if ((fl->fl_type & (F_RDLCK|F_WRLCK|F_UNLCK)) == F_RDLCK) + if (fl->fl_type == F_RDLCK) return block ? NFS4_READW_LT : NFS4_READ_LT; return block ? NFS4_WRITEW_LT : NFS4_WRITE_LT; } -- cgit v1.2.3 From 159e0561e322dd8008fff59e36efff8d2bdd0b0e Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Tue, 29 May 2012 13:57:58 +0800 Subject: pnfsblock: bail out partial page IO Current block layout driver read/write code assumes page aligned IO in many places. Add a checker to validate the assumption. Otherwise there would be data corruption, for example when an application does open(O_WRONLY) followed by a page-unaligned write. 
Signed-off-by: Peng Tao Signed-off-by: Trond Myklebust --- fs/nfs/blocklayout/blocklayout.c | 39 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 36 insertions(+), 3 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 7ae8a608956..dd392ed5f2e 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -228,6 +228,14 @@ bl_end_par_io_read(void *data, int unused) schedule_work(&rdata->task.u.tk_work); } +static bool +bl_check_alignment(u64 offset, u32 len, unsigned long blkmask) +{ + if ((offset & blkmask) || (len & blkmask)) + return false; + return true; +} + static enum pnfs_try_status bl_read_pagelist(struct nfs_read_data *rdata) { @@ -244,6 +252,9 @@ bl_read_pagelist(struct nfs_read_data *rdata) dprintk("%s enter nr_pages %u offset %lld count %u\n", __func__, rdata->pages.npages, f_offset, (unsigned int)rdata->args.count); + if (!bl_check_alignment(f_offset, rdata->args.count, PAGE_CACHE_MASK)) + goto use_mds; + par = alloc_parallel(rdata); if (!par) goto use_mds; @@ -552,7 +563,7 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync) struct bio *bio = NULL; struct pnfs_block_extent *be = NULL, *cow_read = NULL; sector_t isect, last_isect = 0, extent_length = 0; - struct parallel_io *par; + struct parallel_io *par = NULL; loff_t offset = wdata->args.offset; size_t count = wdata->args.count; struct page **pages = wdata->args.pages; @@ -563,6 +574,10 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync) NFS_SERVER(header->inode)->pnfs_blksize >> PAGE_CACHE_SHIFT; dprintk("%s enter, %Zu@%lld\n", __func__, count, offset); + /* Check for alignment first */ + if (!bl_check_alignment(offset, count, PAGE_CACHE_MASK)) + goto out_mds; + /* At this point, wdata->pages is a (sequential) list of nfs_pages. * We want to write each, and if there is an error set pnfs_error * to have it redone using nfs. 
@@ -996,14 +1011,32 @@ bl_clear_layoutdriver(struct nfs_server *server) return 0; } +static void +bl_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) +{ + if (!bl_check_alignment(req->wb_offset, req->wb_bytes, PAGE_CACHE_MASK)) + nfs_pageio_reset_read_mds(pgio); + else + pnfs_generic_pg_init_read(pgio, req); +} + +static void +bl_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) +{ + if (!bl_check_alignment(req->wb_offset, req->wb_bytes, PAGE_CACHE_MASK)) + nfs_pageio_reset_write_mds(pgio); + else + pnfs_generic_pg_init_write(pgio, req); +} + static const struct nfs_pageio_ops bl_pg_read_ops = { - .pg_init = pnfs_generic_pg_init_read, + .pg_init = bl_pg_init_read, .pg_test = pnfs_generic_pg_test, .pg_doio = pnfs_generic_pg_readpages, }; static const struct nfs_pageio_ops bl_pg_write_ops = { - .pg_init = pnfs_generic_pg_init_write, + .pg_init = bl_pg_init_write, .pg_test = pnfs_generic_pg_test, .pg_doio = pnfs_generic_pg_writepages, }; -- cgit v1.2.3 From 5cf02d09b50b1ee1c2d536c9cf64af5a7d433f56 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 23 Jul 2012 13:58:51 -0400 Subject: nfs: skip commit in releasepage if we're freeing memory for fs-related reasons We've had some reports of a deadlock where rpciod ends up with a stack trace like this: PID: 2507 TASK: ffff88103691ab40 CPU: 14 COMMAND: "rpciod/14" #0 [ffff8810343bf2f0] schedule at ffffffff814dabd9 #1 [ffff8810343bf3b8] nfs_wait_bit_killable at ffffffffa038fc04 [nfs] #2 [ffff8810343bf3c8] __wait_on_bit at ffffffff814dbc2f #3 [ffff8810343bf418] out_of_line_wait_on_bit at ffffffff814dbcd8 #4 [ffff8810343bf488] nfs_commit_inode at ffffffffa039e0c1 [nfs] #5 [ffff8810343bf4f8] nfs_release_page at ffffffffa038bef6 [nfs] #6 [ffff8810343bf528] try_to_release_page at ffffffff8110c670 #7 [ffff8810343bf538] shrink_page_list.clone.0 at ffffffff81126271 #8 [ffff8810343bf668] shrink_inactive_list at ffffffff81126638 #9 [ffff8810343bf818] shrink_zone at ffffffff8112788f #10 
[ffff8810343bf8c8] do_try_to_free_pages at ffffffff81127b1e #11 [ffff8810343bf958] try_to_free_pages at ffffffff8112812f #12 [ffff8810343bfa08] __alloc_pages_nodemask at ffffffff8111fdad #13 [ffff8810343bfb28] kmem_getpages at ffffffff81159942 #14 [ffff8810343bfb58] fallback_alloc at ffffffff8115a55a #15 [ffff8810343bfbd8] ____cache_alloc_node at ffffffff8115a2d9 #16 [ffff8810343bfc38] kmem_cache_alloc at ffffffff8115b09b #17 [ffff8810343bfc78] sk_prot_alloc at ffffffff81411808 #18 [ffff8810343bfcb8] sk_alloc at ffffffff8141197c #19 [ffff8810343bfce8] inet_create at ffffffff81483ba6 #20 [ffff8810343bfd38] __sock_create at ffffffff8140b4a7 #21 [ffff8810343bfd98] xs_create_sock at ffffffffa01f649b [sunrpc] #22 [ffff8810343bfdd8] xs_tcp_setup_socket at ffffffffa01f6965 [sunrpc] #23 [ffff8810343bfe38] worker_thread at ffffffff810887d0 #24 [ffff8810343bfee8] kthread at ffffffff8108dd96 #25 [ffff8810343bff48] kernel_thread at ffffffff8100c1ca rpciod is trying to allocate memory for a new socket to talk to the server. The VM ends up calling ->releasepage to get more memory, and it tries to do a blocking commit. That commit can't succeed however without a connected socket, so we deadlock. Fix this by setting PF_FSTRANS on the workqueue task prior to doing the socket allocation, and having nfs_release_page check for that flag when deciding whether to do a commit call. Also, set PF_FSTRANS unconditionally in rpc_async_schedule since that function can also do allocations sometimes. 
Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org --- fs/nfs/file.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 70d124a61b9..1b392542692 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -447,8 +447,11 @@ static int nfs_release_page(struct page *page, gfp_t gfp) dfprintk(PAGECACHE, "NFS: release_page(%p)\n", page); - /* Only do I/O if gfp is a superset of GFP_KERNEL */ - if (mapping && (gfp & GFP_KERNEL) == GFP_KERNEL) { + /* Only do I/O if gfp is a superset of GFP_KERNEL, and we're not + * doing this memory reclaim for a fs-related allocation. + */ + if (mapping && (gfp & GFP_KERNEL) == GFP_KERNEL && + !(current->flags & PF_FSTRANS)) { int how = FLUSH_SYNC; /* Don't let kswapd deadlock waiting for OOM RPC calls */ -- cgit v1.2.3 From a427b9ec4eda8cd6e641ea24541d30b641fc3140 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 25 Jul 2012 16:53:36 +0100 Subject: NFS: Fix a number of bugs in the idmapper Fix a number of bugs in the NFS idmapper code: (1) Only registered key types can be passed to the core keys code, so register the legacy idmapper key type. This is a requirement because the unregister function cleans up keys belonging to that key type so that there aren't dangling pointers to the module left behind - including the key->type pointer. (2) Rename the legacy key type. You can't have two key types with the same name, and (1) would otherwise require that. (3) complete_request_key() must be called in the error path of nfs_idmap_legacy_upcall(). (4) There is one idmap struct for each nfs_client struct. This means that idmap->idmap_key_cons is shared without the use of a lock. This is a problem because key_instantiate_and_link() - as called indirectly by idmap_pipe_downcall() - releases anyone waiting for the key to be instantiated. 
What happens is that idmap_pipe_downcall() running in the rpc.idmapd thread, releases the NFS filesystem in whatever thread that is running in to continue. This may then make another idmapper call, overwriting idmap_key_cons before idmap_pipe_downcall() gets the chance to call complete_request_key(). I *think* that reading idmap_key_cons only once, before key_instantiate_and_link() is called, and then caching the result in a variable is sufficient. Bug (4) is the cause of: BUG: unable to handle kernel NULL pointer dereference at (null) IP: [< (null)>] (null) PGD 0 Oops: 0010 [#1] SMP CPU 1 Modules linked in: ppdev parport_pc lp parport ip6table_filter ip6_tables ebtable_nat ebtables ipt_MASQUERADE iptable_nat nf_nat nf_conntrack_ipv4 nf_defrag_ipv4 xt_state nf_conntrack nfs fscache xt_CHECKSUM auth_rpcgss iptable_mangle nfs_acl bridge stp llc lockd be2iscsi iscsi_boot_sysfs bnx2i cnic uio cxgb4i cxgb4 cxgb3i libcxgbi cxgb3 mdio ib_iser rdma_cm ib_cm iw_cm ib_sa ib_mad ib_core ib_addr iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi snd_hda_codec_realtek snd_usb_audio snd_hda_intel snd_hda_codec snd_seq snd_pcm snd_hwdep snd_usbmidi_lib snd_rawmidi snd_timer uvcvideo videobuf2_core videodev media videobuf2_vmalloc snd_seq_device videobuf2_memops e1000e vhost_net iTCO_wdt joydev coretemp snd soundcore macvtap macvlan i2c_i801 snd_page_alloc tun iTCO_vendor_support microcode kvm_intel kvm sunrpc hid_logitech_dj usb_storage i915 drm_kms_helper drm i2c_algo_bit i2c_core video [last unloaded: scsi_wait_scan] Pid: 1229, comm: rpc.idmapd Not tainted 3.4.2-1.fc16.x86_64 #1 Gateway DX4710-UB801A/G33M05G1 RIP: 0010:[<0000000000000000>] [< (null)>] (null) RSP: 0018:ffff8801a3645d40 EFLAGS: 00010246 RAX: ffff880077707e30 RBX: ffff880077707f50 RCX: ffff8801a18ccd80 RDX: 0000000000000006 RSI: ffff8801a3645e75 RDI: ffff880077707f50 RBP: ffff8801a3645d88 R08: ffff8801a430f9c0 R09: ffff8801a3645db0 R10: 000000000000000a R11: 0000000000000246 R12: ffff8801a18ccd80 R13: 
ffff8801a3645e75 R14: ffff8801a430f9c0 R15: 0000000000000006 FS: 00007fb6fb51a700(0000) GS:ffff8801afc80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 00000001a49b0000 CR4: 00000000000027e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process rpc.idmapd (pid: 1229, threadinfo ffff8801a3644000, task ffff8801a3bf9710) Stack: ffffffff81260878 ffff8801a3645db0 ffff8801a3645db0 ffff880077707a90 ffff880077707f50 ffff8801a18ccd80 0000000000000006 ffff8801a3645e75 ffff8801a430f9c0 ffff8801a3645dd8 ffffffff81260983 ffff8801a3645de8 Call Trace: [] ? __key_instantiate_and_link+0x58/0x100 [] key_instantiate_and_link+0x63/0xa0 [] idmap_pipe_downcall+0x1cb/0x1e0 [nfs] [] rpc_pipe_write+0x67/0x90 [sunrpc] [] vfs_write+0xb3/0x180 [] sys_write+0x4a/0x90 [] system_call_fastpath+0x16/0x1b Code: Bad RIP value. RIP [< (null)>] (null) RSP CR2: 0000000000000000 Signed-off-by: David Howells Reviewed-by: Steve Dickson Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org [>= 3.4] --- fs/nfs/idmap.c | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index 864c51e4b40..1b5058b4043 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -205,12 +205,18 @@ static int nfs_idmap_init_keyring(void) if (ret < 0) goto failed_put_key; + ret = register_key_type(&key_type_id_resolver_legacy); + if (ret < 0) + goto failed_reg_legacy; + set_bit(KEY_FLAG_ROOT_CAN_CLEAR, &keyring->flags); cred->thread_keyring = keyring; cred->jit_keyring = KEY_REQKEY_DEFL_THREAD_KEYRING; id_resolver_cache = cred; return 0; +failed_reg_legacy: + unregister_key_type(&key_type_id_resolver); failed_put_key: key_put(keyring); failed_put_cred: @@ -222,6 +228,7 @@ static void nfs_idmap_quit_keyring(void) { key_revoke(id_resolver_cache->thread_keyring); 
unregister_key_type(&key_type_id_resolver); + unregister_key_type(&key_type_id_resolver_legacy); put_cred(id_resolver_cache); } @@ -385,7 +392,7 @@ static const struct rpc_pipe_ops idmap_upcall_ops = { }; static struct key_type key_type_id_resolver_legacy = { - .name = "id_resolver", + .name = "id_legacy", .instantiate = user_instantiate, .match = user_match, .revoke = user_revoke, @@ -674,6 +681,7 @@ static int nfs_idmap_legacy_upcall(struct key_construction *cons, if (ret < 0) goto out2; + BUG_ON(idmap->idmap_key_cons != NULL); idmap->idmap_key_cons = cons; ret = rpc_queue_upcall(idmap->idmap_pipe, msg); @@ -687,8 +695,7 @@ out2: out1: kfree(msg); out0: - key_revoke(cons->key); - key_revoke(cons->authkey); + complete_request_key(cons, ret); return ret; } @@ -722,11 +729,18 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) { struct rpc_inode *rpci = RPC_I(filp->f_path.dentry->d_inode); struct idmap *idmap = (struct idmap *)rpci->private; - struct key_construction *cons = idmap->idmap_key_cons; + struct key_construction *cons; struct idmap_msg im; size_t namelen_in; int ret; + /* If instantiation is successful, anyone waiting for key construction + * will have been woken up and someone else may now have used + * idmap_key_cons - so after this point we may no longer touch it. 
+ */ + cons = ACCESS_ONCE(idmap->idmap_key_cons); + idmap->idmap_key_cons = NULL; + if (mlen != sizeof(im)) { ret = -ENOSPC; goto out; @@ -739,7 +753,7 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) if (!(im.im_status & IDMAP_STATUS_SUCCESS)) { ret = mlen; - complete_request_key(idmap->idmap_key_cons, -ENOKEY); + complete_request_key(cons, -ENOKEY); goto out_incomplete; } @@ -756,7 +770,7 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) } out: - complete_request_key(idmap->idmap_key_cons, ret); + complete_request_key(cons, ret); out_incomplete: return ret; } -- cgit v1.2.3 From ab7017a3a0a64b953e091619c30413b3721d925d Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Mon, 30 Jul 2012 16:05:16 -0400 Subject: NFS: Add version registering framework This patch adds in the code to track multiple versions of the NFS protocol. I created default structures for v2, v3 and v4 so that each version can continue to work while I convert them into kernel modules. I also removed the const parameter from the rpc_version array so that I can change it at runtime. 
Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/Makefile | 4 +- fs/nfs/client.c | 147 +++++++++++++++++++++++++++++++++++++++------------- fs/nfs/inode.c | 9 ++-- fs/nfs/internal.h | 10 ++-- fs/nfs/nfs.h | 72 +++++++++++++++++++++++++ fs/nfs/nfs2super.c | 25 +++++++++ fs/nfs/nfs3super.c | 25 +++++++++ fs/nfs/nfs4_fs.h | 1 + fs/nfs/nfs4client.c | 4 +- fs/nfs/nfs4super.c | 14 ++++- fs/nfs/super.c | 32 ++++++++---- 11 files changed, 282 insertions(+), 61 deletions(-) create mode 100644 fs/nfs/nfs.h create mode 100644 fs/nfs/nfs2super.c create mode 100644 fs/nfs/nfs3super.c (limited to 'fs/nfs') diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile index 0b96c203834..66dd3075e5d 100644 --- a/fs/nfs/Makefile +++ b/fs/nfs/Makefile @@ -9,8 +9,8 @@ nfs-y := client.o dir.o file.o getroot.o inode.o super.o \ write.o namespace.o mount_clnt.o \ dns_resolve.o cache_lib.o nfs-$(CONFIG_ROOT_NFS) += nfsroot.o -nfs-$(CONFIG_NFS_V2) += proc.o nfs2xdr.o -nfs-$(CONFIG_NFS_V3) += nfs3proc.o nfs3xdr.o +nfs-$(CONFIG_NFS_V2) += nfs2super.o proc.o nfs2xdr.o +nfs-$(CONFIG_NFS_V3) += nfs3super.o nfs3proc.o nfs3xdr.o nfs-$(CONFIG_NFS_V3_ACL) += nfs3acl.o nfs-$(CONFIG_NFS_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \ nfs4super.o nfs4file.o delegation.o idmap.o \ diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 65afa382c5e..462de24482b 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -51,25 +51,23 @@ #include "internal.h" #include "fscache.h" #include "pnfs.h" +#include "nfs.h" #include "netns.h" #define NFSDBG_FACILITY NFSDBG_CLIENT static DECLARE_WAIT_QUEUE_HEAD(nfs_client_active_wq); +static DEFINE_SPINLOCK(nfs_version_lock); +static DEFINE_MUTEX(nfs_version_mutex); +static LIST_HEAD(nfs_versions); /* * RPC cruft for NFS */ static const struct rpc_version *nfs_version[5] = { -#ifdef CONFIG_NFS_V2 - [2] = &nfs_version2, -#endif -#ifdef CONFIG_NFS_V3 - [3] = &nfs_version3, -#endif -#ifdef CONFIG_NFS_V4 - [4] = &nfs_version4, -#endif + [2] = NULL, + [3] = 
NULL, + [4] = NULL, }; const struct rpc_program nfs_program = { @@ -101,6 +99,93 @@ const struct rpc_program nfsacl_program = { }; #endif /* CONFIG_NFS_V3_ACL */ +static struct nfs_subversion *find_nfs_version(unsigned int version) +{ + struct nfs_subversion *nfs; + spin_lock(&nfs_version_lock); + + list_for_each_entry(nfs, &nfs_versions, list) { + if (nfs->rpc_ops->version == version) { + spin_unlock(&nfs_version_lock); + return nfs; + } + }; + + spin_unlock(&nfs_version_lock); + return ERR_PTR(-EPROTONOSUPPORT);; +} + +struct nfs_subversion *get_nfs_version(unsigned int version) +{ + struct nfs_subversion *nfs = find_nfs_version(version); + + if (IS_ERR(nfs)) { + mutex_lock(&nfs_version_mutex); + request_module("nfs%d", version); + nfs = find_nfs_version(version); + mutex_unlock(&nfs_version_mutex); + } + + if (!IS_ERR(nfs)) + try_module_get(nfs->owner); + return nfs; +} + +void put_nfs_version(struct nfs_subversion *nfs) +{ + module_put(nfs->owner); +} + +void register_nfs_version(struct nfs_subversion *nfs) +{ + spin_lock(&nfs_version_lock); + + list_add(&nfs->list, &nfs_versions); + nfs_version[nfs->rpc_ops->version] = nfs->rpc_vers; + + spin_unlock(&nfs_version_lock); +} +EXPORT_SYMBOL_GPL(register_nfs_version); + +void unregister_nfs_version(struct nfs_subversion *nfs) +{ + spin_lock(&nfs_version_lock); + + nfs_version[nfs->rpc_ops->version] = NULL; + list_del(&nfs->list); + + spin_unlock(&nfs_version_lock); +} +EXPORT_SYMBOL_GPL(unregister_nfs_version); + +/* + * Preload all configured NFS versions during module init. + * This function should be edited after each protocol is converted, + * and eventually removed. 
+ */ +int __init nfs_register_versions(void) +{ + int err = init_nfs_v2(); + if (err) + return err; + + err = init_nfs_v3(); + if (err) + return err; + + return init_nfs_v4(); +} + +/* + * Remove each pre-loaded NFS version + */ +void nfs_unregister_versions(void) +{ + exit_nfs_v2(); + exit_nfs_v3(); + exit_nfs_v4(); +} + /* * Allocate a shared client record * @@ -116,7 +201,10 @@ struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init) if ((clp = kzalloc(sizeof(*clp), GFP_KERNEL)) == NULL) goto error_0; - clp->rpc_ops = cl_init->rpc_ops; + clp->cl_nfs_mod = cl_init->nfs_mod; + try_module_get(clp->cl_nfs_mod->owner); + + clp->rpc_ops = clp->cl_nfs_mod->rpc_ops; atomic_set(&clp->cl_count, 1); clp->cl_cons_state = NFS_CS_INITING; @@ -145,6 +233,7 @@ struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init) return clp; error_cleanup: + put_nfs_version(clp->cl_nfs_mod); kfree(clp); error_0: return ERR_PTR(err); @@ -205,6 +294,7 @@ void nfs_free_client(struct nfs_client *clp) put_rpccred(clp->cl_machine_cred); put_net(clp->cl_net); + put_nfs_version(clp->cl_nfs_mod); kfree(clp->cl_hostname); kfree(clp); @@ -362,7 +452,7 @@ static struct nfs_client *nfs_match_client(const struct nfs_client_initdata *dat continue; /* Different NFS versions cannot share the same nfs_client */ - if (clp->rpc_ops != data->rpc_ops) + if (clp->rpc_ops != data->nfs_mod->rpc_ops) continue; if (clp->cl_proto != data->proto) @@ -431,9 +521,10 @@ nfs_get_client(const struct nfs_client_initdata *cl_init, { struct nfs_client *clp, *new = NULL; struct nfs_net *nn = net_generic(cl_init->net, nfs_net_id); + const struct nfs_rpc_ops *rpc_ops = cl_init->nfs_mod->rpc_ops; dprintk("--> nfs_get_client(%s,v%u)\n", - cl_init->hostname ?: "", cl_init->rpc_ops->version); + cl_init->hostname ?: "", rpc_ops->version); /* see if the client already exists */ do { @@ -450,14 +541,13 @@ nfs_get_client(const struct nfs_client_initdata *cl_init, list_add(&new->cl_share_link, 
&nn->nfs_client_list); spin_unlock(&nn->nfs_client_lock); new->cl_flags = cl_init->init_flags; - return cl_init->rpc_ops->init_client(new, - timeparms, ip_addr, - authflavour); + return rpc_ops->init_client(new, timeparms, ip_addr, + authflavour); } spin_unlock(&nn->nfs_client_lock); - new = cl_init->rpc_ops->alloc_client(cl_init); + new = rpc_ops->alloc_client(cl_init); } while (!IS_ERR(new)); dprintk("<-- nfs_get_client() Failed to find %s (%ld)\n", @@ -714,13 +804,14 @@ error: * Create a version 2 or 3 client */ static int nfs_init_server(struct nfs_server *server, - const struct nfs_parsed_mount_data *data) + const struct nfs_parsed_mount_data *data, + struct nfs_subversion *nfs_mod) { struct nfs_client_initdata cl_init = { .hostname = data->nfs_server.hostname, .addr = (const struct sockaddr *)&data->nfs_server.address, .addrlen = data->nfs_server.addrlen, - .rpc_ops = NULL, + .nfs_mod = nfs_mod, .proto = data->nfs_server.protocol, .net = data->net, }; @@ -730,21 +821,6 @@ static int nfs_init_server(struct nfs_server *server, dprintk("--> nfs_init_server()\n"); - switch (data->version) { -#ifdef CONFIG_NFS_V2 - case 2: - cl_init.rpc_ops = &nfs_v2_clientops; - break; -#endif -#ifdef CONFIG_NFS_V3 - case 3: - cl_init.rpc_ops = &nfs_v3_clientops; - break; -#endif - default: - return -EPROTONOSUPPORT; - } - nfs_init_timeout_values(&timeparms, data->nfs_server.protocol, data->timeo, data->retrans); if (data->flags & NFS_MOUNT_NORESVPORT) @@ -1033,7 +1109,8 @@ void nfs_free_server(struct nfs_server *server) * - keyed on server and FSID */ struct nfs_server *nfs_create_server(const struct nfs_parsed_mount_data *data, - struct nfs_fh *mntfh) + struct nfs_fh *mntfh, + struct nfs_subversion *nfs_mod) { struct nfs_server *server; struct nfs_fattr *fattr; @@ -1049,7 +1126,7 @@ struct nfs_server *nfs_create_server(const struct nfs_parsed_mount_data *data, goto error; /* Get a client representation */ - error = nfs_init_server(server, data); + error = 
nfs_init_server(server, data, nfs_mod); if (error < 0) goto error; diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 35f7e4bc680..e8877c82582 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -50,6 +50,7 @@ #include "fscache.h" #include "dns_resolve.h" #include "pnfs.h" +#include "nfs.h" #include "netns.h" #define NFSDBG_FACILITY NFSDBG_VFS @@ -1671,21 +1672,17 @@ static int __init init_nfs_fs(void) rpc_proc_register(&init_net, &nfs_rpcstat); #endif -#ifdef CONFIG_NFS_V4 - err = init_nfs_v4(); + err = nfs_register_versions(); if (err) goto out1; -#endif if ((err = register_nfs_fs()) != 0) goto out0; return 0; out0: -#ifdef CONFIG_NFS_V4 - exit_nfs_v4(); + nfs_unregister_versions(); out1: -#endif #ifdef CONFIG_PROC_FS rpc_proc_unregister(&init_net, "nfs"); #endif diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index cfafd13b6fe..ac936476b3b 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -90,7 +90,7 @@ struct nfs_client_initdata { const char *hostname; const struct sockaddr *addr; size_t addrlen; - const struct nfs_rpc_ops *rpc_ops; + struct nfs_subversion *nfs_mod; int proto; u32 minorversion; struct net *net; @@ -189,7 +189,8 @@ nfs4_find_client_sessionid(struct net *, const struct sockaddr *, struct nfs4_sessionid *); extern struct nfs_server *nfs_create_server( const struct nfs_parsed_mount_data *, - struct nfs_fh *); + struct nfs_fh *, + struct nfs_subversion *); extern struct nfs_server *nfs4_create_server( const struct nfs_parsed_mount_data *, struct nfs_fh *); @@ -321,6 +322,7 @@ void nfs_zap_acl_cache(struct inode *inode); extern int nfs_wait_bit_killable(void *word); /* super.c */ +extern struct file_system_type nfs_fs_type; extern struct file_system_type nfs_xdev_fs_type; #ifdef CONFIG_NFS_V4 extern struct file_system_type nfs4_xdev_fs_type; @@ -329,8 +331,8 @@ extern struct file_system_type nfs4_referral_fs_type; void nfs_initialise_sb(struct super_block *); int nfs_set_sb_security(struct super_block *, struct dentry *, struct 
nfs_mount_info *); int nfs_clone_sb_security(struct super_block *, struct dentry *, struct nfs_mount_info *); -struct dentry *nfs_fs_mount_common(struct file_system_type *, struct nfs_server *, - int, const char *, struct nfs_mount_info *); +struct dentry *nfs_fs_mount_common(struct nfs_server *, int, const char *, + struct nfs_mount_info *, struct nfs_subversion *); struct dentry *nfs_fs_mount(struct file_system_type *, int, const char *, void *); struct dentry * nfs_xdev_mount_common(struct file_system_type *, int, const char *, struct nfs_mount_info *); diff --git a/fs/nfs/nfs.h b/fs/nfs/nfs.h new file mode 100644 index 00000000000..ac10b9e6c92 --- /dev/null +++ b/fs/nfs/nfs.h @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2012 Netapp, Inc. All rights reserved. + * + * Function and structures exported by the NFS module + * for use by NFS version-specific modules. + */ +#ifndef __LINUX_INTERNAL_NFS_H +#define __LINUX_INTERNAL_NFS_H + +#include +#include +#include + +struct nfs_subversion { + struct module *owner; /* THIS_MODULE pointer */ + struct file_system_type *nfs_fs; /* NFS filesystem type */ + const struct rpc_version *rpc_vers; /* NFS version information */ + const struct nfs_rpc_ops *rpc_ops; /* NFS operations */ + struct list_head list; /* List of NFS versions */ +}; + +int nfs_register_versions(void); +void nfs_unregister_versions(void); + +#ifdef CONFIG_NFS_V2 +int init_nfs_v2(void); +void exit_nfs_v2(void); +#else /* CONFIG_NFS_V2 */ +static inline int __init init_nfs_v2(void) +{ + return 0; +} + +static inline void exit_nfs_v2(void) +{ +} +#endif /* CONFIG_NFS_V2 */ + +#ifdef CONFIG_NFS_V3 +int init_nfs_v3(void); +void exit_nfs_v3(void); +#else /* CONFIG_NFS_V3 */ +static inline int __init init_nfs_v3(void) +{ + return 0; +} + +static inline void exit_nfs_v3(void) +{ +} +#endif /* CONFIG_NFS_V3 */ + +#ifdef CONFIG_NFS_V4 +int init_nfs_v4(void); +void exit_nfs_v4(void); +#else /* CONFIG_NFS_V4 */ +static inline int __init init_nfs_v4(void) +{ + return 0; +} 
+ +static inline void exit_nfs_v4(void) +{ +} +#endif /* CONFIG_NFS_V4 */ + +struct nfs_subversion *get_nfs_version(unsigned int); +void put_nfs_version(struct nfs_subversion *); +void register_nfs_version(struct nfs_subversion *); +void unregister_nfs_version(struct nfs_subversion *); + +#endif /* __LINUX_INTERNAL_NFS_H */ diff --git a/fs/nfs/nfs2super.c b/fs/nfs/nfs2super.c new file mode 100644 index 00000000000..cef06d42334 --- /dev/null +++ b/fs/nfs/nfs2super.c @@ -0,0 +1,25 @@ +/* + * Copyright (c) 2012 Netapp, Inc. All rights reserved. + */ +#include +#include +#include "internal.h" +#include "nfs.h" + +static struct nfs_subversion nfs_v2 = { + .owner = THIS_MODULE, + .nfs_fs = &nfs_fs_type, + .rpc_vers = &nfs_version2, + .rpc_ops = &nfs_v2_clientops, +}; + +int __init init_nfs_v2(void) +{ + register_nfs_version(&nfs_v2); + return 0; +} + +void exit_nfs_v2(void) +{ + unregister_nfs_version(&nfs_v2); +} diff --git a/fs/nfs/nfs3super.c b/fs/nfs/nfs3super.c new file mode 100644 index 00000000000..f815cf359d9 --- /dev/null +++ b/fs/nfs/nfs3super.c @@ -0,0 +1,25 @@ +/* + * Copyright (c) 2012 Netapp, Inc. All rights reserved. 
+ */ +#include +#include +#include "internal.h" +#include "nfs.h" + +static struct nfs_subversion nfs_v3 = { + .owner = THIS_MODULE, + .nfs_fs = &nfs_fs_type, + .rpc_vers = &nfs_version3, + .rpc_ops = &nfs_v3_clientops, +}; + +int __init init_nfs_v3(void) +{ + register_nfs_version(&nfs_v3); + return 0; +} + +void exit_nfs_v3(void) +{ + unregister_nfs_version(&nfs_v3); +} diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 5511690de8a..99c2e7e4d3e 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -366,6 +366,7 @@ extern const nfs4_stateid zero_stateid; /* nfs4super.c */ struct nfs_mount_info; +extern struct nfs_subversion nfs_v4; struct dentry *nfs4_try_mount(int, const char *, struct nfs_mount_info *); int init_nfs_v4(void); void exit_nfs_v4(void); diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 1c3f13c8e47..769e798b395 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -357,7 +357,7 @@ static int nfs4_set_client(struct nfs_server *server, .hostname = hostname, .addr = addr, .addrlen = addrlen, - .rpc_ops = &nfs_v4_clientops, + .nfs_mod = &nfs_v4, .proto = proto, .minorversion = minorversion, .net = net, @@ -411,7 +411,7 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp, struct nfs_client_initdata cl_init = { .addr = ds_addr, .addrlen = ds_addrlen, - .rpc_ops = &nfs_v4_clientops, + .nfs_mod = &nfs_v4, .proto = ds_proto, .minorversion = mds_clp->cl_minorversion, .net = mds_clp->cl_net, diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c index 59264fb335c..1f3401902c2 100644 --- a/fs/nfs/nfs4super.c +++ b/fs/nfs/nfs4super.c @@ -8,6 +8,7 @@ #include #include "internal.h" #include "nfs4_fs.h" +#include "nfs.h" #define NFSDBG_FACILITY NFSDBG_VFS @@ -75,6 +76,13 @@ static const struct super_operations nfs4_sops = { .remount_fs = nfs_remount, }; +struct nfs_subversion nfs_v4 = { + .owner = THIS_MODULE, + .nfs_fs = &nfs4_fs_type, + .rpc_vers = &nfs_version4, + .rpc_ops = &nfs_v4_clientops, +}; + /* * Set up an NFS4 
superblock */ @@ -113,7 +121,7 @@ nfs4_remote_mount(struct file_system_type *fs_type, int flags, goto out; } - mntroot = nfs_fs_mount_common(fs_type, server, flags, dev_name, mount_info); + mntroot = nfs_fs_mount_common(server, flags, dev_name, mount_info, &nfs_v4); out: return mntroot; @@ -293,7 +301,7 @@ nfs4_remote_referral_mount(struct file_system_type *fs_type, int flags, goto out; } - mntroot = nfs_fs_mount_common(&nfs4_fs_type, server, flags, dev_name, &mount_info); + mntroot = nfs_fs_mount_common(server, flags, dev_name, &mount_info, &nfs_v4); out: nfs_free_fhandle(mount_info.mntfh); return mntroot; @@ -343,6 +351,7 @@ int __init init_nfs_v4(void) if (err < 0) goto out2; + register_nfs_version(&nfs_v4); return 0; out2: nfs4_unregister_sysctl(); @@ -354,6 +363,7 @@ out: void exit_nfs_v4(void) { + unregister_nfs_version(&nfs_v4); unregister_filesystem(&nfs4_fs_type); nfs4_unregister_sysctl(); nfs_idmap_quit(); diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 95866a8c21b..61405a7a6b3 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -64,6 +64,7 @@ #include "internal.h" #include "fscache.h" #include "pnfs.h" +#include "nfs.h" #define NFSDBG_FACILITY NFSDBG_VFS #define NFS_TEXT_DATA 1 @@ -281,7 +282,7 @@ static match_table_t nfs_vers_tokens = { static struct dentry *nfs_xdev_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data); -static struct file_system_type nfs_fs_type = { +struct file_system_type nfs_fs_type = { .owner = THIS_MODULE, .name = "nfs", .mount = nfs_fs_mount, @@ -1650,7 +1651,8 @@ static int nfs_request_mount(struct nfs_parsed_mount_data *args, } static struct dentry *nfs_try_mount(int flags, const char *dev_name, - struct nfs_mount_info *mount_info) + struct nfs_mount_info *mount_info, + struct nfs_subversion *nfs_mod) { int status; struct nfs_server *server; @@ -1662,11 +1664,11 @@ static struct dentry *nfs_try_mount(int flags, const char *dev_name, } /* Get a volume representation */ - server = 
nfs_create_server(mount_info->parsed, mount_info->mntfh); + server = nfs_create_server(mount_info->parsed, mount_info->mntfh, nfs_mod); if (IS_ERR(server)) return ERR_CAST(server); - return nfs_fs_mount_common(&nfs_fs_type, server, flags, dev_name, mount_info); + return nfs_fs_mount_common(server, flags, dev_name, mount_info, nfs_mod); } /* @@ -2297,10 +2299,10 @@ int nfs_clone_sb_security(struct super_block *s, struct dentry *mntroot, return 0; } -struct dentry *nfs_fs_mount_common(struct file_system_type *fs_type, - struct nfs_server *server, +struct dentry *nfs_fs_mount_common(struct nfs_server *server, int flags, const char *dev_name, - struct nfs_mount_info *mount_info) + struct nfs_mount_info *mount_info, + struct nfs_subversion *nfs_mod) { struct super_block *s; struct dentry *mntroot = ERR_PTR(-ENOMEM); @@ -2319,7 +2321,7 @@ struct dentry *nfs_fs_mount_common(struct file_system_type *fs_type, sb_mntdata.mntflags |= MS_SYNCHRONOUS; /* Get a superblock - note that we may end up sharing one that already exists */ - s = sget(fs_type, compare_super, nfs_set_super, flags, &sb_mntdata); + s = sget(nfs_mod->nfs_fs, compare_super, nfs_set_super, flags, &sb_mntdata); if (IS_ERR(s)) { mntroot = ERR_CAST(s); goto out_err_nosb; @@ -2378,6 +2380,7 @@ struct dentry *nfs_fs_mount(struct file_system_type *fs_type, .set_security = nfs_set_sb_security, }; struct dentry *mntroot = ERR_PTR(-ENOMEM); + struct nfs_subversion *nfs_mod; int error; mount_info.parsed = nfs_alloc_parsed_mount_data(); @@ -2394,12 +2397,20 @@ struct dentry *nfs_fs_mount(struct file_system_type *fs_type, goto out; } + nfs_mod = get_nfs_version(mount_info.parsed->version); + if (IS_ERR(nfs_mod)) { + mntroot = ERR_CAST(nfs_mod); + goto out; + } + #ifdef CONFIG_NFS_V4 if (mount_info.parsed->version == 4) mntroot = nfs4_try_mount(flags, dev_name, &mount_info); else #endif /* CONFIG_NFS_V4 */ - mntroot = nfs_try_mount(flags, dev_name, &mount_info); + mntroot = nfs_try_mount(flags, dev_name, &mount_info, 
nfs_mod); + + put_nfs_version(nfs_mod); out: nfs_free_parsed_mount_data(mount_info.parsed); @@ -2440,6 +2451,7 @@ nfs_xdev_mount_common(struct file_system_type *fs_type, int flags, struct nfs_clone_mount *data = mount_info->cloned; struct nfs_server *server; struct dentry *mntroot = ERR_PTR(-ENOMEM); + struct nfs_subversion *nfs_mod = NFS_SB(data->sb)->nfs_client->cl_nfs_mod; int error; dprintk("--> nfs_xdev_mount_common()\n"); @@ -2453,7 +2465,7 @@ nfs_xdev_mount_common(struct file_system_type *fs_type, int flags, goto out_err; } - mntroot = nfs_fs_mount_common(fs_type, server, flags, dev_name, mount_info); + mntroot = nfs_fs_mount_common(server, flags, dev_name, mount_info, nfs_mod); dprintk("<-- nfs_xdev_mount_common() = 0\n"); out: return mntroot; -- cgit v1.2.3 From e8f25e6d6d198dca7d09d8fe2c24ba3b9683bb24 Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Mon, 30 Jul 2012 16:05:17 -0400 Subject: NFS: Remove the NFS v4 xdev mount function I can now share this code with the v2 and v3 code by using the NFS subversion structure. 
Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/namespace.c | 13 ------------- fs/nfs/nfs4super.c | 25 ------------------------- fs/nfs/super.c | 30 ++++++++++-------------------- 3 files changed, 10 insertions(+), 58 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index 08b9c93675d..0f699fefee6 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -195,20 +195,7 @@ static struct vfsmount *nfs_do_clone_mount(struct nfs_server *server, const char *devname, struct nfs_clone_mount *mountdata) { -#ifdef CONFIG_NFS_V4 - struct vfsmount *mnt = ERR_PTR(-EINVAL); - switch (server->nfs_client->rpc_ops->version) { - case 2: - case 3: - mnt = vfs_kern_mount(&nfs_xdev_fs_type, 0, devname, mountdata); - break; - case 4: - mnt = vfs_kern_mount(&nfs4_xdev_fs_type, 0, devname, mountdata); - } - return mnt; -#else return vfs_kern_mount(&nfs_xdev_fs_type, 0, devname, mountdata); -#endif } /** diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c index 1f3401902c2..8a505573c28 100644 --- a/fs/nfs/nfs4super.c +++ b/fs/nfs/nfs4super.c @@ -14,8 +14,6 @@ static struct dentry *nfs4_remote_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data); -static struct dentry *nfs4_xdev_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *raw_data); static struct dentry *nfs4_referral_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data); static struct dentry *nfs4_remote_referral_mount(struct file_system_type *fs_type, @@ -37,14 +35,6 @@ static struct file_system_type nfs4_remote_fs_type = { .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, }; -struct file_system_type nfs4_xdev_fs_type = { - .owner = THIS_MODULE, - .name = "nfs4", - .mount = nfs4_xdev_mount, - .kill_sb = nfs_kill_super, - .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, -}; - static struct file_system_type 
nfs4_remote_referral_fs_type = { .owner = THIS_MODULE, .name = "nfs4", @@ -261,21 +251,6 @@ struct dentry *nfs4_try_mount(int flags, const char *dev_name, return res; } -/* - * Clone an NFS4 server record on xdev traversal (FSID-change) - */ -static struct dentry * -nfs4_xdev_mount(struct file_system_type *fs_type, int flags, - const char *dev_name, void *raw_data) -{ - struct nfs_mount_info mount_info = { - .fill_super = nfs_clone_super, - .set_security = nfs_clone_sb_security, - .cloned = raw_data, - }; - return nfs_xdev_mount_common(&nfs4_fs_type, flags, dev_name, &mount_info); -} - static struct dentry * nfs4_remote_referral_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data) diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 61405a7a6b3..4faefa19a8c 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -2445,10 +2445,15 @@ void nfs_kill_super(struct super_block *s) * Clone an NFS2/3/4 server record on xdev traversal (FSID-change) */ struct dentry * -nfs_xdev_mount_common(struct file_system_type *fs_type, int flags, - const char *dev_name, struct nfs_mount_info *mount_info) +nfs_xdev_mount(struct file_system_type *fs_type, int flags, + const char *dev_name, void *raw_data) { - struct nfs_clone_mount *data = mount_info->cloned; + struct nfs_clone_mount *data = raw_data; + struct nfs_mount_info mount_info = { + .fill_super = nfs_clone_super, + .set_security = nfs_clone_sb_security, + .cloned = data, + }; struct nfs_server *server; struct dentry *mntroot = ERR_PTR(-ENOMEM); struct nfs_subversion *nfs_mod = NFS_SB(data->sb)->nfs_client->cl_nfs_mod; @@ -2456,7 +2461,7 @@ nfs_xdev_mount_common(struct file_system_type *fs_type, int flags, dprintk("--> nfs_xdev_mount_common()\n"); - mount_info->mntfh = data->fh; + mount_info.mntfh = mount_info.cloned->fh; /* create a new volume representation */ server = nfs_clone_server(NFS_SB(data->sb), data->fh, data->fattr, data->authflavor); @@ -2465,7 +2470,7 @@ nfs_xdev_mount_common(struct 
file_system_type *fs_type, int flags, goto out_err; } - mntroot = nfs_fs_mount_common(server, flags, dev_name, mount_info, nfs_mod); + mntroot = nfs_fs_mount_common(server, flags, dev_name, &mount_info, nfs_mod); dprintk("<-- nfs_xdev_mount_common() = 0\n"); out: return mntroot; @@ -2475,21 +2480,6 @@ out_err: goto out; } -/* - * Clone an NFS2/3 server record on xdev traversal (FSID-change) - */ -static struct dentry * -nfs_xdev_mount(struct file_system_type *fs_type, int flags, - const char *dev_name, void *raw_data) -{ - struct nfs_mount_info mount_info = { - .fill_super = nfs_clone_super, - .set_security = nfs_clone_sb_security, - .cloned = raw_data, - }; - return nfs_xdev_mount_common(&nfs_fs_type, flags, dev_name, &mount_info); -} - #ifdef CONFIG_NFS_V4 static void nfs4_validate_mount_flags(struct nfs_parsed_mount_data *args) -- cgit v1.2.3 From ff9099f26645818563c8d396a154c2ce6ee422eb Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Mon, 30 Jul 2012 16:05:18 -0400 Subject: NFS: Create a try_mount rpc op I'm already looking up the nfs subversion in nfs_fs_mount(), so I have easy access to rpc_ops that used to be difficult to reach. This allows me to set up a different mount path for NFS v2/3 and NFS v4. 
Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/internal.h | 2 ++ fs/nfs/nfs3proc.c | 1 + fs/nfs/nfs4_fs.h | 2 +- fs/nfs/nfs4proc.c | 1 + fs/nfs/nfs4super.c | 3 ++- fs/nfs/proc.c | 1 + fs/nfs/super.c | 14 ++++---------- 7 files changed, 12 insertions(+), 12 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index ac936476b3b..3364eccd17e 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -328,6 +328,8 @@ extern struct file_system_type nfs_xdev_fs_type; extern struct file_system_type nfs4_xdev_fs_type; extern struct file_system_type nfs4_referral_fs_type; #endif +struct dentry *nfs_try_mount(int, const char *, struct nfs_mount_info *, + struct nfs_subversion *); void nfs_initialise_sb(struct super_block *); int nfs_set_sb_security(struct super_block *, struct dentry *, struct nfs_mount_info *); int nfs_clone_sb_security(struct super_block *, struct dentry *, struct nfs_mount_info *); diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 65d23eb92fe..4f4cb8e4971 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -925,6 +925,7 @@ const struct nfs_rpc_ops nfs_v3_clientops = { .file_ops = &nfs_file_operations, .getroot = nfs3_proc_get_root, .submount = nfs_submount, + .try_mount = nfs_try_mount, .getattr = nfs3_proc_getattr, .setattr = nfs3_proc_setattr, .lookup = nfs3_proc_lookup, diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 99c2e7e4d3e..c321fb59d80 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -367,7 +367,7 @@ extern const nfs4_stateid zero_stateid; /* nfs4super.c */ struct nfs_mount_info; extern struct nfs_subversion nfs_v4; -struct dentry *nfs4_try_mount(int, const char *, struct nfs_mount_info *); +struct dentry *nfs4_try_mount(int, const char *, struct nfs_mount_info *, struct nfs_subversion *); int init_nfs_v4(void); void exit_nfs_v4(void); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 6843e0a37de..eb4ba1d99df 100644 --- a/fs/nfs/nfs4proc.c +++ 
b/fs/nfs/nfs4proc.c @@ -6870,6 +6870,7 @@ const struct nfs_rpc_ops nfs_v4_clientops = { .file_ops = &nfs4_file_operations, .getroot = nfs4_proc_get_root, .submount = nfs4_submount, + .try_mount = nfs4_try_mount, .getattr = nfs4_proc_getattr, .setattr = nfs4_proc_setattr, .lookup = nfs4_proc_lookup, diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c index 8a505573c28..9384f666b6a 100644 --- a/fs/nfs/nfs4super.c +++ b/fs/nfs/nfs4super.c @@ -226,7 +226,8 @@ static struct dentry *nfs_follow_remote_path(struct vfsmount *root_mnt, } struct dentry *nfs4_try_mount(int flags, const char *dev_name, - struct nfs_mount_info *mount_info) + struct nfs_mount_info *mount_info, + struct nfs_subversion *nfs_mod) { char *export_path; struct vfsmount *root_mnt; diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index 4d3356af330..ebb3d9c5227 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -774,6 +774,7 @@ const struct nfs_rpc_ops nfs_v2_clientops = { .file_ops = &nfs_file_operations, .getroot = nfs_proc_get_root, .submount = nfs_submount, + .try_mount = nfs_try_mount, .getattr = nfs_proc_getattr, .setattr = nfs_proc_setattr, .lookup = nfs_proc_lookup, diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 4faefa19a8c..5fca59d73e4 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1650,9 +1650,9 @@ static int nfs_request_mount(struct nfs_parsed_mount_data *args, return nfs_walk_authlist(args, &request); } -static struct dentry *nfs_try_mount(int flags, const char *dev_name, - struct nfs_mount_info *mount_info, - struct nfs_subversion *nfs_mod) +struct dentry *nfs_try_mount(int flags, const char *dev_name, + struct nfs_mount_info *mount_info, + struct nfs_subversion *nfs_mod) { int status; struct nfs_server *server; @@ -2403,15 +2403,9 @@ struct dentry *nfs_fs_mount(struct file_system_type *fs_type, goto out; } -#ifdef CONFIG_NFS_V4 - if (mount_info.parsed->version == 4) - mntroot = nfs4_try_mount(flags, dev_name, &mount_info); - else -#endif /* CONFIG_NFS_V4 */ - mntroot = 
nfs_try_mount(flags, dev_name, &mount_info, nfs_mod); + mntroot = nfs_mod->rpc_ops->try_mount(flags, dev_name, &mount_info, nfs_mod); put_nfs_version(nfs_mod); - out: nfs_free_parsed_mount_data(mount_info.parsed); nfs_free_fhandle(mount_info.mntfh); -- cgit v1.2.3 From 1179acc6a3e260bc4edc74fa94f6c7908290eaec Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Mon, 30 Jul 2012 16:05:19 -0400 Subject: NFS: Only initialize the ACL client in the v3 case v2 and v4 don't use it, so I create two new nfs_rpc_ops functions to initialize the ACL client only when we are using v3. Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/Makefile | 2 +- fs/nfs/client.c | 61 +++++-------------------------------------------- fs/nfs/internal.h | 15 ++++++++----- fs/nfs/nfs3client.c | 65 +++++++++++++++++++++++++++++++++++++++++++++++++++++ fs/nfs/nfs3proc.c | 2 ++ fs/nfs/nfs4client.c | 10 +++++---- fs/nfs/nfs4proc.c | 2 ++ fs/nfs/nfs4super.c | 2 +- fs/nfs/proc.c | 2 ++ fs/nfs/super.c | 4 ++-- 10 files changed, 96 insertions(+), 69 deletions(-) create mode 100644 fs/nfs/nfs3client.c (limited to 'fs/nfs') diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile index 66dd3075e5d..7ca0125da65 100644 --- a/fs/nfs/Makefile +++ b/fs/nfs/Makefile @@ -10,7 +10,7 @@ nfs-y := client.o dir.o file.o getroot.o inode.o super.o \ dns_resolve.o cache_lib.o nfs-$(CONFIG_ROOT_NFS) += nfsroot.o nfs-$(CONFIG_NFS_V2) += nfs2super.o proc.o nfs2xdr.o -nfs-$(CONFIG_NFS_V3) += nfs3super.o nfs3proc.o nfs3xdr.o +nfs-$(CONFIG_NFS_V3) += nfs3super.o nfs3client.o nfs3proc.o nfs3xdr.o nfs-$(CONFIG_NFS_V3_ACL) += nfs3acl.o nfs-$(CONFIG_NFS_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \ nfs4super.o nfs4file.o delegation.o idmap.o \ diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 462de24482b..1f2908287cb 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -83,22 +83,6 @@ struct rpc_stat nfs_rpcstat = { .program = &nfs_program }; - -#ifdef CONFIG_NFS_V3_ACL -static struct rpc_stat 
nfsacl_rpcstat = { &nfsacl_program }; -static const struct rpc_version *nfsacl_version[] = { - [3] = &nfsacl_version3, -}; - -const struct rpc_program nfsacl_program = { - .name = "nfsacl", - .number = NFS_ACL_PROGRAM, - .nrvers = ARRAY_SIZE(nfsacl_version), - .version = nfsacl_version, - .stats = &nfsacl_rpcstat, -}; -#endif /* CONFIG_NFS_V3_ACL */ - static struct nfs_subversion *find_nfs_version(unsigned int version) { struct nfs_subversion *nfs; @@ -695,36 +679,6 @@ static int nfs_start_lockd(struct nfs_server *server) return 0; } -/* - * Initialise an NFSv3 ACL client connection - */ -#ifdef CONFIG_NFS_V3_ACL -static void nfs_init_server_aclclient(struct nfs_server *server) -{ - if (server->nfs_client->rpc_ops->version != 3) - goto out_noacl; - if (server->flags & NFS_MOUNT_NOACL) - goto out_noacl; - - server->client_acl = rpc_bind_new_program(server->client, &nfsacl_program, 3); - if (IS_ERR(server->client_acl)) - goto out_noacl; - - /* No errors! Assume that Sun nfsacls are supported */ - server->caps |= NFS_CAP_ACLS; - return; - -out_noacl: - server->caps &= ~NFS_CAP_ACLS; -} -#else -static inline void nfs_init_server_aclclient(struct nfs_server *server) -{ - server->flags &= ~NFS_MOUNT_NOACL; - server->caps &= ~NFS_CAP_ACLS; -} -#endif - /* * Create a general RPC client */ @@ -874,8 +828,6 @@ static int nfs_init_server(struct nfs_server *server, server->mountd_protocol = data->mount_server.protocol; server->namelen = data->namlen; - /* Create a client RPC handle for the NFSv3 ACL management interface */ - nfs_init_server_aclclient(server); dprintk("<-- nfs_init_server() = 0 [new %p]\n", clp); return 0; @@ -1108,8 +1060,7 @@ void nfs_free_server(struct nfs_server *server) * Create a version 2 or 3 volume record * - keyed on server and FSID */ -struct nfs_server *nfs_create_server(const struct nfs_parsed_mount_data *data, - struct nfs_fh *mntfh, +struct nfs_server *nfs_create_server(struct nfs_mount_info *mount_info, struct nfs_subversion *nfs_mod) { struct 
nfs_server *server; @@ -1126,7 +1077,7 @@ struct nfs_server *nfs_create_server(const struct nfs_parsed_mount_data *data, goto error; /* Get a client representation */ - error = nfs_init_server(server, data, nfs_mod); + error = nfs_init_server(server, mount_info->parsed, nfs_mod); if (error < 0) goto error; @@ -1135,13 +1086,13 @@ struct nfs_server *nfs_create_server(const struct nfs_parsed_mount_data *data, BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops); /* Probe the root fh to retrieve its FSID */ - error = nfs_probe_fsinfo(server, mntfh, fattr); + error = nfs_probe_fsinfo(server, mount_info->mntfh, fattr); if (error < 0) goto error; if (server->nfs_client->rpc_ops->version == 3) { if (server->namelen == 0 || server->namelen > NFS3_MAXNAMLEN) server->namelen = NFS3_MAXNAMLEN; - if (!(data->flags & NFS_MOUNT_NORDIRPLUS)) + if (!(mount_info->parsed->flags & NFS_MOUNT_NORDIRPLUS)) server->caps |= NFS_CAP_READDIRPLUS; } else { if (server->namelen == 0 || server->namelen > NFS2_MAXNAMLEN) @@ -1149,7 +1100,7 @@ struct nfs_server *nfs_create_server(const struct nfs_parsed_mount_data *data, } if (!(fattr->valid & NFS_ATTR_FATTR)) { - error = server->nfs_client->rpc_ops->getattr(server, mntfh, fattr); + error = nfs_mod->rpc_ops->getattr(server, mount_info->mntfh, fattr); if (error < 0) { dprintk("nfs_create_server: getattr error = %d\n", -error); goto error; @@ -1210,8 +1161,6 @@ struct nfs_server *nfs_clone_server(struct nfs_server *source, flavor); if (error < 0) goto out_free_server; - if (!IS_ERR(source->client_acl)) - nfs_init_server_aclclient(server); /* probe the filesystem info for this server filesystem */ error = nfs_probe_fsinfo(server, fh, fattr_fsinfo); diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 3364eccd17e..2151bafd55b 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -187,13 +187,11 @@ extern struct nfs_client *nfs4_find_client_ident(struct net *, int); extern struct nfs_client * nfs4_find_client_sessionid(struct net *, const 
struct sockaddr *, struct nfs4_sessionid *); -extern struct nfs_server *nfs_create_server( - const struct nfs_parsed_mount_data *, - struct nfs_fh *, +extern struct nfs_server *nfs_create_server(struct nfs_mount_info *, struct nfs_subversion *); extern struct nfs_server *nfs4_create_server( - const struct nfs_parsed_mount_data *, - struct nfs_fh *); + struct nfs_mount_info *, + struct nfs_subversion *); extern struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *, struct nfs_fh *); extern void nfs_free_server(struct nfs_server *server); @@ -225,6 +223,13 @@ static inline void nfs_fs_proc_exit(void) int nfs_sockaddr_match_ipaddr(const struct sockaddr *, const struct sockaddr *); #endif +/* nfs3client.c */ +#ifdef CONFIG_NFS_V3 +struct nfs_server *nfs3_create_server(struct nfs_mount_info *, struct nfs_subversion *); +struct nfs_server *nfs3_clone_server(struct nfs_server *, struct nfs_fh *, + struct nfs_fattr *, rpc_authflavor_t); +#endif + /* callback_xdr.c */ extern struct svc_version nfs4_callback_version1; extern struct svc_version nfs4_callback_version4; diff --git a/fs/nfs/nfs3client.c b/fs/nfs/nfs3client.c new file mode 100644 index 00000000000..b3fc65ef39c --- /dev/null +++ b/fs/nfs/nfs3client.c @@ -0,0 +1,65 @@ +#include +#include +#include "internal.h" + +#ifdef CONFIG_NFS_V3_ACL +static struct rpc_stat nfsacl_rpcstat = { &nfsacl_program }; +static const struct rpc_version *nfsacl_version[] = { + [3] = &nfsacl_version3, +}; + +const struct rpc_program nfsacl_program = { + .name = "nfsacl", + .number = NFS_ACL_PROGRAM, + .nrvers = ARRAY_SIZE(nfsacl_version), + .version = nfsacl_version, + .stats = &nfsacl_rpcstat, +}; + +/* + * Initialise an NFSv3 ACL client connection + */ +static void nfs_init_server_aclclient(struct nfs_server *server) +{ + if (server->flags & NFS_MOUNT_NOACL) + goto out_noacl; + + server->client_acl = rpc_bind_new_program(server->client, &nfsacl_program, 3); + if (IS_ERR(server->client_acl)) + goto out_noacl; + + /* No 
errors! Assume that Sun nfsacls are supported */ + server->caps |= NFS_CAP_ACLS; + return; + +out_noacl: + server->caps &= ~NFS_CAP_ACLS; +} +#else +static inline void nfs_init_server_aclclient(struct nfs_server *server) +{ + server->flags &= ~NFS_MOUNT_NOACL; + server->caps &= ~NFS_CAP_ACLS; +} +#endif + +struct nfs_server *nfs3_create_server(struct nfs_mount_info *mount_info, + struct nfs_subversion *nfs_mod) +{ + struct nfs_server *server = nfs_create_server(mount_info, nfs_mod); + /* Create a client RPC handle for the NFS v3 ACL management interface */ + if (!IS_ERR(server)) + nfs_init_server_aclclient(server); + return server; +} + +struct nfs_server *nfs3_clone_server(struct nfs_server *source, + struct nfs_fh *fh, + struct nfs_fattr *fattr, + rpc_authflavor_t flavor) +{ + struct nfs_server *server = nfs_clone_server(source, fh, fattr, flavor); + if (!IS_ERR(server) && !IS_ERR(source->client_acl)) + nfs_init_server_aclclient(server); + return server; +} diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 4f4cb8e4971..0952c791df3 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -969,4 +969,6 @@ const struct nfs_rpc_ops nfs_v3_clientops = { .alloc_client = nfs_alloc_client, .init_client = nfs_init_client, .free_client = nfs_free_client, + .create_server = nfs3_create_server, + .clone_server = nfs3_clone_server, }; diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 769e798b395..b2d409d2805 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -574,8 +574,10 @@ error: * Create a version 4 volume record * - keyed on server and FSID */ -struct nfs_server *nfs4_create_server(const struct nfs_parsed_mount_data *data, - struct nfs_fh *mntfh) +/*struct nfs_server *nfs4_create_server(const struct nfs_parsed_mount_data *data, + struct nfs_fh *mntfh)*/ +struct nfs_server *nfs4_create_server(struct nfs_mount_info *mount_info, + struct nfs_subversion *nfs_mod) { struct nfs_server *server; int error; @@ -587,11 +589,11 @@ struct nfs_server 
*nfs4_create_server(const struct nfs_parsed_mount_data *data, return ERR_PTR(-ENOMEM); /* set up the general RPC client */ - error = nfs4_init_server(server, data); + error = nfs4_init_server(server, mount_info->parsed); if (error < 0) goto error; - error = nfs4_server_common_setup(server, mntfh); + error = nfs4_server_common_setup(server, mount_info->mntfh); if (error < 0) goto error; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index eb4ba1d99df..36c6432aac7 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -6916,6 +6916,8 @@ const struct nfs_rpc_ops nfs_v4_clientops = { .alloc_client = nfs4_alloc_client, .init_client = nfs4_init_client, .free_client = nfs4_free_client, + .create_server = nfs4_create_server, + .clone_server = nfs_clone_server, }; static const struct xattr_handler nfs4_xattr_nfs4_acl_handler = { diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c index 9384f666b6a..a6283625666 100644 --- a/fs/nfs/nfs4super.c +++ b/fs/nfs/nfs4super.c @@ -105,7 +105,7 @@ nfs4_remote_mount(struct file_system_type *fs_type, int flags, mount_info->set_security = nfs_set_sb_security; /* Get a volume representation */ - server = nfs4_create_server(mount_info->parsed, mount_info->mntfh); + server = nfs4_create_server(mount_info, &nfs_v4); if (IS_ERR(server)) { mntroot = ERR_CAST(server); goto out; diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index ebb3d9c5227..50a88c3546e 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -817,4 +817,6 @@ const struct nfs_rpc_ops nfs_v2_clientops = { .alloc_client = nfs_alloc_client, .init_client = nfs_init_client, .free_client = nfs_free_client, + .create_server = nfs_create_server, + .clone_server = nfs_clone_server, }; diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 5fca59d73e4..a5f9fb3bfdc 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1664,7 +1664,7 @@ struct dentry *nfs_try_mount(int flags, const char *dev_name, } /* Get a volume representation */ - server = nfs_create_server(mount_info->parsed, 
mount_info->mntfh, nfs_mod); + server = nfs_mod->rpc_ops->create_server(mount_info, nfs_mod); if (IS_ERR(server)) return ERR_CAST(server); @@ -2458,7 +2458,7 @@ nfs_xdev_mount(struct file_system_type *fs_type, int flags, mount_info.mntfh = mount_info.cloned->fh; /* create a new volume representation */ - server = nfs_clone_server(NFS_SB(data->sb), data->fh, data->fattr, data->authflavor); + server = nfs_mod->rpc_ops->clone_server(NFS_SB(data->sb), data->fh, data->fattr, data->authflavor); if (IS_ERR(server)) { error = PTR_ERR(server); goto out_err; -- cgit v1.2.3 From 6a74490dca897471a994a542fc7c5a469b48b46b Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Mon, 30 Jul 2012 16:05:20 -0400 Subject: NFS: Pass super operations and xattr handlers in the nfs_subversion I can set all variables in the nfs_fill_super() function, allowing me to remove the nfs4_fill_super() function. Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/internal.h | 1 + fs/nfs/nfs.h | 2 ++ fs/nfs/nfs2super.c | 1 + fs/nfs/nfs3super.c | 1 + fs/nfs/nfs4super.c | 24 +++--------------------- fs/nfs/super.c | 9 +++++---- 6 files changed, 13 insertions(+), 25 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 2151bafd55b..17d14709e75 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -327,6 +327,7 @@ void nfs_zap_acl_cache(struct inode *inode); extern int nfs_wait_bit_killable(void *word); /* super.c */ +extern const struct super_operations nfs_sops; extern struct file_system_type nfs_fs_type; extern struct file_system_type nfs_xdev_fs_type; #ifdef CONFIG_NFS_V4 diff --git a/fs/nfs/nfs.h b/fs/nfs/nfs.h index ac10b9e6c92..9f502a0c1e5 100644 --- a/fs/nfs/nfs.h +++ b/fs/nfs/nfs.h @@ -16,6 +16,8 @@ struct nfs_subversion { struct file_system_type *nfs_fs; /* NFS filesystem type */ const struct rpc_version *rpc_vers; /* NFS version information */ const struct nfs_rpc_ops *rpc_ops; /* NFS operations */ + const struct super_operations 
*sops; /* NFS Super operations */ + const struct xattr_handler **xattr; /* NFS xattr handlers */ struct list_head list; /* List of NFS versions */ }; diff --git a/fs/nfs/nfs2super.c b/fs/nfs/nfs2super.c index cef06d42334..a9fb69d7281 100644 --- a/fs/nfs/nfs2super.c +++ b/fs/nfs/nfs2super.c @@ -11,6 +11,7 @@ static struct nfs_subversion nfs_v2 = { .nfs_fs = &nfs_fs_type, .rpc_vers = &nfs_version2, .rpc_ops = &nfs_v2_clientops, + .sops = &nfs_sops, }; int __init init_nfs_v2(void) diff --git a/fs/nfs/nfs3super.c b/fs/nfs/nfs3super.c index f815cf359d9..8378090b810 100644 --- a/fs/nfs/nfs3super.c +++ b/fs/nfs/nfs3super.c @@ -11,6 +11,7 @@ static struct nfs_subversion nfs_v3 = { .nfs_fs = &nfs_fs_type, .rpc_vers = &nfs_version3, .rpc_ops = &nfs_v3_clientops, + .sops = &nfs_sops, }; int __init init_nfs_v3(void) diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c index a6283625666..c70e1730755 100644 --- a/fs/nfs/nfs4super.c +++ b/fs/nfs/nfs4super.c @@ -71,25 +71,10 @@ struct nfs_subversion nfs_v4 = { .nfs_fs = &nfs4_fs_type, .rpc_vers = &nfs_version4, .rpc_ops = &nfs_v4_clientops, + .sops = &nfs4_sops, + .xattr = nfs4_xattr_handlers, }; -/* - * Set up an NFS4 superblock - */ -static void nfs4_fill_super(struct super_block *sb, - struct nfs_mount_info *mount_info) -{ - sb->s_time_gran = 1; - sb->s_op = &nfs4_sops; - /* - * The VFS shouldn't apply the umask to mode bits. We will do - * so ourselves when necessary. 
- */ - sb->s_flags |= MS_POSIXACL; - sb->s_xattr = nfs4_xattr_handlers; - nfs_initialise_sb(sb); -} - /* * Get the superblock for the NFS4 root partition */ @@ -101,7 +86,6 @@ nfs4_remote_mount(struct file_system_type *fs_type, int flags, struct nfs_server *server; struct dentry *mntroot = ERR_PTR(-ENOMEM); - mount_info->fill_super = nfs4_fill_super; mount_info->set_security = nfs_set_sb_security; /* Get a volume representation */ @@ -236,8 +220,6 @@ struct dentry *nfs4_try_mount(int flags, const char *dev_name, dfprintk(MOUNT, "--> nfs4_try_mount()\n"); - mount_info->fill_super = nfs4_fill_super; - export_path = data->nfs_server.export_path; data->nfs_server.export_path = "/"; root_mnt = nfs_do_root_mount(&nfs4_remote_fs_type, flags, mount_info, @@ -257,7 +239,7 @@ nfs4_remote_referral_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data) { struct nfs_mount_info mount_info = { - .fill_super = nfs4_fill_super, + .fill_super = nfs_fill_super, .set_security = nfs_clone_sb_security, .cloned = raw_data, }; diff --git a/fs/nfs/super.c b/fs/nfs/super.c index a5f9fb3bfdc..a275d19ae51 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -298,7 +298,7 @@ struct file_system_type nfs_xdev_fs_type = { .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, }; -static const struct super_operations nfs_sops = { +const struct super_operations nfs_sops = { .alloc_inode = nfs_alloc_inode, .destroy_inode = nfs_destroy_inode, .write_inode = nfs_write_inode, @@ -2105,10 +2105,12 @@ void nfs_fill_super(struct super_block *sb, struct nfs_mount_info *mount_info) sb->s_blocksize_bits = 0; sb->s_blocksize = 0; - if (data->bsize) + sb->s_xattr = server->nfs_client->cl_nfs_mod->xattr; + sb->s_op = server->nfs_client->cl_nfs_mod->sops; + if (data && data->bsize) sb->s_blocksize = nfs_block_size(data->bsize, &sb->s_blocksize_bits); - if (server->nfs_client->rpc_ops->version == 3) { + if (server->nfs_client->rpc_ops->version != 2) { /* The VFS 
shouldn't apply the umask to mode bits. We will do * so ourselves when necessary. */ @@ -2116,7 +2118,6 @@ void nfs_fill_super(struct super_block *sb, struct nfs_mount_info *mount_info) sb->s_time_gran = 1; } - sb->s_op = &nfs_sops; nfs_initialise_sb(sb); } -- cgit v1.2.3 From 19d87ca3623956494b517f3abe0caf2616d55457 Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Mon, 30 Jul 2012 16:05:21 -0400 Subject: NFS: Split out remaining NFS v4 inode functions Somehow I missed this in my previous patch series, but these functions are only needed by the v4 code and should be moved to a v4-only file. I wasn't exactly sure where I should put these functions, so I moved them into nfs4super.c where I could make them static. Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 23 +---------------------- fs/nfs/internal.h | 4 +--- fs/nfs/nfs4_fs.h | 3 --- fs/nfs/nfs4super.c | 39 +++++++++++++++++++++++++++++++++++++++ fs/nfs/write.c | 20 -------------------- 5 files changed, 41 insertions(+), 48 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index e8877c82582..a6ffa4be2a0 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -106,7 +106,7 @@ u64 nfs_compat_user_ino64(u64 fileid) return ino; } -static void nfs_clear_inode(struct inode *inode) +void nfs_clear_inode(struct inode *inode) { /* * The following should never happen... @@ -1472,27 +1472,6 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) return -ESTALE; } - -#ifdef CONFIG_NFS_V4 - -/* - * Clean out any remaining NFSv4 state that might be left over due - * to open() calls that passed nfs_atomic_lookup, but failed to call - * nfs_open(). - */ -void nfs4_evict_inode(struct inode *inode) -{ - truncate_inode_pages(&inode->i_data, 0); - clear_inode(inode); - pnfs_return_layout(inode); - pnfs_destroy_layout(NFS_I(inode)); - /* If we are holding a delegation, return it! 
*/ - nfs_inode_return_delegation_noreclaim(inode); - /* First call standard NFS clear_inode() code */ - nfs_clear_inode(inode); -} -#endif - struct inode *nfs_alloc_inode(struct super_block *sb) { struct nfs_inode *nfsi; diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 17d14709e75..4174faf73ec 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -319,10 +319,8 @@ extern struct workqueue_struct *nfsiod_workqueue; extern struct inode *nfs_alloc_inode(struct super_block *sb); extern void nfs_destroy_inode(struct inode *); extern int nfs_write_inode(struct inode *, struct writeback_control *); +extern void nfs_clear_inode(struct inode *); extern void nfs_evict_inode(struct inode *); -#ifdef CONFIG_NFS_V4 -extern void nfs4_evict_inode(struct inode *); -#endif void nfs_zap_acl_cache(struct inode *inode); extern int nfs_wait_bit_killable(void *word); diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index c321fb59d80..4811e1251d3 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -205,9 +205,6 @@ extern const struct dentry_operations nfs4_dentry_operations; int nfs_atomic_open(struct inode *, struct dentry *, struct file *, unsigned, umode_t, int *); -/* write.c */ -int nfs4_write_inode(struct inode *, struct writeback_control *); - /* nfs4namespace.c */ rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *); struct rpc_clnt *nfs4_create_sec_client(struct rpc_clnt *, struct inode *, struct qstr *); diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c index c70e1730755..1c825f3bef5 100644 --- a/fs/nfs/nfs4super.c +++ b/fs/nfs/nfs4super.c @@ -6,12 +6,16 @@ #include #include #include +#include "delegation.h" #include "internal.h" #include "nfs4_fs.h" +#include "pnfs.h" #include "nfs.h" #define NFSDBG_FACILITY NFSDBG_VFS +static int nfs4_write_inode(struct inode *inode, struct writeback_control *wbc); +static void nfs4_evict_inode(struct inode *inode); static struct dentry *nfs4_remote_mount(struct file_system_type *fs_type, int flags, const char 
*dev_name, void *raw_data); static struct dentry *nfs4_referral_mount(struct file_system_type *fs_type, @@ -75,6 +79,41 @@ struct nfs_subversion nfs_v4 = { .xattr = nfs4_xattr_handlers, }; +static int nfs4_write_inode(struct inode *inode, struct writeback_control *wbc) +{ + int ret = nfs_write_inode(inode, wbc); + + if (ret >= 0 && test_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(inode)->flags)) { + int status; + bool sync = true; + + if (wbc->sync_mode == WB_SYNC_NONE) + sync = false; + + status = pnfs_layoutcommit_inode(inode, sync); + if (status < 0) + return status; + } + return ret; +} + +/* + * Clean out any remaining NFSv4 state that might be left over due + * to open() calls that passed nfs_atomic_lookup, but failed to call + * nfs_open(). + */ +static void nfs4_evict_inode(struct inode *inode) +{ + truncate_inode_pages(&inode->i_data, 0); + clear_inode(inode); + pnfs_return_layout(inode); + pnfs_destroy_layout(NFS_I(inode)); + /* If we are holding a delegation, return it! */ + nfs_inode_return_delegation_noreclaim(inode); + /* First call standard NFS clear_inode() code */ + nfs_clear_inode(inode); +} + /* * Get the superblock for the NFS4 root partition */ diff --git a/fs/nfs/write.c b/fs/nfs/write.c index f312860c15d..6ddac54dc67 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1674,26 +1674,6 @@ int nfs_write_inode(struct inode *inode, struct writeback_control *wbc) return nfs_commit_unstable_pages(inode, wbc); } -#ifdef CONFIG_NFS_V4 -int nfs4_write_inode(struct inode *inode, struct writeback_control *wbc) -{ - int ret = nfs_write_inode(inode, wbc); - - if (ret >= 0 && test_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(inode)->flags)) { - int status; - bool sync = true; - - if (wbc->sync_mode == WB_SYNC_NONE) - sync = false; - - status = pnfs_layoutcommit_inode(inode, sync); - if (status < 0) - return status; - } - return ret; -} -#endif - /* * flush the inode to disk. 
*/ -- cgit v1.2.3 From fac1e8e4ef417e958060a6c3a061cc1a180bd8ae Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Mon, 30 Jul 2012 16:05:22 -0400 Subject: NFS: Keep module parameters in the generic NFS client Otherwise we break backwards compatibility when v4 becomes a module. Signed-off-by: Trond Myklebust --- fs/nfs/callback.c | 24 ------------------------ fs/nfs/idmap.c | 3 --- fs/nfs/nfs4_fs.h | 4 +++- fs/nfs/nfs4client.c | 9 --------- fs/nfs/nfs4proc.c | 6 ------ fs/nfs/nfs4xdr.c | 6 ------ fs/nfs/super.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ 7 files changed, 48 insertions(+), 49 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 23ff18fe080..ca3ac992028 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -37,31 +37,7 @@ static struct nfs_callback_data nfs_callback_info[NFS4_MAX_MINOR_VERSION + 1]; static DEFINE_MUTEX(nfs_callback_mutex); static struct svc_program nfs4_callback_program; -unsigned int nfs_callback_set_tcpport; -unsigned short nfs_callback_tcpport; unsigned short nfs_callback_tcpport6; -#define NFS_CALLBACK_MAXPORTNR (65535U) - -static int param_set_portnr(const char *val, const struct kernel_param *kp) -{ - unsigned long num; - int ret; - - if (!val) - return -EINVAL; - ret = strict_strtoul(val, 0, &num); - if (ret == -EINVAL || num > NFS_CALLBACK_MAXPORTNR) - return -EINVAL; - *((unsigned int *)kp->arg) = num; - return 0; -} -static struct kernel_param_ops param_ops_portnr = { - .set = param_set_portnr, - .get = param_get_uint, -}; -#define param_check_portnr(name, p) __param_check(name, p, unsigned int); - -module_param_named(callback_tcpport, nfs_callback_set_tcpport, portnr, 0644); /* * This is the NFSv4 callback kernel thread.
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index 1b5058b4043..b701358c39c 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -52,8 +52,6 @@ #define NFS_UINT_MAXLEN 11 -/* Default cache timeout is 10 minutes */ -unsigned int nfs_idmap_cache_timeout = 600; static const struct cred *id_resolver_cache; static struct key_type key_type_id_resolver_legacy; @@ -366,7 +364,6 @@ static int nfs_idmap_lookup_id(const char *name, size_t namelen, const char *typ } /* idmap classic begins here */ -module_param(nfs_idmap_cache_timeout, int, 0644); enum { Opt_find_uid, Opt_find_gid, Opt_find_user, Opt_find_group, Opt_find_err diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 4811e1251d3..bafe5186c9c 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -367,7 +367,9 @@ extern struct nfs_subversion nfs_v4; struct dentry *nfs4_try_mount(int, const char *, struct nfs_mount_info *, struct nfs_subversion *); int init_nfs_v4(void); void exit_nfs_v4(void); - +extern bool nfs4_disable_idmapping; +extern unsigned short max_session_slots; +extern unsigned short send_implementation_id; /* nfs4sysctl.c */ #ifdef CONFIG_SYSCTL int nfs4_register_sysctl(void); diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index b2d409d2805..cbcdfaf3250 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -17,11 +17,6 @@ #define NFSDBG_FACILITY NFSDBG_CLIENT -/* - * Turn off NFSv4 uid/gid mapping when using AUTH_SYS - */ -static bool nfs4_disable_idmapping = true; - /* * Get a unique NFSv4.0 callback identifier which will be used * by the V4.0 callback service to lookup the nfs_client struct @@ -659,7 +654,3 @@ error: dprintk("<-- nfs4_create_referral_server() = error %d\n", error); return ERR_PTR(error); } - -module_param(nfs4_disable_idmapping, bool, 0644); -MODULE_PARM_DESC(nfs4_disable_idmapping, - "Turn off NFSv4 idmapping when using 'sec=sys'"); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 36c6432aac7..a99a8d94872 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c 
@@ -72,8 +72,6 @@ #define NFS4_MAX_LOOP_ON_RECOVER (10) -static unsigned short max_session_slots = NFS4_DEF_SLOT_TABLE_SIZE; - struct nfs4_opendata; static int _nfs4_proc_open(struct nfs4_opendata *data); static int _nfs4_recover_proc_open(struct nfs4_opendata *data); @@ -6932,10 +6930,6 @@ const struct xattr_handler *nfs4_xattr_handlers[] = { NULL }; -module_param(max_session_slots, ushort, 0644); -MODULE_PARM_DESC(max_session_slots, "Maximum number of outstanding NFSv4.1 " - "requests the client will negotiate"); - /* * Local variables: * c-basic-offset: 8 diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 6cbd602e26d..ca13483edd6 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -852,12 +852,6 @@ const u32 nfs41_maxread_overhead = ((RPC_MAX_HEADER_WITH_AUTH + XDR_UNIT); #endif /* CONFIG_NFS_V4_1 */ -static unsigned short send_implementation_id = 1; - -module_param(send_implementation_id, ushort, 0644); -MODULE_PARM_DESC(send_implementation_id, - "Send implementation ID with NFSv4.1 exchange_id"); - static const umode_t nfs_type2fmt[] = { [NF4BAD] = 0, [NF4REG] = S_IFREG, diff --git a/fs/nfs/super.c b/fs/nfs/super.c index a275d19ae51..8e0da5a6b3c 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -2574,4 +2574,49 @@ out_no_address: return -EINVAL; } +/* + * NFS v4 module parameters need to stay in the + * NFS client for backwards compatibility + */ +unsigned int nfs_callback_set_tcpport; +unsigned short nfs_callback_tcpport; +/* Default cache timeout is 10 minutes */ +unsigned int nfs_idmap_cache_timeout = 600; +/* Turn off NFSv4 uid/gid mapping when using AUTH_SYS */ +bool nfs4_disable_idmapping = true; +unsigned short max_session_slots = NFS4_DEF_SLOT_TABLE_SIZE; +unsigned short send_implementation_id = 1; + +#define NFS_CALLBACK_MAXPORTNR (65535U) + +static int param_set_portnr(const char *val, const struct kernel_param *kp) +{ + unsigned long num; + int ret; + + if (!val) + return -EINVAL; + ret = strict_strtoul(val, 0, &num); + if (ret == -EINVAL 
|| num > NFS_CALLBACK_MAXPORTNR) + return -EINVAL; + *((unsigned int *)kp->arg) = num; + return 0; +} +static struct kernel_param_ops param_ops_portnr = { + .set = param_set_portnr, + .get = param_get_uint, +}; +#define param_check_portnr(name, p) __param_check(name, p, unsigned int); + +module_param_named(callback_tcpport, nfs_callback_set_tcpport, portnr, 0644); +module_param(nfs_idmap_cache_timeout, int, 0644); +module_param(nfs4_disable_idmapping, bool, 0644); +MODULE_PARM_DESC(nfs4_disable_idmapping, + "Turn off NFSv4 idmapping when using 'sec=sys'"); +module_param(max_session_slots, ushort, 0644); +MODULE_PARM_DESC(max_session_slots, "Maximum number of outstanding NFSv4.1 " + "requests the client will negotiate"); +module_param(send_implementation_id, ushort, 0644); +MODULE_PARM_DESC(send_implementation_id, + "Send implementation ID with NFSv4.1 exchange_id"); #endif /* CONFIG_NFS_V4 */ -- cgit v1.2.3 From ddda8e0aa8b955e20cb80908189bfa154ab54837 Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Mon, 30 Jul 2012 16:05:23 -0400 Subject: NFS: Convert v2 into a module The module (nfs2.ko) will be created in the same directory as nfs.ko and will be automatically loaded the first time you try to mount over NFS v2. Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/Kconfig | 2 +- fs/nfs/Makefile | 4 +++- fs/nfs/client.c | 12 ++++++------ fs/nfs/dir.c | 13 +++++++++++++ fs/nfs/file.c | 2 ++ fs/nfs/inode.c | 10 ++++++++++ fs/nfs/namespace.c | 2 ++ fs/nfs/nfs.h | 14 -------------- fs/nfs/nfs2super.c | 9 +++++++-- fs/nfs/read.c | 1 + fs/nfs/super.c | 3 +++ fs/nfs/write.c | 2 ++ 12 files changed, 50 insertions(+), 24 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index 404c6a8ac39..6764dbf66d0 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig @@ -30,7 +30,7 @@ config NFS_FS If unsure, say N. 
config NFS_V2 - bool "NFS client support for NFS version 2" + tristate "NFS client support for NFS version 2" depends on NFS_FS default y help diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile index 7ca0125da65..df61db41bfa 100644 --- a/fs/nfs/Makefile +++ b/fs/nfs/Makefile @@ -9,7 +9,6 @@ nfs-y := client.o dir.o file.o getroot.o inode.o super.o \ write.o namespace.o mount_clnt.o \ dns_resolve.o cache_lib.o nfs-$(CONFIG_ROOT_NFS) += nfsroot.o -nfs-$(CONFIG_NFS_V2) += nfs2super.o proc.o nfs2xdr.o nfs-$(CONFIG_NFS_V3) += nfs3super.o nfs3client.o nfs3proc.o nfs3xdr.o nfs-$(CONFIG_NFS_V3_ACL) += nfs3acl.o nfs-$(CONFIG_NFS_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \ @@ -25,6 +24,9 @@ endif nfs-$(CONFIG_NFS_FSCACHE) += fscache.o fscache-index.o +obj-$(CONFIG_NFS_V2) += nfs2.o +nfs2-y := nfs2super.o proc.o nfs2xdr.o + obj-$(CONFIG_PNFS_FILE_LAYOUT) += nfs_layout_nfsv41_files.o nfs_layout_nfsv41_files-y := nfs4filelayout.o nfs4filelayoutdev.o diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 1f2908287cb..fa8acf51033 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -149,11 +149,7 @@ EXPORT_SYMBOL_GPL(unregister_nfs_version); */ int __init nfs_register_versions(void) { - int err = init_nfs_v2(); - if (err) - return err; - - err = init_nfs_v3(); + int err = init_nfs_v3(); if (err) return err; @@ -165,7 +161,6 @@ int __init nfs_register_versions(void) */ void nfs_unregister_versions(void) { - exit_nfs_v2(); exit_nfs_v3(); exit_nfs_v4(); } @@ -222,6 +217,7 @@ error_cleanup: error_0: return ERR_PTR(err); } +EXPORT_SYMBOL_GPL(nfs_alloc_client); #ifdef CONFIG_NFS_V4 /* idr_remove_all is not needed as all id's are removed by nfs_put_client */ @@ -284,6 +280,7 @@ void nfs_free_client(struct nfs_client *clp) dprintk("<-- nfs_free_client()\n"); } +EXPORT_SYMBOL_GPL(nfs_free_client); /* * Release a reference to a shared client record @@ -753,6 +750,7 @@ error: dprintk("<-- nfs_init_client() = xerror %d\n", error); return ERR_PTR(error); } 
+EXPORT_SYMBOL_GPL(nfs_init_client); /* * Create a version 2 or 3 client @@ -1122,6 +1120,7 @@ error: nfs_free_server(server); return ERR_PTR(error); } +EXPORT_SYMBOL_GPL(nfs_create_server); /* * Clone an NFS2, NFS3 or NFS4 server record @@ -1191,6 +1190,7 @@ out_free_server: dprintk("<-- nfs_clone_server() = error %d\n", error); return ERR_PTR(error); } +EXPORT_SYMBOL_GPL(nfs_clone_server); void nfs_clients_init(struct net *net) { diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index d49f1b9cd3f..c382a6d5e17 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -17,6 +17,7 @@ * 6 Jun 1999 Cache readdir lookups in the page cache. -DaveM */ +#include #include #include #include @@ -1196,6 +1197,7 @@ const struct dentry_operations nfs_dentry_operations = { .d_automount = nfs_d_automount, .d_release = nfs_d_release, }; +EXPORT_SYMBOL_GPL(nfs_dentry_operations); struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags) { @@ -1263,6 +1265,7 @@ out: nfs_free_fhandle(fhandle); return res; } +EXPORT_SYMBOL_GPL(nfs_lookup); #ifdef CONFIG_NFS_V4 static int nfs4_lookup_revalidate(struct dentry *, unsigned int); @@ -1508,6 +1511,7 @@ out_error: dput(parent); return error; } +EXPORT_SYMBOL_GPL(nfs_instantiate); /* * Following a failed create operation, we drop the dentry rather @@ -1536,6 +1540,7 @@ out_err: d_drop(dentry); return error; } +EXPORT_SYMBOL_GPL(nfs_create); /* * See comments for nfs_proc_create regarding failed operations. @@ -1563,6 +1568,7 @@ out_err: d_drop(dentry); return status; } +EXPORT_SYMBOL_GPL(nfs_mknod); /* * See comments for nfs_proc_create regarding failed operations. 
@@ -1586,6 +1592,7 @@ out_err: d_drop(dentry); return error; } +EXPORT_SYMBOL_GPL(nfs_mkdir); static void nfs_dentry_handle_enoent(struct dentry *dentry) { @@ -1609,6 +1616,7 @@ int nfs_rmdir(struct inode *dir, struct dentry *dentry) return error; } +EXPORT_SYMBOL_GPL(nfs_rmdir); /* * Remove a file after making sure there are no pending writes, @@ -1680,6 +1688,7 @@ int nfs_unlink(struct inode *dir, struct dentry *dentry) d_rehash(dentry); return error; } +EXPORT_SYMBOL_GPL(nfs_unlink); /* * To create a symbolic link, most file systems instantiate a new inode, @@ -1750,6 +1759,7 @@ int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname) return 0; } +EXPORT_SYMBOL_GPL(nfs_symlink); int nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) @@ -1771,6 +1781,7 @@ nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) } return error; } +EXPORT_SYMBOL_GPL(nfs_link); /* * RENAME @@ -1869,6 +1880,7 @@ out: dput(dentry); return error; } +EXPORT_SYMBOL_GPL(nfs_rename); static DEFINE_SPINLOCK(nfs_access_lru_lock); static LIST_HEAD(nfs_access_lru_list); @@ -2188,6 +2200,7 @@ out_notsup: res = generic_permission(inode, mask); goto out; } +EXPORT_SYMBOL_GPL(nfs_permission); /* * Local variables: diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 1b392542692..5b3e7038955 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -16,6 +16,7 @@ * nfs regular file handling functions */ +#include #include #include #include @@ -865,3 +866,4 @@ const struct file_operations nfs_file_operations = { .check_flags = nfs_check_flags, .setlease = nfs_setlease, }; +EXPORT_SYMBOL_GPL(nfs_file_operations); diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index a6ffa4be2a0..f358b976e9e 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -193,6 +193,7 @@ void nfs_invalidate_atime(struct inode *inode) NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATIME; spin_unlock(&inode->i_lock); } +EXPORT_SYMBOL_GPL(nfs_invalidate_atime); /* * Invalidate, 
but do not unhash, the inode. @@ -438,6 +439,7 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr) out: return error; } +EXPORT_SYMBOL_GPL(nfs_setattr); /** * nfs_vmtruncate - unmap mappings "freed" by truncate() syscall @@ -496,6 +498,7 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr) nfs_vmtruncate(inode, attr->ia_size); } } +EXPORT_SYMBOL_GPL(nfs_setattr_update_inode); int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) { @@ -535,6 +538,7 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) out: return err; } +EXPORT_SYMBOL_GPL(nfs_getattr); static void nfs_init_lock_context(struct nfs_lock_context *l_ctx) { @@ -623,6 +627,7 @@ void nfs_close_context(struct nfs_open_context *ctx, int is_sync) return; nfs_revalidate_inode(server, inode); } +EXPORT_SYMBOL_GPL(nfs_close_context); struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, fmode_t f_mode) { @@ -1028,6 +1033,7 @@ void nfs_fattr_init(struct nfs_fattr *fattr) fattr->owner_name = NULL; fattr->group_name = NULL; } +EXPORT_SYMBOL_GPL(nfs_fattr_init); struct nfs_fattr *nfs_alloc_fattr(void) { @@ -1038,6 +1044,7 @@ struct nfs_fattr *nfs_alloc_fattr(void) nfs_fattr_init(fattr); return fattr; } +EXPORT_SYMBOL_GPL(nfs_alloc_fattr); struct nfs_fh *nfs_alloc_fhandle(void) { @@ -1048,6 +1055,7 @@ struct nfs_fh *nfs_alloc_fhandle(void) fh->size = 0; return fh; } +EXPORT_SYMBOL_GPL(nfs_alloc_fhandle); #ifdef NFS_DEBUG /* @@ -1168,6 +1176,7 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) return status; } +EXPORT_SYMBOL_GPL(nfs_refresh_inode); static int nfs_post_op_update_inode_locked(struct inode *inode, struct nfs_fattr *fattr) { @@ -1255,6 +1264,7 @@ out_noforce: spin_unlock(&inode->i_lock); return status; } +EXPORT_SYMBOL_GPL(nfs_post_op_update_inode_force_wcc); /* * Many nfs protocol calls return the new file attributes after diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index 
0f699fefee6..2a3b170e88e 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -7,6 +7,7 @@ * NFS namespace */ +#include #include #include #include @@ -255,3 +256,4 @@ struct vfsmount *nfs_submount(struct nfs_server *server, struct dentry *dentry, return nfs_do_submount(dentry, fh, fattr, server->client->cl_auth->au_flavor); } +EXPORT_SYMBOL_GPL(nfs_submount); diff --git a/fs/nfs/nfs.h b/fs/nfs/nfs.h index 9f502a0c1e5..f5d1cf5f5dc 100644 --- a/fs/nfs/nfs.h +++ b/fs/nfs/nfs.h @@ -24,20 +24,6 @@ struct nfs_subversion { int nfs_register_versions(void); void nfs_unregister_versions(void); -#ifdef CONFIG_NFS_V2 -int init_nfs_v2(void); -void exit_nfs_v2(void); -#else /* CONFIG_NFS_V2 */ -static inline int __init init_nfs_v2(void) -{ - return 0; -} - -static inline void exit_nfs_v2(void) -{ -} -#endif /* CONFIG_NFS_V2 */ - #ifdef CONFIG_NFS_V3 int init_nfs_v3(void); void exit_nfs_v3(void); diff --git a/fs/nfs/nfs2super.c b/fs/nfs/nfs2super.c index a9fb69d7281..0a9782c9171 100644 --- a/fs/nfs/nfs2super.c +++ b/fs/nfs/nfs2super.c @@ -14,13 +14,18 @@ static struct nfs_subversion nfs_v2 = { .sops = &nfs_sops, }; -int __init init_nfs_v2(void) +static int __init init_nfs_v2(void) { register_nfs_version(&nfs_v2); return 0; } -void exit_nfs_v2(void) +static void __exit exit_nfs_v2(void) { unregister_nfs_version(&nfs_v2); } + +MODULE_LICENSE("GPL"); + +module_init(init_nfs_v2); +module_exit(exit_nfs_v2); diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 6267b873bbc..b000e4c0cf8 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -113,6 +113,7 @@ void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, nfs_pageio_init(pgio, inode, &nfs_pageio_read_ops, compl_ops, NFS_SERVER(inode)->rsize, 0); } +EXPORT_SYMBOL_GPL(nfs_pageio_init_read); void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio) { diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 8e0da5a6b3c..999ce750514 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -289,6 +289,7 @@ struct file_system_type 
nfs_fs_type = { .kill_sb = nfs_kill_super, .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, }; +EXPORT_SYMBOL_GPL(nfs_fs_type); struct file_system_type nfs_xdev_fs_type = { .owner = THIS_MODULE, @@ -312,6 +313,7 @@ const struct super_operations nfs_sops = { .show_stats = nfs_show_stats, .remount_fs = nfs_remount, }; +EXPORT_SYMBOL_GPL(nfs_sops); #ifdef CONFIG_NFS_V4 static void nfs4_validate_mount_flags(struct nfs_parsed_mount_data *); @@ -1670,6 +1672,7 @@ struct dentry *nfs_try_mount(int flags, const char *dev_name, return nfs_fs_mount_common(server, flags, dev_name, mount_info, nfs_mod); } +EXPORT_SYMBOL_GPL(nfs_try_mount); /* * Split "dev_name" into "hostname:export_path". diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 6ddac54dc67..1e8d4b04376 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1210,6 +1210,7 @@ void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, nfs_pageio_init(pgio, inode, &nfs_pageio_write_ops, compl_ops, NFS_SERVER(inode)->wsize, ioflags); } +EXPORT_SYMBOL_GPL(nfs_pageio_init_write); void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio) { @@ -1688,6 +1689,7 @@ int nfs_wb_all(struct inode *inode) return sync_inode(inode, &wbc); } +EXPORT_SYMBOL_GPL(nfs_wb_all); int nfs_wb_page_cancel(struct inode *inode, struct page *page) { -- cgit v1.2.3 From 1c606fb74c758beafd98cbad9a9133eadeec2371 Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Mon, 30 Jul 2012 16:05:24 -0400 Subject: NFS: Convert v3 into a module This patch exports symbols and moves over the final structures needed by the v3 module. In addition, I also switch over to using IS_ENABLED() to check if CONFIG_NFS_V3 or CONFIG_NFS_V3_MODULE are set. The module (nfs3.ko) will be created in the same directory as nfs.ko and will be automatically loaded the first time you try to mount over NFS v3. 
Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/Kconfig | 2 +- fs/nfs/Makefile | 6 ++++-- fs/nfs/client.c | 5 ----- fs/nfs/dir.c | 1 + fs/nfs/direct.c | 2 +- fs/nfs/inode.c | 3 +++ fs/nfs/internal.h | 2 +- fs/nfs/nfs.h | 14 -------------- fs/nfs/nfs3super.c | 9 +++++++-- fs/nfs/super.c | 6 +++--- fs/nfs/write.c | 8 ++++---- 11 files changed, 25 insertions(+), 33 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index 6764dbf66d0..f81a729c00e 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig @@ -40,7 +40,7 @@ config NFS_V2 If unsure, say Y. config NFS_V3 - bool "NFS client support for NFS version 3" + tristate "NFS client support for NFS version 3" depends on NFS_FS default y help diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile index df61db41bfa..01846edc5c9 100644 --- a/fs/nfs/Makefile +++ b/fs/nfs/Makefile @@ -9,8 +9,6 @@ nfs-y := client.o dir.o file.o getroot.o inode.o super.o \ write.o namespace.o mount_clnt.o \ dns_resolve.o cache_lib.o nfs-$(CONFIG_ROOT_NFS) += nfsroot.o -nfs-$(CONFIG_NFS_V3) += nfs3super.o nfs3client.o nfs3proc.o nfs3xdr.o -nfs-$(CONFIG_NFS_V3_ACL) += nfs3acl.o nfs-$(CONFIG_NFS_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \ nfs4super.o nfs4file.o delegation.o idmap.o \ callback.o callback_xdr.o callback_proc.o \ @@ -27,6 +25,10 @@ nfs-$(CONFIG_NFS_FSCACHE) += fscache.o fscache-index.o obj-$(CONFIG_NFS_V2) += nfs2.o nfs2-y := nfs2super.o proc.o nfs2xdr.o +obj-$(CONFIG_NFS_V3) += nfs3.o +nfs3-y := nfs3super.o nfs3client.o nfs3proc.o nfs3xdr.o +nfs3-$(CONFIG_NFS_V3_ACL) += nfs3acl.o + obj-$(CONFIG_PNFS_FILE_LAYOUT) += nfs_layout_nfsv41_files.o nfs_layout_nfsv41_files-y := nfs4filelayout.o nfs4filelayoutdev.o diff --git a/fs/nfs/client.c b/fs/nfs/client.c index fa8acf51033..8687b6b6edc 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -149,10 +149,6 @@ EXPORT_SYMBOL_GPL(unregister_nfs_version); */ int __init nfs_register_versions(void) { - int err = init_nfs_v3(); - if (err) - 
return err; - return init_nfs_v4(); } @@ -161,7 +157,6 @@ int __init nfs_register_versions(void) */ void nfs_unregister_versions(void) { - exit_nfs_v3(); exit_nfs_v4(); } diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index c382a6d5e17..55438c970cb 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1981,6 +1981,7 @@ void nfs_access_zap_cache(struct inode *inode) spin_unlock(&nfs_access_lru_lock); nfs_access_free_list(&head); } +EXPORT_SYMBOL_GPL(nfs_access_zap_cache); static struct nfs_access_entry *nfs_access_search_rbtree(struct inode *inode, struct rpc_cred *cred) { diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 42dce909ec7..899238156b1 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -460,7 +460,7 @@ static void nfs_inode_dio_write_done(struct inode *inode) inode_dio_done(inode); } -#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) +#if IS_ENABLED(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) { struct nfs_pageio_descriptor desc; diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index f358b976e9e..78dfc3e895e 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -186,6 +186,7 @@ void nfs_zap_acl_cache(struct inode *inode) NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_ACL; spin_unlock(&inode->i_lock); } +EXPORT_SYMBOL_GPL(nfs_zap_acl_cache); void nfs_invalidate_atime(struct inode *inode) { @@ -847,6 +848,7 @@ int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) return NFS_STALE(inode) ? 
-ESTALE : 0; return __nfs_revalidate_inode(server, inode); } +EXPORT_SYMBOL_GPL(nfs_revalidate_inode); static int nfs_invalidate_mapping(struct inode *inode, struct address_space *mapping) { @@ -1213,6 +1215,7 @@ int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr) spin_unlock(&inode->i_lock); return status; } +EXPORT_SYMBOL_GPL(nfs_post_op_update_inode); /** * nfs_post_op_update_inode_force_wcc - try to update the inode attribute cache diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 4174faf73ec..64f0dc41a9b 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -224,7 +224,7 @@ int nfs_sockaddr_match_ipaddr(const struct sockaddr *, const struct sockaddr *); #endif /* nfs3client.c */ -#ifdef CONFIG_NFS_V3 +#if IS_ENABLED(CONFIG_NFS_V3) struct nfs_server *nfs3_create_server(struct nfs_mount_info *, struct nfs_subversion *); struct nfs_server *nfs3_clone_server(struct nfs_server *, struct nfs_fh *, struct nfs_fattr *, rpc_authflavor_t); diff --git a/fs/nfs/nfs.h b/fs/nfs/nfs.h index f5d1cf5f5dc..3e1b84baa57 100644 --- a/fs/nfs/nfs.h +++ b/fs/nfs/nfs.h @@ -24,20 +24,6 @@ struct nfs_subversion { int nfs_register_versions(void); void nfs_unregister_versions(void); -#ifdef CONFIG_NFS_V3 -int init_nfs_v3(void); -void exit_nfs_v3(void); -#else /* CONFIG_NFS_V3 */ -static inline int __init init_nfs_v3(void) -{ - return 0; -} - -static inline void exit_nfs_v3(void) -{ -} -#endif /* CONFIG_NFS_V3 */ - #ifdef CONFIG_NFS_V4 int init_nfs_v4(void); void exit_nfs_v4(void); diff --git a/fs/nfs/nfs3super.c b/fs/nfs/nfs3super.c index 8378090b810..cc471c72523 100644 --- a/fs/nfs/nfs3super.c +++ b/fs/nfs/nfs3super.c @@ -14,13 +14,18 @@ static struct nfs_subversion nfs_v3 = { .sops = &nfs_sops, }; -int __init init_nfs_v3(void) +static int __init init_nfs_v3(void) { register_nfs_version(&nfs_v3); return 0; } -void exit_nfs_v3(void) +static void __exit exit_nfs_v3(void) { unregister_nfs_version(&nfs_v3); } + +MODULE_LICENSE("GPL"); + 
+module_init(init_nfs_v3); +module_exit(exit_nfs_v3); diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 999ce750514..558a85c9594 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -69,7 +69,7 @@ #define NFSDBG_FACILITY NFSDBG_VFS #define NFS_TEXT_DATA 1 -#ifdef CONFIG_NFS_V3 +#if IS_ENABLED(CONFIG_NFS_V3) #define NFS_DEFAULT_VERSION 3 #else #define NFS_DEFAULT_VERSION 2 @@ -1876,7 +1876,7 @@ static int nfs23_validate_mount_data(void *options, return NFS_TEXT_DATA; } -#ifndef CONFIG_NFS_V3 +#if !IS_ENABLED(CONFIG_NFS_V3) if (args->version == 3) goto out_v3_not_compiled; #endif /* !CONFIG_NFS_V3 */ @@ -1896,7 +1896,7 @@ out_no_sec: dfprintk(MOUNT, "NFS: nfs_mount_data version supports only AUTH_SYS\n"); return -EINVAL; -#ifndef CONFIG_NFS_V3 +#if !IS_ENABLED(CONFIG_NFS_V3) out_v3_not_compiled: dfprintk(MOUNT, "NFS: NFSv3 is not compiled into kernel\n"); return -EPROTONOSUPPORT; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 1e8d4b04376..f268fe4f278 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -446,7 +446,7 @@ nfs_mark_request_dirty(struct nfs_page *req) __set_page_dirty_nobuffers(req->wb_page); } -#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) +#if IS_ENABLED(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) /** * nfs_request_add_commit_list - add request to a commit list * @req: pointer to a struct nfs_page @@ -636,7 +636,7 @@ out: hdr->release(hdr); } -#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) +#if IS_ENABLED(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) static unsigned long nfs_reqs_to_commit(struct nfs_commit_info *cinfo) { @@ -1298,7 +1298,7 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) return; nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, resp->count); -#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) +#if IS_ENABLED(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) if (resp->verf->committed < argp->stable && task->tk_status >= 0) { /* We tried a write call, but the server did not * commit data to stable storage 
even though we @@ -1358,7 +1358,7 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) } -#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) +#if IS_ENABLED(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) static int nfs_commit_set_lock(struct nfs_inode *nfsi, int may_wait) { int ret; -- cgit v1.2.3 From 89d77c8fa8e6d1cb7e2cce95b428be30ddcc6f23 Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Mon, 30 Jul 2012 16:05:25 -0400 Subject: NFS: Convert v4 into a module This patch exports symbols needed by the v4 module. In addition, I also switch over to using IS_ENABLED() to check if CONFIG_NFS_V4 or CONFIG_NFS_V4_MODULE are set. The module (nfs4.ko) will be created in the same directory as nfs.ko and will be automatically loaded the first time you try to mount over NFS v4. Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/Kconfig | 2 +- fs/nfs/Makefile | 19 +++++++--------- fs/nfs/callback.h | 2 +- fs/nfs/client.c | 34 ++++++++++++---------------- fs/nfs/delegation.h | 2 +- fs/nfs/dir.c | 6 ++++- fs/nfs/direct.c | 2 +- fs/nfs/dns_resolve.c | 4 ++++ fs/nfs/file.c | 13 +++++++++++ fs/nfs/inode.c | 64 ++++++++++++++++++++++++++-------------------------- fs/nfs/internal.h | 8 +++---- fs/nfs/namespace.c | 2 ++ fs/nfs/netns.h | 2 +- fs/nfs/nfs.h | 17 -------------- fs/nfs/nfs4_fs.h | 5 ++-- fs/nfs/nfs4super.c | 9 ++++++-- fs/nfs/pagelist.c | 4 ++++ fs/nfs/pnfs.c | 2 ++ fs/nfs/read.c | 4 ++++ fs/nfs/super.c | 41 +++++++++++++++++++++++++-------- fs/nfs/write.c | 13 +++++++---- 21 files changed, 147 insertions(+), 108 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index f81a729c00e..195c1ea6151 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig @@ -72,7 +72,7 @@ config NFS_V3_ACL If unsure, say N. 
config NFS_V4 - bool "NFS client support for NFS version 4" + tristate "NFS client support for NFS version 4" depends on NFS_FS select SUNRPC_GSS select KEYS diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile index 01846edc5c9..8bf3a3f6925 100644 --- a/fs/nfs/Makefile +++ b/fs/nfs/Makefile @@ -9,17 +9,7 @@ nfs-y := client.o dir.o file.o getroot.o inode.o super.o \ write.o namespace.o mount_clnt.o \ dns_resolve.o cache_lib.o nfs-$(CONFIG_ROOT_NFS) += nfsroot.o -nfs-$(CONFIG_NFS_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \ - nfs4super.o nfs4file.o delegation.o idmap.o \ - callback.o callback_xdr.o callback_proc.o \ - nfs4namespace.o nfs4getroot.o nfs4client.o -nfs-$(CONFIG_NFS_V4_1) += pnfs.o pnfs_dev.o - -ifeq ($(CONFIG_SYSCTL), y) -nfs-y += sysctl.o -nfs-$(CONFIG_NFS_V4) += nfs4sysctl.o -endif - +nfs-$(CONFIG_SYSCTL) += sysctl.o nfs-$(CONFIG_NFS_FSCACHE) += fscache.o fscache-index.o obj-$(CONFIG_NFS_V2) += nfs2.o @@ -29,6 +19,13 @@ obj-$(CONFIG_NFS_V3) += nfs3.o nfs3-y := nfs3super.o nfs3client.o nfs3proc.o nfs3xdr.o nfs3-$(CONFIG_NFS_V3_ACL) += nfs3acl.o +obj-$(CONFIG_NFS_V4) += nfs4.o +nfs4-y := nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o nfs4super.o nfs4file.o \ + delegation.o idmap.o callback.o callback_xdr.o callback_proc.o \ + nfs4namespace.o nfs4getroot.o nfs4client.o +nfs4-$(CONFIG_SYSCTL) += nfs4sysctl.o +nfs4-$(CONFIG_NFS_V4_1) += pnfs.o pnfs_dev.o + obj-$(CONFIG_PNFS_FILE_LAYOUT) += nfs_layout_nfsv41_files.o nfs_layout_nfsv41_files-y := nfs4filelayout.o nfs4filelayoutdev.o diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h index a5527c90a5a..b44d7b128b7 100644 --- a/fs/nfs/callback.h +++ b/fs/nfs/callback.h @@ -192,7 +192,7 @@ extern __be32 nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_process_state *cps); extern __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy, struct cb_process_state *cps); -#ifdef CONFIG_NFS_V4 +#if IS_ENABLED(CONFIG_NFS_V4) extern int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt); 
extern void nfs_callback_down(int minorversion); extern int nfs4_validate_delegation_stateid(struct nfs_delegation *delegation, diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 8687b6b6edc..9fc0d9dfc91 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -142,24 +142,6 @@ void unregister_nfs_version(struct nfs_subversion *nfs) } EXPORT_SYMBOL_GPL(unregister_nfs_version); -/* - * Preload all configured NFS versions during module init. - * This function should be edited after each protocol is converted, - * and eventually removed. - */ -int __init nfs_register_versions(void) -{ - return init_nfs_v4(); -} - -/* - * Remove each pre-loaded NFS version - */ -void nfs_unregister_versions(void) -{ - exit_nfs_v4(); -} - /* * Allocate a shared client record * @@ -214,7 +196,7 @@ error_0: } EXPORT_SYMBOL_GPL(nfs_alloc_client); -#ifdef CONFIG_NFS_V4 +#if IS_ENABLED(CONFIG_NFS_V4) /* idr_remove_all is not needed as all id's are removed by nfs_put_client */ void nfs_cleanup_cb_ident_idr(struct net *net) { @@ -390,6 +372,7 @@ int nfs_sockaddr_match_ipaddr(const struct sockaddr *sa1, } return 0; } +EXPORT_SYMBOL_GPL(nfs_sockaddr_match_ipaddr); #endif /* CONFIG_NFS_V4_1 */ /* @@ -456,6 +439,7 @@ int nfs_wait_client_init_complete(const struct nfs_client *clp) return wait_event_killable(nfs_client_active_wq, nfs_client_init_is_complete(clp)); } +EXPORT_SYMBOL_GPL(nfs_wait_client_init_complete); /* * Found an existing client. Make sure it's ready before returning. 
@@ -530,6 +514,7 @@ nfs_get_client(const struct nfs_client_initdata *cl_init, cl_init->hostname ?: "", PTR_ERR(new)); return new; } +EXPORT_SYMBOL_GPL(nfs_get_client); /* * Mark a server as ready or failed @@ -540,6 +525,7 @@ void nfs_mark_client_ready(struct nfs_client *clp, int state) clp->cl_cons_state = state; wake_up_all(&nfs_client_active_wq); } +EXPORT_SYMBOL_GPL(nfs_mark_client_ready); /* * Initialise the timeout values for a connection @@ -581,6 +567,7 @@ void nfs_init_timeout_values(struct rpc_timeout *to, int proto, BUG(); } } +EXPORT_SYMBOL_GPL(nfs_init_timeout_values); /* * Create an RPC client handle @@ -620,6 +607,7 @@ int nfs_create_rpc_client(struct nfs_client *clp, clp->cl_rpcclient = clnt; return 0; } +EXPORT_SYMBOL_GPL(nfs_create_rpc_client); /* * Version 2 or 3 client destruction @@ -706,6 +694,7 @@ int nfs_init_server_rpcclient(struct nfs_server *server, return 0; } +EXPORT_SYMBOL_GPL(nfs_init_server_rpcclient); /** * nfs_init_client - Initialise an NFS2 or NFS3 client @@ -932,6 +921,7 @@ out_error: dprintk("nfs_probe_fsinfo: error = %d\n", -error); return error; } +EXPORT_SYMBOL_GPL(nfs_probe_fsinfo); /* * Copy useful information when duplicating a server record @@ -948,6 +938,7 @@ void nfs_server_copy_userdata(struct nfs_server *target, struct nfs_server *sour target->caps = source->caps; target->options = source->options; } +EXPORT_SYMBOL_GPL(nfs_server_copy_userdata); void nfs_server_insert_lists(struct nfs_server *server) { @@ -961,6 +952,7 @@ void nfs_server_insert_lists(struct nfs_server *server) spin_unlock(&nn->nfs_client_lock); } +EXPORT_SYMBOL_GPL(nfs_server_insert_lists); static void nfs_server_remove_lists(struct nfs_server *server) { @@ -1020,6 +1012,7 @@ struct nfs_server *nfs_alloc_server(void) return server; } +EXPORT_SYMBOL_GPL(nfs_alloc_server); /* * Free up a server record @@ -1048,6 +1041,7 @@ void nfs_free_server(struct nfs_server *server) nfs_release_automount_timer(); dprintk("<-- nfs_free_server()\n"); } 
+EXPORT_SYMBOL_GPL(nfs_free_server); /* * Create a version 2 or 3 volume record @@ -1193,7 +1187,7 @@ void nfs_clients_init(struct net *net) INIT_LIST_HEAD(&nn->nfs_client_list); INIT_LIST_HEAD(&nn->nfs_volume_list); -#ifdef CONFIG_NFS_V4 +#if IS_ENABLED(CONFIG_NFS_V4) idr_init(&nn->cb_ident_idr); #endif spin_lock_init(&nn->nfs_client_lock); diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h index 1f3ccd93463..bbc6a4dba0d 100644 --- a/fs/nfs/delegation.h +++ b/fs/nfs/delegation.h @@ -8,7 +8,7 @@ #ifndef FS_NFS_DELEGATION_H #define FS_NFS_DELEGATION_H -#if defined(CONFIG_NFS_V4) +#if IS_ENABLED(CONFIG_NFS_V4) /* * NFSv4 delegation */ diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 55438c970cb..627f108ede2 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -936,6 +936,7 @@ void nfs_force_lookup_revalidate(struct inode *dir) { NFS_I(dir)->cache_change_attribute++; } +EXPORT_SYMBOL_GPL(nfs_force_lookup_revalidate); /* * A check for whether or not the parent directory has changed. @@ -1267,7 +1268,7 @@ out: } EXPORT_SYMBOL_GPL(nfs_lookup); -#ifdef CONFIG_NFS_V4 +#if IS_ENABLED(CONFIG_NFS_V4) static int nfs4_lookup_revalidate(struct dentry *, unsigned int); const struct dentry_operations nfs4_dentry_operations = { @@ -1277,6 +1278,7 @@ const struct dentry_operations nfs4_dentry_operations = { .d_automount = nfs_d_automount, .d_release = nfs_d_release, }; +EXPORT_SYMBOL_GPL(nfs4_dentry_operations); static fmode_t flags_to_mode(int flags) { @@ -1419,6 +1421,7 @@ no_open: return finish_no_open(file, res); } +EXPORT_SYMBOL_GPL(nfs_atomic_open); static int nfs4_lookup_revalidate(struct dentry *dentry, unsigned int flags) { @@ -2142,6 +2145,7 @@ int nfs_may_open(struct inode *inode, struct rpc_cred *cred, int openflags) { return nfs_do_access(inode, cred, nfs_open_permission_mask(openflags)); } +EXPORT_SYMBOL_GPL(nfs_may_open); int nfs_permission(struct inode *inode, int mask) { diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 899238156b1..b7b4f80968b 100644 --- 
a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -460,7 +460,7 @@ static void nfs_inode_dio_write_done(struct inode *inode) inode_dio_done(inode); } -#if IS_ENABLED(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) +#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4) static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) { struct nfs_pageio_descriptor desc; diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c index b3924b8a600..31c26c4dcc2 100644 --- a/fs/nfs/dns_resolve.c +++ b/fs/nfs/dns_resolve.c @@ -8,6 +8,7 @@ #ifdef CONFIG_NFS_USE_KERNEL_DNS +#include #include #include #include "dns_resolve.h" @@ -27,9 +28,11 @@ ssize_t nfs_dns_resolve_name(struct net *net, char *name, size_t namelen, kfree(ip_addr); return ret; } +EXPORT_SYMBOL_GPL(nfs_dns_resolve_name); #else +#include #include #include #include @@ -345,6 +348,7 @@ ssize_t nfs_dns_resolve_name(struct net *net, char *name, ret = -ESRCH; return ret; } +EXPORT_SYMBOL_GPL(nfs_dns_resolve_name); int nfs_dns_resolver_cache_init(struct net *net) { diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 5b3e7038955..1557978ca7b 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -53,6 +53,7 @@ int nfs_check_flags(int flags) return 0; } +EXPORT_SYMBOL_GPL(nfs_check_flags); /* * Open file @@ -85,6 +86,7 @@ nfs_file_release(struct inode *inode, struct file *filp) nfs_inc_stats(inode, NFSIOS_VFSRELEASE); return nfs_release(inode, filp); } +EXPORT_SYMBOL_GPL(nfs_file_release); /** * nfs_revalidate_size - Revalidate the file size @@ -138,6 +140,7 @@ loff_t nfs_file_llseek(struct file *filp, loff_t offset, int origin) return generic_file_llseek(filp, offset, origin); } +EXPORT_SYMBOL_GPL(nfs_file_llseek); /* * Flush all dirty pages, and check for write errors. 
@@ -166,6 +169,7 @@ nfs_file_flush(struct file *file, fl_owner_t id) /* Flush writes to the server and return any errors */ return vfs_fsync(file, 0); } +EXPORT_SYMBOL_GPL(nfs_file_flush); ssize_t nfs_file_read(struct kiocb *iocb, const struct iovec *iov, @@ -190,6 +194,7 @@ nfs_file_read(struct kiocb *iocb, const struct iovec *iov, } return result; } +EXPORT_SYMBOL_GPL(nfs_file_read); ssize_t nfs_file_splice_read(struct file *filp, loff_t *ppos, @@ -212,6 +217,7 @@ nfs_file_splice_read(struct file *filp, loff_t *ppos, } return res; } +EXPORT_SYMBOL_GPL(nfs_file_splice_read); int nfs_file_mmap(struct file * file, struct vm_area_struct * vma) @@ -233,6 +239,7 @@ nfs_file_mmap(struct file * file, struct vm_area_struct * vma) } return status; } +EXPORT_SYMBOL_GPL(nfs_file_mmap); /* * Flush any dirty pages for this process, and check for write errors. @@ -271,6 +278,7 @@ nfs_file_fsync_commit(struct file *file, loff_t start, loff_t end, int datasync) ret = status; return ret; } +EXPORT_SYMBOL_GPL(nfs_file_fsync_commit); static int nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) @@ -615,6 +623,7 @@ out_swapfile: printk(KERN_INFO "NFS: attempt to write to active swap file!\n"); goto out; } +EXPORT_SYMBOL_GPL(nfs_file_write); ssize_t nfs_file_splice_write(struct pipe_inode_info *pipe, struct file *filp, loff_t *ppos, @@ -646,6 +655,7 @@ ssize_t nfs_file_splice_write(struct pipe_inode_info *pipe, nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, written); return ret; } +EXPORT_SYMBOL_GPL(nfs_file_splice_write); static int do_getlk(struct file *filp, int cmd, struct file_lock *fl, int is_local) @@ -806,6 +816,7 @@ int nfs_lock(struct file *filp, int cmd, struct file_lock *fl) out_err: return ret; } +EXPORT_SYMBOL_GPL(nfs_lock); /* * Lock a (portion of) a file @@ -835,6 +846,7 @@ int nfs_flock(struct file *filp, int cmd, struct file_lock *fl) return do_unlk(filp, cmd, fl, is_local); return do_setlk(filp, cmd, fl, is_local); } 
+EXPORT_SYMBOL_GPL(nfs_flock); /* * There is no protocol support for leases, so we have no way to implement @@ -847,6 +859,7 @@ int nfs_setlease(struct file *file, long arg, struct file_lock **fl) file->f_path.dentry->d_name.name, arg); return -EINVAL; } +EXPORT_SYMBOL_GPL(nfs_setlease); const struct file_operations nfs_file_operations = { .llseek = nfs_file_llseek, diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 78dfc3e895e..2ed6138f32a 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -82,6 +82,7 @@ int nfs_wait_bit_killable(void *word) freezable_schedule(); return 0; } +EXPORT_SYMBOL_GPL(nfs_wait_bit_killable); /** * nfs_compat_user_ino64 - returns the user-visible inode number @@ -117,6 +118,7 @@ void nfs_clear_inode(struct inode *inode) nfs_access_zap_cache(inode); nfs_fscache_release_inode_cookie(inode); } +EXPORT_SYMBOL_GPL(nfs_clear_inode); void nfs_evict_inode(struct inode *inode) { @@ -393,6 +395,7 @@ out_no_inode: dprintk("nfs_fhget: iget failed with error %ld\n", PTR_ERR(inode)); goto out; } +EXPORT_SYMBOL_GPL(nfs_fhget); #define NFS_VALID_ATTRS (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_SIZE|ATTR_ATIME|ATTR_ATIME_SET|ATTR_MTIME|ATTR_MTIME_SET|ATTR_FILE|ATTR_OPEN) @@ -655,6 +658,7 @@ struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, fmode_t f ctx->mdsthreshold = NULL; return ctx; } +EXPORT_SYMBOL_GPL(alloc_nfs_open_context); struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx) { @@ -662,6 +666,7 @@ struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx) atomic_inc(&ctx->lock_context.count); return ctx; } +EXPORT_SYMBOL_GPL(get_nfs_open_context); static void __put_nfs_open_context(struct nfs_open_context *ctx, int is_sync) { @@ -689,6 +694,7 @@ void put_nfs_open_context(struct nfs_open_context *ctx) { __put_nfs_open_context(ctx, 0); } +EXPORT_SYMBOL_GPL(put_nfs_open_context); /* * Ensure that mmap has a recent RPC credential for use when writing out @@ -704,6 +710,7 @@ void 
nfs_file_set_open_context(struct file *filp, struct nfs_open_context *ctx) list_add(&ctx->list, &nfsi->open_files); spin_unlock(&inode->i_lock); } +EXPORT_SYMBOL_GPL(nfs_file_set_open_context); /* * Given an inode, search for an open context with the desired characteristics @@ -1497,11 +1504,12 @@ struct inode *nfs_alloc_inode(struct super_block *sb) nfsi->acl_access = ERR_PTR(-EAGAIN); nfsi->acl_default = ERR_PTR(-EAGAIN); #endif -#ifdef CONFIG_NFS_V4 +#if IS_ENABLED(CONFIG_NFS_V4) nfsi->nfs4_acl = NULL; #endif /* CONFIG_NFS_V4 */ return &nfsi->vfs_inode; } +EXPORT_SYMBOL_GPL(nfs_alloc_inode); static void nfs_i_callback(struct rcu_head *head) { @@ -1513,10 +1521,11 @@ void nfs_destroy_inode(struct inode *inode) { call_rcu(&inode->i_rcu, nfs_i_callback); } +EXPORT_SYMBOL_GPL(nfs_destroy_inode); static inline void nfs4_init_once(struct nfs_inode *nfsi) { -#ifdef CONFIG_NFS_V4 +#if IS_ENABLED(CONFIG_NFS_V4) INIT_LIST_HEAD(&nfsi->open_states); nfsi->delegation = NULL; nfsi->delegation_state = 0; @@ -1562,6 +1571,7 @@ static void nfs_destroy_inodecache(void) } struct workqueue_struct *nfsiod_workqueue; +EXPORT_SYMBOL_GPL(nfsiod_workqueue); /* * start up the nfsiod workqueue @@ -1622,90 +1632,80 @@ static int __init init_nfs_fs(void) err = nfs_dns_resolver_init(); if (err < 0) - goto out11; + goto out10;; err = register_pernet_subsys(&nfs_net_ops); if (err < 0) - goto out10; + goto out9; err = nfs_fscache_register(); if (err < 0) - goto out9; + goto out8; err = nfsiod_start(); if (err) - goto out8; + goto out7; err = nfs_fs_proc_init(); if (err) - goto out7; + goto out6; err = nfs_init_nfspagecache(); if (err) - goto out6; + goto out5; err = nfs_init_inodecache(); if (err) - goto out5; + goto out4; err = nfs_init_readpagecache(); if (err) - goto out4; + goto out3; err = nfs_init_writepagecache(); if (err) - goto out3; + goto out2; err = nfs_init_directcache(); if (err) - goto out2; + goto out1; #ifdef CONFIG_PROC_FS rpc_proc_register(&init_net, &nfs_rpcstat); #endif - - 
err = nfs_register_versions(); - if (err) - goto out1; - if ((err = register_nfs_fs()) != 0) goto out0; return 0; out0: - nfs_unregister_versions(); -out1: #ifdef CONFIG_PROC_FS rpc_proc_unregister(&init_net, "nfs"); #endif nfs_destroy_directcache(); -out2: +out1: nfs_destroy_writepagecache(); -out3: +out2: nfs_destroy_readpagecache(); -out4: +out3: nfs_destroy_inodecache(); -out5: +out4: nfs_destroy_nfspagecache(); -out6: +out5: nfs_fs_proc_exit(); -out7: +out6: nfsiod_stop(); -out8: +out7: nfs_fscache_unregister(); -out9: +out8: unregister_pernet_subsys(&nfs_net_ops); -out10: +out9: nfs_dns_resolver_destroy(); -out11: +out10: return err; } static void __exit exit_nfs_fs(void) { -#ifdef CONFIG_NFS_V4 - exit_nfs_v4(); -#endif nfs_destroy_directcache(); nfs_destroy_writepagecache(); nfs_destroy_readpagecache(); diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 64f0dc41a9b..8865538b26b 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -262,7 +262,7 @@ extern int nfs3_decode_dirent(struct xdr_stream *, struct nfs_entry *, int); /* nfs4xdr.c */ -#ifdef CONFIG_NFS_V4 +#if IS_ENABLED(CONFIG_NFS_V4) extern int nfs4_decode_dirent(struct xdr_stream *, struct nfs_entry *, int); #endif @@ -272,7 +272,7 @@ extern const u32 nfs41_maxwrite_overhead; #endif /* nfs4proc.c */ -#ifdef CONFIG_NFS_V4 +#if IS_ENABLED(CONFIG_NFS_V4) extern struct rpc_procinfo nfs4_procedures[]; #endif @@ -328,7 +328,7 @@ extern int nfs_wait_bit_killable(void *word); extern const struct super_operations nfs_sops; extern struct file_system_type nfs_fs_type; extern struct file_system_type nfs_xdev_fs_type; -#ifdef CONFIG_NFS_V4 +#if IS_ENABLED(CONFIG_NFS_V4) extern struct file_system_type nfs4_xdev_fs_type; extern struct file_system_type nfs4_referral_fs_type; #endif @@ -364,7 +364,7 @@ struct vfsmount *nfs_do_submount(struct dentry *, struct nfs_fh *, /* getroot.c */ extern struct dentry *nfs_get_root(struct super_block *, struct nfs_fh *, const char *); -#ifdef CONFIG_NFS_V4 +#if 
IS_ENABLED(CONFIG_NFS_V4) extern struct dentry *nfs4_get_root(struct super_block *, struct nfs_fh *, const char *); diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index 2a3b170e88e..655925373b9 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -113,6 +113,7 @@ Elong_unlock: Elong: return ERR_PTR(-ENAMETOOLONG); } +EXPORT_SYMBOL_GPL(nfs_path); /* * nfs_d_automount - Handle crossing a mountpoint on the server @@ -241,6 +242,7 @@ out: dprintk("<-- nfs_do_submount() = %p\n", mnt); return mnt; } +EXPORT_SYMBOL_GPL(nfs_do_submount); struct vfsmount *nfs_submount(struct nfs_server *server, struct dentry *dentry, struct nfs_fh *fh, struct nfs_fattr *fattr) diff --git a/fs/nfs/netns.h b/fs/nfs/netns.h index 8a6394edb8b..0539de1b8d1 100644 --- a/fs/nfs/netns.h +++ b/fs/nfs/netns.h @@ -20,7 +20,7 @@ struct nfs_net { wait_queue_head_t bl_wq; struct list_head nfs_client_list; struct list_head nfs_volume_list; -#ifdef CONFIG_NFS_V4 +#if IS_ENABLED(CONFIG_NFS_V4) struct idr cb_ident_idr; /* Protected by nfs_client_lock */ #endif spinlock_t nfs_client_lock; diff --git a/fs/nfs/nfs.h b/fs/nfs/nfs.h index 3e1b84baa57..43679df56cd 100644 --- a/fs/nfs/nfs.h +++ b/fs/nfs/nfs.h @@ -21,23 +21,6 @@ struct nfs_subversion { struct list_head list; /* List of NFS versions */ }; -int nfs_register_versions(void); -void nfs_unregister_versions(void); - -#ifdef CONFIG_NFS_V4 -int init_nfs_v4(void); -void exit_nfs_v4(void); -#else /* CONFIG_NFS_V4 */ -static inline int __init init_nfs_v4(void) -{ - return 0; -} - -static inline void exit_nfs_v4(void) -{ -} -#endif /* CONFIG_NFS_V4 */ - struct nfs_subversion *get_nfs_version(unsigned int); void put_nfs_version(struct nfs_subversion *); void register_nfs_version(struct nfs_subversion *); diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index bafe5186c9c..3b950dd81e8 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -9,7 +9,7 @@ #ifndef __LINUX_FS_NFS_NFS4_FS_H #define __LINUX_FS_NFS_NFS4_FS_H -#ifdef CONFIG_NFS_V4 +#if 
IS_ENABLED(CONFIG_NFS_V4) struct idmap; @@ -365,11 +365,10 @@ extern const nfs4_stateid zero_stateid; struct nfs_mount_info; extern struct nfs_subversion nfs_v4; struct dentry *nfs4_try_mount(int, const char *, struct nfs_mount_info *, struct nfs_subversion *); -int init_nfs_v4(void); -void exit_nfs_v4(void); extern bool nfs4_disable_idmapping; extern unsigned short max_session_slots; extern unsigned short send_implementation_id; + /* nfs4sysctl.c */ #ifdef CONFIG_SYSCTL int nfs4_register_sysctl(void); diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c index 1c825f3bef5..12a31a9dbcd 100644 --- a/fs/nfs/nfs4super.c +++ b/fs/nfs/nfs4super.c @@ -332,7 +332,7 @@ static struct dentry *nfs4_referral_mount(struct file_system_type *fs_type, } -int __init init_nfs_v4(void) +static int __init init_nfs_v4(void) { int err; @@ -358,10 +358,15 @@ out: return err; } -void exit_nfs_v4(void) +static void __exit exit_nfs_v4(void) { unregister_nfs_version(&nfs_v4); unregister_filesystem(&nfs4_fs_type); nfs4_unregister_sysctl(); nfs_idmap_quit(); } + +MODULE_LICENSE("GPL"); + +module_init(init_nfs_v4); +module_exit(exit_nfs_v4); diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index aed913c833f..1e7d8879dae 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -54,6 +54,7 @@ void nfs_pgheader_init(struct nfs_pageio_descriptor *desc, if (hdr->completion_ops->init_hdr) hdr->completion_ops->init_hdr(hdr); } +EXPORT_SYMBOL_GPL(nfs_pgheader_init); void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos) { @@ -268,6 +269,7 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc, desc->pg_lseg = NULL; desc->pg_dreq = NULL; } +EXPORT_SYMBOL_GPL(nfs_pageio_init); /** * nfs_can_coalesce_requests - test two requests for compatibility @@ -409,6 +411,7 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, } while (ret); return ret; } +EXPORT_SYMBOL_GPL(nfs_pageio_add_request); /** * nfs_pageio_complete - Complete I/O on an nfs_pageio_descriptor @@ -424,6 
+427,7 @@ void nfs_pageio_complete(struct nfs_pageio_descriptor *desc) break; } } +EXPORT_SYMBOL_GPL(nfs_pageio_complete); /** * nfs_pageio_cond_complete - Conditional I/O completion diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 7fbd25afe41..76875bfcf19 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1407,6 +1407,7 @@ static void pnfs_writehdr_free(struct nfs_pgio_header *hdr) put_lseg(hdr->lseg); nfs_writehdr_free(hdr); } +EXPORT_SYMBOL_GPL(pnfs_writehdr_free); int pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) @@ -1561,6 +1562,7 @@ static void pnfs_readhdr_free(struct nfs_pgio_header *hdr) put_lseg(hdr->lseg); nfs_readhdr_free(hdr); } +EXPORT_SYMBOL_GPL(pnfs_readhdr_free); int pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) diff --git a/fs/nfs/read.c b/fs/nfs/read.c index b000e4c0cf8..6935e401ad7 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -48,6 +48,7 @@ struct nfs_read_header *nfs_readhdr_alloc(void) } return rhdr; } +EXPORT_SYMBOL_GPL(nfs_readhdr_alloc); static struct nfs_read_data *nfs_readdata_alloc(struct nfs_pgio_header *hdr, unsigned int pagecount) @@ -80,6 +81,7 @@ void nfs_readhdr_free(struct nfs_pgio_header *hdr) kmem_cache_free(nfs_rdata_cachep, rhdr); } +EXPORT_SYMBOL_GPL(nfs_readhdr_free); void nfs_readdata_release(struct nfs_read_data *rdata) { @@ -96,6 +98,7 @@ void nfs_readdata_release(struct nfs_read_data *rdata) if (atomic_dec_and_test(&hdr->refcnt)) hdr->completion_ops->completion(hdr); } +EXPORT_SYMBOL_GPL(nfs_readdata_release); static int nfs_return_empty_page(struct page *page) @@ -398,6 +401,7 @@ int nfs_generic_pagein(struct nfs_pageio_descriptor *desc, return nfs_pagein_multi(desc, hdr); return nfs_pagein_one(desc, hdr); } +EXPORT_SYMBOL_GPL(nfs_generic_pagein); static int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) { diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 558a85c9594..ac6a3c55dce 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -315,7 +315,7 @@ const struct 
super_operations nfs_sops = { }; EXPORT_SYMBOL_GPL(nfs_sops); -#ifdef CONFIG_NFS_V4 +#if IS_ENABLED(CONFIG_NFS_V4) static void nfs4_validate_mount_flags(struct nfs_parsed_mount_data *); static int nfs4_validate_mount_data(void *options, struct nfs_parsed_mount_data *args, const char *dev_name); @@ -366,6 +366,7 @@ void nfs_sb_active(struct super_block *sb) if (atomic_inc_return(&server->active) == 1) atomic_inc(&sb->s_active); } +EXPORT_SYMBOL_GPL(nfs_sb_active); void nfs_sb_deactive(struct super_block *sb) { @@ -374,6 +375,7 @@ void nfs_sb_deactive(struct super_block *sb) if (atomic_dec_and_test(&server->active)) deactivate_super(sb); } +EXPORT_SYMBOL_GPL(nfs_sb_deactive); /* * Deliver file system statistics to userspace @@ -439,6 +441,7 @@ int nfs_statfs(struct dentry *dentry, struct kstatfs *buf) dprintk("%s: statfs error = %d\n", __func__, -error); return error; } +EXPORT_SYMBOL_GPL(nfs_statfs); /* * Map the security flavour number to a name @@ -544,7 +547,7 @@ static void nfs_show_mountd_options(struct seq_file *m, struct nfs_server *nfss, nfs_show_mountd_netid(m, nfss, showdefaults); } -#ifdef CONFIG_NFS_V4 +#if IS_ENABLED(CONFIG_NFS_V4) static void nfs_show_nfsv4_options(struct seq_file *m, struct nfs_server *nfss, int showdefaults) { @@ -675,8 +678,9 @@ int nfs_show_options(struct seq_file *m, struct dentry *root) return 0; } +EXPORT_SYMBOL_GPL(nfs_show_options); -#ifdef CONFIG_NFS_V4 +#if IS_ENABLED(CONFIG_NFS_V4) #ifdef CONFIG_NFS_V4_1 static void show_sessions(struct seq_file *m, struct nfs_server *server) { @@ -709,7 +713,7 @@ static void show_implementation_id(struct seq_file *m, struct nfs_server *nfss) } } #else -#ifdef CONFIG_NFS_V4 +#if IS_ENABLED(CONFIG_NFS_V4) static void show_pnfs(struct seq_file *m, struct nfs_server *server) { } @@ -734,12 +738,14 @@ int nfs_show_devname(struct seq_file *m, struct dentry *root) free_page((unsigned long)page); return err; } +EXPORT_SYMBOL_GPL(nfs_show_devname); int nfs_show_path(struct seq_file *m, struct 
dentry *dentry) { seq_puts(m, "/"); return 0; } +EXPORT_SYMBOL_GPL(nfs_show_path); /* * Present statistical information for this VFS mountpoint @@ -774,7 +780,7 @@ int nfs_show_stats(struct seq_file *m, struct dentry *root) seq_printf(m, ",bsize=%u", nfss->bsize); seq_printf(m, ",namlen=%u", nfss->namelen); -#ifdef CONFIG_NFS_V4 +#if IS_ENABLED(CONFIG_NFS_V4) if (nfss->nfs_client->rpc_ops->version == 4) { seq_printf(m, "\n\tnfsv4:\t"); seq_printf(m, "bm0=0x%x", nfss->attr_bitmask[0]); @@ -832,6 +838,7 @@ int nfs_show_stats(struct seq_file *m, struct dentry *root) return 0; } +EXPORT_SYMBOL_GPL(nfs_show_stats); /* * Begin unmount by attempting to remove all automounted mountpoints we added @@ -851,6 +858,7 @@ void nfs_umount_begin(struct super_block *sb) if (!IS_ERR(rpc)) rpc_killall_tasks(rpc); } +EXPORT_SYMBOL_GPL(nfs_umount_begin); static struct nfs_parsed_mount_data *nfs_alloc_parsed_mount_data(void) { @@ -1915,7 +1923,7 @@ out_invalid_fh: return -EINVAL; } -#ifdef CONFIG_NFS_V4 +#if IS_ENABLED(CONFIG_NFS_V4) static int nfs_validate_mount_data(struct file_system_type *fs_type, void *options, struct nfs_parsed_mount_data *args, @@ -1953,7 +1961,7 @@ static int nfs_validate_text_mount_data(void *options, goto out_no_address; if (args->version == 4) { -#ifdef CONFIG_NFS_V4 +#if IS_ENABLED(CONFIG_NFS_V4) port = NFS_PORT; max_namelen = NFS4_MAXNAMLEN; max_pathlen = NFS4_MAXPATHLEN; @@ -1976,7 +1984,7 @@ static int nfs_validate_text_mount_data(void *options, &args->nfs_server.export_path, max_pathlen); -#ifndef CONFIG_NFS_V4 +#if !IS_ENABLED(CONFIG_NFS_V4) out_v4_not_compiled: dfprintk(MOUNT, "NFS: NFSv4 is not compiled into kernel\n"); return -EPROTONOSUPPORT; @@ -2075,6 +2083,7 @@ out: kfree(data); return error; } +EXPORT_SYMBOL_GPL(nfs_remount); /* * Initialise the common bits of the superblock @@ -2123,6 +2132,7 @@ void nfs_fill_super(struct super_block *sb, struct nfs_mount_info *mount_info) nfs_initialise_sb(sb); } +EXPORT_SYMBOL_GPL(nfs_fill_super); /* * Finish 
setting up a cloned NFS2/3/4 superblock @@ -2292,6 +2302,7 @@ int nfs_set_sb_security(struct super_block *s, struct dentry *mntroot, { return security_sb_set_mnt_opts(s, &mount_info->parsed->lsm_opts); } +EXPORT_SYMBOL_GPL(nfs_set_sb_security); int nfs_clone_sb_security(struct super_block *s, struct dentry *mntroot, struct nfs_mount_info *mount_info) @@ -2302,6 +2313,7 @@ int nfs_clone_sb_security(struct super_block *s, struct dentry *mntroot, return -ESTALE; return 0; } +EXPORT_SYMBOL_GPL(nfs_clone_sb_security); struct dentry *nfs_fs_mount_common(struct nfs_server *server, int flags, const char *dev_name, @@ -2375,6 +2387,7 @@ error_splat_bdi: deactivate_locked_super(s); goto out; } +EXPORT_SYMBOL_GPL(nfs_fs_mount_common); struct dentry *nfs_fs_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data) @@ -2415,6 +2428,7 @@ out: nfs_free_fhandle(mount_info.mntfh); return mntroot; } +EXPORT_SYMBOL_GPL(nfs_fs_mount); /* * Ensure that we unregister the bdi before kill_anon_super @@ -2426,6 +2440,7 @@ void nfs_put_super(struct super_block *s) bdi_unregister(&server->backing_dev_info); } +EXPORT_SYMBOL_GPL(nfs_put_super); /* * Destroy an NFS2/3 superblock @@ -2438,6 +2453,7 @@ void nfs_kill_super(struct super_block *s) nfs_fscache_release_super_cookie(s); nfs_free_server(server); } +EXPORT_SYMBOL_GPL(nfs_kill_super); /* * Clone an NFS2/3/4 server record on xdev traversal (FSID-change) @@ -2478,7 +2494,7 @@ out_err: goto out; } -#ifdef CONFIG_NFS_V4 +#if IS_ENABLED(CONFIG_NFS_V4) static void nfs4_validate_mount_flags(struct nfs_parsed_mount_data *args) { @@ -2590,6 +2606,13 @@ bool nfs4_disable_idmapping = true; unsigned short max_session_slots = NFS4_DEF_SLOT_TABLE_SIZE; unsigned short send_implementation_id = 1; +EXPORT_SYMBOL_GPL(nfs_callback_set_tcpport); +EXPORT_SYMBOL_GPL(nfs_callback_tcpport); +EXPORT_SYMBOL_GPL(nfs_idmap_cache_timeout); +EXPORT_SYMBOL_GPL(nfs4_disable_idmapping); +EXPORT_SYMBOL_GPL(max_session_slots); 
+EXPORT_SYMBOL_GPL(send_implementation_id); + #define NFS_CALLBACK_MAXPORTNR (65535U) static int param_set_portnr(const char *val, const struct kernel_param *kp) diff --git a/fs/nfs/write.c b/fs/nfs/write.c index f268fe4f278..e4a2ad2059b 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -84,6 +84,7 @@ struct nfs_write_header *nfs_writehdr_alloc(void) } return p; } +EXPORT_SYMBOL_GPL(nfs_writehdr_alloc); static struct nfs_write_data *nfs_writedata_alloc(struct nfs_pgio_header *hdr, unsigned int pagecount) @@ -115,6 +116,7 @@ void nfs_writehdr_free(struct nfs_pgio_header *hdr) struct nfs_write_header *whdr = container_of(hdr, struct nfs_write_header, header); mempool_free(whdr, nfs_wdata_mempool); } +EXPORT_SYMBOL_GPL(nfs_writehdr_free); void nfs_writedata_release(struct nfs_write_data *wdata) { @@ -131,6 +133,7 @@ void nfs_writedata_release(struct nfs_write_data *wdata) if (atomic_dec_and_test(&hdr->refcnt)) hdr->completion_ops->completion(hdr); } +EXPORT_SYMBOL_GPL(nfs_writedata_release); static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error) { @@ -446,7 +449,7 @@ nfs_mark_request_dirty(struct nfs_page *req) __set_page_dirty_nobuffers(req->wb_page); } -#if IS_ENABLED(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) +#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4) /** * nfs_request_add_commit_list - add request to a commit list * @req: pointer to a struct nfs_page @@ -636,7 +639,7 @@ out: hdr->release(hdr); } -#if IS_ENABLED(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) +#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4) static unsigned long nfs_reqs_to_commit(struct nfs_commit_info *cinfo) { @@ -1173,6 +1176,7 @@ int nfs_generic_flush(struct nfs_pageio_descriptor *desc, return nfs_flush_multi(desc, hdr); return nfs_flush_one(desc, hdr); } +EXPORT_SYMBOL_GPL(nfs_generic_flush); static int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) { @@ -1298,7 +1302,7 @@ void nfs_writeback_done(struct rpc_task *task, struct 
nfs_write_data *data) return; nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, resp->count); -#if IS_ENABLED(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) +#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4) if (resp->verf->committed < argp->stable && task->tk_status >= 0) { /* We tried a write call, but the server did not * commit data to stable storage even though we @@ -1358,7 +1362,7 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) } -#if IS_ENABLED(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) +#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4) static int nfs_commit_set_lock(struct nfs_inode *nfsi, int may_wait) { int ret; @@ -1674,6 +1678,7 @@ int nfs_write_inode(struct inode *inode, struct writeback_control *wbc) { return nfs_commit_unstable_pages(inode, wbc); } +EXPORT_SYMBOL_GPL(nfs_write_inode); /* * flush the inode to disk. -- cgit v1.2.3 From b042414feb240df5f5911b9bca39b61e6738e814 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 31 Jul 2012 14:40:12 +1000 Subject: nfs: increase number of permitted callback connections. By default a sunrpc service is limited to (N+3)*20 connections where N is the number of threads. This is 80 when N==1. If this number is exceeded a warning is printed suggesting that the number of threads be increased. However with services which run a single thread, this is impossible. For such services there is a ->sv_maxconn setting that can be used to forcibly increase the limit, and silence the message. This is used by lockd. The nfs client uses a sunrpc service to handle callbacks and it too is single-threaded, so to avoid the useless messages, and to allow a reasonable number of concurrent connections, we need to set ->sv_maxconn. 1024 seems like a good number. 
Signed-off-by: NeilBrown Signed-off-by: Trond Myklebust --- fs/nfs/callback.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs/nfs') diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index ca3ac992028..4c8459e5bde 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -241,6 +241,10 @@ int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt) ret = -ENOMEM; goto out_err; } + /* As there is only one thread we need to over-ride the + * default maximum of 80 connections + */ + serv->sv_maxconn = 1024; ret = svc_bind(serv, net); if (ret < 0) { -- cgit v1.2.3 From ad0fcd4eb68059de02e1766948263c71b8a5b1dc Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 23 Jul 2012 15:46:23 -0400 Subject: nfs: explicitly reject LOCK_MAND flock() requests We have no mechanism to emulate LOCK_MAND locks on NFSv4, so explicitly return -EINVAL if someone requests it. Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust --- fs/nfs/file.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'fs/nfs') diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 1557978ca7b..b039a17ee94 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -834,6 +834,15 @@ int nfs_flock(struct file *filp, int cmd, struct file_lock *fl) if (!(fl->fl_flags & FL_FLOCK)) return -ENOLCK; + /* + * The NFSv4 protocol doesn't support LOCK_MAND, which is not part of + * any standard. In principle we might be able to support LOCK_MAND + * on NFSv2/3 since NLMv3/4 support DOS share modes, but for now the + * NFS code is not set up for it. 
+ */ + if (fl->fl_type & LOCK_MAND) + return -EINVAL; + if (NFS_SERVER(inode)->flags & NFS_MOUNT_LOCAL_FLOCK) is_local = 1; -- cgit v1.2.3 From d56b4ddf7781ef8dd050542781cc7f55673af002 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Tue, 31 Jul 2012 16:45:06 -0700 Subject: nfs: teach the NFS client how to treat PG_swapcache pages Replace all relevant occurrences of page->index and page->mapping in the NFS client with the new page_file_index() and page_file_mapping() functions. Signed-off-by: Peter Zijlstra Signed-off-by: Mel Gorman Acked-by: Rik van Riel Cc: Christoph Hellwig Cc: David S. Miller Cc: Eric B Munson Cc: Eric Paris Cc: James Morris Cc: Mel Gorman Cc: Mike Christie Cc: Neil Brown Cc: Sebastian Andrzej Siewior Cc: Trond Myklebust Cc: Xiaotian Feng Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nfs/file.c | 6 +++--- fs/nfs/internal.h | 7 ++++--- fs/nfs/pagelist.c | 2 +- fs/nfs/read.c | 6 +++--- fs/nfs/write.c | 36 ++++++++++++++++++------------------ 5 files changed, 29 insertions(+), 28 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 70d124a61b9..acd4e4cd290 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -430,7 +430,7 @@ static void nfs_invalidate_page(struct page *page, unsigned long offset) if (offset != 0) return; /* Cancel any unstarted writes on this page */ - nfs_wb_page_cancel(page->mapping->host, page); + nfs_wb_page_cancel(page_file_mapping(page)->host, page); nfs_fscache_invalidate_page(page, page->mapping->host); } @@ -472,7 +472,7 @@ static int nfs_release_page(struct page *page, gfp_t gfp) */ static int nfs_launder_page(struct page *page) { - struct inode *inode = page->mapping->host; + struct inode *inode = page_file_mapping(page)->host; struct nfs_inode *nfsi = NFS_I(inode); dfprintk(PAGECACHE, "NFS: launder_page(%ld, %llu)\n", @@ -521,7 +521,7 @@ static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) nfs_fscache_wait_on_page_write(NFS_I(dentry->d_inode), 
page); lock_page(page); - mapping = page->mapping; + mapping = page_file_mapping(page); if (mapping != dentry->d_inode->i_mapping) goto out_unlock; diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index cfafd13b6fe..4be14b3e0a1 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -546,13 +546,14 @@ void nfs_super_set_maxbytes(struct super_block *sb, __u64 maxfilesize) static inline unsigned int nfs_page_length(struct page *page) { - loff_t i_size = i_size_read(page->mapping->host); + loff_t i_size = i_size_read(page_file_mapping(page)->host); if (i_size > 0) { + pgoff_t page_index = page_file_index(page); pgoff_t end_index = (i_size - 1) >> PAGE_CACHE_SHIFT; - if (page->index < end_index) + if (page_index < end_index) return PAGE_CACHE_SIZE; - if (page->index == end_index) + if (page_index == end_index) return ((i_size - 1) & ~PAGE_CACHE_MASK) + 1; } return 0; diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index aed913c833f..9ef8b3cf7fc 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -117,7 +117,7 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode, * long write-back delay. This will be adjusted in * update_nfs_request below if the region is not locked. 
*/ req->wb_page = page; - req->wb_index = page->index; + req->wb_index = page_file_index(page); page_cache_get(page); req->wb_offset = offset; req->wb_pgbase = offset; diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 6267b873bbc..7cb02078268 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -522,11 +522,11 @@ static const struct rpc_call_ops nfs_read_common_ops = { int nfs_readpage(struct file *file, struct page *page) { struct nfs_open_context *ctx; - struct inode *inode = page->mapping->host; + struct inode *inode = page_file_mapping(page)->host; int error; dprintk("NFS: nfs_readpage (%p %ld@%lu)\n", - page, PAGE_CACHE_SIZE, page->index); + page, PAGE_CACHE_SIZE, page_file_index(page)); nfs_inc_stats(inode, NFSIOS_VFSREADPAGE); nfs_add_stats(inode, NFSIOS_READPAGES, 1); @@ -580,7 +580,7 @@ static int readpage_async_filler(void *data, struct page *page) { struct nfs_readdesc *desc = (struct nfs_readdesc *)data; - struct inode *inode = page->mapping->host; + struct inode *inode = page_file_mapping(page)->host; struct nfs_page *new; unsigned int len; int error; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index f312860c15d..d0feca32b4f 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -153,7 +153,7 @@ static struct nfs_page *nfs_page_find_request_locked(struct page *page) static struct nfs_page *nfs_page_find_request(struct page *page) { - struct inode *inode = page->mapping->host; + struct inode *inode = page_file_mapping(page)->host; struct nfs_page *req = NULL; spin_lock(&inode->i_lock); @@ -165,16 +165,16 @@ static struct nfs_page *nfs_page_find_request(struct page *page) /* Adjust the file length if we're writing beyond the end */ static void nfs_grow_file(struct page *page, unsigned int offset, unsigned int count) { - struct inode *inode = page->mapping->host; + struct inode *inode = page_file_mapping(page)->host; loff_t end, i_size; pgoff_t end_index; spin_lock(&inode->i_lock); i_size = i_size_read(inode); end_index = (i_size - 1) >> PAGE_CACHE_SHIFT; - if 
(i_size > 0 && page->index < end_index) + if (i_size > 0 && page_file_index(page) < end_index) goto out; - end = ((loff_t)page->index << PAGE_CACHE_SHIFT) + ((loff_t)offset+count); + end = page_file_offset(page) + ((loff_t)offset+count); if (i_size >= end) goto out; i_size_write(inode, end); @@ -187,7 +187,7 @@ out: static void nfs_set_pageerror(struct page *page) { SetPageError(page); - nfs_zap_mapping(page->mapping->host, page->mapping); + nfs_zap_mapping(page_file_mapping(page)->host, page_file_mapping(page)); } /* We can set the PG_uptodate flag if we see that a write request @@ -228,7 +228,7 @@ static int nfs_set_page_writeback(struct page *page) int ret = test_set_page_writeback(page); if (!ret) { - struct inode *inode = page->mapping->host; + struct inode *inode = page_file_mapping(page)->host; struct nfs_server *nfss = NFS_SERVER(inode); if (atomic_long_inc_return(&nfss->writeback) > @@ -242,7 +242,7 @@ static int nfs_set_page_writeback(struct page *page) static void nfs_end_page_writeback(struct page *page) { - struct inode *inode = page->mapping->host; + struct inode *inode = page_file_mapping(page)->host; struct nfs_server *nfss = NFS_SERVER(inode); end_page_writeback(page); @@ -252,7 +252,7 @@ static void nfs_end_page_writeback(struct page *page) static struct nfs_page *nfs_find_and_lock_request(struct page *page, bool nonblock) { - struct inode *inode = page->mapping->host; + struct inode *inode = page_file_mapping(page)->host; struct nfs_page *req; int ret; @@ -313,13 +313,13 @@ out: static int nfs_do_writepage(struct page *page, struct writeback_control *wbc, struct nfs_pageio_descriptor *pgio) { - struct inode *inode = page->mapping->host; + struct inode *inode = page_file_mapping(page)->host; int ret; nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE); nfs_add_stats(inode, NFSIOS_WRITEPAGES, 1); - nfs_pageio_cond_complete(pgio, page->index); + nfs_pageio_cond_complete(pgio, page_file_index(page)); ret = nfs_page_async_flush(pgio, page, wbc->sync_mode == 
WB_SYNC_NONE); if (ret == -EAGAIN) { redirty_page_for_writepage(wbc, page); @@ -336,7 +336,7 @@ static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc struct nfs_pageio_descriptor pgio; int err; - NFS_PROTO(page->mapping->host)->write_pageio_init(&pgio, + NFS_PROTO(page_file_mapping(page)->host)->write_pageio_init(&pgio, page->mapping->host, wb_priority(wbc), &nfs_async_write_completion_ops); @@ -471,7 +471,7 @@ nfs_request_add_commit_list(struct nfs_page *req, struct list_head *dst, spin_unlock(cinfo->lock); if (!cinfo->dreq) { inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); - inc_bdi_stat(req->wb_page->mapping->backing_dev_info, + inc_bdi_stat(page_file_mapping(req->wb_page)->backing_dev_info, BDI_RECLAIMABLE); __mark_inode_dirty(req->wb_context->dentry->d_inode, I_DIRTY_DATASYNC); @@ -538,7 +538,7 @@ static void nfs_clear_page_commit(struct page *page) { dec_zone_page_state(page, NR_UNSTABLE_NFS); - dec_bdi_stat(page->mapping->backing_dev_info, BDI_RECLAIMABLE); + dec_bdi_stat(page_file_mapping(page)->backing_dev_info, BDI_RECLAIMABLE); } static void @@ -789,7 +789,7 @@ out_err: static struct nfs_page * nfs_setup_write_request(struct nfs_open_context* ctx, struct page *page, unsigned int offset, unsigned int bytes) { - struct inode *inode = page->mapping->host; + struct inode *inode = page_file_mapping(page)->host; struct nfs_page *req; req = nfs_try_to_update_request(inode, page, offset, bytes); @@ -842,7 +842,7 @@ int nfs_flush_incompatible(struct file *file, struct page *page) nfs_release_request(req); if (!do_flush) return 0; - status = nfs_wb_page(page->mapping->host, page); + status = nfs_wb_page(page_file_mapping(page)->host, page); } while (status == 0); return status; } @@ -872,7 +872,7 @@ int nfs_updatepage(struct file *file, struct page *page, unsigned int offset, unsigned int count) { struct nfs_open_context *ctx = nfs_file_open_context(file); - struct inode *inode = page->mapping->host; + struct inode *inode = 
page_file_mapping(page)->host; int status = 0; nfs_inc_stats(inode, NFSIOS_VFSUPDATEPAGE); @@ -880,7 +880,7 @@ int nfs_updatepage(struct file *file, struct page *page, dprintk("NFS: nfs_updatepage(%s/%s %d@%lld)\n", file->f_path.dentry->d_parent->d_name.name, file->f_path.dentry->d_name.name, count, - (long long)(page_offset(page) + offset)); + (long long)(page_file_offset(page) + offset)); /* If we're not using byte range locks, and we know the page * is up to date, it may be more efficient to extend the write @@ -1469,7 +1469,7 @@ void nfs_retry_commit(struct list_head *page_list, nfs_mark_request_commit(req, lseg, cinfo); if (!cinfo->dreq) { dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); - dec_bdi_stat(req->wb_page->mapping->backing_dev_info, + dec_bdi_stat(page_file_mapping(req->wb_page)->backing_dev_info, BDI_RECLAIMABLE); } nfs_unlock_and_release_request(req); -- cgit v1.2.3 From 29418aa4bd487c82016733ef5c6a06d656ed3c7d Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Tue, 31 Jul 2012 16:45:10 -0700 Subject: nfs: disable data cache revalidation for swapfiles The VM does not like PG_private set on PG_swapcache pages. As suggested by Trond in http://lkml.org/lkml/2006/8/25/348, this patch disables NFS data cache revalidation on swap files, as it does not make sense to have other clients change the file while it is being used as swap. This avoids setting PG_private on swap pages, since there ought to be no further races with invalidate_inode_pages2() to deal with. Since we cannot set PG_private we cannot use page->private which is already used by PG_swapcache pages to store the nfs_page. Thus augment the new nfs_page_find_request logic. Signed-off-by: Peter Zijlstra Signed-off-by: Mel Gorman Acked-by: Rik van Riel Cc: Christoph Hellwig Cc: David S. 
Miller Cc: Eric B Munson Cc: Eric Paris Cc: James Morris Cc: Mel Gorman Cc: Mike Christie Cc: Neil Brown Cc: Sebastian Andrzej Siewior Cc: Trond Myklebust Cc: Xiaotian Feng Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nfs/inode.c | 4 ++++ fs/nfs/write.c | 49 +++++++++++++++++++++++++++++++++++-------------- 2 files changed, 39 insertions(+), 14 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 35f7e4bc680..1d57fe9f49a 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -882,6 +882,10 @@ int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping) struct nfs_inode *nfsi = NFS_I(inode); int ret = 0; + /* swapfiles are not supposed to be shared. */ + if (IS_SWAPFILE(inode)) + goto out; + if (nfs_mapping_need_revalidate_inode(inode)) { ret = __nfs_revalidate_inode(NFS_SERVER(inode), inode); if (ret < 0) diff --git a/fs/nfs/write.c b/fs/nfs/write.c index d0feca32b4f..974e9c2d31f 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -139,15 +139,28 @@ static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error) set_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags); } -static struct nfs_page *nfs_page_find_request_locked(struct page *page) +static struct nfs_page * +nfs_page_find_request_locked(struct nfs_inode *nfsi, struct page *page) { struct nfs_page *req = NULL; - if (PagePrivate(page)) { + if (PagePrivate(page)) req = (struct nfs_page *)page_private(page); - if (req != NULL) - kref_get(&req->wb_kref); + else if (unlikely(PageSwapCache(page))) { + struct nfs_page *freq, *t; + + /* Linearly search the commit list for the correct req */ + list_for_each_entry_safe(freq, t, &nfsi->commit_info.list, wb_list) { + if (freq->wb_page == page) { + req = freq; + break; + } + } } + + if (req) + kref_get(&req->wb_kref); + return req; } @@ -157,7 +170,7 @@ static struct nfs_page *nfs_page_find_request(struct page *page) struct nfs_page *req = NULL; spin_lock(&inode->i_lock); - req = 
nfs_page_find_request_locked(page); + req = nfs_page_find_request_locked(NFS_I(inode), page); spin_unlock(&inode->i_lock); return req; } @@ -258,7 +271,7 @@ static struct nfs_page *nfs_find_and_lock_request(struct page *page, bool nonblo spin_lock(&inode->i_lock); for (;;) { - req = nfs_page_find_request_locked(page); + req = nfs_page_find_request_locked(NFS_I(inode), page); if (req == NULL) break; if (nfs_lock_request(req)) @@ -413,9 +426,15 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req) spin_lock(&inode->i_lock); if (!nfsi->npages && NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE)) inode->i_version++; - set_bit(PG_MAPPED, &req->wb_flags); - SetPagePrivate(req->wb_page); - set_page_private(req->wb_page, (unsigned long)req); + /* + * Swap-space should not get truncated. Hence no need to plug the race + * with invalidate/truncate. + */ + if (likely(!PageSwapCache(req->wb_page))) { + set_bit(PG_MAPPED, &req->wb_flags); + SetPagePrivate(req->wb_page); + set_page_private(req->wb_page, (unsigned long)req); + } nfsi->npages++; kref_get(&req->wb_kref); spin_unlock(&inode->i_lock); @@ -432,9 +451,11 @@ static void nfs_inode_remove_request(struct nfs_page *req) BUG_ON (!NFS_WBACK_BUSY(req)); spin_lock(&inode->i_lock); - set_page_private(req->wb_page, 0); - ClearPagePrivate(req->wb_page); - clear_bit(PG_MAPPED, &req->wb_flags); + if (likely(!PageSwapCache(req->wb_page))) { + set_page_private(req->wb_page, 0); + ClearPagePrivate(req->wb_page); + clear_bit(PG_MAPPED, &req->wb_flags); + } nfsi->npages--; spin_unlock(&inode->i_lock); nfs_release_request(req); @@ -730,7 +751,7 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode, spin_lock(&inode->i_lock); for (;;) { - req = nfs_page_find_request_locked(page); + req = nfs_page_find_request_locked(NFS_I(inode), page); if (req == NULL) goto out_unlock; @@ -1744,7 +1765,7 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page) */ int nfs_wb_page(struct inode 
*inode, struct page *page) { - loff_t range_start = page_offset(page); + loff_t range_start = page_file_offset(page); loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1); struct writeback_control wbc = { .sync_mode = WB_SYNC_ALL, -- cgit v1.2.3 From a564b8f0398636ba30b07c0eaebdef7ff7837249 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Tue, 31 Jul 2012 16:45:12 -0700 Subject: nfs: enable swap on NFS Implement the new swapfile a_ops for NFS and hook up ->direct_IO. This will set the NFS socket to SOCK_MEMALLOC and run socket reconnect under PF_MEMALLOC as well as reset SOCK_MEMALLOC before engaging the protocol ->connect() method. PF_MEMALLOC should allow the allocation of struct socket and related objects and the early (re)setting of SOCK_MEMALLOC should allow us to receive the packets required for the TCP connection buildup. [jlayton@redhat.com: Restore PF_MEMALLOC task flags in all cases] [dfeng@redhat.com: Fix handling of multiple swap files] [a.p.zijlstra@chello.nl: Original patch] Signed-off-by: Mel Gorman Acked-by: Rik van Riel Cc: Christoph Hellwig Cc: David S. Miller Cc: Eric B Munson Cc: Eric Paris Cc: James Morris Cc: Mel Gorman Cc: Mike Christie Cc: Neil Brown Cc: Sebastian Andrzej Siewior Cc: Trond Myklebust Cc: Xiaotian Feng Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nfs/Kconfig | 8 ++++++ fs/nfs/direct.c | 82 +++++++++++++++++++++++++++++++++++++-------------------- fs/nfs/file.c | 22 ++++++++++++++-- 3 files changed, 82 insertions(+), 30 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index 404c6a8ac39..6fd5f2cdcd1 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig @@ -86,6 +86,14 @@ config NFS_V4 If unsure, say Y. +config NFS_SWAP + bool "Provide swap over NFS support" + default n + depends on NFS_FS + select SUNRPC_SWAP + help + This option enables swapon to work on files located on NFS mounts. 
+ config NFS_V4_1 bool "NFS client support for NFSv4.1 (EXPERIMENTAL)" depends on NFS_V4 && EXPERIMENTAL diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 42dce909ec7..bf9c8d0ec16 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -115,17 +115,28 @@ static inline int put_dreq(struct nfs_direct_req *dreq) * @nr_segs: size of iovec array * * The presence of this routine in the address space ops vector means - * the NFS client supports direct I/O. However, we shunt off direct - * read and write requests before the VFS gets them, so this method - * should never be called. + * the NFS client supports direct I/O. However, for most direct IO, we + * shunt off direct read and write requests before the VFS gets them, + * so this method is only ever called for swap. */ ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t pos, unsigned long nr_segs) { +#ifndef CONFIG_NFS_SWAP dprintk("NFS: nfs_direct_IO (%s) off/no(%Ld/%lu) EINVAL\n", iocb->ki_filp->f_path.dentry->d_name.name, (long long) pos, nr_segs); return -EINVAL; +#else + VM_BUG_ON(iocb->ki_left != PAGE_SIZE); + VM_BUG_ON(iocb->ki_nbytes != PAGE_SIZE); + + if (rw == READ || rw == KERNEL_READ) + return nfs_file_direct_read(iocb, iov, nr_segs, pos, + rw == READ ? true : false); + return nfs_file_direct_write(iocb, iov, nr_segs, pos, + rw == WRITE ? 
true : false); +#endif /* CONFIG_NFS_SWAP */ } static void nfs_direct_release_pages(struct page **pages, unsigned int npages) @@ -303,7 +314,7 @@ static const struct nfs_pgio_completion_ops nfs_direct_read_completion_ops = { */ static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *desc, const struct iovec *iov, - loff_t pos) + loff_t pos, bool uio) { struct nfs_direct_req *dreq = desc->pg_dreq; struct nfs_open_context *ctx = dreq->ctx; @@ -331,12 +342,20 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *de GFP_KERNEL); if (!pagevec) break; - down_read(&current->mm->mmap_sem); - result = get_user_pages(current, current->mm, user_addr, + if (uio) { + down_read(&current->mm->mmap_sem); + result = get_user_pages(current, current->mm, user_addr, npages, 1, 0, pagevec, NULL); - up_read(&current->mm->mmap_sem); - if (result < 0) - break; + up_read(&current->mm->mmap_sem); + if (result < 0) + break; + } else { + WARN_ON(npages != 1); + result = get_kernel_page(user_addr, 1, pagevec); + if (WARN_ON(result != 1)) + break; + } + if ((unsigned)result < npages) { bytes = result * PAGE_SIZE; if (bytes <= pgbase) { @@ -386,7 +405,7 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *de static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq, const struct iovec *iov, unsigned long nr_segs, - loff_t pos) + loff_t pos, bool uio) { struct nfs_pageio_descriptor desc; ssize_t result = -EINVAL; @@ -400,7 +419,7 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq, for (seg = 0; seg < nr_segs; seg++) { const struct iovec *vec = &iov[seg]; - result = nfs_direct_read_schedule_segment(&desc, vec, pos); + result = nfs_direct_read_schedule_segment(&desc, vec, pos, uio); if (result < 0) break; requested_bytes += result; @@ -426,7 +445,7 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq, } static ssize_t nfs_direct_read(struct kiocb *iocb, const struct iovec *iov, - 
unsigned long nr_segs, loff_t pos) + unsigned long nr_segs, loff_t pos, bool uio) { ssize_t result = -ENOMEM; struct inode *inode = iocb->ki_filp->f_mapping->host; @@ -444,7 +463,7 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, const struct iovec *iov, if (!is_sync_kiocb(iocb)) dreq->iocb = iocb; - result = nfs_direct_read_schedule_iovec(dreq, iov, nr_segs, pos); + result = nfs_direct_read_schedule_iovec(dreq, iov, nr_segs, pos, uio); if (!result) result = nfs_direct_wait(dreq); NFS_I(inode)->read_io += result; @@ -610,7 +629,7 @@ static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode */ static ssize_t nfs_direct_write_schedule_segment(struct nfs_pageio_descriptor *desc, const struct iovec *iov, - loff_t pos) + loff_t pos, bool uio) { struct nfs_direct_req *dreq = desc->pg_dreq; struct nfs_open_context *ctx = dreq->ctx; @@ -638,12 +657,19 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_pageio_descriptor *d if (!pagevec) break; - down_read(&current->mm->mmap_sem); - result = get_user_pages(current, current->mm, user_addr, - npages, 0, 0, pagevec, NULL); - up_read(&current->mm->mmap_sem); - if (result < 0) - break; + if (uio) { + down_read(&current->mm->mmap_sem); + result = get_user_pages(current, current->mm, user_addr, + npages, 0, 0, pagevec, NULL); + up_read(&current->mm->mmap_sem); + if (result < 0) + break; + } else { + WARN_ON(npages != 1); + result = get_kernel_page(user_addr, 0, pagevec); + if (WARN_ON(result != 1)) + break; + } if ((unsigned)result < npages) { bytes = result * PAGE_SIZE; @@ -774,7 +800,7 @@ static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops = { static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, const struct iovec *iov, unsigned long nr_segs, - loff_t pos) + loff_t pos, bool uio) { struct nfs_pageio_descriptor desc; struct inode *inode = dreq->inode; @@ -790,7 +816,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, for (seg = 0; seg < 
nr_segs; seg++) { const struct iovec *vec = &iov[seg]; - result = nfs_direct_write_schedule_segment(&desc, vec, pos); + result = nfs_direct_write_schedule_segment(&desc, vec, pos, uio); if (result < 0) break; requested_bytes += result; @@ -818,7 +844,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos, - size_t count) + size_t count, bool uio) { ssize_t result = -ENOMEM; struct inode *inode = iocb->ki_filp->f_mapping->host; @@ -836,7 +862,7 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov, if (!is_sync_kiocb(iocb)) dreq->iocb = iocb; - result = nfs_direct_write_schedule_iovec(dreq, iov, nr_segs, pos); + result = nfs_direct_write_schedule_iovec(dreq, iov, nr_segs, pos, uio); if (!result) result = nfs_direct_wait(dreq); out_release: @@ -867,7 +893,7 @@ out: * cache. */ ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) + unsigned long nr_segs, loff_t pos, bool uio) { ssize_t retval = -EINVAL; struct file *file = iocb->ki_filp; @@ -892,7 +918,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov, task_io_account_read(count); - retval = nfs_direct_read(iocb, iov, nr_segs, pos); + retval = nfs_direct_read(iocb, iov, nr_segs, pos, uio); if (retval > 0) iocb->ki_pos = pos + retval; @@ -923,7 +949,7 @@ out: * is no atomic O_APPEND write facility in the NFS protocol. 
*/ ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) + unsigned long nr_segs, loff_t pos, bool uio) { ssize_t retval = -EINVAL; struct file *file = iocb->ki_filp; @@ -955,7 +981,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov, task_io_account_write(count); - retval = nfs_direct_write(iocb, iov, nr_segs, pos, count); + retval = nfs_direct_write(iocb, iov, nr_segs, pos, count, uio); if (retval > 0) { struct inode *inode = mapping->host; diff --git a/fs/nfs/file.c b/fs/nfs/file.c index acd4e4cd290..50fb83a88b1 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -175,7 +175,7 @@ nfs_file_read(struct kiocb *iocb, const struct iovec *iov, ssize_t result; if (iocb->ki_filp->f_flags & O_DIRECT) - return nfs_file_direct_read(iocb, iov, nr_segs, pos); + return nfs_file_direct_read(iocb, iov, nr_segs, pos, true); dprintk("NFS: read(%s/%s, %lu@%lu)\n", dentry->d_parent->d_name.name, dentry->d_name.name, @@ -482,6 +482,20 @@ static int nfs_launder_page(struct page *page) return nfs_wb_page(inode, page); } +#ifdef CONFIG_NFS_SWAP +static int nfs_swap_activate(struct swap_info_struct *sis, struct file *file, + sector_t *span) +{ + *span = sis->pages; + return xs_swapper(NFS_CLIENT(file->f_mapping->host)->cl_xprt, 1); +} + +static void nfs_swap_deactivate(struct file *file) +{ + xs_swapper(NFS_CLIENT(file->f_mapping->host)->cl_xprt, 0); +} +#endif + const struct address_space_operations nfs_file_aops = { .readpage = nfs_readpage, .readpages = nfs_readpages, @@ -496,6 +510,10 @@ const struct address_space_operations nfs_file_aops = { .migratepage = nfs_migrate_page, .launder_page = nfs_launder_page, .error_remove_page = generic_error_remove_page, +#ifdef CONFIG_NFS_SWAP + .swap_activate = nfs_swap_activate, + .swap_deactivate = nfs_swap_deactivate, +#endif }; /* @@ -570,7 +588,7 @@ ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov, size_t count = iov_length(iov, nr_segs); if 
(iocb->ki_filp->f_flags & O_DIRECT) - return nfs_file_direct_write(iocb, iov, nr_segs, pos); + return nfs_file_direct_write(iocb, iov, nr_segs, pos, true); dprintk("NFS: write(%s/%s, %lu@%Ld)\n", dentry->d_parent->d_name.name, dentry->d_name.name, -- cgit v1.2.3 From 192e501b0438bb0e1574179773537f84c4752e25 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Tue, 31 Jul 2012 16:45:16 -0700 Subject: nfs: prevent page allocator recursions with swap over NFS. GFP_NOFS is _more_ permissive than GFP_NOIO in that it will initiate IO, just not of any filesystem data. The problem is that previously NOFS was correct because that avoids recursion into the NFS code. With swap-over-NFS, it is no longer correct as swap IO can lead to this recursion. Signed-off-by: Peter Zijlstra Signed-off-by: Mel Gorman Acked-by: Rik van Riel Cc: Christoph Hellwig Cc: David S. Miller Cc: Eric B Munson Cc: Eric Paris Cc: James Morris Cc: Mel Gorman Cc: Mike Christie Cc: Neil Brown Cc: Sebastian Andrzej Siewior Cc: Trond Myklebust Cc: Xiaotian Feng Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nfs/pagelist.c | 2 +- fs/nfs/write.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 9ef8b3cf7fc..7de1646c4e6 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -70,7 +70,7 @@ void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos) static inline struct nfs_page * nfs_page_alloc(void) { - struct nfs_page *p = kmem_cache_zalloc(nfs_page_cachep, GFP_KERNEL); + struct nfs_page *p = kmem_cache_zalloc(nfs_page_cachep, GFP_NOIO); if (p) INIT_LIST_HEAD(&p->wb_list); return p; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 974e9c2d31f..211ba656677 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -52,7 +52,7 @@ static mempool_t *nfs_commit_mempool; struct nfs_commit_data *nfs_commitdata_alloc(void) { - struct nfs_commit_data *p = mempool_alloc(nfs_commit_mempool, GFP_NOFS); + 
struct nfs_commit_data *p = mempool_alloc(nfs_commit_mempool, GFP_NOIO); if (p) { memset(p, 0, sizeof(*p)); @@ -70,7 +70,7 @@ EXPORT_SYMBOL_GPL(nfs_commit_free); struct nfs_write_header *nfs_writehdr_alloc(void) { - struct nfs_write_header *p = mempool_alloc(nfs_wdata_mempool, GFP_NOFS); + struct nfs_write_header *p = mempool_alloc(nfs_wdata_mempool, GFP_NOIO); if (p) { struct nfs_pgio_header *hdr = &p->header; -- cgit v1.2.3 From 3dd4765fce04c0b4af1e0bc4c0b10f906f95fabc Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 2 Aug 2012 14:30:56 -0400 Subject: nfs: tear down caches in nfs_init_writepagecache when allocation fails ...and ensure that we tear down the nfs_commit_data cache too when unloading the module. Cc: Bryan Schumaker Cc: stable@vger.kernel.org Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 5829d0ce7cf..e3b55372726 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1814,19 +1814,19 @@ int __init nfs_init_writepagecache(void) nfs_wdata_mempool = mempool_create_slab_pool(MIN_POOL_WRITE, nfs_wdata_cachep); if (nfs_wdata_mempool == NULL) - return -ENOMEM; + goto out_destroy_write_cache; nfs_cdata_cachep = kmem_cache_create("nfs_commit_data", sizeof(struct nfs_commit_data), 0, SLAB_HWCACHE_ALIGN, NULL); if (nfs_cdata_cachep == NULL) - return -ENOMEM; + goto out_destroy_write_mempool; nfs_commit_mempool = mempool_create_slab_pool(MIN_POOL_COMMIT, nfs_wdata_cachep); if (nfs_commit_mempool == NULL) - return -ENOMEM; + goto out_destroy_commit_cache; /* * NFS congestion size, scale with available memory. 
@@ -1849,11 +1849,20 @@ int __init nfs_init_writepagecache(void) nfs_congestion_kb = 256*1024; return 0; + +out_destroy_commit_cache: + kmem_cache_destroy(nfs_cdata_cachep); +out_destroy_write_mempool: + mempool_destroy(nfs_wdata_mempool); +out_destroy_write_cache: + kmem_cache_destroy(nfs_wdata_cachep); + return -ENOMEM; } void nfs_destroy_writepagecache(void) { mempool_destroy(nfs_commit_mempool); + kmem_cache_destroy(nfs_cdata_cachep); mempool_destroy(nfs_wdata_mempool); kmem_cache_destroy(nfs_wdata_cachep); } -- cgit v1.2.3 From 8554116e17eef055d9dd58a94b3427cb2ad1c317 Mon Sep 17 00:00:00 2001 From: Idan Kedar Date: Thu, 2 Aug 2012 11:47:10 +0300 Subject: pnfs: defer release of pages in layoutget we have encountered a bug whereby reading a lot of files (copying fedora's /bin) from a pNFS mount and hitting Ctrl+C in the middle caused a general protection fault in xdr_shrink_bufhead. this function is called when decoding the response from LAYOUTGET. the decoding is done by a worker thread, and the caller of LAYOUTGET waits for the worker thread to complete. hitting Ctrl+C caused the synchronous wait to end and the next thing the caller does is to free the pages, so when the worker thread calls xdr_shrink_bufhead, the pages are gone. therefore, the cleanup of these pages has been moved to nfs4_layoutget_release. 
Signed-off-by: Idan Kedar Signed-off-by: Benny Halevy Cc: stable@vger.kernel.org Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 57 ++++++++++++++++++++++++++++++++++++++++++++++++++++++- fs/nfs/pnfs.c | 39 +------------------------------------ fs/nfs/pnfs.h | 2 +- 3 files changed, 58 insertions(+), 40 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index a99a8d94872..6a78d49da5c 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -6223,11 +6223,58 @@ static void nfs4_layoutget_done(struct rpc_task *task, void *calldata) dprintk("<-- %s\n", __func__); } +static size_t max_response_pages(struct nfs_server *server) +{ + u32 max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz; + return nfs_page_array_len(0, max_resp_sz); +} + +static void nfs4_free_pages(struct page **pages, size_t size) +{ + int i; + + if (!pages) + return; + + for (i = 0; i < size; i++) { + if (!pages[i]) + break; + __free_page(pages[i]); + } + kfree(pages); +} + +static struct page **nfs4_alloc_pages(size_t size, gfp_t gfp_flags) +{ + struct page **pages; + int i; + + pages = kcalloc(size, sizeof(struct page *), gfp_flags); + if (!pages) { + dprintk("%s: can't alloc array of %zu pages\n", __func__, size); + return NULL; + } + + for (i = 0; i < size; i++) { + pages[i] = alloc_page(gfp_flags); + if (!pages[i]) { + dprintk("%s: failed to allocate page\n", __func__); + nfs4_free_pages(pages, size); + return NULL; + } + } + + return pages; +} + static void nfs4_layoutget_release(void *calldata) { struct nfs4_layoutget *lgp = calldata; + struct nfs_server *server = NFS_SERVER(lgp->args.inode); + size_t max_pages = max_response_pages(server); dprintk("--> %s\n", __func__); + nfs4_free_pages(lgp->args.layout.pages, max_pages); put_nfs_open_context(lgp->args.ctx); kfree(calldata); dprintk("<-- %s\n", __func__); @@ -6239,9 +6286,10 @@ static const struct rpc_call_ops nfs4_layoutget_call_ops = { .rpc_release = nfs4_layoutget_release, }; -int 
nfs4_proc_layoutget(struct nfs4_layoutget *lgp) +int nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags) { struct nfs_server *server = NFS_SERVER(lgp->args.inode); + size_t max_pages = max_response_pages(server); struct rpc_task *task; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LAYOUTGET], @@ -6259,6 +6307,13 @@ int nfs4_proc_layoutget(struct nfs4_layoutget *lgp) dprintk("--> %s\n", __func__); + lgp->args.layout.pages = nfs4_alloc_pages(max_pages, gfp_flags); + if (!lgp->args.layout.pages) { + nfs4_layoutget_release(lgp); + return -ENOMEM; + } + lgp->args.layout.pglen = max_pages * PAGE_SIZE; + lgp->res.layoutp = &lgp->args.layout; lgp->res.seq_res.sr_slot = NULL; nfs41_init_sequence(&lgp->args.seq_args, &lgp->res.seq_res, 0); diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 76875bfcf19..2e00feacd4b 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -583,9 +583,6 @@ send_layoutget(struct pnfs_layout_hdr *lo, struct nfs_server *server = NFS_SERVER(ino); struct nfs4_layoutget *lgp; struct pnfs_layout_segment *lseg = NULL; - struct page **pages = NULL; - int i; - u32 max_resp_sz, max_pages; dprintk("--> %s\n", __func__); @@ -594,20 +591,6 @@ send_layoutget(struct pnfs_layout_hdr *lo, if (lgp == NULL) return NULL; - /* allocate pages for xdr post processing */ - max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz; - max_pages = nfs_page_array_len(0, max_resp_sz); - - pages = kcalloc(max_pages, sizeof(struct page *), gfp_flags); - if (!pages) - goto out_err_free; - - for (i = 0; i < max_pages; i++) { - pages[i] = alloc_page(gfp_flags); - if (!pages[i]) - goto out_err_free; - } - lgp->args.minlength = PAGE_CACHE_SIZE; if (lgp->args.minlength > range->length) lgp->args.minlength = range->length; @@ -616,39 +599,19 @@ send_layoutget(struct pnfs_layout_hdr *lo, lgp->args.type = server->pnfs_curr_ld->id; lgp->args.inode = ino; lgp->args.ctx = get_nfs_open_context(ctx); - lgp->args.layout.pages = pages; - 
lgp->args.layout.pglen = max_pages * PAGE_SIZE; lgp->lsegpp = &lseg; lgp->gfp_flags = gfp_flags; /* Synchronously retrieve layout information from server and * store in lseg. */ - nfs4_proc_layoutget(lgp); + nfs4_proc_layoutget(lgp, gfp_flags); if (!lseg) { /* remember that LAYOUTGET failed and suspend trying */ set_bit(lo_fail_bit(range->iomode), &lo->plh_flags); } - /* free xdr pages */ - for (i = 0; i < max_pages; i++) - __free_page(pages[i]); - kfree(pages); - return lseg; - -out_err_free: - /* free any allocated xdr pages, lgp as it's not used */ - if (pages) { - for (i = 0; i < max_pages; i++) { - if (!pages[i]) - break; - __free_page(pages[i]); - } - kfree(pages); - } - kfree(lgp); - return NULL; } /* diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 2c6c80503ba..5ea019e80b4 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -172,7 +172,7 @@ extern int nfs4_proc_getdevicelist(struct nfs_server *server, struct pnfs_devicelist *devlist); extern int nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *dev); -extern int nfs4_proc_layoutget(struct nfs4_layoutget *lgp); +extern int nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags); extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp); /* pnfs.c */ -- cgit v1.2.3 From 21d1f58aedc5f7ac4bb0c4e3d78c74ea31ac050f Mon Sep 17 00:00:00 2001 From: Idan Kedar Date: Thu, 2 Aug 2012 11:47:11 +0300 Subject: pnfs: nfs4_proc_layoutget returns void since the only user of nfs4_proc_layoutget is send_layoutget, which ignores its return value, there is no reason to return any value. 
Signed-off-by: Idan Kedar Signed-off-by: Benny Halevy Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 8 ++++---- fs/nfs/pnfs.h | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 6a78d49da5c..f94f6b3928f 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -6286,7 +6286,7 @@ static const struct rpc_call_ops nfs4_layoutget_call_ops = { .rpc_release = nfs4_layoutget_release, }; -int nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags) +void nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags) { struct nfs_server *server = NFS_SERVER(lgp->args.inode); size_t max_pages = max_response_pages(server); @@ -6310,7 +6310,7 @@ int nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags) lgp->args.layout.pages = nfs4_alloc_pages(max_pages, gfp_flags); if (!lgp->args.layout.pages) { nfs4_layoutget_release(lgp); - return -ENOMEM; + return; } lgp->args.layout.pglen = max_pages * PAGE_SIZE; @@ -6319,7 +6319,7 @@ int nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags) nfs41_init_sequence(&lgp->args.seq_args, &lgp->res.seq_res, 0); task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) - return PTR_ERR(task); + return; status = nfs4_wait_for_completion_rpc_task(task); if (status == 0) status = task->tk_status; @@ -6327,7 +6327,7 @@ int nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags) status = pnfs_layout_process(lgp); rpc_put_task(task); dprintk("<-- %s status=%d\n", __func__, status); - return status; + return; } static void diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 5ea019e80b4..745aa1b39e7 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -172,7 +172,7 @@ extern int nfs4_proc_getdevicelist(struct nfs_server *server, struct pnfs_devicelist *devlist); extern int nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *dev); -extern int nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags); 
+extern void nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags); extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp); /* pnfs.c */ -- cgit v1.2.3 From f6166384095b7ecf77752b5e9096e6d03d75f7ae Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Thu, 2 Aug 2012 15:36:09 +0300 Subject: NFS41: add pg_layout_private to nfs_pageio_descriptor To allow layout driver to pass private information around pg_init/pg_doio. Signed-off-by: Peng Tao Signed-off-by: Boaz Harrosh Signed-off-by: Trond Myklebust --- fs/nfs/pagelist.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/nfs') diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 1a6732ed04a..311a79681e2 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -49,6 +49,7 @@ void nfs_pgheader_init(struct nfs_pageio_descriptor *desc, hdr->io_start = req_offset(hdr->req); hdr->good_bytes = desc->pg_count; hdr->dreq = desc->pg_dreq; + hdr->layout_private = desc->pg_layout_private; hdr->release = release; hdr->completion_ops = desc->pg_completion_ops; if (hdr->completion_ops->init_hdr) @@ -268,6 +269,7 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc, desc->pg_error = 0; desc->pg_lseg = NULL; desc->pg_dreq = NULL; + desc->pg_layout_private = NULL; } EXPORT_SYMBOL_GPL(nfs_pageio_init); -- cgit v1.2.3 From 7de6e28417c65919cf2c1621841a650c4a3afbbd Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Thu, 2 Aug 2012 15:38:23 +0300 Subject: pnfs-obj: Better IO pattern in case of unaligned offset Depending on layout and ARCH, ORE has some limits on max IO sizes which is communicated on (what else) ore_layout->max_io_length, which is always stripe aligned. This was considered as the pg_test boundary for splitting and starting a new IO. But in the case of a long IO where the start offset is not aligned what would happen is that both end of IO[N] and start of IO[N+1] would be unaligned, causing each IO boundary parity unit to be calculated and written twice. 
So what we do in this patch is split the very start of an unaligned IO, up to a stripe boundary, and then next IO's can continue fully aligned til the end. We might be sacrificing the case where the full unaligned IO would fit within a single max_io_length, but the sacrifice is well worth the elimination of double calculation and parity units IO. Actually the sacrificing is marginal and is almost unmeasurable. TODO: If we know the total expected linear segment that will be received, at pg_init, we could use that information in many places: 1. blocks-layout get_layout write segment size 2. Better mds-threshold 3. In above situation for a better clean split I will do this in future submission. Signed-off-by: Boaz Harrosh Signed-off-by: Trond Myklebust --- fs/nfs/objlayout/objio_osd.c | 55 +++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 52 insertions(+), 3 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index f50d3e8d6f2..ea6d111b03e 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c @@ -570,17 +570,66 @@ static bool objio_pg_test(struct nfs_pageio_descriptor *pgio, return false; return pgio->pg_count + req->wb_bytes <= - OBJIO_LSEG(pgio->pg_lseg)->layout.max_io_length; + (unsigned long)pgio->pg_layout_private; +} + +void objio_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) +{ + pnfs_generic_pg_init_read(pgio, req); + if (unlikely(pgio->pg_lseg == NULL)) + return; /* Not pNFS */ + + pgio->pg_layout_private = (void *) + OBJIO_LSEG(pgio->pg_lseg)->layout.max_io_length; +} + +static bool aligned_on_raid_stripe(u64 offset, struct ore_layout *layout, + unsigned long *stripe_end) +{ + u32 stripe_off; + unsigned stripe_size; + + if (layout->raid_algorithm == PNFS_OSD_RAID_0) + return true; + + stripe_size = layout->stripe_unit * + (layout->group_width - layout->parity); + + div_u64_rem(offset, stripe_size, &stripe_off); + if (!stripe_off) + return true; + + 
*stripe_end = stripe_size - stripe_off; + return false; +} + +void objio_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) +{ + unsigned long stripe_end = 0; + + pnfs_generic_pg_init_write(pgio, req); + if (unlikely(pgio->pg_lseg == NULL)) + return; /* Not pNFS */ + + if (req->wb_offset || + !aligned_on_raid_stripe(req->wb_index * PAGE_SIZE, + &OBJIO_LSEG(pgio->pg_lseg)->layout, + &stripe_end)) { + pgio->pg_layout_private = (void *)stripe_end; + } else { + pgio->pg_layout_private = (void *) + OBJIO_LSEG(pgio->pg_lseg)->layout.max_io_length; + } } static const struct nfs_pageio_ops objio_pg_read_ops = { - .pg_init = pnfs_generic_pg_init_read, + .pg_init = objio_init_read, .pg_test = objio_pg_test, .pg_doio = pnfs_generic_pg_readpages, }; static const struct nfs_pageio_ops objio_pg_write_ops = { - .pg_init = pnfs_generic_pg_init_write, + .pg_init = objio_init_write, .pg_test = objio_pg_test, .pg_doio = pnfs_generic_pg_writepages, }; -- cgit v1.2.3 From 47fbf7976e0b7d9dcdd799e2a1baba19064d9631 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 8 Aug 2012 16:03:13 -0400 Subject: NFSv4.1: Remove a bogus BUG_ON() in nfs4_layoutreturn_done Ever since commit 0a57cdac3f (NFSv4.1 send layoutreturn to fence disconnected data server) we've been sending layoutreturn calls while there is potentially still outstanding I/O to the data servers. The reason we do this is to avoid races between replayed writes to the MDS and the original writes to the DS. When this happens, the BUG_ON() in nfs4_layoutreturn_done can be triggered because it assumes that we would never call layoutreturn without knowing that all I/O to the DS is finished. The fix is to remove the BUG_ON() now that the assumptions behind the test are obsolete. 
Reported-by: Boaz Harrosh Reported-by: Tigran Mkrtchyan Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org [>=3.5] --- fs/nfs/nfs4proc.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index f94f6b3928f..c77d296bdaa 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -6359,12 +6359,8 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata) return; } spin_lock(&lo->plh_inode->i_lock); - if (task->tk_status == 0) { - if (lrp->res.lrs_present) { - pnfs_set_layout_stateid(lo, &lrp->res.stateid, true); - } else - BUG_ON(!list_empty(&lo->plh_segs)); - } + if (task->tk_status == 0 && lrp->res.lrs_present) + pnfs_set_layout_stateid(lo, &lrp->res.stateid, true); lo->plh_block_lgets--; spin_unlock(&lo->plh_inode->i_lock); dprintk("<-- %s\n", __func__); -- cgit v1.2.3 From 41f63c5359d14ca995172b8f6eaffd93f60fec54 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Aug 2012 10:30:47 -0700 Subject: workqueue: use mod_delayed_work() instead of cancel + queue Convert delayed_work users doing cancel_delayed_work() followed by queue_delayed_work() to mod_delayed_work(). Most conversions are straight-forward. Ones worth mentioning are, * drivers/edac/edac_mc.c: edac_mc_workq_setup() converted to always use mod_delayed_work() and cancel loop in edac_mc_reset_delay_period() is dropped. * drivers/platform/x86/thinkpad_acpi.c: No need to remember whether watchdog is active or not. @fan_watchdog_active and related code dropped. * drivers/power/charger-manager.c: Seemingly a lot of delayed_work_pending() abuse going on here. [delayed_]work_pending() are unsynchronized and racy when used like this. I converted one instance in fullbatt_handler(). Please convert the rest so that it invokes workqueue APIs for the intended target state rather than trying to game work item pending state transitions. e.g. 
if timer should be modified - call mod_delayed_work(), canceled - call cancel_delayed_work[_sync](). * drivers/thermal/thermal_sys.c: thermal_zone_device_set_polling() simplified. Note that round_jiffies() calls in this function are meaningless. round_jiffies() work on absolute jiffies not delta delay used by delayed_work. v2: Tomi pointed out that __cancel_delayed_work() users can't be safely converted to mod_delayed_work(). They could be calling it from irq context and if that happens while delayed_work_timer_fn() is running, it could deadlock. __cancel_delayed_work() users are dropped. Signed-off-by: Tejun Heo Acked-by: Henrique de Moraes Holschuh Acked-by: Dmitry Torokhov Acked-by: Anton Vorontsov Acked-by: David Howells Cc: Tomi Valkeinen Cc: Jens Axboe Cc: Jiri Kosina Cc: Doug Thompson Cc: David Airlie Cc: Roland Dreier Cc: "John W. Linville" Cc: Zhang Rui Cc: Len Brown Cc: "J. Bruce Fields" Cc: Johannes Berg --- fs/nfs/nfs4renewd.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c index 6930bec91bc..1720d32ffa5 100644 --- a/fs/nfs/nfs4renewd.c +++ b/fs/nfs/nfs4renewd.c @@ -117,8 +117,7 @@ nfs4_schedule_state_renewal(struct nfs_client *clp) timeout = 5 * HZ; dprintk("%s: requeueing work. Lease period = %ld\n", __func__, (timeout + HZ - 1) / HZ); - cancel_delayed_work(&clp->cl_renewd); - schedule_delayed_work(&clp->cl_renewd, timeout); + mod_delayed_work(system_wq, &clp->cl_renewd, timeout); set_bit(NFS_CS_RENEWD, &clp->cl_res_state); spin_unlock(&clp->cl_lock); } -- cgit v1.2.3 From 1ae811ee27912a0521e4b92dc9a1850c0243a247 Mon Sep 17 00:00:00 2001 From: "bjschuma@gmail.com" Date: Wed, 8 Aug 2012 13:57:06 -0400 Subject: NFS: Fix a regression when loading the NFS v4 module Some systems have a modprobe.d/nfs.conf file that sets an nfs4 alias pointing to nfs.ko, rather than nfs4.ko. 
This can prevent the v4 module from loading on mount, since the kernel sees that something named "nfs4" has already been loaded. To work around this, I've renamed the modules to "nfsv2.ko" "nfsv3.ko" and "nfsv4.ko". I also had to move the nfs4_fs_type back to nfs.ko to ensure that `mount -t nfs4` still works. Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/Makefile | 18 +++++++++--------- fs/nfs/client.c | 2 +- fs/nfs/nfs4_fs.h | 3 +++ fs/nfs/nfs4super.c | 15 --------------- fs/nfs/super.c | 37 ++++++++++++++++++++++++++++++++++++- 5 files changed, 49 insertions(+), 26 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile index 8bf3a3f6925..b7db60897f9 100644 --- a/fs/nfs/Makefile +++ b/fs/nfs/Makefile @@ -12,19 +12,19 @@ nfs-$(CONFIG_ROOT_NFS) += nfsroot.o nfs-$(CONFIG_SYSCTL) += sysctl.o nfs-$(CONFIG_NFS_FSCACHE) += fscache.o fscache-index.o -obj-$(CONFIG_NFS_V2) += nfs2.o -nfs2-y := nfs2super.o proc.o nfs2xdr.o +obj-$(CONFIG_NFS_V2) += nfsv2.o +nfsv2-y := nfs2super.o proc.o nfs2xdr.o -obj-$(CONFIG_NFS_V3) += nfs3.o -nfs3-y := nfs3super.o nfs3client.o nfs3proc.o nfs3xdr.o -nfs3-$(CONFIG_NFS_V3_ACL) += nfs3acl.o +obj-$(CONFIG_NFS_V3) += nfsv3.o +nfsv3-y := nfs3super.o nfs3client.o nfs3proc.o nfs3xdr.o +nfsv3-$(CONFIG_NFS_V3_ACL) += nfs3acl.o -obj-$(CONFIG_NFS_V4) += nfs4.o -nfs4-y := nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o nfs4super.o nfs4file.o \ +obj-$(CONFIG_NFS_V4) += nfsv4.o +nfsv4-y := nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o nfs4super.o nfs4file.o \ delegation.o idmap.o callback.o callback_xdr.o callback_proc.o \ nfs4namespace.o nfs4getroot.o nfs4client.o -nfs4-$(CONFIG_SYSCTL) += nfs4sysctl.o -nfs4-$(CONFIG_NFS_V4_1) += pnfs.o pnfs_dev.o +nfsv4-$(CONFIG_SYSCTL) += nfs4sysctl.o +nfsv4-$(CONFIG_NFS_V4_1) += pnfs.o pnfs_dev.o obj-$(CONFIG_PNFS_FILE_LAYOUT) += nfs_layout_nfsv41_files.o nfs_layout_nfsv41_files-y := nfs4filelayout.o nfs4filelayoutdev.o diff --git a/fs/nfs/client.c b/fs/nfs/client.c 
index 9fc0d9dfc91..99694442b93 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -105,7 +105,7 @@ struct nfs_subversion *get_nfs_version(unsigned int version) if (IS_ERR(nfs)) { mutex_lock(&nfs_version_mutex); - request_module("nfs%d", version); + request_module("nfsv%d", version); nfs = find_nfs_version(version); mutex_unlock(&nfs_version_mutex); } diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 3b950dd81e8..da0618aeead 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -205,6 +205,9 @@ extern const struct dentry_operations nfs4_dentry_operations; int nfs_atomic_open(struct inode *, struct dentry *, struct file *, unsigned, umode_t, int *); +/* super.c */ +extern struct file_system_type nfs4_fs_type; + /* nfs4namespace.c */ rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *); struct rpc_clnt *nfs4_create_sec_client(struct rpc_clnt *, struct inode *, struct qstr *); diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c index 12a31a9dbcd..bd61221ad2c 100644 --- a/fs/nfs/nfs4super.c +++ b/fs/nfs/nfs4super.c @@ -23,14 +23,6 @@ static struct dentry *nfs4_referral_mount(struct file_system_type *fs_type, static struct dentry *nfs4_remote_referral_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data); -static struct file_system_type nfs4_fs_type = { - .owner = THIS_MODULE, - .name = "nfs4", - .mount = nfs_fs_mount, - .kill_sb = nfs_kill_super, - .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, -}; - static struct file_system_type nfs4_remote_fs_type = { .owner = THIS_MODULE, .name = "nfs4", @@ -344,14 +336,8 @@ static int __init init_nfs_v4(void) if (err) goto out1; - err = register_filesystem(&nfs4_fs_type); - if (err < 0) - goto out2; - register_nfs_version(&nfs_v4); return 0; -out2: - nfs4_unregister_sysctl(); out1: nfs_idmap_quit(); out: @@ -361,7 +347,6 @@ out: static void __exit exit_nfs_v4(void) { unregister_nfs_version(&nfs_v4); - unregister_filesystem(&nfs4_fs_type); 
nfs4_unregister_sysctl(); nfs_idmap_quit(); } diff --git a/fs/nfs/super.c b/fs/nfs/super.c index ac6a3c55dce..c4a15c55519 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -319,6 +319,34 @@ EXPORT_SYMBOL_GPL(nfs_sops); static void nfs4_validate_mount_flags(struct nfs_parsed_mount_data *); static int nfs4_validate_mount_data(void *options, struct nfs_parsed_mount_data *args, const char *dev_name); + +struct file_system_type nfs4_fs_type = { + .owner = THIS_MODULE, + .name = "nfs4", + .mount = nfs_fs_mount, + .kill_sb = nfs_kill_super, + .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, +}; +EXPORT_SYMBOL_GPL(nfs4_fs_type); + +static int __init register_nfs4_fs(void) +{ + return register_filesystem(&nfs4_fs_type); +} + +static void unregister_nfs4_fs(void) +{ + unregister_filesystem(&nfs4_fs_type); +} +#else +static int __init register_nfs4_fs(void) +{ + return 0; +} + +static void unregister_nfs4_fs(void) +{ +} #endif static struct shrinker acl_shrinker = { @@ -337,12 +365,18 @@ int __init register_nfs_fs(void) if (ret < 0) goto error_0; - ret = nfs_register_sysctl(); + ret = register_nfs4_fs(); if (ret < 0) goto error_1; + + ret = nfs_register_sysctl(); + if (ret < 0) + goto error_2; register_shrinker(&acl_shrinker); return 0; +error_2: + unregister_nfs4_fs(); error_1: unregister_filesystem(&nfs_fs_type); error_0: @@ -356,6 +390,7 @@ void __exit unregister_nfs_fs(void) { unregister_shrinker(&acl_shrinker); nfs_unregister_sysctl(); + unregister_nfs4_fs(); unregister_filesystem(&nfs_fs_type); } -- cgit v1.2.3 From 425e776d93a7a5070b77d4f458a5bab0f924652c Mon Sep 17 00:00:00 2001 From: "bjschuma@gmail.com" Date: Wed, 8 Aug 2012 13:57:10 -0400 Subject: NFS: Alias the nfs module to nfs4 This allows distros to remove the line from their modprobe configuration. 
Signed-off-by: Bryan Schumaker Cc: stable@vger.kernel.org Signed-off-by: Trond Myklebust --- fs/nfs/super.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/nfs') diff --git a/fs/nfs/super.c b/fs/nfs/super.c index c4a15c55519..239aff7338e 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -2680,4 +2680,6 @@ MODULE_PARM_DESC(max_session_slots, "Maximum number of outstanding NFSv4.1 " module_param(send_implementation_id, ushort, 0644); MODULE_PARM_DESC(send_implementation_id, "Send implementation ID with NFSv4.1 exchange_id"); +MODULE_ALIAS("nfs4"); + #endif /* CONFIG_NFS_V4 */ -- cgit v1.2.3 From 519d3959e30a98f8e135e7a16647c10af5ad63d5 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 14 Aug 2012 17:30:10 -0400 Subject: NFSv4: Fix pointer arithmetic in decode_getacl Resetting the cursor xdr->p to a previous value is not a safe practice: if the xdr_stream has crossed out of the initial iovec, then a bunch of other fields would need to be reset too. Fix this issue by using xdr_enter_page() so that the buffer gets page aligned at the bitmap _before_ we decode it. Also fix the confusion of the ACL length with the page buffer length by not adding the base offset to the ACL length... 
Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org --- fs/nfs/nfs4proc.c | 2 +- fs/nfs/nfs4xdr.c | 21 +++++++-------------- 2 files changed, 8 insertions(+), 15 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index c77d296bdaa..286ab707841 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3819,7 +3819,7 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu if (ret) goto out_free; - acl_len = res.acl_len - res.acl_data_offset; + acl_len = res.acl_len; if (acl_len > args.acl_len) nfs4_write_cached_acl(inode, NULL, 0, acl_len); else diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index ca13483edd6..54d3f5a9faa 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -5049,18 +5049,14 @@ static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req, uint32_t attrlen, bitmap[3] = {0}; int status; - size_t page_len = xdr->buf->page_len; res->acl_len = 0; if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0) goto out; + xdr_enter_page(xdr, xdr->buf->page_len); + bm_p = xdr->p; - res->acl_data_offset = be32_to_cpup(bm_p) + 2; - res->acl_data_offset <<= 2; - /* Check if the acl data starts beyond the allocated buffer */ - if (res->acl_data_offset > page_len) - return -ERANGE; if ((status = decode_attr_bitmap(xdr, bitmap)) != 0) goto out; @@ -5074,23 +5070,20 @@ static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req, /* The bitmap (xdr len + bitmaps) and the attr xdr len words * are stored with the acl data to handle the problem of * variable length bitmaps.*/ - xdr->p = bm_p; + res->acl_data_offset = (xdr->p - bm_p) << 2; /* We ignore &savep and don't do consistency checks on * the attr length. Let userspace figure it out.... 
*/ - attrlen += res->acl_data_offset; - if (attrlen > page_len) { + res->acl_len = attrlen; + if (attrlen + res->acl_data_offset > xdr->buf->page_len) { if (res->acl_flags & NFS4_ACL_LEN_REQUEST) { /* getxattr interface called with a NULL buf */ - res->acl_len = attrlen; goto out; } - dprintk("NFS: acl reply: attrlen %u > page_len %zu\n", - attrlen, page_len); + dprintk("NFS: acl reply: attrlen %u > page_len %u\n", + attrlen, xdr->buf->page_len); return -EINVAL; } - xdr_read_pages(xdr, attrlen); - res->acl_len = attrlen; } else status = -EOPNOTSUPP; -- cgit v1.2.3 From b291f1b1c86aa0c7bc3df2994e6a1a4e53f1fde0 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 14 Aug 2012 18:30:41 -0400 Subject: NFSv4: Fix the acl cache size calculation Currently, we do not take into account the size of the 16 byte struct nfs4_cached_acl header, when deciding whether or not we should cache the acl data. Consequently, we will end up allocating an 8k buffer in order to fit a maximum size 4k acl. This patch adjusts the calculation so that we limit the cache size to 4k for the acl header+data. 
Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 286ab707841..635274140b1 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3737,9 +3737,10 @@ out: static void nfs4_write_cached_acl(struct inode *inode, struct page **pages, size_t pgbase, size_t acl_len) { struct nfs4_cached_acl *acl; + size_t buflen = sizeof(*acl) + acl_len; - if (pages && acl_len <= PAGE_SIZE) { - acl = kmalloc(sizeof(*acl) + acl_len, GFP_KERNEL); + if (pages && buflen <= PAGE_SIZE) { + acl = kmalloc(buflen, GFP_KERNEL); if (acl == NULL) goto out; acl->cached = 1; -- cgit v1.2.3 From cff298c721099c9ac4cea7196a37097ba2847946 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 14 Aug 2012 17:14:17 -0400 Subject: NFSv4: Don't use private xdr_stream fields in decode_getacl Instead of using the private field xdr->p from struct xdr_stream, use the public xdr_stream_pos(). 
Signed-off-by: Trond Myklebust --- fs/nfs/nfs4xdr.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 54d3f5a9faa..1bfbd67c556 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -5045,10 +5045,10 @@ static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req, struct nfs_getaclres *res) { unsigned int savep; - __be32 *bm_p; uint32_t attrlen, bitmap[3] = {0}; int status; + unsigned int pg_offset; res->acl_len = 0; if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0) @@ -5056,7 +5056,8 @@ static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req, xdr_enter_page(xdr, xdr->buf->page_len); - bm_p = xdr->p; + /* Calculate the offset of the page data */ + pg_offset = xdr->buf->head[0].iov_len; if ((status = decode_attr_bitmap(xdr, bitmap)) != 0) goto out; @@ -5070,18 +5071,18 @@ static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req, /* The bitmap (xdr len + bitmaps) and the attr xdr len words * are stored with the acl data to handle the problem of * variable length bitmaps.*/ - res->acl_data_offset = (xdr->p - bm_p) << 2; + res->acl_data_offset = xdr_stream_pos(xdr) - pg_offset; /* We ignore &savep and don't do consistency checks on * the attr length. Let userspace figure it out.... 
*/ res->acl_len = attrlen; - if (attrlen + res->acl_data_offset > xdr->buf->page_len) { + if (attrlen > (xdr->nwords << 2)) { if (res->acl_flags & NFS4_ACL_LEN_REQUEST) { /* getxattr interface called with a NULL buf */ goto out; } dprintk("NFS: acl reply: attrlen %u > page_len %u\n", - attrlen, xdr->buf->page_len); + attrlen, xdr->nwords << 2); return -EINVAL; } } else -- cgit v1.2.3 From c5066945b7ea346a11424dbeb7830b7d7d00c206 Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Thu, 9 Aug 2012 14:05:49 -0400 Subject: NFS: Clear key construction data if the idmap upcall fails idmap_pipe_downcall already clears this field if the upcall succeeds, but if it fails (rpc.idmapd isn't running) the field will still be set on the next call triggering a BUG_ON(). This patch tries to handle all possible ways that the upcall could fail and clear the idmap key data for each one. Signed-off-by: Bryan Schumaker Tested-by: William Dauchy Cc: stable@vger.kernel.org [>= 3.4] Signed-off-by: Trond Myklebust --- fs/nfs/idmap.c | 56 ++++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 42 insertions(+), 14 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index b701358c39c..6703c73307a 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -61,6 +61,12 @@ struct idmap { struct mutex idmap_mutex; }; +struct idmap_legacy_upcalldata { + struct rpc_pipe_msg pipe_msg; + struct idmap_msg idmap_msg; + struct idmap *idmap; +}; + /** * nfs_fattr_init_names - initialise the nfs_fattr owner_name/group_name fields * @fattr: fully initialised struct nfs_fattr @@ -324,6 +330,7 @@ static ssize_t nfs_idmap_get_key(const char *name, size_t namelen, ret = nfs_idmap_request_key(&key_type_id_resolver_legacy, name, namelen, type, data, data_size, idmap); + idmap->idmap_key_cons = NULL; mutex_unlock(&idmap->idmap_mutex); } return ret; @@ -380,11 +387,13 @@ static const match_table_t nfs_idmap_tokens = { static int nfs_idmap_legacy_upcall(struct key_construction 
*, const char *, void *); static ssize_t idmap_pipe_downcall(struct file *, const char __user *, size_t); +static void idmap_release_pipe(struct inode *); static void idmap_pipe_destroy_msg(struct rpc_pipe_msg *); static const struct rpc_pipe_ops idmap_upcall_ops = { .upcall = rpc_pipe_generic_upcall, .downcall = idmap_pipe_downcall, + .release_pipe = idmap_release_pipe, .destroy_msg = idmap_pipe_destroy_msg, }; @@ -616,7 +625,8 @@ void nfs_idmap_quit(void) nfs_idmap_quit_keyring(); } -static int nfs_idmap_prepare_message(char *desc, struct idmap_msg *im, +static int nfs_idmap_prepare_message(char *desc, struct idmap *idmap, + struct idmap_msg *im, struct rpc_pipe_msg *msg) { substring_t substr; @@ -659,6 +669,7 @@ static int nfs_idmap_legacy_upcall(struct key_construction *cons, const char *op, void *aux) { + struct idmap_legacy_upcalldata *data; struct rpc_pipe_msg *msg; struct idmap_msg *im; struct idmap *idmap = (struct idmap *)aux; @@ -666,15 +677,15 @@ static int nfs_idmap_legacy_upcall(struct key_construction *cons, int ret = -ENOMEM; /* msg and im are freed in idmap_pipe_destroy_msg */ - msg = kmalloc(sizeof(*msg), GFP_KERNEL); - if (!msg) - goto out0; - - im = kmalloc(sizeof(*im), GFP_KERNEL); - if (!im) + data = kmalloc(sizeof(*data), GFP_KERNEL); + if (!data) goto out1; - ret = nfs_idmap_prepare_message(key->description, im, msg); + msg = &data->pipe_msg; + im = &data->idmap_msg; + data->idmap = idmap; + + ret = nfs_idmap_prepare_message(key->description, idmap, im, msg); if (ret < 0) goto out2; @@ -683,15 +694,15 @@ static int nfs_idmap_legacy_upcall(struct key_construction *cons, ret = rpc_queue_upcall(idmap->idmap_pipe, msg); if (ret < 0) - goto out2; + goto out3; return ret; +out3: + idmap->idmap_key_cons = NULL; out2: - kfree(im); + kfree(data); out1: - kfree(msg); -out0: complete_request_key(cons, ret); return ret; } @@ -775,9 +786,26 @@ out_incomplete: static void idmap_pipe_destroy_msg(struct rpc_pipe_msg *msg) { + struct idmap_legacy_upcalldata 
*data = container_of(msg, + struct idmap_legacy_upcalldata, + pipe_msg); + struct idmap *idmap = data->idmap; + struct key_construction *cons; + if (msg->errno) { + cons = ACCESS_ONCE(idmap->idmap_key_cons); + idmap->idmap_key_cons = NULL; + complete_request_key(cons, msg->errno); + } /* Free memory allocated in nfs_idmap_legacy_upcall() */ - kfree(msg->data); - kfree(msg); + kfree(data); +} + +static void +idmap_release_pipe(struct inode *inode) +{ + struct rpc_inode *rpci = RPC_I(inode); + struct idmap *idmap = (struct idmap *)rpci->private; + idmap->idmap_key_cons = NULL; } int nfs_map_name_to_uid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *uid) -- cgit v1.2.3 From 12dfd080556124088ed61a292184947711b46cbe Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Thu, 9 Aug 2012 14:05:50 -0400 Subject: NFS: return -ENOKEY when the upcall fails to map the name This allows the normal error-paths to handle the error, rather than making a special call to complete_request_key() just for this instance. 
Signed-off-by: Bryan Schumaker Tested-by: William Dauchy Cc: stable@vger.kernel.org [>= 3.4] Signed-off-by: Trond Myklebust --- fs/nfs/idmap.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index 6703c73307a..a850079467d 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -760,9 +760,8 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) } if (!(im.im_status & IDMAP_STATUS_SUCCESS)) { - ret = mlen; - complete_request_key(cons, -ENOKEY); - goto out_incomplete; + ret = -ENOKEY; + goto out; } namelen_in = strnlen(im.im_name, IDMAP_NAMESZ); @@ -779,7 +778,6 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) out: complete_request_key(cons, ret); -out_incomplete: return ret; } -- cgit v1.2.3 From 7653f6ff4ebab2a094e65b60fb19ee66ed2f78e7 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Aug 2012 12:12:29 -0400 Subject: NFSv4: Ensure that nfs4_alloc_client cleans up on error. Any pointer that was allocated through nfs_alloc_client() needs to be freed via a call to nfs_free_client(). Reported-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- fs/nfs/nfs4client.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index cbcdfaf3250..24eb663f8ed 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -74,7 +74,7 @@ struct nfs_client *nfs4_alloc_client(const struct nfs_client_initdata *cl_init) return clp; error: - kfree(clp); + nfs_free_client(clp); return ERR_PTR(err); } -- cgit v1.2.3 From 086600430493e04b802bee6e5b3ce0458e4eb77f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Aug 2012 12:42:15 -0400 Subject: NFSv3: Ensure that do_proc_get_root() reports errors correctly If the rpc call to NFS3PROC_FSINFO fails, then we need to report that error so that the mount fails. 
Otherwise we can end up with a superblock with completely unusable values for block sizes, maxfilesize, etc. Reported-by: Yuanming Chen Cc: stable@vger.kernel.org Signed-off-by: Trond Myklebust --- fs/nfs/nfs3proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 0952c791df3..d6b3b5f2d77 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -69,7 +69,7 @@ do_proc_get_root(struct rpc_clnt *client, struct nfs_fh *fhandle, nfs_fattr_init(info->fattr); status = rpc_call_sync(client, &msg, 0); dprintk("%s: reply fsinfo: %d\n", __func__, status); - if (!(info->fattr->valid & NFS_ATTR_FATTR)) { + if (status == 0 && !(info->fattr->valid & NFS_ATTR_FATTR)) { msg.rpc_proc = &nfs3_procedures[NFS3PROC_GETATTR]; msg.rpc_resp = info->fattr; status = rpc_call_sync(client, &msg, 0); -- cgit v1.2.3 From 5b444cc9a4c979aa0fa185c8ddca221462a34b7a Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Fri, 17 Aug 2012 21:47:53 -0400 Subject: svcrpc: remove handling of unknown errors from svc_recv svc_recv() returns only -EINTR or -EAGAIN. If we really want to worry about the case where it has a bug that causes it to return something else, we could stick a WARN() in svc_recv. But it's silly to require every caller to have all this boilerplate to handle that case. Signed-off-by: J. 
Bruce Fields --- fs/nfs/callback.c | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 4c8459e5bde..d9e2a188078 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -45,7 +45,7 @@ unsigned short nfs_callback_tcpport6; static int nfs4_callback_svc(void *vrqstp) { - int err, preverr = 0; + int err; struct svc_rqst *rqstp = vrqstp; set_freezable(); @@ -55,20 +55,8 @@ nfs4_callback_svc(void *vrqstp) * Listen for a request on the socket */ err = svc_recv(rqstp, MAX_SCHEDULE_TIMEOUT); - if (err == -EAGAIN || err == -EINTR) { - preverr = err; + if (err == -EAGAIN || err == -EINTR) continue; - } - if (err < 0) { - if (err != preverr) { - printk(KERN_WARNING "NFS: %s: unexpected error " - "from svc_recv (%d)\n", __func__, err); - preverr = err; - } - schedule_timeout_uninterruptible(HZ); - continue; - } - preverr = err; svc_process(rqstp); } return 0; -- cgit v1.2.3 From 1856b225ca1f80446938c9ec4a0b330c1772ec45 Mon Sep 17 00:00:00 2001 From: Peter Meerwald Date: Sat, 18 Aug 2012 17:38:54 +0200 Subject: nfs: comment fix Signed-off-by: Peter Meerwald Signed-off-by: Jiri Kosina --- fs/nfs/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 8b2a2977b72..120d8e98ee5 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1590,7 +1590,7 @@ static int nfs_parse_mount_options(char *raw, /* * verify that any proto=/mountproto= options match the address - * familiies in the addr=/mountaddr= options. + * families in the addr=/mountaddr= options. 
*/ if (protofamily != AF_UNSPEC && protofamily != mnt->nfs_server.address.ss_family) -- cgit v1.2.3 From c3f52af3e03013db5237e339c817beaae5ec9e3a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 3 Sep 2012 14:56:02 -0400 Subject: NFS: Fix the initialisation of the readdir 'cookieverf' array When the NFS_COOKIEVERF helper macro was converted into a static inline function in commit 99fadcd764 (nfs: convert NFS_*(inode) helpers to static inline), we broke the initialisation of the readdir cookies, since that depended on doing a memset with an argument of 'sizeof(NFS_COOKIEVERF(inode))' which therefore changed from sizeof(be32 cookieverf[2]) to sizeof(be32 *). At this point, NFS_COOKIEVERF seems to be more of an obfuscation than a helper, so the best thing would be to just get rid of it. Also see: https://bugzilla.kernel.org/show_bug.cgi?id=46881 Reported-by: Andi Kleen Reported-by: David Binderman Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org --- fs/nfs/inode.c | 2 +- fs/nfs/nfs3proc.c | 2 +- fs/nfs/nfs4proc.c | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index c6e895f0fbf..9b47610338f 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -154,7 +154,7 @@ static void nfs_zap_caches_locked(struct inode *inode) nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); nfsi->attrtimeo_timestamp = jiffies; - memset(NFS_COOKIEVERF(inode), 0, sizeof(NFS_COOKIEVERF(inode))); + memset(NFS_I(inode)->cookieverf, 0, sizeof(NFS_I(inode)->cookieverf)); if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL|NFS_INO_REVAL_PAGECACHE; else diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index d6b3b5f2d77..69322096c32 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -643,7 +643,7 @@ nfs3_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, u64 cookie, struct page **pages, unsigned 
int count, int plus) { struct inode *dir = dentry->d_inode; - __be32 *verf = NFS_COOKIEVERF(dir); + __be32 *verf = NFS_I(dir)->cookieverf; struct nfs3_readdirargs arg = { .fh = NFS_FH(dir), .cookie = cookie, diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 635274140b1..86b4c736103 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3215,11 +3215,11 @@ static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, dentry->d_parent->d_name.name, dentry->d_name.name, (unsigned long long)cookie); - nfs4_setup_readdir(cookie, NFS_COOKIEVERF(dir), dentry, &args); + nfs4_setup_readdir(cookie, NFS_I(dir)->cookieverf, dentry, &args); res.pgbase = args.pgbase; status = nfs4_call_sync(NFS_SERVER(dir)->client, NFS_SERVER(dir), &msg, &args.seq_args, &res.seq_res, 0); if (status >= 0) { - memcpy(NFS_COOKIEVERF(dir), res.verifier.data, NFS4_VERIFIER_SIZE); + memcpy(NFS_I(dir)->cookieverf, res.verifier.data, NFS4_VERIFIER_SIZE); status += args.pgbase; } -- cgit v1.2.3 From 872ece86ea5c367aa92f44689c2d01a1c767aeb3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 4 Sep 2012 11:05:07 -0400 Subject: NFS: Fix a problem with the legacy binary mount code Apparently, am-utils is still using the legacy binary mountdata interface, and is having trouble parsing /proc/mounts due to the 'port=' field being incorrectly set. The following patch should fix up the regression. 
Reported-by: Marius Tolzmann Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org --- fs/nfs/super.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/nfs') diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 239aff7338e..b8eda700584 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1867,6 +1867,7 @@ static int nfs23_validate_mount_data(void *options, memcpy(sap, &data->addr, sizeof(data->addr)); args->nfs_server.addrlen = sizeof(data->addr); + args->nfs_server.port = ntohs(data->addr.sin_port); if (!nfs_verify_server_address(sap)) goto out_no_address; @@ -2564,6 +2565,7 @@ static int nfs4_validate_mount_data(void *options, return -EFAULT; if (!nfs_verify_server_address(sap)) goto out_no_address; + args->nfs_server.port = ntohs(((struct sockaddr_in *)sap)->sin_port); if (data->auth_flavourlen) { if (data->auth_flavourlen > 1) -- cgit v1.2.3 From 21f498c2f73bd6150d82931f09965826dca0b5f2 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 24 Aug 2012 10:59:25 -0400 Subject: NFSv4: Fix range checking in __nfs4_get_acl_uncached and __nfs4_proc_set_acl Ensure that the user supplied buffer size doesn't cause us to overflow the 'pages' array. Also fix up some confusion between the use of PAGE_SIZE and PAGE_CACHE_SIZE when calculating buffer sizes. We're not using the page cache for anything here. 
Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 86b4c736103..6b94f2d5253 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3653,11 +3653,11 @@ static inline int nfs4_server_supports_acls(struct nfs_server *server) && (server->acl_bitmask & ACL4_SUPPORT_DENY_ACL); } -/* Assuming that XATTR_SIZE_MAX is a multiple of PAGE_CACHE_SIZE, and that - * it's OK to put sizeof(void) * (XATTR_SIZE_MAX/PAGE_CACHE_SIZE) bytes on +/* Assuming that XATTR_SIZE_MAX is a multiple of PAGE_SIZE, and that + * it's OK to put sizeof(void) * (XATTR_SIZE_MAX/PAGE_SIZE) bytes on * the stack. */ -#define NFS4ACL_MAXPAGES (XATTR_SIZE_MAX >> PAGE_CACHE_SHIFT) +#define NFS4ACL_MAXPAGES DIV_ROUND_UP(XATTR_SIZE_MAX, PAGE_SIZE) static int buf_to_pages_noslab(const void *buf, size_t buflen, struct page **pages, unsigned int *pgbase) @@ -3668,7 +3668,7 @@ static int buf_to_pages_noslab(const void *buf, size_t buflen, spages = pages; do { - len = min_t(size_t, PAGE_CACHE_SIZE, buflen); + len = min_t(size_t, PAGE_SIZE, buflen); newpage = alloc_page(GFP_KERNEL); if (newpage == NULL) @@ -3782,17 +3782,16 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu .rpc_argp = &args, .rpc_resp = &res, }; - int ret = -ENOMEM, npages, i; + unsigned int npages = DIV_ROUND_UP(buflen, PAGE_SIZE); + int ret = -ENOMEM, i; size_t acl_len = 0; - npages = (buflen + PAGE_SIZE - 1) >> PAGE_SHIFT; /* As long as we're doing a round trip to the server anyway, * let's be prepared for a page of acl data. 
*/ if (npages == 0) npages = 1; - - /* Add an extra page to handle the bitmap returned */ - npages++; + if (npages > ARRAY_SIZE(pages)) + return -ERANGE; for (i = 0; i < npages; i++) { pages[i] = alloc_page(GFP_KERNEL); @@ -3891,10 +3890,13 @@ static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl .rpc_argp = &arg, .rpc_resp = &res, }; + unsigned int npages = DIV_ROUND_UP(buflen, PAGE_SIZE); int ret, i; if (!nfs4_server_supports_acls(server)) return -EOPNOTSUPP; + if (npages > ARRAY_SIZE(pages)) + return -ERANGE; i = buf_to_pages_noslab(buf, buflen, arg.acl_pages, &arg.acl_pgbase); if (i < 0) return i; -- cgit v1.2.3 From 1f1ea6c2d9d8c0be9ec56454b05315273b5de8ce Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 26 Aug 2012 11:44:43 -0700 Subject: NFSv4: Fix buffer overflow checking in __nfs4_get_acl_uncached Pass the checks made by decode_getacl back to __nfs4_get_acl_uncached so that it knows if the acl has been truncated. The current overflow checking is broken, resulting in Oopses on user-triggered nfs4_getfacl calls, and is opaque to the point where several attempts at fixing it have failed. This patch tries to clean up the code in addition to fixing the Oopses by ensuring that the overflow checks are performed in a single place (decode_getacl). If the overflow check failed, we will still be able to report the acl length, but at least we will no longer attempt to cache the acl or copy the truncated contents to user space. 
Reported-by: Sachin Prabhu Signed-off-by: Trond Myklebust Tested-by: Sachin Prabhu --- fs/nfs/nfs4proc.c | 31 ++++++++++++------------------- fs/nfs/nfs4xdr.c | 14 +++++--------- 2 files changed, 17 insertions(+), 28 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 6b94f2d5253..1e50326d00d 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3739,7 +3739,7 @@ static void nfs4_write_cached_acl(struct inode *inode, struct page **pages, size struct nfs4_cached_acl *acl; size_t buflen = sizeof(*acl) + acl_len; - if (pages && buflen <= PAGE_SIZE) { + if (buflen <= PAGE_SIZE) { acl = kmalloc(buflen, GFP_KERNEL); if (acl == NULL) goto out; @@ -3784,7 +3784,6 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu }; unsigned int npages = DIV_ROUND_UP(buflen, PAGE_SIZE); int ret = -ENOMEM, i; - size_t acl_len = 0; /* As long as we're doing a round trip to the server anyway, * let's be prepared for a page of acl data. */ @@ -3807,11 +3806,6 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu args.acl_len = npages * PAGE_SIZE; args.acl_pgbase = 0; - /* Let decode_getfacl know not to fail if the ACL data is larger than - * the page we send as a guess */ - if (buf == NULL) - res.acl_flags |= NFS4_ACL_LEN_REQUEST; - dprintk("%s buf %p buflen %zu npages %d args.acl_len %zu\n", __func__, buf, buflen, npages, args.acl_len); ret = nfs4_call_sync(NFS_SERVER(inode)->client, NFS_SERVER(inode), @@ -3819,20 +3813,19 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu if (ret) goto out_free; - acl_len = res.acl_len; - if (acl_len > args.acl_len) - nfs4_write_cached_acl(inode, NULL, 0, acl_len); - else - nfs4_write_cached_acl(inode, pages, res.acl_data_offset, - acl_len); - if (buf) { + /* Handle the case where the passed-in buffer is too short */ + if (res.acl_flags & NFS4_ACL_TRUNC) { + /* Did the user only issue a request for the acl length? 
*/ + if (buf == NULL) + goto out_ok; ret = -ERANGE; - if (acl_len > buflen) - goto out_free; - _copy_from_pages(buf, pages, res.acl_data_offset, - acl_len); + goto out_free; } - ret = acl_len; + nfs4_write_cached_acl(inode, pages, res.acl_data_offset, res.acl_len); + if (buf) + _copy_from_pages(buf, pages, res.acl_data_offset, res.acl_len); +out_ok: + ret = res.acl_len; out_free: for (i = 0; i < npages; i++) if (pages[i]) diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 1bfbd67c556..541e796e6db 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -5072,18 +5072,14 @@ static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req, * are stored with the acl data to handle the problem of * variable length bitmaps.*/ res->acl_data_offset = xdr_stream_pos(xdr) - pg_offset; - - /* We ignore &savep and don't do consistency checks on - * the attr length. Let userspace figure it out.... */ res->acl_len = attrlen; - if (attrlen > (xdr->nwords << 2)) { - if (res->acl_flags & NFS4_ACL_LEN_REQUEST) { - /* getxattr interface called with a NULL buf */ - goto out; - } + + /* Check for receive buffer overflow */ + if (res->acl_len > (xdr->nwords << 2) || + res->acl_len + res->acl_data_offset > xdr->buf->page_len) { + res->acl_flags |= NFS4_ACL_TRUNC; dprintk("NFS: acl reply: attrlen %u > page_len %u\n", attrlen, xdr->nwords << 2); - return -EINVAL; } } else status = -EOPNOTSUPP; -- cgit v1.2.3 From 01913b49cf1dc6409a07dd2a4cc6af2e77f3c410 Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Thu, 6 Sep 2012 15:54:27 -0400 Subject: NFS: return error from decode_getfh in decode open If decode_getfh failed, nfs4_xdr_dec_open would return 0 since the last decode_* call must have succeeded. 
Cc: stable@vger.kernel.org Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust --- fs/nfs/nfs4xdr.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 541e796e6db..8dba6bd4855 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -6225,7 +6225,8 @@ static int nfs4_xdr_dec_open(struct rpc_rqst *rqstp, struct xdr_stream *xdr, status = decode_open(xdr, res); if (status) goto out; - if (decode_getfh(xdr, &res->fh) != 0) + status = decode_getfh(xdr, &res->fh); + if (status) goto out; decode_getfattr(xdr, res->f_attr, res->server); out: -- cgit v1.2.3 From 7b281ee026552f10862b617a2a51acf49c829554 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 11 Sep 2012 15:38:32 -0400 Subject: NFS: fsync() must exit with an error if page writeback failed We need to ensure that if the call to filemap_write_and_wait_range() fails, then we report that error back to the application. Signed-off-by: Trond Myklebust --- fs/nfs/file.c | 4 +++- fs/nfs/nfs4file.c | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 75d6d0a3d32..6a7fcab7ecb 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -287,10 +287,12 @@ nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) struct inode *inode = file->f_path.dentry->d_inode; ret = filemap_write_and_wait_range(inode->i_mapping, start, end); + if (ret != 0) + goto out; mutex_lock(&inode->i_mutex); ret = nfs_file_fsync_commit(file, start, end, datasync); mutex_unlock(&inode->i_mutex); - +out: return ret; } diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index acb65e7887f..eb5eb8eef4d 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -96,13 +96,15 @@ nfs4_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) struct inode *inode = file->f_path.dentry->d_inode; ret = filemap_write_and_wait_range(inode->i_mapping, start, end); + if (ret 
!= 0) + goto out; mutex_lock(&inode->i_mutex); ret = nfs_file_fsync_commit(file, start, end, datasync); if (!ret && !datasync) /* application has asked for meta-data sync */ ret = pnfs_layoutcommit_inode(inode, true); mutex_unlock(&inode->i_mutex); - +out: return ret; } -- cgit v1.2.3 From 5f3a4a28ec140a90e6058d1d09f6b1f235d485e5 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 10 Sep 2012 20:17:44 -0700 Subject: userns: Pass a userns parameter into posix_acl_to_xattr and posix_acl_from_xattr - Pass the user namespace the uid and gid values in the xattr are stored in into posix_acl_from_xattr. - Pass the user namespace kuid and kgid values should be converted into when storing uid and gid values in an xattr in posix_acl_to_xattr. - Modify all callers of posix_acl_from_xattr and posix_acl_to_xattr to pass in &init_user_ns. In the short term this change is not strictly needed but it makes the code clearer. In the longer term this change is necessary to be able to mount filesystems outside of the initial user namespace that natively store posix acls in the linux xattr format. Cc: Theodore Tso Cc: Andrew Morton Cc: Andreas Dilger Cc: Jan Kara Cc: Al Viro Signed-off-by: "Eric W. 
Biederman" --- fs/nfs/nfs3acl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c index e4498dc351a..4a1aafba6a2 100644 --- a/fs/nfs/nfs3acl.c +++ b/fs/nfs/nfs3acl.c @@ -70,7 +70,7 @@ ssize_t nfs3_getxattr(struct dentry *dentry, const char *name, if (type == ACL_TYPE_ACCESS && acl->a_count == 0) error = -ENODATA; else - error = posix_acl_to_xattr(acl, buffer, size); + error = posix_acl_to_xattr(&init_user_ns, acl, buffer, size); posix_acl_release(acl); } else error = -ENODATA; @@ -92,7 +92,7 @@ int nfs3_setxattr(struct dentry *dentry, const char *name, else return -EOPNOTSUPP; - acl = posix_acl_from_xattr(value, size); + acl = posix_acl_from_xattr(&init_user_ns, value, size); if (IS_ERR(acl)) return PTR_ERR(acl); error = nfs3_proc_setacl(inode, type, acl); -- cgit v1.2.3 From e8d920c58ddb45126e1b306854f6e34b88446baf Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Fri, 21 Sep 2012 12:27:41 +0800 Subject: NFS: fix the return value check by using IS_ERR In case of error, the function rpcauth_create() returns ERR_PTR() and never returns a NULL pointer. The NULL test in the return value check should be replaced with IS_ERR(). The dpatch engine was used to auto-generate this patch.
(https://github.com/weiyj/dpatch) Signed-off-by: Wei Yongjun Signed-off-by: Trond Myklebust --- fs/nfs/nfs4namespace.c | 2 +- fs/nfs/nfs4proc.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c index 017b4b01a69..398d5fd7415 100644 --- a/fs/nfs/nfs4namespace.c +++ b/fs/nfs/nfs4namespace.c @@ -205,7 +205,7 @@ struct rpc_clnt *nfs4_create_sec_client(struct rpc_clnt *clnt, struct inode *ino return clone; auth = rpcauth_create(flavor, clone); - if (!auth) { + if (IS_ERR(auth)) { rpc_shutdown_client(clone); clone = ERR_PTR(-EIO); } diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 1e50326d00d..ddfebb12801 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2399,7 +2399,7 @@ static int nfs4_lookup_root_sec(struct nfs_server *server, struct nfs_fh *fhandl int ret; auth = rpcauth_create(flavor, server->client); - if (!auth) { + if (IS_ERR(auth)) { ret = -EIO; goto out; } -- cgit v1.2.3 From 62d98c935456ee121b03d6a68aa3091a04085b7e Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 17 Sep 2012 16:46:34 +1000 Subject: NFS4: avoid underflow when converting error to pointer. In nfs4_create_sec_client, 'flavor' can hold a negative error code (returned from nfs4_negotiate_security), even though it is an 'enum' and hence unsigned. The code is careful to cast it to an (int) before testing if it is negative, however it doesn't cast to an (int) before calling ERR_PTR. On a machine where "void*" is larger than "int", this results in the unsigned equivalent of -1 (e.g. 0xffffffff) being converted to a pointer. Subsequent code determines that this is not negative, and so dereferences it with predictable results. So: cast 'flavor' to a (signed) int before passing to ERR_PTR. 
cc: Benny Halevy Signed-off-by: NeilBrown Signed-off-by: Trond Myklebust --- fs/nfs/nfs4namespace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c index 398d5fd7415..4fdeb1b7042 100644 --- a/fs/nfs/nfs4namespace.c +++ b/fs/nfs/nfs4namespace.c @@ -198,7 +198,7 @@ struct rpc_clnt *nfs4_create_sec_client(struct rpc_clnt *clnt, struct inode *ino flavor = nfs4_negotiate_security(inode, name); if ((int)flavor < 0) - return ERR_PTR(flavor); + return ERR_PTR((int)flavor); clone = rpc_clone_client(clnt); if (IS_ERR(clone)) -- cgit v1.2.3 From 13fe4ba1b64c099843c75b4f0633ad30a4526637 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 1 Aug 2012 14:21:12 -0400 Subject: NFSv4.1: decode_getdeviceinfo should check xdr_read_pages() return value Signed-off-by: Trond Myklebust --- fs/nfs/nfs4xdr.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 8dba6bd4855..a756349b0fa 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -5642,7 +5642,8 @@ static int decode_getdeviceinfo(struct xdr_stream *xdr, * and places the remaining xdr data in xdr_buf->tail */ pdev->mincount = be32_to_cpup(p); - xdr_read_pages(xdr, pdev->mincount); /* include space for the length */ + if (xdr_read_pages(xdr, pdev->mincount) != pdev->mincount) + goto out_overflow; /* Parse notification bitmap, verifying that it is zero. */ p = xdr_inline_decode(xdr, 4); -- cgit v1.2.3 From 0e24d849c4ea777c59955b241fd3af14a1b84af5 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 28 Sep 2012 12:03:09 -0400 Subject: NFSv4: Remove BUG_ON() and ACCESS_ONCE() calls in the idmapper The use of ACCESS_ONCE() is wrong, since the various routines that set/clear idmap->idmap_key_cons should be strictly ordered w.r.t. each other, and the idmap->idmap_mutex ensures that only one thread at a time may be in an upcall situation. 
Also replace the BUG_ON()s with WARN_ON_ONCE() where appropriate. Signed-off-by: Trond Myklebust --- fs/nfs/idmap.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index a850079467d..79f6424aa08 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -465,8 +465,6 @@ nfs_idmap_new(struct nfs_client *clp) struct rpc_pipe *pipe; int error; - BUG_ON(clp->cl_idmap != NULL); - idmap = kzalloc(sizeof(*idmap), GFP_KERNEL); if (idmap == NULL) return -ENOMEM; @@ -510,7 +508,6 @@ static int __rpc_pipefs_event(struct nfs_client *clp, unsigned long event, switch (event) { case RPC_PIPEFS_MOUNT: - BUG_ON(clp->cl_rpcclient->cl_dentry == NULL); err = __nfs_idmap_register(clp->cl_rpcclient->cl_dentry, clp->cl_idmap, clp->cl_idmap->idmap_pipe); @@ -689,7 +686,11 @@ static int nfs_idmap_legacy_upcall(struct key_construction *cons, if (ret < 0) goto out2; - BUG_ON(idmap->idmap_key_cons != NULL); + if (idmap->idmap_key_cons != NULL) { + WARN_ON_ONCE(1); + ret = -EAGAIN; + goto out2; + } idmap->idmap_key_cons = cons; ret = rpc_queue_upcall(idmap->idmap_pipe, msg); @@ -746,7 +747,7 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) * will have been woken up and someone else may now have used * idmap_key_cons - so after this point we may no longer touch it. 
*/ - cons = ACCESS_ONCE(idmap->idmap_key_cons); + cons = idmap->idmap_key_cons; idmap->idmap_key_cons = NULL; if (mlen != sizeof(im)) { @@ -790,7 +791,7 @@ idmap_pipe_destroy_msg(struct rpc_pipe_msg *msg) struct idmap *idmap = data->idmap; struct key_construction *cons; if (msg->errno) { - cons = ACCESS_ONCE(idmap->idmap_key_cons); + cons = idmap->idmap_key_cons; idmap->idmap_key_cons = NULL; complete_request_key(cons, msg->errno); } -- cgit v1.2.3 From e9ab41b620e4b679ed069ab05cb85e67870b7c87 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 27 Sep 2012 15:44:19 -0400 Subject: NFSv4: Clean up the legacy idmapper upcall Replace the BUG_ON(idmap->idmap_key_cons != NULL) with a WARN_ON_ONCE(). Then get rid of the ACCESS_ONCE(idmap->idmap_key_cons). Then add helper functions for starting, finishing and aborting the legacy upcall. Signed-off-by: Trond Myklebust Cc: Bryan Schumaker --- fs/nfs/idmap.c | 65 +++++++++++++++++++++++++++++++++++++++------------------- 1 file changed, 44 insertions(+), 21 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index 79f6424aa08..8222ad86145 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -330,7 +330,6 @@ static ssize_t nfs_idmap_get_key(const char *name, size_t namelen, ret = nfs_idmap_request_key(&key_type_id_resolver_legacy, name, namelen, type, data, data_size, idmap); - idmap->idmap_key_cons = NULL; mutex_unlock(&idmap->idmap_mutex); } return ret; @@ -662,6 +661,34 @@ out: return ret; } +static bool +nfs_idmap_prepare_pipe_upcall(struct idmap *idmap, + struct key_construction *cons) +{ + if (idmap->idmap_key_cons != NULL) { + WARN_ON_ONCE(1); + return false; + } + idmap->idmap_key_cons = cons; + return true; +} + +static void +nfs_idmap_complete_pipe_upcall_locked(struct idmap *idmap, int ret) +{ + struct key_construction *cons = idmap->idmap_key_cons; + + idmap->idmap_key_cons = NULL; + complete_request_key(cons, ret); +} + +static void +nfs_idmap_abort_pipe_upcall(struct idmap *idmap, 
int ret) +{ + if (idmap->idmap_key_cons != NULL) + nfs_idmap_complete_pipe_upcall_locked(idmap, ret); +} + static int nfs_idmap_legacy_upcall(struct key_construction *cons, const char *op, void *aux) @@ -686,21 +713,17 @@ static int nfs_idmap_legacy_upcall(struct key_construction *cons, if (ret < 0) goto out2; - if (idmap->idmap_key_cons != NULL) { - WARN_ON_ONCE(1); - ret = -EAGAIN; + ret = -EAGAIN; + if (!nfs_idmap_prepare_pipe_upcall(idmap, cons)) goto out2; - } - idmap->idmap_key_cons = cons; ret = rpc_queue_upcall(idmap->idmap_pipe, msg); - if (ret < 0) - goto out3; + if (ret < 0) { + nfs_idmap_abort_pipe_upcall(idmap, ret); + kfree(data); + } return ret; - -out3: - idmap->idmap_key_cons = NULL; out2: kfree(data); out1: @@ -741,14 +764,15 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) struct key_construction *cons; struct idmap_msg im; size_t namelen_in; - int ret; + int ret = -ENOKEY; /* If instantiation is successful, anyone waiting for key construction * will have been woken up and someone else may now have used * idmap_key_cons - so after this point we may no longer touch it. 
*/ cons = idmap->idmap_key_cons; - idmap->idmap_key_cons = NULL; + if (cons == NULL) + goto out_noupcall; if (mlen != sizeof(im)) { ret = -ENOSPC; @@ -778,7 +802,8 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) } out: - complete_request_key(cons, ret); + nfs_idmap_complete_pipe_upcall_locked(idmap, ret); +out_noupcall: return ret; } @@ -789,12 +814,9 @@ idmap_pipe_destroy_msg(struct rpc_pipe_msg *msg) struct idmap_legacy_upcalldata, pipe_msg); struct idmap *idmap = data->idmap; - struct key_construction *cons; - if (msg->errno) { - cons = idmap->idmap_key_cons; - idmap->idmap_key_cons = NULL; - complete_request_key(cons, msg->errno); - } + + if (msg->errno) + nfs_idmap_abort_pipe_upcall(idmap, msg->errno); /* Free memory allocated in nfs_idmap_legacy_upcall() */ kfree(data); } @@ -804,7 +826,8 @@ idmap_release_pipe(struct inode *inode) { struct rpc_inode *rpci = RPC_I(inode); struct idmap *idmap = (struct idmap *)rpci->private; - idmap->idmap_key_cons = NULL; + + nfs_idmap_abort_pipe_upcall(idmap, -EPIPE); } int nfs_map_name_to_uid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *uid) -- cgit v1.2.3 From 0cac120233305b614cfe3ad419f3655876066017 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 27 Sep 2012 16:15:00 -0400 Subject: NFSv4: Ensure that idmap_pipe_downcall sanity-checks the downcall data Use the idmapper upcall data to verify that the legacy idmapper daemon is indeed responding to an upcall that we sent. 
Signed-off-by: Trond Myklebust Cc: Bryan Schumaker --- fs/nfs/idmap.c | 62 +++++++++++++++++++++++++++++++++++----------------------- 1 file changed, 37 insertions(+), 25 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index 8222ad86145..7ac93e0dd4c 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -55,18 +55,19 @@ static const struct cred *id_resolver_cache; static struct key_type key_type_id_resolver_legacy; -struct idmap { - struct rpc_pipe *idmap_pipe; - struct key_construction *idmap_key_cons; - struct mutex idmap_mutex; -}; - struct idmap_legacy_upcalldata { struct rpc_pipe_msg pipe_msg; struct idmap_msg idmap_msg; + struct key_construction *key_cons; struct idmap *idmap; }; +struct idmap { + struct rpc_pipe *idmap_pipe; + struct idmap_legacy_upcalldata *idmap_upcall_data; + struct mutex idmap_mutex; +}; + /** * nfs_fattr_init_names - initialise the nfs_fattr owner_name/group_name fields * @fattr: fully initialised struct nfs_fattr @@ -663,29 +664,30 @@ out: static bool nfs_idmap_prepare_pipe_upcall(struct idmap *idmap, - struct key_construction *cons) + struct idmap_legacy_upcalldata *data) { - if (idmap->idmap_key_cons != NULL) { + if (idmap->idmap_upcall_data != NULL) { WARN_ON_ONCE(1); return false; } - idmap->idmap_key_cons = cons; + idmap->idmap_upcall_data = data; return true; } static void nfs_idmap_complete_pipe_upcall_locked(struct idmap *idmap, int ret) { - struct key_construction *cons = idmap->idmap_key_cons; + struct key_construction *cons = idmap->idmap_upcall_data->key_cons; - idmap->idmap_key_cons = NULL; + kfree(idmap->idmap_upcall_data); + idmap->idmap_upcall_data = NULL; complete_request_key(cons, ret); } static void nfs_idmap_abort_pipe_upcall(struct idmap *idmap, int ret) { - if (idmap->idmap_key_cons != NULL) + if (idmap->idmap_upcall_data != NULL) nfs_idmap_complete_pipe_upcall_locked(idmap, ret); } @@ -714,14 +716,12 @@ static int nfs_idmap_legacy_upcall(struct key_construction *cons, goto out2; ret = 
-EAGAIN; - if (!nfs_idmap_prepare_pipe_upcall(idmap, cons)) + if (!nfs_idmap_prepare_pipe_upcall(idmap, data)) goto out2; ret = rpc_queue_upcall(idmap->idmap_pipe, msg); - if (ret < 0) { + if (ret < 0) nfs_idmap_abort_pipe_upcall(idmap, ret); - kfree(data); - } return ret; out2: @@ -738,21 +738,32 @@ static int nfs_idmap_instantiate(struct key *key, struct key *authkey, char *dat authkey); } -static int nfs_idmap_read_message(struct idmap_msg *im, struct key *key, struct key *authkey) +static int nfs_idmap_read_and_verify_message(struct idmap_msg *im, + struct idmap_msg *upcall, + struct key *key, struct key *authkey) { char id_str[NFS_UINT_MAXLEN]; - int ret = -EINVAL; + int ret = -ENOKEY; + /* ret = -ENOKEY */ + if (upcall->im_type != im->im_type || upcall->im_conv != im->im_conv) + goto out; switch (im->im_conv) { case IDMAP_CONV_NAMETOID: + if (strcmp(upcall->im_name, im->im_name) != 0) + break; sprintf(id_str, "%d", im->im_id); ret = nfs_idmap_instantiate(key, authkey, id_str); break; case IDMAP_CONV_IDTONAME: + if (upcall->im_id != im->im_id) + break; ret = nfs_idmap_instantiate(key, authkey, im->im_name); break; + default: + ret = -EINVAL; } - +out: return ret; } @@ -770,10 +781,11 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) * will have been woken up and someone else may now have used * idmap_key_cons - so after this point we may no longer touch it. 
*/ - cons = idmap->idmap_key_cons; - if (cons == NULL) + if (idmap->idmap_upcall_data == NULL) goto out_noupcall; + cons = idmap->idmap_upcall_data->key_cons; + if (mlen != sizeof(im)) { ret = -ENOSPC; goto out; @@ -793,9 +805,11 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) if (namelen_in == 0 || namelen_in == IDMAP_NAMESZ) { ret = -EINVAL; goto out; - } +} - ret = nfs_idmap_read_message(&im, cons->key, cons->authkey); + ret = nfs_idmap_read_and_verify_message(&im, + &idmap->idmap_upcall_data->idmap_msg, + cons->key, cons->authkey); if (ret >= 0) { key_set_timeout(cons->key, nfs_idmap_cache_timeout); ret = mlen; @@ -817,8 +831,6 @@ idmap_pipe_destroy_msg(struct rpc_pipe_msg *msg) if (msg->errno) nfs_idmap_abort_pipe_upcall(idmap, msg->errno); - /* Free memory allocated in nfs_idmap_legacy_upcall() */ - kfree(data); } static void -- cgit v1.2.3 From b3c54de6f82d01637796bcc1f667a45f3b32e814 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 13 Aug 2012 17:15:50 -0400 Subject: NFS: Convert nfs_get_lock_context to return an ERR_PTR on failure We want to be able to distinguish between allocation failures, and the case where the lock context is not needed (because there are no locks). 
Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 16 ++++++++++++---- fs/nfs/inode.c | 2 +- fs/nfs/pagelist.c | 8 +++++--- 3 files changed, 18 insertions(+), 8 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 1ba385b7c90..22130df1621 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -450,6 +450,7 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, const struct iovec *iov, ssize_t result = -ENOMEM; struct inode *inode = iocb->ki_filp->f_mapping->host; struct nfs_direct_req *dreq; + struct nfs_lock_context *l_ctx; dreq = nfs_direct_req_alloc(); if (dreq == NULL) @@ -457,9 +458,12 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, const struct iovec *iov, dreq->inode = inode; dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp)); - dreq->l_ctx = nfs_get_lock_context(dreq->ctx); - if (dreq->l_ctx == NULL) + l_ctx = nfs_get_lock_context(dreq->ctx); + if (IS_ERR(l_ctx)) { + result = PTR_ERR(l_ctx); goto out_release; + } + dreq->l_ctx = l_ctx; if (!is_sync_kiocb(iocb)) dreq->iocb = iocb; @@ -849,6 +853,7 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov, ssize_t result = -ENOMEM; struct inode *inode = iocb->ki_filp->f_mapping->host; struct nfs_direct_req *dreq; + struct nfs_lock_context *l_ctx; dreq = nfs_direct_req_alloc(); if (!dreq) @@ -856,9 +861,12 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov, dreq->inode = inode; dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp)); - dreq->l_ctx = nfs_get_lock_context(dreq->ctx); - if (dreq->l_ctx == NULL) + l_ctx = nfs_get_lock_context(dreq->ctx); + if (IS_ERR(l_ctx)) { + result = PTR_ERR(l_ctx); goto out_release; + } + dreq->l_ctx = l_ctx; if (!is_sync_kiocb(iocb)) dreq->iocb = iocb; diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 9b47610338f..b5e2913dff2 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -578,7 +578,7 @@ struct nfs_lock_context *nfs_get_lock_context(struct 
nfs_open_context *ctx) spin_unlock(&inode->i_lock); new = kmalloc(sizeof(*new), GFP_KERNEL); if (new == NULL) - return NULL; + return ERR_PTR(-ENOMEM); nfs_init_lock_context(new); spin_lock(&inode->i_lock); res = __nfs_find_lock_context(ctx); diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 311a79681e2..dfd764bd943 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -102,6 +102,7 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode, unsigned int offset, unsigned int count) { struct nfs_page *req; + struct nfs_lock_context *l_ctx; /* try to allocate the request struct */ req = nfs_page_alloc(); @@ -109,11 +110,12 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode, return ERR_PTR(-ENOMEM); /* get lock context early so we can deal with alloc failures */ - req->wb_lock_context = nfs_get_lock_context(ctx); - if (req->wb_lock_context == NULL) { + l_ctx = nfs_get_lock_context(ctx); + if (IS_ERR(l_ctx)) { nfs_page_free(req); - return ERR_PTR(-ENOMEM); + return ERR_CAST(l_ctx); } + req->wb_lock_context = l_ctx; /* Initialize the request struct. Initially, we assume a * long write-back delay. This will be adjusted in -- cgit v1.2.3 From 2a369153c82e0c83621b3e71d8f0c53394705bda Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 13 Aug 2012 18:54:45 -0400 Subject: NFS: Clean up helper function nfs4_select_rw_stateid() We want to be able to pass on the information that the page was not dirtied under a lock. Instead of adding a flag parameter, do this by passing a pointer to a 'struct nfs_lock_owner' that may be NULL. Also reuse this structure in struct nfs_lock_context to carry the fl_owner_t and pid_t. 
Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 8 ++++---- fs/nfs/nfs4_fs.h | 2 +- fs/nfs/nfs4proc.c | 6 +++++- fs/nfs/nfs4state.c | 14 +++++++++++--- fs/nfs/nfs4xdr.c | 6 +++++- fs/nfs/pagelist.c | 4 +++- fs/nfs/write.c | 10 +++++++--- 7 files changed, 36 insertions(+), 14 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index b5e2913dff2..126a4cbbb98 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -547,8 +547,8 @@ EXPORT_SYMBOL_GPL(nfs_getattr); static void nfs_init_lock_context(struct nfs_lock_context *l_ctx) { atomic_set(&l_ctx->count, 1); - l_ctx->lockowner = current->files; - l_ctx->pid = current->tgid; + l_ctx->lockowner.l_owner = current->files; + l_ctx->lockowner.l_pid = current->tgid; INIT_LIST_HEAD(&l_ctx->list); } @@ -557,9 +557,9 @@ static struct nfs_lock_context *__nfs_find_lock_context(struct nfs_open_context struct nfs_lock_context *pos; list_for_each_entry(pos, &ctx->lock_context.list, list) { - if (pos->lockowner != current->files) + if (pos->lockowner.l_owner != current->files) continue; - if (pos->pid != current->tgid) + if (pos->lockowner.l_pid != current->tgid) continue; atomic_inc(&pos->count); return pos; diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index da0618aeead..d95e25ec357 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -351,7 +351,7 @@ extern void nfs41_handle_server_scope(struct nfs_client *, extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp); extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl); extern void nfs4_select_rw_stateid(nfs4_stateid *, struct nfs4_state *, - fmode_t, fl_owner_t, pid_t); + fmode_t, const struct nfs_lockowner *); extern struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter, gfp_t gfp_mask); extern int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index ddfebb12801..f19ea4f0f0c 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ 
-2013,8 +2013,12 @@ static int _nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, nfs_fattr_init(fattr); if (state != NULL) { + struct nfs_lockowner lockowner = { + .l_owner = current->files, + .l_pid = current->tgid, + }; nfs4_select_rw_stateid(&arg.stateid, state, FMODE_WRITE, - current->files, current->tgid); + &lockowner); } else if (nfs4_copy_delegation_stateid(&arg.stateid, inode, FMODE_WRITE)) { /* Use that stateid */ diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 55148def554..03a4e7825f3 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -911,14 +911,22 @@ int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl) } static bool nfs4_copy_lock_stateid(nfs4_stateid *dst, struct nfs4_state *state, - fl_owner_t fl_owner, pid_t fl_pid) + const struct nfs_lockowner *lockowner) { struct nfs4_lock_state *lsp; + fl_owner_t fl_owner; + pid_t fl_pid; bool ret = false; + + if (lockowner == NULL) + goto out; + if (test_bit(LK_STATE_IN_USE, &state->flags) == 0) goto out; + fl_owner = lockowner->l_owner; + fl_pid = lockowner->l_pid; spin_lock(&state->state_lock); lsp = __nfs4_find_lock_state(state, fl_owner, fl_pid, NFS4_ANY_LOCK_TYPE); if (lsp != NULL && (lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0) { @@ -946,11 +954,11 @@ static void nfs4_copy_open_stateid(nfs4_stateid *dst, struct nfs4_state *state) * requests. 
*/ void nfs4_select_rw_stateid(nfs4_stateid *dst, struct nfs4_state *state, - fmode_t fmode, fl_owner_t fl_owner, pid_t fl_pid) + fmode_t fmode, const struct nfs_lockowner *lockowner) { if (nfs4_copy_delegation_stateid(dst, state->inode, fmode)) return; - if (nfs4_copy_lock_stateid(dst, state, fl_owner, fl_pid)) + if (nfs4_copy_lock_stateid(dst, state, lockowner)) return; nfs4_copy_open_stateid(dst, state); } diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index a756349b0fa..7ab29abb316 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -1509,8 +1509,12 @@ static void encode_open_stateid(struct xdr_stream *xdr, nfs4_stateid stateid; if (ctx->state != NULL) { + const struct nfs_lockowner *lockowner = NULL; + + if (l_ctx != NULL) + lockowner = &l_ctx->lockowner; nfs4_select_rw_stateid(&stateid, ctx->state, - fmode, l_ctx->lockowner, l_ctx->pid); + fmode, lockowner); if (zero_seqid) stateid.seqid = 0; encode_nfs4_stateid(xdr, &stateid); diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index dfd764bd943..e56e846e9d2 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -292,7 +292,9 @@ static bool nfs_can_coalesce_requests(struct nfs_page *prev, { if (req->wb_context->cred != prev->wb_context->cred) return false; - if (req->wb_lock_context->lockowner != prev->wb_lock_context->lockowner) + if (req->wb_lock_context->lockowner.l_owner != prev->wb_lock_context->lockowner.l_owner) + return false; + if (req->wb_lock_context->lockowner.l_pid != prev->wb_lock_context->lockowner.l_pid) return false; if (req->wb_context->state != prev->wb_context->state) return false; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index e3b55372726..e1b5fe4d873 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -846,6 +846,7 @@ static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page, int nfs_flush_incompatible(struct file *file, struct page *page) { struct nfs_open_context *ctx = nfs_file_open_context(file); + struct nfs_lock_context *l_ctx; struct nfs_page 
*req; int do_flush, status; /* @@ -860,9 +861,12 @@ int nfs_flush_incompatible(struct file *file, struct page *page) req = nfs_page_find_request(page); if (req == NULL) return 0; - do_flush = req->wb_page != page || req->wb_context != ctx || - req->wb_lock_context->lockowner != current->files || - req->wb_lock_context->pid != current->tgid; + l_ctx = req->wb_lock_context; + do_flush = req->wb_page != page || req->wb_context != ctx; + if (l_ctx) { + do_flush |= l_ctx->lockowner.l_owner != current->files + || l_ctx->lockowner.l_pid != current->tgid; + } nfs_release_request(req); if (!do_flush) return 0; -- cgit v1.2.3 From 795a88c968eef031f370973512b42124bacb2f17 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 10 Sep 2012 13:26:49 -0400 Subject: NFSv4: Convert the nfs4_lock_state->ls_flags to a bit field Signed-off-by: Trond Myklebust --- fs/nfs/nfs4_fs.h | 4 ++-- fs/nfs/nfs4proc.c | 10 +++++----- fs/nfs/nfs4state.c | 8 ++++---- 3 files changed, 11 insertions(+), 11 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index d95e25ec357..71d407fd00a 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -132,8 +132,8 @@ struct nfs4_lock_owner { struct nfs4_lock_state { struct list_head ls_locks; /* Other lock stateids */ struct nfs4_state * ls_state; /* Pointer to open state */ -#define NFS_LOCK_INITIALIZED 1 - int ls_flags; +#define NFS_LOCK_INITIALIZED 0 + unsigned long ls_flags; struct nfs_seqid_counter ls_seqid; nfs4_stateid ls_stateid; atomic_t ls_count; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index f19ea4f0f0c..cf2fd5d0c1b 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4395,7 +4395,7 @@ static void nfs4_locku_prepare(struct rpc_task *task, void *data) if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0) return; - if ((calldata->lsp->ls_flags & NFS_LOCK_INITIALIZED) == 0) { + if (test_bit(NFS_LOCK_INITIALIZED, &calldata->lsp->ls_flags) == 0) { /* Note: exit _without_ running nfs4_locku_done */ 
task->tk_action = NULL; return; @@ -4589,7 +4589,7 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata) } if (data->rpc_status == 0) { nfs4_stateid_copy(&data->lsp->ls_stateid, &data->res.stateid); - data->lsp->ls_flags |= NFS_LOCK_INITIALIZED; + set_bit(NFS_LOCK_INITIALIZED, &data->lsp->ls_flags); renew_lease(NFS_SERVER(data->ctx->dentry->d_inode), data->timestamp); } out: @@ -4636,7 +4636,7 @@ static void nfs4_handle_setlk_error(struct nfs_server *server, struct nfs4_lock_ case -NFS4ERR_BAD_STATEID: lsp->ls_seqid.flags &= ~NFS_SEQID_CONFIRMED; if (new_lock_owner != 0 || - (lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0) + test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags) != 0) nfs4_schedule_stateid_recovery(server, lsp->ls_state); break; case -NFS4ERR_STALE_STATEID: @@ -4760,7 +4760,7 @@ static int nfs41_check_expired_locks(struct nfs4_state *state) struct nfs_server *server = NFS_SERVER(state->inode); list_for_each_entry(lsp, &state->lock_states, ls_locks) { - if (lsp->ls_flags & NFS_LOCK_INITIALIZED) { + if (test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags)) { status = nfs41_test_stateid(server, &lsp->ls_stateid); if (status != NFS_OK) { /* Free the stateid unless the server @@ -4768,7 +4768,7 @@ static int nfs41_check_expired_locks(struct nfs4_state *state) if (status != -NFS4ERR_BAD_STATEID) nfs41_free_stateid(server, &lsp->ls_stateid); - lsp->ls_flags &= ~NFS_LOCK_INITIALIZED; + clear_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags); ret = status; } } diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 03a4e7825f3..fc6cfe68ad1 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -865,7 +865,7 @@ void nfs4_put_lock_state(struct nfs4_lock_state *lsp) if (list_empty(&state->lock_states)) clear_bit(LK_STATE_IN_USE, &state->flags); spin_unlock(&state->state_lock); - if (lsp->ls_flags & NFS_LOCK_INITIALIZED) { + if (test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags)) { if (nfs4_release_lockowner(lsp) == 0) return; } @@ -929,7 +929,7 @@ static bool 
nfs4_copy_lock_stateid(nfs4_stateid *dst, struct nfs4_state *state, fl_pid = lockowner->l_pid; spin_lock(&state->state_lock); lsp = __nfs4_find_lock_state(state, fl_owner, fl_pid, NFS4_ANY_LOCK_TYPE); - if (lsp != NULL && (lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0) { + if (lsp != NULL && test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags) != 0) { nfs4_stateid_copy(dst, &lsp->ls_stateid); ret = true; } @@ -1297,7 +1297,7 @@ restart: if (status >= 0) { spin_lock(&state->state_lock); list_for_each_entry(lock, &state->lock_states, ls_locks) { - if (!(lock->ls_flags & NFS_LOCK_INITIALIZED)) + if (!test_bit(NFS_LOCK_INITIALIZED, &lock->ls_flags)) pr_warn_ratelimited("NFS: " "%s: Lock reclaim " "failed!\n", __func__); @@ -1369,7 +1369,7 @@ static void nfs4_clear_open_state(struct nfs4_state *state) spin_lock(&state->state_lock); list_for_each_entry(lock, &state->lock_states, ls_locks) { lock->ls_seqid.flags = 0; - lock->ls_flags &= ~NFS_LOCK_INITIALIZED; + clear_bit(NFS_LOCK_INITIALIZED, &lock->ls_flags); } spin_unlock(&state->state_lock); } -- cgit v1.2.3 From 05990d1bf2708b9e84d67074551f964d3738eedc Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 11 Sep 2012 16:01:22 -0400 Subject: NFS: Fix fdatasync/fsync() when confronted with a server reboot If the server reboots before it can commit the unstable writes to disk, then nfs_commit_release_pages() will detect this when it compares the verifier returned by COMMIT to the one returned by WRITE. When this happens, the client needs to resend those writes in order to guarantee that they make it to stable storage. This patch adds a signalling mechanism to notify fsync() that it needs to retry all writes before it can exit. 
Signed-off-by: Trond Myklebust --- fs/nfs/file.c | 34 ++++++++++++++++++++++------------ fs/nfs/nfs4file.c | 22 ++++++++++++---------- fs/nfs/write.c | 1 + 3 files changed, 35 insertions(+), 22 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 6a7fcab7ecb..cc9b56691be 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -259,7 +259,7 @@ nfs_file_fsync_commit(struct file *file, loff_t start, loff_t end, int datasync) struct dentry *dentry = file->f_path.dentry; struct nfs_open_context *ctx = nfs_file_open_context(file); struct inode *inode = dentry->d_inode; - int have_error, status; + int have_error, do_resend, status; int ret = 0; dprintk("NFS: fsync file(%s/%s) datasync %d\n", @@ -267,15 +267,23 @@ nfs_file_fsync_commit(struct file *file, loff_t start, loff_t end, int datasync) datasync); nfs_inc_stats(inode, NFSIOS_VFSFSYNC); + do_resend = test_and_clear_bit(NFS_CONTEXT_RESEND_WRITES, &ctx->flags); have_error = test_and_clear_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags); status = nfs_commit_inode(inode, FLUSH_SYNC); - if (status >= 0 && ret < 0) - status = ret; have_error |= test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags); - if (have_error) + if (have_error) { ret = xchg(&ctx->error, 0); - if (!ret && status < 0) + if (ret) + goto out; + } + if (status < 0) { ret = status; + goto out; + } + do_resend |= test_bit(NFS_CONTEXT_RESEND_WRITES, &ctx->flags); + if (do_resend) + ret = -EAGAIN; +out: return ret; } EXPORT_SYMBOL_GPL(nfs_file_fsync_commit); @@ -286,13 +294,15 @@ nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) int ret; struct inode *inode = file->f_path.dentry->d_inode; - ret = filemap_write_and_wait_range(inode->i_mapping, start, end); - if (ret != 0) - goto out; - mutex_lock(&inode->i_mutex); - ret = nfs_file_fsync_commit(file, start, end, datasync); - mutex_unlock(&inode->i_mutex); -out: + do { + ret = filemap_write_and_wait_range(inode->i_mapping, start, end); + if (ret != 0) + break; + 
mutex_lock(&inode->i_mutex); + ret = nfs_file_fsync_commit(file, start, end, datasync); + mutex_unlock(&inode->i_mutex); + } while (ret == -EAGAIN); + return ret; } diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index eb5eb8eef4d..eef1b38a1b0 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -95,16 +95,18 @@ nfs4_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) int ret; struct inode *inode = file->f_path.dentry->d_inode; - ret = filemap_write_and_wait_range(inode->i_mapping, start, end); - if (ret != 0) - goto out; - mutex_lock(&inode->i_mutex); - ret = nfs_file_fsync_commit(file, start, end, datasync); - if (!ret && !datasync) - /* application has asked for meta-data sync */ - ret = pnfs_layoutcommit_inode(inode, true); - mutex_unlock(&inode->i_mutex); -out: + do { + ret = filemap_write_and_wait_range(inode->i_mapping, start, end); + if (ret != 0) + break; + mutex_lock(&inode->i_mutex); + ret = nfs_file_fsync_commit(file, start, end, datasync); + if (!ret && !datasync) + /* application has asked for meta-data sync */ + ret = pnfs_layoutcommit_inode(inode, true); + mutex_unlock(&inode->i_mutex); + } while (ret == -EAGAIN); + return ret; } diff --git a/fs/nfs/write.c b/fs/nfs/write.c index e1b5fe4d873..9347ab7c957 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1580,6 +1580,7 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data) /* We have a mismatch. Write the page again */ dprintk(" mismatch\n"); nfs_mark_request_dirty(req); + set_bit(NFS_CONTEXT_RESEND_WRITES, &req->wb_context->flags); next: nfs_unlock_and_release_request(req); } -- cgit v1.2.3 From dcfc4f25461813e8a2dd43b052aa1e0be155742f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 11 Sep 2012 16:19:38 -0400 Subject: NFS: Write the entire file if a server reboot occurs during fsync() This is to ensure that we don't clear the NFS_CONTEXT_RESEND_WRITES flag while there are still writes that haven't been resent. 
Signed-off-by: Trond Myklebust --- fs/nfs/file.c | 7 +++++++ fs/nfs/nfs4file.c | 7 +++++++ 2 files changed, 14 insertions(+) (limited to 'fs/nfs') diff --git a/fs/nfs/file.c b/fs/nfs/file.c index cc9b56691be..c814666bbe7 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -301,6 +301,13 @@ nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) mutex_lock(&inode->i_mutex); ret = nfs_file_fsync_commit(file, start, end, datasync); mutex_unlock(&inode->i_mutex); + /* + * If nfs_file_fsync_commit detected a server reboot, then + * resend all dirty pages that might have been covered by + * the NFS_CONTEXT_RESEND_WRITES flag + */ + start = 0; + end = LLONG_MAX; } while (ret == -EAGAIN); return ret; diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index eef1b38a1b0..afddd6639af 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -105,6 +105,13 @@ nfs4_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) /* application has asked for meta-data sync */ ret = pnfs_layoutcommit_inode(inode, true); mutex_unlock(&inode->i_mutex); + /* + * If nfs_file_fsync_commit detected a server reboot, then + * resend all dirty pages that might have been covered by + * the NFS_CONTEXT_RESEND_WRITES flag + */ + start = 0; + end = LLONG_MAX; } while (ret == -EAGAIN); return ret; -- cgit v1.2.3 From a0b0a6e39bd1bb4a0922086feee73627cbd53ba4 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 17 Sep 2012 17:12:15 -0400 Subject: NFS: Clean up the pNFS layoutget interface Ensure that we do return errors from nfs4_proc_layoutget() and that we don't mark the layout as having failed if the error was due to a signal or resource problem on the client side. 
Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 14 +++++++++----- fs/nfs/pnfs.c | 25 ++++++++++++++++--------- fs/nfs/pnfs.h | 4 ++-- 3 files changed, 27 insertions(+), 16 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index cf2fd5d0c1b..1c8656f8745 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -6286,7 +6286,8 @@ static const struct rpc_call_ops nfs4_layoutget_call_ops = { .rpc_release = nfs4_layoutget_release, }; -void nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags) +struct pnfs_layout_segment * +nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags) { struct nfs_server *server = NFS_SERVER(lgp->args.inode); size_t max_pages = max_response_pages(server); @@ -6303,6 +6304,7 @@ void nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags) .callback_data = lgp, .flags = RPC_TASK_ASYNC, }; + struct pnfs_layout_segment *lseg = NULL; int status = 0; dprintk("--> %s\n", __func__); @@ -6310,7 +6312,7 @@ void nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags) lgp->args.layout.pages = nfs4_alloc_pages(max_pages, gfp_flags); if (!lgp->args.layout.pages) { nfs4_layoutget_release(lgp); - return; + return ERR_PTR(-ENOMEM); } lgp->args.layout.pglen = max_pages * PAGE_SIZE; @@ -6319,15 +6321,17 @@ void nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags) nfs41_init_sequence(&lgp->args.seq_args, &lgp->res.seq_res, 0); task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) - return; + return ERR_CAST(task); status = nfs4_wait_for_completion_rpc_task(task); if (status == 0) status = task->tk_status; if (status == 0) - status = pnfs_layout_process(lgp); + lseg = pnfs_layout_process(lgp); rpc_put_task(task); dprintk("<-- %s status=%d\n", __func__, status); - return; + if (status) + return ERR_PTR(status); + return lseg; } static void diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 2e00feacd4b..3a7ac97020d 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -582,7 
+582,7 @@ send_layoutget(struct pnfs_layout_hdr *lo, struct inode *ino = lo->plh_inode; struct nfs_server *server = NFS_SERVER(ino); struct nfs4_layoutget *lgp; - struct pnfs_layout_segment *lseg = NULL; + struct pnfs_layout_segment *lseg; dprintk("--> %s\n", __func__); @@ -599,16 +599,22 @@ send_layoutget(struct pnfs_layout_hdr *lo, lgp->args.type = server->pnfs_curr_ld->id; lgp->args.inode = ino; lgp->args.ctx = get_nfs_open_context(ctx); - lgp->lsegpp = &lseg; lgp->gfp_flags = gfp_flags; /* Synchronously retrieve layout information from server and * store in lseg. */ - nfs4_proc_layoutget(lgp, gfp_flags); - if (!lseg) { - /* remember that LAYOUTGET failed and suspend trying */ - set_bit(lo_fail_bit(range->iomode), &lo->plh_flags); + lseg = nfs4_proc_layoutget(lgp, gfp_flags); + if (IS_ERR(lseg)) { + switch (PTR_ERR(lseg)) { + case -ENOMEM: + case -ERESTARTSYS: + break; + default: + /* remember that LAYOUTGET failed and suspend trying */ + set_bit(lo_fail_bit(range->iomode), &lo->plh_flags); + } + return NULL; } return lseg; @@ -1096,7 +1102,7 @@ out_unlock: } EXPORT_SYMBOL_GPL(pnfs_update_layout); -int +struct pnfs_layout_segment * pnfs_layout_process(struct nfs4_layoutget *lgp) { struct pnfs_layout_hdr *lo = NFS_I(lgp->args.inode)->layout; @@ -1129,7 +1135,7 @@ pnfs_layout_process(struct nfs4_layoutget *lgp) } init_lseg(lo, lseg); lseg->pls_range = res->range; - *lgp->lsegpp = get_lseg(lseg); + get_lseg(lseg); pnfs_insert_layout(lo, lseg); if (res->return_on_close) { @@ -1140,8 +1146,9 @@ pnfs_layout_process(struct nfs4_layoutget *lgp) /* Done processing layoutget. 
Set the layout stateid */ pnfs_set_layout_stateid(lo, &res->stateid, false); spin_unlock(&ino->i_lock); + return lseg; out: - return status; + return ERR_PTR(status); out_forget_reply: spin_unlock(&ino->i_lock); diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 745aa1b39e7..d51ef888e71 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -172,7 +172,7 @@ extern int nfs4_proc_getdevicelist(struct nfs_server *server, struct pnfs_devicelist *devlist); extern int nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *dev); -extern void nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags); +extern struct pnfs_layout_segment* nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags); extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp); /* pnfs.c */ @@ -192,7 +192,7 @@ void pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *, struct nfs_page int pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc); bool pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req); void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg); -int pnfs_layout_process(struct nfs4_layoutget *lgp); +struct pnfs_layout_segment *pnfs_layout_process(struct nfs4_layoutget *lgp); void pnfs_free_lseg_list(struct list_head *tmp_list); void pnfs_destroy_layout(struct nfs_inode *); void pnfs_destroy_all_layouts(struct nfs_client *); -- cgit v1.2.3 From 49a85061b0bc9cb26361096482c81172c666c937 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 18 Sep 2012 20:43:31 -0400 Subject: NFSv4.1: Cleanup add a "pnfs_" prefix to mark_matching_lsegs_invalid Signed-off-by: Trond Myklebust --- fs/nfs/callback_proc.c | 4 ++-- fs/nfs/pnfs.c | 6 +++--- fs/nfs/pnfs.h | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 1b5d809a105..57b8bda0f4e 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -158,7 +158,7 @@ 
static u32 initiate_file_draining(struct nfs_client *clp, ino = lo->plh_inode; spin_lock(&ino->i_lock); if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) || - mark_matching_lsegs_invalid(lo, &free_me_list, + pnfs_mark_matching_lsegs_invalid(lo, &free_me_list, &args->cbl_range)) rv = NFS4ERR_DELAY; else @@ -211,7 +211,7 @@ static u32 initiate_bulk_draining(struct nfs_client *clp, ino = lo->plh_inode; spin_lock(&ino->i_lock); set_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags); - if (mark_matching_lsegs_invalid(lo, &free_me_list, &range)) + if (pnfs_mark_matching_lsegs_invalid(lo, &free_me_list, &range)) rv = NFS4ERR_DELAY; list_del_init(&lo->plh_bulk_recall); spin_unlock(&ino->i_lock); diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 3a7ac97020d..aea2e5256fe 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -390,7 +390,7 @@ static int mark_lseg_invalid(struct pnfs_layout_segment *lseg, * after call. */ int -mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, +pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, struct list_head *tmp_list, struct pnfs_layout_range *recall_range) { @@ -458,7 +458,7 @@ pnfs_destroy_layout(struct nfs_inode *nfsi) lo = nfsi->layout; if (lo) { lo->plh_block_lgets++; /* permanently block new LAYOUTGETs */ - mark_matching_lsegs_invalid(lo, &tmp_list, NULL); + pnfs_mark_matching_lsegs_invalid(lo, &tmp_list, NULL); } spin_unlock(&nfsi->vfs_inode.i_lock); pnfs_free_lseg_list(&tmp_list); @@ -651,7 +651,7 @@ _pnfs_return_layout(struct inode *ino) /* Reference matched in nfs4_layoutreturn_release */ get_layout_hdr(lo); empty = list_empty(&lo->plh_segs); - mark_matching_lsegs_invalid(lo, &tmp_list, NULL); + pnfs_mark_matching_lsegs_invalid(lo, &tmp_list, NULL); /* Don't send a LAYOUTRETURN if list was initially empty */ if (empty) { spin_unlock(&ino->i_lock); diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index d51ef888e71..6af518934e4 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -203,7 +203,7 @@ void pnfs_set_layout_stateid(struct 
pnfs_layout_hdr *lo, int pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo, struct nfs4_state *open_state); -int mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, +int pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, struct list_head *tmp_list, struct pnfs_layout_range *recall_range); bool pnfs_roc(struct inode *ino); -- cgit v1.2.3 From 70c3bd2bdf9a3c7c9282c362a4ec9ec88c713e13 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 18 Sep 2012 20:51:13 -0400 Subject: NFSv4.1: Cleanup; add "pnfs_" prefix to get_layout_hdr() and put_layout_hdr() Signed-off-by: Trond Myklebust --- fs/nfs/callback_proc.c | 8 ++++---- fs/nfs/nfs4proc.c | 2 +- fs/nfs/pnfs.c | 30 +++++++++++++++--------------- fs/nfs/pnfs.h | 4 ++-- 4 files changed, 22 insertions(+), 22 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 57b8bda0f4e..24252fea2c9 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -122,7 +122,7 @@ static struct pnfs_layout_hdr * get_layout_by_fh_locked(struct nfs_client *clp, ino = igrab(lo->plh_inode); if (!ino) continue; - get_layout_hdr(lo); + pnfs_get_layout_hdr(lo); return lo; } } @@ -166,7 +166,7 @@ static u32 initiate_file_draining(struct nfs_client *clp, pnfs_set_layout_stateid(lo, &args->cbl_stateid, true); spin_unlock(&ino->i_lock); pnfs_free_lseg_list(&free_me_list); - put_layout_hdr(lo); + pnfs_put_layout_hdr(lo); iput(ino); return rv; } @@ -198,7 +198,7 @@ static u32 initiate_bulk_draining(struct nfs_client *clp, list_for_each_entry(lo, &server->layouts, plh_layouts) { if (!igrab(lo->plh_inode)) continue; - get_layout_hdr(lo); + pnfs_get_layout_hdr(lo); BUG_ON(!list_empty(&lo->plh_bulk_recall)); list_add(&lo->plh_bulk_recall, &recall_list); } @@ -216,7 +216,7 @@ static u32 initiate_bulk_draining(struct nfs_client *clp, list_del_init(&lo->plh_bulk_recall); spin_unlock(&ino->i_lock); pnfs_free_lseg_list(&free_me_list); - put_layout_hdr(lo); + 
pnfs_put_layout_hdr(lo); iput(ino); } return rv; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 1c8656f8745..bdacb8c21a3 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -6375,7 +6375,7 @@ static void nfs4_layoutreturn_release(void *calldata) struct nfs4_layoutreturn *lrp = calldata; dprintk("--> %s\n", __func__); - put_layout_hdr(lrp->args.layout); + pnfs_put_layout_hdr(lrp->args.layout); kfree(calldata); dprintk("<-- %s\n", __func__); } diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index aea2e5256fe..512c8632bf3 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -190,7 +190,7 @@ EXPORT_SYMBOL_GPL(pnfs_unregister_layoutdriver); /* Need to hold i_lock if caller does not already hold reference */ void -get_layout_hdr(struct pnfs_layout_hdr *lo) +pnfs_get_layout_hdr(struct pnfs_layout_hdr *lo) { atomic_inc(&lo->plh_refcount); } @@ -221,14 +221,14 @@ destroy_layout_hdr(struct pnfs_layout_hdr *lo) } static void -put_layout_hdr_locked(struct pnfs_layout_hdr *lo) +pnfs_put_layout_hdr_locked(struct pnfs_layout_hdr *lo) { if (atomic_dec_and_test(&lo->plh_refcount)) destroy_layout_hdr(lo); } void -put_layout_hdr(struct pnfs_layout_hdr *lo) +pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo) { struct inode *inode = lo->plh_inode; @@ -254,8 +254,8 @@ static void free_lseg(struct pnfs_layout_segment *lseg) struct inode *ino = lseg->pls_layout->plh_inode; NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg); - /* Matched by get_layout_hdr in pnfs_insert_layout */ - put_layout_hdr(NFS_I(ino)->layout); + /* Matched by pnfs_get_layout_hdr in pnfs_insert_layout */ + pnfs_put_layout_hdr(NFS_I(ino)->layout); } static void @@ -268,7 +268,7 @@ put_lseg_common(struct pnfs_layout_segment *lseg) if (list_empty(&lseg->pls_layout->plh_segs)) { set_bit(NFS_LAYOUT_DESTROYED, &lseg->pls_layout->plh_flags); /* Matched by initial refcount set in alloc_init_layout_hdr */ - put_layout_hdr_locked(lseg->pls_layout); + pnfs_put_layout_hdr_locked(lseg->pls_layout); } 
rpc_wake_up(&NFS_SERVER(inode)->roc_rpcwaitq); } @@ -404,7 +404,7 @@ pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, NFS_I(lo->plh_inode)->write_io = 0; NFS_I(lo->plh_inode)->read_io = 0; if (!test_and_set_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags)) - put_layout_hdr_locked(lo); + pnfs_put_layout_hdr_locked(lo); return 0; } list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list) @@ -649,13 +649,13 @@ _pnfs_return_layout(struct inode *ino) } stateid = nfsi->layout->plh_stateid; /* Reference matched in nfs4_layoutreturn_release */ - get_layout_hdr(lo); + pnfs_get_layout_hdr(lo); empty = list_empty(&lo->plh_segs); pnfs_mark_matching_lsegs_invalid(lo, &tmp_list, NULL); /* Don't send a LAYOUTRETURN if list was initially empty */ if (empty) { spin_unlock(&ino->i_lock); - put_layout_hdr(lo); + pnfs_put_layout_hdr(lo); dprintk("NFS: %s no layout segments to return\n", __func__); goto out; } @@ -672,7 +672,7 @@ _pnfs_return_layout(struct inode *ino) set_bit(NFS_LAYOUT_RW_FAILED, &lo->plh_flags); set_bit(NFS_LAYOUT_RO_FAILED, &lo->plh_flags); pnfs_clear_layout_returned(lo); - put_layout_hdr(lo); + pnfs_put_layout_hdr(lo); goto out; } @@ -709,7 +709,7 @@ bool pnfs_roc(struct inode *ino) if (!found) goto out_nolayout; lo->plh_block_lgets++; - get_layout_hdr(lo); /* matched in pnfs_roc_release */ + pnfs_get_layout_hdr(lo); /* matched in pnfs_roc_release */ spin_unlock(&ino->i_lock); pnfs_free_lseg_list(&tmp_list); return true; @@ -726,7 +726,7 @@ void pnfs_roc_release(struct inode *ino) spin_lock(&ino->i_lock); lo = NFS_I(ino)->layout; lo->plh_block_lgets--; - put_layout_hdr_locked(lo); + pnfs_put_layout_hdr_locked(lo); spin_unlock(&ino->i_lock); } @@ -819,7 +819,7 @@ pnfs_insert_layout(struct pnfs_layout_hdr *lo, __func__, lseg, lseg->pls_range.iomode, lseg->pls_range.offset, lseg->pls_range.length); out: - get_layout_hdr(lo); + pnfs_get_layout_hdr(lo); dprintk("%s:Return\n", __func__); } @@ -1058,7 +1058,7 @@ pnfs_update_layout(struct inode *ino, goto 
out_unlock; atomic_inc(&lo->plh_outstanding); - get_layout_hdr(lo); + pnfs_get_layout_hdr(lo); if (list_empty(&lo->plh_segs)) first = true; @@ -1091,7 +1091,7 @@ pnfs_update_layout(struct inode *ino, spin_unlock(&clp->cl_lock); } atomic_dec(&lo->plh_outstanding); - put_layout_hdr(lo); + pnfs_put_layout_hdr(lo); out: dprintk("%s end, state 0x%lx lseg %p\n", __func__, nfsi->layout ? nfsi->layout->plh_flags : -1, lseg); diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 6af518934e4..2af681f0a49 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -176,7 +176,7 @@ extern struct pnfs_layout_segment* nfs4_proc_layoutget(struct nfs4_layoutget *lg extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp); /* pnfs.c */ -void get_layout_hdr(struct pnfs_layout_hdr *lo); +void pnfs_get_layout_hdr(struct pnfs_layout_hdr *lo); void put_lseg(struct pnfs_layout_segment *lseg); void pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *, @@ -196,7 +196,7 @@ struct pnfs_layout_segment *pnfs_layout_process(struct nfs4_layoutget *lgp); void pnfs_free_lseg_list(struct list_head *tmp_list); void pnfs_destroy_layout(struct nfs_inode *); void pnfs_destroy_all_layouts(struct nfs_client *); -void put_layout_hdr(struct pnfs_layout_hdr *lo); +void pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo); void pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new, bool update_barrier); -- cgit v1.2.3 From 9369a431bce1e985597eda32992960c969b27c5b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 18 Sep 2012 20:57:08 -0400 Subject: NFSv4.1: Cleanup; add "pnfs_" prefix to put_lseg() and get_lseg() Signed-off-by: Trond Myklebust --- fs/nfs/nfs4filelayout.c | 16 ++++++++-------- fs/nfs/nfs4proc.c | 2 +- fs/nfs/pnfs.c | 36 ++++++++++++++++++------------------ fs/nfs/pnfs.h | 8 ++++---- 4 files changed, 31 insertions(+), 31 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 53f94d915bd..77cd1151ef0 100644 --- 
a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -453,7 +453,7 @@ static void filelayout_commit_release(void *calldata) struct nfs_commit_data *data = calldata; data->completion_ops->completion(data); - put_lseg(data->lseg); + pnfs_put_lseg(data->lseg); nfs_put_client(data->ds_clp); nfs_commitdata_release(data); } @@ -931,7 +931,7 @@ filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio, nfs_init_cinfo(&cinfo, pgio->pg_inode, pgio->pg_dreq); status = filelayout_alloc_commit_info(pgio->pg_lseg, &cinfo, GFP_NOFS); if (status < 0) { - put_lseg(pgio->pg_lseg); + pnfs_put_lseg(pgio->pg_lseg); pgio->pg_lseg = NULL; goto out_mds; } @@ -985,7 +985,7 @@ filelayout_clear_request_commit(struct nfs_page *req, out: nfs_request_remove_commit_list(req, cinfo); spin_unlock(cinfo->lock); - put_lseg(freeme); + pnfs_put_lseg(freeme); } static struct list_head * @@ -1018,7 +1018,7 @@ filelayout_choose_commit_list(struct nfs_page *req, * off due to a rewrite, in which case it will be done in * filelayout_clear_request_commit */ - buckets[i].wlseg = get_lseg(lseg); + buckets[i].wlseg = pnfs_get_lseg(lseg); } set_bit(PG_COMMIT_TO_DS, &req->wb_flags); cinfo->ds->nwritten++; @@ -1128,7 +1128,7 @@ filelayout_scan_ds_commit_list(struct pnfs_commit_bucket *bucket, if (list_empty(src)) bucket->wlseg = NULL; else - get_lseg(bucket->clseg); + pnfs_get_lseg(bucket->clseg); } return ret; } @@ -1159,12 +1159,12 @@ static void filelayout_recover_commit_reqs(struct list_head *dst, /* NOTE cinfo->lock is NOT held, relying on fact that this is * only called on single thread per dreq. 
- * Can't take the lock because need to do put_lseg + * Can't take the lock because need to do pnfs_put_lseg */ for (i = 0, b = cinfo->ds->buckets; i < cinfo->ds->nbuckets; i++, b++) { if (transfer_commit_list(&b->written, dst, cinfo, 0)) { BUG_ON(!list_empty(&b->written)); - put_lseg(b->wlseg); + pnfs_put_lseg(b->wlseg); b->wlseg = NULL; } } @@ -1200,7 +1200,7 @@ alloc_ds_commits(struct nfs_commit_info *cinfo, struct list_head *list) if (list_empty(&bucket->committing)) continue; nfs_retry_commit(&bucket->committing, bucket->clseg, cinfo); - put_lseg(bucket->clseg); + pnfs_put_lseg(bucket->clseg); bucket->clseg = NULL; } /* Caller will clean up entries put on list */ diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index bdacb8c21a3..e605d417a00 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -6549,7 +6549,7 @@ static void nfs4_layoutcommit_release(void *calldata) list_del_init(&lseg->pls_lc_list); if (test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags)) - put_lseg(lseg); + pnfs_put_lseg(lseg); } clear_bit_unlock(NFS_INO_LAYOUTCOMMITTING, bitlock); diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 512c8632bf3..498af877995 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -259,7 +259,7 @@ static void free_lseg(struct pnfs_layout_segment *lseg) } static void -put_lseg_common(struct pnfs_layout_segment *lseg) +pnfs_put_lseg_common(struct pnfs_layout_segment *lseg) { struct inode *inode = lseg->pls_layout->plh_inode; @@ -274,7 +274,7 @@ put_lseg_common(struct pnfs_layout_segment *lseg) } void -put_lseg(struct pnfs_layout_segment *lseg) +pnfs_put_lseg(struct pnfs_layout_segment *lseg) { struct inode *inode; @@ -288,13 +288,13 @@ put_lseg(struct pnfs_layout_segment *lseg) if (atomic_dec_and_lock(&lseg->pls_refcount, &inode->i_lock)) { LIST_HEAD(free_me); - put_lseg_common(lseg); + pnfs_put_lseg_common(lseg); list_add(&lseg->pls_list, &free_me); spin_unlock(&inode->i_lock); pnfs_free_lseg_list(&free_me); } } -EXPORT_SYMBOL_GPL(put_lseg); 
+EXPORT_SYMBOL_GPL(pnfs_put_lseg); static inline u64 end_offset(u64 start, u64 len) @@ -378,7 +378,7 @@ static int mark_lseg_invalid(struct pnfs_layout_segment *lseg, dprintk("%s: lseg %p ref %d\n", __func__, lseg, atomic_read(&lseg->pls_refcount)); if (atomic_dec_and_test(&lseg->pls_refcount)) { - put_lseg_common(lseg); + pnfs_put_lseg_common(lseg); list_add(&lseg->pls_list, tmp_list); rv = 1; } @@ -914,7 +914,7 @@ pnfs_find_lseg(struct pnfs_layout_hdr *lo, list_for_each_entry(lseg, &lo->plh_segs, pls_list) { if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags) && is_matching_lseg(&lseg->pls_range, range)) { - ret = get_lseg(lseg); + ret = pnfs_get_lseg(lseg); break; } if (lseg->pls_range.offset > range->offset) @@ -1135,7 +1135,7 @@ pnfs_layout_process(struct nfs4_layoutget *lgp) } init_lseg(lo, lseg); lseg->pls_range = res->range; - get_lseg(lseg); + pnfs_get_lseg(lseg); pnfs_insert_layout(lo, lseg); if (res->return_on_close) { @@ -1369,12 +1369,12 @@ pnfs_do_multiple_writes(struct nfs_pageio_descriptor *desc, struct list_head *he if (trypnfs == PNFS_NOT_ATTEMPTED) pnfs_write_through_mds(desc, data); } - put_lseg(lseg); + pnfs_put_lseg(lseg); } static void pnfs_writehdr_free(struct nfs_pgio_header *hdr) { - put_lseg(hdr->lseg); + pnfs_put_lseg(hdr->lseg); nfs_writehdr_free(hdr); } EXPORT_SYMBOL_GPL(pnfs_writehdr_free); @@ -1389,17 +1389,17 @@ pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) whdr = nfs_writehdr_alloc(); if (!whdr) { desc->pg_completion_ops->error_cleanup(&desc->pg_list); - put_lseg(desc->pg_lseg); + pnfs_put_lseg(desc->pg_lseg); desc->pg_lseg = NULL; return -ENOMEM; } hdr = &whdr->header; nfs_pgheader_init(desc, hdr, pnfs_writehdr_free); - hdr->lseg = get_lseg(desc->pg_lseg); + hdr->lseg = pnfs_get_lseg(desc->pg_lseg); atomic_inc(&hdr->refcnt); ret = nfs_generic_flush(desc, hdr); if (ret != 0) { - put_lseg(desc->pg_lseg); + pnfs_put_lseg(desc->pg_lseg); desc->pg_lseg = NULL; } else pnfs_do_multiple_writes(desc, &hdr->rpc_list, 
desc->pg_ioflags); @@ -1524,12 +1524,12 @@ pnfs_do_multiple_reads(struct nfs_pageio_descriptor *desc, struct list_head *hea if (trypnfs == PNFS_NOT_ATTEMPTED) pnfs_read_through_mds(desc, data); } - put_lseg(lseg); + pnfs_put_lseg(lseg); } static void pnfs_readhdr_free(struct nfs_pgio_header *hdr) { - put_lseg(hdr->lseg); + pnfs_put_lseg(hdr->lseg); nfs_readhdr_free(hdr); } EXPORT_SYMBOL_GPL(pnfs_readhdr_free); @@ -1545,17 +1545,17 @@ pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) if (!rhdr) { desc->pg_completion_ops->error_cleanup(&desc->pg_list); ret = -ENOMEM; - put_lseg(desc->pg_lseg); + pnfs_put_lseg(desc->pg_lseg); desc->pg_lseg = NULL; return ret; } hdr = &rhdr->header; nfs_pgheader_init(desc, hdr, pnfs_readhdr_free); - hdr->lseg = get_lseg(desc->pg_lseg); + hdr->lseg = pnfs_get_lseg(desc->pg_lseg); atomic_inc(&hdr->refcnt); ret = nfs_generic_pagein(desc, hdr); if (ret != 0) { - put_lseg(desc->pg_lseg); + pnfs_put_lseg(desc->pg_lseg); desc->pg_lseg = NULL; } else pnfs_do_multiple_reads(desc, &hdr->rpc_list); @@ -1608,7 +1608,7 @@ pnfs_set_layoutcommit(struct nfs_write_data *wdata) } if (!test_and_set_bit(NFS_LSEG_LAYOUTCOMMIT, &hdr->lseg->pls_flags)) { /* references matched in nfs4_layoutcommit_release */ - get_lseg(hdr->lseg); + pnfs_get_lseg(hdr->lseg); } if (end_pos > nfsi->layout->plh_lwb) nfsi->layout->plh_lwb = end_pos; diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 2af681f0a49..04958797fad 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -177,7 +177,7 @@ extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp); /* pnfs.c */ void pnfs_get_layout_hdr(struct pnfs_layout_hdr *lo); -void put_lseg(struct pnfs_layout_segment *lseg); +void pnfs_put_lseg(struct pnfs_layout_segment *lseg); void pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *, const struct nfs_pgio_completion_ops *); @@ -281,7 +281,7 @@ static inline int lo_fail_bit(u32 iomode) } static inline struct pnfs_layout_segment * -get_lseg(struct 
pnfs_layout_segment *lseg) +pnfs_get_lseg(struct pnfs_layout_segment *lseg) { if (lseg) { atomic_inc(&lseg->pls_refcount); @@ -406,12 +406,12 @@ static inline void pnfs_destroy_layout(struct nfs_inode *nfsi) } static inline struct pnfs_layout_segment * -get_lseg(struct pnfs_layout_segment *lseg) +pnfs_get_lseg(struct pnfs_layout_segment *lseg) { return NULL; } -static inline void put_lseg(struct pnfs_layout_segment *lseg) +static inline void pnfs_put_lseg(struct pnfs_layout_segment *lseg) { } -- cgit v1.2.3 From 78e4e05c643768af170e5a4b21712d9a7a26cce5 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 18 Sep 2012 21:02:29 -0400 Subject: NFSv4.1: Replace get_device_info() with filelayout_get_device_info() Fix the namespace pollution issue. Signed-off-by: Trond Myklebust --- fs/nfs/blocklayout/blocklayout.c | 2 +- fs/nfs/nfs4filelayout.c | 2 +- fs/nfs/nfs4filelayout.h | 2 +- fs/nfs/nfs4filelayoutdev.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index dd392ed5f2e..329bfbfed37 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -874,7 +874,7 @@ static void free_blk_mountid(struct block_mount_id *mid) } } -/* This is mostly copied from the filelayout's get_device_info function. +/* This is mostly copied from the filelayout_get_device_info function. * It seems much of this should be at the generic pnfs level. 
*/ static struct pnfs_block_dev * diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 77cd1151ef0..af6ee4ad3f1 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -608,7 +608,7 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo, d = nfs4_find_get_deviceid(NFS_SERVER(lo->plh_inode)->pnfs_curr_ld, NFS_SERVER(lo->plh_inode)->nfs_client, id); if (d == NULL) { - dsaddr = get_device_info(lo->plh_inode, id, gfp_flags); + dsaddr = filelayout_get_device_info(lo->plh_inode, id, gfp_flags); if (dsaddr == NULL) goto out; } else diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h index 43fe802dd67..11053c425a6 100644 --- a/fs/nfs/nfs4filelayout.h +++ b/fs/nfs/nfs4filelayout.h @@ -158,7 +158,7 @@ struct nfs4_pnfs_ds *nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, extern void nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr); extern void nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr); struct nfs4_file_layout_dsaddr * -get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id, gfp_t gfp_flags); +filelayout_get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id, gfp_t gfp_flags); void nfs4_ds_disconnect(struct nfs_client *clp); #endif /* FS_NFS_NFS4FILELAYOUT_H */ diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c index f81231f30d9..b85a29df20a 100644 --- a/fs/nfs/nfs4filelayoutdev.c +++ b/fs/nfs/nfs4filelayoutdev.c @@ -690,7 +690,7 @@ decode_and_add_device(struct inode *inode, struct pnfs_device *dev, gfp_t gfp_fl * of available devices, and return it. 
*/ struct nfs4_file_layout_dsaddr * -get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id, gfp_t gfp_flags) +filelayout_get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id, gfp_t gfp_flags) { struct pnfs_device *pdev = NULL; u32 max_resp_sz; -- cgit v1.2.3 From f86bbcf85db32596a0484477d1b8042005709049 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 26 Sep 2012 11:21:40 -0400 Subject: NFSv4.1: Replace dprintk() in pnfs_update_layout with something less buggy Dereferencing nfsi->layout in order to read plh_flags without holding a spin lock is bug prone. Furthermore, the dprintk() tells you nothing about whether or not the call succeeded. Replace it with something that tells you about whether or not a valid layout segment was returned for the inode in question. Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 498af877995..df45acaf91f 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1027,17 +1027,15 @@ pnfs_update_layout(struct inode *ino, bool first = false; if (!pnfs_enabled_sb(NFS_SERVER(ino))) - return NULL; + goto out; if (pnfs_within_mdsthreshold(ctx, ino, iomode)) - return NULL; + goto out; spin_lock(&ino->i_lock); lo = pnfs_find_alloc_layout(ino, ctx, gfp_flags); - if (lo == NULL) { - dprintk("%s ERROR: can't get pnfs_layout_hdr\n", __func__); + if (lo == NULL) goto out_unlock; - } /* Do we even need to bother with this? */ if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) { @@ -1093,8 +1091,14 @@ pnfs_update_layout(struct inode *ino, atomic_dec(&lo->plh_outstanding); pnfs_put_layout_hdr(lo); out: - dprintk("%s end, state 0x%lx lseg %p\n", __func__, - nfsi->layout ? 
nfsi->layout->plh_flags : -1, lseg); + dprintk("%s: inode %s/%llu pNFS layout segment %s for " + "(%s, offset: %llu, length: %llu)\n", + __func__, ino->i_sb->s_id, + (unsigned long long)NFS_FILEID(ino), + lseg == NULL ? "not found" : "found", + iomode==IOMODE_RW ? "read/write" : "read-only", + (unsigned long long)pos, + (unsigned long long)count); return lseg; out_unlock: spin_unlock(&ino->i_lock); -- cgit v1.2.3 From b9e028fd89d6834558aa2a5bb30e5cff5c6c1059 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 18 Sep 2012 16:41:18 -0400 Subject: NFSv4.1: Add helpers for setting/reading the I/O fail bit ...and make them local to the pnfs.c file. Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 38 ++++++++++++++++++++++++++------------ fs/nfs/pnfs.h | 6 ------ 2 files changed, 26 insertions(+), 18 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index df45acaf91f..f46f9bc4f76 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -238,6 +238,27 @@ pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo) } } +static int +pnfs_iomode_to_fail_bit(u32 iomode) +{ + return iomode == IOMODE_RW ? + NFS_LAYOUT_RW_FAILED : NFS_LAYOUT_RO_FAILED; +} + +static void +pnfs_layout_io_set_failed(struct pnfs_layout_hdr *lo, u32 iomode) +{ + set_bit(pnfs_iomode_to_fail_bit(iomode), &lo->plh_flags); + dprintk("%s Setting layout IOMODE_%s fail bit\n", __func__, + iomode == IOMODE_RW ? 
"RW" : "READ"); +} + +static bool +pnfs_layout_io_test_failed(struct pnfs_layout_hdr *lo, u32 iomode) +{ + return test_bit(pnfs_iomode_to_fail_bit(iomode), &lo->plh_flags) != 0; +} + static void init_lseg(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg) { @@ -612,7 +633,7 @@ send_layoutget(struct pnfs_layout_hdr *lo, break; default: /* remember that LAYOUTGET failed and suspend trying */ - set_bit(lo_fail_bit(range->iomode), &lo->plh_flags); + pnfs_layout_io_set_failed(lo, range->iomode); } return NULL; } @@ -669,8 +690,8 @@ _pnfs_return_layout(struct inode *ino) lrp = kzalloc(sizeof(*lrp), GFP_KERNEL); if (unlikely(lrp == NULL)) { status = -ENOMEM; - set_bit(NFS_LAYOUT_RW_FAILED, &lo->plh_flags); - set_bit(NFS_LAYOUT_RO_FAILED, &lo->plh_flags); + pnfs_layout_io_set_failed(lo, IOMODE_RW); + pnfs_layout_io_set_failed(lo, IOMODE_READ); pnfs_clear_layout_returned(lo); pnfs_put_layout_hdr(lo); goto out; @@ -1019,7 +1040,6 @@ pnfs_update_layout(struct inode *ino, .length = count, }; unsigned pg_offset; - struct nfs_inode *nfsi = NFS_I(ino); struct nfs_server *server = NFS_SERVER(ino); struct nfs_client *clp = server->nfs_client; struct pnfs_layout_hdr *lo; @@ -1044,7 +1064,7 @@ pnfs_update_layout(struct inode *ino, } /* if LAYOUTGET already failed once we don't try again */ - if (test_bit(lo_fail_bit(iomode), &nfsi->layout->plh_flags)) + if (pnfs_layout_io_test_failed(lo, iomode)) goto out_unlock; /* Check to see if the layout for the given range already exists */ @@ -1585,13 +1605,7 @@ static void pnfs_list_write_lseg(struct inode *inode, struct list_head *listp) void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg) { - if (lseg->pls_range.iomode == IOMODE_RW) { - dprintk("%s Setting layout IOMODE_RW fail bit\n", __func__); - set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags); - } else { - dprintk("%s Setting layout IOMODE_READ fail bit\n", __func__); - set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags); - } + 
pnfs_layout_io_set_failed(lseg->pls_layout, lseg->pls_range.iomode); } EXPORT_SYMBOL_GPL(pnfs_set_lo_fail); diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 04958797fad..e3eb7d1b17a 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -274,12 +274,6 @@ pnfs_test_layout_returned(struct pnfs_layout_hdr *lo) return test_bit(NFS_LAYOUT_RETURNED, &lo->plh_flags); } -static inline int lo_fail_bit(u32 iomode) -{ - return iomode == IOMODE_RW ? - NFS_LAYOUT_RW_FAILED : NFS_LAYOUT_RO_FAILED; -} - static inline struct pnfs_layout_segment * pnfs_get_lseg(struct pnfs_layout_segment *lseg) { -- cgit v1.2.3 From 25c7533357a4c4a9311d40cc92e9648c8a7e763e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 18 Sep 2012 17:01:12 -0400 Subject: NFSv4.1: Retry pNFS after a 2 minute timeout If we had to fall back to read/write through MDS, then assume that we should retry pNFS after a suitable timeout period. The following patch sets a timeout of 2 minutes. Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 15 ++++++++++++++- fs/nfs/pnfs.h | 1 + 2 files changed, 15 insertions(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index f46f9bc4f76..2c59da5511d 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -35,6 +35,7 @@ #include "iostat.h" #define NFSDBG_FACILITY NFSDBG_PNFS +#define PNFS_LAYOUTGET_RETRY_TIMEOUT (120*HZ) /* Locking: * @@ -248,6 +249,7 @@ pnfs_iomode_to_fail_bit(u32 iomode) static void pnfs_layout_io_set_failed(struct pnfs_layout_hdr *lo, u32 iomode) { + lo->plh_retry_timestamp = jiffies; set_bit(pnfs_iomode_to_fail_bit(iomode), &lo->plh_flags); dprintk("%s Setting layout IOMODE_%s fail bit\n", __func__, iomode == IOMODE_RW ? 
"RW" : "READ"); @@ -256,7 +258,18 @@ pnfs_layout_io_set_failed(struct pnfs_layout_hdr *lo, u32 iomode) static bool pnfs_layout_io_test_failed(struct pnfs_layout_hdr *lo, u32 iomode) { - return test_bit(pnfs_iomode_to_fail_bit(iomode), &lo->plh_flags) != 0; + unsigned long start, end; + if (test_bit(pnfs_iomode_to_fail_bit(iomode), &lo->plh_flags) == 0) + return false; + end = jiffies; + start = end - PNFS_LAYOUTGET_RETRY_TIMEOUT; + if (!time_in_range(lo->plh_retry_timestamp, start, end)) { + /* It is time to retry the failed layoutgets */ + clear_bit(NFS_LAYOUT_RW_FAILED, &lo->plh_flags); + clear_bit(NFS_LAYOUT_RO_FAILED, &lo->plh_flags); + return false; + } + return true; } static void diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index e3eb7d1b17a..bc8e5001203 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -140,6 +140,7 @@ struct pnfs_layout_hdr { atomic_t plh_outstanding; /* number of RPCs out */ unsigned long plh_block_lgets; /* block LAYOUTGET if >0 */ u32 plh_barrier; /* ignore lower seqids */ + unsigned long plh_retry_timestamp; unsigned long plh_flags; loff_t plh_lwb; /* last write byte for layoutcommit */ struct rpc_cred *plh_lc_cred; /* layoutcommit cred */ -- cgit v1.2.3 From 1dfed2737d8cfe2f2378fddfb3bed126ff5474e7 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 18 Sep 2012 19:51:12 -0400 Subject: NFSv4.1: pNFS data servers may be temporarily offline In cases where the pNFS data server is just temporarily out of service, we want to mark it as such, and then try again later. Typically that will be in cases of network connection errors etc. This patch allows us to mark the devices as being "unavailable" for such transient errors, and will make them available for retries after a 2 minute timeout period. 
Signed-off-by: Trond Myklebust --- fs/nfs/nfs4filelayout.c | 22 +++++++++++++++++++--- fs/nfs/nfs4filelayout.h | 8 ++------ fs/nfs/nfs4filelayoutdev.c | 15 +++++++-------- fs/nfs/pnfs.h | 4 ++++ fs/nfs/pnfs_dev.c | 27 +++++++++++++++++++++++++++ 5 files changed, 59 insertions(+), 17 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index af6ee4ad3f1..dac2162c3ac 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -205,7 +205,7 @@ static int filelayout_async_handle_error(struct rpc_task *task, case -EPIPE: dprintk("%s DS connection error %d\n", __func__, task->tk_status); - filelayout_mark_devid_invalid(devid); + nfs4_mark_deviceid_unavailable(devid); clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(inode)->flags); _pnfs_return_layout(inode); rpc_wake_up(&tbl->slot_tbl_waitq); @@ -269,6 +269,22 @@ filelayout_set_layoutcommit(struct nfs_write_data *wdata) (unsigned long) NFS_I(hdr->inode)->layout->plh_lwb); } +bool +filelayout_test_devid_unavailable(struct nfs4_deviceid_node *node) +{ + return filelayout_test_devid_invalid(node) || + nfs4_test_deviceid_unavailable(node); +} + +static bool +filelayout_reset_to_mds(struct pnfs_layout_segment *lseg) +{ + struct nfs4_deviceid_node *node = FILELAYOUT_DEVID_NODE(lseg); + + return filelayout_test_layout_invalid(lseg->pls_layout) || + filelayout_test_devid_unavailable(node); +} + /* * Call ops for the async read/write cases * In the case of dense layouts, the offset needs to be reset to its @@ -613,8 +629,8 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo, goto out; } else dsaddr = container_of(d, struct nfs4_file_layout_dsaddr, id_node); - /* Found deviceid is being reaped */ - if (test_bit(NFS_DEVICEID_INVALID, &dsaddr->id_node.flags)) + /* Found deviceid is unavailable */ + if (filelayout_test_devid_unavailable(&dsaddr->id_node)) goto out_put; fl->dsaddr = dsaddr; diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h index 11053c425a6..10b0f134400 
100644 --- a/fs/nfs/nfs4filelayout.h +++ b/fs/nfs/nfs4filelayout.h @@ -140,12 +140,8 @@ filelayout_test_devid_invalid(struct nfs4_deviceid_node *node) return test_bit(NFS_DEVICEID_INVALID, &node->flags); } -static inline bool -filelayout_reset_to_mds(struct pnfs_layout_segment *lseg) -{ - return filelayout_test_devid_invalid(FILELAYOUT_DEVID_NODE(lseg)) || - filelayout_test_layout_invalid(lseg->pls_layout); -} +extern bool +filelayout_test_devid_unavailable(struct nfs4_deviceid_node *node); extern struct nfs_fh * nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j); diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c index b85a29df20a..3336d5eaf87 100644 --- a/fs/nfs/nfs4filelayoutdev.c +++ b/fs/nfs/nfs4filelayoutdev.c @@ -804,13 +804,14 @@ nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx) struct nfs4_pnfs_ds *ds = dsaddr->ds_list[ds_idx]; struct nfs4_deviceid_node *devid = FILELAYOUT_DEVID_NODE(lseg); - if (filelayout_test_devid_invalid(devid)) + if (filelayout_test_devid_unavailable(devid)) return NULL; if (ds == NULL) { printk(KERN_ERR "NFS: %s: No data server for offset index %d\n", __func__, ds_idx); - goto mark_dev_invalid; + filelayout_mark_devid_invalid(devid); + return NULL; } if (!ds->ds_clp) { @@ -818,14 +819,12 @@ nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx) int err; err = nfs4_ds_connect(s, ds); - if (err) - goto mark_dev_invalid; + if (err) { + nfs4_mark_deviceid_unavailable(devid); + return NULL; + } } return ds; - -mark_dev_invalid: - filelayout_mark_devid_invalid(devid); - return NULL; } module_param(dataserver_retrans, uint, 0644); diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index bc8e5001203..9735031e1e1 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -234,6 +234,7 @@ struct nfs4_threshold *pnfs_mdsthreshold_alloc(void); /* nfs4_deviceid_flags */ enum { NFS_DEVICEID_INVALID = 0, /* set when MDS clientid recalled */ + NFS_DEVICEID_UNAVAILABLE, /* device temporarily unavailable */ }; /* 
pnfs_dev.c */ @@ -243,6 +244,7 @@ struct nfs4_deviceid_node { const struct pnfs_layoutdriver_type *ld; const struct nfs_client *nfs_client; unsigned long flags; + unsigned long timestamp_unavailable; struct nfs4_deviceid deviceid; atomic_t ref; }; @@ -255,6 +257,8 @@ void nfs4_init_deviceid_node(struct nfs4_deviceid_node *, const struct nfs4_deviceid *); struct nfs4_deviceid_node *nfs4_insert_deviceid_node(struct nfs4_deviceid_node *); bool nfs4_put_deviceid_node(struct nfs4_deviceid_node *); +void nfs4_mark_deviceid_unavailable(struct nfs4_deviceid_node *node); +bool nfs4_test_deviceid_unavailable(struct nfs4_deviceid_node *node); void nfs4_deviceid_purge_client(const struct nfs_client *); static inline void diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c index 73f701f1f4d..d35b62e83ea 100644 --- a/fs/nfs/pnfs_dev.c +++ b/fs/nfs/pnfs_dev.c @@ -40,6 +40,8 @@ #define NFS4_DEVICE_ID_HASH_SIZE (1 << NFS4_DEVICE_ID_HASH_BITS) #define NFS4_DEVICE_ID_HASH_MASK (NFS4_DEVICE_ID_HASH_SIZE - 1) +#define PNFS_DEVICE_RETRY_TIMEOUT (120*HZ) + static struct hlist_head nfs4_deviceid_cache[NFS4_DEVICE_ID_HASH_SIZE]; static DEFINE_SPINLOCK(nfs4_deviceid_lock); @@ -218,6 +220,30 @@ nfs4_put_deviceid_node(struct nfs4_deviceid_node *d) } EXPORT_SYMBOL_GPL(nfs4_put_deviceid_node); +void +nfs4_mark_deviceid_unavailable(struct nfs4_deviceid_node *node) +{ + node->timestamp_unavailable = jiffies; + set_bit(NFS_DEVICEID_UNAVAILABLE, &node->flags); +} +EXPORT_SYMBOL_GPL(nfs4_mark_deviceid_unavailable); + +bool +nfs4_test_deviceid_unavailable(struct nfs4_deviceid_node *node) +{ + if (test_bit(NFS_DEVICEID_UNAVAILABLE, &node->flags)) { + unsigned long start, end; + + end = jiffies; + start = end - PNFS_DEVICE_RETRY_TIMEOUT; + if (time_in_range(node->timestamp_unavailable, start, end)) + return true; + clear_bit(NFS_DEVICEID_UNAVAILABLE, &node->flags); + } + return false; +} +EXPORT_SYMBOL_GPL(nfs4_test_deviceid_unavailable); + static void _deviceid_purge_client(const struct nfs_client 
*clp, long hash) { @@ -276,3 +302,4 @@ nfs4_deviceid_mark_client_invalid(struct nfs_client *clp) } rcu_read_unlock(); } + -- cgit v1.2.3 From 830ffb565760234eb984e4343ad82575e96728de Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 20 Sep 2012 21:25:19 -0400 Subject: NFSv4.1: Fix a reference leak in pnfs_update_layout If we exit after the call to pnfs_find_alloc_layout(), we have to ensure that we put the struct pnfs_layout_hdr. Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 2c59da5511d..d7a8f03e729 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1067,8 +1067,10 @@ pnfs_update_layout(struct inode *ino, spin_lock(&ino->i_lock); lo = pnfs_find_alloc_layout(ino, ctx, gfp_flags); - if (lo == NULL) - goto out_unlock; + if (lo == NULL) { + spin_unlock(&ino->i_lock); + goto out; + } /* Do we even need to bother with this? */ if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) { @@ -1122,6 +1124,7 @@ pnfs_update_layout(struct inode *ino, spin_unlock(&clp->cl_lock); } atomic_dec(&lo->plh_outstanding); +out_put_layout_hdr: pnfs_put_layout_hdr(lo); out: dprintk("%s: inode %s/%llu pNFS layout segment %s for " @@ -1135,7 +1138,7 @@ out: return lseg; out_unlock: spin_unlock(&ino->i_lock); - goto out; + goto out_put_layout_hdr; } EXPORT_SYMBOL_GPL(pnfs_update_layout); -- cgit v1.2.3 From 3e6212149304eaf9289d5bc56e003068660f3476 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 24 Sep 2012 13:07:16 -0400 Subject: NFSv4.1: Don't drop the pnfs_layout_hdr after a layoutget failure We want to cache the pnfs_layout_hdr after a layoutget or i/o failure so that pnfs_update_layout() can find it and know when it is time to retry. 
Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 40 ++++++++++++++++++++++++++++++++-------- 1 file changed, 32 insertions(+), 8 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index d7a8f03e729..6834fa1be57 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -247,10 +247,28 @@ pnfs_iomode_to_fail_bit(u32 iomode) } static void -pnfs_layout_io_set_failed(struct pnfs_layout_hdr *lo, u32 iomode) +pnfs_layout_set_fail_bit(struct pnfs_layout_hdr *lo, int fail_bit) { lo->plh_retry_timestamp = jiffies; - set_bit(pnfs_iomode_to_fail_bit(iomode), &lo->plh_flags); + if (!test_and_set_bit(fail_bit, &lo->plh_flags)) + atomic_inc(&lo->plh_refcount); +} + +static void +pnfs_layout_clear_fail_bit(struct pnfs_layout_hdr *lo, int fail_bit) +{ + if (test_and_clear_bit(fail_bit, &lo->plh_flags)) + atomic_dec(&lo->plh_refcount); +} + +static void +pnfs_layout_io_set_failed(struct pnfs_layout_hdr *lo, u32 iomode) +{ + struct inode *inode = lo->plh_inode; + + spin_lock(&inode->i_lock); + pnfs_layout_set_fail_bit(lo, pnfs_iomode_to_fail_bit(iomode)); + spin_unlock(&inode->i_lock); dprintk("%s Setting layout IOMODE_%s fail bit\n", __func__, iomode == IOMODE_RW ?
"RW" : "READ"); } @@ -259,14 +277,15 @@ static bool pnfs_layout_io_test_failed(struct pnfs_layout_hdr *lo, u32 iomode) { unsigned long start, end; - if (test_bit(pnfs_iomode_to_fail_bit(iomode), &lo->plh_flags) == 0) + int fail_bit = pnfs_iomode_to_fail_bit(iomode); + + if (test_bit(fail_bit, &lo->plh_flags) == 0) return false; end = jiffies; start = end - PNFS_LAYOUTGET_RETRY_TIMEOUT; if (!time_in_range(lo->plh_retry_timestamp, start, end)) { /* It is time to retry the failed layoutgets */ - clear_bit(NFS_LAYOUT_RW_FAILED, &lo->plh_flags); - clear_bit(NFS_LAYOUT_RO_FAILED, &lo->plh_flags); + pnfs_layout_clear_fail_bit(lo, fail_bit); return false; } return true; @@ -493,9 +512,14 @@ pnfs_destroy_layout(struct nfs_inode *nfsi) if (lo) { lo->plh_block_lgets++; /* permanently block new LAYOUTGETs */ pnfs_mark_matching_lsegs_invalid(lo, &tmp_list, NULL); - } - spin_unlock(&nfsi->vfs_inode.i_lock); - pnfs_free_lseg_list(&tmp_list); + pnfs_get_layout_hdr(lo); + pnfs_layout_clear_fail_bit(lo, NFS_LAYOUT_RO_FAILED); + pnfs_layout_clear_fail_bit(lo, NFS_LAYOUT_RW_FAILED); + spin_unlock(&nfsi->vfs_inode.i_lock); + pnfs_free_lseg_list(&tmp_list); + pnfs_put_layout_hdr(lo); + } else + spin_unlock(&nfsi->vfs_inode.i_lock); } EXPORT_SYMBOL_GPL(pnfs_destroy_layout); -- cgit v1.2.3 From 115ce575cb10918514d053ef15f597a4e6ff60e9 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 20 Sep 2012 21:19:43 -0400 Subject: NFSv4.1: pnfs_layout_io_set_failed must clear invalid lsegs If pnfs_layout_io_test_failed() authorises a retry of the failed layoutgets, we should clear the existing layout segments so that we start afresh. Do this in pnfs_layout_io_set_failed(). 
Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'fs/nfs') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 6834fa1be57..9ee3bd705b9 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -265,10 +265,18 @@ static void pnfs_layout_io_set_failed(struct pnfs_layout_hdr *lo, u32 iomode) { struct inode *inode = lo->plh_inode; + struct pnfs_layout_range range = { + .iomode = iomode, + .offset = 0, + .length = NFS4_MAX_UINT64, + }; + LIST_HEAD(head); spin_lock(&inode->i_lock); pnfs_layout_set_fail_bit(lo, pnfs_iomode_to_fail_bit(iomode)); + pnfs_mark_matching_lsegs_invalid(lo, &head, &range); spin_unlock(&inode->i_lock); + pnfs_free_lseg_list(&head); dprintk("%s Setting layout IOMODE_%s fail bit\n", __func__, iomode == IOMODE_RW ? "RW" : "READ"); } -- cgit v1.2.3 From 7fdab069b7172f2348cf3d87e19c6c24340292bf Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 20 Sep 2012 20:15:57 -0400 Subject: NFSv4.1: Fix a race in the pNFS return-on-close code If we sleep after dropping the inode->i_lock, then we are no longer atomic with respect to the rpc_wake_up() call in pnfs_layout_remove_lseg(). 
Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 8 +++----- fs/nfs/pnfs.c | 22 ++++++++++++---------- fs/nfs/pnfs.h | 4 ++-- 3 files changed, 17 insertions(+), 17 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index e605d417a00..6d5750cabd8 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2137,6 +2137,7 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) { struct nfs4_closedata *calldata = data; struct nfs4_state *state = calldata->state; + struct inode *inode = calldata->inode; int call_close = 0; dprintk("%s: begin!\n", __func__); @@ -2170,16 +2171,13 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) if (calldata->arg.fmode == 0) { task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE]; if (calldata->roc && - pnfs_roc_drain(calldata->inode, &calldata->roc_barrier)) { - rpc_sleep_on(&NFS_SERVER(calldata->inode)->roc_rpcwaitq, - task, NULL); + pnfs_roc_drain(inode, &calldata->roc_barrier, task)) goto out; - } } nfs_fattr_init(calldata->res.fattr); calldata->timestamp = jiffies; - if (nfs4_setup_sequence(NFS_SERVER(calldata->inode), + if (nfs4_setup_sequence(NFS_SERVER(inode), &calldata->arg.seq_args, &calldata->res.seq_res, task)) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 9ee3bd705b9..8b32f874533 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -807,27 +807,29 @@ void pnfs_roc_set_barrier(struct inode *ino, u32 barrier) spin_unlock(&ino->i_lock); } -bool pnfs_roc_drain(struct inode *ino, u32 *barrier) +bool pnfs_roc_drain(struct inode *ino, u32 *barrier, struct rpc_task *task) { struct nfs_inode *nfsi = NFS_I(ino); + struct pnfs_layout_hdr *lo; struct pnfs_layout_segment *lseg; + u32 current_seqid; bool found = false; spin_lock(&ino->i_lock); list_for_each_entry(lseg, &nfsi->layout->plh_segs, pls_list) if (test_bit(NFS_LSEG_ROC, &lseg->pls_flags)) { + rpc_sleep_on(&NFS_SERVER(ino)->roc_rpcwaitq, task, NULL); found = true; - break; + goto out; } - if (!found) { 
- struct pnfs_layout_hdr *lo = nfsi->layout; - u32 current_seqid = be32_to_cpu(lo->plh_stateid.seqid); + lo = nfsi->layout; + current_seqid = be32_to_cpu(lo->plh_stateid.seqid); - /* Since close does not return a layout stateid for use as - * a barrier, we choose the worst-case barrier. - */ - *barrier = current_seqid + atomic_read(&lo->plh_outstanding); - } + /* Since close does not return a layout stateid for use as + * a barrier, we choose the worst-case barrier. + */ + *barrier = current_seqid + atomic_read(&lo->plh_outstanding); +out: spin_unlock(&ino->i_lock); return found; } diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 9735031e1e1..aa9fa1b1ff4 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -210,7 +210,7 @@ int pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, bool pnfs_roc(struct inode *ino); void pnfs_roc_release(struct inode *ino); void pnfs_roc_set_barrier(struct inode *ino, u32 barrier); -bool pnfs_roc_drain(struct inode *ino, u32 *barrier); +bool pnfs_roc_drain(struct inode *ino, u32 *barrier, struct rpc_task *task); void pnfs_set_layoutcommit(struct nfs_write_data *wdata); void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data); int pnfs_layoutcommit_inode(struct inode *inode, bool sync); @@ -442,7 +442,7 @@ pnfs_roc_set_barrier(struct inode *ino, u32 barrier) } static inline bool -pnfs_roc_drain(struct inode *ino, u32 *barrier) +pnfs_roc_drain(struct inode *ino, u32 *barrier, struct rpc_task *task) { return false; } -- cgit v1.2.3 From 1f7977c1368afc483908281daaffd31bca5a8d1e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 20 Sep 2012 20:31:51 -0400 Subject: NFSv4.1: Simplify the pNFS return-on-close code Confine it to the nfs4_do_close() code. 
Signed-off-by: Trond Myklebust --- fs/nfs/nfs4_fs.h | 2 +- fs/nfs/nfs4proc.c | 6 ++---- fs/nfs/nfs4state.c | 7 ++----- 3 files changed, 5 insertions(+), 10 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 71d407fd00a..9cacc131a8a 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -223,7 +223,7 @@ extern int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred); extern int nfs4_destroy_clientid(struct nfs_client *clp); extern int nfs4_init_clientid(struct nfs_client *, struct rpc_cred *); extern int nfs41_init_clientid(struct nfs_client *, struct rpc_cred *); -extern int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait, bool roc); +extern int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait); extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle); extern int nfs4_proc_fs_locations(struct rpc_clnt *, struct inode *, const struct qstr *, struct nfs4_fs_locations *, struct page *); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 6d5750cabd8..8de0435caed 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2204,7 +2204,7 @@ static const struct rpc_call_ops nfs4_close_ops = { * * NOTE: Caller must be holding the sp->so_owner semaphore! 
*/ -int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait, bool roc) +int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait) { struct nfs_server *server = NFS_SERVER(state->inode); struct nfs4_closedata *calldata; @@ -2240,7 +2240,7 @@ int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait, bool roc) calldata->res.fattr = &calldata->fattr; calldata->res.seqid = calldata->arg.seqid; calldata->res.server = server; - calldata->roc = roc; + calldata->roc = pnfs_roc(state->inode); nfs_sb_active(calldata->inode->i_sb); msg.rpc_argp = &calldata->arg; @@ -2257,8 +2257,6 @@ int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait, bool roc) out_free_calldata: kfree(calldata); out: - if (roc) - pnfs_roc_release(state->inode); nfs4_put_open_state(state); nfs4_put_state_owner(sp); return status; diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index fc6cfe68ad1..a5331ec094a 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -729,11 +729,8 @@ static void __nfs4_close(struct nfs4_state *state, if (!call_close) { nfs4_put_open_state(state); nfs4_put_state_owner(owner); - } else { - bool roc = pnfs_roc(state->inode); - - nfs4_do_close(state, gfp_mask, wait, roc); - } + } else + nfs4_do_close(state, gfp_mask, wait); } void nfs4_close_state(struct nfs4_state *state, fmode_t fmode) -- cgit v1.2.3 From 965938b83b19aeffdc1d16ce9947c8c127e8f59b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 20 Sep 2012 15:07:45 -0400 Subject: NFSv4.1: Get rid of pNFS layout state "NFS_LAYOUT_INVALID" In all cases where we set NFS_LAYOUT_INVALID, we also set NFS_LAYOUT_DESTROYED. Furthermore, in all cases where we test for NFS_LAYOUT_INVALID, we should also be testing for NFS_LAYOUT_DESTROYED, since the latter means that we hold no valid layout segments. Ergo the two are redundant. 
Signed-off-by: Trond Myklebust --- fs/nfs/nfs4filelayout.c | 4 +--- fs/nfs/nfs4filelayout.h | 6 ------ fs/nfs/pnfs.h | 7 ++++++- 3 files changed, 7 insertions(+), 10 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index dac2162c3ac..6cce57e7fe5 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -190,8 +190,6 @@ static int filelayout_async_handle_error(struct rpc_task *task, * i/o and all i/o waiting on the slot table to the MDS until * layout is destroyed and a new valid layout is obtained. */ - set_bit(NFS_LAYOUT_INVALID, - &NFS_I(inode)->layout->plh_flags); pnfs_destroy_layout(NFS_I(inode)); rpc_wake_up(&tbl->slot_tbl_waitq); goto reset; @@ -281,7 +279,7 @@ filelayout_reset_to_mds(struct pnfs_layout_segment *lseg) { struct nfs4_deviceid_node *node = FILELAYOUT_DEVID_NODE(lseg); - return filelayout_test_layout_invalid(lseg->pls_layout) || + return pnfs_test_layout_destroyed(lseg->pls_layout) || filelayout_test_devid_unavailable(node); } diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h index 10b0f134400..dca47d78671 100644 --- a/fs/nfs/nfs4filelayout.h +++ b/fs/nfs/nfs4filelayout.h @@ -128,12 +128,6 @@ filelayout_mark_devid_invalid(struct nfs4_deviceid_node *node) set_bit(NFS_DEVICEID_INVALID, &node->flags); } -static inline bool -filelayout_test_layout_invalid(struct pnfs_layout_hdr *lo) -{ - return test_bit(NFS_LAYOUT_INVALID, &lo->plh_flags); -} - static inline bool filelayout_test_devid_invalid(struct nfs4_deviceid_node *node) { diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index aa9fa1b1ff4..aacda7fbb53 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -63,7 +63,6 @@ enum { NFS_LAYOUT_BULK_RECALL, /* bulk recall affecting layout */ NFS_LAYOUT_ROC, /* some lseg had roc bit set */ NFS_LAYOUT_DESTROYED, /* no new use of layout allowed */ - NFS_LAYOUT_INVALID, /* layout is being destroyed */ NFS_LAYOUT_RETURNED, /* layout has already been returned */ }; @@ -279,6 +278,12 @@ 
pnfs_test_layout_returned(struct pnfs_layout_hdr *lo) return test_bit(NFS_LAYOUT_RETURNED, &lo->plh_flags); } +static inline bool +pnfs_test_layout_destroyed(struct pnfs_layout_hdr *lo) +{ + return test_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags); +} + static inline struct pnfs_layout_segment * pnfs_get_lseg(struct pnfs_layout_segment *lseg) { -- cgit v1.2.3 From bb346f63976823c2959b0c5917928f12cbf96e4a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 20 Sep 2012 15:52:13 -0400 Subject: NFSv4.1: reset the inode MDS threshold counters on layout destruction Instead of resetting the inode MDS threshold counters when we mark the layout for destruction, do it as part of freeing the layout. Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 8b32f874533..ac94fb86fd1 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -215,9 +215,13 @@ pnfs_free_layout_hdr(struct pnfs_layout_hdr *lo) static void destroy_layout_hdr(struct pnfs_layout_hdr *lo) { + struct nfs_inode *nfsi = NFS_I(lo->plh_inode); dprintk("%s: freeing layout cache %p\n", __func__, lo); BUG_ON(!list_empty(&lo->plh_layouts)); - NFS_I(lo->plh_inode)->layout = NULL; + nfsi->layout = NULL; + /* Reset MDS Threshold I/O counters */ + nfsi->write_io = 0; + nfsi->read_io = 0; pnfs_free_layout_hdr(lo); } @@ -461,9 +465,6 @@ pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, dprintk("%s:Begin lo %p\n", __func__, lo); if (list_empty(&lo->plh_segs)) { - /* Reset MDS Threshold I/O counters */ - NFS_I(lo->plh_inode)->write_io = 0; - NFS_I(lo->plh_inode)->read_io = 0; if (!test_and_set_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags)) pnfs_put_layout_hdr_locked(lo); return 0; -- cgit v1.2.3 From 57036a377600ec0900b13f29814aa19072ad3e52 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 20 Sep 2012 16:33:30 -0400 Subject: NFSv4.1: Rename the pnfs_put_lseg_common to 
pnfs_layout_remove_lseg The latter name is more descriptive of the actual function. Also rename pnfs_insert_layout to pnfs_layout_insert_lseg. Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index ac94fb86fd1..33273b3a330 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -319,21 +319,22 @@ static void free_lseg(struct pnfs_layout_segment *lseg) struct inode *ino = lseg->pls_layout->plh_inode; NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg); - /* Matched by pnfs_get_layout_hdr in pnfs_insert_layout */ + /* Matched by pnfs_get_layout_hdr in pnfs_layout_insert_lseg */ pnfs_put_layout_hdr(NFS_I(ino)->layout); } static void -pnfs_put_lseg_common(struct pnfs_layout_segment *lseg) +pnfs_layout_remove_lseg(struct pnfs_layout_hdr *lo, + struct pnfs_layout_segment *lseg) { - struct inode *inode = lseg->pls_layout->plh_inode; + struct inode *inode = lo->plh_inode; WARN_ON(test_bit(NFS_LSEG_VALID, &lseg->pls_flags)); list_del_init(&lseg->pls_list); - if (list_empty(&lseg->pls_layout->plh_segs)) { - set_bit(NFS_LAYOUT_DESTROYED, &lseg->pls_layout->plh_flags); + if (list_empty(&lo->plh_segs)) { + set_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags); /* Matched by initial refcount set in alloc_init_layout_hdr */ - pnfs_put_layout_hdr_locked(lseg->pls_layout); + pnfs_put_layout_hdr_locked(lo); } rpc_wake_up(&NFS_SERVER(inode)->roc_rpcwaitq); } @@ -341,6 +342,7 @@ pnfs_put_lseg_common(struct pnfs_layout_segment *lseg) void pnfs_put_lseg(struct pnfs_layout_segment *lseg) { + struct pnfs_layout_hdr *lo; struct inode *inode; if (!lseg) @@ -349,13 +351,14 @@ pnfs_put_lseg(struct pnfs_layout_segment *lseg) dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg, atomic_read(&lseg->pls_refcount), test_bit(NFS_LSEG_VALID, &lseg->pls_flags)); - inode = lseg->pls_layout->plh_inode; + lo = lseg->pls_layout; + inode = lo->plh_inode; if 
(atomic_dec_and_lock(&lseg->pls_refcount, &inode->i_lock)) { LIST_HEAD(free_me); - pnfs_put_lseg_common(lseg); - list_add(&lseg->pls_list, &free_me); + pnfs_layout_remove_lseg(lo, lseg); spin_unlock(&inode->i_lock); + list_add(&lseg->pls_list, &free_me); pnfs_free_lseg_list(&free_me); } } @@ -443,7 +446,7 @@ static int mark_lseg_invalid(struct pnfs_layout_segment *lseg, dprintk("%s: lseg %p ref %d\n", __func__, lseg, atomic_read(&lseg->pls_refcount)); if (atomic_dec_and_test(&lseg->pls_refcount)) { - pnfs_put_lseg_common(lseg); + pnfs_layout_remove_lseg(lseg->pls_layout, lseg); list_add(&lseg->pls_list, tmp_list); rv = 1; } @@ -861,7 +864,7 @@ cmp_layout(struct pnfs_layout_range *l1, } static void -pnfs_insert_layout(struct pnfs_layout_hdr *lo, +pnfs_layout_insert_lseg(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg) { struct pnfs_layout_segment *lp; @@ -1211,7 +1214,7 @@ pnfs_layout_process(struct nfs4_layoutget *lgp) init_lseg(lo, lseg); lseg->pls_range = res->range; pnfs_get_lseg(lseg); - pnfs_insert_layout(lo, lseg); + pnfs_layout_insert_lseg(lo, lseg); if (res->return_on_close) { set_bit(NFS_LSEG_ROC, &lseg->pls_flags); -- cgit v1.2.3 From 01d39ce82b565961abaf1930f54ccf7b32c96b15 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 20 Sep 2012 17:02:32 -0400 Subject: NFSv4.1: Remove redundant reference to the pnfs_layout_hdr Each layout segment already holds a reference to the pnfs_layout_hdr, so there is no need to hold an extra reference that is released once the last layout segment is freed. Ensure that pnfs_find_alloc_layout() always returns a reference to the pnfs_layout_hdr, which will be matched by the final call to pnfs_put_layout_hdr() in pnfs_update_layout(). 
Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 33273b3a330..7ac5be36f13 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -331,11 +331,8 @@ pnfs_layout_remove_lseg(struct pnfs_layout_hdr *lo, WARN_ON(test_bit(NFS_LSEG_VALID, &lseg->pls_flags)); list_del_init(&lseg->pls_list); - if (list_empty(&lo->plh_segs)) { + if (list_empty(&lo->plh_segs)) set_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags); - /* Matched by initial refcount set in alloc_init_layout_hdr */ - pnfs_put_layout_hdr_locked(lo); - } rpc_wake_up(&NFS_SERVER(inode)->roc_rpcwaitq); } @@ -468,8 +465,7 @@ pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, dprintk("%s:Begin lo %p\n", __func__, lo); if (list_empty(&lo->plh_segs)) { - if (!test_and_set_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags)) - pnfs_put_layout_hdr_locked(lo); + set_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags); return 0; } list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list) @@ -929,8 +925,8 @@ pnfs_find_alloc_layout(struct inode *ino, if (nfsi->layout) { if (test_bit(NFS_LAYOUT_DESTROYED, &nfsi->layout->plh_flags)) return NULL; - else - return nfsi->layout; + pnfs_get_layout_hdr(nfsi->layout); + return nfsi->layout; } spin_unlock(&ino->i_lock); new = alloc_init_layout_hdr(ino, ctx, gfp_flags); @@ -1129,7 +1125,6 @@ pnfs_update_layout(struct inode *ino, goto out_unlock; atomic_inc(&lo->plh_outstanding); - pnfs_get_layout_hdr(lo); if (list_empty(&lo->plh_segs)) first = true; -- cgit v1.2.3 From 6622c3ea059b2fed49924b74db41d1e0f065fbd3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 20 Sep 2012 17:23:11 -0400 Subject: NFSv4.1: Free the pnfs_layout_hdr outside the inode->i_lock None of the existing pNFS layout drivers seem to require the inode to be locked while they free the layout header. 
Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 7ac5be36f13..08663146f5f 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -213,7 +213,7 @@ pnfs_free_layout_hdr(struct pnfs_layout_hdr *lo) } static void -destroy_layout_hdr(struct pnfs_layout_hdr *lo) +pnfs_detach_layout_hdr(struct pnfs_layout_hdr *lo) { struct nfs_inode *nfsi = NFS_I(lo->plh_inode); dprintk("%s: freeing layout cache %p\n", __func__, lo); @@ -222,14 +222,6 @@ destroy_layout_hdr(struct pnfs_layout_hdr *lo) /* Reset MDS Threshold I/O counters */ nfsi->write_io = 0; nfsi->read_io = 0; - pnfs_free_layout_hdr(lo); -} - -static void -pnfs_put_layout_hdr_locked(struct pnfs_layout_hdr *lo) -{ - if (atomic_dec_and_test(&lo->plh_refcount)) - destroy_layout_hdr(lo); } void @@ -238,8 +230,9 @@ pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo) struct inode *inode = lo->plh_inode; if (atomic_dec_and_lock(&lo->plh_refcount, &inode->i_lock)) { - destroy_layout_hdr(lo); + pnfs_detach_layout_hdr(lo); spin_unlock(&inode->i_lock); + pnfs_free_layout_hdr(lo); } } @@ -792,8 +785,12 @@ void pnfs_roc_release(struct inode *ino) spin_lock(&ino->i_lock); lo = NFS_I(ino)->layout; lo->plh_block_lgets--; - pnfs_put_layout_hdr_locked(lo); - spin_unlock(&ino->i_lock); + if (atomic_dec_and_test(&lo->plh_refcount)) { + pnfs_detach_layout_hdr(lo); + spin_unlock(&ino->i_lock); + pnfs_free_layout_hdr(lo); + } else + spin_unlock(&ino->i_lock); } void pnfs_roc_set_barrier(struct inode *ino, u32 barrier) -- cgit v1.2.3 From 9c6263819f25254f2ed48f076b50096dd5893dfb Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 20 Sep 2012 17:31:43 -0400 Subject: NFSv4.1: Clean up the removal of pnfs_layout_hdr from the server list Move the code into pnfs_free_layout_hdr(), and add checks to get_layout_by_fh_locked to ensure that they don't reference a layout that is being freed. 
Signed-off-by: Trond Myklebust --- fs/nfs/callback_proc.c | 19 ++++++++++++++++++- fs/nfs/pnfs.c | 29 ++++++++++------------------- 2 files changed, 28 insertions(+), 20 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 24252fea2c9..76b4a7a3e55 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -122,7 +122,15 @@ static struct pnfs_layout_hdr * get_layout_by_fh_locked(struct nfs_client *clp, ino = igrab(lo->plh_inode); if (!ino) continue; + spin_lock(&ino->i_lock); + /* Is this layout in the process of being freed? */ + if (NFS_I(ino)->layout != lo) { + spin_unlock(&ino->i_lock); + iput(ino); + continue; + } pnfs_get_layout_hdr(lo); + spin_unlock(&ino->i_lock); return lo; } } @@ -196,9 +204,18 @@ static u32 initiate_bulk_draining(struct nfs_client *clp, continue; list_for_each_entry(lo, &server->layouts, plh_layouts) { - if (!igrab(lo->plh_inode)) + ino = igrab(lo->plh_inode); + if (!ino) + continue; + spin_lock(&ino->i_lock); + /* Is this layout in the process of being freed?
*/ + if (NFS_I(ino)->layout != lo) { + spin_unlock(&ino->i_lock); + iput(ino); continue; + } pnfs_get_layout_hdr(lo); + spin_unlock(&ino->i_lock); BUG_ON(!list_empty(&lo->plh_bulk_recall)); list_add(&lo->plh_bulk_recall, &recall_list); } diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 08663146f5f..11cc0ad6b40 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -207,7 +207,16 @@ pnfs_alloc_layout_hdr(struct inode *ino, gfp_t gfp_flags) static void pnfs_free_layout_hdr(struct pnfs_layout_hdr *lo) { - struct pnfs_layoutdriver_type *ld = NFS_SERVER(lo->plh_inode)->pnfs_curr_ld; + struct nfs_server *server = NFS_SERVER(lo->plh_inode); + struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld; + + if (!list_empty(&lo->plh_layouts)) { + struct nfs_client *clp = server->nfs_client; + + spin_lock(&clp->cl_lock); + list_del_init(&lo->plh_layouts); + spin_unlock(&clp->cl_lock); + } put_rpccred(lo->plh_lc_cred); return ld->alloc_layout_hdr ? ld->free_layout_hdr(lo) : kfree(lo); } @@ -217,7 +226,6 @@ pnfs_detach_layout_hdr(struct pnfs_layout_hdr *lo) { struct nfs_inode *nfsi = NFS_I(lo->plh_inode); dprintk("%s: freeing layout cache %p\n", __func__, lo); - BUG_ON(!list_empty(&lo->plh_layouts)); nfsi->layout = NULL; /* Reset MDS Threshold I/O counters */ nfsi->write_io = 0; @@ -480,22 +488,10 @@ void pnfs_free_lseg_list(struct list_head *free_me) { struct pnfs_layout_segment *lseg, *tmp; - struct pnfs_layout_hdr *lo; if (list_empty(free_me)) return; - lo = list_first_entry(free_me, struct pnfs_layout_segment, - pls_list)->pls_layout; - - if (test_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags)) { - struct nfs_client *clp; - - clp = NFS_SERVER(lo->plh_inode)->nfs_client; - spin_lock(&clp->cl_lock); - list_del_init(&lo->plh_layouts); - spin_unlock(&clp->cl_lock); - } list_for_each_entry_safe(lseg, tmp, free_me, pls_list) { list_del(&lseg->pls_list); free_lseg(lseg); @@ -1148,11 +1144,6 @@ pnfs_update_layout(struct inode *ino, arg.length = PAGE_CACHE_ALIGN(arg.length); lseg = 
send_layoutget(lo, ctx, &arg, gfp_flags); - if (!lseg && first) { - spin_lock(&clp->cl_lock); - list_del_init(&lo->plh_layouts); - spin_unlock(&clp->cl_lock); - } atomic_dec(&lo->plh_outstanding); out_put_layout_hdr: pnfs_put_layout_hdr(lo); -- cgit v1.2.3 From 905ca191cfe1ab18822d86e3ddef1b1b38832fdc Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 20 Sep 2012 20:46:49 -0400 Subject: NFSv4.1: Clean up pnfs_put_lseg() There is no longer a need to use pnfs_free_lseg_list(). Just call pnfs_free_lseg() directly. Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 11cc0ad6b40..c34ba9a0a46 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -315,7 +315,7 @@ init_lseg(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg) lseg->pls_layout = lo; } -static void free_lseg(struct pnfs_layout_segment *lseg) +static void pnfs_free_lseg(struct pnfs_layout_segment *lseg) { struct inode *ino = lseg->pls_layout->plh_inode; @@ -352,12 +352,9 @@ pnfs_put_lseg(struct pnfs_layout_segment *lseg) lo = lseg->pls_layout; inode = lo->plh_inode; if (atomic_dec_and_lock(&lseg->pls_refcount, &inode->i_lock)) { - LIST_HEAD(free_me); - pnfs_layout_remove_lseg(lo, lseg); spin_unlock(&inode->i_lock); - list_add(&lseg->pls_list, &free_me); - pnfs_free_lseg_list(&free_me); + pnfs_free_lseg(lseg); } } EXPORT_SYMBOL_GPL(pnfs_put_lseg); @@ -494,7 +491,7 @@ pnfs_free_lseg_list(struct list_head *free_me) list_for_each_entry_safe(lseg, tmp, free_me, pls_list) { list_del(&lseg->pls_list); - free_lseg(lseg); + pnfs_free_lseg(lseg); } } -- cgit v1.2.3 From 8f0d27dc5d77b084b2e2fe6d883c4d5776287842 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 20 Sep 2012 20:57:11 -0400 Subject: NFSv4.1: Balance pnfs_layout_hdr refcount in pnfs_layout_(insert|remove)_lseg Ensure that the reference count for pnfs_layout_hdr reverts to the original value after a call to 
pnfs_layout_remove_lseg(). Note that the caller is expected to hold a reference to the struct pnfs_layout_hdr. Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index c34ba9a0a46..bdd93b96905 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -320,8 +320,6 @@ static void pnfs_free_lseg(struct pnfs_layout_segment *lseg) struct inode *ino = lseg->pls_layout->plh_inode; NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg); - /* Matched by pnfs_get_layout_hdr in pnfs_layout_insert_lseg */ - pnfs_put_layout_hdr(NFS_I(ino)->layout); } static void @@ -332,6 +330,8 @@ pnfs_layout_remove_lseg(struct pnfs_layout_hdr *lo, WARN_ON(test_bit(NFS_LSEG_VALID, &lseg->pls_flags)); list_del_init(&lseg->pls_list); + /* Matched by pnfs_get_layout_hdr in pnfs_layout_insert_lseg */ + atomic_dec(&lo->plh_refcount); if (list_empty(&lo->plh_segs)) set_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags); rpc_wake_up(&NFS_SERVER(inode)->roc_rpcwaitq); @@ -352,9 +352,11 @@ pnfs_put_lseg(struct pnfs_layout_segment *lseg) lo = lseg->pls_layout; inode = lo->plh_inode; if (atomic_dec_and_lock(&lseg->pls_refcount, &inode->i_lock)) { + pnfs_get_layout_hdr(lo); pnfs_layout_remove_lseg(lo, lseg); spin_unlock(&inode->i_lock); pnfs_free_lseg(lseg); + pnfs_put_layout_hdr(lo); } } EXPORT_SYMBOL_GPL(pnfs_put_lseg); -- cgit v1.2.3 From a9136d4914f61110ca9897ec65ab620075c50298 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 20 Sep 2012 20:01:56 -0400 Subject: NFSv4.1: Get rid of pNFS spin lock debugging asserts... These are all in static declared functions that are called only once. 
Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index bdd93b96905..edc8288fd3d 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -859,7 +859,6 @@ pnfs_layout_insert_lseg(struct pnfs_layout_hdr *lo, dprintk("%s:Begin\n", __func__); - assert_spin_locked(&lo->plh_inode->i_lock); list_for_each_entry(lp, &lo->plh_segs, pls_list) { if (cmp_layout(&lseg->pls_range, &lp->pls_range) > 0) continue; @@ -913,7 +912,6 @@ pnfs_find_alloc_layout(struct inode *ino, dprintk("%s Begin ino=%p layout=%p\n", __func__, ino, nfsi->layout); - assert_spin_locked(&ino->i_lock); if (nfsi->layout) { if (test_bit(NFS_LAYOUT_DESTROYED, &nfsi->layout->plh_flags)) return NULL; @@ -970,7 +968,6 @@ pnfs_find_lseg(struct pnfs_layout_hdr *lo, dprintk("%s:Begin\n", __func__); - assert_spin_locked(&lo->plh_inode->i_lock); list_for_each_entry(lseg, &lo->plh_segs, pls_list) { if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags) && is_matching_lseg(&lseg->pls_range, range)) { -- cgit v1.2.3 From 579342785f7069d32e9637ef30d59c4256dcec3d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 20 Sep 2012 20:37:23 -0400 Subject: NFSv4.1: Remove unused 'default allocation' for pnfs_alloc_layout_hdr() ...and ditto for pnfs_free_layout_hdr() Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index edc8288fd3d..fcc72ecf2fd 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -200,8 +200,7 @@ static struct pnfs_layout_hdr * pnfs_alloc_layout_hdr(struct inode *ino, gfp_t gfp_flags) { struct pnfs_layoutdriver_type *ld = NFS_SERVER(ino)->pnfs_curr_ld; - return ld->alloc_layout_hdr ? 
ld->alloc_layout_hdr(ino, gfp_flags) : - kzalloc(sizeof(struct pnfs_layout_hdr), gfp_flags); + return ld->alloc_layout_hdr(ino, gfp_flags); } static void @@ -218,7 +217,7 @@ pnfs_free_layout_hdr(struct pnfs_layout_hdr *lo) spin_unlock(&clp->cl_lock); } put_rpccred(lo->plh_lc_cred); - return ld->alloc_layout_hdr ? ld->free_layout_hdr(lo) : kfree(lo); + return ld->free_layout_hdr(lo); } static void -- cgit v1.2.3 From 8006bfba36d42b6976ed92979f51e5f9bef2625c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 21 Sep 2012 14:48:04 -0400 Subject: NFSv4.1: Get rid of the NFS_LAYOUT_DESTROYED state We already have a mechanism for blocking LAYOUTGET by means of the plh_block_lgets counter. The only "service" that NFS_LAYOUT_DESTROYED provides at this point is to block layoutget once the layout segment list is empty, which basically means that you have to wait until the pnfs_layout_hdr is destroyed before you can do pNFS on that file again. This patch enables the reuse of the pnfs_layout_hdr if the layout segment list is empty. 
Signed-off-by: Trond Myklebust --- fs/nfs/nfs4filelayout.c | 3 +-- fs/nfs/pnfs.c | 9 +-------- fs/nfs/pnfs.h | 7 ------- 3 files changed, 2 insertions(+), 17 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 6cce57e7fe5..52d84721206 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -279,8 +279,7 @@ filelayout_reset_to_mds(struct pnfs_layout_segment *lseg) { struct nfs4_deviceid_node *node = FILELAYOUT_DEVID_NODE(lseg); - return pnfs_test_layout_destroyed(lseg->pls_layout) || - filelayout_test_devid_unavailable(node); + return filelayout_test_devid_unavailable(node); } /* diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index fcc72ecf2fd..bda88a27507 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -331,8 +331,6 @@ pnfs_layout_remove_lseg(struct pnfs_layout_hdr *lo, list_del_init(&lseg->pls_list); /* Matched by pnfs_get_layout_hdr in pnfs_layout_insert_lseg */ atomic_dec(&lo->plh_refcount); - if (list_empty(&lo->plh_segs)) - set_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags); rpc_wake_up(&NFS_SERVER(inode)->roc_rpcwaitq); } @@ -463,10 +461,8 @@ pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, dprintk("%s:Begin lo %p\n", __func__, lo); - if (list_empty(&lo->plh_segs)) { - set_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags); + if (list_empty(&lo->plh_segs)) return 0; - } list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list) if (!recall_range || should_free_lseg(&lseg->pls_range, recall_range)) { @@ -590,7 +586,6 @@ pnfs_layoutgets_blocked(struct pnfs_layout_hdr *lo, nfs4_stateid *stateid, (int)(lo->plh_barrier - be32_to_cpu(stateid->seqid)) >= 0) return true; return lo->plh_block_lgets || - test_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags) || test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) || (list_empty(&lo->plh_segs) && (atomic_read(&lo->plh_outstanding) > lget)); @@ -912,8 +907,6 @@ pnfs_find_alloc_layout(struct inode *ino, dprintk("%s Begin ino=%p layout=%p\n", __func__, ino, 
nfsi->layout); if (nfsi->layout) { - if (test_bit(NFS_LAYOUT_DESTROYED, &nfsi->layout->plh_flags)) - return NULL; pnfs_get_layout_hdr(nfsi->layout); return nfsi->layout; } diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index aacda7fbb53..92f6ce6532b 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -62,7 +62,6 @@ enum { NFS_LAYOUT_RW_FAILED, /* get rw layout failed stop trying */ NFS_LAYOUT_BULK_RECALL, /* bulk recall affecting layout */ NFS_LAYOUT_ROC, /* some lseg had roc bit set */ - NFS_LAYOUT_DESTROYED, /* no new use of layout allowed */ NFS_LAYOUT_RETURNED, /* layout has already been returned */ }; @@ -278,12 +277,6 @@ pnfs_test_layout_returned(struct pnfs_layout_hdr *lo) return test_bit(NFS_LAYOUT_RETURNED, &lo->plh_flags); } -static inline bool -pnfs_test_layout_destroyed(struct pnfs_layout_hdr *lo) -{ - return test_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags); -} - static inline struct pnfs_layout_segment * pnfs_get_lseg(struct pnfs_layout_segment *lseg) { -- cgit v1.2.3 From 173f77e9c5cbddb02eebe17dd9c48d39e5eb86b9 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 21 Sep 2012 15:49:42 -0400 Subject: NFSv4.1: Clear NFS_LAYOUT_BULK_RECALL when the layout segments are freed Once all the affected layout segments have been freed up, clear the NFS_LAYOUT_BULK_RECALL flag so that we can reuse the pnfs_layout_hdr Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/nfs') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index bda88a27507..174c51a5001 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -331,6 +331,8 @@ pnfs_layout_remove_lseg(struct pnfs_layout_hdr *lo, list_del_init(&lseg->pls_list); /* Matched by pnfs_get_layout_hdr in pnfs_layout_insert_lseg */ atomic_dec(&lo->plh_refcount); + if (list_empty(&lo->plh_segs)) + clear_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags); rpc_wake_up(&NFS_SERVER(inode)->roc_rpcwaitq); } -- cgit v1.2.3 From e5929f3cff05e84f20c68df235f4768920e2e89e Mon Sep 17 00:00:00 2001 From: 
Trond Myklebust Date: Fri, 21 Sep 2012 16:37:02 -0400 Subject: NFSv4.1: Remove the NFS_LAYOUT_RETURNED state It serves no purpose that the test for whether or not we have valid layout segments doesn't already serve. Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 7 +------ fs/nfs/pnfs.h | 19 ------------------- 2 files changed, 1 insertion(+), 25 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 174c51a5001..20a1b6222ff 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -692,7 +692,7 @@ _pnfs_return_layout(struct inode *ino) spin_lock(&ino->i_lock); lo = nfsi->layout; - if (!lo || pnfs_test_layout_returned(lo)) { + if (!lo) { spin_unlock(&ino->i_lock); dprintk("NFS: %s no layout to return\n", __func__); goto out; @@ -710,7 +710,6 @@ _pnfs_return_layout(struct inode *ino) goto out; } lo->plh_block_lgets++; - pnfs_mark_layout_returned(lo); spin_unlock(&ino->i_lock); pnfs_free_lseg_list(&tmp_list); @@ -721,7 +720,6 @@ _pnfs_return_layout(struct inode *ino) status = -ENOMEM; pnfs_layout_io_set_failed(lo, IOMODE_RW); pnfs_layout_io_set_failed(lo, IOMODE_READ); - pnfs_clear_layout_returned(lo); pnfs_put_layout_hdr(lo); goto out; } @@ -1111,9 +1109,6 @@ pnfs_update_layout(struct inode *ino, if (list_empty(&lo->plh_segs)) first = true; - /* Enable LAYOUTRETURNs */ - pnfs_clear_layout_returned(lo); - spin_unlock(&ino->i_lock); if (first) { /* The lo must be on the clp list if there is any diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 92f6ce6532b..6cede2c6c96 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -62,7 +62,6 @@ enum { NFS_LAYOUT_RW_FAILED, /* get rw layout failed stop trying */ NFS_LAYOUT_BULK_RECALL, /* bulk recall affecting layout */ NFS_LAYOUT_ROC, /* some lseg had roc bit set */ - NFS_LAYOUT_RETURNED, /* layout has already been returned */ }; enum layoutdriver_policy_flags { @@ -259,24 +258,6 @@ void nfs4_mark_deviceid_unavailable(struct nfs4_deviceid_node *node); bool nfs4_test_deviceid_unavailable(struct 
nfs4_deviceid_node *node); void nfs4_deviceid_purge_client(const struct nfs_client *); -static inline void -pnfs_mark_layout_returned(struct pnfs_layout_hdr *lo) -{ - set_bit(NFS_LAYOUT_RETURNED, &lo->plh_flags); -} - -static inline void -pnfs_clear_layout_returned(struct pnfs_layout_hdr *lo) -{ - clear_bit(NFS_LAYOUT_RETURNED, &lo->plh_flags); -} - -static inline bool -pnfs_test_layout_returned(struct pnfs_layout_hdr *lo) -{ - return test_bit(NFS_LAYOUT_RETURNED, &lo->plh_flags); -} - static inline struct pnfs_layout_segment * pnfs_get_lseg(struct pnfs_layout_segment *lseg) { -- cgit v1.2.3 From 65857d5768f7716da539933c2075d384b117812d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 24 Sep 2012 13:49:27 -0400 Subject: NFSv4.1: _pnfs_return_layout() shouldn't invalidate the layout on failure Failure of the layoutreturn allocation is not a good reason to mark the pnfs_layout_hdr as having failed a layoutget or i/o. Just exit cleanly. Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 20a1b6222ff..d737557747b 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -718,8 +718,9 @@ _pnfs_return_layout(struct inode *ino) lrp = kzalloc(sizeof(*lrp), GFP_KERNEL); if (unlikely(lrp == NULL)) { status = -ENOMEM; - pnfs_layout_io_set_failed(lo, IOMODE_RW); - pnfs_layout_io_set_failed(lo, IOMODE_READ); + spin_lock(&ino->i_lock); + lo->plh_block_lgets--; + spin_unlock(&ino->i_lock); pnfs_put_layout_hdr(lo); goto out; } -- cgit v1.2.3 From 849b286fd026a6924cc6a4315e446ed88ab983d2 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 24 Sep 2012 14:18:39 -0400 Subject: NFSv4.1: nfs4_proc_layoutreturn must always drop the plh_block_lgets count Currently it does not do so if the RPC call failed to start. Fix is to move the decrement of plh_block_lgets into nfs4_layoutreturn_release.
Also remove a redundant test of task->tk_status in nfs4_layoutreturn_done: if lrp->res.lrs_present is set, then obviously the RPC call succeeded. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 8de0435caed..ce1ebff49fd 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -6346,7 +6346,6 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata) { struct nfs4_layoutreturn *lrp = calldata; struct nfs_server *server; - struct pnfs_layout_hdr *lo = lrp->args.layout; dprintk("--> %s\n", __func__); @@ -6358,19 +6357,20 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata) rpc_restart_call_prepare(task); return; } - spin_lock(&lo->plh_inode->i_lock); - if (task->tk_status == 0 && lrp->res.lrs_present) - pnfs_set_layout_stateid(lo, &lrp->res.stateid, true); - lo->plh_block_lgets--; - spin_unlock(&lo->plh_inode->i_lock); dprintk("<-- %s\n", __func__); } static void nfs4_layoutreturn_release(void *calldata) { struct nfs4_layoutreturn *lrp = calldata; + struct pnfs_layout_hdr *lo = lrp->args.layout; dprintk("--> %s\n", __func__); + spin_lock(&lo->plh_inode->i_lock); + if (lrp->res.lrs_present) + pnfs_set_layout_stateid(lo, &lrp->res.stateid, true); + lo->plh_block_lgets--; + spin_unlock(&lo->plh_inode->i_lock); pnfs_put_layout_hdr(lrp->args.layout); kfree(calldata); dprintk("<-- %s\n", __func__); -- cgit v1.2.3 From fcb6d9c6b719b633e9b98d26d8a7937209e8bf21 Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Wed, 26 Sep 2012 15:25:53 -0400 Subject: NFS: Always use the open stateid when checking for expired opens If we are reading through a delegation, and the delegation is OK then state->stateid will still point to a delegation stateid and not an open stateid. 
Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index ce1ebff49fd..755ee162ee7 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1790,7 +1790,7 @@ static void nfs41_clear_delegation_stateid(struct nfs4_state *state) static int nfs41_check_open_stateid(struct nfs4_state *state) { struct nfs_server *server = NFS_SERVER(state->inode); - nfs4_stateid *stateid = &state->stateid; + nfs4_stateid *stateid = &state->open_stateid; int status; /* If a state reset has been done, test_stateid is unneeded */ -- cgit v1.2.3 From 6938867edba929a65a167a97581231e76aeb10b4 Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Wed, 26 Sep 2012 15:25:52 -0400 Subject: NFS: Remove bad delegations during open recovery I put the client into an open recovery loop by: Client: Open file read half Server: Expire client (echo 0 > /sys/kernel/debug/nfsd/forget_clients) Client: Drop vm cache (echo 3 > /proc/sys/vm/drop_caches) finish reading file This causes a loop because the client never updates the nfs4_state after discovering that the delegation is invalid. This means it will keep trying to read using the bad delegation rather than attempting to re-open the file. Signed-off-by: Bryan Schumaker CC: stable@vger.kernel.org [3.4+] Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 755ee162ee7..471a75f11ea 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1774,7 +1774,11 @@ static void nfs41_clear_delegation_stateid(struct nfs4_state *state) * informs us the stateid is unrecognized. 
*/ if (status != -NFS4ERR_BAD_STATEID) nfs41_free_stateid(server, stateid); + nfs_remove_bad_delegation(state->inode); + write_seqlock(&state->seqlock); + nfs4_stateid_copy(&state->stateid, &state->open_stateid); + write_sequnlock(&state->seqlock); clear_bit(NFS_DELEGATED_STATE, &state->flags); } } -- cgit v1.2.3 From 57a51048da742c764b6ce5d028c7f334ae04d363 Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Thu, 9 Aug 2012 14:05:51 -0400 Subject: NFS: Use kzalloc() instead of kmalloc() in the idmapper This will allocate memory that has already been zeroed, allowing us to remove the memset later on. Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/idmap.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index a850079467d..9985a0aea5f 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -632,9 +632,6 @@ static int nfs_idmap_prepare_message(char *desc, struct idmap *idmap, substring_t substr; int token, ret; - memset(im, 0, sizeof(*im)); - memset(msg, 0, sizeof(*msg)); - im->im_type = IDMAP_TYPE_GROUP; token = match_token(desc, nfs_idmap_tokens, &substr); @@ -677,7 +674,7 @@ static int nfs_idmap_legacy_upcall(struct key_construction *cons, int ret = -ENOMEM; /* msg and im are freed in idmap_pipe_destroy_msg */ - data = kmalloc(sizeof(*data), GFP_KERNEL); + data = kzalloc(sizeof(*data), GFP_KERNEL); if (!data) goto out1; -- cgit v1.2.3 From 6168f62cbde8dcf4f58255794efbcdb8df603959 Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Mon, 10 Sep 2012 14:00:46 -0400 Subject: NFSv4: Add ACCESS operation to OPEN compound The OPEN operation has no way to differentiate an open for read and an open for execution - both look like read to the server. This allowed users to read files that didn't have READ access but did have EXEC access, which is obviously wrong. 
This patch adds an ACCESS call to the OPEN compound to handle the difference between OPENs for reading and execution. Since we're going through the trouble of calling ACCESS, we check all possible access bits and cache the results hopefully avoiding an ACCESS call in the future. Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 16 +++++++++++++++- fs/nfs/nfs4proc.c | 52 ++++++++++++++++++++++++++++++++++++++++++++-------- fs/nfs/nfs4xdr.c | 16 ++++++++++++---- 3 files changed, 71 insertions(+), 13 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 627f108ede2..ce8cb926526 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -2072,7 +2072,7 @@ found: nfs_access_free_entry(entry); } -static void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set) +void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set) { struct nfs_access_entry *cache = kmalloc(sizeof(*cache), GFP_KERNEL); if (cache == NULL) @@ -2098,6 +2098,20 @@ static void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *s spin_unlock(&nfs_access_lru_lock); } } +EXPORT_SYMBOL_GPL(nfs_access_add_cache); + +void nfs_access_set_mask(struct nfs_access_entry *entry, u32 access_result) +{ + entry->mask = 0; + if (access_result & NFS4_ACCESS_READ) + entry->mask |= MAY_READ; + if (access_result & + (NFS4_ACCESS_MODIFY | NFS4_ACCESS_EXTEND | NFS4_ACCESS_DELETE)) + entry->mask |= MAY_WRITE; + if (access_result & (NFS4_ACCESS_LOOKUP|NFS4_ACCESS_EXECUTE)) + entry->mask |= MAY_EXEC; +} +EXPORT_SYMBOL_GPL(nfs_access_set_mask); static int nfs_do_access(struct inode *inode, struct rpc_cred *cred, int mask) { diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 471a75f11ea..5b3207f557d 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -104,6 +104,8 @@ static int nfs4_map_errors(int err) return -EACCES; case -NFS4ERR_MINOR_VERS_MISMATCH: return -EPROTONOSUPPORT; + case -NFS4ERR_ACCESS: + return 
-EACCES; default: dprintk("%s could not handle NFSv4 error %d\n", __func__, -err); @@ -860,6 +862,9 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, p->o_arg.fh = NFS_FH(dir); p->o_arg.open_flags = flags; p->o_arg.fmode = fmode & (FMODE_READ|FMODE_WRITE); + /* ask server to check for all possible rights as results are cached */ + p->o_arg.access = NFS4_ACCESS_READ | NFS4_ACCESS_MODIFY | + NFS4_ACCESS_EXTEND | NFS4_ACCESS_EXECUTE; p->o_arg.clientid = server->nfs_client->cl_clientid; p->o_arg.id.create_time = ktime_to_ns(sp->so_seqid.create_time); p->o_arg.id.uniquifier = sp->so_seqid.owner_id; @@ -1643,6 +1648,39 @@ static int _nfs4_recover_proc_open(struct nfs4_opendata *data) return status; } +static int nfs4_opendata_access(struct rpc_cred *cred, + struct nfs4_opendata *opendata, + struct nfs4_state *state, fmode_t fmode) +{ + struct nfs_access_entry cache; + u32 mask; + + /* access call failed or for some reason the server doesn't + * support any access modes -- defer access call until later */ + if (opendata->o_res.access_supported == 0) + return 0; + + mask = 0; + if (fmode & FMODE_READ) + mask |= MAY_READ; + if (fmode & FMODE_WRITE) + mask |= MAY_WRITE; + if (fmode & FMODE_EXEC) + mask |= MAY_EXEC; + + cache.cred = cred; + cache.jiffies = jiffies; + nfs_access_set_mask(&cache, opendata->o_res.access_result); + nfs_access_add_cache(state->inode, &cache); + + if ((mask & ~cache.mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) == 0) + return 0; + + /* even though OPEN succeeded, access is denied. 
Close the file */ + nfs4_close_state(state, fmode); + return -NFS4ERR_ACCESS; +} + /* * Note: On error, nfs4_proc_open will free the struct nfs4_opendata */ @@ -1900,6 +1938,10 @@ static int _nfs4_do_open(struct inode *dir, if (server->caps & NFS_CAP_POSIX_LOCK) set_bit(NFS_STATE_POSIX_LOCKS, &state->flags); + status = nfs4_opendata_access(cred, opendata, state, fmode); + if (status != 0) + goto err_opendata_put; + if (opendata->o_arg.open_flags & O_EXCL) { nfs4_exclusive_attrset(opendata, sattr); @@ -1945,7 +1987,7 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir, struct nfs4_state *res; int status; - fmode &= FMODE_READ|FMODE_WRITE; + fmode &= FMODE_READ|FMODE_WRITE|FMODE_EXEC; do { status = _nfs4_do_open(dir, dentry, fmode, flags, sattr, cred, &res, ctx_th); @@ -2771,13 +2813,7 @@ static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0); if (!status) { - entry->mask = 0; - if (res.access & NFS4_ACCESS_READ) - entry->mask |= MAY_READ; - if (res.access & (NFS4_ACCESS_MODIFY | NFS4_ACCESS_EXTEND | NFS4_ACCESS_DELETE)) - entry->mask |= MAY_WRITE; - if (res.access & (NFS4_ACCESS_LOOKUP|NFS4_ACCESS_EXECUTE)) - entry->mask |= MAY_EXEC; + nfs_access_set_mask(entry, res.access); nfs_refresh_inode(inode, res.fattr); } nfs_free_fattr(res.fattr); diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 7ab29abb316..657483c34e2 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -447,12 +447,14 @@ static int nfs4_stat_to_errno(int); encode_sequence_maxsz + \ encode_putfh_maxsz + \ encode_open_maxsz + \ + encode_access_maxsz + \ encode_getfh_maxsz + \ encode_getattr_maxsz) #define NFS4_dec_open_sz (compound_decode_hdr_maxsz + \ decode_sequence_maxsz + \ decode_putfh_maxsz + \ decode_open_maxsz + \ + decode_access_maxsz + \ decode_getfh_maxsz + \ decode_getattr_maxsz) #define NFS4_enc_open_confirm_sz \ @@ -467,11 +469,13 @@ static int 
nfs4_stat_to_errno(int); encode_sequence_maxsz + \ encode_putfh_maxsz + \ encode_open_maxsz + \ + encode_access_maxsz + \ encode_getattr_maxsz) #define NFS4_dec_open_noattr_sz (compound_decode_hdr_maxsz + \ decode_sequence_maxsz + \ decode_putfh_maxsz + \ decode_open_maxsz + \ + decode_access_maxsz + \ decode_getattr_maxsz) #define NFS4_enc_open_downgrade_sz \ (compound_encode_hdr_maxsz + \ @@ -2220,6 +2224,7 @@ static void nfs4_xdr_enc_open(struct rpc_rqst *req, struct xdr_stream *xdr, encode_putfh(xdr, args->fh, &hdr); encode_open(xdr, args, &hdr); encode_getfh(xdr, &hdr); + encode_access(xdr, args->access, &hdr); encode_getfattr_open(xdr, args->bitmask, args->open_bitmap, &hdr); encode_nops(&hdr); } @@ -2256,6 +2261,7 @@ static void nfs4_xdr_enc_open_noattr(struct rpc_rqst *req, encode_sequence(xdr, &args->seq_args, &hdr); encode_putfh(xdr, args->fh, &hdr); encode_open(xdr, args, &hdr); + encode_access(xdr, args->access, &hdr); encode_getfattr(xdr, args->bitmask, &hdr); encode_nops(&hdr); } @@ -4099,7 +4105,7 @@ out_overflow: return -EIO; } -static int decode_access(struct xdr_stream *xdr, struct nfs4_accessres *access) +static int decode_access(struct xdr_stream *xdr, u32 *supported, u32 *access) { __be32 *p; uint32_t supp, acc; @@ -4113,8 +4119,8 @@ static int decode_access(struct xdr_stream *xdr, struct nfs4_accessres *access) goto out_overflow; supp = be32_to_cpup(p++); acc = be32_to_cpup(p); - access->supported = supp; - access->access = acc; + *supported = supp; + *access = acc; return 0; out_overflow: print_overflow_msg(__func__, xdr); @@ -5892,7 +5898,7 @@ static int nfs4_xdr_dec_access(struct rpc_rqst *rqstp, struct xdr_stream *xdr, status = decode_putfh(xdr); if (status != 0) goto out; - status = decode_access(xdr, res); + status = decode_access(xdr, &res->supported, &res->access); if (status != 0) goto out; decode_getfattr(xdr, res->fattr, res->server); @@ -6233,6 +6239,7 @@ static int nfs4_xdr_dec_open(struct rpc_rqst *rqstp, struct xdr_stream *xdr, 
status = decode_getfh(xdr, &res->fh); if (status) goto out; + decode_access(xdr, &res->access_supported, &res->access_result); decode_getfattr(xdr, res->f_attr, res->server); out: return status; @@ -6281,6 +6288,7 @@ static int nfs4_xdr_dec_open_noattr(struct rpc_rqst *rqstp, status = decode_open(xdr, res); if (status) goto out; + decode_access(xdr, &res->access_supported, &res->access_result); decode_getfattr(xdr, res->f_attr, res->server); out: return status; -- cgit v1.2.3 From c8ceb4124b53a439edfe3fe89a646be1e067ef17 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Mon, 20 Aug 2012 18:00:06 +0400 Subject: NFS: pass net to nfs_callback_down() Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- fs/nfs/callback.c | 4 ++-- fs/nfs/callback.h | 2 +- fs/nfs/nfs4client.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 4c8459e5bde..51297b2d053 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -301,7 +301,7 @@ out_err: /* * Kill the callback thread if it's no longer being used. 
*/ -void nfs_callback_down(int minorversion) +void nfs_callback_down(int minorversion, struct net *net) { struct nfs_callback_data *cb_info = &nfs_callback_info[minorversion]; @@ -309,7 +309,7 @@ void nfs_callback_down(int minorversion) cb_info->users--; if (cb_info->users == 0 && cb_info->task != NULL) { kthread_stop(cb_info->task); - svc_shutdown_net(cb_info->serv, &init_net); + svc_shutdown_net(cb_info->serv, net); svc_exit_thread(cb_info->rqst); cb_info->serv = NULL; cb_info->rqst = NULL; diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h index b44d7b128b7..309404453e9 100644 --- a/fs/nfs/callback.h +++ b/fs/nfs/callback.h @@ -194,7 +194,7 @@ extern __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy, struct cb_process_state *cps); #if IS_ENABLED(CONFIG_NFS_V4) extern int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt); -extern void nfs_callback_down(int minorversion); +extern void nfs_callback_down(int minorversion, struct net *net); extern int nfs4_validate_delegation_stateid(struct nfs_delegation *delegation, const nfs4_stateid *stateid); extern int nfs4_set_callback_sessionid(struct nfs_client *clp); diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 24eb663f8ed..088a7d2e2ec 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -84,7 +84,7 @@ error: static void nfs4_destroy_callback(struct nfs_client *clp) { if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state)) - nfs_callback_down(clp->cl_mvops->minor_version); + nfs_callback_down(clp->cl_mvops->minor_version, &init_net); } static void nfs4_shutdown_client(struct nfs_client *clp) -- cgit v1.2.3 From dd018428dce087b72d9e6a0b32e93cb8088b3aaa Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Mon, 20 Aug 2012 18:00:11 +0400 Subject: NFS: callback service creation function introduced This function creates service if it's not exist, or increase usage counter of the existent, and returns pointer to it. 
Usage counter will be dropped by svc_destroy() later in nfs_callback_up(). Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- fs/nfs/callback.c | 63 ++++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 49 insertions(+), 14 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 51297b2d053..18efeb5f005 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -217,12 +217,50 @@ static inline void nfs_callback_bc_serv(u32 minorversion, struct rpc_xprt *xprt, } #endif /* CONFIG_NFS_V4_1 */ +static struct svc_serv *nfs_callback_create_svc(int minorversion) +{ + struct nfs_callback_data *cb_info = &nfs_callback_info[minorversion]; + struct svc_serv *serv; + + /* + * Check whether we're already up and running. + */ + if (cb_info->task) { + /* + * Note: increase service usage, because later in case of error + * svc_destroy() will be called. + */ + svc_get(cb_info->serv); + return cb_info->serv; + } + + /* + * Sanity check: if there's no task, + * we should be the first user ... + */ + if (cb_info->users) + printk(KERN_WARNING "nfs_callback_create_svc: no kthread, %d users??\n", + cb_info->users); + + serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE, NULL); + if (!serv) { + printk(KERN_ERR "nfs_callback_create_svc: create service failed\n"); + return ERR_PTR(-ENOMEM); + } + /* As there is only one thread we need to over-ride the + * default maximum of 80 connections + */ + serv->sv_maxconn = 1024; + dprintk("nfs_callback_create_svc: service created\n"); + return serv; +} + /* * Bring up the callback thread if it is not already up. 
*/ int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt) { - struct svc_serv *serv = NULL; + struct svc_serv *serv; struct svc_rqst *rqstp; int (*callback_svc)(void *vrqstp); struct nfs_callback_data *cb_info = &nfs_callback_info[minorversion]; @@ -232,19 +270,17 @@ int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt) struct net *net = &init_net; mutex_lock(&nfs_callback_mutex); + + serv = nfs_callback_create_svc(minorversion); + if (IS_ERR(serv)) { + ret = PTR_ERR(serv); + goto err_create; + } + if (cb_info->users++ || cb_info->task != NULL) { nfs_callback_bc_serv(minorversion, xprt, cb_info); goto out; } - serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE, NULL); - if (!serv) { - ret = -ENOMEM; - goto out_err; - } - /* As there is only one thread we need to over-ride the - * default maximum of 80 connections - */ - serv->sv_maxconn = 1024; ret = svc_bind(serv, net); if (ret < 0) { @@ -285,16 +321,15 @@ out: * on both success and failure so that the refcount is 1 when the * thread exits. */ - if (serv) - svc_destroy(serv); + svc_destroy(serv); +err_create: mutex_unlock(&nfs_callback_mutex); return ret; out_err: dprintk("NFS: Couldn't create callback socket or server thread; " "err = %d\n", ret); cb_info->users--; - if (serv) - svc_shutdown_net(serv, net); + svc_shutdown_net(serv, net); goto out; } -- cgit v1.2.3 From c946556b8749beb357e2d2860e7dac757972dd3d Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Mon, 20 Aug 2012 18:00:16 +0400 Subject: NFS: move per-net callback thread initialization to nfs_callback_up_net() v4: 1) Callback transport creation routine selection by version simlified. This new function in now called before nfs_minorversion_callback_svc_setup()). Also few small changes: 1) current network namespace in nfs_callback_up() was replaced by transport net. 
2) svc_shutdown_net() was moved prior to callback usage counter decrement (because in case of per-net data allocation failure svc_shutdown_net() has to be skipped). Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- fs/nfs/callback.c | 133 +++++++++++++++++++++++++++++++++------------------- fs/nfs/nfs4client.c | 2 +- 2 files changed, 87 insertions(+), 48 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 18efeb5f005..a53b4e53d5d 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -39,6 +39,32 @@ static struct svc_program nfs4_callback_program; unsigned short nfs_callback_tcpport6; +static int nfs4_callback_up_net(struct svc_serv *serv, struct net *net) +{ + int ret; + + ret = svc_create_xprt(serv, "tcp", net, PF_INET, + nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS); + if (ret <= 0) + goto out_err; + nfs_callback_tcpport = ret; + dprintk("NFS: Callback listener port = %u (af %u, net %p)\n", + nfs_callback_tcpport, PF_INET, net); + + ret = svc_create_xprt(serv, "tcp", net, PF_INET6, + nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS); + if (ret > 0) { + nfs_callback_tcpport6 = ret; + dprintk("NFS: Callback listener port = %u (af %u, net %p)\n", + nfs_callback_tcpport6, PF_INET6, net); + } else if (ret != -EAFNOSUPPORT) + goto out_err; + return 0; + +out_err: + return (ret) ? ret : -ENOMEM; +} + /* * This is the NFSv4 callback kernel thread. 
*/ @@ -80,36 +106,21 @@ nfs4_callback_svc(void *vrqstp) static struct svc_rqst * nfs4_callback_up(struct svc_serv *serv, struct rpc_xprt *xprt) { - int ret; - - ret = svc_create_xprt(serv, "tcp", &init_net, PF_INET, - nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS); - if (ret <= 0) - goto out_err; - nfs_callback_tcpport = ret; - dprintk("NFS: Callback listener port = %u (af %u)\n", - nfs_callback_tcpport, PF_INET); - - ret = svc_create_xprt(serv, "tcp", &init_net, PF_INET6, - nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS); - if (ret > 0) { - nfs_callback_tcpport6 = ret; - dprintk("NFS: Callback listener port = %u (af %u)\n", - nfs_callback_tcpport6, PF_INET6); - } else if (ret == -EAFNOSUPPORT) - ret = 0; - else - goto out_err; - return svc_prepare_thread(serv, &serv->sv_pools[0], NUMA_NO_NODE); - -out_err: - if (ret == 0) - ret = -ENOMEM; - return ERR_PTR(ret); } #if defined(CONFIG_NFS_V4_1) +static int nfs41_callback_up_net(struct svc_serv *serv, struct net *net) +{ + /* + * Create an svc_sock for the back channel service that shares the + * fore channel connection. + * Returns the input port (0) and sets the svc_serv bc_xprt on success + */ + return svc_create_xprt(serv, "tcp-bc", net, PF_INET, 0, + SVC_SOCK_ANONYMOUS); +} + /* * The callback service for NFSv4.1 callbacks */ @@ -152,19 +163,6 @@ static struct svc_rqst * nfs41_callback_up(struct svc_serv *serv, struct rpc_xprt *xprt) { struct svc_rqst *rqstp; - int ret; - - /* - * Create an svc_sock for the back channel service that shares the - * fore channel connection. 
- * Returns the input port (0) and sets the svc_serv bc_xprt on success - */ - ret = svc_create_xprt(serv, "tcp-bc", &init_net, PF_INET, 0, - SVC_SOCK_ANONYMOUS); - if (ret < 0) { - rqstp = ERR_PTR(ret); - goto out; - } /* * Save the svc_serv in the transport so that it can @@ -180,7 +178,6 @@ nfs41_callback_up(struct svc_serv *serv, struct rpc_xprt *xprt) svc_xprt_put(serv->sv_bc_xprt); serv->sv_bc_xprt = NULL; } -out: dprintk("--> %s return %ld\n", __func__, IS_ERR(rqstp) ? PTR_ERR(rqstp) : 0); return rqstp; @@ -204,6 +201,11 @@ static inline void nfs_callback_bc_serv(u32 minorversion, struct rpc_xprt *xprt, xprt->bc_serv = cb_info->serv; } #else +static int nfs41_callback_up_net(struct svc_serv *serv, struct net *net) +{ + return 0; +} + static inline int nfs_minorversion_callback_svc_setup(u32 minorversion, struct svc_serv *serv, struct rpc_xprt *xprt, struct svc_rqst **rqstpp, int (**callback_svc)(void *vrqstp)) @@ -217,6 +219,44 @@ static inline void nfs_callback_bc_serv(u32 minorversion, struct rpc_xprt *xprt, } #endif /* CONFIG_NFS_V4_1 */ +static int nfs_callback_up_net(int minorversion, struct svc_serv *serv, struct net *net) +{ + int ret; + + dprintk("NFS: create per-net callback data; net=%p\n", net); + + ret = svc_bind(serv, net); + if (ret < 0) { + printk(KERN_WARNING "NFS: bind callback service failed\n"); + goto err_bind; + } + + switch (minorversion) { + case 0: + ret = nfs4_callback_up_net(serv, net); + break; + case 1: + ret = nfs41_callback_up_net(serv, net); + break; + default: + printk(KERN_ERR "NFS: unknown callback version: %d\n", + minorversion); + ret = -EINVAL; + break; + } + + if (ret < 0) { + printk(KERN_ERR "NFS: callback service start failed\n"); + goto err_socks; + } + return 0; + +err_socks: + svc_rpcb_cleanup(serv, net); +err_bind: + return ret; +} + static struct svc_serv *nfs_callback_create_svc(int minorversion) { struct nfs_callback_data *cb_info = &nfs_callback_info[minorversion]; @@ -267,7 +307,7 @@ int nfs_callback_up(u32 
minorversion, struct rpc_xprt *xprt) char svc_name[12]; int ret = 0; int minorversion_setup; - struct net *net = &init_net; + struct net *net = xprt->xprt_net; mutex_lock(&nfs_callback_mutex); @@ -282,11 +322,9 @@ int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt) goto out; } - ret = svc_bind(serv, net); - if (ret < 0) { - printk(KERN_WARNING "NFS: bind callback service failed\n"); - goto out_err; - } + ret = nfs_callback_up_net(minorversion, serv, net); + if (ret < 0) + goto err_net; minorversion_setup = nfs_minorversion_callback_svc_setup(minorversion, serv, xprt, &rqstp, &callback_svc); @@ -326,10 +364,11 @@ err_create: mutex_unlock(&nfs_callback_mutex); return ret; out_err: + svc_shutdown_net(serv, net); +err_net: dprintk("NFS: Couldn't create callback socket or server thread; " "err = %d\n", ret); cb_info->users--; - svc_shutdown_net(serv, net); goto out; } diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 088a7d2e2ec..612f5ebaaba 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -84,7 +84,7 @@ error: static void nfs4_destroy_callback(struct nfs_client *clp) { if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state)) - nfs_callback_down(clp->cl_mvops->minor_version, &init_net); + nfs_callback_down(clp->cl_mvops->minor_version, clp->cl_net); } static void nfs4_shutdown_client(struct nfs_client *clp) -- cgit v1.2.3 From 691c457ae635a063e0e4c8551ba4566eab9a17e3 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Mon, 20 Aug 2012 18:00:21 +0400 Subject: NFS: callback up - transport backchannel cleanup No need to assign transports backchannel server explicitly in nfs41_callback_up() - there is nfs_callback_bc_serv() function for this. By using it, nfs4_callback_up() and nfs41_callback_up() can be called without transport argument. Note: service have to be passed to nfs_callback_bc_serv() instead of callback, since callback link can be uninitialized. 
Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- fs/nfs/callback.c | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index a53b4e53d5d..a528cb75121 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -104,7 +104,7 @@ nfs4_callback_svc(void *vrqstp) * Prepare to bring up the NFSv4 callback service */ static struct svc_rqst * -nfs4_callback_up(struct svc_serv *serv, struct rpc_xprt *xprt) +nfs4_callback_up(struct svc_serv *serv) { return svc_prepare_thread(serv, &serv->sv_pools[0], NUMA_NO_NODE); } @@ -160,16 +160,10 @@ nfs41_callback_svc(void *vrqstp) * Bring up the NFSv4.1 callback service */ static struct svc_rqst * -nfs41_callback_up(struct svc_serv *serv, struct rpc_xprt *xprt) +nfs41_callback_up(struct svc_serv *serv) { struct svc_rqst *rqstp; - /* - * Save the svc_serv in the transport so that it can - * be referenced when the session backchannel is initialized - */ - xprt->bc_serv = serv; - INIT_LIST_HEAD(&serv->sv_cb_list); spin_lock_init(&serv->sv_cb_lock); init_waitqueue_head(&serv->sv_cb_waitq); @@ -184,21 +178,25 @@ nfs41_callback_up(struct svc_serv *serv, struct rpc_xprt *xprt) } static inline int nfs_minorversion_callback_svc_setup(u32 minorversion, - struct svc_serv *serv, struct rpc_xprt *xprt, + struct svc_serv *serv, struct svc_rqst **rqstpp, int (**callback_svc)(void *vrqstp)) { if (minorversion) { - *rqstpp = nfs41_callback_up(serv, xprt); + *rqstpp = nfs41_callback_up(serv); *callback_svc = nfs41_callback_svc; } return minorversion; } static inline void nfs_callback_bc_serv(u32 minorversion, struct rpc_xprt *xprt, - struct nfs_callback_data *cb_info) + struct svc_serv *serv) { if (minorversion) - xprt->bc_serv = cb_info->serv; + /* + * Save the svc_serv in the transport so that it can + * be referenced when the session backchannel is initialized + */ + xprt->bc_serv = serv; } #else static int 
nfs41_callback_up_net(struct svc_serv *serv, struct net *net) @@ -207,14 +205,14 @@ static int nfs41_callback_up_net(struct svc_serv *serv, struct net *net) } static inline int nfs_minorversion_callback_svc_setup(u32 minorversion, - struct svc_serv *serv, struct rpc_xprt *xprt, + struct svc_serv *serv, struct svc_rqst **rqstpp, int (**callback_svc)(void *vrqstp)) { return 0; } static inline void nfs_callback_bc_serv(u32 minorversion, struct rpc_xprt *xprt, - struct nfs_callback_data *cb_info) + struct svc_serv *serv) { } #endif /* CONFIG_NFS_V4_1 */ @@ -318,7 +316,7 @@ int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt) } if (cb_info->users++ || cb_info->task != NULL) { - nfs_callback_bc_serv(minorversion, xprt, cb_info); + nfs_callback_bc_serv(minorversion, xprt, serv); goto out; } @@ -326,11 +324,13 @@ int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt) if (ret < 0) goto err_net; + nfs_callback_bc_serv(minorversion, xprt, serv); + minorversion_setup = nfs_minorversion_callback_svc_setup(minorversion, - serv, xprt, &rqstp, &callback_svc); + serv, &rqstp, &callback_svc); if (!minorversion_setup) { /* v4.0 callback setup */ - rqstp = nfs4_callback_up(serv, xprt); + rqstp = nfs4_callback_up(serv); callback_svc = nfs4_callback_svc; } -- cgit v1.2.3 From 8e2461444319b8f3fe47b94ea9b5d2e1dd8adadb Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Mon, 20 Aug 2012 18:00:26 +0400 Subject: NFS: callback service start function introduced This is just a code move, which from my point of view makes the code look better. I.e. now on start we have 3 different stages: 1) Service creation. 2) Service per-net data allocation. 3) Service start. Patch also renames goto label "out_err:" into "err_start:" to reflect new changes. 
Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- fs/nfs/callback.c | 77 ++++++++++++++++++++++++++++++++----------------------- 1 file changed, 45 insertions(+), 32 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index a528cb75121..5d5f9d10cfd 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -217,6 +217,46 @@ static inline void nfs_callback_bc_serv(u32 minorversion, struct rpc_xprt *xprt, } #endif /* CONFIG_NFS_V4_1 */ +static int nfs_callback_start_svc(int minorversion, struct rpc_xprt *xprt, + struct svc_serv *serv) +{ + struct svc_rqst *rqstp; + int (*callback_svc)(void *vrqstp); + struct nfs_callback_data *cb_info = &nfs_callback_info[minorversion]; + char svc_name[12]; + int ret; + int minorversion_setup; + + nfs_callback_bc_serv(minorversion, xprt, serv); + + minorversion_setup = nfs_minorversion_callback_svc_setup(minorversion, + serv, &rqstp, &callback_svc); + if (!minorversion_setup) { + /* v4.0 callback setup */ + rqstp = nfs4_callback_up(serv); + callback_svc = nfs4_callback_svc; + } + + if (IS_ERR(rqstp)) + return PTR_ERR(rqstp); + + svc_sock_update_bufs(serv); + + sprintf(svc_name, "nfsv4.%u-svc", minorversion); + cb_info->serv = serv; + cb_info->rqst = rqstp; + cb_info->task = kthread_run(callback_svc, cb_info->rqst, svc_name); + if (IS_ERR(cb_info->task)) { + ret = PTR_ERR(cb_info->task); + svc_exit_thread(cb_info->rqst); + cb_info->rqst = NULL; + cb_info->task = NULL; + return PTR_ERR(cb_info->task); + } + dprintk("nfs_callback_up: service started\n"); + return 0; +} + static int nfs_callback_up_net(int minorversion, struct svc_serv *serv, struct net *net) { int ret; @@ -299,12 +339,8 @@ static struct svc_serv *nfs_callback_create_svc(int minorversion) int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt) { struct svc_serv *serv; - struct svc_rqst *rqstp; - int (*callback_svc)(void *vrqstp); struct nfs_callback_data *cb_info = &nfs_callback_info[minorversion]; - char 
svc_name[12]; int ret = 0; - int minorversion_setup; struct net *net = xprt->xprt_net; mutex_lock(&nfs_callback_mutex); @@ -324,34 +360,10 @@ int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt) if (ret < 0) goto err_net; - nfs_callback_bc_serv(minorversion, xprt, serv); - - minorversion_setup = nfs_minorversion_callback_svc_setup(minorversion, - serv, &rqstp, &callback_svc); - if (!minorversion_setup) { - /* v4.0 callback setup */ - rqstp = nfs4_callback_up(serv); - callback_svc = nfs4_callback_svc; - } - - if (IS_ERR(rqstp)) { - ret = PTR_ERR(rqstp); - goto out_err; - } - - svc_sock_update_bufs(serv); + ret = nfs_callback_start_svc(minorversion, xprt, serv); + if (ret < 0) + goto err_start; - sprintf(svc_name, "nfsv4.%u-svc", minorversion); - cb_info->serv = serv; - cb_info->rqst = rqstp; - cb_info->task = kthread_run(callback_svc, cb_info->rqst, svc_name); - if (IS_ERR(cb_info->task)) { - ret = PTR_ERR(cb_info->task); - svc_exit_thread(cb_info->rqst); - cb_info->rqst = NULL; - cb_info->task = NULL; - goto out_err; - } out: /* * svc_create creates the svc_serv with sv_nrthreads == 1, and then @@ -363,7 +375,8 @@ out: err_create: mutex_unlock(&nfs_callback_mutex); return ret; -out_err: + +err_start: svc_shutdown_net(serv, net); err_net: dprintk("NFS: Couldn't create callback socket or server thread; " -- cgit v1.2.3 From 23c20ecd44750dd42e5fd53285a17ca8d8a9b0a3 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Mon, 20 Aug 2012 18:00:31 +0400 Subject: NFS: callback up - users counting cleanup The usage counter is now increased only if the service was started successfully. Even if the service is already running, the goto is no longer required, because service creation and start will be skipped. With this patch the code looks clearer. 
Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- fs/nfs/callback.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 5d5f9d10cfd..64e87ec045a 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -229,6 +229,9 @@ static int nfs_callback_start_svc(int minorversion, struct rpc_xprt *xprt, nfs_callback_bc_serv(minorversion, xprt, serv); + if (cb_info->task) + return 0; + minorversion_setup = nfs_minorversion_callback_svc_setup(minorversion, serv, &rqstp, &callback_svc); if (!minorversion_setup) { @@ -292,6 +295,8 @@ static int nfs_callback_up_net(int minorversion, struct svc_serv *serv, struct n err_socks: svc_rpcb_cleanup(serv, net); err_bind: + dprintk("NFS: Couldn't create callback socket: err = %d; " + "net = %p\n", ret, net); return ret; } @@ -340,7 +345,7 @@ int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt) { struct svc_serv *serv; struct nfs_callback_data *cb_info = &nfs_callback_info[minorversion]; - int ret = 0; + int ret; struct net *net = xprt->xprt_net; mutex_lock(&nfs_callback_mutex); @@ -351,11 +356,6 @@ int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt) goto err_create; } - if (cb_info->users++ || cb_info->task != NULL) { - nfs_callback_bc_serv(minorversion, xprt, serv); - goto out; - } - ret = nfs_callback_up_net(minorversion, serv, net); if (ret < 0) goto err_net; @@ -364,13 +364,14 @@ int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt) if (ret < 0) goto err_start; -out: + cb_info->users++; /* * svc_create creates the svc_serv with sv_nrthreads == 1, and then * svc_prepare_thread increments that. So we need to call svc_destroy * on both success and failure so that the refcount is 1 when the * thread exits. 
*/ +err_net: svc_destroy(serv); err_create: mutex_unlock(&nfs_callback_mutex); @@ -378,11 +379,8 @@ err_create: err_start: svc_shutdown_net(serv, net); -err_net: - dprintk("NFS: Couldn't create callback socket or server thread; " - "err = %d\n", ret); - cb_info->users--; - goto out; + dprintk("NFS: Couldn't create server thread; err = %d\n", ret); + goto err_net; } /* -- cgit v1.2.3 From bbe0a3aa4e227c8aae02a484ce1c0b655cd19055 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Mon, 20 Aug 2012 18:00:36 +0400 Subject: NFS: make nfs_callback_tcpport per network context Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- fs/nfs/callback.c | 6 ++++-- fs/nfs/callback.h | 1 - fs/nfs/netns.h | 1 + fs/nfs/nfs4state.c | 4 +++- 4 files changed, 8 insertions(+), 4 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 64e87ec045a..94aa9d8f308 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -23,6 +23,7 @@ #include "nfs4_fs.h" #include "callback.h" #include "internal.h" +#include "netns.h" #define NFSDBG_FACILITY NFSDBG_CALLBACK @@ -42,14 +43,15 @@ unsigned short nfs_callback_tcpport6; static int nfs4_callback_up_net(struct svc_serv *serv, struct net *net) { int ret; + struct nfs_net *nn = net_generic(net, nfs_net_id); ret = svc_create_xprt(serv, "tcp", net, PF_INET, nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS); if (ret <= 0) goto out_err; - nfs_callback_tcpport = ret; + nn->nfs_callback_tcpport = ret; dprintk("NFS: Callback listener port = %u (af %u, net %p)\n", - nfs_callback_tcpport, PF_INET, net); + nn->nfs_callback_tcpport, PF_INET, net); ret = svc_create_xprt(serv, "tcp", net, PF_INET6, nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS); diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h index 309404453e9..1c167d16368 100644 --- a/fs/nfs/callback.h +++ b/fs/nfs/callback.h @@ -208,7 +208,6 @@ extern int nfs4_set_callback_sessionid(struct nfs_client *clp); #define NFS41_BC_MAX_CALLBACKS 1 extern unsigned 
int nfs_callback_set_tcpport; -extern unsigned short nfs_callback_tcpport; extern unsigned short nfs_callback_tcpport6; #endif /* __LINUX_FS_NFS_CALLBACK_H */ diff --git a/fs/nfs/netns.h b/fs/nfs/netns.h index 0539de1b8d1..1538d3a83cd 100644 --- a/fs/nfs/netns.h +++ b/fs/nfs/netns.h @@ -22,6 +22,7 @@ struct nfs_net { struct list_head nfs_volume_list; #if IS_ENABLED(CONFIG_NFS_V4) struct idr cb_ident_idr; /* Protected by nfs_client_lock */ + unsigned short nfs_callback_tcpport; #endif spinlock_t nfs_client_lock; struct timespec boot_time; diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index a5331ec094a..716cdc20475 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -56,6 +56,7 @@ #include "delegation.h" #include "internal.h" #include "pnfs.h" +#include "netns.h" #define NFSDBG_FACILITY NFSDBG_STATE @@ -73,10 +74,11 @@ int nfs4_init_clientid(struct nfs_client *clp, struct rpc_cred *cred) }; unsigned short port; int status; + struct nfs_net *nn = net_generic(clp->cl_net, nfs_net_id); if (test_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state)) goto do_confirm; - port = nfs_callback_tcpport; + port = nn->nfs_callback_tcpport; if (clp->cl_addr.ss_family == AF_INET6) port = nfs_callback_tcpport6; -- cgit v1.2.3 From 29dcc16a8e29371e11fb58fc1292e01f30ff13c5 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Mon, 20 Aug 2012 18:00:41 +0400 Subject: NFS: make nfs_callback_tcpport6 per network context Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- fs/nfs/callback.c | 6 ++---- fs/nfs/callback.h | 1 - fs/nfs/netns.h | 1 + fs/nfs/nfs4state.c | 2 +- 4 files changed, 4 insertions(+), 6 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 94aa9d8f308..baafa0f1e55 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -38,8 +38,6 @@ static struct nfs_callback_data nfs_callback_info[NFS4_MAX_MINOR_VERSION + 1]; static DEFINE_MUTEX(nfs_callback_mutex); static struct svc_program nfs4_callback_program; 
-unsigned short nfs_callback_tcpport6; - static int nfs4_callback_up_net(struct svc_serv *serv, struct net *net) { int ret; @@ -56,9 +54,9 @@ static int nfs4_callback_up_net(struct svc_serv *serv, struct net *net) ret = svc_create_xprt(serv, "tcp", net, PF_INET6, nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS); if (ret > 0) { - nfs_callback_tcpport6 = ret; + nn->nfs_callback_tcpport6 = ret; dprintk("NFS: Callback listener port = %u (af %u, net %p)\n", - nfs_callback_tcpport6, PF_INET6, net); + nn->nfs_callback_tcpport6, PF_INET6, net); } else if (ret != -EAFNOSUPPORT) goto out_err; return 0; diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h index 1c167d16368..c07a8d460d3 100644 --- a/fs/nfs/callback.h +++ b/fs/nfs/callback.h @@ -208,6 +208,5 @@ extern int nfs4_set_callback_sessionid(struct nfs_client *clp); #define NFS41_BC_MAX_CALLBACKS 1 extern unsigned int nfs_callback_set_tcpport; -extern unsigned short nfs_callback_tcpport6; #endif /* __LINUX_FS_NFS_CALLBACK_H */ diff --git a/fs/nfs/netns.h b/fs/nfs/netns.h index 1538d3a83cd..137238b012f 100644 --- a/fs/nfs/netns.h +++ b/fs/nfs/netns.h @@ -23,6 +23,7 @@ struct nfs_net { #if IS_ENABLED(CONFIG_NFS_V4) struct idr cb_ident_idr; /* Protected by nfs_client_lock */ unsigned short nfs_callback_tcpport; + unsigned short nfs_callback_tcpport6; #endif spinlock_t nfs_client_lock; struct timespec boot_time; diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 716cdc20475..bd8ed01cb0e 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -80,7 +80,7 @@ int nfs4_init_clientid(struct nfs_client *clp, struct rpc_cred *cred) goto do_confirm; port = nn->nfs_callback_tcpport; if (clp->cl_addr.ss_family == AF_INET6) - port = nfs_callback_tcpport6; + port = nn->nfs_callback_tcpport6; status = nfs4_proc_setclientid(clp, NFS4_CALLBACK, port, cred, &clid); if (status != 0) -- cgit v1.2.3 From b3d19c51723be69fddb64723bebb5a30fb57a483 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Mon, 20 Aug 2012 18:00:46 +0400 
Subject: NFS: callback per-net usage counting introduced This patch also introduces refcount-aware nfs_callback_down_net() wrapper for svc_shutdown_net(). Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- fs/nfs/callback.c | 19 +++++++++++++++++-- fs/nfs/netns.h | 1 + 2 files changed, 18 insertions(+), 2 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index baafa0f1e55..6dfdc8311f2 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -260,10 +260,25 @@ static int nfs_callback_start_svc(int minorversion, struct rpc_xprt *xprt, return 0; } +static void nfs_callback_down_net(u32 minorversion, struct svc_serv *serv, struct net *net) +{ + struct nfs_net *nn = net_generic(net, nfs_net_id); + + if (--nn->cb_users[minorversion]) + return; + + dprintk("NFS: destroy per-net callback data; net=%p\n", net); + svc_shutdown_net(serv, net); +} + static int nfs_callback_up_net(int minorversion, struct svc_serv *serv, struct net *net) { + struct nfs_net *nn = net_generic(net, nfs_net_id); int ret; + if (nn->cb_users[minorversion]++) + return 0; + dprintk("NFS: create per-net callback data; net=%p\n", net); ret = svc_bind(serv, net); @@ -378,7 +393,7 @@ err_create: return ret; err_start: - svc_shutdown_net(serv, net); + nfs_callback_down_net(minorversion, serv, net); dprintk("NFS: Couldn't create server thread; err = %d\n", ret); goto err_net; } @@ -391,10 +406,10 @@ void nfs_callback_down(int minorversion, struct net *net) struct nfs_callback_data *cb_info = &nfs_callback_info[minorversion]; mutex_lock(&nfs_callback_mutex); + nfs_callback_down_net(minorversion, cb_info->serv, net); cb_info->users--; if (cb_info->users == 0 && cb_info->task != NULL) { kthread_stop(cb_info->task); - svc_shutdown_net(cb_info->serv, net); svc_exit_thread(cb_info->rqst); cb_info->serv = NULL; cb_info->rqst = NULL; diff --git a/fs/nfs/netns.h b/fs/nfs/netns.h index 137238b012f..b9c7f9b1f91 100644 --- a/fs/nfs/netns.h +++ b/fs/nfs/netns.h @@ 
-24,6 +24,7 @@ struct nfs_net { struct idr cb_ident_idr; /* Protected by nfs_client_lock */ unsigned short nfs_callback_tcpport; unsigned short nfs_callback_tcpport6; + int cb_users[NFS4_MAX_MINOR_VERSION + 1]; #endif spinlock_t nfs_client_lock; struct timespec boot_time; -- cgit v1.2.3 From 1dc42e04b75779d321f1d17dca3873004066f667 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Mon, 20 Aug 2012 18:00:51 +0400 Subject: NFS: add debug messages to callback down function Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- fs/nfs/callback.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/nfs') diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 6dfdc8311f2..8ed0bc8cffb 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -410,7 +410,9 @@ void nfs_callback_down(int minorversion, struct net *net) cb_info->users--; if (cb_info->users == 0 && cb_info->task != NULL) { kthread_stop(cb_info->task); + dprintk("nfs_callback_down: service stopped\n"); svc_exit_thread(cb_info->rqst); + dprintk("nfs_callback_down: service destroyed\n"); cb_info->serv = NULL; cb_info->rqst = NULL; cb_info->task = NULL; -- cgit v1.2.3 From 8cb7f74eeeb5441811d93f94b6138d4a5a9d8b20 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 14 Sep 2012 17:23:14 -0400 Subject: NFS: nfs_parsed_mount_options can use unsigned int MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fs/nfs/super.c: In function ‘nfs_compare_remount_data’: fs/nfs/super.c:2042:18: warning: comparison between signed and unsigned integer expressions [-Wsign-compare] fs/nfs/super.c:2043:18: warning: comparison between signed and unsigned integer expressions [-Wsign-compare] fs/nfs/super.c:2044:20: warning: comparison between signed and unsigned integer expressions [-Wsign-compare] fs/nfs/super.c:2046:21: warning: comparison between signed and unsigned integer expressions [-Wsign-compare] fs/nfs/super.c:2047:21: warning: comparison between signed and 
unsigned integer expressions [-Wsign-compare] fs/nfs/super.c:2048:21: warning: comparison between signed and unsigned integer expressions [-Wsign-compare] fs/nfs/super.c:2049:21: warning: comparison between signed and unsigned integer expressions [-Wsign-compare] fs/nfs/super.c:2050:18: warning: comparison between signed and unsigned integer expressions [-Wsign-compare] Seen with gcc (GCC) 4.6.3 20120306 (Red Hat 4.6.3-2). Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/internal.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 31fdb03225c..89560be07e4 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -101,11 +101,11 @@ struct nfs_client_initdata { */ struct nfs_parsed_mount_data { int flags; - int rsize, wsize; - int timeo, retrans; - int acregmin, acregmax, + unsigned int rsize, wsize; + unsigned int timeo, retrans; + unsigned int acregmin, acregmax, acdirmin, acdirmax; - int namlen; + unsigned int namlen; unsigned int options; unsigned int bsize; unsigned int auth_flavor_len; -- cgit v1.2.3 From ffe5a83005b0d23575ab109755b4cb5518a5d91f Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 14 Sep 2012 17:23:23 -0400 Subject: NFS: Slow down state manager after an unhandled error If the state manager thread is not actually able to fully recover from some situation, it wakes up waiters, who kick off a new state manager thread. Quite often the fresh invocation of the state manager is just as successful. This results in a livelock as the client dumps thousands of NFS requests a second on the network in a vain attempt to recover. Not very friendly. To mitigate this situation, add a delay in the state manager after an unhandled error, so that the client sends just a few requests every second in this case. 
Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/nfs4state.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index bd8ed01cb0e..38eeefd9537 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -2015,6 +2015,7 @@ out_error: pr_warn_ratelimited("NFS: state manager%s%s failed on NFSv4 server %s" " with error %d\n", section_sep, section, clp->cl_hostname, -status); + ssleep(1); nfs4_end_drain_session(clp); nfs4_clear_state_manager_bit(clp); } -- cgit v1.2.3 From ba9b584c1dc37851d9c6ca6d0d2ccba55d9aad04 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 14 Sep 2012 17:24:02 -0400 Subject: SUNRPC: Introduce rpc_clone_client_set_auth() An ULP is supposed to be able to replace a GSS rpc_auth object with another GSS rpc_auth object using rpcauth_create(). However, rpcauth_create() in 3.5 reliably fails with -EEXIST in this case. This is because when gss_create() attempts to create the upcall pipes, sometimes they are already there. For example if a pipe FS mount event occurs, or a previous GSS flavor was in use for this rpc_clnt. It turns out that's not the only problem here. While working on a fix for the above problem, we noticed that replacing an rpc_clnt's rpc_auth is not safe, since dereferencing the cl_auth field is not protected in any way. So we're deprecating the ability of rpcauth_create() to switch an rpc_clnt's security flavor during normal operation. Instead, let's add a fresh API that clones an rpc_clnt and gives the clone a new flavor before it's used. This makes immediate use of the new __rpc_clone_client() helper. This can be used in a similar fashion to rpcauth_create() when a client is hunting for the correct security flavor. Instead of replacing an rpc_clnt's security flavor in a loop, the ULP replaces the whole rpc_clnt. 
To fix the -EEXIST problem, any ULP logic that relies on replacing an rpc_clnt's rpc_auth with rpcauth_create() must be changed to use this API instead. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 13 ++----------- fs/nfs/nfs4namespace.c | 14 +------------- 2 files changed, 3 insertions(+), 24 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 99694442b93..143149db344 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -668,7 +668,8 @@ int nfs_init_server_rpcclient(struct nfs_server *server, { struct nfs_client *clp = server->nfs_client; - server->client = rpc_clone_client(clp->cl_rpcclient); + server->client = rpc_clone_client_set_auth(clp->cl_rpcclient, + pseudoflavour); if (IS_ERR(server->client)) { dprintk("%s: couldn't create rpc_client!\n", __func__); return PTR_ERR(server->client); @@ -678,16 +679,6 @@ int nfs_init_server_rpcclient(struct nfs_server *server, timeo, sizeof(server->client->cl_timeout_default)); server->client->cl_timeout = &server->client->cl_timeout_default; - - if (pseudoflavour != clp->cl_rpcclient->cl_auth->au_flavor) { - struct rpc_auth *auth; - - auth = rpcauth_create(pseudoflavour, server->client); - if (IS_ERR(auth)) { - dprintk("%s: couldn't create credcache!\n", __func__); - return PTR_ERR(auth); - } - } server->client->cl_softrtry = 0; if (server->flags & NFS_MOUNT_SOFT) server->client->cl_softrtry = 1; diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c index 4fdeb1b7042..79fbb61ce20 100644 --- a/fs/nfs/nfs4namespace.c +++ b/fs/nfs/nfs4namespace.c @@ -192,25 +192,13 @@ out: struct rpc_clnt *nfs4_create_sec_client(struct rpc_clnt *clnt, struct inode *inode, struct qstr *name) { - struct rpc_clnt *clone; - struct rpc_auth *auth; rpc_authflavor_t flavor; flavor = nfs4_negotiate_security(inode, name); if ((int)flavor < 0) return ERR_PTR((int)flavor); - clone = rpc_clone_client(clnt); - if (IS_ERR(clone)) - return clone; - - auth = rpcauth_create(flavor, 
clone); - if (IS_ERR(auth)) { - rpc_shutdown_client(clone); - clone = ERR_PTR(-EIO); - } - - return clone; + return rpc_clone_client_set_auth(clnt, flavor); } static struct vfsmount *try_location(struct nfs_clone_mount *mountdata, -- cgit v1.2.3 From 896526174ce2b6a773e187ebe5a047b68230e2c4 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 14 Sep 2012 17:24:11 -0400 Subject: NFS: Introduce "migration" mount option Currently, the Linux client uses a unique nfs_client_id4.id string when identifying itself to distinct NFS servers. To support transparent state migration, the Linux client will have to use the same nfs_client_id4 string for all servers it communicates with (also known as the "uniform client string" approach). Otherwise NFS servers can not recognize that open and lock state need to be merged after a file system transition. Unfortunately, there are some NFSv4.0 servers currently in the field that do not tolerate the uniform client string approach. Thus, by default, our NFSv4.0 mounts will continue to use the current approach, and we introduce a mount option that switches them to use the uniform model. Client administrators must identify which servers can be mounted with this option. Eventually most NFSv4.0 servers will be able to handle the uniform approach, and we can change the default. The first mount of a server controls the behavior for all subsequent mounts for the lifetime of that set of mounts of that server. After the last mount of that server is gone, the client erases the data structure that tracks the lease. A subsequent lease may then honor a different "migration" setting. This patch adds only the infrastructure for parsing the new mount option. Support for uniform client strings is added in a subsequent patch. 
Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 2 ++ fs/nfs/super.c | 20 ++++++++++++++++++++ 2 files changed, 22 insertions(+) (limited to 'fs/nfs') diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 143149db344..92aed2e08bd 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -752,6 +752,8 @@ static int nfs_init_server(struct nfs_server *server, data->timeo, data->retrans); if (data->flags & NFS_MOUNT_NORESVPORT) set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags); + if (server->options & NFS_OPTION_MIGRATION) + set_bit(NFS_CS_MIGRATION, &cl_init.init_flags); /* Allocate or find a client reference we can use */ clp = nfs_get_client(&cl_init, &timeparms, NULL, RPC_AUTH_UNIX); diff --git a/fs/nfs/super.c b/fs/nfs/super.c index b8eda700584..056138d45c1 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -88,6 +88,7 @@ enum { Opt_sharecache, Opt_nosharecache, Opt_resvport, Opt_noresvport, Opt_fscache, Opt_nofscache, + Opt_migration, Opt_nomigration, /* Mount options that take integer arguments */ Opt_port, @@ -147,6 +148,8 @@ static const match_table_t nfs_mount_option_tokens = { { Opt_noresvport, "noresvport" }, { Opt_fscache, "fsc" }, { Opt_nofscache, "nofsc" }, + { Opt_migration, "migration" }, + { Opt_nomigration, "nomigration" }, { Opt_port, "port=%s" }, { Opt_rsize, "rsize=%s" }, @@ -676,6 +679,9 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, if (nfss->options & NFS_OPTION_FSCACHE) seq_printf(m, ",fsc"); + if (nfss->options & NFS_OPTION_MIGRATION) + seq_printf(m, ",migration"); + if (nfss->flags & NFS_MOUNT_LOOKUP_CACHE_NONEG) { if (nfss->flags & NFS_MOUNT_LOOKUP_CACHE_NONE) seq_printf(m, ",lookupcache=none"); @@ -1243,6 +1249,12 @@ static int nfs_parse_mount_options(char *raw, kfree(mnt->fscache_uniq); mnt->fscache_uniq = NULL; break; + case Opt_migration: + mnt->options |= NFS_OPTION_MIGRATION; + break; + case Opt_nomigration: + mnt->options &= NFS_OPTION_MIGRATION; + break; /* * options that 
take numeric values @@ -1535,6 +1547,10 @@ static int nfs_parse_mount_options(char *raw, if (mnt->minorversion && mnt->version != 4) goto out_minorversion_mismatch; + if (mnt->options & NFS_OPTION_MIGRATION && + mnt->version != 4 && mnt->minorversion != 0) + goto out_migration_misuse; + /* * verify that any proto=/mountproto= options match the address * familiies in the addr=/mountaddr= options. @@ -1572,6 +1588,10 @@ out_minorversion_mismatch: printk(KERN_INFO "NFS: mount option vers=%u does not support " "minorversion=%u\n", mnt->version, mnt->minorversion); return 0; +out_migration_misuse: + printk(KERN_INFO + "NFS: 'migration' not supported for this NFS version\n"); + return 0; out_nomem: printk(KERN_INFO "NFS: not enough memory to parse option\n"); return 0; -- cgit v1.2.3 From e984a55a7418f777407c7edbb2bdf5eb9559b5e2 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 14 Sep 2012 17:24:21 -0400 Subject: NFS: Use the same nfs_client_id4 for every server Currently, when identifying itself to NFS servers, the Linux NFS client uses a unique nfs_client_id4.id string for each server IP address it talks with. For example, when client A talks to server X, the client identifies itself using a string like "AX". The requirements for these strings are specified in detail by RFC 3530 (and bis). This form of client identification presents a problem for Transparent State Migration. When client A's state on server X is migrated to server Y, it continues to be associated with string "AX." But, according to the rules of client string construction above, client A will present string "AY" when communicating with server Y. Server Y thus has no way to know that client A should be associated with the state migrated from server X. "AX" is all but abandoned, interfering with establishing fresh state for client A on server Y. 
To support transparent state migration, then, NFSv4.0 clients must instead use the same nfs_client_id4.id string to identify themselves to every NFS server; something like "A". Now a client identifies itself as "A" to server X. When a file system on server X transitions to server Y, and client A identifies itself as "A" to server Y, Y will know immediately that the state associated with "A," whether it is native or migrated, is owned by the client, and can merge both into a single lease. As a pre-requisite to adding support for NFSv4 migration to the Linux NFS client, this patch changes the way Linux identifies itself to NFS servers via the SETCLIENTID (NFSv4 minor version 0) and EXCHANGE_ID (NFSv4 minor version 1) operations. In addition to removing the server's IP address from nfs_client_id4, the Linux NFS client will also no longer use its own source IP address as part of the nfs_client_id4 string. On multi-homed clients, the value of this address depends on the address family and network routing used to contact the server, thus it can be different for each server. 
Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 51 +++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 39 insertions(+), 12 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 5b3207f557d..46141117196 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4047,6 +4047,32 @@ static void nfs4_init_boot_verifier(const struct nfs_client *clp, memcpy(bootverf->data, verf, sizeof(bootverf->data)); } +static unsigned int +nfs4_init_nonuniform_client_string(const struct nfs_client *clp, + char *buf, size_t len) +{ + unsigned int result; + + rcu_read_lock(); + result = scnprintf(buf, len, "Linux NFSv4.0 %s/%s %s", + clp->cl_ipaddr, + rpc_peeraddr2str(clp->cl_rpcclient, + RPC_DISPLAY_ADDR), + rpc_peeraddr2str(clp->cl_rpcclient, + RPC_DISPLAY_PROTO)); + rcu_read_unlock(); + return result; +} + +static unsigned int +nfs4_init_uniform_client_string(const struct nfs_client *clp, + char *buf, size_t len) +{ + return scnprintf(buf, len, "Linux NFSv%u.%u %s", + clp->rpc_ops->version, clp->cl_minorversion, + clp->cl_rpcclient->cl_nodename); +} + /** * nfs4_proc_setclientid - Negotiate client ID * @clp: state data structure @@ -4077,15 +4103,18 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, /* nfs_client_id4 */ nfs4_init_boot_verifier(clp, &sc_verifier); - rcu_read_lock(); - setclientid.sc_name_len = scnprintf(setclientid.sc_name, - sizeof(setclientid.sc_name), "%s/%s %s", - clp->cl_ipaddr, - rpc_peeraddr2str(clp->cl_rpcclient, - RPC_DISPLAY_ADDR), - rpc_peeraddr2str(clp->cl_rpcclient, - RPC_DISPLAY_PROTO)); + if (test_bit(NFS_CS_MIGRATION, &clp->cl_flags)) + setclientid.sc_name_len = + nfs4_init_uniform_client_string(clp, + setclientid.sc_name, + sizeof(setclientid.sc_name)); + else + setclientid.sc_name_len = + nfs4_init_nonuniform_client_string(clp, + setclientid.sc_name, + sizeof(setclientid.sc_name)); /* cb_client4 */ + rcu_read_lock(); setclientid.sc_netid_len = 
scnprintf(setclientid.sc_netid, sizeof(setclientid.sc_netid), rpc_peeraddr2str(clp->cl_rpcclient, @@ -5307,10 +5336,8 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred) }; nfs4_init_boot_verifier(clp, &verifier); - args.id_len = scnprintf(args.id, sizeof(args.id), - "%s/%s", - clp->cl_ipaddr, - clp->cl_rpcclient->cl_nodename); + args.id_len = nfs4_init_uniform_client_string(clp, args.id, + sizeof(args.id)); dprintk("NFS call exchange_id auth=%s, '%.*s'\n", clp->cl_rpcclient->cl_auth->au_ops->au_name, args.id_len, args.id); -- cgit v1.2.3 From 05f4c350ee02e9461c6ae3a880ea326a06835e37 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 14 Sep 2012 17:24:32 -0400 Subject: NFS: Discover NFSv4 server trunking when mounting "Server trunking" is a fancy name for a multi-homed NFS server. Trunking might occur if a client sends NFS requests for a single workload to multiple network interfaces on the same server. There are some implications for NFSv4 state management that make it useful for a client to know if a single NFSv4 server instance is multi-homed. (Note this is only a consideration for NFSv4, not for legacy versions of NFS, which are stateless). If a client cares about server trunking, no NFSv4 operations can proceed until that client determines who it is talking to. Thus server IP trunking discovery must be done when the client first encounters an unfamiliar server IP address. The nfs_get_client() function walks the nfs_client_list and matches on server IP address. The outcome of that walk tells us immediately if we have an unfamiliar server IP address. It invokes nfs_init_client() in this case. Thus, nfs4_init_client() is a good spot to perform trunking discovery. Discovery requires a client to establish a fresh client ID, so our client will now send SETCLIENTID or EXCHANGE_ID as the first NFS operation after a successful ping, rather than waiting for an application to perform an operation that requires NFSv4 state. 
The exact process for detecting trunking is different for NFSv4.0 and NFSv4.1, so a minorversion-specific init_client callout method is introduced. CLID_INUSE recovery is important for the trunking discovery process. CLID_INUSE is a sign the server recognizes the client's nfs_client_id4 id string, but the client is using the wrong principal this time for the SETCLIENTID operation. The SETCLIENTID must be retried with a series of different principals until one works, and then the rest of trunking discovery can proceed. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 3 +- fs/nfs/internal.h | 6 ++ fs/nfs/nfs4_fs.h | 8 ++ fs/nfs/nfs4client.c | 253 ++++++++++++++++++++++++++++++++++++++++++++++++++++ fs/nfs/nfs4proc.c | 4 + fs/nfs/nfs4state.c | 182 ++++++++++++++++++++++++++++++++++++- 6 files changed, 454 insertions(+), 2 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 92aed2e08bd..57d2a5c3d93 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -498,7 +498,8 @@ nfs_get_client(const struct nfs_client_initdata *cl_init, return nfs_found_client(cl_init, clp); } if (new) { - list_add(&new->cl_share_link, &nn->nfs_client_list); + list_add_tail(&new->cl_share_link, + &nn->nfs_client_list); spin_unlock(&nn->nfs_client_lock); new->cl_flags = cl_init->init_flags; return rpc_ops->init_client(new, timeparms, ip_addr, diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 89560be07e4..89a795dc302 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -483,6 +483,12 @@ extern int _nfs4_call_sync_session(struct rpc_clnt *clnt, struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, int cache_reply); +extern int nfs40_walk_client_list(struct nfs_client *clp, + struct nfs_client **result, + struct rpc_cred *cred); +extern int nfs41_walk_client_list(struct nfs_client *clp, + struct nfs_client **result, + struct rpc_cred *cred); /* * Determine the device name as a string diff --git a/fs/nfs/nfs4_fs.h 
b/fs/nfs/nfs4_fs.h index 9cacc131a8a..832503c7a00 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -191,6 +191,8 @@ struct nfs4_state_recovery_ops { int (*establish_clid)(struct nfs_client *, struct rpc_cred *); struct rpc_cred * (*get_clid_cred)(struct nfs_client *); int (*reclaim_complete)(struct nfs_client *); + int (*detect_trunking)(struct nfs_client *, struct nfs_client **, + struct rpc_cred *); }; struct nfs4_state_maintenance_ops { @@ -320,9 +322,15 @@ extern void nfs4_renew_state(struct work_struct *); /* nfs4state.c */ struct rpc_cred *nfs4_get_setclientid_cred(struct nfs_client *clp); struct rpc_cred *nfs4_get_renew_cred_locked(struct nfs_client *clp); +int nfs4_discover_server_trunking(struct nfs_client *clp, + struct nfs_client **); +int nfs40_discover_server_trunking(struct nfs_client *clp, + struct nfs_client **, struct rpc_cred *); #if defined(CONFIG_NFS_V4_1) struct rpc_cred *nfs4_get_machine_cred_locked(struct nfs_client *clp); struct rpc_cred *nfs4_get_exchange_id_cred(struct nfs_client *clp); +int nfs41_discover_server_trunking(struct nfs_client *clp, + struct nfs_client **, struct rpc_cred *); extern void nfs4_schedule_session_recovery(struct nfs4_session *, int); #else static inline void nfs4_schedule_session_recovery(struct nfs4_session *session, int err) diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 612f5ebaaba..14ddd4d3096 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -185,6 +185,7 @@ struct nfs_client *nfs4_init_client(struct nfs_client *clp, rpc_authflavor_t authflavour) { char buf[INET6_ADDRSTRLEN + 1]; + struct nfs_client *old; int error; if (clp->cl_cons_state == NFS_CS_READY) { @@ -230,6 +231,17 @@ struct nfs_client *nfs4_init_client(struct nfs_client *clp, if (!nfs4_has_session(clp)) nfs_mark_client_ready(clp, NFS_CS_READY); + + error = nfs4_discover_server_trunking(clp, &old); + if (error < 0) + goto error; + if (clp != old) { + clp->cl_preserve_clid = true; + nfs_put_client(clp); + clp = old; + 
atomic_inc(&clp->cl_count); + } + return clp; error: @@ -239,6 +251,247 @@ error: return ERR_PTR(error); } +/* + * Returns true if the client IDs match + */ +static bool nfs4_match_clientids(struct nfs_client *a, struct nfs_client *b) +{ + if (a->cl_clientid != b->cl_clientid) { + dprintk("NFS: --> %s client ID %llx does not match %llx\n", + __func__, a->cl_clientid, b->cl_clientid); + return false; + } + dprintk("NFS: --> %s client ID %llx matches %llx\n", + __func__, a->cl_clientid, b->cl_clientid); + return true; +} + +/* + * SETCLIENTID just did a callback update with the callback ident in + * "drop," but server trunking discovery claims "drop" and "keep" are + * actually the same server. Swap the callback IDs so that "keep" + * will continue to use the callback ident the server now knows about, + * and so that "keep"'s original callback ident is destroyed when + * "drop" is freed. + */ +static void nfs4_swap_callback_idents(struct nfs_client *keep, + struct nfs_client *drop) +{ + struct nfs_net *nn = net_generic(keep->cl_net, nfs_net_id); + unsigned int save = keep->cl_cb_ident; + + if (keep->cl_cb_ident == drop->cl_cb_ident) + return; + + dprintk("%s: keeping callback ident %u and dropping ident %u\n", + __func__, keep->cl_cb_ident, drop->cl_cb_ident); + + spin_lock(&nn->nfs_client_lock); + + idr_replace(&nn->cb_ident_idr, keep, drop->cl_cb_ident); + keep->cl_cb_ident = drop->cl_cb_ident; + + idr_replace(&nn->cb_ident_idr, drop, save); + drop->cl_cb_ident = save; + + spin_unlock(&nn->nfs_client_lock); +} + +/** + * nfs40_walk_client_list - Find server that recognizes a client ID + * + * @new: nfs_client with client ID to test + * @result: OUT: found nfs_client, or new + * @cred: credential to use for trunking test + * + * Returns zero, a negative errno, or a negative NFS4ERR status. + * If zero is returned, an nfs_client pointer is planted in "result." + * + * NB: nfs40_walk_client_list() relies on the new nfs_client being + * the last nfs_client on the list. 
+ */ +int nfs40_walk_client_list(struct nfs_client *new, + struct nfs_client **result, + struct rpc_cred *cred) +{ + struct nfs_net *nn = net_generic(new->cl_net, nfs_net_id); + struct nfs_client *pos, *n, *prev = NULL; + struct nfs4_setclientid_res clid = { + .clientid = new->cl_clientid, + .confirm = new->cl_confirm, + }; + int status; + + spin_lock(&nn->nfs_client_lock); + list_for_each_entry_safe(pos, n, &nn->nfs_client_list, cl_share_link) { + /* If "pos" isn't marked ready, we can't trust the + * remaining fields in "pos" */ + if (pos->cl_cons_state < NFS_CS_READY) + continue; + + if (pos->rpc_ops != new->rpc_ops) + continue; + + if (pos->cl_proto != new->cl_proto) + continue; + + if (pos->cl_minorversion != new->cl_minorversion) + continue; + + if (pos->cl_clientid != new->cl_clientid) + continue; + + atomic_inc(&pos->cl_count); + spin_unlock(&nn->nfs_client_lock); + + if (prev) + nfs_put_client(prev); + + status = nfs4_proc_setclientid_confirm(pos, &clid, cred); + if (status == 0) { + nfs4_swap_callback_idents(pos, new); + + nfs_put_client(pos); + *result = pos; + dprintk("NFS: <-- %s using nfs_client = %p ({%d})\n", + __func__, pos, atomic_read(&pos->cl_count)); + return 0; + } + if (status != -NFS4ERR_STALE_CLIENTID) { + nfs_put_client(pos); + dprintk("NFS: <-- %s status = %d, no result\n", + __func__, status); + return status; + } + + spin_lock(&nn->nfs_client_lock); + prev = pos; + } + + /* + * No matching nfs_client found. This should be impossible, + * because the new nfs_client has already been added to + * nfs_client_list by nfs_get_client(). + * + * Don't BUG(), since the caller is holding a mutex. 
+ */ + if (prev) + nfs_put_client(prev); + spin_unlock(&nn->nfs_client_lock); + pr_err("NFS: %s Error: no matching nfs_client found\n", __func__); + return -NFS4ERR_STALE_CLIENTID; +} + +#ifdef CONFIG_NFS_V4_1 +/* + * Returns true if the server owners match + */ +static bool +nfs4_match_serverowners(struct nfs_client *a, struct nfs_client *b) +{ + struct nfs41_server_owner *o1 = a->cl_serverowner; + struct nfs41_server_owner *o2 = b->cl_serverowner; + + if (o1->minor_id != o2->minor_id) { + dprintk("NFS: --> %s server owner minor IDs do not match\n", + __func__); + return false; + } + + if (o1->major_id_sz != o2->major_id_sz) + goto out_major_mismatch; + if (memcmp(o1->major_id, o2->major_id, o1->major_id_sz) != 0) + goto out_major_mismatch; + + dprintk("NFS: --> %s server owners match\n", __func__); + return true; + +out_major_mismatch: + dprintk("NFS: --> %s server owner major IDs do not match\n", + __func__); + return false; +} + +/** + * nfs41_walk_client_list - Find nfs_client that matches a client/server owner + * + * @new: nfs_client with client ID to test + * @result: OUT: found nfs_client, or new + * @cred: credential to use for trunking test + * + * Returns zero, a negative errno, or a negative NFS4ERR status. + * If zero is returned, an nfs_client pointer is planted in "result." + * + * NB: nfs41_walk_client_list() relies on the new nfs_client being + * the last nfs_client on the list. + */ +int nfs41_walk_client_list(struct nfs_client *new, + struct nfs_client **result, + struct rpc_cred *cred) +{ + struct nfs_net *nn = net_generic(new->cl_net, nfs_net_id); + struct nfs_client *pos, *n, *prev = NULL; + int error; + + spin_lock(&nn->nfs_client_lock); + list_for_each_entry_safe(pos, n, &nn->nfs_client_list, cl_share_link) { + /* If "pos" isn't marked ready, we can't trust the + * remaining fields in "pos", especially the client + * ID and serverowner fields. Wait for CREATE_SESSION + * to finish. 
*/ + if (pos->cl_cons_state < NFS_CS_READY) { + atomic_inc(&pos->cl_count); + spin_unlock(&nn->nfs_client_lock); + + if (prev) + nfs_put_client(prev); + prev = pos; + + error = nfs_wait_client_init_complete(pos); + if (error < 0) { + nfs_put_client(pos); + continue; + } + + spin_lock(&nn->nfs_client_lock); + } + + if (pos->rpc_ops != new->rpc_ops) + continue; + + if (pos->cl_proto != new->cl_proto) + continue; + + if (pos->cl_minorversion != new->cl_minorversion) + continue; + + if (!nfs4_match_clientids(pos, new)) + continue; + + if (!nfs4_match_serverowners(pos, new)) + continue; + + spin_unlock(&nn->nfs_client_lock); + dprintk("NFS: <-- %s using nfs_client = %p ({%d})\n", + __func__, pos, atomic_read(&pos->cl_count)); + + *result = pos; + return 0; + } + + /* + * No matching nfs_client found. This should be impossible, + * because the new nfs_client has already been added to + * nfs_client_list by nfs_get_client(). + * + * Don't BUG(), since the caller is holding a mutex. + */ + spin_unlock(&nn->nfs_client_lock); + pr_err("NFS: %s Error: no matching nfs_client found\n", __func__); + return -NFS4ERR_STALE_CLIENTID; +} +#endif /* CONFIG_NFS_V4_1 */ + static void nfs4_destroy_server(struct nfs_server *server) { nfs_server_return_all_delegations(server); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 46141117196..b5834abfcbf 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5458,6 +5458,8 @@ int nfs4_destroy_clientid(struct nfs_client *clp) goto out; if (clp->cl_exchange_flags == 0) goto out; + if (clp->cl_preserve_clid) + goto out; cred = nfs4_get_exchange_id_cred(clp); ret = nfs4_proc_destroy_clientid(clp, cred); if (cred) @@ -6871,6 +6873,7 @@ static const struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = { .recover_lock = nfs4_lock_reclaim, .establish_clid = nfs4_init_clientid, .get_clid_cred = nfs4_get_setclientid_cred, + .detect_trunking = nfs40_discover_server_trunking, }; #if defined(CONFIG_NFS_V4_1) @@ -6882,6 +6885,7 @@ static 
const struct nfs4_state_recovery_ops nfs41_reboot_recovery_ops = { .establish_clid = nfs41_init_clientid, .get_clid_cred = nfs4_get_exchange_id_cred, .reclaim_complete = nfs41_proc_reclaim_complete, + .detect_trunking = nfs41_discover_server_trunking, }; #endif /* CONFIG_NFS_V4_1 */ diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 38eeefd9537..5c428664370 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -51,6 +51,8 @@ #include #include +#include + #include "nfs4_fs.h" #include "callback.h" #include "delegation.h" @@ -63,7 +65,7 @@ #define OPENOWNER_POOL_SIZE 8 const nfs4_stateid zero_stateid; - +static DEFINE_MUTEX(nfs_clid_init_mutex); static LIST_HEAD(nfs4_clientid_list); int nfs4_init_clientid(struct nfs_client *clp, struct rpc_cred *cred) @@ -98,6 +100,55 @@ out: return status; } +/** + * nfs40_discover_server_trunking - Detect server IP address trunking (mv0) + * + * @clp: nfs_client under test + * @result: OUT: found nfs_client, or clp + * @cred: credential to use for trunking test + * + * Returns zero, a negative errno, or a negative NFS4ERR status. + * If zero is returned, an nfs_client pointer is planted in + * "result". + * + * Note: The returned client may not yet be marked ready. + */ +int nfs40_discover_server_trunking(struct nfs_client *clp, + struct nfs_client **result, + struct rpc_cred *cred) +{ + struct nfs4_setclientid_res clid = { + .clientid = clp->cl_clientid, + .confirm = clp->cl_confirm, + }; + unsigned short port; + int status; + + port = nfs_callback_tcpport; + if (clp->cl_addr.ss_family == AF_INET6) + port = nfs_callback_tcpport6; + + status = nfs4_proc_setclientid(clp, NFS4_CALLBACK, port, cred, &clid); + if (status != 0) + goto out; + clp->cl_clientid = clid.clientid; + clp->cl_confirm = clid.confirm; + + status = nfs40_walk_client_list(clp, result, cred); + switch (status) { + case -NFS4ERR_STALE_CLIENTID: + set_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state); + case 0: + /* Sustain the lease, even if it's empty. 
If the clientid4 + * goes stale it's of no use for trunking discovery. */ + nfs4_schedule_state_renewal(*result); + break; + } + +out: + return status; +} + struct rpc_cred *nfs4_get_machine_cred_locked(struct nfs_client *clp) { struct rpc_cred *cred = NULL; @@ -277,6 +328,32 @@ out: return status; } +/** + * nfs41_discover_server_trunking - Detect server IP address trunking (mv1) + * + * @clp: nfs_client under test + * @result: OUT: found nfs_client, or clp + * @cred: credential to use for trunking test + * + * Returns NFS4_OK, a negative errno, or a negative NFS4ERR status. + * If NFS4_OK is returned, an nfs_client pointer is planted in + * "result". + * + * Note: The returned client may not yet be marked ready. + */ +int nfs41_discover_server_trunking(struct nfs_client *clp, + struct nfs_client **result, + struct rpc_cred *cred) +{ + int status; + + status = nfs4_proc_exchange_id(clp, cred); + if (status != NFS4_OK) + return status; + + return nfs41_walk_client_list(clp, result, cred); +} + struct rpc_cred *nfs4_get_exchange_id_cred(struct nfs_client *clp) { struct rpc_cred *cred; @@ -1705,6 +1782,109 @@ static int nfs4_purge_lease(struct nfs_client *clp) return 0; } +/** + * nfs4_discover_server_trunking - Detect server IP address trunking + * + * @clp: nfs_client under test + * @result: OUT: found nfs_client, or clp + * + * Returns zero or a negative errno. If zero is returned, + * an nfs_client pointer is planted in "result". + * + * Note: since we are invoked in process context, and + * not from inside the state manager, we cannot use + * nfs4_handle_reclaim_lease_error(). 
+ */ +int nfs4_discover_server_trunking(struct nfs_client *clp, + struct nfs_client **result) +{ + const struct nfs4_state_recovery_ops *ops = + clp->cl_mvops->reboot_recovery_ops; + rpc_authflavor_t *flavors, flav, save; + struct rpc_clnt *clnt; + struct rpc_cred *cred; + int i, len, status; + + dprintk("NFS: %s: testing '%s'\n", __func__, clp->cl_hostname); + + len = NFS_MAX_SECFLAVORS; + flavors = kcalloc(len, sizeof(*flavors), GFP_KERNEL); + if (flavors == NULL) { + status = -ENOMEM; + goto out; + } + len = rpcauth_list_flavors(flavors, len); + if (len < 0) { + status = len; + goto out_free; + } + clnt = clp->cl_rpcclient; + save = clnt->cl_auth->au_flavor; + i = 0; + + mutex_lock(&nfs_clid_init_mutex); + status = -ENOENT; +again: + cred = ops->get_clid_cred(clp); + if (cred == NULL) + goto out_unlock; + + status = ops->detect_trunking(clp, result, cred); + put_rpccred(cred); + switch (status) { + case 0: + break; + + case -EACCES: + if (clp->cl_machine_cred == NULL) + break; + /* Handle case where the user hasn't set up machine creds */ + nfs4_clear_machine_cred(clp); + case -NFS4ERR_DELAY: + case -ETIMEDOUT: + case -EAGAIN: + ssleep(1); + dprintk("NFS: %s after status %d, retrying\n", + __func__, status); + goto again; + + case -NFS4ERR_CLID_INUSE: + case -NFS4ERR_WRONGSEC: + status = -EPERM; + if (i >= len) + break; + + flav = flavors[i++]; + if (flav == save) + flav = flavors[i++]; + clnt = rpc_clone_client_set_auth(clnt, flav); + if (IS_ERR(clnt)) { + status = PTR_ERR(clnt); + break; + } + clp->cl_rpcclient = clnt; + goto again; + + case -NFS4ERR_MINOR_VERS_MISMATCH: + status = -EPROTONOSUPPORT; + break; + + case -EKEYEXPIRED: + nfs4_warn_keyexpired(clp->cl_hostname); + case -NFS4ERR_NOT_SAME: /* FixMe: implement recovery + * in nfs4_exchange_id */ + status = -EKEYEXPIRED; + } + +out_unlock: + mutex_unlock(&nfs_clid_init_mutex); +out_free: + kfree(flavors); +out: + dprintk("NFS: %s: status = %d\n", __func__, status); + return status; +} + #ifdef 
CONFIG_NFS_V4_1 void nfs4_schedule_session_recovery(struct nfs4_session *session, int err) { -- cgit v1.2.3 From 6f2ea7f2a3ff3cd342bface43f8b4bf5e431cf36 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 14 Sep 2012 17:24:41 -0400 Subject: NFS: Add nfs4_unique_id boot parameter An optional boot parameter is introduced to allow client administrators to specify a string that the Linux NFS client can insert into its nfs_client_id4 id string, to make it both more globally unique, and to ensure that it doesn't change even if the client's nodename changes. If this boot parameter is not specified, the client's nodename is used, as before. Client installation procedures can create a unique string (typically, a UUID) which remains unchanged during the lifetime of that client instance. This works just like creating a UUID for the label of the system's root and boot volumes. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/nfs4_fs.h | 3 +++ fs/nfs/nfs4proc.c | 6 +++++- fs/nfs/super.c | 5 +++++ 3 files changed, 13 insertions(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 832503c7a00..a525fdefccd 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -380,6 +380,9 @@ extern bool nfs4_disable_idmapping; extern unsigned short max_session_slots; extern unsigned short send_implementation_id; +#define NFS4_CLIENT_ID_UNIQ_LEN (64) +extern char nfs4_client_id_uniquifier[NFS4_CLIENT_ID_UNIQ_LEN]; + /* nfs4sysctl.c */ #ifdef CONFIG_SYSCTL int nfs4_register_sysctl(void); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index b5834abfcbf..9aa97112426 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4068,9 +4068,13 @@ static unsigned int nfs4_init_uniform_client_string(const struct nfs_client *clp, char *buf, size_t len) { + char *nodename = clp->cl_rpcclient->cl_nodename; + + if (nfs4_client_id_uniquifier[0] != '\0') + nodename = nfs4_client_id_uniquifier; return scnprintf(buf, len, "Linux NFSv%u.%u %s", 
clp->rpc_ops->version, clp->cl_minorversion, - clp->cl_rpcclient->cl_nodename); + nodename); } /** diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 056138d45c1..56f02a9bd6d 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -2662,6 +2662,7 @@ unsigned int nfs_idmap_cache_timeout = 600; bool nfs4_disable_idmapping = true; unsigned short max_session_slots = NFS4_DEF_SLOT_TABLE_SIZE; unsigned short send_implementation_id = 1; +char nfs4_client_id_uniquifier[NFS4_CLIENT_ID_UNIQ_LEN] = ""; EXPORT_SYMBOL_GPL(nfs_callback_set_tcpport); EXPORT_SYMBOL_GPL(nfs_callback_tcpport); @@ -2669,6 +2670,7 @@ EXPORT_SYMBOL_GPL(nfs_idmap_cache_timeout); EXPORT_SYMBOL_GPL(nfs4_disable_idmapping); EXPORT_SYMBOL_GPL(max_session_slots); EXPORT_SYMBOL_GPL(send_implementation_id); +EXPORT_SYMBOL_GPL(nfs4_client_id_uniquifier); #define NFS_CALLBACK_MAXPORTNR (65535U) @@ -2694,6 +2696,8 @@ static struct kernel_param_ops param_ops_portnr = { module_param_named(callback_tcpport, nfs_callback_set_tcpport, portnr, 0644); module_param(nfs_idmap_cache_timeout, int, 0644); module_param(nfs4_disable_idmapping, bool, 0644); +module_param_string(nfs4_unique_id, nfs4_client_id_uniquifier, + NFS4_CLIENT_ID_UNIQ_LEN, 0600); MODULE_PARM_DESC(nfs4_disable_idmapping, "Turn off NFSv4 idmapping when using 'sec=sys'"); module_param(max_session_slots, ushort, 0644); @@ -2702,6 +2706,7 @@ MODULE_PARM_DESC(max_session_slots, "Maximum number of outstanding NFSv4.1 " module_param(send_implementation_id, ushort, 0644); MODULE_PARM_DESC(send_implementation_id, "Send implementation ID with NFSv4.1 exchange_id"); +MODULE_PARM_DESC(nfs4_unique_id, "nfs_client_id4 uniquifier string"); MODULE_ALIAS("nfs4"); #endif /* CONFIG_NFS_V4 */ -- cgit v1.2.3 From 7acdb026818455638543b04b68d4a580c367fba8 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Fri, 24 Aug 2012 00:27:48 +0800 Subject: NFSv41: fix DIO write_io calculation pnfs_within_mdsthreshold() is called inside pg_init. We need to set read_io/write_io before that. 
Otherwise we fail pnfs_within_mdsthreshold() and IO goes to MDS. A simple test case: dd if=foo of=/mnt/pnfs/bar bs=10M count=1 oflag=direct Signed-off-by: Peng Tao Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 22130df1621..253d397780b 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -467,10 +467,10 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, const struct iovec *iov, if (!is_sync_kiocb(iocb)) dreq->iocb = iocb; + NFS_I(inode)->read_io += iov_length(iov, nr_segs); result = nfs_direct_read_schedule_iovec(dreq, iov, nr_segs, pos, uio); if (!result) result = nfs_direct_wait(dreq); - NFS_I(inode)->read_io += result; out_release: nfs_direct_req_release(dreq); out: @@ -818,6 +818,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, get_dreq(dreq); atomic_inc(&inode->i_dio_count); + NFS_I(dreq->inode)->write_io += iov_length(iov, nr_segs); for (seg = 0; seg < nr_segs; seg++) { const struct iovec *vec = &iov[seg]; result = nfs_direct_write_schedule_segment(&desc, vec, pos, uio); @@ -829,7 +830,6 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, pos += vec->iov_len; } nfs_pageio_complete(&desc); - NFS_I(dreq->inode)->write_io += desc.pg_bytes_written; /* * If no bytes were started, return the error, and let the -- cgit v1.2.3 From dc182549d439f60c332bf74d7f220a1bccf37da6 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Fri, 24 Aug 2012 00:27:49 +0800 Subject: NFS41: fix error of setting blocklayoutdriver After commit e38eb650 (NFS: set_pnfs_layoutdriver() from nfs4_proc_fsinfo()), set_pnfs_layoutdriver() is called inside nfs4_proc_fsinfo(), but pnfs_blksize is not set. It causes setting blocklayoutdriver failure and pnfsblock mount failure. 
Cc: stable [since v3.5] Signed-off-by: Peng Tao Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 1 - fs/nfs/nfs4proc.c | 5 ++++- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 57d2a5c3d93..bab3e8af574 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -849,7 +849,6 @@ static void nfs_server_set_fsinfo(struct nfs_server *server, if (server->wsize > NFS_MAX_FILE_IO_SIZE) server->wsize = NFS_MAX_FILE_IO_SIZE; server->wpages = (server->wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - server->pnfs_blksize = fsinfo->blksize; server->wtmult = nfs_block_bits(fsinfo->wtmult, NULL); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 9aa97112426..e10d66f5be0 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3402,8 +3402,11 @@ static int nfs4_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, s nfs_fattr_init(fsinfo->fattr); error = nfs4_do_fsinfo(server, fhandle, fsinfo); - if (error == 0) + if (error == 0) { + /* block layout checks this! */ + server->pnfs_blksize = fsinfo->blksize; set_pnfs_layoutdriver(server, fhandle, fsinfo->layouttype); + } return error; } -- cgit v1.2.3 From 5d0e3a004f02bffab51f542fa1d5b2e2854d8545 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Fri, 24 Aug 2012 00:27:50 +0800 Subject: Revert "pnfsblock: bail out partial page IO" This reverts commit 159e0561e322dd8008fff59e36efff8d2bdd0b0e, in favor of a more complete fix to the alignment issue. 
Signed-off-by: Peng Tao Signed-off-by: Trond Myklebust --- fs/nfs/blocklayout/blocklayout.c | 39 +++------------------------------------ 1 file changed, 3 insertions(+), 36 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 329bfbfed37..3c61514599a 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -228,14 +228,6 @@ bl_end_par_io_read(void *data, int unused) schedule_work(&rdata->task.u.tk_work); } -static bool -bl_check_alignment(u64 offset, u32 len, unsigned long blkmask) -{ - if ((offset & blkmask) || (len & blkmask)) - return false; - return true; -} - static enum pnfs_try_status bl_read_pagelist(struct nfs_read_data *rdata) { @@ -252,9 +244,6 @@ bl_read_pagelist(struct nfs_read_data *rdata) dprintk("%s enter nr_pages %u offset %lld count %u\n", __func__, rdata->pages.npages, f_offset, (unsigned int)rdata->args.count); - if (!bl_check_alignment(f_offset, rdata->args.count, PAGE_CACHE_MASK)) - goto use_mds; - par = alloc_parallel(rdata); if (!par) goto use_mds; @@ -563,7 +552,7 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync) struct bio *bio = NULL; struct pnfs_block_extent *be = NULL, *cow_read = NULL; sector_t isect, last_isect = 0, extent_length = 0; - struct parallel_io *par = NULL; + struct parallel_io *par; loff_t offset = wdata->args.offset; size_t count = wdata->args.count; struct page **pages = wdata->args.pages; @@ -574,10 +563,6 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync) NFS_SERVER(header->inode)->pnfs_blksize >> PAGE_CACHE_SHIFT; dprintk("%s enter, %Zu@%lld\n", __func__, count, offset); - /* Check for alignment first */ - if (!bl_check_alignment(offset, count, PAGE_CACHE_MASK)) - goto out_mds; - /* At this point, wdata->pages is a (sequential) list of nfs_pages. * We want to write each, and if there is an error set pnfs_error * to have it redone using nfs. 
@@ -1011,32 +996,14 @@ bl_clear_layoutdriver(struct nfs_server *server) return 0; } -static void -bl_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) -{ - if (!bl_check_alignment(req->wb_offset, req->wb_bytes, PAGE_CACHE_MASK)) - nfs_pageio_reset_read_mds(pgio); - else - pnfs_generic_pg_init_read(pgio, req); -} - -static void -bl_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) -{ - if (!bl_check_alignment(req->wb_offset, req->wb_bytes, PAGE_CACHE_MASK)) - nfs_pageio_reset_write_mds(pgio); - else - pnfs_generic_pg_init_write(pgio, req); -} - static const struct nfs_pageio_ops bl_pg_read_ops = { - .pg_init = bl_pg_init_read, + .pg_init = pnfs_generic_pg_init_read, .pg_test = pnfs_generic_pg_test, .pg_doio = pnfs_generic_pg_readpages, }; static const struct nfs_pageio_ops bl_pg_write_ops = { - .pg_init = bl_pg_init_write, + .pg_init = pnfs_generic_pg_init_write, .pg_test = pnfs_generic_pg_test, .pg_doio = pnfs_generic_pg_writepages, }; -- cgit v1.2.3 From fe6e1e8d9fad86873eb74a26e80a8f91f9e870b5 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Fri, 24 Aug 2012 00:27:51 +0800 Subject: pnfsblock: fix partial page buffer wirte If applications use flock to protect its write range, generic NFS will not do read-modify-write cycle at page cache level. Therefore LD should know how to handle non-sector aligned writes. Otherwise there will be data corruption. 
Cc: stable Signed-off-by: Peng Tao Signed-off-by: Trond Myklebust --- fs/nfs/blocklayout/blocklayout.c | 177 ++++++++++++++++++++++++++++++++++++--- fs/nfs/blocklayout/blocklayout.h | 1 + 2 files changed, 166 insertions(+), 12 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 3c61514599a..a9fe644a12d 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -162,25 +162,39 @@ static struct bio *bl_alloc_init_bio(int npg, sector_t isect, return bio; } -static struct bio *bl_add_page_to_bio(struct bio *bio, int npg, int rw, +static struct bio *do_add_page_to_bio(struct bio *bio, int npg, int rw, sector_t isect, struct page *page, struct pnfs_block_extent *be, void (*end_io)(struct bio *, int err), - struct parallel_io *par) + struct parallel_io *par, + unsigned int offset, int len) { + isect = isect + (offset >> SECTOR_SHIFT); + dprintk("%s: npg %d rw %d isect %llu offset %u len %d\n", __func__, + npg, rw, (unsigned long long)isect, offset, len); retry: if (!bio) { bio = bl_alloc_init_bio(npg, isect, be, end_io, par); if (!bio) return ERR_PTR(-ENOMEM); } - if (bio_add_page(bio, page, PAGE_CACHE_SIZE, 0) < PAGE_CACHE_SIZE) { + if (bio_add_page(bio, page, len, offset) < len) { bio = bl_submit_bio(rw, bio); goto retry; } return bio; } +static struct bio *bl_add_page_to_bio(struct bio *bio, int npg, int rw, + sector_t isect, struct page *page, + struct pnfs_block_extent *be, + void (*end_io)(struct bio *, int err), + struct parallel_io *par) +{ + return do_add_page_to_bio(bio, npg, rw, isect, page, be, + end_io, par, 0, PAGE_CACHE_SIZE); +} + /* This is basically copied from mpage_end_io_read */ static void bl_end_io_read(struct bio *bio, int err) { @@ -450,6 +464,106 @@ map_block(struct buffer_head *bh, sector_t isect, struct pnfs_block_extent *be) return; } +static void +bl_read_single_end_io(struct bio *bio, int error) +{ + struct bio_vec *bvec = bio->bi_io_vec + 
bio->bi_vcnt - 1; + struct page *page = bvec->bv_page; + + /* Only one page in bvec */ + unlock_page(page); +} + +static int +bl_do_readpage_sync(struct page *page, struct pnfs_block_extent *be, + unsigned int offset, unsigned int len) +{ + struct bio *bio; + struct page *shadow_page; + sector_t isect; + char *kaddr, *kshadow_addr; + int ret = 0; + + dprintk("%s: offset %u len %u\n", __func__, offset, len); + + shadow_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); + if (shadow_page == NULL) + return -ENOMEM; + + bio = bio_alloc(GFP_NOIO, 1); + if (bio == NULL) + return -ENOMEM; + + isect = (page->index << PAGE_CACHE_SECTOR_SHIFT) + + (offset / SECTOR_SIZE); + + bio->bi_sector = isect - be->be_f_offset + be->be_v_offset; + bio->bi_bdev = be->be_mdev; + bio->bi_end_io = bl_read_single_end_io; + + lock_page(shadow_page); + if (bio_add_page(bio, shadow_page, + SECTOR_SIZE, round_down(offset, SECTOR_SIZE)) == 0) { + unlock_page(shadow_page); + bio_put(bio); + return -EIO; + } + + submit_bio(READ, bio); + wait_on_page_locked(shadow_page); + if (unlikely(!test_bit(BIO_UPTODATE, &bio->bi_flags))) { + ret = -EIO; + } else { + kaddr = kmap_atomic(page); + kshadow_addr = kmap_atomic(shadow_page); + memcpy(kaddr + offset, kshadow_addr + offset, len); + kunmap_atomic(kshadow_addr); + kunmap_atomic(kaddr); + } + __free_page(shadow_page); + bio_put(bio); + + return ret; +} + +static int +bl_read_partial_page_sync(struct page *page, struct pnfs_block_extent *be, + unsigned int dirty_offset, unsigned int dirty_len, + bool full_page) +{ + int ret = 0; + unsigned int start, end; + + if (full_page) { + start = 0; + end = PAGE_CACHE_SIZE; + } else { + start = round_down(dirty_offset, SECTOR_SIZE); + end = round_up(dirty_offset + dirty_len, SECTOR_SIZE); + } + + dprintk("%s: offset %u len %d\n", __func__, dirty_offset, dirty_len); + if (!be) { + zero_user_segments(page, start, dirty_offset, + dirty_offset + dirty_len, end); + if (start == 0 && end == PAGE_CACHE_SIZE && + 
trylock_page(page)) { + SetPageUptodate(page); + unlock_page(page); + } + return ret; + } + + if (start != dirty_offset) + ret = bl_do_readpage_sync(page, be, start, dirty_offset - start); + + if (!ret && (dirty_offset + dirty_len < end)) + ret = bl_do_readpage_sync(page, be, dirty_offset + dirty_len, + end - dirty_offset - dirty_len); + + return ret; +} + /* Given an unmapped page, zero it or read in page for COW, page is locked * by caller. */ @@ -483,7 +597,6 @@ init_page_for_write(struct page *page, struct pnfs_block_extent *cow_read) SetPageUptodate(page); cleanup: - bl_put_extent(cow_read); if (bh) free_buffer_head(bh); if (ret) { @@ -555,6 +668,7 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync) struct parallel_io *par; loff_t offset = wdata->args.offset; size_t count = wdata->args.count; + unsigned int pg_offset, pg_len, saved_len; struct page **pages = wdata->args.pages; struct page *page; pgoff_t index; @@ -659,10 +773,11 @@ next_page: if (!extent_length) { /* We've used up the previous extent */ bl_put_extent(be); + bl_put_extent(cow_read); bio = bl_submit_bio(WRITE, bio); /* Get the next one */ be = bl_find_get_extent(BLK_LSEG2EXT(header->lseg), - isect, NULL); + isect, &cow_read); if (!be || !is_writable(be, isect)) { header->pnfs_error = -EINVAL; goto out; @@ -679,7 +794,26 @@ next_page: extent_length = be->be_length - (isect - be->be_f_offset); } - if (be->be_state == PNFS_BLOCK_INVALID_DATA) { + + dprintk("%s offset %lld count %Zu\n", __func__, offset, count); + pg_offset = offset & ~PAGE_CACHE_MASK; + if (pg_offset + count > PAGE_CACHE_SIZE) + pg_len = PAGE_CACHE_SIZE - pg_offset; + else + pg_len = count; + + saved_len = pg_len; + if (be->be_state == PNFS_BLOCK_INVALID_DATA && + !bl_is_sector_init(be->be_inval, isect)) { + ret = bl_read_partial_page_sync(pages[i], cow_read, + pg_offset, pg_len, true); + if (ret) { + dprintk("%s bl_read_partial_page_sync fail %d\n", + __func__, ret); + header->pnfs_error = ret; + goto out; + } + ret = 
bl_mark_sectors_init(be->be_inval, isect, PAGE_CACHE_SECTORS); if (unlikely(ret)) { @@ -688,15 +822,35 @@ next_page: header->pnfs_error = ret; goto out; } + + /* Expand to full page write */ + pg_offset = 0; + pg_len = PAGE_CACHE_SIZE; + } else if ((pg_offset & (SECTOR_SIZE - 1)) || + (pg_len & (SECTOR_SIZE - 1))){ + /* ahh, nasty case. We have to do sync full sector + * read-modify-write cycles. + */ + unsigned int saved_offset = pg_offset; + ret = bl_read_partial_page_sync(pages[i], be, pg_offset, + pg_len, false); + pg_offset = round_down(pg_offset, SECTOR_SIZE); + pg_len = round_up(saved_offset + pg_len, SECTOR_SIZE) + - pg_offset; } - bio = bl_add_page_to_bio(bio, wdata->pages.npages - i, WRITE, + + + bio = do_add_page_to_bio(bio, wdata->pages.npages - i, WRITE, isect, pages[i], be, - bl_end_io_write, par); + bl_end_io_write, par, + pg_offset, pg_len); if (IS_ERR(bio)) { header->pnfs_error = PTR_ERR(bio); bio = NULL; goto out; } + offset += saved_len; + count -= saved_len; isect += PAGE_CACHE_SECTORS; last_isect = isect; extent_length -= PAGE_CACHE_SECTORS; @@ -714,17 +868,16 @@ next_page: } write_done: - wdata->res.count = (last_isect << SECTOR_SHIFT) - (offset); - if (count < wdata->res.count) { - wdata->res.count = count; - } + wdata->res.count = wdata->args.count; out: bl_put_extent(be); + bl_put_extent(cow_read); bl_submit_bio(WRITE, bio); put_parallel(par); return PNFS_ATTEMPTED; out_mds: bl_put_extent(be); + bl_put_extent(cow_read); kfree(par); return PNFS_NOT_ATTEMPTED; } diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h index 03350690118..39bb51a8dd1 100644 --- a/fs/nfs/blocklayout/blocklayout.h +++ b/fs/nfs/blocklayout/blocklayout.h @@ -41,6 +41,7 @@ #define PAGE_CACHE_SECTORS (PAGE_CACHE_SIZE >> SECTOR_SHIFT) #define PAGE_CACHE_SECTOR_SHIFT (PAGE_CACHE_SHIFT - SECTOR_SHIFT) +#define SECTOR_SIZE (1 << SECTOR_SHIFT) struct block_mount_id { spinlock_t bm_lock; /* protects list */ -- cgit v1.2.3 From 
f742dc4a32587bff50b13dde9d8894b96851951a Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Fri, 24 Aug 2012 00:27:52 +0800 Subject: pnfsblock: fix non-aligned DIO read For DIO read, if it is not sector aligned, we should reject it and resend via MDS. Otherwise there might be data corruption. Also teach bl_read_pagelist to handle partial page reads for DIO. Cc: stable [since v3.4] Signed-off-by: Peng Tao Signed-off-by: Trond Myklebust --- fs/nfs/blocklayout/blocklayout.c | 64 +++++++++++++++++++++++++++++++++++----- 1 file changed, 56 insertions(+), 8 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index a9fe644a12d..61e04fb7c4b 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -252,8 +252,11 @@ bl_read_pagelist(struct nfs_read_data *rdata) sector_t isect, extent_length = 0; struct parallel_io *par; loff_t f_offset = rdata->args.offset; + size_t bytes_left = rdata->args.count; + unsigned int pg_offset, pg_len; struct page **pages = rdata->args.pages; int pg_index = rdata->args.pgbase >> PAGE_CACHE_SHIFT; + const bool is_dio = (header->dreq != NULL); dprintk("%s enter nr_pages %u offset %lld count %u\n", __func__, rdata->pages.npages, f_offset, (unsigned int)rdata->args.count); @@ -287,36 +290,53 @@ bl_read_pagelist(struct nfs_read_data *rdata) extent_length = min(extent_length, cow_length); } } + + if (is_dio) { + pg_offset = f_offset & ~PAGE_CACHE_MASK; + if (pg_offset + bytes_left > PAGE_CACHE_SIZE) + pg_len = PAGE_CACHE_SIZE - pg_offset; + else + pg_len = bytes_left; + + f_offset += pg_len; + bytes_left -= pg_len; + isect += (pg_offset >> SECTOR_SHIFT); + } else { + pg_offset = 0; + pg_len = PAGE_CACHE_SIZE; + } + hole = is_hole(be, isect); if (hole && !cow_read) { bio = bl_submit_bio(READ, bio); /* Fill hole w/ zeroes w/o accessing device */ dprintk("%s Zeroing page for hole\n", __func__); - zero_user_segment(pages[i], 0, PAGE_CACHE_SIZE); + 
zero_user_segment(pages[i], pg_offset, pg_len); print_page(pages[i]); SetPageUptodate(pages[i]); } else { struct pnfs_block_extent *be_read; be_read = (hole && cow_read) ? cow_read : be; - bio = bl_add_page_to_bio(bio, rdata->pages.npages - i, + bio = do_add_page_to_bio(bio, rdata->pages.npages - i, READ, isect, pages[i], be_read, - bl_end_io_read, par); + bl_end_io_read, par, + pg_offset, pg_len); if (IS_ERR(bio)) { header->pnfs_error = PTR_ERR(bio); bio = NULL; goto out; } } - isect += PAGE_CACHE_SECTORS; + isect += (pg_len >> SECTOR_SHIFT); extent_length -= PAGE_CACHE_SECTORS; } if ((isect << SECTOR_SHIFT) >= header->inode->i_size) { rdata->res.eof = 1; - rdata->res.count = header->inode->i_size - f_offset; + rdata->res.count = header->inode->i_size - rdata->args.offset; } else { - rdata->res.count = (isect << SECTOR_SHIFT) - f_offset; + rdata->res.count = (isect << SECTOR_SHIFT) - rdata->args.offset; } out: bl_put_extent(be); @@ -1149,9 +1169,37 @@ bl_clear_layoutdriver(struct nfs_server *server) return 0; } +static bool +is_aligned_req(struct nfs_page *req, unsigned int alignment) +{ + return IS_ALIGNED(req->wb_offset, alignment) && + IS_ALIGNED(req->wb_bytes, alignment); +} + +static void +bl_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) +{ + if (pgio->pg_dreq != NULL && + !is_aligned_req(req, SECTOR_SIZE)) + nfs_pageio_reset_read_mds(pgio); + else + pnfs_generic_pg_init_read(pgio, req); +} + +static bool +bl_pg_test_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, + struct nfs_page *req) +{ + if (pgio->pg_dreq != NULL && + !is_aligned_req(req, SECTOR_SIZE)) + return false; + + return pnfs_generic_pg_test(pgio, prev, req); +} + static const struct nfs_pageio_ops bl_pg_read_ops = { - .pg_init = pnfs_generic_pg_init_read, - .pg_test = pnfs_generic_pg_test, + .pg_init = bl_pg_init_read, + .pg_test = bl_pg_test_read, .pg_doio = pnfs_generic_pg_readpages, }; -- cgit v1.2.3 From 96c9eae638765c2bf2ca4f5a6325484f9bb69aa7 Mon 
Sep 17 00:00:00 2001 From: Peng Tao Date: Fri, 24 Aug 2012 00:27:53 +0800 Subject: pnfsblock: fix non-aligned DIO write For DIO writes, if it is not blocksize aligned, we need to do internal serialization. It may slow down writers anyway. So we just bail them out and resend to MDS. Cc: stable [since v3.4] Signed-off-by: Peng Tao Signed-off-by: Trond Myklebust --- fs/nfs/blocklayout/blocklayout.c | 34 +++++++++++++++++++++++++++++++--- 1 file changed, 31 insertions(+), 3 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 61e04fb7c4b..af3ef0e6849 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -685,7 +685,7 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync) struct bio *bio = NULL; struct pnfs_block_extent *be = NULL, *cow_read = NULL; sector_t isect, last_isect = 0, extent_length = 0; - struct parallel_io *par; + struct parallel_io *par = NULL; loff_t offset = wdata->args.offset; size_t count = wdata->args.count; unsigned int pg_offset, pg_len, saved_len; @@ -697,6 +697,13 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync) NFS_SERVER(header->inode)->pnfs_blksize >> PAGE_CACHE_SHIFT; dprintk("%s enter, %Zu@%lld\n", __func__, count, offset); + + if (header->dreq != NULL && + (!IS_ALIGNED(offset, NFS_SERVER(header->inode)->pnfs_blksize) || + !IS_ALIGNED(count, NFS_SERVER(header->inode)->pnfs_blksize))) { + dprintk("pnfsblock nonblock aligned DIO writes. Resend MDS\n"); + goto out_mds; + } /* At this point, wdata->pages is a (sequential) list of nfs_pages. * We want to write each, and if there is an error set pnfs_error * to have it redone using nfs. 
@@ -1197,6 +1204,27 @@ bl_pg_test_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, return pnfs_generic_pg_test(pgio, prev, req); } +void +bl_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) +{ + if (pgio->pg_dreq != NULL && + !is_aligned_req(req, PAGE_CACHE_SIZE)) + nfs_pageio_reset_write_mds(pgio); + else + pnfs_generic_pg_init_write(pgio, req); +} + +static bool +bl_pg_test_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, + struct nfs_page *req) +{ + if (pgio->pg_dreq != NULL && + !is_aligned_req(req, PAGE_CACHE_SIZE)) + return false; + + return pnfs_generic_pg_test(pgio, prev, req); +} + static const struct nfs_pageio_ops bl_pg_read_ops = { .pg_init = bl_pg_init_read, .pg_test = bl_pg_test_read, @@ -1204,8 +1232,8 @@ static const struct nfs_pageio_ops bl_pg_read_ops = { }; static const struct nfs_pageio_ops bl_pg_write_ops = { - .pg_init = pnfs_generic_pg_init_write, - .pg_test = pnfs_generic_pg_test, + .pg_init = bl_pg_init_write, + .pg_test = bl_pg_test_write, .pg_doio = pnfs_generic_pg_writepages, }; -- cgit v1.2.3 From 4e266229dbb0782d91b75633322edd632794b86d Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 5 Sep 2012 14:38:03 +0800 Subject: pnfsblock: use list_move_tail instead of list_del/list_add_tail Using list_move_tail() instead of list_del() + list_add_tail(). spatch with a semantic match was used to find this problem.
(http://coccinelle.lip6.fr/) Signed-off-by: Wei Yongjun Signed-off-by: Trond Myklebust --- fs/nfs/blocklayout/extents.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/blocklayout/extents.c b/fs/nfs/blocklayout/extents.c index 1f9a6032796..9c3e117c3ed 100644 --- a/fs/nfs/blocklayout/extents.c +++ b/fs/nfs/blocklayout/extents.c @@ -683,8 +683,7 @@ encode_pnfs_block_layoutupdate(struct pnfs_block_layout *bl, p = xdr_encode_hyper(p, lce->bse_length << SECTOR_SHIFT); p = xdr_encode_hyper(p, 0LL); *p++ = cpu_to_be32(PNFS_BLOCK_READWRITE_DATA); - list_del(&lce->bse_node); - list_add_tail(&lce->bse_node, &bl->bl_committing); + list_move_tail(&lce->bse_node, &bl->bl_committing); bl->bl_count--; count++; } -- cgit v1.2.3 From ee34e13620d0678d420ce50101aaef94ab81fc74 Mon Sep 17 00:00:00 2001 From: Yanchuan Nian Date: Mon, 10 Sep 2012 08:40:16 +0800 Subject: NFS: Remove unnecessary semicolons (fs/nfs/client.c) There are some unnecessary semicolons in function find_nfs_version. Just remove them. 
Signed-off-by: Yanchuan Nian Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/client.c b/fs/nfs/client.c index bab3e8af574..8b39a42ac35 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -93,10 +93,10 @@ static struct nfs_subversion *find_nfs_version(unsigned int version) spin_unlock(&nfs_version_lock); return nfs; } - }; + } spin_unlock(&nfs_version_lock); - return ERR_PTR(-EPROTONOSUPPORT);; + return ERR_PTR(-EPROTONOSUPPORT); } struct nfs_subversion *get_nfs_version(unsigned int version) -- cgit v1.2.3 From 7297cb682acb506ada2e01fbc9b447b04d69936c Mon Sep 17 00:00:00 2001 From: Daniel Walter Date: Wed, 26 Sep 2012 21:51:46 +0200 Subject: nfs: replace strict_strto* with kstrto* [nfs] replace strict_str* with kstr* variants * replace string conversions with newer kstr* functions Signed-off-by: Daniel Walter Signed-off-by: Trond Myklebust --- fs/nfs/idmap.c | 4 ++-- fs/nfs/super.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index 9985a0aea5f..27dde503a6b 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -158,7 +158,7 @@ static int nfs_map_string_to_numeric(const char *name, size_t namelen, __u32 *re return 0; memcpy(buf, name, namelen); buf[namelen] = '\0'; - if (strict_strtoul(buf, 0, &val) != 0) + if (kstrtoul(buf, 0, &val) != 0) return 0; *res = val; return 1; @@ -364,7 +364,7 @@ static int nfs_idmap_lookup_id(const char *name, size_t namelen, const char *typ if (data_size <= 0) { ret = -EINVAL; } else { - ret = strict_strtol(id_str, 10, &id_long); + ret = kstrtol(id_str, 10, &id_long); *id = (__u32)id_long; } return ret; diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 56f02a9bd6d..a719bc0640b 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1112,7 +1112,7 @@ static int nfs_get_option_ul(substring_t args[], unsigned long *option) string = match_strdup(args); if (string == 
NULL) return -ENOMEM; - rc = strict_strtoul(string, 10, option); + rc = kstrtoul(string, 10, option); kfree(string); return rc; @@ -2681,7 +2681,7 @@ static int param_set_portnr(const char *val, const struct kernel_param *kp) if (!val) return -EINVAL; - ret = strict_strtoul(val, 0, &num); + ret = kstrtoul(val, 0, &num); if (ret == -EINVAL || num > NFS_CALLBACK_MAXPORTNR) return -EINVAL; *((unsigned int *)kp->arg) = num; -- cgit v1.2.3 From 9f62387d6e26532bcbfb15606956074192ee526a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 1 Oct 2012 16:17:31 -0700 Subject: NFSv4: Fix up a merge conflict between migration and container changes nfs_callback_tcpport is now per-net_namespace. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4state.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 5c428664370..3da8130d446 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -121,12 +121,13 @@ int nfs40_discover_server_trunking(struct nfs_client *clp, .clientid = clp->cl_clientid, .confirm = clp->cl_confirm, }; + struct nfs_net *nn = net_generic(clp->cl_net, nfs_net_id); unsigned short port; int status; - port = nfs_callback_tcpport; + port = nn->nfs_callback_tcpport; if (clp->cl_addr.ss_family == AF_INET6) - port = nfs_callback_tcpport6; + port = nn->nfs_callback_tcpport6; status = nfs4_proc_setclientid(clp, NFS4_CALLBACK, port, cred, &clid); if (status != 0) -- cgit v1.2.3 From 758201e2c94b7d26ea0ac64e55cab1d53742780a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 1 Oct 2012 16:33:18 -0700 Subject: NFSv4: Fix the minor version callback channel startup The current spaghetti code confuses some versions of gcc (and just looks ugly as hell)! Clean up... 
Signed-off-by: Trond Myklebust --- fs/nfs/callback.c | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 8ed0bc8cffb..2245bef50f3 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -177,15 +178,11 @@ nfs41_callback_up(struct svc_serv *serv) return rqstp; } -static inline int nfs_minorversion_callback_svc_setup(u32 minorversion, - struct svc_serv *serv, +static void nfs_minorversion_callback_svc_setup(struct svc_serv *serv, struct svc_rqst **rqstpp, int (**callback_svc)(void *vrqstp)) { - if (minorversion) { - *rqstpp = nfs41_callback_up(serv); - *callback_svc = nfs41_callback_svc; - } - return minorversion; + *rqstpp = nfs41_callback_up(serv); + *callback_svc = nfs41_callback_svc; } static inline void nfs_callback_bc_serv(u32 minorversion, struct rpc_xprt *xprt, @@ -204,11 +201,11 @@ static int nfs41_callback_up_net(struct svc_serv *serv, struct net *net) return 0; } -static inline int nfs_minorversion_callback_svc_setup(u32 minorversion, - struct svc_serv *serv, +static void nfs_minorversion_callback_svc_setup(struct svc_serv *serv, struct svc_rqst **rqstpp, int (**callback_svc)(void *vrqstp)) { - return 0; + *rqstpp = ERR_PTR(-ENOTSUPP); + *callback_svc = ERR_PTR(-ENOTSUPP); } static inline void nfs_callback_bc_serv(u32 minorversion, struct rpc_xprt *xprt, @@ -225,19 +222,21 @@ static int nfs_callback_start_svc(int minorversion, struct rpc_xprt *xprt, struct nfs_callback_data *cb_info = &nfs_callback_info[minorversion]; char svc_name[12]; int ret; - int minorversion_setup; nfs_callback_bc_serv(minorversion, xprt, serv); if (cb_info->task) return 0; - minorversion_setup = nfs_minorversion_callback_svc_setup(minorversion, - serv, &rqstp, &callback_svc); - if (!minorversion_setup) { + switch (minorversion) { + case 0: /* v4.0 callback setup */ rqstp = 
nfs4_callback_up(serv); callback_svc = nfs4_callback_svc; + break; + default: + nfs_minorversion_callback_svc_setup(serv, + &rqstp, &callback_svc); } if (IS_ERR(rqstp)) -- cgit v1.2.3 From f9d640f3a4f043f7dff66ad7bd8cb29ec145c41d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 1 Oct 2012 16:37:51 -0700 Subject: NFSv4: nfs4_match_clientids is only used by NFSv4.1 Fix another compiler warning. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4client.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 14ddd4d3096..8466e6046ff 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -251,21 +251,6 @@ error: return ERR_PTR(error); } -/* - * Returns true if the client IDs match - */ -static bool nfs4_match_clientids(struct nfs_client *a, struct nfs_client *b) -{ - if (a->cl_clientid != b->cl_clientid) { - dprintk("NFS: --> %s client ID %llx does not match %llx\n", - __func__, a->cl_clientid, b->cl_clientid); - return false; - } - dprintk("NFS: --> %s client ID %llx matches %llx\n", - __func__, a->cl_clientid, b->cl_clientid); - return true; -} - /* * SETCLIENTID just did a callback update with the callback ident in * "drop," but server trunking discovery claims "drop" and "keep" are @@ -383,6 +368,21 @@ int nfs40_walk_client_list(struct nfs_client *new, } #ifdef CONFIG_NFS_V4_1 +/* + * Returns true if the client IDs match + */ +static bool nfs4_match_clientids(struct nfs_client *a, struct nfs_client *b) +{ + if (a->cl_clientid != b->cl_clientid) { + dprintk("NFS: --> %s client ID %llx does not match %llx\n", + __func__, a->cl_clientid, b->cl_clientid); + return false; + } + dprintk("NFS: --> %s client ID %llx matches %llx\n", + __func__, a->cl_clientid, b->cl_clientid); + return true; +} + /* * Returns true if the server owners match */ -- cgit v1.2.3 From 47b803c8d2e0545221397d175e3563f9f3695628 Mon Sep 17 00:00:00 2001 From: Andy Adamson 
Date: Mon, 1 Oct 2012 20:42:32 -0400 Subject: NFSv4.0 reclaim reboot state when re-establishing clientid We should reclaim reboot state when the clientid is stale. Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust --- fs/nfs/nfs4state.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 3da8130d446..24a3ab492df 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1680,8 +1680,8 @@ out: return nfs4_recovery_handle_error(clp, status); } -/* Set NFS4CLNT_LEASE_EXPIRED for all v4.0 errors and for recoverable errors - * on EXCHANGE_ID for v4.1 +/* Set NFS4CLNT_LEASE_EXPIRED and reclaim reboot state for all v4.0 errors + * and for recoverable errors on EXCHANGE_ID for v4.1 */ static int nfs4_handle_reclaim_lease_error(struct nfs_client *clp, int status) { @@ -1691,8 +1691,12 @@ static int nfs4_handle_reclaim_lease_error(struct nfs_client *clp, int status) return -ESERVERFAULT; /* Lease confirmation error: retry after purging the lease */ ssleep(1); + clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state); + break; case -NFS4ERR_STALE_CLIENTID: clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state); + nfs4_state_clear_reclaim_reboot(clp); + nfs4_state_start_reclaim_reboot(clp); break; case -NFS4ERR_CLID_INUSE: pr_err("NFS: Server %s reports our clientid is in use\n", -- cgit v1.2.3 From ca57ccc48f6a9a3ec655f87acebab82bf01088e7 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Tue, 2 Oct 2012 14:18:54 +0400 Subject: nfs: include NFSv4 header in netns.h Build error: fs/nfs/netns.h:27:15: error: 'NFS4_MAX_MINOR_VERSION' undeclared here (not in a function) Reported-by: Fengguang Wu Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- fs/nfs/netns.h | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/nfs') diff --git a/fs/nfs/netns.h b/fs/nfs/netns.h index b9c7f9b1f91..8ee1fab8326 100644 --- a/fs/nfs/netns.h +++ b/fs/nfs/netns.h @@ -5,6 +5,7 @@ #ifndef 
__NFS_NETNS_H__ #define __NFS_NETNS_H__ +#include #include #include -- cgit v1.2.3 From 0b37d20ca225f57fff9237bc75d11d3995c8fed6 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Tue, 2 Oct 2012 14:18:59 +0400 Subject: nfs: declare nfs_callback_tcp_port in header Sparse warning: fs/nfs/super.c:2638:16: sparse: symbol 'nfs_callback_tcpport' was not declared. Should it be static? Reported-by: Fengguang Wu Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- fs/nfs/callback.h | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/nfs') diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h index c07a8d460d3..4251c2ae06a 100644 --- a/fs/nfs/callback.h +++ b/fs/nfs/callback.h @@ -208,5 +208,6 @@ extern int nfs4_set_callback_sessionid(struct nfs_client *clp); #define NFS41_BC_MAX_CALLBACKS 1 extern unsigned int nfs_callback_set_tcpport; +extern unsigned short nfs_callback_tcpport; #endif /* __LINUX_FS_NFS_CALLBACK_H */ -- cgit v1.2.3 From 3dd4f8ef7b26d5d4ada6e2f4b92d99b5e2255f72 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Tue, 2 Oct 2012 14:19:04 +0400 Subject: nfs: declare nfs_xdev_mount as static Sparse warning: fs/nfs/super.c:2517:15: warning: symbol 'nfs_xdev_mount' was not declared. Should it be static? 
Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- fs/nfs/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/super.c b/fs/nfs/super.c index a719bc0640b..b4079bbd30d 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -2514,7 +2514,7 @@ EXPORT_SYMBOL_GPL(nfs_kill_super); /* * Clone an NFS2/3/4 server record on xdev traversal (FSID-change) */ -struct dentry * +static struct dentry * nfs_xdev_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data) { -- cgit v1.2.3 From 22e243096104a741a0872328160c89d121a648c2 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Tue, 2 Oct 2012 14:19:09 +0400 Subject: nfs: include nfs4_fs.h in nfs4sysctl.c Sparse warnings: fs/nfs/nfs4sysctl.c:56:5: warning: symbol 'nfs4_register_sysctl' was not declared. Should it be static? fs/nfs/nfs4sysctl.c:64:6: warning: symbol 'nfs4_unregister_sysctl' was not declared. Should it be static? Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- fs/nfs/nfs4sysctl.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4sysctl.c b/fs/nfs/nfs4sysctl.c index 5729bc8aa75..2628d921b7e 100644 --- a/fs/nfs/nfs4sysctl.c +++ b/fs/nfs/nfs4sysctl.c @@ -9,6 +9,7 @@ #include #include +#include "nfs4_fs.h" #include "callback.h" static const int nfs_set_port_min = 0; -- cgit v1.2.3 From 4e437e95ae8fce164a97f4d67866c9a7e7ed9335 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Tue, 2 Oct 2012 14:19:14 +0400 Subject: nfs: include internal.h in getroot.c Sparse warning: fs/nfs/nfs4getroot.c:11:5: warning: symbol 'nfs4_get_rootfh' was not declared. Should it be static?
Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- fs/nfs/getroot.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/nfs') diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c index 4654ced096a..033803c3664 100644 --- a/fs/nfs/getroot.c +++ b/fs/nfs/getroot.c @@ -32,6 +32,8 @@ #include +#include "internal.h" + #define NFSDBG_FACILITY NFSDBG_CLIENT /* -- cgit v1.2.3 From 6f018efac14eb267d3ba0aa4294594b556147dba Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 2 Oct 2012 08:29:14 -0700 Subject: NFSv4.1: bl_pg_init_write should be static Reported-by: Fengguang Wu Signed-off-by: Trond Myklebust --- fs/nfs/blocklayout/blocklayout.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index af3ef0e6849..a34014a7f9a 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -1204,7 +1204,7 @@ bl_pg_test_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, return pnfs_generic_pg_test(pgio, prev, req); } -void +static void bl_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) { if (pgio->pg_dreq != NULL && -- cgit v1.2.3 From ee314c2a35ee7ea7ffa72e2aca83b041138f3358 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 1 Oct 2012 17:25:48 -0700 Subject: NFSv4.1: Handle BAD_STATEID and EXPIRED errors in layoutget If the layoutget call returns a stateid error, we want to invalidate the layout stateid, and/or recover the open stateid. 
Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 34 ++++++++++++++++++++++++++-------- 1 file changed, 26 insertions(+), 8 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index e10d66f5be0..1e0faf9fa07 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -6272,26 +6272,44 @@ nfs4_layoutget_prepare(struct rpc_task *task, void *calldata) static void nfs4_layoutget_done(struct rpc_task *task, void *calldata) { struct nfs4_layoutget *lgp = calldata; - struct nfs_server *server = NFS_SERVER(lgp->args.inode); + struct inode *inode = lgp->args.inode; + struct nfs_server *server = NFS_SERVER(inode); + struct pnfs_layout_hdr *lo; + struct nfs4_state *state = NULL; dprintk("--> %s\n", __func__); if (!nfs4_sequence_done(task, &lgp->res.seq_res)) - return; + goto out; switch (task->tk_status) { case 0: - break; + goto out; case -NFS4ERR_LAYOUTTRYLATER: case -NFS4ERR_RECALLCONFLICT: task->tk_status = -NFS4ERR_DELAY; - /* Fall through */ - default: - if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN) { - rpc_restart_call_prepare(task); - return; + break; + case -NFS4ERR_EXPIRED: + case -NFS4ERR_BAD_STATEID: + spin_lock(&inode->i_lock); + lo = NFS_I(inode)->layout; + if (!lo || list_empty(&lo->plh_segs)) { + spin_unlock(&inode->i_lock); + /* If the open stateid was bad, then recover it. */ + state = lgp->args.ctx->state; + } else { + LIST_HEAD(head); + + pnfs_mark_matching_lsegs_invalid(lo, &head, NULL); + spin_unlock(&inode->i_lock); + /* Mark the bad layout state as invalid, then + * retry using the open stateid. 
*/ + pnfs_free_lseg_list(&head); } } + if (nfs4_async_handle_error(task, server, state) == -EAGAIN) + rpc_restart_call_prepare(task); +out: dprintk("<-- %s\n", __func__); } -- cgit v1.2.3 From c2ccc084eb46ae718a200ad9c2606c258bf79a25 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 2 Oct 2012 09:18:12 -0700 Subject: NFS: nfs41_walk_client_list(): re-lock before iterating Sparse identified an execution path in nfs41_walk_client_list() where the nfs_client_lock is not re-acquired before taking the next loop iteration. fs/nfs/nfs4client.c:437:9: sparse: context imbalance in 'nfs41_walk_client_list' - different lock contexts for basic block Signed-off-by: Chuck Lever Cc: Fengguang Wu Signed-off-by: Trond Myklebust --- fs/nfs/nfs4client.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 8466e6046ff..6bacfde1319 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -450,6 +450,7 @@ int nfs41_walk_client_list(struct nfs_client *new, error = nfs_wait_client_init_complete(pos); if (error < 0) { nfs_put_client(pos); + spin_lock(&nn->nfs_client_lock); continue; } -- cgit v1.2.3 From f8aa23a55f813c9bddec2a6176e0e67274e6e7c1 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 2 Oct 2012 19:24:56 +0100 Subject: KEYS: Use keyring_alloc() to create special keyrings Use keyring_alloc() to create special keyrings now that it has a permissions parameter rather than using key_alloc() + key_instantiate_and_link(). Also document and export keyring_alloc() so that modules can use it too. 
Signed-off-by: David Howells --- fs/nfs/idmap.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index a850079467d..957134b4c0f 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -192,19 +192,15 @@ static int nfs_idmap_init_keyring(void) if (!cred) return -ENOMEM; - keyring = key_alloc(&key_type_keyring, ".id_resolver", 0, 0, cred, - (KEY_POS_ALL & ~KEY_POS_SETATTR) | - KEY_USR_VIEW | KEY_USR_READ, - KEY_ALLOC_NOT_IN_QUOTA); + keyring = keyring_alloc(".id_resolver", 0, 0, cred, + (KEY_POS_ALL & ~KEY_POS_SETATTR) | + KEY_USR_VIEW | KEY_USR_READ, + KEY_ALLOC_NOT_IN_QUOTA, NULL); if (IS_ERR(keyring)) { ret = PTR_ERR(keyring); goto failed_put_cred; } - ret = key_instantiate_and_link(keyring, NULL, 0, NULL, NULL); - if (ret < 0) - goto failed_put_key; - ret = register_key_type(&key_type_id_resolver); if (ret < 0) goto failed_put_key; -- cgit v1.2.3 From fd4835708ffd3387a76df2d1d3021717b0b63761 Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Tue, 2 Oct 2012 11:33:22 -0700 Subject: NFSv4.1: don't do two EXCHANGE_IDs on mount Since the addition of NFSv4 server trunking detection the mount context calls nfs4_proc_exchange_id then schedules the state manager, which also calls nfs4_proc_exchange_id. Setting the NFS4CLNT_LEASE_CONFIRM bit makes the state manager skip the unneeded EXCHANGE_ID and continue on with session creation. 
Reported-by: Jorge Mora Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust --- fs/nfs/nfs4state.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 24a3ab492df..c351e6b3983 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -351,6 +351,7 @@ int nfs41_discover_server_trunking(struct nfs_client *clp, status = nfs4_proc_exchange_id(clp, cred); if (status != NFS4_OK) return status; + set_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state); return nfs41_walk_client_list(clp, result, cred); } -- cgit v1.2.3 From ddfc4e171292d63d7e3f8c95ff9c3ef9932870ce Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Tue, 2 Oct 2012 16:01:38 -0400 Subject: NFS: Set key construction data for the legacy upcall This prevents a null pointer dereference when nfs_idmap_complete_pipe_upcall_locked() calls complete_request_key(). Fixes a regression caused by commit 0cac12023 (NFSv4: Ensure that idmap_pipe_downcall sanity-checks the downcall data). Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/idmap.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/nfs') diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index 675b389cba5..9cc4a3fbf4b 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -707,6 +707,7 @@ static int nfs_idmap_legacy_upcall(struct key_construction *cons, msg = &data->pipe_msg; im = &data->idmap_msg; data->idmap = idmap; + data->key_cons = cons; ret = nfs_idmap_prepare_message(key->description, idmap, im, msg); if (ret < 0) -- cgit v1.2.3 From bbd3a8eee82a2a6f4aa1cce60ccb014f25e5c560 Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Tue, 2 Oct 2012 14:49:51 -0700 Subject: NFSv4: don't check MAY_WRITE access bit in OPEN Don't check MAY_WRITE as a newly created file may not have write mode bits, but POSIX allows the creating process to write regardless. 
This is ok because NFSv4 OPEN ops handle write permissions correctly - the ACCESS in the OPEN compound is to differentiate READ v EXEC permissions. Fixes a regression due to commit 6168f62c (NFSv4: Add ACCESS operation to OPEN compound) Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 1e0faf9fa07..ccada6856f0 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1661,10 +1661,10 @@ static int nfs4_opendata_access(struct rpc_cred *cred, return 0; mask = 0; + /* don't check MAY_WRITE - a newly created file may not have + * write mode bits, but POSIX allows the creating process to write */ if (fmode & FMODE_READ) mask |= MAY_READ; - if (fmode & FMODE_WRITE) - mask |= MAY_WRITE; if (fmode & FMODE_EXEC) mask |= MAY_EXEC; @@ -1673,7 +1673,7 @@ static int nfs4_opendata_access(struct rpc_cred *cred, nfs_access_set_mask(&cache, opendata->o_res.access_result); nfs_access_add_cache(state->inode, &cache); - if ((mask & ~cache.mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) == 0) + if ((mask & ~cache.mask & (MAY_READ | MAY_EXEC)) == 0) return 0; /* even though OPEN succeeded, access is denied. Close the file */ -- cgit v1.2.3 From ae2bb03236fc978bdf673c19d39832500793b83c Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Tue, 2 Oct 2012 14:49:52 -0700 Subject: NFSv4: don't put ACCESS in OPEN compound if O_EXCL Don't put an ACCESS op in OPEN compound if O_EXCL, because ACCESS will return permission denied for all bits until close. 
Fixes a regression due to commit 6168f62c (NFSv4: Add ACCESS operation to OPEN compound) Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 12 +++++++++--- fs/nfs/nfs4xdr.c | 12 ++++++++---- 2 files changed, 17 insertions(+), 7 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index ccada6856f0..21cfac7c2ff 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -862,9 +862,15 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, p->o_arg.fh = NFS_FH(dir); p->o_arg.open_flags = flags; p->o_arg.fmode = fmode & (FMODE_READ|FMODE_WRITE); - /* ask server to check for all possible rights as results are cached */ - p->o_arg.access = NFS4_ACCESS_READ | NFS4_ACCESS_MODIFY | - NFS4_ACCESS_EXTEND | NFS4_ACCESS_EXECUTE; + /* don't put an ACCESS op in OPEN compound if O_EXCL, because ACCESS + * will return permission denied for all bits until close */ + if (!(flags & O_EXCL)) { + /* ask server to check for all possible rights as results + * are cached */ + p->o_arg.access = NFS4_ACCESS_READ | NFS4_ACCESS_MODIFY | + NFS4_ACCESS_EXTEND | NFS4_ACCESS_EXECUTE; + p->o_res.access_request = p->o_arg.access; + } p->o_arg.clientid = server->nfs_client->cl_clientid; p->o_arg.id.create_time = ktime_to_ns(sp->so_seqid.create_time); p->o_arg.id.uniquifier = sp->so_seqid.owner_id; diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 657483c34e2..0d6030510fe 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -2224,7 +2224,8 @@ static void nfs4_xdr_enc_open(struct rpc_rqst *req, struct xdr_stream *xdr, encode_putfh(xdr, args->fh, &hdr); encode_open(xdr, args, &hdr); encode_getfh(xdr, &hdr); - encode_access(xdr, args->access, &hdr); + if (args->access) + encode_access(xdr, args->access, &hdr); encode_getfattr_open(xdr, args->bitmask, args->open_bitmap, &hdr); encode_nops(&hdr); } @@ -2261,7 +2262,8 @@ static void nfs4_xdr_enc_open_noattr(struct rpc_rqst *req, encode_sequence(xdr, 
&args->seq_args, &hdr); encode_putfh(xdr, args->fh, &hdr); encode_open(xdr, args, &hdr); - encode_access(xdr, args->access, &hdr); + if (args->access) + encode_access(xdr, args->access, &hdr); encode_getfattr(xdr, args->bitmask, &hdr); encode_nops(&hdr); } @@ -6239,7 +6241,8 @@ static int nfs4_xdr_dec_open(struct rpc_rqst *rqstp, struct xdr_stream *xdr, status = decode_getfh(xdr, &res->fh); if (status) goto out; - decode_access(xdr, &res->access_supported, &res->access_result); + if (res->access_request) + decode_access(xdr, &res->access_supported, &res->access_result); decode_getfattr(xdr, res->f_attr, res->server); out: return status; @@ -6288,7 +6291,8 @@ static int nfs4_xdr_dec_open_noattr(struct rpc_rqst *rqstp, status = decode_open(xdr, res); if (status) goto out; - decode_access(xdr, &res->access_supported, &res->access_result); + if (res->access_request) + decode_access(xdr, &res->access_supported, &res->access_result); decode_getfattr(xdr, res->f_attr, res->server); out: return status; -- cgit v1.2.3 From 251ec410c495cb93c7ae2cb4beda29205d9bd35f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 2 Oct 2012 15:41:05 -0700 Subject: NFSv4.1: Fix another refcount issue in pnfs_find_alloc_layout Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index d737557747b..fe9968a62ca 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -907,18 +907,19 @@ pnfs_find_alloc_layout(struct inode *ino, dprintk("%s Begin ino=%p layout=%p\n", __func__, ino, nfsi->layout); - if (nfsi->layout) { - pnfs_get_layout_hdr(nfsi->layout); - return nfsi->layout; - } + if (nfsi->layout != NULL) + goto out_existing; spin_unlock(&ino->i_lock); new = alloc_init_layout_hdr(ino, ctx, gfp_flags); spin_lock(&ino->i_lock); - if (likely(nfsi->layout == NULL)) /* Won the race? */ + if (likely(nfsi->layout == NULL)) { /* Won the race? 
*/ nfsi->layout = new; - else - pnfs_free_layout_hdr(new); + return new; + } + pnfs_free_layout_hdr(new); +out_existing: + pnfs_get_layout_hdr(nfsi->layout); return nfsi->layout; } -- cgit v1.2.3 From 038d6493763d900797dfeb555502d3b0d8103fba Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 2 Oct 2012 16:38:41 -0700 Subject: NFSv4.1: Always set the layout stateid if this is the first layoutget If the list of layout segments is empty, we must unconditionally set the layout stateid. Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index fe9968a62ca..c8c1d0cc197 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -558,7 +558,7 @@ pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new, oldseq = be32_to_cpu(lo->plh_stateid.seqid); newseq = be32_to_cpu(new->seqid); - if ((int)(newseq - oldseq) > 0) { + if (list_empty(&lo->plh_segs) || (int)(newseq - oldseq) > 0) { nfs4_stateid_copy(&lo->plh_stateid, new); if (update_barrier) { u32 new_barrier = be32_to_cpu(new->seqid); @@ -1181,6 +1181,10 @@ pnfs_layout_process(struct nfs4_layoutget *lgp) dprintk("%s forget reply due to state\n", __func__); goto out_forget_reply; } + + /* Done processing layoutget. Set the layout stateid */ + pnfs_set_layout_stateid(lo, &res->stateid, false); + init_lseg(lo, lseg); lseg->pls_range = res->range; pnfs_get_lseg(lseg); @@ -1191,8 +1195,6 @@ pnfs_layout_process(struct nfs4_layoutget *lgp) set_bit(NFS_LAYOUT_ROC, &lo->plh_flags); } - /* Done processing layoutget. Set the layout stateid */ - pnfs_set_layout_stateid(lo, &res->stateid, false); spin_unlock(&ino->i_lock); return lseg; out: -- cgit v1.2.3 From 5a65503f3dbdb4aa1cd6cb58c479c015d093292b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 2 Oct 2012 16:47:14 -0700 Subject: NFSv4.1: Deal with wraparound issues when updating the layout stateid ...and add a helper function. 
Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index c8c1d0cc197..f1387e87513 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -549,6 +549,15 @@ pnfs_destroy_all_layouts(struct nfs_client *clp) } } +/* + * Compare 2 layout stateid sequence ids, to see which is newer, + * taking into account wraparound issues. + */ +static bool pnfs_seqid_is_newer(u32 s1, u32 s2) +{ + return (s32)s1 - (s32)s2 > 0; +} + /* update lo->plh_stateid with new if is more recent */ void pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new, @@ -558,7 +567,7 @@ pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new, oldseq = be32_to_cpu(lo->plh_stateid.seqid); newseq = be32_to_cpu(new->seqid); - if (list_empty(&lo->plh_segs) || (int)(newseq - oldseq) > 0) { + if (list_empty(&lo->plh_segs) || pnfs_seqid_is_newer(newseq, oldseq)) { nfs4_stateid_copy(&lo->plh_stateid, new); if (update_barrier) { u32 new_barrier = be32_to_cpu(new->seqid); -- cgit v1.2.3 From 25a1a6211dd2fcbf0e45a07030703e2a42d7aa87 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 2 Oct 2012 16:56:49 -0700 Subject: NFSv4.1: Deal with wraparound when updating the layout "barrier" seqid ...and fix a bug in pnfs_set_layout_stateid. 
Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index f1387e87513..de827251bda 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -572,7 +572,7 @@ pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new, if (update_barrier) { u32 new_barrier = be32_to_cpu(new->seqid); - if ((int)(new_barrier - lo->plh_barrier)) + if (pnfs_seqid_is_newer(new_barrier, lo->plh_barrier)) lo->plh_barrier = new_barrier; } else { /* Because of wraparound, we want to keep the barrier @@ -593,9 +593,12 @@ static bool pnfs_layoutgets_blocked(struct pnfs_layout_hdr *lo, nfs4_stateid *stateid, int lget) { - if ((stateid) && - (int)(lo->plh_barrier - be32_to_cpu(stateid->seqid)) >= 0) - return true; + if (stateid != NULL) { + u32 seqid = be32_to_cpu(stateid->seqid); + + if (!pnfs_seqid_is_newer(seqid, lo->plh_barrier)) + return true; + } return lo->plh_block_lgets || test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) || (list_empty(&lo->plh_segs) && -- cgit v1.2.3 From 807d66d80221920729a8d4abfa04246546a6d3fa Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 2 Oct 2012 17:09:00 -0700 Subject: NFSv4: nfs4_open_done first must check that GETATTR decoded a file type ...before it can check the validity of that file type. 
Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 21cfac7c2ff..68438aa4f08 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1537,7 +1537,8 @@ static void nfs4_open_done(struct rpc_task *task, void *calldata) return; if (task->tk_status == 0) { - switch (data->o_res.f_attr->mode & S_IFMT) { + if (data->o_res.f_attr->valid & NFS_ATTR_FATTR_TYPE) { + switch (data->o_res.f_attr->mode & S_IFMT) { case S_IFREG: break; case S_IFLNK: @@ -1548,6 +1549,7 @@ static void nfs4_open_done(struct rpc_task *task, void *calldata) break; default: data->rpc_status = -ENOTDIR; + } } renew_lease(data->o_res.server, data->timestamp); if (!(data->o_res.rflags & NFS4_OPEN_RESULT_CONFIRM)) -- cgit v1.2.3 From e23008ec81ef37b7b271669ce5d2de2643b2dc75 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Tue, 2 Oct 2012 21:07:32 -0400 Subject: NFSv4 reduce attribute requests for open reclaim We currently make no distinction in attribute requests between normal OPENs and OPEN with CLAIM_PREVIOUS. This offers more possibility of failures in the GETATTR response which foils OPEN reclaim attempts. Reduce the requested attributes to the bare minimum needed to update the reclaim open stateid and split nfs4_opendata_to_nfs4_state processing accordingly. 
Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 114 +++++++++++++++++++++++++++++++++++++++++------------- fs/nfs/nfs4xdr.c | 2 +- 2 files changed, 89 insertions(+), 27 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 68438aa4f08..2b62e3f79c9 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -152,6 +152,12 @@ static const u32 nfs4_pnfs_open_bitmap[3] = { FATTR4_WORD2_MDSTHRESHOLD }; +static const u32 nfs4_open_noattr_bitmap[3] = { + FATTR4_WORD0_TYPE + | FATTR4_WORD0_CHANGE + | FATTR4_WORD0_FILEID, +}; + const u32 nfs4_statfs_bitmap[2] = { FATTR4_WORD0_FILES_AVAIL | FATTR4_WORD0_FILES_FREE @@ -1126,11 +1132,80 @@ out_return_state: return state; } -static struct nfs4_state *nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data) +static void +nfs4_opendata_check_deleg(struct nfs4_opendata *data, struct nfs4_state *state) +{ + struct nfs_client *clp = NFS_SERVER(state->inode)->nfs_client; + struct nfs_delegation *delegation; + int delegation_flags = 0; + + rcu_read_lock(); + delegation = rcu_dereference(NFS_I(state->inode)->delegation); + if (delegation) + delegation_flags = delegation->flags; + rcu_read_unlock(); + if (data->o_arg.claim == NFS4_OPEN_CLAIM_DELEGATE_CUR) { + pr_err_ratelimited("NFS: Broken NFSv4 server %s is " + "returning a delegation for " + "OPEN(CLAIM_DELEGATE_CUR)\n", + clp->cl_hostname); + } else if ((delegation_flags & 1UL<<NFS_DELEGATION_NEED_RECLAIM) == 0) + nfs_inode_set_delegation(state->inode, + data->owner->so_cred, + &data->o_res); + else + nfs_inode_reclaim_delegation(state->inode, + data->owner->so_cred, + &data->o_res); +} + +/* + * Check the inode attributes against the CLAIM_PREVIOUS returned attributes + * and update the nfs4_state.
+ */ +static struct nfs4_state * +_nfs4_opendata_reclaim_to_nfs4_state(struct nfs4_opendata *data) +{ + struct inode *inode = data->state->inode; + struct nfs4_state *state = data->state; + int ret; + + if (!data->rpc_done) { + ret = data->rpc_status; + goto err; + } + + ret = -ESTALE; + if (!(data->f_attr.valid & NFS_ATTR_FATTR_TYPE) || + !(data->f_attr.valid & NFS_ATTR_FATTR_FILEID) || + !(data->f_attr.valid & NFS_ATTR_FATTR_CHANGE)) + goto err; + + ret = -ENOMEM; + state = nfs4_get_open_state(inode, data->owner); + if (state == NULL) + goto err; + + ret = nfs_refresh_inode(inode, &data->f_attr); + if (ret) + goto err; + + if (data->o_res.delegation_type != 0) + nfs4_opendata_check_deleg(data, state); + update_open_stateid(state, &data->o_res.stateid, NULL, + data->o_arg.fmode); + + return state; +err: + return ERR_PTR(ret); + +} + +static struct nfs4_state * +_nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data) { struct inode *inode; struct nfs4_state *state = NULL; - struct nfs_delegation *delegation; int ret; if (!data->rpc_done) { @@ -1149,30 +1224,8 @@ static struct nfs4_state *nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data state = nfs4_get_open_state(inode, data->owner); if (state == NULL) goto err_put_inode; - if (data->o_res.delegation_type != 0) { - struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; - int delegation_flags = 0; - - rcu_read_lock(); - delegation = rcu_dereference(NFS_I(inode)->delegation); - if (delegation) - delegation_flags = delegation->flags; - rcu_read_unlock(); - if (data->o_arg.claim == NFS4_OPEN_CLAIM_DELEGATE_CUR) { - pr_err_ratelimited("NFS: Broken NFSv4 server %s is " - "returning a delegation for " - "OPEN(CLAIM_DELEGATE_CUR)\n", - clp->cl_hostname); - } else if ((delegation_flags & 1UL<<NFS_DELEGATION_NEED_RECLAIM) == 0) - nfs_inode_set_delegation(state->inode, - data->owner->so_cred, - &data->o_res); - else - nfs_inode_reclaim_delegation(state->inode, - data->owner->so_cred, - &data->o_res); - } - + if (data->o_res.delegation_type != 0) + nfs4_opendata_check_deleg(data, state);
update_open_stateid(state, &data->o_res.stateid, NULL, data->o_arg.fmode); iput(inode); @@ -1184,6 +1237,14 @@ err: return ERR_PTR(ret); } +static struct nfs4_state * +nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data) +{ + if (data->o_arg.claim == NFS4_OPEN_CLAIM_PREVIOUS) + return _nfs4_opendata_reclaim_to_nfs4_state(data); + return _nfs4_opendata_to_nfs4_state(data); +} + static struct nfs_open_context *nfs4_state_find_open_context(struct nfs4_state *state) { struct nfs_inode *nfsi = NFS_I(state->inode); @@ -1505,6 +1566,7 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata) data->o_arg.clientid = sp->so_server->nfs_client->cl_clientid; if (data->o_arg.claim == NFS4_OPEN_CLAIM_PREVIOUS) { task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_NOATTR]; + data->o_arg.open_bitmap = &nfs4_open_noattr_bitmap[0]; nfs_copy_fh(&data->o_res.fh, data->o_arg.fh); } data->timestamp = jiffies; diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 0d6030510fe..40836ee5dc3 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -2264,7 +2264,7 @@ static void nfs4_xdr_enc_open_noattr(struct rpc_rqst *req, encode_open(xdr, args, &hdr); if (args->access) encode_access(xdr, args->access, &hdr); - encode_getfattr(xdr, args->bitmask, &hdr); + encode_getfattr_open(xdr, args->bitmask, args->open_bitmap, &hdr); encode_nops(&hdr); } -- cgit v1.2.3 From 8c0a85377048b64c880e76ec7368904fe46d0b94 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 26 Sep 2012 11:33:07 +1000 Subject: fs: push rcu_barrier() from deactivate_locked_super() to filesystems There's no reason to call rcu_barrier() on every deactivate_locked_super(). We only need to make sure that all delayed rcu free inodes are flushed before we destroy related cache. Removing rcu_barrier() from deactivate_locked_super() affects some fast paths. E.g. on my machine exit_group() of a last process in IPC namespace takes 0.07538s. rcu_barrier() takes 0.05188s of that time. 
Signed-off-by: Kirill A. Shutemov Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Al Viro --- fs/nfs/inode.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'fs/nfs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 9b47610338f..e4c716d374a 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1571,6 +1571,11 @@ static int __init nfs_init_inodecache(void) static void nfs_destroy_inodecache(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(nfs_inode_cachep); } -- cgit v1.2.3 From 8544a9dc18a1aa787b85425ecc9233b0a9adaff4 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 3 Oct 2012 10:54:50 -0700 Subject: NFSv4.1: Remove the dependency on CONFIG_EXPERIMENTAL CONFIG_EXPERIMENTAL is deprecated and, regardless of that, this code is being enabled in most newer distributions. Signed-off-by: Trond Myklebust --- fs/nfs/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index db7ad719628..13ca196385f 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig @@ -95,8 +95,8 @@ config NFS_SWAP This option enables swapon to work on files located on NFS mounts. config NFS_V4_1 - bool "NFS client support for NFSv4.1 (EXPERIMENTAL)" - depends on NFS_V4 && EXPERIMENTAL + bool "NFS client support for NFSv4.1" + depends on NFS_V4 select SUNRPC_BACKCHANNEL help This option enables support for minor version 1 of the NFSv4 protocol -- cgit v1.2.3 From 5f65753033d8c5a53e65810bff3832e8282c68d1 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Wed, 3 Oct 2012 02:39:34 -0400 Subject: NFSv4 set open access operation call flag in nfs4_init_opendata_res nfs4_open_recover_helper zeros the nfs4_opendata result structures, removing the result access_request information which leads to an XDR decode error. 
Move the setting of the result access_request field to nfs4_init_opendata_res which sets all the other required nfs4_opendata result fields and is shared between the open and recover open paths. Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 2b62e3f79c9..68b21d81b7a 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -840,6 +840,7 @@ static void nfs4_init_opendata_res(struct nfs4_opendata *p) p->o_res.seqid = p->o_arg.seqid; p->c_res.seqid = p->c_arg.seqid; p->o_res.server = p->o_arg.server; + p->o_res.access_request = p->o_arg.access; nfs_fattr_init(&p->f_attr); nfs_fattr_init_names(&p->f_attr, &p->owner_name, &p->group_name); } @@ -875,7 +876,6 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, * are cached */ p->o_arg.access = NFS4_ACCESS_READ | NFS4_ACCESS_MODIFY | NFS4_ACCESS_EXTEND | NFS4_ACCESS_EXECUTE; - p->o_res.access_request = p->o_arg.access; } p->o_arg.clientid = server->nfs_client->cl_clientid; p->o_arg.id.create_time = ktime_to_ns(sp->so_seqid.create_time); -- cgit v1.2.3 From 0f35ad6f688e9b7bcaa918a42130695822906f11 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 4 Oct 2012 16:28:17 -0700 Subject: NFSv4.1: Deal with seqid wraparound in the pNFS return-on-close code Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index de827251bda..42613bd19f8 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -801,7 +801,7 @@ void pnfs_roc_set_barrier(struct inode *ino, u32 barrier) spin_lock(&ino->i_lock); lo = NFS_I(ino)->layout; - if ((int)(barrier - lo->plh_barrier) > 0) + if (pnfs_seqid_is_newer(barrier, lo->plh_barrier)) lo->plh_barrier = barrier; spin_unlock(&ino->i_lock); } -- cgit v1.2.3 From 22aaf71495570b31350c37fd0aa736551bbaa3c9 Mon 
Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 4 Oct 2012 16:32:22 -0700 Subject: NFSv4.1: Ensure that the layout sequence id stays 'close' to the current Clamp the layout barrier sequence id to the current sequence id minus the maximum number of outstanding layoutget requests. Also ensure that we correctly initialise lo->plh_barrier if there are no layout segments associated to this layout header. Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 42613bd19f8..861dd97b569 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -563,28 +563,23 @@ void pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new, bool update_barrier) { - u32 oldseq, newseq; + u32 oldseq, newseq, new_barrier; + int empty = list_empty(&lo->plh_segs); oldseq = be32_to_cpu(lo->plh_stateid.seqid); newseq = be32_to_cpu(new->seqid); - if (list_empty(&lo->plh_segs) || pnfs_seqid_is_newer(newseq, oldseq)) { + if (empty || pnfs_seqid_is_newer(newseq, oldseq)) { nfs4_stateid_copy(&lo->plh_stateid, new); if (update_barrier) { - u32 new_barrier = be32_to_cpu(new->seqid); - - if (pnfs_seqid_is_newer(new_barrier, lo->plh_barrier)) - lo->plh_barrier = new_barrier; + new_barrier = be32_to_cpu(new->seqid); } else { /* Because of wraparound, we want to keep the barrier - * "close" to the current seqids. It needs to be - * within 2**31 to count as "behind", so if it - * gets too near that limit, give us a litle leeway - * and bring it to within 2**30. - * NOTE - and yes, this is all unsigned arithmetic. + * "close" to the current seqids. 
*/ - if (unlikely((newseq - lo->plh_barrier) > (3 << 29))) - lo->plh_barrier = newseq - (1 << 30); + new_barrier = newseq - atomic_read(&lo->plh_outstanding); } + if (empty || pnfs_seqid_is_newer(new_barrier, lo->plh_barrier)) + lo->plh_barrier = new_barrier; } } -- cgit v1.2.3 From 19c54abab79404c027ff61f13468e78a3e2a0065 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 5 Oct 2012 16:56:58 -0700 Subject: NFSv4.1: Cleanup ugliness in pnfs_layoutgets_blocked() Split it into two functions, one which checks if layoutgets are blocked, and one which checks if the layout stateid has expired. Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 861dd97b569..bd9769296e4 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -583,17 +583,19 @@ pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new, } } -/* lget is set to 1 if called from inside send_layoutget call chain */ static bool -pnfs_layoutgets_blocked(struct pnfs_layout_hdr *lo, nfs4_stateid *stateid, - int lget) +pnfs_layout_stateid_blocked(const struct pnfs_layout_hdr *lo, + const nfs4_stateid *stateid) { - if (stateid != NULL) { - u32 seqid = be32_to_cpu(stateid->seqid); + u32 seqid = be32_to_cpu(stateid->seqid); - if (!pnfs_seqid_is_newer(seqid, lo->plh_barrier)) - return true; - } + return !pnfs_seqid_is_newer(seqid, lo->plh_barrier); +} + +/* lget is set to 1 if called from inside send_layoutget call chain */ +static bool +pnfs_layoutgets_blocked(const struct pnfs_layout_hdr *lo, int lget) +{ return lo->plh_block_lgets || test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) || (list_empty(&lo->plh_segs) && @@ -608,7 +610,7 @@ pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo, dprintk("--> %s\n", __func__); spin_lock(&lo->plh_inode->i_lock); - if (pnfs_layoutgets_blocked(lo, NULL, 1)) { + if (pnfs_layoutgets_blocked(lo, 
1)) { status = -EAGAIN; } else if (list_empty(&lo->plh_segs)) { int seq; @@ -1111,7 +1113,7 @@ pnfs_update_layout(struct inode *ino, if (lseg) goto out_unlock; - if (pnfs_layoutgets_blocked(lo, NULL, 0)) + if (pnfs_layoutgets_blocked(lo, 0)) goto out_unlock; atomic_inc(&lo->plh_outstanding); @@ -1184,7 +1186,8 @@ pnfs_layout_process(struct nfs4_layoutget *lgp) goto out_forget_reply; } - if (pnfs_layoutgets_blocked(lo, &res->stateid, 1)) { + if (pnfs_layoutgets_blocked(lo, 1) || + pnfs_layout_stateid_blocked(lo, &res->stateid)) { dprintk("%s forget reply due to state\n", __func__); goto out_forget_reply; } -- cgit v1.2.3 From 35754bc00e94e598c432ad02f7a3d3063c4402e3 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Tue, 25 Sep 2012 14:55:57 +0800 Subject: NFS: track direct IO left bytes Signed-off-by: Peng Tao Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'fs/nfs') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 253d397780b..4be8673ee18 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -78,6 +78,7 @@ struct nfs_direct_req { atomic_t io_count; /* i/os we're waiting for */ spinlock_t lock; /* protect completion state */ ssize_t count, /* bytes actually processed */ + bytes_left, /* bytes left to be sent */ error; /* any reported error */ struct completion completion; /* wait for i/o completion */ @@ -390,6 +391,7 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *de user_addr += req_len; pos += req_len; count -= req_len; + dreq->bytes_left -= req_len; } /* The nfs_page now hold references to these pages */ nfs_direct_release_pages(pagevec, npages); @@ -457,6 +459,7 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, const struct iovec *iov, goto out; dreq->inode = inode; + dreq->bytes_left = iov_length(iov, nr_segs); dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp)); l_ctx = nfs_get_lock_context(dreq->ctx); if (IS_ERR(l_ctx)) { @@ -710,6 +713,7 @@ 
static ssize_t nfs_direct_write_schedule_segment(struct nfs_pageio_descriptor *d user_addr += req_len; pos += req_len; count -= req_len; + dreq->bytes_left -= req_len; } /* The nfs_page now hold references to these pages */ nfs_direct_release_pages(pagevec, npages); @@ -860,6 +864,7 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov, goto out; dreq->inode = inode; + dreq->bytes_left = count; dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp)); l_ctx = nfs_get_lock_context(dreq->ctx); if (IS_ERR(l_ctx)) { -- cgit v1.2.3 From 6296556f0b31eaff29f2a3aee2c17b7eae895b98 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Tue, 25 Sep 2012 14:55:57 +0800 Subject: NFS41: send real write size in layoutget For buffer write, block layout client scan inode mapping to find next hole and use offset-to-hole as layoutget length. Object layout client uses offset-to-isize as layoutget length. For direct write, both block layout and object layout use dreq->bytes_left. Signed-off-by: Peng Tao Signed-off-by: Trond Myklebust --- fs/nfs/blocklayout/blocklayout.c | 38 +++++++++++++++++++++++++++++++++++--- fs/nfs/direct.c | 7 +++++++ fs/nfs/internal.h | 1 + fs/nfs/objlayout/objio_osd.c | 9 ++++++++- fs/nfs/pnfs.c | 6 ++++-- fs/nfs/pnfs.h | 3 ++- 6 files changed, 57 insertions(+), 7 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index a34014a7f9a..f1027b06a1a 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -37,6 +37,7 @@ #include <linux/bio.h> /* struct bio */ #include <linux/buffer_head.h> /* various write calls */ #include <linux/prefetch.h> +#include <linux/pagevec.h> #include "../pnfs.h" #include "../internal.h" @@ -1204,14 +1205,45 @@ bl_pg_test_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, return pnfs_generic_pg_test(pgio, prev, req); } +/* + * Return the number of contiguous bytes for a given inode + * starting at page frame idx.
+ */ +static u64 pnfs_num_cont_bytes(struct inode *inode, pgoff_t idx) +{ + struct address_space *mapping = inode->i_mapping; + pgoff_t end; + + /* Optimize common case that writes from 0 to end of file */ + end = DIV_ROUND_UP(i_size_read(inode), PAGE_CACHE_SIZE); + if (end != NFS_I(inode)->npages) { + rcu_read_lock(); + end = radix_tree_next_hole(&mapping->page_tree, idx + 1, ULONG_MAX); + rcu_read_unlock(); + } + + if (!end) + return i_size_read(inode) - (idx << PAGE_CACHE_SHIFT); + else + return (end - idx) << PAGE_CACHE_SHIFT; +} + static void bl_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) { if (pgio->pg_dreq != NULL && - !is_aligned_req(req, PAGE_CACHE_SIZE)) + !is_aligned_req(req, PAGE_CACHE_SIZE)) { nfs_pageio_reset_write_mds(pgio); - else - pnfs_generic_pg_init_write(pgio, req); + } else { + u64 wb_size; + if (pgio->pg_dreq == NULL) + wb_size = pnfs_num_cont_bytes(pgio->pg_inode, + req->wb_index); + else + wb_size = nfs_dreq_bytes_left(pgio->pg_dreq); + + pnfs_generic_pg_init_write(pgio, req, wb_size); + } } static bool diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 4be8673ee18..cae26cbd59e 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -46,6 +46,7 @@ #include <linux/kref.h> #include <linux/slab.h> #include <linux/task_io_accounting_ops.h> +#include <linux/module.h> #include <linux/nfs_fs.h> #include <linux/nfs_page.h> @@ -191,6 +192,12 @@ static void nfs_direct_req_release(struct nfs_direct_req *dreq) kref_put(&dreq->kref, nfs_direct_req_free); } +ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq) +{ + return dreq->bytes_left; +} +EXPORT_SYMBOL_GPL(nfs_dreq_bytes_left); + /* * Collects and returns the final error value/byte-count.
*/ diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 89a795dc302..59b133c5d65 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -464,6 +464,7 @@ static inline void nfs_inode_dio_wait(struct inode *inode) { inode_dio_wait(inode); } +extern ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq); /* nfs4proc.c */ extern void __nfs4_read_done_cb(struct nfs_read_data *); diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index ea6d111b03e..be731e6b7b9 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c @@ -41,6 +41,7 @@ #include <scsi/osd_ore.h> #include "objlayout.h" +#include "../internal.h" #define NFSDBG_FACILITY NFSDBG_PNFS_LD @@ -606,8 +607,14 @@ static bool aligned_on_raid_stripe(u64 offset, struct ore_layout *layout, void objio_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) { unsigned long stripe_end = 0; + u64 wb_size; - pnfs_generic_pg_init_write(pgio, req); + if (pgio->pg_dreq == NULL) + wb_size = i_size_read(pgio->pg_inode) - req_offset(req); + else + wb_size = nfs_dreq_bytes_left(pgio->pg_dreq); + + pnfs_generic_pg_init_write(pgio, req, wb_size); if (unlikely(pgio->pg_lseg == NULL)) return; /* Not pNFS */ diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index bd9769296e4..9a2bcce4528 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1240,7 +1240,8 @@ pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *r EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_read); void -pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) +pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, + struct nfs_page *req, u64 wb_size) { BUG_ON(pgio->pg_lseg != NULL); @@ -1248,10 +1249,11 @@ pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page * nfs_pageio_reset_write_mds(pgio); return; } + pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, req->wb_context, req_offset(req), - req->wb_bytes, + wb_size, IOMODE_RW, GFP_NOFS); /* If no lseg,
fall back to write through mds */ diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 6cede2c6c96..2d722dba111 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -186,7 +186,8 @@ void set_pnfs_layoutdriver(struct nfs_server *, const struct nfs_fh *, u32); void unset_pnfs_layoutdriver(struct nfs_server *); void pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *, struct nfs_page *); int pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc); -void pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *, struct nfs_page *); +void pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, + struct nfs_page *req, u64 wb_size); int pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc); bool pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req); void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg); -- cgit v1.2.3 From 1fd937bd7583e618df0528f0268b210f265d6910 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Tue, 25 Sep 2012 14:55:57 +0800 Subject: NFS41: send real read size in layoutget For buffer read, use offset-to-isize. For direct read, use dreq->bytes_left.
Signed-off-by: Peng Tao Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 9a2bcce4528..fe624c91bd0 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1220,16 +1220,24 @@ out_forget_reply: void pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) { + u64 rd_size = req->wb_bytes; + BUG_ON(pgio->pg_lseg != NULL); if (req->wb_offset != req->wb_pgbase) { nfs_pageio_reset_read_mds(pgio); return; } + + if (pgio->pg_dreq == NULL) + rd_size = i_size_read(pgio->pg_inode) - req_offset(req); + else + rd_size = nfs_dreq_bytes_left(pgio->pg_dreq); + pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, req->wb_context, req_offset(req), - req->wb_bytes, + rd_size, IOMODE_READ, GFP_KERNEL); /* If no lseg, fall back to read through mds */ -- cgit v1.2.3 From af283885b70248268617955a5ea5476647bd556b Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Tue, 25 Sep 2012 14:55:57 +0800 Subject: pnfsblock: cleanup nfs4_blkdev_get It is not needed at all and it is messing with return values... 
Reported-by: Wei Yongjun Signed-off-by: Peng Tao Signed-off-by: Trond Myklebust --- fs/nfs/blocklayout/blocklayout.h | 1 - fs/nfs/blocklayout/blocklayoutdev.c | 25 +++++-------------------- 2 files changed, 5 insertions(+), 21 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h index 39bb51a8dd1..f4891bde885 100644 --- a/fs/nfs/blocklayout/blocklayout.h +++ b/fs/nfs/blocklayout/blocklayout.h @@ -173,7 +173,6 @@ struct bl_msg_hdr { /* blocklayoutdev.c */ ssize_t bl_pipe_downcall(struct file *, const char __user *, size_t); void bl_pipe_destroy_msg(struct rpc_pipe_msg *); -struct block_device *nfs4_blkdev_get(dev_t dev); int nfs4_blkdev_put(struct block_device *bdev); struct pnfs_block_dev *nfs4_blk_decode_device(struct nfs_server *server, struct pnfs_device *dev); diff --git a/fs/nfs/blocklayout/blocklayoutdev.c b/fs/nfs/blocklayout/blocklayoutdev.c index c96554245cc..a86c5bdad9e 100644 --- a/fs/nfs/blocklayout/blocklayoutdev.c +++ b/fs/nfs/blocklayout/blocklayoutdev.c @@ -53,22 +53,6 @@ static int decode_sector_number(__be32 **rp, sector_t *sp) return 0; } -/* Open a block_device by device number. 
*/ -struct block_device *nfs4_blkdev_get(dev_t dev) -{ - struct block_device *bd; - - dprintk("%s enter\n", __func__); - bd = blkdev_get_by_dev(dev, FMODE_READ, NULL); - if (IS_ERR(bd)) - goto fail; - return bd; -fail: - dprintk("%s failed to open device : %ld\n", - __func__, PTR_ERR(bd)); - return NULL; -} - /* * Release the block device */ @@ -172,11 +156,12 @@ nfs4_blk_decode_device(struct nfs_server *server, goto out; } - bd = nfs4_blkdev_get(MKDEV(reply->major, reply->minor)); + bd = blkdev_get_by_dev(MKDEV(reply->major, reply->minor), + FMODE_READ, NULL); if (IS_ERR(bd)) { - rc = PTR_ERR(bd); - dprintk("%s failed to open device : %d\n", __func__, rc); - rv = ERR_PTR(rc); + dprintk("%s failed to open device : %ld\n", __func__, + PTR_ERR(bd)); + rv = ERR_CAST(bd); goto out; } -- cgit v1.2.3 From 0b173bc4daa8f8ec03a85abf5e47b23502ff80af Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Mon, 8 Oct 2012 16:28:46 -0700 Subject: mm: kill vma flag VM_CAN_NONLINEAR Move actual pte filling for non-linear file mappings into the new special vma operation: ->remap_pages(). Filesystems must implement this method to get non-linear mapping support, if it uses filemap_fault() then generic_file_remap_pages() can be used. Now device drivers can implement this method and obtain nonlinear vma support. Signed-off-by: Konstantin Khlebnikov Cc: Alexander Viro Cc: Carsten Otte Cc: Chris Metcalf #arch/tile Cc: Cyrill Gorcunov Cc: Eric Paris Cc: H. 
Peter Anvin Cc: Hugh Dickins Cc: Ingo Molnar Cc: James Morris Cc: Jason Baron Cc: Kentaro Takeda Cc: Matt Helsley Cc: Nick Piggin Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Robert Richter Cc: Suresh Siddha Cc: Tetsuo Handa Cc: Venkatesh Pallipadi Acked-by: Linus Torvalds Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nfs/file.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/nfs') diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 6a7fcab7ecb..f692be97676 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -578,6 +578,7 @@ out: static const struct vm_operations_struct nfs_file_vm_ops = { .fault = filemap_fault, .page_mkwrite = nfs_vm_page_mkwrite, + .remap_pages = generic_file_remap_pages, }; static int nfs_need_sync_write(struct file *filp, struct inode *inode) -- cgit v1.2.3 From 8fcdc31b3d09bc348ff9bf752ae1291828756cfa Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 11 Oct 2012 12:26:04 -0400 Subject: NFSv4.1: Kill nfs4_ds_disconnect() There is nothing to prevent another thread from dereferencing ds->ds_clp during or after the call to nfs4_ds_disconnect(), and Oopsing due to the resulting NULL pointer. Instead, we should just rely on filelayout_mark_devid_invalid() to keep us out of trouble by avoiding that deviceid. 
Signed-off-by: Trond Myklebust --- fs/nfs/nfs4filelayout.c | 1 - fs/nfs/nfs4filelayout.h | 1 - fs/nfs/nfs4filelayoutdev.c | 22 ---------------------- 3 files changed, 24 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 52d84721206..816c2d0d133 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -207,7 +207,6 @@ static int filelayout_async_handle_error(struct rpc_task *task, clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(inode)->flags); _pnfs_return_layout(inode); rpc_wake_up(&tbl->slot_tbl_waitq); - nfs4_ds_disconnect(clp); /* fall through */ default: reset: diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h index dca47d78671..8c07241fe52 100644 --- a/fs/nfs/nfs4filelayout.h +++ b/fs/nfs/nfs4filelayout.h @@ -149,6 +149,5 @@ extern void nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr); extern void nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr); struct nfs4_file_layout_dsaddr * filelayout_get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id, gfp_t gfp_flags); -void nfs4_ds_disconnect(struct nfs_client *clp); #endif /* FS_NFS_NFS4FILELAYOUT_H */ diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c index 3336d5eaf87..a8eaa9b7bb0 100644 --- a/fs/nfs/nfs4filelayoutdev.c +++ b/fs/nfs/nfs4filelayoutdev.c @@ -148,28 +148,6 @@ _data_server_lookup_locked(const struct list_head *dsaddrs) return NULL; } -/* - * Lookup DS by nfs_client pointer. 
Zero data server client pointer - */ -void nfs4_ds_disconnect(struct nfs_client *clp) -{ - struct nfs4_pnfs_ds *ds; - struct nfs_client *found = NULL; - - dprintk("%s clp %p\n", __func__, clp); - spin_lock(&nfs4_ds_cache_lock); - list_for_each_entry(ds, &nfs4_data_server_cache, ds_node) - if (ds->ds_clp && ds->ds_clp == clp) { - found = ds->ds_clp; - ds->ds_clp = NULL; - } - spin_unlock(&nfs4_ds_cache_lock); - if (found) { - set_bit(NFS_CS_STOP_RENEW, &clp->cl_res_state); - nfs_put_client(clp); - } -} - /* * Create an rpc connection to the nfs4_pnfs_ds data server * Currently only supports IPv4 and IPv6 addresses -- cgit v1.2.3 From d527e5c15de8de813cd0a2ad0b769f68c6226938 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 11 Oct 2012 13:43:38 -0400 Subject: NFSv4.1: Do not call pnfs_return_layout() from an rpciod context Move the call to pnfs_return_layout() to the read and write rpc_release() callbacks, so that it gets called from nfsiod, which is a more appropriate context. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4filelayout.c | 18 +++++++++++++++--- fs/nfs/pnfs.h | 1 + 2 files changed, 16 insertions(+), 3 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 816c2d0d133..e7aee566861 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -122,12 +122,21 @@ static void filelayout_reset_read(struct nfs_read_data *data) } } +static void filelayout_fenceme(struct inode *inode, struct pnfs_layout_hdr *lo) +{ + if (!test_and_clear_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) + return; + clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(inode)->flags); + pnfs_return_layout(inode); +} + static int filelayout_async_handle_error(struct rpc_task *task, struct nfs4_state *state, struct nfs_client *clp, struct pnfs_layout_segment *lseg) { - struct inode *inode = lseg->pls_layout->plh_inode; + struct pnfs_layout_hdr *lo = lseg->pls_layout; + struct inode *inode = lo->plh_inode; struct nfs_server *mds_server = 
NFS_SERVER(inode); struct nfs4_deviceid_node *devid = FILELAYOUT_DEVID_NODE(lseg); struct nfs_client *mds_client = mds_server->nfs_client; @@ -204,8 +213,7 @@ static int filelayout_async_handle_error(struct rpc_task *task, dprintk("%s DS connection error %d\n", __func__, task->tk_status); nfs4_mark_deviceid_unavailable(devid); - clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(inode)->flags); - _pnfs_return_layout(inode); + set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags); rpc_wake_up(&tbl->slot_tbl_waitq); /* fall through */ default: @@ -330,7 +338,9 @@ static void filelayout_read_count_stats(struct rpc_task *task, void *data) static void filelayout_read_release(void *data) { struct nfs_read_data *rdata = data; + struct pnfs_layout_hdr *lo = rdata->header->lseg->pls_layout; + filelayout_fenceme(lo->plh_inode, lo); nfs_put_client(rdata->ds_clp); rdata->header->mds_ops->rpc_release(data); } @@ -428,7 +438,9 @@ static void filelayout_write_count_stats(struct rpc_task *task, void *data) static void filelayout_write_release(void *data) { struct nfs_write_data *wdata = data; + struct pnfs_layout_hdr *lo = wdata->header->lseg->pls_layout; + filelayout_fenceme(lo->plh_inode, lo); nfs_put_client(wdata->ds_clp); wdata->header->mds_ops->rpc_release(data); } diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 2d722dba111..dbf7bba52da 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -62,6 +62,7 @@ enum { NFS_LAYOUT_RW_FAILED, /* get rw layout failed stop trying */ NFS_LAYOUT_BULK_RECALL, /* bulk recall affecting layout */ NFS_LAYOUT_ROC, /* some lseg had roc bit set */ + NFS_LAYOUT_RETURN, /* Return this layout ASAP */ }; enum layoutdriver_policy_flags { -- cgit v1.2.3 From 1813badd98ce02e4b96d8997b68ddef4d4ad4ec5 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 11 Oct 2012 14:36:52 -0400 Subject: NFSv4.1: Use kcalloc() to allocate zeroed arrays instead of kzalloc() Don't circumvent the array size checks. 
Signed-off-by: Trond Myklebust --- fs/nfs/nfs4filelayout.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index e7aee566861..2e45fd9c02a 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -750,7 +750,7 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo, goto out_err; if (fl->num_fh > 0) { - fl->fh_array = kzalloc(fl->num_fh * sizeof(struct nfs_fh *), + fl->fh_array = kcalloc(fl->num_fh, sizeof(fl->fh_array[0]), gfp_flags); if (!fl->fh_array) goto out_err; -- cgit v1.2.3 From d6aa6a81d41eb48125b4622306b61dd398923ad9 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 16 Oct 2012 12:32:24 -0400 Subject: NFSv4: fs/nfs/nfs4getroot.c needs to include "internal.h" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix a warning about "no previous prototype for ‘nfs4_get_rootfh’" Signed-off-by: Trond Myklebust --- fs/nfs/nfs4getroot.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4getroot.c b/fs/nfs/nfs4getroot.c index 6a83780e0ce..549462e5b9b 100644 --- a/fs/nfs/nfs4getroot.c +++ b/fs/nfs/nfs4getroot.c @@ -5,6 +5,7 @@ #include #include "nfs4_fs.h" +#include "internal.h" #define NFSDBG_FACILITY NFSDBG_CLIENT -- cgit v1.2.3 From 2e928e4878fbc6328c2b1fc897d008db257bfbb0 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 16 Oct 2012 12:34:56 -0400 Subject: NFSv4.1: Declare osd_pri_2_pnfs_err(), objio_init_read/write to be static Signed-off-by: Trond Myklebust --- fs/nfs/objlayout/objio_osd.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index be731e6b7b9..c6f990656f8 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c @@ -369,7 +369,7 @@ void objio_free_result(struct objlayout_io_res *oir) kfree(objios); } -enum pnfs_osd_errno 
osd_pri_2_pnfs_err(enum osd_err_priority oep) +static enum pnfs_osd_errno osd_pri_2_pnfs_err(enum osd_err_priority oep) { switch (oep) { case OSD_ERR_PRI_NO_ERROR: @@ -574,7 +574,7 @@ static bool objio_pg_test(struct nfs_pageio_descriptor *pgio, (unsigned long)pgio->pg_layout_private; } -void objio_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) +static void objio_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) { pnfs_generic_pg_init_read(pgio, req); if (unlikely(pgio->pg_lseg == NULL)) @@ -604,7 +604,7 @@ static bool aligned_on_raid_stripe(u64 offset, struct ore_layout *layout, return false; } -void objio_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) +static void objio_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) { unsigned long stripe_end = 0; u64 wb_size; -- cgit v1.2.3 From e9b7e91745fa9df94900c8ab08e633f336686098 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 16 Oct 2012 12:30:44 -0400 Subject: NFSv4: Fix the return value for nfs_callback_start_svc returning PTR_ERR(cb_info->task) just after we have set it to NULL looks like a typo... 
Signed-off-by: Trond Myklebust Cc: Stanislav Kinsbursky --- fs/nfs/callback.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 9a521fb3986..5088b57b078 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -241,7 +241,7 @@ static int nfs_callback_start_svc(int minorversion, struct rpc_xprt *xprt, svc_exit_thread(cb_info->rqst); cb_info->rqst = NULL; cb_info->task = NULL; - return PTR_ERR(cb_info->task); + return ret; } dprintk("nfs_callback_up: service started\n"); return 0; -- cgit v1.2.3 From 399f11c3d872bd748e1575574de265a6304c7c43 Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Tue, 30 Oct 2012 16:06:35 -0400 Subject: NFS: Wait for session recovery to finish before returning Currently, we will schedule session recovery and then return to the caller of nfs4_handle_exception. This works for most cases, but causes a hang on the following test case: Client Server ------ ------ Open file over NFS v4.1 Write to file Expire client Try to lock file The server will return NFS4ERR_BADSESSION, prompting the client to schedule recovery. However, the client will continue placing lock attempts and the open recovery never seems to be scheduled. The simplest solution is to wait for session recovery to run before retrying the lock. 
Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org --- fs/nfs/nfs4proc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 68b21d81b7a..d5fbf1f49d5 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -339,8 +339,7 @@ static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struc dprintk("%s ERROR: %d Reset session\n", __func__, errorcode); nfs4_schedule_session_recovery(clp->cl_session, errorcode); - exception->retry = 1; - break; + goto wait_on_recovery; #endif /* defined(CONFIG_NFS_V4_1) */ case -NFS4ERR_FILE_OPEN: if (exception->timeout > HZ) { -- cgit v1.2.3 From 2240a9e2d013d8269ea425b73e1d7a54c7bc141f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 29 Oct 2012 18:37:40 -0400 Subject: NFSv4.1: We must release the sequence id when we fail to get a session slot If we do not release the sequence id in cases where we fail to get a session slot, then we can deadlock if we hit a recovery scenario. 
Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org --- fs/nfs/nfs4proc.c | 36 +++++++++++++++++++++++------------- 1 file changed, 23 insertions(+), 13 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index d5fbf1f49d5..e0423bb5a88 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1571,9 +1571,11 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata) data->timestamp = jiffies; if (nfs4_setup_sequence(data->o_arg.server, &data->o_arg.seq_args, - &data->o_res.seq_res, task)) - return; - rpc_call_start(task); + &data->o_res.seq_res, + task) != 0) + nfs_release_seqid(data->o_arg.seqid); + else + rpc_call_start(task); return; unlock_no_action: rcu_read_unlock(); @@ -2295,9 +2297,10 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) if (nfs4_setup_sequence(NFS_SERVER(inode), &calldata->arg.seq_args, &calldata->res.seq_res, - task)) - goto out; - rpc_call_start(task); + task) != 0) + nfs_release_seqid(calldata->arg.seqid); + else + rpc_call_start(task); out: dprintk("%s: done!\n", __func__); } @@ -4544,9 +4547,11 @@ static void nfs4_locku_prepare(struct rpc_task *task, void *data) calldata->timestamp = jiffies; if (nfs4_setup_sequence(calldata->server, &calldata->arg.seq_args, - &calldata->res.seq_res, task)) - return; - rpc_call_start(task); + &calldata->res.seq_res, + task) != 0) + nfs_release_seqid(calldata->arg.seqid); + else + rpc_call_start(task); } static const struct rpc_call_ops nfs4_locku_ops = { @@ -4691,7 +4696,7 @@ static void nfs4_lock_prepare(struct rpc_task *task, void *calldata) /* Do we need to do an open_to_lock_owner? 
*/ if (!(data->arg.lock_seqid->sequence->flags & NFS_SEQID_CONFIRMED)) { if (nfs_wait_on_sequence(data->arg.open_seqid, task) != 0) - return; + goto out_release_lock_seqid; data->arg.open_stateid = &state->stateid; data->arg.new_lock_owner = 1; data->res.open_seqid = data->arg.open_seqid; @@ -4700,10 +4705,15 @@ static void nfs4_lock_prepare(struct rpc_task *task, void *calldata) data->timestamp = jiffies; if (nfs4_setup_sequence(data->server, &data->arg.seq_args, - &data->res.seq_res, task)) + &data->res.seq_res, + task) == 0) { + rpc_call_start(task); return; - rpc_call_start(task); - dprintk("%s: done!, ret = %d\n", __func__, data->rpc_status); + } + nfs_release_seqid(data->arg.open_seqid); +out_release_lock_seqid: + nfs_release_seqid(data->arg.lock_seqid); + dprintk("%s: done!, ret = %d\n", __func__, task->tk_status); } static void nfs4_recover_lock_prepare(struct rpc_task *task, void *calldata) -- cgit v1.2.3 From 2b1bc308f492589f7d49012ed24561534ea2be8c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 29 Oct 2012 18:53:23 -0400 Subject: NFSv4: nfs4_locku_done must release the sequence id If the state recovery machinery is triggered by the call to nfs4_async_handle_error() then we can deadlock. Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org --- fs/nfs/nfs4proc.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index e0423bb5a88..1465364501b 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4531,6 +4531,7 @@ static void nfs4_locku_done(struct rpc_task *task, void *data) if (nfs4_async_handle_error(task, calldata->server, NULL) == -EAGAIN) rpc_restart_call_prepare(task); } + nfs_release_seqid(calldata->arg.seqid); } static void nfs4_locku_prepare(struct rpc_task *task, void *data) -- cgit v1.2.3 From 8d96b10639fb402357b75b055b1e82a65ff95050 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 31 Oct 2012 12:16:01 +1100 Subject: NFS: fix bug in legacy DNS resolver. 
The DNS resolver's use of the sunrpc cache involves a 'ttl' number (relative) rather than a timeout (absolute). This confused me when I wrote commit c5b29f885afe890f953f7f23424045cdad31d3e4 "sunrpc: use seconds since boot in expiry cache" and I managed to break it. The effect is that any TTL is interpreted as 0, and nothing useful gets into the cache. This patch removes the use of get_expiry() - which really expects an expiry time - and uses get_uint() instead, treating the int correctly as a ttl. This fixes a regression that has been present since 2.6.37, causing certain NFS accesses in certain environments to incorrectly fail. Reported-by: Chuck Lever Tested-by: Chuck Lever Cc: stable@vger.kernel.org Signed-off-by: NeilBrown Signed-off-by: Trond Myklebust --- fs/nfs/dns_resolve.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c index 31c26c4dcc2..ca4b11ec87a 100644 --- a/fs/nfs/dns_resolve.c +++ b/fs/nfs/dns_resolve.c @@ -217,7 +217,7 @@ static int nfs_dns_parse(struct cache_detail *cd, char *buf, int buflen) { char buf1[NFS_DNS_HOSTNAME_MAXLEN+1]; struct nfs_dns_ent key, *item; - unsigned long ttl; + unsigned int ttl; ssize_t len; int ret = -EINVAL; @@ -240,7 +240,8 @@ static int nfs_dns_parse(struct cache_detail *cd, char *buf, int buflen) key.namelen = len; memset(&key.h, 0, sizeof(key.h)); - ttl = get_expiry(&buf); + if (get_uint(&buf, &ttl) < 0) + goto out; if (ttl == 0) goto out; key.h.expiry_time = ttl + seconds_since_boot(); -- cgit v1.2.3 From 7175fe90153e6375082d65884fbb41ab3bbb4901 Mon Sep 17 00:00:00 2001 From: Yanchuan Nian Date: Wed, 31 Oct 2012 16:05:48 +0800 Subject: nfs: Check whether a layout pointer is NULL before free it The new layout pointer in pnfs_find_alloc_layout() may be NULL because of out of memory. We must check for this, otherwise pnfs_free_layout_hdr() will go wrong because it cannot deal with a NULL pointer. 
Signed-off-by: Yanchuan Nian Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index fe624c91bd0..2878f97bd78 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -925,8 +925,8 @@ pnfs_find_alloc_layout(struct inode *ino, if (likely(nfsi->layout == NULL)) { /* Won the race? */ nfsi->layout = new; return new; - } - pnfs_free_layout_hdr(new); + } else if (new != NULL) + pnfs_free_layout_hdr(new); out_existing: pnfs_get_layout_hdr(nfsi->layout); return nfsi->layout; -- cgit v1.2.3 From acce94e68a0f346115fd41cdc298197d2d5a59ad Mon Sep 17 00:00:00 2001 From: Scott Mayhew Date: Tue, 16 Oct 2012 13:22:19 -0400 Subject: nfsv3: Make v3 mounts fail with ETIMEDOUTs instead EIO on mountd timeouts In a very busy v3 environment, rpc.mountd can respond to the NULL procedure but not the MNT procedure in a timely manner, causing the MNT procedure to time out. The problem is the mount system call returns EIO which causes the mount to fail, instead of ETIMEDOUT, which would cause the mount to be retried. This patch sets the RPC_TASK_SOFT|RPC_TASK_TIMEOUT flags to the rpc_call_sync() call in nfs_mount() which causes ETIMEDOUT to be returned on timed out connections. 
Signed-off-by: Steve Dickson Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org --- fs/nfs/mount_clnt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c index 8e65c7f1f87..015f71f8f62 100644 --- a/fs/nfs/mount_clnt.c +++ b/fs/nfs/mount_clnt.c @@ -181,7 +181,7 @@ int nfs_mount(struct nfs_mount_request *info) else msg.rpc_proc = &mnt_clnt->cl_procinfo[MOUNTPROC_MNT]; - status = rpc_call_sync(mnt_clnt, &msg, 0); + status = rpc_call_sync(mnt_clnt, &msg, RPC_TASK_SOFT|RPC_TASK_TIMEOUT); rpc_shutdown_client(mnt_clnt); if (status < 0) -- cgit v1.2.3 From 97a54868262da1629a3e65121e65b8e8c4419d9f Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Sun, 21 Oct 2012 19:23:52 +0100 Subject: nfs: Show original device name verbatim in /proc/*/mount{s,info} Since commit c7f404b ('vfs: new superblock methods to override /proc/*/mount{s,info}'), nfs_path() is used to generate the mounted device name reported back to userland. nfs_path() always generates a trailing slash when the given dentry is the root of an NFS mount, but userland may expect the original device name to be returned verbatim (as it used to be). Make this canonicalisation optional and change the callers accordingly. 
[jrnieder@gmail.com: use flag instead of bool argument] Reported-and-tested-by: Chris Hiestand Reference: http://bugs.debian.org/669314 Signed-off-by: Ben Hutchings Cc: # v2.6.39+ Signed-off-by: Jonathan Nieder Signed-off-by: Trond Myklebust --- fs/nfs/internal.h | 5 +++-- fs/nfs/namespace.c | 19 ++++++++++++++----- fs/nfs/nfs4namespace.c | 3 ++- fs/nfs/super.c | 2 +- 4 files changed, 20 insertions(+), 9 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 59b133c5d65..a54fe51c1df 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -353,8 +353,9 @@ extern void nfs_sb_active(struct super_block *sb); extern void nfs_sb_deactive(struct super_block *sb); /* namespace.c */ +#define NFS_PATH_CANONICAL 1 extern char *nfs_path(char **p, struct dentry *dentry, - char *buffer, ssize_t buflen); + char *buffer, ssize_t buflen, unsigned flags); extern struct vfsmount *nfs_d_automount(struct path *path); struct vfsmount *nfs_submount(struct nfs_server *, struct dentry *, struct nfs_fh *, struct nfs_fattr *); @@ -498,7 +499,7 @@ static inline char *nfs_devname(struct dentry *dentry, char *buffer, ssize_t buflen) { char *dummy; - return nfs_path(&dummy, dentry, buffer, buflen); + return nfs_path(&dummy, dentry, buffer, buflen, NFS_PATH_CANONICAL); } /* diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index 655925373b9..dd057bc6b65 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -33,6 +33,7 @@ int nfs_mountpoint_expiry_timeout = 500 * HZ; * @dentry - pointer to dentry * @buffer - result buffer * @buflen - length of buffer + * @flags - options (see below) * * Helper function for constructing the server pathname * by arbitrary hashed dentry. @@ -40,8 +41,14 @@ int nfs_mountpoint_expiry_timeout = 500 * HZ; * This is mainly for use in figuring out the path on the * server side when automounting on top of an existing partition * and in generating /proc/mounts and friends. 
+ * + * Supported flags: + * NFS_PATH_CANONICAL: ensure there is exactly one slash after + * the original device (export) name + * (if unset, the original name is returned verbatim) */ -char *nfs_path(char **p, struct dentry *dentry, char *buffer, ssize_t buflen) +char *nfs_path(char **p, struct dentry *dentry, char *buffer, ssize_t buflen, + unsigned flags) { char *end; int namelen; @@ -74,7 +81,7 @@ rename_retry: rcu_read_unlock(); goto rename_retry; } - if (*end != '/') { + if ((flags & NFS_PATH_CANONICAL) && *end != '/') { if (--buflen < 0) { spin_unlock(&dentry->d_lock); rcu_read_unlock(); @@ -91,9 +98,11 @@ rename_retry: return end; } namelen = strlen(base); - /* Strip off excess slashes in base string */ - while (namelen > 0 && base[namelen - 1] == '/') - namelen--; + if (flags & NFS_PATH_CANONICAL) { + /* Strip off excess slashes in base string */ + while (namelen > 0 && base[namelen - 1] == '/') + namelen--; + } buflen -= namelen; if (buflen < 0) { spin_unlock(&dentry->d_lock); diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c index 79fbb61ce20..1e09eb78543 100644 --- a/fs/nfs/nfs4namespace.c +++ b/fs/nfs/nfs4namespace.c @@ -81,7 +81,8 @@ static char *nfs_path_component(const char *nfspath, const char *end) static char *nfs4_path(struct dentry *dentry, char *buffer, ssize_t buflen) { char *limit; - char *path = nfs_path(&limit, dentry, buffer, buflen); + char *path = nfs_path(&limit, dentry, buffer, buflen, + NFS_PATH_CANONICAL); if (!IS_ERR(path)) { char *path_component = nfs_path_component(path, limit); if (path_component) diff --git a/fs/nfs/super.c b/fs/nfs/super.c index e831bce4976..13c2a5be476 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -771,7 +771,7 @@ int nfs_show_devname(struct seq_file *m, struct dentry *root) int err = 0; if (!page) return -ENOMEM; - devname = nfs_path(&dummy, root, page, PAGE_SIZE); + devname = nfs_path(&dummy, root, page, PAGE_SIZE, 0); if (IS_ERR(devname)) err = PTR_ERR(devname); else -- cgit v1.2.3 From 
324d003b0cd82151adbaecefef57b73f7959a469 Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Tue, 30 Oct 2012 17:01:39 -0400 Subject: NFS: add nfs_sb_deactive_async to avoid deadlock Use nfs_sb_deactive_async instead of nfs_sb_deactive when in a workqueue context. This avoids a deadlock where rpc_shutdown_client loops forever in a workqueue kworker context, trying to kill all RPC tasks associated with the client, while one or more of these tasks have already been assigned to the same kworker (and will never run rpc_exit_task). This approach is needed because RPC tasks that have already been assigned to a kworker by queue_work cannot be canceled, as explained in the comment for workqueue.c:insert_wq_barrier. Signed-off-by: Weston Andros Adamson [Trond: add module_get/put.] Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 5 ++++- fs/nfs/internal.h | 1 + fs/nfs/nfs4proc.c | 2 +- fs/nfs/super.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ fs/nfs/unlink.c | 2 +- 5 files changed, 56 insertions(+), 3 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 5c7325c5c5e..6fa01aea248 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -685,7 +685,10 @@ static void __put_nfs_open_context(struct nfs_open_context *ctx, int is_sync) if (ctx->cred != NULL) put_rpccred(ctx->cred); dput(ctx->dentry); - nfs_sb_deactive(sb); + if (is_sync) + nfs_sb_deactive(sb); + else + nfs_sb_deactive_async(sb); kfree(ctx->mdsthreshold); kfree(ctx); } diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index a54fe51c1df..05521cadac2 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -351,6 +351,7 @@ extern int __init register_nfs_fs(void); extern void __exit unregister_nfs_fs(void); extern void nfs_sb_active(struct super_block *sb); extern void nfs_sb_deactive(struct super_block *sb); +extern void nfs_sb_deactive_async(struct super_block *sb); /* namespace.c */ #define NFS_PATH_CANONICAL 1 diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c 
index 1465364501b..8cfbac1a8d5 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2197,7 +2197,7 @@ static void nfs4_free_closedata(void *data) nfs4_put_open_state(calldata->state); nfs_free_seqid(calldata->arg.seqid); nfs4_put_state_owner(sp); - nfs_sb_deactive(sb); + nfs_sb_deactive_async(sb); kfree(calldata); } diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 13c2a5be476..652d3f7176a 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -54,6 +54,7 @@ #include #include #include +#include #include @@ -415,6 +416,54 @@ void nfs_sb_deactive(struct super_block *sb) } EXPORT_SYMBOL_GPL(nfs_sb_deactive); +static int nfs_deactivate_super_async_work(void *ptr) +{ + struct super_block *sb = ptr; + + deactivate_super(sb); + module_put_and_exit(0); + return 0; +} + +/* + * same effect as deactivate_super, but will do final unmount in kthread + * context + */ +static void nfs_deactivate_super_async(struct super_block *sb) +{ + struct task_struct *task; + char buf[INET6_ADDRSTRLEN + 1]; + struct nfs_server *server = NFS_SB(sb); + struct nfs_client *clp = server->nfs_client; + + if (!atomic_add_unless(&sb->s_active, -1, 1)) { + rcu_read_lock(); + snprintf(buf, sizeof(buf), + rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR)); + rcu_read_unlock(); + + __module_get(THIS_MODULE); + task = kthread_run(nfs_deactivate_super_async_work, sb, + "%s-deactivate-super", buf); + if (IS_ERR(task)) { + pr_err("%s: kthread_run: %ld\n", + __func__, PTR_ERR(task)); + /* make synchronous call and hope for the best */ + deactivate_super(sb); + module_put(THIS_MODULE); + } + } +} + +void nfs_sb_deactive_async(struct super_block *sb) +{ + struct nfs_server *server = NFS_SB(sb); + + if (atomic_dec_and_test(&server->active)) + nfs_deactivate_super_async(sb); +} +EXPORT_SYMBOL_GPL(nfs_sb_deactive_async); + /* * Deliver file system statistics to userspace */ diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c index 13cea637eff..3f79c77153b 100644 --- a/fs/nfs/unlink.c +++ b/fs/nfs/unlink.c @@ 
-95,7 +95,7 @@ static void nfs_async_unlink_release(void *calldata) nfs_dec_sillycount(data->dir); nfs_free_unlinkdata(data); - nfs_sb_deactive(sb); + nfs_sb_deactive_async(sb); } static void nfs_unlink_prepare(struct rpc_task *task, void *calldata) -- cgit v1.2.3 From f9b1ef5f06d65a01952169b67d474f7f0dcb0206 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 29 Oct 2012 16:48:40 -0400 Subject: NFSv4: Initialise the NFSv4.1 slot table highest_used_slotid correctly Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 8cfbac1a8d5..091baab3ecc 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5677,7 +5677,7 @@ static void nfs4_add_and_init_slots(struct nfs4_slot_table *tbl, tbl->slots = new; tbl->max_slots = max_slots; } - tbl->highest_used_slotid = -1; /* no slot is currently used */ + tbl->highest_used_slotid = NFS4_NO_SLOT; for (i = 0; i < tbl->max_slots; i++) tbl->slots[i].seq_nr = ivalue; spin_unlock(&tbl->slot_tbl_lock); -- cgit v1.2.3 From 998f40b550f257e436485291802fa938e4cf580f Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Fri, 2 Nov 2012 18:00:56 -0400 Subject: NFS4: nfs4_opendata_access should return errno Return errno - not an NFS4ERR_. This worked because NFS4ERR_ACCESS == EACCES. Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 091baab3ecc..5eec4429970 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1749,7 +1749,7 @@ static int nfs4_opendata_access(struct rpc_cred *cred, /* even though OPEN succeeded, access is denied. 
Close the file */ nfs4_close_state(state, fmode); - return -NFS4ERR_ACCESS; + return -EACCES; } /* -- cgit v1.2.3 From 1fea73a86527d7ec463af6ff04b0830e1425ff6c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 15 Oct 2012 11:24:57 -0400 Subject: NFS: Get rid of unnecessary asserts If the nfs_client fails to initialise correctly, then it will return an error condition. Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 6 +----- fs/nfs/nfs4client.c | 4 ---- 2 files changed, 1 insertion(+), 9 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 8b39a42ac35..c285e0a117e 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -277,7 +277,7 @@ void nfs_put_client(struct nfs_client *clp) nfs_cb_idr_remove_locked(clp); spin_unlock(&nn->nfs_client_lock); - BUG_ON(!list_empty(&clp->cl_superblocks)); + WARN_ON_ONCE(!list_empty(&clp->cl_superblocks)); clp->rpc_ops->free_client(clp); } @@ -1061,10 +1061,6 @@ struct nfs_server *nfs_create_server(struct nfs_mount_info *mount_info, if (error < 0) goto error; - BUG_ON(!server->nfs_client); - BUG_ON(!server->nfs_client->rpc_ops); - BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops); - /* Probe the root fh to retrieve its FSID */ error = nfs_probe_fsinfo(server, mount_info->mntfh, fattr); if (error < 0) diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 6bacfde1319..72717e67b34 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -713,10 +713,6 @@ static int nfs4_server_common_setup(struct nfs_server *server, struct nfs_fattr *fattr; int error; - BUG_ON(!server->nfs_client); - BUG_ON(!server->nfs_client->rpc_ops); - BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops); - /* data servers support only a subset of NFSv4.1 */ if (is_ds_only_client(server->nfs_client)) return -EPROTONOSUPPORT; -- cgit v1.2.3 From 7fc388460e8479c5b3120cb2fcf0e0daec70b93f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 15 Oct 2012 11:51:21 -0400 Subject: NFS: Remove asserts from the 
NFS XDR code Convert the ones that are not trivial to check into WARN_ON_ONCE(). Remove checks for things such as NFS2_MAXPATHLEN, which are trivially done by the caller. Add a comment to the case of nfs3_xdr_enc_setacl3args. What is being done there is just wrong... Signed-off-by: Trond Myklebust --- fs/nfs/nfs2xdr.c | 4 +--- fs/nfs/nfs3xdr.c | 7 +++---- fs/nfs/nfs4xdr.c | 6 ++---- 3 files changed, 6 insertions(+), 11 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c index d04f0df7be5..06b9df49f7f 100644 --- a/fs/nfs/nfs2xdr.c +++ b/fs/nfs/nfs2xdr.c @@ -195,7 +195,6 @@ static void encode_fhandle(struct xdr_stream *xdr, const struct nfs_fh *fh) { __be32 *p; - BUG_ON(fh->size != NFS2_FHSIZE); p = xdr_reserve_space(xdr, NFS2_FHSIZE); memcpy(p, fh->data, NFS2_FHSIZE); } @@ -388,7 +387,7 @@ static void encode_filename(struct xdr_stream *xdr, { __be32 *p; - BUG_ON(length > NFS2_MAXNAMLEN); + WARN_ON_ONCE(length > NFS2_MAXNAMLEN); p = xdr_reserve_space(xdr, 4 + length); xdr_encode_opaque(p, name, length); } @@ -428,7 +427,6 @@ static void encode_path(struct xdr_stream *xdr, struct page **pages, u32 length) { __be32 *p; - BUG_ON(length > NFS2_MAXPATHLEN); p = xdr_reserve_space(xdr, 4); *p = cpu_to_be32(length); xdr_write_pages(xdr, pages, 0, length); diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index 6cbe89400df..bffc32406fb 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -198,7 +198,7 @@ static void encode_filename3(struct xdr_stream *xdr, { __be32 *p; - BUG_ON(length > NFS3_MAXNAMLEN); + WARN_ON_ONCE(length > NFS3_MAXNAMLEN); p = xdr_reserve_space(xdr, 4 + length); xdr_encode_opaque(p, name, length); } @@ -238,7 +238,6 @@ out_overflow: static void encode_nfspath3(struct xdr_stream *xdr, struct page **pages, const u32 length) { - BUG_ON(length > NFS3_MAXPATHLEN); encode_uint32(xdr, length); xdr_write_pages(xdr, pages, 0, length); } @@ -388,7 +387,6 @@ out_overflow: */ static void encode_ftype3(struct xdr_stream *xdr, const 
u32 type) { - BUG_ON(type > NF3FIFO); encode_uint32(xdr, type); } @@ -443,7 +441,7 @@ static void encode_nfs_fh3(struct xdr_stream *xdr, const struct nfs_fh *fh) { __be32 *p; - BUG_ON(fh->size > NFS3_FHSIZE); + WARN_ON_ONCE(fh->size > NFS3_FHSIZE); p = xdr_reserve_space(xdr, 4 + fh->size); xdr_encode_opaque(p, fh->data, fh->size); } @@ -1339,6 +1337,7 @@ static void nfs3_xdr_enc_setacl3args(struct rpc_rqst *req, error = nfsacl_encode(xdr->buf, base, args->inode, (args->mask & NFS_ACL) ? args->acl_access : NULL, 1, 0); + /* FIXME: this is just broken */ BUG_ON(error < 0); error = nfsacl_encode(xdr->buf, base + error, args->inode, (args->mask & NFS_DFACL) ? diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 40836ee5dc3..672d9b0ef2c 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -936,7 +936,7 @@ static void encode_compound_hdr(struct xdr_stream *xdr, * but this is not required as a MUST for the server to do so. */ hdr->replen = RPC_REPHDRSIZE + auth->au_rslack + 3 + hdr->taglen; - BUG_ON(hdr->taglen > NFS4_MAXTAGLEN); + WARN_ON_ONCE(hdr->taglen > NFS4_MAXTAGLEN); encode_string(xdr, hdr->taglen, hdr->tag); p = reserve_space(xdr, 8); *p++ = cpu_to_be32(hdr->minorversion); @@ -955,7 +955,7 @@ static void encode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 op, static void encode_nops(struct compound_hdr *hdr) { - BUG_ON(hdr->nops > NFS4_MAX_OPS); + WARN_ON_ONCE(hdr->nops > NFS4_MAX_OPS); *hdr->nops_p = htonl(hdr->nops); } @@ -1403,7 +1403,6 @@ static void encode_opentype(struct xdr_stream *xdr, const struct nfs_openargs *a *p = cpu_to_be32(NFS4_OPEN_NOCREATE); break; default: - BUG_ON(arg->claim != NFS4_OPEN_CLAIM_NULL); *p = cpu_to_be32(NFS4_OPEN_CREATE); encode_createmode(xdr, arg); } @@ -1621,7 +1620,6 @@ encode_setacl(struct xdr_stream *xdr, struct nfs_setaclargs *arg, struct compoun p = reserve_space(xdr, 2*4); *p++ = cpu_to_be32(1); *p = cpu_to_be32(FATTR4_WORD0_ACL); - BUG_ON(arg->acl_len % 4); p = reserve_space(xdr, 4); *p = 
cpu_to_be32(arg->acl_len); xdr_write_pages(xdr, arg->acl_pages, arg->acl_pgbase, arg->acl_len); -- cgit v1.2.3 From d3edcf96141a7729b12ef5ecab6d5f634e24c61a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 15 Oct 2012 13:14:43 -0400 Subject: NFSv4: Remove the BUG_ON() from nfs4_get_lease_time_prepare()... An EAGAIN return value would be unexpected, but there is no reason to BUG... Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 5eec4429970..14d86ef493a 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5581,8 +5581,8 @@ static void nfs4_get_lease_time_prepare(struct rpc_task *task, &data->args->la_seq_args, &data->res->lr_seq_res, task); - BUG_ON(ret == -EAGAIN); - rpc_call_start(task); + if (ret != -EAGAIN) + rpc_call_start(task); dprintk("<-- %s\n", __func__); } -- cgit v1.2.3 From eba24e1fe57df4e4cdee58af940f762eb336a113 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 15 Oct 2012 14:47:33 -0400 Subject: NFSv4.1: Remove unused function last_byte_offset Signed-off-by: Trond Myklebust --- fs/nfs/objlayout/objlayout.c | 11 ----------- fs/nfs/pnfs.c | 11 ----------- 2 files changed, 22 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c index 87461354530..a9ebd817278 100644 --- a/fs/nfs/objlayout/objlayout.c +++ b/fs/nfs/objlayout/objlayout.c @@ -148,17 +148,6 @@ end_offset(u64 start, u64 len) return end >= start ? end : NFS4_MAX_UINT64; } -/* last octet in a range */ -static inline u64 -last_byte_offset(u64 start, u64 len) -{ - u64 end; - - BUG_ON(!len); - end = start + len; - return end > start ? 
end - 1 : NFS4_MAX_UINT64; -} - static void _fix_verify_io_params(struct pnfs_layout_segment *lseg, struct page ***p_pages, unsigned *p_pgbase, u64 offset, unsigned long count) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 2878f97bd78..dcbc9b20474 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -369,17 +369,6 @@ end_offset(u64 start, u64 len) return end >= start ? end : NFS4_MAX_UINT64; } -/* last octet in a range */ -static inline u64 -last_byte_offset(u64 start, u64 len) -{ - u64 end; - - BUG_ON(!len); - end = start + len; - return end > start ? end - 1 : NFS4_MAX_UINT64; -} - /* * is l2 fully contained in l1? * start1 end1 -- cgit v1.2.3 From bc5a89b337ee4b2fa6f577e7e1220d8c1ece71fc Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 15 Oct 2012 14:58:04 -0400 Subject: NFSv4.1: Remove assertion BUG_ON()s from the files and generic layout code Signed-off-by: Trond Myklebust --- fs/nfs/nfs4filelayout.c | 13 ++++--------- fs/nfs/nfs4filelayoutdev.c | 2 -- fs/nfs/pnfs.c | 6 ++---- 3 files changed, 6 insertions(+), 15 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 2e45fd9c02a..bfb28fa38e7 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -512,7 +512,6 @@ filelayout_read_pagelist(struct nfs_read_data *data) loff_t offset = data->args.offset; u32 j, idx; struct nfs_fh *fh; - int status; dprintk("--> %s ino %lu pgbase %u req %Zu@%llu\n", __func__, hdr->inode->i_ino, @@ -538,9 +537,8 @@ filelayout_read_pagelist(struct nfs_read_data *data) data->mds_offset = offset; /* Perform an asynchronous read to ds */ - status = nfs_initiate_read(ds->ds_clp->cl_rpcclient, data, + nfs_initiate_read(ds->ds_clp->cl_rpcclient, data, &filelayout_read_call_ops, RPC_TASK_SOFTCONN); - BUG_ON(status != 0); return PNFS_ATTEMPTED; } @@ -554,7 +552,6 @@ filelayout_write_pagelist(struct nfs_write_data *data, int sync) loff_t offset = data->args.offset; u32 j, idx; struct nfs_fh *fh; - int status; /* Retrieve 
the correct rpc_client for the byte range */ j = nfs4_fl_calc_j_index(lseg, offset); @@ -579,10 +576,9 @@ filelayout_write_pagelist(struct nfs_write_data *data, int sync) data->args.offset = filelayout_get_dserver_offset(lseg, offset); /* Perform an asynchronous write */ - status = nfs_initiate_write(ds->ds_clp->cl_rpcclient, data, + nfs_initiate_write(ds->ds_clp->cl_rpcclient, data, &filelayout_write_call_ops, sync, RPC_TASK_SOFTCONN); - BUG_ON(status != 0); return PNFS_ATTEMPTED; } @@ -909,7 +905,7 @@ static void filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) { - BUG_ON(pgio->pg_lseg != NULL); + WARN_ON_ONCE(pgio->pg_lseg != NULL); if (req->wb_offset != req->wb_pgbase) { /* @@ -939,7 +935,7 @@ filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_commit_info cinfo; int status; - BUG_ON(pgio->pg_lseg != NULL); + WARN_ON_ONCE(pgio->pg_lseg != NULL); if (req->wb_offset != req->wb_pgbase) goto out_mds; @@ -1187,7 +1183,6 @@ static void filelayout_recover_commit_reqs(struct list_head *dst, */ for (i = 0, b = cinfo->ds->buckets; i < cinfo->ds->nbuckets; i++, b++) { if (transfer_commit_list(&b->written, dst, cinfo, 0)) { - BUG_ON(!list_empty(&b->written)); pnfs_put_lseg(b->wlseg); b->wlseg = NULL; } diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c index a8eaa9b7bb0..93e2530d709 100644 --- a/fs/nfs/nfs4filelayoutdev.c +++ b/fs/nfs/nfs4filelayoutdev.c @@ -162,8 +162,6 @@ nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds) dprintk("--> %s DS %s au_flavor %d\n", __func__, ds->ds_remotestr, mds_srv->nfs_client->cl_rpcclient->cl_auth->au_flavor); - BUG_ON(list_empty(&ds->ds_addrs)); - list_for_each_entry(da, &ds->ds_addrs, da_node) { dprintk("%s: DS %s: trying address %s\n", __func__, ds->ds_remotestr, da->da_remotestr); diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index dcbc9b20474..e7165d91536 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -634,7 +634,6 @@ send_layoutget(struct 
pnfs_layout_hdr *lo, dprintk("--> %s\n", __func__); - BUG_ON(ctx == NULL); lgp = kzalloc(sizeof(*lgp), gfp_flags); if (lgp == NULL) return NULL; @@ -1115,7 +1114,6 @@ pnfs_update_layout(struct inode *ino, * chance of a CB_LAYOUTRECALL(FILE) coming in. */ spin_lock(&clp->cl_lock); - BUG_ON(!list_empty(&lo->plh_layouts)); list_add_tail(&lo->plh_layouts, &server->layouts); spin_unlock(&clp->cl_lock); } @@ -1211,7 +1209,7 @@ pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *r { u64 rd_size = req->wb_bytes; - BUG_ON(pgio->pg_lseg != NULL); + WARN_ON_ONCE(pgio->pg_lseg != NULL); if (req->wb_offset != req->wb_pgbase) { nfs_pageio_reset_read_mds(pgio); @@ -1240,7 +1238,7 @@ void pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req, u64 wb_size) { - BUG_ON(pgio->pg_lseg != NULL); + WARN_ON_ONCE(pgio->pg_lseg != NULL); if (req->wb_offset != req->wb_pgbase) { nfs_pageio_reset_write_mds(pgio); -- cgit v1.2.3 From deed85e760c8c88cd984c5921dd8cb6b697b6134 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 15 Oct 2012 15:02:01 -0400 Subject: NFS: Remove BUG_ON() calls from the generic writeback code ...and ensure that we set the return value for nfs_page_async_flush() to zero! 
(Reported-by: Dros Adamson) Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 9347ab7c957..f5bc8e11713 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -239,21 +239,18 @@ int nfs_congestion_kb; #define NFS_CONGESTION_OFF_THRESH \ (NFS_CONGESTION_ON_THRESH - (NFS_CONGESTION_ON_THRESH >> 2)) -static int nfs_set_page_writeback(struct page *page) +static void nfs_set_page_writeback(struct page *page) { + struct nfs_server *nfss = NFS_SERVER(page_file_mapping(page)->host); int ret = test_set_page_writeback(page); - if (!ret) { - struct inode *inode = page_file_mapping(page)->host; - struct nfs_server *nfss = NFS_SERVER(inode); + WARN_ON_ONCE(ret != 0); - if (atomic_long_inc_return(&nfss->writeback) > - NFS_CONGESTION_ON_THRESH) { - set_bdi_congested(&nfss->backing_dev_info, - BLK_RW_ASYNC); - } + if (atomic_long_inc_return(&nfss->writeback) > + NFS_CONGESTION_ON_THRESH) { + set_bdi_congested(&nfss->backing_dev_info, + BLK_RW_ASYNC); } - return ret; } static void nfs_end_page_writeback(struct page *page) @@ -315,10 +312,10 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, if (IS_ERR(req)) goto out; - ret = nfs_set_page_writeback(page); - BUG_ON(ret != 0); - BUG_ON(test_bit(PG_CLEAN, &req->wb_flags)); + nfs_set_page_writeback(page); + WARN_ON_ONCE(test_bit(PG_CLEAN, &req->wb_flags)); + ret = 0; if (!nfs_pageio_add_request(pgio, req)) { nfs_redirty_request(req); ret = pgio->pg_error; @@ -451,8 +448,6 @@ static void nfs_inode_remove_request(struct nfs_page *req) struct inode *inode = req->wb_context->dentry->d_inode; struct nfs_inode *nfsi = NFS_I(inode); - BUG_ON (!NFS_WBACK_BUSY(req)); - spin_lock(&inode->i_lock); if (likely(!PageSwapCache(req->wb_page))) { set_page_private(req->wb_page, 0); @@ -1727,7 +1722,6 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page) struct nfs_page *req; 
int ret = 0; - BUG_ON(!PageLocked(page)); for (;;) { wait_on_page_writeback(page); req = nfs_page_find_request(page); -- cgit v1.2.3 From 4ea8fed593218b658927b763f02941cd16c2ed9d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 15 Oct 2012 15:47:41 -0400 Subject: NFSv4: Get rid of unnecessary BUG_ON()s Signed-off-by: Trond Myklebust --- fs/nfs/cache_lib.c | 1 - fs/nfs/callback_proc.c | 1 - fs/nfs/nfs4file.c | 1 - fs/nfs/nfs4proc.c | 14 ++++++-------- fs/nfs/nfs4state.c | 1 - 5 files changed, 6 insertions(+), 12 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/cache_lib.c b/fs/nfs/cache_lib.c index dded2636811..862a2f16db6 100644 --- a/fs/nfs/cache_lib.c +++ b/fs/nfs/cache_lib.c @@ -118,7 +118,6 @@ int nfs_cache_register_sb(struct super_block *sb, struct cache_detail *cd) struct dentry *dir; dir = rpc_d_lookup_sb(sb, "cache"); - BUG_ON(dir == NULL); ret = sunrpc_cache_register_pipefs(dir, cd->name, 0600, cd); dput(dir); return ret; diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 76b4a7a3e55..0be08b964f3 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -216,7 +216,6 @@ static u32 initiate_bulk_draining(struct nfs_client *clp, } pnfs_get_layout_hdr(lo); spin_unlock(&ino->i_lock); - BUG_ON(!list_empty(&lo->plh_bulk_recall)); list_add(&lo->plh_bulk_recall, &recall_list); } } diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index afddd6639af..e7699308364 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -20,7 +20,6 @@ nfs4_file_open(struct inode *inode, struct file *filp) struct iattr attr; int err; - BUG_ON(inode != dentry->d_inode); /* * If no cached dentry exists or if it's negative, NFSv4 handled the * opens in ->lookup() or ->create(). 
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 14d86ef493a..6300cdd8110 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -206,7 +206,6 @@ static void nfs4_setup_readdir(u64 cookie, __be32 *verifier, struct dentry *dent { __be32 *start, *p; - BUG_ON(readdir->count < 80); if (cookie > 2) { readdir->cookie = cookie; memcpy(&readdir->verifier, verifier, sizeof(readdir->verifier)); @@ -415,7 +414,6 @@ static void renew_lease(const struct nfs_server *server, unsigned long timestamp static void nfs4_free_slot(struct nfs4_slot_table *tbl, u32 slotid) { - BUG_ON(slotid >= NFS4_MAX_SLOT_TABLE); /* clear used bit in bitmap */ __clear_bit(slotid, tbl->used_slots); @@ -2533,7 +2531,8 @@ static int nfs4_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle, rpc_authflavor_t flav_array[NFS_MAX_SECFLAVORS]; len = rpcauth_list_flavors(flav_array, ARRAY_SIZE(flav_array)); - BUG_ON(len < 0); + if (len < 0) + return len; for (i = 0; i < len; i++) { /* AUTH_UNIX is the default flavor if none was specified, @@ -3362,9 +3361,6 @@ static int _nfs4_proc_mknod(struct inode *dir, struct dentry *dentry, int mode = sattr->ia_mode; int status = -ENOMEM; - BUG_ON(!(sattr->ia_valid & ATTR_MODE)); - BUG_ON(!S_ISFIFO(mode) && !S_ISBLK(mode) && !S_ISCHR(mode) && !S_ISSOCK(mode)); - data = nfs4_alloc_createdata(dir, &dentry->d_name, sattr, NF4SOCK); if (data == NULL) goto out; @@ -3380,10 +3376,13 @@ static int _nfs4_proc_mknod(struct inode *dir, struct dentry *dentry, data->arg.ftype = NF4CHR; data->arg.u.device.specdata1 = MAJOR(rdev); data->arg.u.device.specdata2 = MINOR(rdev); + } else if (!S_ISSOCK(mode)) { + status = -EINVAL; + goto out_free; } status = nfs4_do_create(dir, dentry, data); - +out_free: nfs4_free_createdata(data); out: return status; @@ -5357,7 +5356,6 @@ int nfs4_proc_bind_conn_to_session(struct nfs_client *clp, struct rpc_cred *cred }; dprintk("--> %s\n", __func__); - BUG_ON(clp == NULL); res.session = kzalloc(sizeof(struct nfs4_session), GFP_NOFS); 
if (unlikely(res.session == NULL)) { diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index c351e6b3983..e0a28dffd29 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1086,7 +1086,6 @@ void nfs_free_seqid(struct nfs_seqid *seqid) */ static void nfs_increment_seqid(int status, struct nfs_seqid *seqid) { - BUG_ON(list_first_entry(&seqid->sequence->list, struct nfs_seqid, list) != seqid); switch (status) { case 0: break; -- cgit v1.2.3 From f48407ddd46bd215a7b4e1af3940e759a93640c5 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 15 Oct 2012 16:19:30 -0400 Subject: NFS: Remove BUG_ON()s in the fs/nfs/inode.c Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 6fa01aea248..117183b1ee0 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -112,8 +112,8 @@ void nfs_clear_inode(struct inode *inode) /* * The following should never happen... */ - BUG_ON(nfs_have_writebacks(inode)); - BUG_ON(!list_empty(&NFS_I(inode)->open_files)); + WARN_ON_ONCE(nfs_have_writebacks(inode)); + WARN_ON_ONCE(!list_empty(&NFS_I(inode)->open_files)); nfs_zap_acl_cache(inode); nfs_access_zap_cache(inode); nfs_fscache_release_inode_cookie(inode); -- cgit v1.2.3 From 28d79ea33f52cae1ea04808e1ec52b8657b5d804 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 15 Oct 2012 16:25:42 -0400 Subject: NFS: Remove the BUG_ON() in the mount code Signed-off-by: Trond Myklebust --- fs/nfs/mount_clnt.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c index 015f71f8f62..91a6faf811a 100644 --- a/fs/nfs/mount_clnt.c +++ b/fs/nfs/mount_clnt.c @@ -169,6 +169,9 @@ int nfs_mount(struct nfs_mount_request *info) (info->hostname ? 
info->hostname : "server"), info->dirpath); + if (strlen(info->dirpath) > MNTPATHLEN) + return -ENAMETOOLONG; + if (info->noresvport) args.flags |= RPC_CLNT_CREATE_NONPRIVPORT; @@ -242,6 +245,9 @@ void nfs_umount(const struct nfs_mount_request *info) struct rpc_clnt *clnt; int status; + if (strlen(info->dirpath) > MNTPATHLEN) + return; + if (info->noresvport) args.flags |= RPC_CLNT_CREATE_NONPRIVPORT; @@ -283,7 +289,6 @@ static void encode_mntdirpath(struct xdr_stream *xdr, const char *pathname) const u32 pathname_len = strlen(pathname); __be32 *p; - BUG_ON(pathname_len > MNTPATHLEN); p = xdr_reserve_space(xdr, 4 + pathname_len); xdr_encode_opaque(p, pathname, pathname_len); } -- cgit v1.2.3 From 6bdb5f213c4344324f600dde885f25768fbd14db Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Mon, 12 Nov 2012 16:55:38 -0500 Subject: NFS: Add sequence_priviliged_ops for nfs4_proc_sequence() If I mount an NFS v4.1 server to a single client multiple times and then run xfstests over each mountpoint I usually get the client into a state where recovery deadlocks. The server informs the client of a cb_path_down sequence error, the client then does a bind_connection_to_session and checks the status of the lease. I found that bind_connection_to_session sets the NFS4_SESSION_DRAINING flag on the client, but this flag is never unset before nfs4_check_lease() reaches nfs4_proc_sequence(). This causes the client to deadlock, halting all NFS activity to the server. nfs4_proc_sequence() is only called by the state manager, so I can change it to run in privileged mode to bypass the NFS4_SESSION_DRAINING check and avoid the deadlock. 
Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org --- fs/nfs/nfs4proc.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 6300cdd8110..a32d953b08d 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -6136,13 +6136,26 @@ static void nfs41_sequence_prepare(struct rpc_task *task, void *data) rpc_call_start(task); } +static void nfs41_sequence_prepare_privileged(struct rpc_task *task, void *data) +{ + rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED); + nfs41_sequence_prepare(task, data); +} + static const struct rpc_call_ops nfs41_sequence_ops = { .rpc_call_done = nfs41_sequence_call_done, .rpc_call_prepare = nfs41_sequence_prepare, .rpc_release = nfs41_sequence_release, }; -static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred) +static const struct rpc_call_ops nfs41_sequence_privileged_ops = { + .rpc_call_done = nfs41_sequence_call_done, + .rpc_call_prepare = nfs41_sequence_prepare_privileged, + .rpc_release = nfs41_sequence_release, +}; + +static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred, + const struct rpc_call_ops *seq_ops) { struct nfs4_sequence_data *calldata; struct rpc_message msg = { @@ -6152,7 +6165,7 @@ static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, struct rpc_ struct rpc_task_setup task_setup_data = { .rpc_client = clp->cl_rpcclient, .rpc_message = &msg, - .callback_ops = &nfs41_sequence_ops, + .callback_ops = seq_ops, .flags = RPC_TASK_ASYNC | RPC_TASK_SOFT, }; @@ -6179,7 +6192,7 @@ static int nfs41_proc_async_sequence(struct nfs_client *clp, struct rpc_cred *cr if ((renew_flags & NFS4_RENEW_TIMEOUT) == 0) return 0; - task = _nfs41_proc_sequence(clp, cred); + task = _nfs41_proc_sequence(clp, cred, &nfs41_sequence_ops); if (IS_ERR(task)) ret = PTR_ERR(task); else @@ -6193,7 +6206,7 @@ static int 
nfs4_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred) struct rpc_task *task; int ret; - task = _nfs41_proc_sequence(clp, cred); + task = _nfs41_proc_sequence(clp, cred, &nfs41_sequence_privileged_ops); if (IS_ERR(task)) { ret = PTR_ERR(task); goto out; -- cgit v1.2.3 From 5df904aeb0d9baad90e78fc730dfe1afa4996005 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 21 Nov 2012 09:22:14 -0500 Subject: NFSv4.1: Handle session reset and bind_conn_to_session before lease check We can't send a SEQUENCE op unless the session is OK, so it is pointless to handle the CHECK_LEASE state before we've dealt with SESSION_RESET and BIND_CONN_TO_SESSION. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4state.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index e0a28dffd29..f3d1bc48c9c 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -2114,15 +2114,6 @@ static void nfs4_state_manager(struct nfs_client *clp) continue; } - if (test_and_clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state)) { - section = "check lease"; - status = nfs4_check_lease(clp); - if (status < 0) - goto out_error; - if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) - continue; - } - /* Initialize or reset the session */ if (test_and_clear_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state)) { section = "reset session"; @@ -2143,6 +2134,14 @@ static void nfs4_state_manager(struct nfs_client *clp) continue; } + if (test_and_clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state)) { + section = "check lease"; + status = nfs4_check_lease(clp); + if (status < 0) + goto out_error; + continue; + } + /* Recall session slots */ if (test_and_clear_bit(NFS4CLNT_RECALL_SLOT, &clp->cl_state)) { section = "recall slot"; -- cgit v1.2.3 From ae72ae676045274c82f3c25159a9dd7cfcf5ffae Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 20 Nov 2012 11:02:55 -0500 Subject: NFSv4.1: Don't confuse CREATE_SESSION arguments 
and results Don't store the target request and response sizes in the same variables used to store the server's replies to those targets. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index a32d953b08d..3e572dc316e 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5807,8 +5807,8 @@ void nfs4_destroy_session(struct nfs4_session *session) static void nfs4_init_channel_attrs(struct nfs41_create_session_args *args) { struct nfs4_session *session = args->client->cl_session; - unsigned int mxrqst_sz = session->fc_attrs.max_rqst_sz, - mxresp_sz = session->fc_attrs.max_resp_sz; + unsigned int mxrqst_sz = session->fc_target_max_rqst_sz, + mxresp_sz = session->fc_target_max_resp_sz; if (mxrqst_sz == 0) mxrqst_sz = NFS_MAX_FILE_IO_SIZE; @@ -6015,24 +6015,28 @@ int nfs4_init_session(struct nfs_server *server) { struct nfs_client *clp = server->nfs_client; struct nfs4_session *session; - unsigned int rsize, wsize; + unsigned int target_max_rqst_sz = NFS_MAX_FILE_IO_SIZE; + unsigned int target_max_resp_sz = NFS_MAX_FILE_IO_SIZE; if (!nfs4_has_session(clp)) return 0; + if (server->rsize != 0) + target_max_resp_sz = server->rsize; + target_max_resp_sz += nfs41_maxread_overhead; + + if (server->wsize != 0) + target_max_rqst_sz = server->wsize; + target_max_rqst_sz += nfs41_maxwrite_overhead; + session = clp->cl_session; spin_lock(&clp->cl_lock); if (test_and_clear_bit(NFS4_SESSION_INITING, &session->session_state)) { - - rsize = server->rsize; - if (rsize == 0) - rsize = NFS_MAX_FILE_IO_SIZE; - wsize = server->wsize; - if (wsize == 0) - wsize = NFS_MAX_FILE_IO_SIZE; - - session->fc_attrs.max_rqst_sz = wsize + nfs41_maxwrite_overhead; - session->fc_attrs.max_resp_sz = rsize + nfs41_maxread_overhead; + /* Initialise targets and channel attributes */ + session->fc_target_max_rqst_sz = target_max_rqst_sz; + 
session->fc_attrs.max_rqst_sz = target_max_rqst_sz; + session->fc_target_max_resp_sz = target_max_resp_sz; + session->fc_attrs.max_resp_sz = target_max_resp_sz; } spin_unlock(&clp->cl_lock); -- cgit v1.2.3 From 688a9024e2bc8d07cdc62e287dfb048722cf96df Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 20 Nov 2012 10:53:39 -0500 Subject: NFSv4.1: Adjust CREATE_SESSION arguments when mounting a new filesystem If we're mounting a new filesystem, ensure that the session has negotiated large enough request and reply sizes to match the wsize and rsize mount arguments. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 3e572dc316e..ee82cdddeeb 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -6037,9 +6037,22 @@ int nfs4_init_session(struct nfs_server *server) session->fc_attrs.max_rqst_sz = target_max_rqst_sz; session->fc_target_max_resp_sz = target_max_resp_sz; session->fc_attrs.max_resp_sz = target_max_resp_sz; + } else { + /* Just adjust the targets */ + if (target_max_rqst_sz > session->fc_target_max_rqst_sz) { + session->fc_target_max_rqst_sz = target_max_rqst_sz; + set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state); + } + if (target_max_resp_sz > session->fc_target_max_resp_sz) { + session->fc_target_max_resp_sz = target_max_resp_sz; + set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state); + } } spin_unlock(&clp->cl_lock); + if (test_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state)) + nfs4_schedule_lease_recovery(clp); + return nfs41_check_session_ready(clp); } -- cgit v1.2.3 From 43095d397219aa1898db23937b03c1215ef16a37 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 20 Nov 2012 11:13:12 -0500 Subject: NFSv4.1: We must bump the clientid sequence number after CREATE_SESSION We must always bump the clientid sequence number after a successful call to CREATE_SESSION on the server. 
The result of nfs4_verify_channel_attrs() is irrelevant to that requirement. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index ee82cdddeeb..1ac339b4f09 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5917,10 +5917,9 @@ static int _nfs4_proc_create_session(struct nfs_client *clp, status = rpc_call_sync(session->clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); - if (!status) + if (!status) { /* Verify the session's negotiated channel_attrs values */ status = nfs4_verify_channel_attrs(&args, session); - if (!status) { /* Increment the clientid slot sequence id */ clp->cl_seqid++; } -- cgit v1.2.3 From 2d473d378eb571ad77f9563653639aa35e22d39c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 19 Nov 2012 18:03:22 -0500 Subject: NFSv4.1: nfs4_alloc_slots doesn't need zeroing All that memory is going to be initialised to non-zero by nfs4_add_and_init_slots anyway. 
Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 1ac339b4f09..0402ebb9b49 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5658,7 +5658,7 @@ int nfs4_proc_get_lease_time(struct nfs_client *clp, struct nfs_fsinfo *fsinfo) static struct nfs4_slot *nfs4_alloc_slots(u32 max_slots, gfp_t gfp_flags) { - return kcalloc(max_slots, sizeof(struct nfs4_slot), gfp_flags); + return kmalloc_array(max_slots, sizeof(struct nfs4_slot), gfp_flags); } static void nfs4_add_and_init_slots(struct nfs4_slot_table *tbl, -- cgit v1.2.3 From 9216106a847a53e6d0fe6d11dfd9175f2ca7fccf Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 19 Nov 2012 19:50:45 -0500 Subject: NFSv4.1: clean up nfs4_recall_slot to use nfs4_alloc_slots Signed-off-by: Trond Myklebust --- fs/nfs/nfs4_fs.h | 2 ++ fs/nfs/nfs4proc.c | 2 +- fs/nfs/nfs4state.c | 3 +-- 3 files changed, 4 insertions(+), 3 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index a525fdefccd..36880b9aa91 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -258,6 +258,8 @@ extern int nfs4_proc_get_lease_time(struct nfs_client *clp, extern int nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data, bool sync); +extern struct nfs4_slot *nfs4_alloc_slots(u32 max_slots, gfp_t gfp_flags); + static inline bool is_ds_only_client(struct nfs_client *clp) { diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 0402ebb9b49..5e5cc5a5065 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5656,7 +5656,7 @@ int nfs4_proc_get_lease_time(struct nfs_client *clp, struct nfs_fsinfo *fsinfo) return status; } -static struct nfs4_slot *nfs4_alloc_slots(u32 max_slots, gfp_t gfp_flags) +struct nfs4_slot *nfs4_alloc_slots(u32 max_slots, gfp_t gfp_flags) { return kmalloc_array(max_slots, sizeof(struct nfs4_slot), gfp_flags); } diff --git a/fs/nfs/nfs4state.c 
b/fs/nfs/nfs4state.c index f3d1bc48c9c..96fcbb97fd6 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -2033,8 +2033,7 @@ static int nfs4_recall_slot(struct nfs_client *clp) return 0; nfs4_begin_drain_session(clp); fc_tbl = &clp->cl_session->fc_slot_table; - new = kmalloc(fc_tbl->target_max_slots * sizeof(struct nfs4_slot), - GFP_NOFS); + new = nfs4_alloc_slots(fc_tbl->target_max_slots, GFP_NOFS); if (!new) return -ENOMEM; -- cgit v1.2.3 From 933602e368c4452260c9bff4fbb3baba35cf987a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 16 Nov 2012 12:12:38 -0500 Subject: NFSv4.1: Shrink struct nfs4_sequence_res by moving sr_renewal_time Store the renewal time inside the session slot instead. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 5e5cc5a5065..14b39742b6e 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -486,6 +486,7 @@ static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res) static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res) { + struct nfs4_slot *slot; unsigned long timestamp; struct nfs_client *clp; @@ -502,12 +503,14 @@ static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res * if (!RPC_WAS_SENT(task)) goto out; + slot = res->sr_slot; + /* Check the SEQUENCE operation status */ switch (res->sr_status) { case 0: /* Update the slot's sequence and clientid lease timer */ - ++res->sr_slot->seq_nr; - timestamp = res->sr_renewal_time; + ++slot->seq_nr; + timestamp = slot->renewal_time; clp = res->sr_session->clp; do_renew_lease(clp, timestamp); /* Check sequence flags */ @@ -521,12 +524,12 @@ static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res * */ dprintk("%s: slot=%td seq=%d: Operation in progress\n", __func__, - res->sr_slot - res->sr_session->fc_slot_table.slots, - res->sr_slot->seq_nr); + 
slot - res->sr_session->fc_slot_table.slots, + slot->seq_nr); goto out_retry; default: /* Just update the slot sequence no. */ - ++res->sr_slot->seq_nr; + ++slot->seq_nr; } out: /* The session may be reset by one of the error handlers. */ @@ -637,6 +640,7 @@ int nfs41_setup_sequence(struct nfs4_session *session, rpc_task_set_priority(task, RPC_PRIORITY_NORMAL); slot = tbl->slots + slotid; + slot->renewal_time = jiffies; args->sa_session = session; args->sa_slotid = slotid; @@ -644,7 +648,6 @@ int nfs41_setup_sequence(struct nfs4_session *session, res->sr_session = session; res->sr_slot = slot; - res->sr_renewal_time = jiffies; res->sr_status_flags = 0; /* * sr_status is only set in decode_sequence, and so will remain -- cgit v1.2.3 From fe20d7d5eefb218b82033ba5c13cbcbd2a3d874c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 21 Nov 2012 22:49:36 -0500 Subject: NFSv4: Fix a compile time warning when #undef CONFIG_NFS_V4_1 The function nfs4_get_machine_cred_locked is used by NFSv4.0 routines too. 
Signed-off-by: Trond Myklebust --- fs/nfs/nfs4_fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index a525fdefccd..ea4e3624104 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -321,13 +321,13 @@ extern void nfs4_renew_state(struct work_struct *); /* nfs4state.c */ struct rpc_cred *nfs4_get_setclientid_cred(struct nfs_client *clp); +struct rpc_cred *nfs4_get_machine_cred_locked(struct nfs_client *clp); struct rpc_cred *nfs4_get_renew_cred_locked(struct nfs_client *clp); int nfs4_discover_server_trunking(struct nfs_client *clp, struct nfs_client **); int nfs40_discover_server_trunking(struct nfs_client *clp, struct nfs_client **, struct rpc_cred *); #if defined(CONFIG_NFS_V4_1) -struct rpc_cred *nfs4_get_machine_cred_locked(struct nfs_client *clp); struct rpc_cred *nfs4_get_exchange_id_cred(struct nfs_client *clp); int nfs41_discover_server_trunking(struct nfs_client *clp, struct nfs_client **, struct rpc_cred *); -- cgit v1.2.3 From d751f748b359534d78e2b2e52b59d39f0e0540aa Mon Sep 17 00:00:00 2001 From: Jim Rees Date: Fri, 16 Nov 2012 18:12:06 -0500 Subject: NFS: Reduce stack use in encode_exchange_id() encode_exchange_id() uses more stack space than necessary, giving a compile time warning. Reduce the size of the static buffer for implementation name. 
Signed-off-by: Jim Rees Reviewed-by: "Adamson, Dros" Signed-off-by: Trond Myklebust --- fs/nfs/nfs4xdr.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 40836ee5dc3..142aacb9245 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -270,6 +270,8 @@ static int nfs4_stat_to_errno(int); #if defined(CONFIG_NFS_V4_1) #define NFS4_MAX_MACHINE_NAME_LEN (64) +#define IMPL_NAME_LIMIT (sizeof(utsname()->sysname) + sizeof(utsname()->release) + \ + sizeof(utsname()->version) + sizeof(utsname()->machine) + 8) #define encode_exchange_id_maxsz (op_encode_hdr_maxsz + \ encode_verifier_maxsz + \ @@ -282,7 +284,7 @@ static int nfs4_stat_to_errno(int); 1 /* nii_domain */ + \ XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + \ 1 /* nii_name */ + \ - XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + \ + XDR_QUADLEN(IMPL_NAME_LIMIT) + \ 3 /* nii_date */) #define decode_exchange_id_maxsz (op_decode_hdr_maxsz + \ 2 /* eir_clientid */ + \ @@ -1713,7 +1715,7 @@ static void encode_exchange_id(struct xdr_stream *xdr, struct compound_hdr *hdr) { __be32 *p; - char impl_name[NFS4_OPAQUE_LIMIT]; + char impl_name[IMPL_NAME_LIMIT]; int len = 0; encode_op_hdr(xdr, OP_EXCHANGE_ID, decode_exchange_id_maxsz, hdr); @@ -1728,7 +1730,7 @@ static void encode_exchange_id(struct xdr_stream *xdr, if (send_implementation_id && sizeof(CONFIG_NFS_V4_1_IMPLEMENTATION_ID_DOMAIN) > 1 && sizeof(CONFIG_NFS_V4_1_IMPLEMENTATION_ID_DOMAIN) - <= NFS4_OPAQUE_LIMIT + 1) + <= sizeof(impl_name) + 1) len = snprintf(impl_name, sizeof(impl_name), "%s %s %s %s", utsname()->sysname, utsname()->release, utsname()->version, utsname()->machine); -- cgit v1.2.3 From 4c1002100898d03c5c9142ffaf58351c841ab94a Mon Sep 17 00:00:00 2001 From: Yanchuan Nian Date: Mon, 12 Nov 2012 09:27:37 +0800 Subject: nfs: Fix wrong slab cache in nfs_commit_mempool The slab cache in nfs_commit_mempool is wrong, and I think it is just a slip. 
I tested it on an x86-32 machine, the size of nfs_write_header is 544, and the size of nfs_commit_data is 408, so it works fine. It is also true that sizeof(struct nfs_write_header) > sizeof(struct nfs_commit_data) on other platforms in my opinion. Just fix it. Signed-off-by: Yanchuan Nian Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 9347ab7c957..f710e39f6ba 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1829,7 +1829,7 @@ int __init nfs_init_writepagecache(void) goto out_destroy_write_mempool; nfs_commit_mempool = mempool_create_slab_pool(MIN_POOL_COMMIT, - nfs_wdata_cachep); + nfs_cdata_cachep); if (nfs_commit_mempool == NULL) goto out_destroy_commit_cache; -- cgit v1.2.3 From e3725ec015dfbbeb896295cf2b3a995f28b0630e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 16 Nov 2012 12:25:01 -0500 Subject: NFSv4.1: Shrink struct nfs4_sequence_res by moving the session pointer Move the session pointer into the slot table, then have struct nfs4_slot point to that slot table. 
Signed-off-by: Trond Myklebust --- fs/nfs/nfs4_fs.h | 3 ++- fs/nfs/nfs4proc.c | 33 +++++++++++++++++++++++---------- fs/nfs/nfs4state.c | 2 +- fs/nfs/nfs4xdr.c | 8 +++++--- 4 files changed, 31 insertions(+), 15 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 36880b9aa91..42c58691fb4 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -258,7 +258,8 @@ extern int nfs4_proc_get_lease_time(struct nfs_client *clp, extern int nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data, bool sync); -extern struct nfs4_slot *nfs4_alloc_slots(u32 max_slots, gfp_t gfp_flags); +extern struct nfs4_slot *nfs4_alloc_slots(struct nfs4_slot_table *table, + u32 max_slots, gfp_t gfp_flags); static inline bool is_ds_only_client(struct nfs_client *clp) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 14b39742b6e..5b61c4a8319 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -467,25 +467,28 @@ void nfs4_check_drain_bc_complete(struct nfs4_session *ses) static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res) { + struct nfs4_session *session; struct nfs4_slot_table *tbl; - tbl = &res->sr_session->fc_slot_table; if (!res->sr_slot) { /* just wake up the next guy waiting since * we may have not consumed a slot after all */ dprintk("%s: No slot\n", __func__); return; } + tbl = res->sr_slot->table; + session = tbl->session; spin_lock(&tbl->slot_tbl_lock); nfs4_free_slot(tbl, res->sr_slot - tbl->slots); - nfs4_check_drain_fc_complete(res->sr_session); + nfs4_check_drain_fc_complete(session); spin_unlock(&tbl->slot_tbl_lock); res->sr_slot = NULL; } static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res) { + struct nfs4_session *session; struct nfs4_slot *slot; unsigned long timestamp; struct nfs_client *clp; @@ -504,6 +507,7 @@ static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res * goto out; slot = res->sr_slot; + session = slot->table->session; /* Check the SEQUENCE 
operation status */ switch (res->sr_status) { @@ -511,7 +515,7 @@ static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res * /* Update the slot's sequence and clientid lease timer */ ++slot->seq_nr; timestamp = slot->renewal_time; - clp = res->sr_session->clp; + clp = session->clp; do_renew_lease(clp, timestamp); /* Check sequence flags */ if (res->sr_status_flags != 0) @@ -524,7 +528,7 @@ static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res * */ dprintk("%s: slot=%td seq=%d: Operation in progress\n", __func__, - slot - res->sr_session->fc_slot_table.slots, + slot - session->fc_slot_table.slots, slot->seq_nr); goto out_retry; default: @@ -546,7 +550,7 @@ out_retry: static int nfs4_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res) { - if (res->sr_session == NULL) + if (res->sr_slot == NULL) return 1; return nfs41_sequence_done(task, res); } @@ -591,7 +595,6 @@ static void nfs41_init_sequence(struct nfs4_sequence_args *args, args->sa_cache_this = 0; if (cache_reply) args->sa_cache_this = 1; - res->sr_session = NULL; res->sr_slot = NULL; } @@ -646,7 +649,6 @@ int nfs41_setup_sequence(struct nfs4_session *session, dprintk("<-- %s slotid=%d seqid=%d\n", __func__, slotid, slot->seq_nr); - res->sr_session = session; res->sr_slot = slot; res->sr_status_flags = 0; /* @@ -5659,9 +5661,18 @@ int nfs4_proc_get_lease_time(struct nfs_client *clp, struct nfs_fsinfo *fsinfo) return status; } -struct nfs4_slot *nfs4_alloc_slots(u32 max_slots, gfp_t gfp_flags) +struct nfs4_slot *nfs4_alloc_slots(struct nfs4_slot_table *table, + u32 max_slots, gfp_t gfp_flags) { - return kmalloc_array(max_slots, sizeof(struct nfs4_slot), gfp_flags); + struct nfs4_slot *tbl; + u32 i; + + tbl = kmalloc_array(max_slots, sizeof(*tbl), gfp_flags); + if (tbl != NULL) { + for (i = 0; i < max_slots; i++) + tbl[i].table = table; + } + return tbl; } static void nfs4_add_and_init_slots(struct nfs4_slot_table *tbl, @@ -5699,7 +5710,7 @@ static int 
nfs4_realloc_slot_table(struct nfs4_slot_table *tbl, u32 max_reqs, /* Does the newly negotiated max_reqs match the existing slot table? */ if (max_reqs != tbl->max_slots) { - new = nfs4_alloc_slots(max_reqs, GFP_NOFS); + new = nfs4_alloc_slots(tbl, max_reqs, GFP_NOFS); if (!new) goto out; } @@ -5738,11 +5749,13 @@ static int nfs4_setup_session_slot_tables(struct nfs4_session *ses) dprintk("--> %s\n", __func__); /* Fore channel */ tbl = &ses->fc_slot_table; + tbl->session = ses; status = nfs4_realloc_slot_table(tbl, ses->fc_attrs.max_reqs, 1); if (status) /* -ENOMEM */ return status; /* Back channel */ tbl = &ses->bc_slot_table; + tbl->session = ses; status = nfs4_realloc_slot_table(tbl, ses->bc_attrs.max_reqs, 0); if (status && tbl->slots == NULL) /* Fore and back channel share a connection so get diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 96fcbb97fd6..9495789c425 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -2033,7 +2033,7 @@ static int nfs4_recall_slot(struct nfs_client *clp) return 0; nfs4_begin_drain_session(clp); fc_tbl = &clp->cl_session->fc_slot_table; - new = nfs4_alloc_slots(fc_tbl->target_max_slots, GFP_NOFS); + new = nfs4_alloc_slots(fc_tbl, fc_tbl->target_max_slots, GFP_NOFS); if (!new) return -ENOMEM; diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 672d9b0ef2c..4126f054610 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -5507,12 +5507,13 @@ static int decode_sequence(struct xdr_stream *xdr, struct rpc_rqst *rqstp) { #if defined(CONFIG_NFS_V4_1) + struct nfs4_session *session; struct nfs4_sessionid id; u32 dummy; int status; __be32 *p; - if (!res->sr_session) + if (res->sr_slot == NULL) return 0; status = decode_op_hdr(xdr, OP_SEQUENCE); @@ -5526,8 +5527,9 @@ static int decode_sequence(struct xdr_stream *xdr, * sequence number, the server is looney tunes. 
*/ status = -EREMOTEIO; + session = res->sr_slot->table->session; - if (memcmp(id.data, res->sr_session->sess_id.data, + if (memcmp(id.data, session->sess_id.data, NFS4_MAX_SESSIONID_LEN)) { dprintk("%s Invalid session id\n", __func__); goto out_err; @@ -5545,7 +5547,7 @@ static int decode_sequence(struct xdr_stream *xdr, } /* slot id */ dummy = be32_to_cpup(p++); - if (dummy != res->sr_slot - res->sr_session->fc_slot_table.slots) { + if (dummy != res->sr_slot - session->fc_slot_table.slots) { dprintk("%s Invalid slot id\n", __func__); goto out_err; } -- cgit v1.2.3 From df2fabffbace8988f3265585ec793ff9deccdea7 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 16 Nov 2012 12:45:06 -0500 Subject: NFSv4.1: Label each entry in the session slot tables with its slot number Instead of doing slot table pointer gymnastics every time we want to know which slot we're using. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 12 +++++++----- fs/nfs/nfs4xdr.c | 2 +- 2 files changed, 8 insertions(+), 6 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 5b61c4a8319..4311dba49c5 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -526,9 +526,9 @@ static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res * * returned NFS4ERR_DELAY as per Section 2.10.6.2 * of RFC5661. */ - dprintk("%s: slot=%td seq=%d: Operation in progress\n", + dprintk("%s: slot=%u seq=%u: Operation in progress\n", __func__, - slot - session->fc_slot_table.slots, + slot->slot_nr, slot->seq_nr); goto out_retry; default: @@ -671,9 +671,9 @@ int nfs4_setup_sequence(const struct nfs_server *server, if (session == NULL) goto out; - dprintk("--> %s clp %p session %p sr_slot %td\n", + dprintk("--> %s clp %p session %p sr_slot %d\n", __func__, session->clp, session, res->sr_slot ? 
- res->sr_slot - session->fc_slot_table.slots : -1); + res->sr_slot->slot_nr : -1); ret = nfs41_setup_sequence(session, args, res, task); out: @@ -5669,8 +5669,10 @@ struct nfs4_slot *nfs4_alloc_slots(struct nfs4_slot_table *table, tbl = kmalloc_array(max_slots, sizeof(*tbl), gfp_flags); if (tbl != NULL) { - for (i = 0; i < max_slots; i++) + for (i = 0; i < max_slots; i++) { tbl[i].table = table; + tbl[i].slot_nr = i; + } } return tbl; } diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 4126f054610..50bac706616 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -5547,7 +5547,7 @@ static int decode_sequence(struct xdr_stream *xdr, } /* slot id */ dummy = be32_to_cpup(p++); - if (dummy != res->sr_slot - session->fc_slot_table.slots) { + if (dummy != res->sr_slot->slot_nr) { dprintk("%s Invalid slot id\n", __func__); goto out_err; } -- cgit v1.2.3 From 2b2fa71723f955d5b4a0f4edd99cf3cd69ceafd1 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 16 Nov 2012 12:58:36 -0500 Subject: NFSv4.1: Simplify struct nfs4_sequence_args too Replace the session pointer + slotid with a pointer to the allocated slot. 
Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 6 +++--- fs/nfs/nfs4xdr.c | 21 ++++++++++----------- 2 files changed, 13 insertions(+), 14 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 4311dba49c5..6c41a34e34b 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -591,7 +591,7 @@ out: static void nfs41_init_sequence(struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, int cache_reply) { - args->sa_session = NULL; + args->sa_slot = NULL; args->sa_cache_this = 0; if (cache_reply) args->sa_cache_this = 1; @@ -644,8 +644,8 @@ int nfs41_setup_sequence(struct nfs4_session *session, rpc_task_set_priority(task, RPC_PRIORITY_NORMAL); slot = tbl->slots + slotid; slot->renewal_time = jiffies; - args->sa_session = session; - args->sa_slotid = slotid; + + args->sa_slot = slot; dprintk("<-- %s slotid=%d seqid=%d\n", __func__, slotid, slot->seq_nr); diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 50bac706616..27b0fec1a6b 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -1833,18 +1833,16 @@ static void encode_sequence(struct xdr_stream *xdr, struct compound_hdr *hdr) { #if defined(CONFIG_NFS_V4_1) - struct nfs4_session *session = args->sa_session; + struct nfs4_session *session; struct nfs4_slot_table *tp; - struct nfs4_slot *slot; + struct nfs4_slot *slot = args->sa_slot; __be32 *p; - if (!session) + if (slot == NULL) return; - tp = &session->fc_slot_table; - - WARN_ON(args->sa_slotid == NFS4_MAX_SLOT_TABLE); - slot = tp->slots + args->sa_slotid; + tp = slot->table; + session = tp->session; encode_op_hdr(xdr, OP_SEQUENCE, decode_sequence_maxsz, hdr); @@ -1858,12 +1856,12 @@ static void encode_sequence(struct xdr_stream *xdr, ((u32 *)session->sess_id.data)[1], ((u32 *)session->sess_id.data)[2], ((u32 *)session->sess_id.data)[3], - slot->seq_nr, args->sa_slotid, + slot->seq_nr, slot->slot_nr, tp->highest_used_slotid, args->sa_cache_this); p = reserve_space(xdr, NFS4_MAX_SESSIONID_LEN + 16); p = 
xdr_encode_opaque_fixed(p, session->sess_id.data, NFS4_MAX_SESSIONID_LEN); *p++ = cpu_to_be32(slot->seq_nr); - *p++ = cpu_to_be32(args->sa_slotid); + *p++ = cpu_to_be32(slot->slot_nr); *p++ = cpu_to_be32(tp->highest_used_slotid); *p = cpu_to_be32(args->sa_cache_this); #endif /* CONFIG_NFS_V4_1 */ @@ -2025,8 +2023,9 @@ static void encode_free_stateid(struct xdr_stream *xdr, static u32 nfs4_xdr_minorversion(const struct nfs4_sequence_args *args) { #if defined(CONFIG_NFS_V4_1) - if (args->sa_session) - return args->sa_session->clp->cl_mvops->minor_version; + + if (args->sa_slot) + return args->sa_slot->table->session->clp->cl_mvops->minor_version; #endif /* CONFIG_NFS_V4_1 */ return 0; } -- cgit v1.2.3 From 2dc03b7f00d7fcd7dbb9302c5ebbd0c2b7fa3557 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 16 Nov 2012 16:10:11 -0500 Subject: NFSv4.1: Simplify slot allocation Clean up the NFSv4.1 slot allocation by replacing nfs_find_slot() with a function nfs_alloc_slot() that returns a pointer to the nfs4_slot instead of an offset into the slot table. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 6c41a34e34b..0789ef18a94 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -556,20 +556,18 @@ static int nfs4_sequence_done(struct rpc_task *task, } /* - * nfs4_find_slot - efficiently look for a free slot + * nfs4_alloc_slot - efficiently look for a free slot * - * nfs4_find_slot looks for an unset bit in the used_slots bitmap. + * nfs4_alloc_slot looks for an unset bit in the used_slots bitmap. * If found, we mark the slot as used, update the highest_used_slotid, * and respectively set up the sequence operation args. - * The slot number is returned if found, or NFS4_NO_SLOT otherwise. * * Note: must be called with under the slot_tbl_lock. 
*/ -static u32 -nfs4_find_slot(struct nfs4_slot_table *tbl) +static struct nfs4_slot *nfs4_alloc_slot(struct nfs4_slot_table *tbl) { + struct nfs4_slot *ret = NULL; u32 slotid; - u32 ret_id = NFS4_NO_SLOT; dprintk("--> %s used_slots=%04lx highest_used=%u max_slots=%u\n", __func__, tbl->used_slots[0], tbl->highest_used_slotid, @@ -581,11 +579,14 @@ nfs4_find_slot(struct nfs4_slot_table *tbl) if (slotid > tbl->highest_used_slotid || tbl->highest_used_slotid == NFS4_NO_SLOT) tbl->highest_used_slotid = slotid; - ret_id = slotid; + ret = &tbl->slots[slotid]; + ret->renewal_time = jiffies; + out: dprintk("<-- %s used_slots=%04lx highest_used=%d slotid=%d \n", - __func__, tbl->used_slots[0], tbl->highest_used_slotid, ret_id); - return ret_id; + __func__, tbl->used_slots[0], tbl->highest_used_slotid, + ret ? ret->slot_nr : -1); + return ret; } static void nfs41_init_sequence(struct nfs4_sequence_args *args, @@ -605,7 +606,6 @@ int nfs41_setup_sequence(struct nfs4_session *session, { struct nfs4_slot *slot; struct nfs4_slot_table *tbl; - u32 slotid; dprintk("--> %s\n", __func__); /* slot already allocated? 
*/ @@ -632,8 +632,8 @@ int nfs41_setup_sequence(struct nfs4_session *session, return -EAGAIN; } - slotid = nfs4_find_slot(tbl); - if (slotid == NFS4_NO_SLOT) { + slot = nfs4_alloc_slot(tbl); + if (slot == NULL) { rpc_sleep_on(&tbl->slot_tbl_waitq, task, NULL); spin_unlock(&tbl->slot_tbl_lock); dprintk("<-- %s: no free slots\n", __func__); @@ -642,12 +642,11 @@ int nfs41_setup_sequence(struct nfs4_session *session, spin_unlock(&tbl->slot_tbl_lock); rpc_task_set_priority(task, RPC_PRIORITY_NORMAL); - slot = tbl->slots + slotid; - slot->renewal_time = jiffies; args->sa_slot = slot; - dprintk("<-- %s slotid=%d seqid=%d\n", __func__, slotid, slot->seq_nr); + dprintk("<-- %s slotid=%d seqid=%d\n", __func__, + slot->slot_nr, slot->seq_nr); res->sr_slot = slot; res->sr_status_flags = 0; -- cgit v1.2.3 From f4af6e2abc8efb1695203a2b76876edf80f79960 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 20 Nov 2012 14:17:32 -0500 Subject: NFSv4.1: Clean up nfs4_free_slot Change the argument to take the pointer to the slot, instead of just the slotid. We know that the new value of highest_used_slot must be less than the current value. No need to scan the whole table. 
Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 0789ef18a94..197ef3e4e1f 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -412,16 +412,18 @@ static void renew_lease(const struct nfs_server *server, unsigned long timestamp * Must be called while holding tbl->slot_tbl_lock */ static void -nfs4_free_slot(struct nfs4_slot_table *tbl, u32 slotid) +nfs4_free_slot(struct nfs4_slot_table *tbl, struct nfs4_slot *slot) { + u32 slotid = slot->slot_nr; + /* clear used bit in bitmap */ __clear_bit(slotid, tbl->used_slots); /* update highest_used_slotid when it is freed */ if (slotid == tbl->highest_used_slotid) { - slotid = find_last_bit(tbl->used_slots, tbl->max_slots); - if (slotid < tbl->max_slots) - tbl->highest_used_slotid = slotid; + u32 new_max = find_last_bit(tbl->used_slots, slotid); + if (new_max < slotid) + tbl->highest_used_slotid = new_max; else tbl->highest_used_slotid = NFS4_NO_SLOT; } @@ -480,7 +482,7 @@ static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res) session = tbl->session; spin_lock(&tbl->slot_tbl_lock); - nfs4_free_slot(tbl, res->sr_slot - tbl->slots); + nfs4_free_slot(tbl, res->sr_slot); nfs4_check_drain_fc_complete(session); spin_unlock(&tbl->slot_tbl_lock); res->sr_slot = NULL; -- cgit v1.2.3 From 696199f8ccf7fc6d17ef89c296ad3b6c78c52d9c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 29 Nov 2012 22:00:51 -0500 Subject: don't do blind d_drop() in nfs_prime_dcache() Signed-off-by: Al Viro --- fs/nfs/dir.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index ce8cb926526..99489cfca24 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -450,7 +450,8 @@ void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry) nfs_refresh_inode(dentry->d_inode, entry->fattr); goto out; } else { - d_drop(dentry); + if 
(d_invalidate(dentry) != 0) + goto out; dput(dentry); } } -- cgit v1.2.3 From c44600c9d1de64314c2bd58103f15acb53e10073 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 29 Nov 2012 22:04:36 -0500 Subject: nfs_lookup_revalidate(): fix a leak We are leaking fattr and fhandle if we decide that dentry is not to be invalidated, after all (e.g. happens to be a mountpoint). Just free both before that... Signed-off-by: Al Viro --- fs/nfs/dir.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 99489cfca24..b9e66b7e0c1 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1101,6 +1101,8 @@ out_set_verifier: out_zap_parent: nfs_zap_caches(dir); out_bad: + nfs_free_fattr(fattr); + nfs_free_fhandle(fhandle); nfs_mark_for_revalidate(dir); if (inode && S_ISDIR(inode->i_mode)) { /* Purge readdir caches. */ @@ -1113,8 +1115,6 @@ out_zap_parent: shrink_dcache_parent(dentry); } d_drop(dentry); - nfs_free_fattr(fattr); - nfs_free_fhandle(fhandle); dput(parent); dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) is invalid\n", __func__, dentry->d_parent->d_name.name, -- cgit v1.2.3 From 464ee9f966404786ba4c6be35dc8362ee8e6ba4e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 20 Nov 2012 12:49:27 -0500 Subject: NFSv4.1: Ensure that the client tracks the server target_highest_slotid Dynamic slot allocation in NFSv4.1 depends on the client being able to track the server's target value for the highest slotid in the slot table. See the reference in Section 2.10.6.1 of RFC5661. To avoid ordering problems in the case where 2 SEQUENCE replies contain conflicting updates to this target value, we also introduce a generation counter, to track whether or not an RPC containing a SEQUENCE operation was launched before or after the last update. Also rename the nfs4_slot_table target_max_slots field to 'target_highest_slotid' to avoid confusion with a slot table size or number of slots. 
Signed-off-by: Trond Myklebust --- fs/nfs/callback_proc.c | 2 +- fs/nfs/nfs4proc.c | 25 +++++++++++++++++++++++++ fs/nfs/nfs4state.c | 7 +++---- fs/nfs/nfs4xdr.c | 4 ++-- 4 files changed, 31 insertions(+), 7 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 0be08b964f3..0ef047b7d28 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -576,7 +576,7 @@ __be32 nfs4_callback_recallslot(struct cb_recallslotargs *args, void *dummy, if (args->crsa_target_max_slots == fc_tbl->max_slots) goto out; - fc_tbl->target_max_slots = args->crsa_target_max_slots; + fc_tbl->target_highest_slotid = args->crsa_target_max_slots; nfs41_handle_recall_slot(cps->clp); out: dprintk("%s: exit with status = %d\n", __func__, ntohl(status)); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 197ef3e4e1f..d91abaa522e 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -488,6 +488,28 @@ static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res) res->sr_slot = NULL; } +/* Update the client's idea of target_highest_slotid */ +static void nfs41_set_target_slotid_locked(struct nfs4_slot_table *tbl, + u32 target_highest_slotid) +{ + if (tbl->target_highest_slotid == target_highest_slotid) + return; + tbl->target_highest_slotid = target_highest_slotid; + tbl->generation++; +} + +static void nfs41_update_target_slotid(struct nfs4_slot_table *tbl, + struct nfs4_slot *slot, + struct nfs4_sequence_res *res) +{ + spin_lock(&tbl->slot_tbl_lock); + if (tbl->generation != slot->generation) + goto out; + nfs41_set_target_slotid_locked(tbl, res->sr_target_highest_slotid); +out: + spin_unlock(&tbl->slot_tbl_lock); +} + static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res) { struct nfs4_session *session; @@ -522,6 +544,7 @@ static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res * /* Check sequence flags */ if (res->sr_status_flags != 0) nfs4_schedule_lease_recovery(clp); + 
nfs41_update_target_slotid(slot->table, slot, res); break; case -NFS4ERR_DELAY: /* The server detected a resend of the RPC call and @@ -583,6 +606,7 @@ static struct nfs4_slot *nfs4_alloc_slot(struct nfs4_slot_table *tbl) tbl->highest_used_slotid = slotid; ret = &tbl->slots[slotid]; ret->renewal_time = jiffies; + ret->generation = tbl->generation; out: dprintk("<-- %s used_slots=%04lx highest_used=%d slotid=%d \n", @@ -5693,6 +5717,7 @@ static void nfs4_add_and_init_slots(struct nfs4_slot_table *tbl, tbl->max_slots = max_slots; } tbl->highest_used_slotid = NFS4_NO_SLOT; + tbl->target_highest_slotid = max_slots - 1; for (i = 0; i < tbl->max_slots; i++) tbl->slots[i].seq_nr = ivalue; spin_unlock(&tbl->slot_tbl_lock); diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 9495789c425..842cb8c2f65 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -2033,17 +2033,16 @@ static int nfs4_recall_slot(struct nfs_client *clp) return 0; nfs4_begin_drain_session(clp); fc_tbl = &clp->cl_session->fc_slot_table; - new = nfs4_alloc_slots(fc_tbl, fc_tbl->target_max_slots, GFP_NOFS); + new = nfs4_alloc_slots(fc_tbl, fc_tbl->target_highest_slotid + 1, GFP_NOFS); if (!new) return -ENOMEM; spin_lock(&fc_tbl->slot_tbl_lock); - for (i = 0; i < fc_tbl->target_max_slots; i++) + for (i = 0; i <= fc_tbl->target_highest_slotid; i++) new[i].seq_nr = fc_tbl->slots[i].seq_nr; old = fc_tbl->slots; fc_tbl->slots = new; - fc_tbl->max_slots = fc_tbl->target_max_slots; - fc_tbl->target_max_slots = 0; + fc_tbl->max_slots = fc_tbl->target_highest_slotid + 1; clp->cl_session->fc_attrs.max_reqs = fc_tbl->max_slots; spin_unlock(&fc_tbl->slot_tbl_lock); diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 27b0fec1a6b..05d34f1fcc1 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -5552,8 +5552,8 @@ static int decode_sequence(struct xdr_stream *xdr, } /* highest slot id - currently not processed */ dummy = be32_to_cpup(p++); - /* target highest slot id - currently not processed */ - dummy 
= be32_to_cpup(p++); + /* target highest slot id */ + res->sr_target_highest_slotid = be32_to_cpup(p++); /* result flags */ res->sr_status_flags = be32_to_cpup(p); status = 0; -- cgit v1.2.3 From da0507b7c95ccd4d9c86394eef42fe076032af30 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 20 Nov 2012 18:10:30 -0500 Subject: NFSv4.1: Reset the sequence number for slots that have been deallocated When the server tells us that it is dynamically resizing the session replay cache, we should reset the sequence number for those slots that have been deallocated. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 18 ++++++++++++++++++ fs/nfs/nfs4xdr.c | 4 ++-- 2 files changed, 20 insertions(+), 2 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index d91abaa522e..52435ec4419 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -498,6 +498,22 @@ static void nfs41_set_target_slotid_locked(struct nfs4_slot_table *tbl, tbl->generation++; } +static void nfs41_set_server_slotid_locked(struct nfs4_slot_table *tbl, + u32 highest_slotid) +{ + unsigned int max_slotid, i; + + if (tbl->server_highest_slotid == highest_slotid) + return; + if (tbl->highest_used_slotid > highest_slotid) + return; + max_slotid = min(tbl->max_slots - 1, highest_slotid); + /* Reset the seq_nr for deallocated slots */ + for (i = tbl->server_highest_slotid + 1; i <= max_slotid; i++) + tbl->slots[i].seq_nr = 1; + tbl->server_highest_slotid = highest_slotid; +} + static void nfs41_update_target_slotid(struct nfs4_slot_table *tbl, struct nfs4_slot *slot, struct nfs4_sequence_res *res) @@ -505,6 +521,7 @@ static void nfs41_update_target_slotid(struct nfs4_slot_table *tbl, spin_lock(&tbl->slot_tbl_lock); if (tbl->generation != slot->generation) goto out; + nfs41_set_server_slotid_locked(tbl, res->sr_highest_slotid); nfs41_set_target_slotid_locked(tbl, res->sr_target_highest_slotid); out: spin_unlock(&tbl->slot_tbl_lock); @@ -5718,6 +5735,7 @@ static void 
nfs4_add_and_init_slots(struct nfs4_slot_table *tbl, } tbl->highest_used_slotid = NFS4_NO_SLOT; tbl->target_highest_slotid = max_slots - 1; + tbl->server_highest_slotid = max_slots - 1; for (i = 0; i < tbl->max_slots; i++) tbl->slots[i].seq_nr = ivalue; spin_unlock(&tbl->slot_tbl_lock); diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 05d34f1fcc1..a67040f5159 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -5550,8 +5550,8 @@ static int decode_sequence(struct xdr_stream *xdr, dprintk("%s Invalid slot id\n", __func__); goto out_err; } - /* highest slot id - currently not processed */ - dummy = be32_to_cpup(p++); + /* highest slot id */ + res->sr_highest_slotid = be32_to_cpup(p++); /* target highest slot id */ res->sr_target_highest_slotid = be32_to_cpup(p++); /* result flags */ -- cgit v1.2.3 From ce008c4bb9766bc7eeb02e8299c8baadc25da90b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 20 Nov 2012 15:16:30 -0500 Subject: NFSv4.1: Fix nfs4_callback_recallslot to work with dynamic slot allocation Ensure that the NFSv4.1 CB_RECALL_SLOT callback updates the slot table target max slotid safely. 
Signed-off-by: Trond Myklebust --- fs/nfs/callback_proc.c | 2 +- fs/nfs/nfs4_fs.h | 2 ++ fs/nfs/nfs4proc.c | 8 ++++++++ 3 files changed, 11 insertions(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 0ef047b7d28..15b9879d6fb 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -576,7 +576,7 @@ __be32 nfs4_callback_recallslot(struct cb_recallslotargs *args, void *dummy, if (args->crsa_target_max_slots == fc_tbl->max_slots) goto out; - fc_tbl->target_highest_slotid = args->crsa_target_max_slots; + nfs41_set_target_slotid(fc_tbl, args->crsa_target_max_slots); nfs41_handle_recall_slot(cps->clp); out: dprintk("%s: exit with status = %d\n", __func__, ntohl(status)); diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 42c58691fb4..5d4e82b10c3 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -260,6 +260,8 @@ extern int nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data, extern struct nfs4_slot *nfs4_alloc_slots(struct nfs4_slot_table *table, u32 max_slots, gfp_t gfp_flags); +extern void nfs41_set_target_slotid(struct nfs4_slot_table *tbl, + u32 target_highest_slotid); static inline bool is_ds_only_client(struct nfs_client *clp) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 52435ec4419..62212231ce6 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -498,6 +498,14 @@ static void nfs41_set_target_slotid_locked(struct nfs4_slot_table *tbl, tbl->generation++; } +void nfs41_set_target_slotid(struct nfs4_slot_table *tbl, + u32 target_highest_slotid) +{ + spin_lock(&tbl->slot_tbl_lock); + nfs41_set_target_slotid_locked(tbl, target_highest_slotid); + spin_unlock(&tbl->slot_tbl_lock); +} + static void nfs41_set_server_slotid_locked(struct nfs4_slot_table *tbl, u32 highest_slotid) { -- cgit v1.2.3 From d5fb4ce33e26e4c1c31c1609b8ffbb24f80bcab8 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 20 Nov 2012 20:24:02 -0500 Subject: NFSv4.1: Don't confuse 
target_highest_slotid and max_slots in cb_recall_slot Don't confuse the table size and the target_highest_slotid... Signed-off-by: Trond Myklebust --- fs/nfs/callback.h | 2 +- fs/nfs/callback_proc.c | 12 +++++------- fs/nfs/callback_xdr.c | 2 +- 3 files changed, 7 insertions(+), 9 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h index 4251c2ae06a..e75631e264f 100644 --- a/fs/nfs/callback.h +++ b/fs/nfs/callback.h @@ -142,7 +142,7 @@ extern __be32 nfs4_callback_recallany(struct cb_recallanyargs *args, struct cb_recallslotargs { struct sockaddr *crsa_addr; - uint32_t crsa_target_max_slots; + uint32_t crsa_target_highest_slotid; }; extern __be32 nfs4_callback_recallslot(struct cb_recallslotargs *args, void *dummy, diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 15b9879d6fb..ed0b446e2e3 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -561,22 +561,20 @@ __be32 nfs4_callback_recallslot(struct cb_recallslotargs *args, void *dummy, if (!cps->clp) /* set in cb_sequence */ goto out; - dprintk_rcu("NFS: CB_RECALL_SLOT request from %s target max slots %d\n", + dprintk_rcu("NFS: CB_RECALL_SLOT request from %s target highest slotid %d\n", rpc_peeraddr2str(cps->clp->cl_rpcclient, RPC_DISPLAY_ADDR), - args->crsa_target_max_slots); + args->crsa_target_highest_slotid); fc_tbl = &cps->clp->cl_session->fc_slot_table; status = htonl(NFS4ERR_BAD_HIGH_SLOT); - if (args->crsa_target_max_slots > fc_tbl->max_slots || - args->crsa_target_max_slots < 1) + if (args->crsa_target_highest_slotid >= fc_tbl->max_slots || + args->crsa_target_highest_slotid < 1) goto out; status = htonl(NFS4_OK); - if (args->crsa_target_max_slots == fc_tbl->max_slots) - goto out; - nfs41_set_target_slotid(fc_tbl, args->crsa_target_max_slots); + nfs41_set_target_slotid(fc_tbl, args->crsa_target_highest_slotid); nfs41_handle_recall_slot(cps->clp); out: dprintk("%s: exit with status = %d\n", __func__, ntohl(status)); diff --git 
a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index 742ff4ffced..81e8c7d4c2e 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -520,7 +520,7 @@ static __be32 decode_recallslot_args(struct svc_rqst *rqstp, p = read_buf(xdr, 4); if (unlikely(p == NULL)) return htonl(NFS4ERR_BADXDR); - args->crsa_target_max_slots = ntohl(*p++); + args->crsa_target_highest_slotid = ntohl(*p++); return 0; } -- cgit v1.2.3 From 1b285ff16ab52fb401aed7ce70abed4bb65b30b5 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 20 Nov 2012 22:32:48 -0500 Subject: NFSv4.1: Allow the server to recall all but one slot If the server wants to leave us with only one slot, or it wants to "shrink" our slot table to something larger than we have now, then so be it. Signed-off-by: Trond Myklebust --- fs/nfs/callback_proc.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index ed0b446e2e3..a0546eca6f6 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -567,11 +567,6 @@ __be32 nfs4_callback_recallslot(struct cb_recallslotargs *args, void *dummy, fc_tbl = &cps->clp->cl_session->fc_slot_table; - status = htonl(NFS4ERR_BAD_HIGH_SLOT); - if (args->crsa_target_highest_slotid >= fc_tbl->max_slots || - args->crsa_target_highest_slotid < 1) - goto out; - status = htonl(NFS4_OK); nfs41_set_target_slotid(fc_tbl, args->crsa_target_highest_slotid); -- cgit v1.2.3 From 97e548a93de213b149eea025a97d88e28143b445 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 20 Nov 2012 14:45:48 -0500 Subject: NFSv4.1: Support dynamic resizing of the session slot table Allow the server to control the size of the session slot table by adjusting the value of sr_target_max_slots in the reply to the SEQUENCE operation. 
Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 12 ++++++++++-- fs/nfs/nfs4state.c | 6 +++--- 2 files changed, 13 insertions(+), 5 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 62212231ce6..1792ece8b53 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -492,10 +492,17 @@ static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res) static void nfs41_set_target_slotid_locked(struct nfs4_slot_table *tbl, u32 target_highest_slotid) { + unsigned int max_slotid, i; + if (tbl->target_highest_slotid == target_highest_slotid) return; tbl->target_highest_slotid = target_highest_slotid; tbl->generation++; + + max_slotid = min(tbl->max_slots - 1, tbl->target_highest_slotid); + for (i = tbl->max_slotid + 1; i <= max_slotid; i++) + rpc_wake_up_next(&tbl->slot_tbl_waitq); + tbl->max_slotid = max_slotid; } void nfs41_set_target_slotid(struct nfs4_slot_table *tbl, @@ -622,8 +629,8 @@ static struct nfs4_slot *nfs4_alloc_slot(struct nfs4_slot_table *tbl) dprintk("--> %s used_slots=%04lx highest_used=%u max_slots=%u\n", __func__, tbl->used_slots[0], tbl->highest_used_slotid, tbl->max_slots); - slotid = find_first_zero_bit(tbl->used_slots, tbl->max_slots); - if (slotid >= tbl->max_slots) + slotid = find_first_zero_bit(tbl->used_slots, tbl->max_slotid + 1); + if (slotid > tbl->max_slotid) goto out; __set_bit(slotid, tbl->used_slots); if (slotid > tbl->highest_used_slotid || @@ -5744,6 +5751,7 @@ static void nfs4_add_and_init_slots(struct nfs4_slot_table *tbl, tbl->highest_used_slotid = NFS4_NO_SLOT; tbl->target_highest_slotid = max_slots - 1; tbl->server_highest_slotid = max_slots - 1; + tbl->max_slotid = max_slots - 1; for (i = 0; i < tbl->max_slots; i++) tbl->slots[i].seq_nr = ivalue; spin_unlock(&tbl->slot_tbl_lock); diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 842cb8c2f65..1b7fa73c943 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -254,15 +254,14 @@ static void nfs4_end_drain_session(struct 
nfs_client *clp) { struct nfs4_session *ses = clp->cl_session; struct nfs4_slot_table *tbl; - int max_slots; + unsigned int i; if (ses == NULL) return; tbl = &ses->fc_slot_table; if (test_and_clear_bit(NFS4_SESSION_DRAINING, &ses->session_state)) { spin_lock(&tbl->slot_tbl_lock); - max_slots = tbl->max_slots; - while (max_slots--) { + for (i = 0; i <= tbl->max_slotid; i++) { if (rpc_wake_up_first(&tbl->slot_tbl_waitq, nfs4_set_task_privileged, NULL) == NULL) @@ -2043,6 +2042,7 @@ static int nfs4_recall_slot(struct nfs_client *clp) old = fc_tbl->slots; fc_tbl->slots = new; fc_tbl->max_slots = fc_tbl->target_highest_slotid + 1; + fc_tbl->max_slotid = fc_tbl->target_highest_slotid; clp->cl_session->fc_attrs.max_reqs = fc_tbl->max_slots; spin_unlock(&fc_tbl->slot_tbl_lock); -- cgit v1.2.3 From 87dda67e7386ba7d2164391ea58b34e028d8157b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 20 Nov 2012 19:49:20 -0500 Subject: NFSv4.1: Allow SEQUENCE to resize the slot table on the fly Instead of an array of slots, use a singly linked list of slots that can be dynamically appended to or shrunk. 
Signed-off-by: Trond Myklebust --- fs/nfs/nfs4_fs.h | 4 +- fs/nfs/nfs4proc.c | 174 +++++++++++++++++++++++++++++++++++------------------ fs/nfs/nfs4state.c | 22 ++----- 3 files changed, 120 insertions(+), 80 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 5d4e82b10c3..856bc496a21 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -258,10 +258,10 @@ extern int nfs4_proc_get_lease_time(struct nfs_client *clp, extern int nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data, bool sync); -extern struct nfs4_slot *nfs4_alloc_slots(struct nfs4_slot_table *table, - u32 max_slots, gfp_t gfp_flags); extern void nfs41_set_target_slotid(struct nfs4_slot_table *tbl, u32 target_highest_slotid); +extern int nfs4_resize_slot_table(struct nfs4_slot_table *tbl, + u32 max_reqs, u32 ivalue); static inline bool is_ds_only_client(struct nfs_client *clp) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 1792ece8b53..fc65300172e 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -396,6 +396,27 @@ static void renew_lease(const struct nfs_server *server, unsigned long timestamp #if defined(CONFIG_NFS_V4_1) +/* + * nfs4_shrink_slot_table - free retired slots from the slot table + */ +static void nfs4_shrink_slot_table(struct nfs4_slot_table *tbl, u32 newsize) +{ + struct nfs4_slot **p; + if (newsize >= tbl->max_slots) + return; + + p = &tbl->slots; + while (newsize--) + p = &(*p)->next; + while (*p) { + struct nfs4_slot *slot = *p; + + *p = slot->next; + kfree(slot); + tbl->max_slots--; + } +} + /* * nfs4_free_slot - free a slot and efficiently update slot table. 
* @@ -499,7 +520,7 @@ static void nfs41_set_target_slotid_locked(struct nfs4_slot_table *tbl, tbl->target_highest_slotid = target_highest_slotid; tbl->generation++; - max_slotid = min(tbl->max_slots - 1, tbl->target_highest_slotid); + max_slotid = min(NFS4_MAX_SLOT_TABLE - 1, tbl->target_highest_slotid); for (i = tbl->max_slotid + 1; i <= max_slotid; i++) rpc_wake_up_next(&tbl->slot_tbl_waitq); tbl->max_slotid = max_slotid; @@ -516,16 +537,12 @@ void nfs41_set_target_slotid(struct nfs4_slot_table *tbl, static void nfs41_set_server_slotid_locked(struct nfs4_slot_table *tbl, u32 highest_slotid) { - unsigned int max_slotid, i; - if (tbl->server_highest_slotid == highest_slotid) return; if (tbl->highest_used_slotid > highest_slotid) return; - max_slotid = min(tbl->max_slots - 1, highest_slotid); - /* Reset the seq_nr for deallocated slots */ - for (i = tbl->server_highest_slotid + 1; i <= max_slotid; i++) - tbl->slots[i].seq_nr = 1; + /* Deallocate slots */ + nfs4_shrink_slot_table(tbl, highest_slotid + 1); tbl->server_highest_slotid = highest_slotid; } @@ -612,6 +629,42 @@ static int nfs4_sequence_done(struct rpc_task *task, return nfs41_sequence_done(task, res); } +static struct nfs4_slot *nfs4_new_slot(struct nfs4_slot_table *tbl, + u32 slotid, u32 seq_init, gfp_t gfp_mask) +{ + struct nfs4_slot *slot; + + slot = kzalloc(sizeof(*slot), gfp_mask); + if (slot) { + slot->table = tbl; + slot->slot_nr = slotid; + slot->seq_nr = seq_init; + } + return slot; +} + +static struct nfs4_slot *nfs4_find_or_create_slot(struct nfs4_slot_table *tbl, + u32 slotid, u32 seq_init, gfp_t gfp_mask) +{ + struct nfs4_slot **p, *slot; + + p = &tbl->slots; + for (;;) { + if (*p == NULL) { + *p = nfs4_new_slot(tbl, tbl->max_slots, + seq_init, gfp_mask); + if (*p == NULL) + break; + tbl->max_slots++; + } + slot = *p; + if (slot->slot_nr == slotid) + return slot; + p = &slot->next; + } + return NULL; +} + /* * nfs4_alloc_slot - efficiently look for a free slot * @@ -628,15 +681,17 @@ static 
struct nfs4_slot *nfs4_alloc_slot(struct nfs4_slot_table *tbl) dprintk("--> %s used_slots=%04lx highest_used=%u max_slots=%u\n", __func__, tbl->used_slots[0], tbl->highest_used_slotid, - tbl->max_slots); + tbl->max_slotid + 1); slotid = find_first_zero_bit(tbl->used_slots, tbl->max_slotid + 1); if (slotid > tbl->max_slotid) goto out; + ret = nfs4_find_or_create_slot(tbl, slotid, 1, GFP_NOWAIT); + if (ret == NULL) + goto out; __set_bit(slotid, tbl->used_slots); if (slotid > tbl->highest_used_slotid || tbl->highest_used_slotid == NFS4_NO_SLOT) tbl->highest_used_slotid = slotid; - ret = &tbl->slots[slotid]; ret->renewal_time = jiffies; ret->generation = tbl->generation; @@ -5718,67 +5773,56 @@ int nfs4_proc_get_lease_time(struct nfs_client *clp, struct nfs_fsinfo *fsinfo) return status; } -struct nfs4_slot *nfs4_alloc_slots(struct nfs4_slot_table *table, - u32 max_slots, gfp_t gfp_flags) +static int nfs4_grow_slot_table(struct nfs4_slot_table *tbl, + u32 max_reqs, u32 ivalue) { - struct nfs4_slot *tbl; - u32 i; - - tbl = kmalloc_array(max_slots, sizeof(*tbl), gfp_flags); - if (tbl != NULL) { - for (i = 0; i < max_slots; i++) { - tbl[i].table = table; - tbl[i].slot_nr = i; - } - } - return tbl; + if (max_reqs <= tbl->max_slots) + return 0; + if (nfs4_find_or_create_slot(tbl, max_reqs - 1, ivalue, GFP_NOFS)) + return 0; + return -ENOMEM; } -static void nfs4_add_and_init_slots(struct nfs4_slot_table *tbl, - struct nfs4_slot *new, - u32 max_slots, +static void nfs4_reset_slot_table(struct nfs4_slot_table *tbl, + u32 server_highest_slotid, u32 ivalue) { - struct nfs4_slot *old = NULL; - u32 i; + struct nfs4_slot **p; - spin_lock(&tbl->slot_tbl_lock); - if (new) { - old = tbl->slots; - tbl->slots = new; - tbl->max_slots = max_slots; + nfs4_shrink_slot_table(tbl, server_highest_slotid + 1); + p = &tbl->slots; + while (*p) { + (*p)->seq_nr = ivalue; + p = &(*p)->next; } tbl->highest_used_slotid = NFS4_NO_SLOT; - tbl->target_highest_slotid = max_slots - 1; - 
tbl->server_highest_slotid = max_slots - 1; - tbl->max_slotid = max_slots - 1; - for (i = 0; i < tbl->max_slots; i++) - tbl->slots[i].seq_nr = ivalue; - spin_unlock(&tbl->slot_tbl_lock); - kfree(old); + tbl->target_highest_slotid = server_highest_slotid; + tbl->server_highest_slotid = server_highest_slotid; + tbl->max_slotid = server_highest_slotid; } /* * (re)Initialise a slot table */ -static int nfs4_realloc_slot_table(struct nfs4_slot_table *tbl, u32 max_reqs, - u32 ivalue) +static int nfs4_realloc_slot_table(struct nfs4_slot_table *tbl, + u32 max_reqs, u32 ivalue) { - struct nfs4_slot *new = NULL; - int ret = -ENOMEM; + int ret; dprintk("--> %s: max_reqs=%u, tbl->max_slots %d\n", __func__, max_reqs, tbl->max_slots); - /* Does the newly negotiated max_reqs match the existing slot table? */ - if (max_reqs != tbl->max_slots) { - new = nfs4_alloc_slots(tbl, max_reqs, GFP_NOFS); - if (!new) - goto out; - } - ret = 0; + if (max_reqs > NFS4_MAX_SLOT_TABLE) + max_reqs = NFS4_MAX_SLOT_TABLE; + + ret = nfs4_grow_slot_table(tbl, max_reqs, ivalue); + if (ret) + goto out; + + spin_lock(&tbl->slot_tbl_lock); + nfs4_reset_slot_table(tbl, max_reqs - 1, ivalue); + spin_unlock(&tbl->slot_tbl_lock); - nfs4_add_and_init_slots(tbl, new, max_reqs, ivalue); dprintk("%s: tbl=%p slots=%p max_slots=%d\n", __func__, tbl, tbl->slots, tbl->max_slots); out: @@ -5786,18 +5830,28 @@ out: return ret; } +int nfs4_resize_slot_table(struct nfs4_slot_table *tbl, + u32 max_reqs, u32 ivalue) +{ + int ret; + + if (max_reqs > NFS4_MAX_SLOT_TABLE) + max_reqs = NFS4_MAX_SLOT_TABLE; + ret = nfs4_grow_slot_table(tbl, max_reqs, ivalue); + if (ret) + return ret; + spin_lock(&tbl->slot_tbl_lock); + nfs4_shrink_slot_table(tbl, max_reqs); + tbl->max_slotid = max_reqs - 1; + spin_unlock(&tbl->slot_tbl_lock); + return 0; +} + /* Destroy the slot table */ static void nfs4_destroy_slot_tables(struct nfs4_session *session) { - if (session->fc_slot_table.slots != NULL) { - kfree(session->fc_slot_table.slots); - 
session->fc_slot_table.slots = NULL; - } - if (session->bc_slot_table.slots != NULL) { - kfree(session->bc_slot_table.slots); - session->bc_slot_table.slots = NULL; - } - return; + nfs4_shrink_slot_table(&session->fc_slot_table, 0); + nfs4_shrink_slot_table(&session->bc_slot_table, 0); } /* diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 1b7fa73c943..c14b2c7ac8a 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -2025,29 +2025,15 @@ out: static int nfs4_recall_slot(struct nfs_client *clp) { struct nfs4_slot_table *fc_tbl; - struct nfs4_slot *new, *old; - int i; + u32 new_size; if (!nfs4_has_session(clp)) return 0; nfs4_begin_drain_session(clp); - fc_tbl = &clp->cl_session->fc_slot_table; - new = nfs4_alloc_slots(fc_tbl, fc_tbl->target_highest_slotid + 1, GFP_NOFS); - if (!new) - return -ENOMEM; - spin_lock(&fc_tbl->slot_tbl_lock); - for (i = 0; i <= fc_tbl->target_highest_slotid; i++) - new[i].seq_nr = fc_tbl->slots[i].seq_nr; - old = fc_tbl->slots; - fc_tbl->slots = new; - fc_tbl->max_slots = fc_tbl->target_highest_slotid + 1; - fc_tbl->max_slotid = fc_tbl->target_highest_slotid; - clp->cl_session->fc_attrs.max_reqs = fc_tbl->max_slots; - spin_unlock(&fc_tbl->slot_tbl_lock); - - kfree(old); - return 0; + fc_tbl = &clp->cl_session->fc_slot_table; + new_size = fc_tbl->server_highest_slotid + 1; + return nfs4_resize_slot_table(fc_tbl, new_size, 1); } static int nfs4_bind_conn_to_session(struct nfs_client *clp) -- cgit v1.2.3 From afa296103ea3841fdc81d9d66902fe49bb765527 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 20 Nov 2012 20:12:38 -0500 Subject: NFSv4.1: Remove the state manager code to resize the slot table The state manager no longer needs any special machinery to stop the session flow and resize the slot table. It is all done on the fly by the SEQUENCE op code now. 
Signed-off-by: Trond Myklebust --- fs/nfs/callback_proc.c | 1 - fs/nfs/nfs4_fs.h | 4 ---- fs/nfs/nfs4proc.c | 17 ----------------- fs/nfs/nfs4state.c | 33 --------------------------------- 4 files changed, 55 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index a0546eca6f6..8610bd1d136 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -570,7 +570,6 @@ __be32 nfs4_callback_recallslot(struct cb_recallslotargs *args, void *dummy, status = htonl(NFS4_OK); nfs41_set_target_slotid(fc_tbl, args->crsa_target_highest_slotid); - nfs41_handle_recall_slot(cps->clp); out: dprintk("%s: exit with status = %d\n", __func__, ntohl(status)); return status; diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 856bc496a21..fa1a055a8fe 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -21,7 +21,6 @@ enum nfs4_client_state { NFS4CLNT_RECLAIM_NOGRACE, NFS4CLNT_DELEGRETURN, NFS4CLNT_SESSION_RESET, - NFS4CLNT_RECALL_SLOT, NFS4CLNT_LEASE_CONFIRM, NFS4CLNT_SERVER_SCOPE_MISMATCH, NFS4CLNT_PURGE_STATE, @@ -260,8 +259,6 @@ extern int nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data, extern void nfs41_set_target_slotid(struct nfs4_slot_table *tbl, u32 target_highest_slotid); -extern int nfs4_resize_slot_table(struct nfs4_slot_table *tbl, - u32 max_reqs, u32 ivalue); static inline bool is_ds_only_client(struct nfs_client *clp) @@ -358,7 +355,6 @@ extern void nfs4_schedule_state_manager(struct nfs_client *); extern void nfs4_schedule_path_down_recovery(struct nfs_client *clp); extern void nfs4_schedule_stateid_recovery(const struct nfs_server *, struct nfs4_state *); extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags); -extern void nfs41_handle_recall_slot(struct nfs_client *clp); extern void nfs41_handle_server_scope(struct nfs_client *, struct nfs41_server_scope **); extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 
fc65300172e..0642e28704d 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5830,23 +5830,6 @@ out: return ret; } -int nfs4_resize_slot_table(struct nfs4_slot_table *tbl, - u32 max_reqs, u32 ivalue) -{ - int ret; - - if (max_reqs > NFS4_MAX_SLOT_TABLE) - max_reqs = NFS4_MAX_SLOT_TABLE; - ret = nfs4_grow_slot_table(tbl, max_reqs, ivalue); - if (ret) - return ret; - spin_lock(&tbl->slot_tbl_lock); - nfs4_shrink_slot_table(tbl, max_reqs); - tbl->max_slotid = max_reqs - 1; - spin_unlock(&tbl->slot_tbl_lock); - return 0; -} - /* Destroy the slot table */ static void nfs4_destroy_slot_tables(struct nfs4_session *session) { diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index c14b2c7ac8a..3940cd43fa9 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -302,7 +302,6 @@ static void nfs41_finish_session_reset(struct nfs_client *clp) clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state); clear_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state); /* create_session negotiated new slot table */ - clear_bit(NFS4CLNT_RECALL_SLOT, &clp->cl_state); clear_bit(NFS4CLNT_BIND_CONN_TO_SESSION, &clp->cl_state); nfs41_setup_state_renewal(clp); } @@ -1905,14 +1904,6 @@ void nfs4_schedule_session_recovery(struct nfs4_session *session, int err) } EXPORT_SYMBOL_GPL(nfs4_schedule_session_recovery); -void nfs41_handle_recall_slot(struct nfs_client *clp) -{ - set_bit(NFS4CLNT_RECALL_SLOT, &clp->cl_state); - dprintk("%s: scheduling slot recall for server %s\n", __func__, - clp->cl_hostname); - nfs4_schedule_state_manager(clp); -} - static void nfs4_reset_all_state(struct nfs_client *clp) { if (test_and_set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0) { @@ -2022,20 +2013,6 @@ out: return status; } -static int nfs4_recall_slot(struct nfs_client *clp) -{ - struct nfs4_slot_table *fc_tbl; - u32 new_size; - - if (!nfs4_has_session(clp)) - return 0; - nfs4_begin_drain_session(clp); - - fc_tbl = &clp->cl_session->fc_slot_table; - new_size = fc_tbl->server_highest_slotid + 1; - return 
nfs4_resize_slot_table(fc_tbl, new_size, 1); -} - static int nfs4_bind_conn_to_session(struct nfs_client *clp) { struct rpc_cred *cred; @@ -2066,7 +2043,6 @@ static int nfs4_bind_conn_to_session(struct nfs_client *clp) #else /* CONFIG_NFS_V4_1 */ static int nfs4_reset_session(struct nfs_client *clp) { return 0; } static int nfs4_end_drain_session(struct nfs_client *clp) { return 0; } -static int nfs4_recall_slot(struct nfs_client *clp) { return 0; } static int nfs4_bind_conn_to_session(struct nfs_client *clp) { @@ -2126,15 +2102,6 @@ static void nfs4_state_manager(struct nfs_client *clp) continue; } - /* Recall session slots */ - if (test_and_clear_bit(NFS4CLNT_RECALL_SLOT, &clp->cl_state)) { - section = "recall slot"; - status = nfs4_recall_slot(clp); - if (status < 0) - goto out_error; - continue; - } - /* First recover reboot state... */ if (test_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state)) { section = "reclaim reboot"; -- cgit v1.2.3 From ac0748359a55faf4618f5f0bd9f9bf967c41d218 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 21 Nov 2012 09:06:11 -0500 Subject: NFSv4.1: CB_RECALL_SLOT must schedule a sequence op after updating targets RFC5661 requires us to make sure that the server knows we've updated our slot table size by sending at least one SEQUENCE op containing the new 'highest_slotid' value. We can do so using the 'CHECK_LEASE' functionality of the state manager. 
Signed-off-by: Trond Myklebust --- fs/nfs/callback_proc.c | 1 + fs/nfs/nfs4_fs.h | 1 + fs/nfs/nfs4state.c | 12 ++++++++++++ 3 files changed, 14 insertions(+) (limited to 'fs/nfs') diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 8610bd1d136..f99faad78c7 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -570,6 +570,7 @@ __be32 nfs4_callback_recallslot(struct cb_recallslotargs *args, void *dummy, status = htonl(NFS4_OK); nfs41_set_target_slotid(fc_tbl, args->crsa_target_highest_slotid); + nfs41_server_notify_target_slotid_update(cps->clp); out: dprintk("%s: exit with status = %d\n", __func__, ntohl(status)); return status; diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index fa1a055a8fe..0a109ec75e6 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -334,6 +334,7 @@ struct rpc_cred *nfs4_get_exchange_id_cred(struct nfs_client *clp); int nfs41_discover_server_trunking(struct nfs_client *clp, struct nfs_client **, struct rpc_cred *); extern void nfs4_schedule_session_recovery(struct nfs4_session *, int); +extern void nfs41_server_notify_target_slotid_update(struct nfs_client *clp); #else static inline void nfs4_schedule_session_recovery(struct nfs4_session *session, int err) { diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 3940cd43fa9..896be2126f7 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1904,6 +1904,18 @@ void nfs4_schedule_session_recovery(struct nfs4_session *session, int err) } EXPORT_SYMBOL_GPL(nfs4_schedule_session_recovery); +static void nfs41_ping_server(struct nfs_client *clp) +{ + /* Use CHECK_LEASE to ping the server with a SEQUENCE */ + set_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state); + nfs4_schedule_state_manager(clp); +} + +void nfs41_server_notify_target_slotid_update(struct nfs_client *clp) +{ + nfs41_ping_server(clp); +} + static void nfs4_reset_all_state(struct nfs_client *clp) { if (test_and_set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0) { -- cgit v1.2.3 From 
69d206b5b39e298755b60e8e7056cb240182eb95 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 22 Nov 2012 13:21:02 -0500 Subject: NFSv4.1: If slot allocation fails due to OOM, retry more quickly If the NFSv4.1 session slot allocation fails due to an ENOMEM condition, then set the task->tk_timeout to 1/4 second to ensure that we do retry the slot allocation more quickly. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 0642e28704d..e9e4d6393f1 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -662,7 +662,7 @@ static struct nfs4_slot *nfs4_find_or_create_slot(struct nfs4_slot_table *tbl, return slot; p = &slot->next; } - return NULL; + return ERR_PTR(-ENOMEM); } /* @@ -676,7 +676,7 @@ static struct nfs4_slot *nfs4_find_or_create_slot(struct nfs4_slot_table *tbl, */ static struct nfs4_slot *nfs4_alloc_slot(struct nfs4_slot_table *tbl) { - struct nfs4_slot *ret = NULL; + struct nfs4_slot *ret = ERR_PTR(-EBUSY); u32 slotid; dprintk("--> %s used_slots=%04lx highest_used=%u max_slots=%u\n", @@ -686,7 +686,7 @@ static struct nfs4_slot *nfs4_alloc_slot(struct nfs4_slot_table *tbl) if (slotid > tbl->max_slotid) goto out; ret = nfs4_find_or_create_slot(tbl, slotid, 1, GFP_NOWAIT); - if (ret == NULL) + if (IS_ERR(ret)) goto out; __set_bit(slotid, tbl->used_slots); if (slotid > tbl->highest_used_slotid || @@ -698,7 +698,7 @@ static struct nfs4_slot *nfs4_alloc_slot(struct nfs4_slot_table *tbl) out: dprintk("<-- %s used_slots=%04lx highest_used=%d slotid=%d \n", __func__, tbl->used_slots[0], tbl->highest_used_slotid, - ret ? ret->slot_nr : -1); + !IS_ERR(ret) ? 
ret->slot_nr : -1); return ret; } @@ -727,6 +727,8 @@ int nfs41_setup_sequence(struct nfs4_session *session, tbl = &session->fc_slot_table; + task->tk_timeout = 0; + spin_lock(&tbl->slot_tbl_lock); if (test_bit(NFS4_SESSION_DRAINING, &session->session_state) && !rpc_task_has_priority(task, RPC_PRIORITY_PRIVILEGED)) { @@ -746,7 +748,10 @@ int nfs41_setup_sequence(struct nfs4_session *session, } slot = nfs4_alloc_slot(tbl); - if (slot == NULL) { + if (IS_ERR(slot)) { + /* If out of memory, try again in 1/4 second */ + if (slot == ERR_PTR(-ENOMEM)) + task->tk_timeout = HZ >> 2; rpc_sleep_on(&tbl->slot_tbl_waitq, task, NULL); spin_unlock(&tbl->slot_tbl_lock); dprintk("<-- %s: no free slots\n", __func__); @@ -5778,7 +5783,7 @@ static int nfs4_grow_slot_table(struct nfs4_slot_table *tbl, { if (max_reqs <= tbl->max_slots) return 0; - if (nfs4_find_or_create_slot(tbl, max_reqs - 1, ivalue, GFP_NOFS)) + if (!IS_ERR(nfs4_find_or_create_slot(tbl, max_reqs - 1, ivalue, GFP_NOFS))) return 0; return -ENOMEM; } -- cgit v1.2.3 From 5d63360dd8daffc2bc86531e9a44ff9d4881b102 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 23 Nov 2012 13:09:38 -0500 Subject: NFSv4.1: Clean up session draining Coalesce nfs4_check_drain_bc_complete and nfs4_check_drain_fc_complete into a single function that can be called when the slot table is known to be empty, then change nfs4_callback_free_slot() and nfs4_free_slot() to use it. 
Signed-off-by: Trond Myklebust --- fs/nfs/callback.h | 2 -- fs/nfs/callback_xdr.c | 2 +- fs/nfs/nfs4_fs.h | 8 ++++++++ fs/nfs/nfs4proc.c | 38 ++++++-------------------------------- fs/nfs/nfs4state.c | 10 ++++++++++ 5 files changed, 25 insertions(+), 35 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h index e75631e264f..efd54f0a4c4 100644 --- a/fs/nfs/callback.h +++ b/fs/nfs/callback.h @@ -167,8 +167,6 @@ extern __be32 nfs4_callback_layoutrecall( struct cb_layoutrecallargs *args, void *dummy, struct cb_process_state *cps); -extern void nfs4_check_drain_bc_complete(struct nfs4_session *ses); - struct cb_devicenotifyitem { uint32_t cbd_notify_type; uint32_t cbd_layout_type; diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index 81e8c7d4c2e..ea6a7b190e6 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -762,7 +762,7 @@ static void nfs4_callback_free_slot(struct nfs4_session *session) * A single slot, so highest used slotid is either 0 or -1 */ tbl->highest_used_slotid = NFS4_NO_SLOT; - nfs4_check_drain_bc_complete(session); + nfs4_session_drain_complete(session, tbl); spin_unlock(&tbl->slot_tbl_lock); } diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 0a109ec75e6..16b19372c4b 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -335,6 +335,14 @@ int nfs41_discover_server_trunking(struct nfs_client *clp, struct nfs_client **, struct rpc_cred *); extern void nfs4_schedule_session_recovery(struct nfs4_session *, int); extern void nfs41_server_notify_target_slotid_update(struct nfs_client *clp); + +extern void nfs4_session_drain_complete(struct nfs4_session *session, + struct nfs4_slot_table *tbl); + +static inline bool nfs4_session_draining(struct nfs4_session *session) +{ + return !!test_bit(NFS4_SESSION_DRAINING, &session->session_state); +} #else static inline void nfs4_schedule_session_recovery(struct nfs4_session *session, int err) { diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 
e9e4d6393f1..0b0f11be40f 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -445,8 +445,10 @@ nfs4_free_slot(struct nfs4_slot_table *tbl, struct nfs4_slot *slot) u32 new_max = find_last_bit(tbl->used_slots, slotid); if (new_max < slotid) tbl->highest_used_slotid = new_max; - else + else { tbl->highest_used_slotid = NFS4_NO_SLOT; + nfs4_session_drain_complete(tbl->session, tbl); + } } dprintk("%s: slotid %u highest_used_slotid %d\n", __func__, slotid, tbl->highest_used_slotid); @@ -458,36 +460,6 @@ bool nfs4_set_task_privileged(struct rpc_task *task, void *dummy) return true; } -/* - * Signal state manager thread if session fore channel is drained - */ -static void nfs4_check_drain_fc_complete(struct nfs4_session *ses) -{ - if (!test_bit(NFS4_SESSION_DRAINING, &ses->session_state)) { - rpc_wake_up_first(&ses->fc_slot_table.slot_tbl_waitq, - nfs4_set_task_privileged, NULL); - return; - } - - if (ses->fc_slot_table.highest_used_slotid != NFS4_NO_SLOT) - return; - - dprintk("%s COMPLETE: Session Fore Channel Drained\n", __func__); - complete(&ses->fc_slot_table.complete); -} - -/* - * Signal state manager thread if session back channel is drained - */ -void nfs4_check_drain_bc_complete(struct nfs4_session *ses) -{ - if (!test_bit(NFS4_SESSION_DRAINING, &ses->session_state) || - ses->bc_slot_table.highest_used_slotid != NFS4_NO_SLOT) - return; - dprintk("%s COMPLETE: Session Back Channel Drained\n", __func__); - complete(&ses->bc_slot_table.complete); -} - static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res) { struct nfs4_session *session; @@ -504,7 +476,9 @@ static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res) spin_lock(&tbl->slot_tbl_lock); nfs4_free_slot(tbl, res->sr_slot); - nfs4_check_drain_fc_complete(session); + if (!nfs4_session_draining(session)) + rpc_wake_up_first(&tbl->slot_tbl_waitq, + nfs4_set_task_privileged, NULL); spin_unlock(&tbl->slot_tbl_lock); res->sr_slot = NULL; } diff --git a/fs/nfs/nfs4state.c 
b/fs/nfs/nfs4state.c index 896be2126f7..1fb3e6c6f99 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -271,6 +271,16 @@ static void nfs4_end_drain_session(struct nfs_client *clp) } } +/* + * Signal state manager thread if session fore channel is drained + */ +void nfs4_session_drain_complete(struct nfs4_session *session, + struct nfs4_slot_table *tbl) +{ + if (nfs4_session_draining(session)) + complete(&tbl->complete); +} + static int nfs4_wait_on_slot_tbl(struct nfs4_slot_table *tbl) { spin_lock(&tbl->slot_tbl_lock); -- cgit v1.2.3 From 330212796756ca2752b2a70a83860e145b77487c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 26 Nov 2012 13:13:29 -0500 Subject: NFSv4: Move nfs4_wait_clnt_recover and nfs4_client_recover_expired_lease nfs4_wait_clnt_recover and nfs4_client_recover_expired_lease are both generic state related functions. As such, they belong in nfs4state.c, and not nfs4proc.c Signed-off-by: Trond Myklebust --- fs/nfs/nfs4_fs.h | 4 ++++ fs/nfs/nfs4proc.c | 36 ------------------------------------ fs/nfs/nfs4state.c | 34 ++++++++++++++++++++++++++++++++++ 3 files changed, 38 insertions(+), 36 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 16b19372c4b..2f6a9f9d929 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -11,6 +11,8 @@ #if IS_ENABLED(CONFIG_NFS_V4) +#define NFS4_MAX_LOOP_ON_RECOVER (10) + struct idmap; enum nfs4_client_state { @@ -360,6 +362,8 @@ extern void nfs4_state_set_mode_locked(struct nfs4_state *, fmode_t); extern void nfs_inode_find_state_and_recover(struct inode *inode, const nfs4_stateid *stateid); extern void nfs4_schedule_lease_recovery(struct nfs_client *); +extern int nfs4_wait_clnt_recover(struct nfs_client *clp); +extern int nfs4_client_recover_expired_lease(struct nfs_client *clp); extern void nfs4_schedule_state_manager(struct nfs_client *); extern void nfs4_schedule_path_down_recovery(struct nfs_client *clp); extern void nfs4_schedule_stateid_recovery(const 
struct nfs_server *, struct nfs4_state *); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 0b0f11be40f..d75e2a2576e 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -70,8 +70,6 @@ #define NFS4_POLL_RETRY_MIN (HZ/10) #define NFS4_POLL_RETRY_MAX (15*HZ) -#define NFS4_MAX_LOOP_ON_RECOVER (10) - struct nfs4_opendata; static int _nfs4_proc_open(struct nfs4_opendata *data); static int _nfs4_recover_proc_open(struct nfs4_opendata *data); @@ -255,22 +253,6 @@ static void nfs4_setup_readdir(u64 cookie, __be32 *verifier, struct dentry *dent kunmap_atomic(start); } -static int nfs4_wait_clnt_recover(struct nfs_client *clp) -{ - int res; - - might_sleep(); - - res = wait_on_bit(&clp->cl_state, NFS4CLNT_MANAGER_RUNNING, - nfs_wait_bit_killable, TASK_KILLABLE); - if (res) - return res; - - if (clp->cl_cons_state < 0) - return clp->cl_cons_state; - return 0; -} - static int nfs4_delay(struct rpc_clnt *clnt, long *timeout) { int res = 0; @@ -1883,24 +1865,6 @@ static int _nfs4_proc_open(struct nfs4_opendata *data) return 0; } -static int nfs4_client_recover_expired_lease(struct nfs_client *clp) -{ - unsigned int loop; - int ret; - - for (loop = NFS4_MAX_LOOP_ON_RECOVER; loop != 0; loop--) { - ret = nfs4_wait_clnt_recover(clp); - if (ret != 0) - break; - if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) && - !test_bit(NFS4CLNT_CHECK_LEASE,&clp->cl_state)) - break; - nfs4_schedule_state_manager(clp); - ret = -EIO; - } - return ret; -} - static int nfs4_recover_expired_lease(struct nfs_server *server) { return nfs4_client_recover_expired_lease(server->nfs_client); diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 1fb3e6c6f99..1077b969838 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1216,6 +1216,40 @@ void nfs4_schedule_lease_recovery(struct nfs_client *clp) } EXPORT_SYMBOL_GPL(nfs4_schedule_lease_recovery); +int nfs4_wait_clnt_recover(struct nfs_client *clp) +{ + int res; + + might_sleep(); + + res = wait_on_bit(&clp->cl_state, 
NFS4CLNT_MANAGER_RUNNING, + nfs_wait_bit_killable, TASK_KILLABLE); + if (res) + return res; + + if (clp->cl_cons_state < 0) + return clp->cl_cons_state; + return 0; +} + +int nfs4_client_recover_expired_lease(struct nfs_client *clp) +{ + unsigned int loop; + int ret; + + for (loop = NFS4_MAX_LOOP_ON_RECOVER; loop != 0; loop--) { + ret = nfs4_wait_clnt_recover(clp); + if (ret != 0) + break; + if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) && + !test_bit(NFS4CLNT_CHECK_LEASE,&clp->cl_state)) + break; + nfs4_schedule_state_manager(clp); + ret = -EIO; + } + return ret; +} + /* * nfs40_handle_cb_pathdown - return all delegations after NFS4ERR_CB_PATH_DOWN * @clp: client to process -- cgit v1.2.3 From 73e39aaa8366694450cd6034050f542f965e277d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 26 Nov 2012 12:49:34 -0500 Subject: NFSv4.1: Cleanup move session slot management to fs/nfs/nfs4session.c NFSv4.1 session management is getting complex enough to deserve a separate file. Signed-off-by: Trond Myklebust --- fs/nfs/Makefile | 2 +- fs/nfs/callback_proc.c | 1 + fs/nfs/internal.h | 2 - fs/nfs/nfs4_fs.h | 11 -- fs/nfs/nfs4client.c | 1 + fs/nfs/nfs4filelayoutdev.c | 1 + fs/nfs/nfs4proc.c | 415 +----------------------------------------- fs/nfs/nfs4session.c | 436 +++++++++++++++++++++++++++++++++++++++++++++ fs/nfs/nfs4session.h | 35 ++++ 9 files changed, 477 insertions(+), 427 deletions(-) create mode 100644 fs/nfs/nfs4session.c create mode 100644 fs/nfs/nfs4session.h (limited to 'fs/nfs') diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile index b7db60897f9..cce2c057bd2 100644 --- a/fs/nfs/Makefile +++ b/fs/nfs/Makefile @@ -24,7 +24,7 @@ nfsv4-y := nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o nfs4super.o nfs4file.o delegation.o idmap.o callback.o callback_xdr.o callback_proc.o \ nfs4namespace.o nfs4getroot.o nfs4client.o nfsv4-$(CONFIG_SYSCTL) += nfs4sysctl.o -nfsv4-$(CONFIG_NFS_V4_1) += pnfs.o pnfs_dev.o +nfsv4-$(CONFIG_NFS_V4_1) += nfs4session.o pnfs.o 
pnfs_dev.o obj-$(CONFIG_PNFS_FILE_LAYOUT) += nfs_layout_nfsv41_files.o nfs_layout_nfsv41_files-y := nfs4filelayout.o nfs4filelayoutdev.o diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index f99faad78c7..c89b26bc975 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -14,6 +14,7 @@ #include "delegation.h" #include "internal.h" #include "pnfs.h" +#include "nfs4session.h" #ifdef NFS_DEBUG #define NFSDBG_FACILITY NFSDBG_CALLBACK diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 05521cadac2..8965a998b30 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -276,8 +276,6 @@ extern const u32 nfs41_maxwrite_overhead; extern struct rpc_procinfo nfs4_procedures[]; #endif -extern int nfs4_init_ds_session(struct nfs_client *, unsigned long); - /* proc.c */ void nfs_close_context(struct nfs_open_context *ctx, int is_sync); extern struct nfs_client *nfs_init_client(struct nfs_client *clp, diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 2f6a9f9d929..cd3e3096b60 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -249,19 +249,13 @@ extern int nfs4_setup_sequence(const struct nfs_server *server, extern int nfs41_setup_sequence(struct nfs4_session *session, struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, struct rpc_task *task); -extern void nfs4_destroy_session(struct nfs4_session *session); -extern struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp); extern int nfs4_proc_create_session(struct nfs_client *, struct rpc_cred *); extern int nfs4_proc_destroy_session(struct nfs4_session *, struct rpc_cred *); -extern int nfs4_init_session(struct nfs_server *server); extern int nfs4_proc_get_lease_time(struct nfs_client *clp, struct nfs_fsinfo *fsinfo); extern int nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data, bool sync); -extern void nfs41_set_target_slotid(struct nfs4_slot_table *tbl, - u32 target_highest_slotid); - static inline bool is_ds_only_client(struct nfs_client *clp) { @@ -287,11 
+281,6 @@ static inline int nfs4_setup_sequence(const struct nfs_server *server, return 0; } -static inline int nfs4_init_session(struct nfs_server *server) -{ - return 0; -} - static inline bool is_ds_only_client(struct nfs_client *clp) { diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 72717e67b34..acc34726812 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -12,6 +12,7 @@ #include "internal.h" #include "callback.h" #include "delegation.h" +#include "nfs4session.h" #include "pnfs.h" #include "netns.h" diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c index 93e2530d709..b720064bcd7 100644 --- a/fs/nfs/nfs4filelayoutdev.c +++ b/fs/nfs/nfs4filelayoutdev.c @@ -33,6 +33,7 @@ #include #include "internal.h" +#include "nfs4session.h" #include "nfs4filelayout.h" #define NFSDBG_FACILITY NFSDBG_PNFS_LD diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index d75e2a2576e..a0c35ab12a6 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -52,7 +52,6 @@ #include #include #include -#include #include #include #include @@ -64,6 +63,8 @@ #include "callback.h" #include "pnfs.h" #include "netns.h" +#include "nfs4session.h" + #define NFSDBG_FACILITY NFSDBG_PROC @@ -378,64 +379,6 @@ static void renew_lease(const struct nfs_server *server, unsigned long timestamp #if defined(CONFIG_NFS_V4_1) -/* - * nfs4_shrink_slot_table - free retired slots from the slot table - */ -static void nfs4_shrink_slot_table(struct nfs4_slot_table *tbl, u32 newsize) -{ - struct nfs4_slot **p; - if (newsize >= tbl->max_slots) - return; - - p = &tbl->slots; - while (newsize--) - p = &(*p)->next; - while (*p) { - struct nfs4_slot *slot = *p; - - *p = slot->next; - kfree(slot); - tbl->max_slots--; - } -} - -/* - * nfs4_free_slot - free a slot and efficiently update slot table. - * - * freeing a slot is trivially done by clearing its respective bit - * in the bitmap. 
- * If the freed slotid equals highest_used_slotid we want to update it - * so that the server would be able to size down the slot table if needed, - * otherwise we know that the highest_used_slotid is still in use. - * When updating highest_used_slotid there may be "holes" in the bitmap - * so we need to scan down from highest_used_slotid to 0 looking for the now - * highest slotid in use. - * If none found, highest_used_slotid is set to NFS4_NO_SLOT. - * - * Must be called while holding tbl->slot_tbl_lock - */ -static void -nfs4_free_slot(struct nfs4_slot_table *tbl, struct nfs4_slot *slot) -{ - u32 slotid = slot->slot_nr; - - /* clear used bit in bitmap */ - __clear_bit(slotid, tbl->used_slots); - - /* update highest_used_slotid when it is freed */ - if (slotid == tbl->highest_used_slotid) { - u32 new_max = find_last_bit(tbl->used_slots, slotid); - if (new_max < slotid) - tbl->highest_used_slotid = new_max; - else { - tbl->highest_used_slotid = NFS4_NO_SLOT; - nfs4_session_drain_complete(tbl->session, tbl); - } - } - dprintk("%s: slotid %u highest_used_slotid %d\n", __func__, - slotid, tbl->highest_used_slotid); -} - bool nfs4_set_task_privileged(struct rpc_task *task, void *dummy) { rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED); @@ -465,56 +408,6 @@ static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res) res->sr_slot = NULL; } -/* Update the client's idea of target_highest_slotid */ -static void nfs41_set_target_slotid_locked(struct nfs4_slot_table *tbl, - u32 target_highest_slotid) -{ - unsigned int max_slotid, i; - - if (tbl->target_highest_slotid == target_highest_slotid) - return; - tbl->target_highest_slotid = target_highest_slotid; - tbl->generation++; - - max_slotid = min(NFS4_MAX_SLOT_TABLE - 1, tbl->target_highest_slotid); - for (i = tbl->max_slotid + 1; i <= max_slotid; i++) - rpc_wake_up_next(&tbl->slot_tbl_waitq); - tbl->max_slotid = max_slotid; -} - -void nfs41_set_target_slotid(struct nfs4_slot_table *tbl, - u32 
target_highest_slotid) -{ - spin_lock(&tbl->slot_tbl_lock); - nfs41_set_target_slotid_locked(tbl, target_highest_slotid); - spin_unlock(&tbl->slot_tbl_lock); -} - -static void nfs41_set_server_slotid_locked(struct nfs4_slot_table *tbl, - u32 highest_slotid) -{ - if (tbl->server_highest_slotid == highest_slotid) - return; - if (tbl->highest_used_slotid > highest_slotid) - return; - /* Deallocate slots */ - nfs4_shrink_slot_table(tbl, highest_slotid + 1); - tbl->server_highest_slotid = highest_slotid; -} - -static void nfs41_update_target_slotid(struct nfs4_slot_table *tbl, - struct nfs4_slot *slot, - struct nfs4_sequence_res *res) -{ - spin_lock(&tbl->slot_tbl_lock); - if (tbl->generation != slot->generation) - goto out; - nfs41_set_server_slotid_locked(tbl, res->sr_highest_slotid); - nfs41_set_target_slotid_locked(tbl, res->sr_target_highest_slotid); -out: - spin_unlock(&tbl->slot_tbl_lock); -} - static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res) { struct nfs4_session *session; @@ -585,79 +478,6 @@ static int nfs4_sequence_done(struct rpc_task *task, return nfs41_sequence_done(task, res); } -static struct nfs4_slot *nfs4_new_slot(struct nfs4_slot_table *tbl, - u32 slotid, u32 seq_init, gfp_t gfp_mask) -{ - struct nfs4_slot *slot; - - slot = kzalloc(sizeof(*slot), gfp_mask); - if (slot) { - slot->table = tbl; - slot->slot_nr = slotid; - slot->seq_nr = seq_init; - } - return slot; -} - -static struct nfs4_slot *nfs4_find_or_create_slot(struct nfs4_slot_table *tbl, - u32 slotid, u32 seq_init, gfp_t gfp_mask) -{ - struct nfs4_slot **p, *slot; - - p = &tbl->slots; - for (;;) { - if (*p == NULL) { - *p = nfs4_new_slot(tbl, tbl->max_slots, - seq_init, gfp_mask); - if (*p == NULL) - break; - tbl->max_slots++; - } - slot = *p; - if (slot->slot_nr == slotid) - return slot; - p = &slot->next; - } - return ERR_PTR(-ENOMEM); -} - -/* - * nfs4_alloc_slot - efficiently look for a free slot - * - * nfs4_alloc_slot looks for an unset bit in the 
used_slots bitmap. - * If found, we mark the slot as used, update the highest_used_slotid, - * and respectively set up the sequence operation args. - * - * Note: must be called with under the slot_tbl_lock. - */ -static struct nfs4_slot *nfs4_alloc_slot(struct nfs4_slot_table *tbl) -{ - struct nfs4_slot *ret = ERR_PTR(-EBUSY); - u32 slotid; - - dprintk("--> %s used_slots=%04lx highest_used=%u max_slots=%u\n", - __func__, tbl->used_slots[0], tbl->highest_used_slotid, - tbl->max_slotid + 1); - slotid = find_first_zero_bit(tbl->used_slots, tbl->max_slotid + 1); - if (slotid > tbl->max_slotid) - goto out; - ret = nfs4_find_or_create_slot(tbl, slotid, 1, GFP_NOWAIT); - if (IS_ERR(ret)) - goto out; - __set_bit(slotid, tbl->used_slots); - if (slotid > tbl->highest_used_slotid || - tbl->highest_used_slotid == NFS4_NO_SLOT) - tbl->highest_used_slotid = slotid; - ret->renewal_time = jiffies; - ret->generation = tbl->generation; - -out: - dprintk("<-- %s used_slots=%04lx highest_used=%d slotid=%d \n", - __func__, tbl->used_slots[0], tbl->highest_used_slotid, - !IS_ERR(ret) ? 
ret->slot_nr : -1); - return ret; -} - static void nfs41_init_sequence(struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, int cache_reply) { @@ -5716,143 +5536,6 @@ int nfs4_proc_get_lease_time(struct nfs_client *clp, struct nfs_fsinfo *fsinfo) return status; } -static int nfs4_grow_slot_table(struct nfs4_slot_table *tbl, - u32 max_reqs, u32 ivalue) -{ - if (max_reqs <= tbl->max_slots) - return 0; - if (!IS_ERR(nfs4_find_or_create_slot(tbl, max_reqs - 1, ivalue, GFP_NOFS))) - return 0; - return -ENOMEM; -} - -static void nfs4_reset_slot_table(struct nfs4_slot_table *tbl, - u32 server_highest_slotid, - u32 ivalue) -{ - struct nfs4_slot **p; - - nfs4_shrink_slot_table(tbl, server_highest_slotid + 1); - p = &tbl->slots; - while (*p) { - (*p)->seq_nr = ivalue; - p = &(*p)->next; - } - tbl->highest_used_slotid = NFS4_NO_SLOT; - tbl->target_highest_slotid = server_highest_slotid; - tbl->server_highest_slotid = server_highest_slotid; - tbl->max_slotid = server_highest_slotid; -} - -/* - * (re)Initialise a slot table - */ -static int nfs4_realloc_slot_table(struct nfs4_slot_table *tbl, - u32 max_reqs, u32 ivalue) -{ - int ret; - - dprintk("--> %s: max_reqs=%u, tbl->max_slots %d\n", __func__, - max_reqs, tbl->max_slots); - - if (max_reqs > NFS4_MAX_SLOT_TABLE) - max_reqs = NFS4_MAX_SLOT_TABLE; - - ret = nfs4_grow_slot_table(tbl, max_reqs, ivalue); - if (ret) - goto out; - - spin_lock(&tbl->slot_tbl_lock); - nfs4_reset_slot_table(tbl, max_reqs - 1, ivalue); - spin_unlock(&tbl->slot_tbl_lock); - - dprintk("%s: tbl=%p slots=%p max_slots=%d\n", __func__, - tbl, tbl->slots, tbl->max_slots); -out: - dprintk("<-- %s: return %d\n", __func__, ret); - return ret; -} - -/* Destroy the slot table */ -static void nfs4_destroy_slot_tables(struct nfs4_session *session) -{ - nfs4_shrink_slot_table(&session->fc_slot_table, 0); - nfs4_shrink_slot_table(&session->bc_slot_table, 0); -} - -/* - * Initialize or reset the forechannel and backchannel tables - */ -static int 
nfs4_setup_session_slot_tables(struct nfs4_session *ses) -{ - struct nfs4_slot_table *tbl; - int status; - - dprintk("--> %s\n", __func__); - /* Fore channel */ - tbl = &ses->fc_slot_table; - tbl->session = ses; - status = nfs4_realloc_slot_table(tbl, ses->fc_attrs.max_reqs, 1); - if (status) /* -ENOMEM */ - return status; - /* Back channel */ - tbl = &ses->bc_slot_table; - tbl->session = ses; - status = nfs4_realloc_slot_table(tbl, ses->bc_attrs.max_reqs, 0); - if (status && tbl->slots == NULL) - /* Fore and back channel share a connection so get - * both slot tables or neither */ - nfs4_destroy_slot_tables(ses); - return status; -} - -struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp) -{ - struct nfs4_session *session; - struct nfs4_slot_table *tbl; - - session = kzalloc(sizeof(struct nfs4_session), GFP_NOFS); - if (!session) - return NULL; - - tbl = &session->fc_slot_table; - tbl->highest_used_slotid = NFS4_NO_SLOT; - spin_lock_init(&tbl->slot_tbl_lock); - rpc_init_priority_wait_queue(&tbl->slot_tbl_waitq, "ForeChannel Slot table"); - init_completion(&tbl->complete); - - tbl = &session->bc_slot_table; - tbl->highest_used_slotid = NFS4_NO_SLOT; - spin_lock_init(&tbl->slot_tbl_lock); - rpc_init_wait_queue(&tbl->slot_tbl_waitq, "BackChannel Slot table"); - init_completion(&tbl->complete); - - session->session_state = 1<<NFS4_SESSION_INITING; - - session->clp = clp; - return session; -} - -void nfs4_destroy_session(struct nfs4_session *session) -{ - struct rpc_xprt *xprt; - struct rpc_cred *cred; - - cred = nfs4_get_exchange_id_cred(session->clp); - nfs4_proc_destroy_session(session, cred); - if (cred) - put_rpccred(cred); - - rcu_read_lock(); - xprt = rcu_dereference(session->clp->cl_rpcclient->cl_xprt); - rcu_read_unlock(); - dprintk("%s Destroy backchannel for xprt %p\n", - __func__, xprt); - xprt_destroy_backchannel(xprt, NFS41_BC_MIN_CALLBACKS); - nfs4_destroy_slot_tables(session); - kfree(session); -} - /* * Initialize the values to be used by the client in CREATE_SESSION * If
nfs4_init_session set the fore channel request and response sizes, @@ -6046,100 +5729,6 @@ int nfs4_proc_destroy_session(struct nfs4_session *session, return status; } -/* - * With sessions, the client is not marked ready until after a - * successful EXCHANGE_ID and CREATE_SESSION. - * - * Map errors cl_cons_state errors to EPROTONOSUPPORT to indicate - * other versions of NFS can be tried. - */ -static int nfs41_check_session_ready(struct nfs_client *clp) -{ - int ret; - - if (clp->cl_cons_state == NFS_CS_SESSION_INITING) { - ret = nfs4_client_recover_expired_lease(clp); - if (ret) - return ret; - } - if (clp->cl_cons_state < NFS_CS_READY) - return -EPROTONOSUPPORT; - smp_rmb(); - return 0; -} - -int nfs4_init_session(struct nfs_server *server) -{ - struct nfs_client *clp = server->nfs_client; - struct nfs4_session *session; - unsigned int target_max_rqst_sz = NFS_MAX_FILE_IO_SIZE; - unsigned int target_max_resp_sz = NFS_MAX_FILE_IO_SIZE; - - if (!nfs4_has_session(clp)) - return 0; - - if (server->rsize != 0) - target_max_resp_sz = server->rsize; - target_max_resp_sz += nfs41_maxread_overhead; - - if (server->wsize != 0) - target_max_rqst_sz = server->wsize; - target_max_rqst_sz += nfs41_maxwrite_overhead; - - session = clp->cl_session; - spin_lock(&clp->cl_lock); - if (test_and_clear_bit(NFS4_SESSION_INITING, &session->session_state)) { - /* Initialise targets and channel attributes */ - session->fc_target_max_rqst_sz = target_max_rqst_sz; - session->fc_attrs.max_rqst_sz = target_max_rqst_sz; - session->fc_target_max_resp_sz = target_max_resp_sz; - session->fc_attrs.max_resp_sz = target_max_resp_sz; - } else { - /* Just adjust the targets */ - if (target_max_rqst_sz > session->fc_target_max_rqst_sz) { - session->fc_target_max_rqst_sz = target_max_rqst_sz; - set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state); - } - if (target_max_resp_sz > session->fc_target_max_resp_sz) { - session->fc_target_max_resp_sz = target_max_resp_sz; - set_bit(NFS4CLNT_SESSION_RESET, 
&clp->cl_state); - } - } - spin_unlock(&clp->cl_lock); - - if (test_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state)) - nfs4_schedule_lease_recovery(clp); - - return nfs41_check_session_ready(clp); -} - -int nfs4_init_ds_session(struct nfs_client *clp, unsigned long lease_time) -{ - struct nfs4_session *session = clp->cl_session; - int ret; - - spin_lock(&clp->cl_lock); - if (test_and_clear_bit(NFS4_SESSION_INITING, &session->session_state)) { - /* - * Do not set NFS_CS_CHECK_LEASE_TIME instead set the - * DS lease to be equal to the MDS lease. - */ - clp->cl_lease_time = lease_time; - clp->cl_last_renewal = jiffies; - } - spin_unlock(&clp->cl_lock); - - ret = nfs41_check_session_ready(clp); - if (ret) - return ret; - /* Test for the DS role */ - if (!is_ds_client(clp)) - return -ENODEV; - return 0; -} -EXPORT_SYMBOL_GPL(nfs4_init_ds_session); - - /* * Renew the cl_session lease. */ diff --git a/fs/nfs/nfs4session.c b/fs/nfs/nfs4session.c new file mode 100644 index 00000000000..701170293ce --- /dev/null +++ b/fs/nfs/nfs4session.c @@ -0,0 +1,436 @@ +/* + * fs/nfs/nfs4session.c + * + * Copyright (c) 2012 Trond Myklebust + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "nfs4_fs.h" +#include "internal.h" +#include "nfs4session.h" +#include "callback.h" + +#define NFSDBG_FACILITY NFSDBG_STATE + +/* + * nfs4_shrink_slot_table - free retired slots from the slot table + */ +static void nfs4_shrink_slot_table(struct nfs4_slot_table *tbl, u32 newsize) +{ + struct nfs4_slot **p; + if (newsize >= tbl->max_slots) + return; + + p = &tbl->slots; + while (newsize--) + p = &(*p)->next; + while (*p) { + struct nfs4_slot *slot = *p; + + *p = slot->next; + kfree(slot); + tbl->max_slots--; + } +} + +/* + * nfs4_free_slot - free a slot and efficiently update slot table. + * + * freeing a slot is trivially done by clearing its respective bit + * in the bitmap. 
+ * If the freed slotid equals highest_used_slotid we want to update it + * so that the server would be able to size down the slot table if needed, + * otherwise we know that the highest_used_slotid is still in use. + * When updating highest_used_slotid there may be "holes" in the bitmap + * so we need to scan down from highest_used_slotid to 0 looking for the now + * highest slotid in use. + * If none found, highest_used_slotid is set to NFS4_NO_SLOT. + * + * Must be called while holding tbl->slot_tbl_lock + */ +void nfs4_free_slot(struct nfs4_slot_table *tbl, struct nfs4_slot *slot) +{ + u32 slotid = slot->slot_nr; + + /* clear used bit in bitmap */ + __clear_bit(slotid, tbl->used_slots); + + /* update highest_used_slotid when it is freed */ + if (slotid == tbl->highest_used_slotid) { + u32 new_max = find_last_bit(tbl->used_slots, slotid); + if (new_max < slotid) + tbl->highest_used_slotid = new_max; + else { + tbl->highest_used_slotid = NFS4_NO_SLOT; + nfs4_session_drain_complete(tbl->session, tbl); + } + } + dprintk("%s: slotid %u highest_used_slotid %d\n", __func__, + slotid, tbl->highest_used_slotid); +} + +static struct nfs4_slot *nfs4_new_slot(struct nfs4_slot_table *tbl, + u32 slotid, u32 seq_init, gfp_t gfp_mask) +{ + struct nfs4_slot *slot; + + slot = kzalloc(sizeof(*slot), gfp_mask); + if (slot) { + slot->table = tbl; + slot->slot_nr = slotid; + slot->seq_nr = seq_init; + } + return slot; +} + +static struct nfs4_slot *nfs4_find_or_create_slot(struct nfs4_slot_table *tbl, + u32 slotid, u32 seq_init, gfp_t gfp_mask) +{ + struct nfs4_slot **p, *slot; + + p = &tbl->slots; + for (;;) { + if (*p == NULL) { + *p = nfs4_new_slot(tbl, tbl->max_slots, + seq_init, gfp_mask); + if (*p == NULL) + break; + tbl->max_slots++; + } + slot = *p; + if (slot->slot_nr == slotid) + return slot; + p = &slot->next; + } + return ERR_PTR(-ENOMEM); +} + +/* + * nfs4_alloc_slot - efficiently look for a free slot + * + * nfs4_alloc_slot looks for an unset bit in the used_slots 
bitmap. + * If found, we mark the slot as used, update the highest_used_slotid, + * and respectively set up the sequence operation args. + * + * Note: must be called with under the slot_tbl_lock. + */ +struct nfs4_slot *nfs4_alloc_slot(struct nfs4_slot_table *tbl) +{ + struct nfs4_slot *ret = ERR_PTR(-EBUSY); + u32 slotid; + + dprintk("--> %s used_slots=%04lx highest_used=%u max_slots=%u\n", + __func__, tbl->used_slots[0], tbl->highest_used_slotid, + tbl->max_slotid + 1); + slotid = find_first_zero_bit(tbl->used_slots, tbl->max_slotid + 1); + if (slotid > tbl->max_slotid) + goto out; + ret = nfs4_find_or_create_slot(tbl, slotid, 1, GFP_NOWAIT); + if (IS_ERR(ret)) + goto out; + __set_bit(slotid, tbl->used_slots); + if (slotid > tbl->highest_used_slotid || + tbl->highest_used_slotid == NFS4_NO_SLOT) + tbl->highest_used_slotid = slotid; + ret->renewal_time = jiffies; + ret->generation = tbl->generation; + +out: + dprintk("<-- %s used_slots=%04lx highest_used=%d slotid=%d \n", + __func__, tbl->used_slots[0], tbl->highest_used_slotid, + !IS_ERR(ret) ? 
ret->slot_nr : -1); + return ret; +} + +static int nfs4_grow_slot_table(struct nfs4_slot_table *tbl, + u32 max_reqs, u32 ivalue) +{ + if (max_reqs <= tbl->max_slots) + return 0; + if (!IS_ERR(nfs4_find_or_create_slot(tbl, max_reqs - 1, ivalue, GFP_NOFS))) + return 0; + return -ENOMEM; +} + +static void nfs4_reset_slot_table(struct nfs4_slot_table *tbl, + u32 server_highest_slotid, + u32 ivalue) +{ + struct nfs4_slot **p; + + nfs4_shrink_slot_table(tbl, server_highest_slotid + 1); + p = &tbl->slots; + while (*p) { + (*p)->seq_nr = ivalue; + p = &(*p)->next; + } + tbl->highest_used_slotid = NFS4_NO_SLOT; + tbl->target_highest_slotid = server_highest_slotid; + tbl->server_highest_slotid = server_highest_slotid; + tbl->max_slotid = server_highest_slotid; +} + +/* + * (re)Initialise a slot table + */ +static int nfs4_realloc_slot_table(struct nfs4_slot_table *tbl, + u32 max_reqs, u32 ivalue) +{ + int ret; + + dprintk("--> %s: max_reqs=%u, tbl->max_slots %d\n", __func__, + max_reqs, tbl->max_slots); + + if (max_reqs > NFS4_MAX_SLOT_TABLE) + max_reqs = NFS4_MAX_SLOT_TABLE; + + ret = nfs4_grow_slot_table(tbl, max_reqs, ivalue); + if (ret) + goto out; + + spin_lock(&tbl->slot_tbl_lock); + nfs4_reset_slot_table(tbl, max_reqs - 1, ivalue); + spin_unlock(&tbl->slot_tbl_lock); + + dprintk("%s: tbl=%p slots=%p max_slots=%d\n", __func__, + tbl, tbl->slots, tbl->max_slots); +out: + dprintk("<-- %s: return %d\n", __func__, ret); + return ret; +} + +/* Destroy the slot table */ +static void nfs4_destroy_slot_tables(struct nfs4_session *session) +{ + nfs4_shrink_slot_table(&session->fc_slot_table, 0); + nfs4_shrink_slot_table(&session->bc_slot_table, 0); +} + +/* Update the client's idea of target_highest_slotid */ +static void nfs41_set_target_slotid_locked(struct nfs4_slot_table *tbl, + u32 target_highest_slotid) +{ + unsigned int max_slotid, i; + + if (tbl->target_highest_slotid == target_highest_slotid) + return; + tbl->target_highest_slotid = target_highest_slotid; + 
tbl->generation++; + + max_slotid = min(NFS4_MAX_SLOT_TABLE - 1, tbl->target_highest_slotid); + for (i = tbl->max_slotid + 1; i <= max_slotid; i++) + rpc_wake_up_next(&tbl->slot_tbl_waitq); + tbl->max_slotid = max_slotid; +} + +void nfs41_set_target_slotid(struct nfs4_slot_table *tbl, + u32 target_highest_slotid) +{ + spin_lock(&tbl->slot_tbl_lock); + nfs41_set_target_slotid_locked(tbl, target_highest_slotid); + spin_unlock(&tbl->slot_tbl_lock); +} + +static void nfs41_set_server_slotid_locked(struct nfs4_slot_table *tbl, + u32 highest_slotid) +{ + if (tbl->server_highest_slotid == highest_slotid) + return; + if (tbl->highest_used_slotid > highest_slotid) + return; + /* Deallocate slots */ + nfs4_shrink_slot_table(tbl, highest_slotid + 1); + tbl->server_highest_slotid = highest_slotid; +} + +void nfs41_update_target_slotid(struct nfs4_slot_table *tbl, + struct nfs4_slot *slot, + struct nfs4_sequence_res *res) +{ + spin_lock(&tbl->slot_tbl_lock); + if (tbl->generation != slot->generation) + goto out; + nfs41_set_server_slotid_locked(tbl, res->sr_highest_slotid); + nfs41_set_target_slotid_locked(tbl, res->sr_target_highest_slotid); +out: + spin_unlock(&tbl->slot_tbl_lock); +} + +/* + * Initialize or reset the forechannel and backchannel tables + */ +int nfs4_setup_session_slot_tables(struct nfs4_session *ses) +{ + struct nfs4_slot_table *tbl; + int status; + + dprintk("--> %s\n", __func__); + /* Fore channel */ + tbl = &ses->fc_slot_table; + tbl->session = ses; + status = nfs4_realloc_slot_table(tbl, ses->fc_attrs.max_reqs, 1); + if (status) /* -ENOMEM */ + return status; + /* Back channel */ + tbl = &ses->bc_slot_table; + tbl->session = ses; + status = nfs4_realloc_slot_table(tbl, ses->bc_attrs.max_reqs, 0); + if (status && tbl->slots == NULL) + /* Fore and back channel share a connection so get + * both slot tables or neither */ + nfs4_destroy_slot_tables(ses); + return status; +} + +struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp) +{ + struct 
nfs4_session *session; + struct nfs4_slot_table *tbl; + + session = kzalloc(sizeof(struct nfs4_session), GFP_NOFS); + if (!session) + return NULL; + + tbl = &session->fc_slot_table; + tbl->highest_used_slotid = NFS4_NO_SLOT; + spin_lock_init(&tbl->slot_tbl_lock); + rpc_init_priority_wait_queue(&tbl->slot_tbl_waitq, "ForeChannel Slot table"); + init_completion(&tbl->complete); + + tbl = &session->bc_slot_table; + tbl->highest_used_slotid = NFS4_NO_SLOT; + spin_lock_init(&tbl->slot_tbl_lock); + rpc_init_wait_queue(&tbl->slot_tbl_waitq, "BackChannel Slot table"); + init_completion(&tbl->complete); + + session->session_state = 1<<NFS4_SESSION_INITING; + + session->clp = clp; + return session; +} + +void nfs4_destroy_session(struct nfs4_session *session) +{ + struct rpc_xprt *xprt; + struct rpc_cred *cred; + + cred = nfs4_get_exchange_id_cred(session->clp); + nfs4_proc_destroy_session(session, cred); + if (cred) + put_rpccred(cred); + + rcu_read_lock(); + xprt = rcu_dereference(session->clp->cl_rpcclient->cl_xprt); + rcu_read_unlock(); + dprintk("%s Destroy backchannel for xprt %p\n", + __func__, xprt); + xprt_destroy_backchannel(xprt, NFS41_BC_MIN_CALLBACKS); + nfs4_destroy_slot_tables(session); + kfree(session); +} + +/* + * With sessions, the client is not marked ready until after a + * successful EXCHANGE_ID and CREATE_SESSION. + * + * Map errors cl_cons_state errors to EPROTONOSUPPORT to indicate + * other versions of NFS can be tried.
+ */ +static int nfs41_check_session_ready(struct nfs_client *clp) +{ + int ret; + + if (clp->cl_cons_state == NFS_CS_SESSION_INITING) { + ret = nfs4_client_recover_expired_lease(clp); + if (ret) + return ret; + } + if (clp->cl_cons_state < NFS_CS_READY) + return -EPROTONOSUPPORT; + smp_rmb(); + return 0; +} + +int nfs4_init_session(struct nfs_server *server) +{ + struct nfs_client *clp = server->nfs_client; + struct nfs4_session *session; + unsigned int target_max_rqst_sz = NFS_MAX_FILE_IO_SIZE; + unsigned int target_max_resp_sz = NFS_MAX_FILE_IO_SIZE; + + if (!nfs4_has_session(clp)) + return 0; + + if (server->rsize != 0) + target_max_resp_sz = server->rsize; + target_max_resp_sz += nfs41_maxread_overhead; + + if (server->wsize != 0) + target_max_rqst_sz = server->wsize; + target_max_rqst_sz += nfs41_maxwrite_overhead; + + session = clp->cl_session; + spin_lock(&clp->cl_lock); + if (test_and_clear_bit(NFS4_SESSION_INITING, &session->session_state)) { + /* Initialise targets and channel attributes */ + session->fc_target_max_rqst_sz = target_max_rqst_sz; + session->fc_attrs.max_rqst_sz = target_max_rqst_sz; + session->fc_target_max_resp_sz = target_max_resp_sz; + session->fc_attrs.max_resp_sz = target_max_resp_sz; + } else { + /* Just adjust the targets */ + if (target_max_rqst_sz > session->fc_target_max_rqst_sz) { + session->fc_target_max_rqst_sz = target_max_rqst_sz; + set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state); + } + if (target_max_resp_sz > session->fc_target_max_resp_sz) { + session->fc_target_max_resp_sz = target_max_resp_sz; + set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state); + } + } + spin_unlock(&clp->cl_lock); + + if (test_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state)) + nfs4_schedule_lease_recovery(clp); + + return nfs41_check_session_ready(clp); +} + +int nfs4_init_ds_session(struct nfs_client *clp, unsigned long lease_time) +{ + struct nfs4_session *session = clp->cl_session; + int ret; + + spin_lock(&clp->cl_lock); + if 
(test_and_clear_bit(NFS4_SESSION_INITING, &session->session_state)) { + /* + * Do not set NFS_CS_CHECK_LEASE_TIME instead set the + * DS lease to be equal to the MDS lease. + */ + clp->cl_lease_time = lease_time; + clp->cl_last_renewal = jiffies; + } + spin_unlock(&clp->cl_lock); + + ret = nfs41_check_session_ready(clp); + if (ret) + return ret; + /* Test for the DS role */ + if (!is_ds_client(clp)) + return -ENODEV; + return 0; +} +EXPORT_SYMBOL_GPL(nfs4_init_ds_session); + + diff --git a/fs/nfs/nfs4session.h b/fs/nfs/nfs4session.h new file mode 100644 index 00000000000..cb47b1eb088 --- /dev/null +++ b/fs/nfs/nfs4session.h @@ -0,0 +1,35 @@ +/* + * fs/nfs/nfs4session.h + * + * Copyright (c) 2012 Trond Myklebust + * + */ +#ifndef __LINUX_FS_NFS_NFS4SESSION_H +#define __LINUX_FS_NFS_NFS4SESSION_H + +#if defined(CONFIG_NFS_V4_1) +extern struct nfs4_slot *nfs4_alloc_slot(struct nfs4_slot_table *tbl); +extern void nfs4_free_slot(struct nfs4_slot_table *tbl, struct nfs4_slot *slot); + +extern void nfs41_set_target_slotid(struct nfs4_slot_table *tbl, + u32 target_highest_slotid); +extern void nfs41_update_target_slotid(struct nfs4_slot_table *tbl, + struct nfs4_slot *slot, + struct nfs4_sequence_res *res); + +extern int nfs4_setup_session_slot_tables(struct nfs4_session *ses); + +extern struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp); +extern void nfs4_destroy_session(struct nfs4_session *session); +extern int nfs4_init_session(struct nfs_server *server); +extern int nfs4_init_ds_session(struct nfs_client *, unsigned long); + +#else /* defined(CONFIG_NFS_V4_1) */ + +static inline int nfs4_init_session(struct nfs_server *server) +{ + return 0; +} + +#endif /* defined(CONFIG_NFS_V4_1) */ +#endif /* __LINUX_FS_NFS_NFS4SESSION_H */ -- cgit v1.2.3 From 76e697ba7e8d187f50e385d21a2b2f1709a62c14 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 26 Nov 2012 14:20:49 -0500 Subject: NFSv4.1: Move slot table and session struct definitions to nfs4session.h 
Clean up. Gather NFSv4.1 slot definitions in fs/nfs/nfs4session.h. Signed-off-by: Trond Myklebust --- fs/nfs/blocklayout/blocklayout.c | 1 + fs/nfs/callback_xdr.c | 1 + fs/nfs/internal.h | 21 -------- fs/nfs/nfs4_fs.h | 12 ----- fs/nfs/nfs4filelayout.c | 1 + fs/nfs/nfs4session.h | 101 +++++++++++++++++++++++++++++++++++++++ fs/nfs/nfs4state.c | 1 + fs/nfs/nfs4xdr.c | 1 + fs/nfs/super.c | 1 + 9 files changed, 107 insertions(+), 33 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index f1027b06a1a..4fa788c93f4 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -40,6 +40,7 @@ #include #include "../pnfs.h" +#include "../nfs4session.h" #include "../internal.h" #include "blocklayout.h" diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index ea6a7b190e6..59461c957d9 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -16,6 +16,7 @@ #include "nfs4_fs.h" #include "callback.h" #include "internal.h" +#include "nfs4session.h" #define CB_OP_TAGLEN_MAXSZ (512) #define CB_OP_HDR_RES_MAXSZ (2 + CB_OP_TAGLEN_MAXSZ) diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 8965a998b30..9bdbfc3884a 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -18,27 +18,6 @@ struct nfs_string; */ #define NFS_MAX_READAHEAD (RPC_DEF_SLOT_TABLE - 1) -/* - * Determine if sessions are in use. 
- */ -static inline int nfs4_has_session(const struct nfs_client *clp) -{ -#ifdef CONFIG_NFS_V4_1 - if (clp->cl_session) - return 1; -#endif /* CONFIG_NFS_V4_1 */ - return 0; -} - -static inline int nfs4_has_persistent_session(const struct nfs_client *clp) -{ -#ifdef CONFIG_NFS_V4_1 - if (nfs4_has_session(clp)) - return (clp->cl_session->flags & SESSION4_PERSIST); -#endif /* CONFIG_NFS_V4_1 */ - return 0; -} - static inline void nfs_attr_check_mountpoint(struct super_block *parent, struct nfs_fattr *fattr) { if (!nfs_fsid_equal(&NFS_SB(parent)->fsid, &fattr->fsid)) diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index cd3e3096b60..322bd0168eb 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -29,11 +29,6 @@ enum nfs4_client_state { NFS4CLNT_BIND_CONN_TO_SESSION, }; -enum nfs4_session_state { - NFS4_SESSION_INITING, - NFS4_SESSION_DRAINING, -}; - #define NFS4_RENEW_TIMEOUT 0x01 #define NFS4_RENEW_DELEGATION_CB 0x02 @@ -327,13 +322,6 @@ int nfs41_discover_server_trunking(struct nfs_client *clp, extern void nfs4_schedule_session_recovery(struct nfs4_session *, int); extern void nfs41_server_notify_target_slotid_update(struct nfs_client *clp); -extern void nfs4_session_drain_complete(struct nfs4_session *session, - struct nfs4_slot_table *tbl); - -static inline bool nfs4_session_draining(struct nfs4_session *session) -{ - return !!test_bit(NFS4_SESSION_DRAINING, &session->session_state); -} #else static inline void nfs4_schedule_session_recovery(struct nfs4_session *session, int err) { diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index bfb28fa38e7..591a1a7f8f9 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -35,6 +35,7 @@ #include +#include "nfs4session.h" #include "internal.h" #include "delegation.h" #include "nfs4filelayout.h" diff --git a/fs/nfs/nfs4session.h b/fs/nfs/nfs4session.h index cb47b1eb088..e96323ff1d9 100644 --- a/fs/nfs/nfs4session.h +++ b/fs/nfs/nfs4session.h @@ -7,6 +7,68 @@ #ifndef 
__LINUX_FS_NFS_NFS4SESSION_H #define __LINUX_FS_NFS_NFS4SESSION_H +/* maximum number of slots to use */ +#define NFS4_DEF_SLOT_TABLE_SIZE (16U) +#define NFS4_MAX_SLOT_TABLE (256U) +#define NFS4_NO_SLOT ((u32)-1) + +#if IS_ENABLED(CONFIG_NFS_V4) + +/* Sessions slot seqid */ +struct nfs4_slot { + struct nfs4_slot_table *table; + struct nfs4_slot *next; + unsigned long generation; + unsigned long renewal_time; + u32 slot_nr; + u32 seq_nr; +}; + +/* Sessions */ +#define SLOT_TABLE_SZ DIV_ROUND_UP(NFS4_MAX_SLOT_TABLE, 8*sizeof(long)) +struct nfs4_slot_table { + struct nfs4_session *session; /* Parent session */ + struct nfs4_slot *slots; /* seqid per slot */ + unsigned long used_slots[SLOT_TABLE_SZ]; /* used/unused bitmap */ + spinlock_t slot_tbl_lock; + struct rpc_wait_queue slot_tbl_waitq; /* allocators may wait here */ + u32 max_slots; /* # slots in table */ + u32 max_slotid; /* Max allowed slotid value */ + u32 highest_used_slotid; /* sent to server on each SEQ. + * op for dynamic resizing */ + u32 target_highest_slotid; /* Server max_slot target */ + u32 server_highest_slotid; /* Server highest slotid */ + unsigned long generation; /* Generation counter for + target_highest_slotid */ + struct completion complete; +}; + +/* + * Session related parameters + */ +struct nfs4_session { + struct nfs4_sessionid sess_id; + u32 flags; + unsigned long session_state; + u32 hash_alg; + u32 ssv_len; + + /* The fore and back channel */ + struct nfs4_channel_attrs fc_attrs; + struct nfs4_slot_table fc_slot_table; + struct nfs4_channel_attrs bc_attrs; + struct nfs4_slot_table bc_slot_table; + struct nfs_client *clp; + /* Create session arguments */ + unsigned int fc_target_max_rqst_sz; + unsigned int fc_target_max_resp_sz; +}; + +enum nfs4_session_state { + NFS4_SESSION_INITING, + NFS4_SESSION_DRAINING, +}; + #if defined(CONFIG_NFS_V4_1) extern struct nfs4_slot *nfs4_alloc_slot(struct nfs4_slot_table *tbl); extern void nfs4_free_slot(struct nfs4_slot_table *tbl, struct nfs4_slot 
*slot); @@ -24,6 +86,31 @@ extern void nfs4_destroy_session(struct nfs4_session *session); extern int nfs4_init_session(struct nfs_server *server); extern int nfs4_init_ds_session(struct nfs_client *, unsigned long); +extern void nfs4_session_drain_complete(struct nfs4_session *session, + struct nfs4_slot_table *tbl); + +static inline bool nfs4_session_draining(struct nfs4_session *session) +{ + return !!test_bit(NFS4_SESSION_DRAINING, &session->session_state); +} + +/* + * Determine if sessions are in use. + */ +static inline int nfs4_has_session(const struct nfs_client *clp) +{ + if (clp->cl_session) + return 1; + return 0; +} + +static inline int nfs4_has_persistent_session(const struct nfs_client *clp) +{ + if (nfs4_has_session(clp)) + return (clp->cl_session->flags & SESSION4_PERSIST); + return 0; +} + #else /* defined(CONFIG_NFS_V4_1) */ static inline int nfs4_init_session(struct nfs_server *server) @@ -31,5 +118,19 @@ static inline int nfs4_init_session(struct nfs_server *server) return 0; } +/* + * Determine if sessions are in use. 
+ */ +static inline int nfs4_has_session(const struct nfs_client *clp) +{ + return 0; +} + +static inline int nfs4_has_persistent_session(const struct nfs_client *clp) +{ + return 0; +} + #endif /* defined(CONFIG_NFS_V4_1) */ +#endif /* IS_ENABLED(CONFIG_NFS_V4) */ #endif /* __LINUX_FS_NFS_NFS4SESSION_H */ diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 1077b969838..1402283d152 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -57,6 +57,7 @@ #include "callback.h" #include "delegation.h" #include "internal.h" +#include "nfs4session.h" #include "pnfs.h" #include "netns.h" diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index a67040f5159..e786dc7582b 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -56,6 +56,7 @@ #include "nfs4_fs.h" #include "internal.h" +#include "nfs4session.h" #include "pnfs.h" #include "netns.h" diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 652d3f7176a..e12cea4b36a 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -64,6 +64,7 @@ #include "iostat.h" #include "internal.h" #include "fscache.h" +#include "nfs4session.h" #include "pnfs.h" #include "nfs.h" -- cgit v1.2.3 From 0ca3f4825ac92a10aa8f6534f765c44f22778dd3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 21 Nov 2012 22:34:45 -0500 Subject: NFSv4.1: Set the maximum slot table size to 1024 slots This means that we end up statically allocating 128 bytes for the bitmap on each slot table. For a server that supports 1MB write and read I/O sizes this means that we can completely fill the maximum 1GB TCP send/receive windows. 
Signed-off-by: Trond Myklebust --- fs/nfs/nfs4session.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4session.h b/fs/nfs/nfs4session.h index e96323ff1d9..bdd14a60722 100644 --- a/fs/nfs/nfs4session.h +++ b/fs/nfs/nfs4session.h @@ -9,7 +9,7 @@ /* maximum number of slots to use */ #define NFS4_DEF_SLOT_TABLE_SIZE (16U) -#define NFS4_MAX_SLOT_TABLE (256U) +#define NFS4_MAX_SLOT_TABLE (1024U) #define NFS4_NO_SLOT ((u32)-1) #if IS_ENABLED(CONFIG_NFS_V4) -- cgit v1.2.3 From c10e449827e6008ef5a4a71c0247c7eb73948e1b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 26 Nov 2012 16:16:54 -0500 Subject: NFSv4.1: Ping server when our session table limits are too high If the server requests a lower target_highest_slotid, then ensure that we ping it with at least one RPC call containing an appropriate SEQUENCE op. This ensures that the server won't need to send a recall callback in order to shrink the slot table. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4_fs.h | 1 + fs/nfs/nfs4proc.c | 20 +++++++++++++++++--- fs/nfs/nfs4state.c | 5 +++++ 3 files changed, 23 insertions(+), 3 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 322bd0168eb..8fe155ba16d 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -321,6 +321,7 @@ int nfs41_discover_server_trunking(struct nfs_client *clp, struct nfs_client **, struct rpc_cred *); extern void nfs4_schedule_session_recovery(struct nfs4_session *, int); extern void nfs41_server_notify_target_slotid_update(struct nfs_client *clp); +extern void nfs41_server_notify_highest_slotid_update(struct nfs_client *clp); #else static inline void nfs4_schedule_session_recovery(struct nfs4_session *session, int err) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index a0c35ab12a6..ecd4ed3a4f6 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -389,6 +389,7 @@ static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res) { struct 
nfs4_session *session; struct nfs4_slot_table *tbl; + bool send_new_highest_used_slotid = false; if (!res->sr_slot) { /* just wake up the next guy waiting since @@ -400,12 +401,25 @@ static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res) session = tbl->session; spin_lock(&tbl->slot_tbl_lock); + /* Be nice to the server: try to ensure that the last transmitted + * value for highest_used_slotid <= target_highest_slotid + */ + if (tbl->highest_used_slotid > tbl->target_highest_slotid) + send_new_highest_used_slotid = true; + nfs4_free_slot(tbl, res->sr_slot); - if (!nfs4_session_draining(session)) - rpc_wake_up_first(&tbl->slot_tbl_waitq, - nfs4_set_task_privileged, NULL); + + if (tbl->highest_used_slotid != NFS4_NO_SLOT) + send_new_highest_used_slotid = false; + if (!nfs4_session_draining(session)) { + if (rpc_wake_up_first(&tbl->slot_tbl_waitq, + nfs4_set_task_privileged, NULL) != NULL) + send_new_highest_used_slotid = false; + } spin_unlock(&tbl->slot_tbl_lock); res->sr_slot = NULL; + if (send_new_highest_used_slotid) + nfs41_server_notify_highest_slotid_update(session->clp); } static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res) diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 1402283d152..c137421f212 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1961,6 +1961,11 @@ void nfs41_server_notify_target_slotid_update(struct nfs_client *clp) nfs41_ping_server(clp); } +void nfs41_server_notify_highest_slotid_update(struct nfs_client *clp) +{ + nfs41_ping_server(clp); +} + static void nfs4_reset_all_state(struct nfs_client *clp) { if (test_and_set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0) { -- cgit v1.2.3 From 6ba7db3420c0dbf3ede16f19a593e6a80edc043f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 22 Oct 2012 20:07:20 -0400 Subject: NFSv4.1: Use nfs41_setup_sequence where appropriate There is no point in using nfs4_setup_sequence or nfs4_sequence_done in pure NFSv4.1 functions. 
We already know that those have sessions... Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index ecd4ed3a4f6..39d24158f97 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -597,10 +597,11 @@ struct nfs41_call_sync_data { static void nfs41_call_sync_prepare(struct rpc_task *task, void *calldata) { struct nfs41_call_sync_data *data = calldata; + struct nfs4_session *session = nfs4_get_session(data->seq_server); dprintk("--> %s data->seq_server %p\n", __func__, data->seq_server); - if (nfs4_setup_sequence(data->seq_server, data->seq_args, + if (nfs41_setup_sequence(session, data->seq_args, data->seq_res, task)) return; rpc_call_start(task); @@ -6018,6 +6019,7 @@ nfs4_layoutget_prepare(struct rpc_task *task, void *calldata) { struct nfs4_layoutget *lgp = calldata; struct nfs_server *server = NFS_SERVER(lgp->args.inode); + struct nfs4_session *session = nfs4_get_session(server); dprintk("--> %s\n", __func__); /* Note the is a race here, where a CB_LAYOUTRECALL can come in @@ -6025,7 +6027,7 @@ nfs4_layoutget_prepare(struct rpc_task *task, void *calldata) * However, that is not so catastrophic, and there seems * to be no way to prevent it completely. 
*/ - if (nfs4_setup_sequence(server, &lgp->args.seq_args, + if (nfs41_setup_sequence(session, &lgp->args.seq_args, &lgp->res.seq_res, task)) return; if (pnfs_choose_layoutget_stateid(&lgp->args.stateid, @@ -6047,7 +6049,7 @@ static void nfs4_layoutget_done(struct rpc_task *task, void *calldata) dprintk("--> %s\n", __func__); - if (!nfs4_sequence_done(task, &lgp->res.seq_res)) + if (!nfs41_sequence_done(task, &lgp->res.seq_res)) goto out; switch (task->tk_status) { @@ -6211,7 +6213,7 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata) dprintk("--> %s\n", __func__); - if (!nfs4_sequence_done(task, &lrp->res.seq_res)) + if (!nfs41_sequence_done(task, &lrp->res.seq_res)) return; server = NFS_SERVER(lrp->args.inode); @@ -6360,8 +6362,9 @@ static void nfs4_layoutcommit_prepare(struct rpc_task *task, void *calldata) { struct nfs4_layoutcommit_data *data = calldata; struct nfs_server *server = NFS_SERVER(data->args.inode); + struct nfs4_session *session = nfs4_get_session(server); - if (nfs4_setup_sequence(server, &data->args.seq_args, + if (nfs41_setup_sequence(session, &data->args.seq_args, &data->res.seq_res, task)) return; rpc_call_start(task); @@ -6373,7 +6376,7 @@ nfs4_layoutcommit_done(struct rpc_task *task, void *calldata) struct nfs4_layoutcommit_data *data = calldata; struct nfs_server *server = NFS_SERVER(data->args.inode); - if (!nfs4_sequence_done(task, &data->res.seq_res)) + if (!nfs41_sequence_done(task, &data->res.seq_res)) return; switch (task->tk_status) { /* Just ignore these failures */ -- cgit v1.2.3 From d9afbd1b0889e7da6742e9c67ccc7becc4161f65 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 22 Oct 2012 20:28:44 -0400 Subject: NFSv4.1: Simplify the sequence setup Nobody calls nfs4_setup_sequence or nfs41_setup_sequence without also calling rpc_call_start() on success. This commit therefore folds the rpc_call_start call into nfs41_setup_sequence(). 
Signed-off-by: Trond Myklebust --- fs/nfs/nfs4_fs.h | 1 + fs/nfs/nfs4filelayout.c | 30 +++++------- fs/nfs/nfs4proc.c | 125 +++++++++++++++++++----------------------------- 3 files changed, 62 insertions(+), 94 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 8fe155ba16d..8022adec34c 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -273,6 +273,7 @@ static inline int nfs4_setup_sequence(const struct nfs_server *server, struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, struct rpc_task *task) { + rpc_call_start(task); return 0; } diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 591a1a7f8f9..1e42413fab8 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -307,12 +307,10 @@ static void filelayout_read_prepare(struct rpc_task *task, void *data) } rdata->read_done_cb = filelayout_read_done_cb; - if (nfs41_setup_sequence(rdata->ds_clp->cl_session, - &rdata->args.seq_args, &rdata->res.seq_res, - task)) - return; - - rpc_call_start(task); + nfs41_setup_sequence(rdata->ds_clp->cl_session, + &rdata->args.seq_args, + &rdata->res.seq_res, + task); } static void filelayout_read_call_done(struct rpc_task *task, void *data) @@ -409,12 +407,10 @@ static void filelayout_write_prepare(struct rpc_task *task, void *data) rpc_exit(task, 0); return; } - if (nfs41_setup_sequence(wdata->ds_clp->cl_session, - &wdata->args.seq_args, &wdata->res.seq_res, - task)) - return; - - rpc_call_start(task); + nfs41_setup_sequence(wdata->ds_clp->cl_session, + &wdata->args.seq_args, + &wdata->res.seq_res, + task); } static void filelayout_write_call_done(struct rpc_task *task, void *data) @@ -450,12 +446,10 @@ static void filelayout_commit_prepare(struct rpc_task *task, void *data) { struct nfs_commit_data *wdata = data; - if (nfs41_setup_sequence(wdata->ds_clp->cl_session, - &wdata->args.seq_args, &wdata->res.seq_res, - task)) - return; - - rpc_call_start(task); + 
nfs41_setup_sequence(wdata->ds_clp->cl_session, + &wdata->args.seq_args, + &wdata->res.seq_res, + task); } static void filelayout_write_commit_done(struct rpc_task *task, void *data) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 39d24158f97..23b0c2fcb05 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -513,7 +513,7 @@ int nfs41_setup_sequence(struct nfs4_session *session, dprintk("--> %s\n", __func__); /* slot already allocated? */ if (res->sr_slot != NULL) - return 0; + goto out_success; tbl = &session->fc_slot_table; @@ -563,6 +563,8 @@ int nfs41_setup_sequence(struct nfs4_session *session, * set to 1 if an rpc level failure occurs. */ res->sr_status = 1; +out_success: + rpc_call_start(task); return 0; } EXPORT_SYMBOL_GPL(nfs41_setup_sequence); @@ -575,8 +577,10 @@ int nfs4_setup_sequence(const struct nfs_server *server, struct nfs4_session *session = nfs4_get_session(server); int ret = 0; - if (session == NULL) + if (session == NULL) { + rpc_call_start(task); goto out; + } dprintk("--> %s clp %p session %p sr_slot %d\n", __func__, session->clp, session, res->sr_slot ? 
@@ -601,10 +605,7 @@ static void nfs41_call_sync_prepare(struct rpc_task *task, void *calldata) dprintk("--> %s data->seq_server %p\n", __func__, data->seq_server); - if (nfs41_setup_sequence(session, data->seq_args, - data->seq_res, task)) - return; - rpc_call_start(task); + nfs41_setup_sequence(session, data->seq_args, data->seq_res, task); } static void nfs41_call_priv_sync_prepare(struct rpc_task *task, void *calldata) @@ -1485,8 +1486,6 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata) &data->o_res.seq_res, task) != 0) nfs_release_seqid(data->o_arg.seqid); - else - rpc_call_start(task); return; unlock_no_action: rcu_read_unlock(); @@ -2192,8 +2191,6 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) &calldata->res.seq_res, task) != 0) nfs_release_seqid(calldata->arg.seqid); - else - rpc_call_start(task); out: dprintk("%s: done!\n", __func__); } @@ -2932,12 +2929,10 @@ static void nfs4_proc_unlink_setup(struct rpc_message *msg, struct inode *dir) static void nfs4_proc_unlink_rpc_prepare(struct rpc_task *task, struct nfs_unlinkdata *data) { - if (nfs4_setup_sequence(NFS_SERVER(data->dir), - &data->args.seq_args, - &data->res.seq_res, - task)) - return; - rpc_call_start(task); + nfs4_setup_sequence(NFS_SERVER(data->dir), + &data->args.seq_args, + &data->res.seq_res, + task); } static int nfs4_proc_unlink_done(struct rpc_task *task, struct inode *dir) @@ -2965,12 +2960,10 @@ static void nfs4_proc_rename_setup(struct rpc_message *msg, struct inode *dir) static void nfs4_proc_rename_rpc_prepare(struct rpc_task *task, struct nfs_renamedata *data) { - if (nfs4_setup_sequence(NFS_SERVER(data->old_dir), - &data->args.seq_args, - &data->res.seq_res, - task)) - return; - rpc_call_start(task); + nfs4_setup_sequence(NFS_SERVER(data->old_dir), + &data->args.seq_args, + &data->res.seq_res, + task); } static int nfs4_proc_rename_done(struct rpc_task *task, struct inode *old_dir, @@ -3459,12 +3452,10 @@ static void 
nfs4_proc_read_setup(struct nfs_read_data *data, struct rpc_message static void nfs4_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data) { - if (nfs4_setup_sequence(NFS_SERVER(data->header->inode), - &data->args.seq_args, - &data->res.seq_res, - task)) - return; - rpc_call_start(task); + nfs4_setup_sequence(NFS_SERVER(data->header->inode), + &data->args.seq_args, + &data->res.seq_res, + task); } static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_write_data *data) @@ -3525,22 +3516,18 @@ static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_messag static void nfs4_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data) { - if (nfs4_setup_sequence(NFS_SERVER(data->header->inode), - &data->args.seq_args, - &data->res.seq_res, - task)) - return; - rpc_call_start(task); + nfs4_setup_sequence(NFS_SERVER(data->header->inode), + &data->args.seq_args, + &data->res.seq_res, + task); } static void nfs4_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data) { - if (nfs4_setup_sequence(NFS_SERVER(data->inode), - &data->args.seq_args, - &data->res.seq_res, - task)) - return; - rpc_call_start(task); + nfs4_setup_sequence(NFS_SERVER(data->inode), + &data->args.seq_args, + &data->res.seq_res, + task); } static int nfs4_commit_done_cb(struct rpc_task *task, struct nfs_commit_data *data) @@ -4187,11 +4174,10 @@ static void nfs4_delegreturn_prepare(struct rpc_task *task, void *data) d_data = (struct nfs4_delegreturndata *)data; - if (nfs4_setup_sequence(d_data->res.server, - &d_data->args.seq_args, - &d_data->res.seq_res, task)) - return; - rpc_call_start(task); + nfs4_setup_sequence(d_data->res.server, + &d_data->args.seq_args, + &d_data->res.seq_res, + task); } #endif /* CONFIG_NFS_V4_1 */ @@ -4445,8 +4431,6 @@ static void nfs4_locku_prepare(struct rpc_task *task, void *data) &calldata->res.seq_res, task) != 0) nfs_release_seqid(calldata->arg.seqid); - else - rpc_call_start(task); } static 
const struct rpc_call_ops nfs4_locku_ops = { @@ -4601,10 +4585,8 @@ static void nfs4_lock_prepare(struct rpc_task *task, void *calldata) if (nfs4_setup_sequence(data->server, &data->arg.seq_args, &data->res.seq_res, - task) == 0) { - rpc_call_start(task); + task) == 0) return; - } nfs_release_seqid(data->arg.open_seqid); out_release_lock_seqid: nfs_release_seqid(data->arg.lock_seqid); @@ -5462,7 +5444,6 @@ struct nfs4_get_lease_time_data { static void nfs4_get_lease_time_prepare(struct rpc_task *task, void *calldata) { - int ret; struct nfs4_get_lease_time_data *data = (struct nfs4_get_lease_time_data *)calldata; @@ -5470,12 +5451,10 @@ static void nfs4_get_lease_time_prepare(struct rpc_task *task, rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED); /* just setup sequence, do not trigger session recovery since we're invoked within one */ - ret = nfs41_setup_sequence(data->clp->cl_session, - &data->args->la_seq_args, - &data->res->lr_seq_res, task); - - if (ret != -EAGAIN) - rpc_call_start(task); + nfs41_setup_sequence(data->clp->cl_session, + &data->args->la_seq_args, + &data->res->lr_seq_res, + task); dprintk("<-- %s\n", __func__); } @@ -5809,9 +5788,7 @@ static void nfs41_sequence_prepare(struct rpc_task *task, void *data) args = task->tk_msg.rpc_argp; res = task->tk_msg.rpc_resp; - if (nfs41_setup_sequence(clp->cl_session, args, res, task)) - return; - rpc_call_start(task); + nfs41_setup_sequence(clp->cl_session, args, res, task); } static void nfs41_sequence_prepare_privileged(struct rpc_task *task, void *data) @@ -5914,12 +5891,10 @@ static void nfs4_reclaim_complete_prepare(struct rpc_task *task, void *data) struct nfs4_reclaim_complete_data *calldata = data; rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED); - if (nfs41_setup_sequence(calldata->clp->cl_session, - &calldata->arg.seq_args, - &calldata->res.seq_res, task)) - return; - - rpc_call_start(task); + nfs41_setup_sequence(calldata->clp->cl_session, + &calldata->arg.seq_args, + 
&calldata->res.seq_res, + task); } static int nfs41_reclaim_complete_handle_errors(struct rpc_task *task, struct nfs_client *clp) @@ -6034,9 +6009,7 @@ nfs4_layoutget_prepare(struct rpc_task *task, void *calldata) NFS_I(lgp->args.inode)->layout, lgp->args.ctx->state)) { rpc_exit(task, NFS4_OK); - return; } - rpc_call_start(task); } static void nfs4_layoutget_done(struct rpc_task *task, void *calldata) @@ -6200,10 +6173,10 @@ nfs4_layoutreturn_prepare(struct rpc_task *task, void *calldata) struct nfs4_layoutreturn *lrp = calldata; dprintk("--> %s\n", __func__); - if (nfs41_setup_sequence(lrp->clp->cl_session, &lrp->args.seq_args, - &lrp->res.seq_res, task)) - return; - rpc_call_start(task); + nfs41_setup_sequence(lrp->clp->cl_session, + &lrp->args.seq_args, + &lrp->res.seq_res, + task); } static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata) @@ -6364,10 +6337,10 @@ static void nfs4_layoutcommit_prepare(struct rpc_task *task, void *calldata) struct nfs_server *server = NFS_SERVER(data->args.inode); struct nfs4_session *session = nfs4_get_session(server); - if (nfs41_setup_sequence(session, &data->args.seq_args, - &data->res.seq_res, task)) - return; - rpc_call_start(task); + nfs41_setup_sequence(session, + &data->args.seq_args, + &data->res.seq_res, + task); } static void -- cgit v1.2.3 From fd0c09537a8494e9dccf3856b90058e1f97f1d62 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 1 Nov 2012 14:43:38 -0400 Subject: NFSv4: Simplify the NFSv4/v4.1 synchronous call switch We shouldn't need to pass the 'cache_reply' parameter if we initialise the sequence_args/sequence_res in the caller. 
Signed-off-by: Trond Myklebust --- fs/nfs/internal.h | 12 ------------ fs/nfs/nfs4_fs.h | 3 +-- fs/nfs/nfs4proc.c | 15 +++++++-------- 3 files changed, 8 insertions(+), 22 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 9bdbfc3884a..fb994471bd3 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -451,18 +451,6 @@ extern struct nfs_client *nfs4_init_client(struct nfs_client *clp, const struct rpc_timeout *timeparms, const char *ip_addr, rpc_authflavor_t authflavour); -extern int _nfs4_call_sync(struct rpc_clnt *clnt, - struct nfs_server *server, - struct rpc_message *msg, - struct nfs4_sequence_args *args, - struct nfs4_sequence_res *res, - int cache_reply); -extern int _nfs4_call_sync_session(struct rpc_clnt *clnt, - struct nfs_server *server, - struct rpc_message *msg, - struct nfs4_sequence_args *args, - struct nfs4_sequence_res *res, - int cache_reply); extern int nfs40_walk_client_list(struct nfs_client *clp, struct nfs_client **result, struct rpc_cred *cred); diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 8022adec34c..4f0cdc1b714 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -39,8 +39,7 @@ struct nfs4_minor_version_ops { struct nfs_server *server, struct rpc_message *msg, struct nfs4_sequence_args *args, - struct nfs4_sequence_res *res, - int cache_reply); + struct nfs4_sequence_res *res); bool (*match_stateid)(const nfs4_stateid *, const nfs4_stateid *); int (*find_root_sec)(struct nfs_server *, struct nfs_fh *, diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 23b0c2fcb05..4aaaa3ba308 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -664,14 +664,13 @@ static int nfs4_call_sync_sequence(struct rpc_clnt *clnt, return ret; } +static int _nfs4_call_sync_session(struct rpc_clnt *clnt, struct nfs_server *server, struct rpc_message *msg, struct nfs4_sequence_args *args, - struct nfs4_sequence_res *res, - int cache_reply) + struct nfs4_sequence_res *res) { - nfs41_init_sequence(args, 
res, cache_reply); return nfs4_call_sync_sequence(clnt, server, msg, args, res, 0); } @@ -689,18 +688,17 @@ static int nfs4_sequence_done(struct rpc_task *task, } #endif /* CONFIG_NFS_V4_1 */ +static int _nfs4_call_sync(struct rpc_clnt *clnt, struct nfs_server *server, struct rpc_message *msg, struct nfs4_sequence_args *args, - struct nfs4_sequence_res *res, - int cache_reply) + struct nfs4_sequence_res *res) { - nfs41_init_sequence(args, res, cache_reply); return rpc_call_sync(clnt, msg, 0); } -static inline +static int nfs4_call_sync(struct rpc_clnt *clnt, struct nfs_server *server, struct rpc_message *msg, @@ -708,8 +706,9 @@ int nfs4_call_sync(struct rpc_clnt *clnt, struct nfs4_sequence_res *res, int cache_reply) { + nfs41_init_sequence(args, res, cache_reply); return server->nfs_client->cl_mvops->call_sync(clnt, server, msg, - args, res, cache_reply); + args, res); } static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo) -- cgit v1.2.3 From 7b939a3f44293516c4225f640e8c4b9200beeabc Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 1 Nov 2012 15:19:46 -0400 Subject: NFSv4.1: Clean up nfs41_setup_sequence Move all the sleep-and-exit cases into a single section of code. 
Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 4aaaa3ba308..87525eb60bd 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -523,18 +523,14 @@ int nfs41_setup_sequence(struct nfs4_session *session, if (test_bit(NFS4_SESSION_DRAINING, &session->session_state) && !rpc_task_has_priority(task, RPC_PRIORITY_PRIVILEGED)) { /* The state manager will wait until the slot table is empty */ - rpc_sleep_on(&tbl->slot_tbl_waitq, task, NULL); - spin_unlock(&tbl->slot_tbl_lock); dprintk("%s session is draining\n", __func__); - return -EAGAIN; + goto out_sleep; } if (!rpc_queue_empty(&tbl->slot_tbl_waitq) && !rpc_task_has_priority(task, RPC_PRIORITY_PRIVILEGED)) { - rpc_sleep_on(&tbl->slot_tbl_waitq, task, NULL); - spin_unlock(&tbl->slot_tbl_lock); dprintk("%s enforce FIFO order\n", __func__); - return -EAGAIN; + goto out_sleep; } slot = nfs4_alloc_slot(tbl); @@ -542,10 +538,8 @@ int nfs41_setup_sequence(struct nfs4_session *session, /* If out of memory, try again in 1/4 second */ if (slot == ERR_PTR(-ENOMEM)) task->tk_timeout = HZ >> 2; - rpc_sleep_on(&tbl->slot_tbl_waitq, task, NULL); - spin_unlock(&tbl->slot_tbl_lock); dprintk("<-- %s: no free slots\n", __func__); - return -EAGAIN; + goto out_sleep; } spin_unlock(&tbl->slot_tbl_lock); @@ -566,6 +560,10 @@ int nfs41_setup_sequence(struct nfs4_session *session, out_success: rpc_call_start(task); return 0; +out_sleep: + rpc_sleep_on(&tbl->slot_tbl_waitq, task, NULL); + spin_unlock(&tbl->slot_tbl_lock); + return -EAGAIN; } EXPORT_SYMBOL_GPL(nfs41_setup_sequence); -- cgit v1.2.3 From 275e7e20aa8599719729f8ef4c09c9bfc4895642 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 1 Nov 2012 17:07:07 -0400 Subject: NFSv4.1: Remove the 'FIFO' behaviour for nfs41_setup_sequence It is more important to preserve the task priority behaviour, which ensures that things like reclaim 
writes take precedence over background and kupdate writes. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4_fs.h | 1 - fs/nfs/nfs4proc.c | 15 +-------------- fs/nfs/nfs4state.c | 4 +--- 3 files changed, 2 insertions(+), 18 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 4f0cdc1b714..4635bf51b3e 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -236,7 +236,6 @@ static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *ser return server->nfs_client->cl_session; } -extern bool nfs4_set_task_privileged(struct rpc_task *task, void *dummy); extern int nfs4_setup_sequence(const struct nfs_server *server, struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, struct rpc_task *task); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 87525eb60bd..4b1635ce658 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -379,12 +379,6 @@ static void renew_lease(const struct nfs_server *server, unsigned long timestamp #if defined(CONFIG_NFS_V4_1) -bool nfs4_set_task_privileged(struct rpc_task *task, void *dummy) -{ - rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED); - return true; -} - static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res) { struct nfs4_session *session; @@ -412,8 +406,7 @@ static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res) if (tbl->highest_used_slotid != NFS4_NO_SLOT) send_new_highest_used_slotid = false; if (!nfs4_session_draining(session)) { - if (rpc_wake_up_first(&tbl->slot_tbl_waitq, - nfs4_set_task_privileged, NULL) != NULL) + if (rpc_wake_up_next(&tbl->slot_tbl_waitq) != NULL) send_new_highest_used_slotid = false; } spin_unlock(&tbl->slot_tbl_lock); @@ -527,12 +520,6 @@ int nfs41_setup_sequence(struct nfs4_session *session, goto out_sleep; } - if (!rpc_queue_empty(&tbl->slot_tbl_waitq) && - !rpc_task_has_priority(task, RPC_PRIORITY_PRIVILEGED)) { - dprintk("%s enforce FIFO order\n", __func__); - goto out_sleep; - } - slot = 
nfs4_alloc_slot(tbl); if (IS_ERR(slot)) { /* If out of memory, try again in 1/4 second */ diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index c137421f212..7d73df5a05d 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -263,9 +263,7 @@ static void nfs4_end_drain_session(struct nfs_client *clp) if (test_and_clear_bit(NFS4_SESSION_DRAINING, &ses->session_state)) { spin_lock(&tbl->slot_tbl_lock); for (i = 0; i <= tbl->max_slotid; i++) { - if (rpc_wake_up_first(&tbl->slot_tbl_waitq, - nfs4_set_task_privileged, - NULL) == NULL) + if (rpc_wake_up_next(&tbl->slot_tbl_waitq) == NULL) break; } spin_unlock(&tbl->slot_tbl_lock); -- cgit v1.2.3 From 8fe72bac8de784c4059b41a7dd6bb0151a3ae898 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 29 Oct 2012 19:02:20 -0400 Subject: NFSv4: Clean up handling of privileged operations Privileged rpc calls are those that are run by the state recovery thread, in cases where we're trying to recover the system after a server reboot or a network partition. In those cases, we want to fence off all other rpc calls (see nfs4_begin_drain_session()) so that they don't end up using stateids or clientids that are in the process of being recovered. Prior to this patch, we had to set up special callback functions in order to declare an rpc call as being privileged. By adding a new field to the sequence arguments, this patch simplifies things considerably, and allows us to declare the rpc call as privileged before it is run. 
Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 114 ++++++++++++++++++++---------------------------------- 1 file changed, 42 insertions(+), 72 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 4b1635ce658..38a709d7859 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -490,11 +490,17 @@ static void nfs41_init_sequence(struct nfs4_sequence_args *args, { args->sa_slot = NULL; args->sa_cache_this = 0; + args->sa_privileged = 0; if (cache_reply) args->sa_cache_this = 1; res->sr_slot = NULL; } +static void nfs4_set_sequence_privileged(struct nfs4_sequence_args *args) +{ + args->sa_privileged = 1; +} + int nfs41_setup_sequence(struct nfs4_session *session, struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, @@ -514,7 +520,7 @@ int nfs41_setup_sequence(struct nfs4_session *session, spin_lock(&tbl->slot_tbl_lock); if (test_bit(NFS4_SESSION_DRAINING, &session->session_state) && - !rpc_task_has_priority(task, RPC_PRIORITY_PRIVILEGED)) { + !args->sa_privileged) { /* The state manager will wait until the slot table is empty */ dprintk("%s session is draining\n", __func__); goto out_sleep; @@ -548,6 +554,9 @@ out_success: rpc_call_start(task); return 0; out_sleep: + /* Privileged tasks are queued with top priority */ + if (args->sa_privileged) + rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED); rpc_sleep_on(&tbl->slot_tbl_waitq, task, NULL); spin_unlock(&tbl->slot_tbl_lock); return -EAGAIN; @@ -593,12 +602,6 @@ static void nfs41_call_sync_prepare(struct rpc_task *task, void *calldata) nfs41_setup_sequence(session, data->seq_args, data->seq_res, task); } -static void nfs41_call_priv_sync_prepare(struct rpc_task *task, void *calldata) -{ - rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED); - nfs41_call_sync_prepare(task, calldata); -} - static void nfs41_call_sync_done(struct rpc_task *task, void *calldata) { struct nfs41_call_sync_data *data = calldata; @@ -611,17 +614,11 @@ static const struct 
rpc_call_ops nfs41_call_sync_ops = { .rpc_call_done = nfs41_call_sync_done, }; -static const struct rpc_call_ops nfs41_call_priv_sync_ops = { - .rpc_call_prepare = nfs41_call_priv_sync_prepare, - .rpc_call_done = nfs41_call_sync_done, -}; - static int nfs4_call_sync_sequence(struct rpc_clnt *clnt, struct nfs_server *server, struct rpc_message *msg, struct nfs4_sequence_args *args, - struct nfs4_sequence_res *res, - int privileged) + struct nfs4_sequence_res *res) { int ret; struct rpc_task *task; @@ -637,8 +634,6 @@ static int nfs4_call_sync_sequence(struct rpc_clnt *clnt, .callback_data = &data }; - if (privileged) - task_setup.callback_ops = &nfs41_call_priv_sync_ops; task = rpc_run_task(&task_setup); if (IS_ERR(task)) ret = PTR_ERR(task); @@ -656,16 +651,21 @@ int _nfs4_call_sync_session(struct rpc_clnt *clnt, struct nfs4_sequence_args *args, struct nfs4_sequence_res *res) { - return nfs4_call_sync_sequence(clnt, server, msg, args, res, 0); + return nfs4_call_sync_sequence(clnt, server, msg, args, res); } #else -static inline +static void nfs41_init_sequence(struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, int cache_reply) { } +static void nfs4_set_sequence_privileged(struct nfs4_sequence_args *args) +{ +} + + static int nfs4_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res) { @@ -1475,13 +1475,6 @@ unlock_no_action: rcu_read_unlock(); out_no_action: task->tk_action = NULL; - -} - -static void nfs4_recover_open_prepare(struct rpc_task *task, void *calldata) -{ - rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED); - nfs4_open_prepare(task, calldata); } static void nfs4_open_done(struct rpc_task *task, void *calldata) @@ -1542,12 +1535,6 @@ static const struct rpc_call_ops nfs4_open_ops = { .rpc_release = nfs4_open_release, }; -static const struct rpc_call_ops nfs4_recover_open_ops = { - .rpc_call_prepare = nfs4_recover_open_prepare, - .rpc_call_done = nfs4_open_done, - .rpc_release = nfs4_open_release, -}; - static int 
nfs4_run_open_task(struct nfs4_opendata *data, int isrecover) { struct inode *dir = data->dir->d_inode; @@ -1577,7 +1564,7 @@ static int nfs4_run_open_task(struct nfs4_opendata *data, int isrecover) data->rpc_status = 0; data->cancelled = 0; if (isrecover) - task_setup_data.callback_ops = &nfs4_recover_open_ops; + nfs4_set_sequence_privileged(&o_arg->seq_args); task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) return PTR_ERR(task); @@ -4558,8 +4545,9 @@ static void nfs4_lock_prepare(struct rpc_task *task, void *calldata) return; /* Do we need to do an open_to_lock_owner? */ if (!(data->arg.lock_seqid->sequence->flags & NFS_SEQID_CONFIRMED)) { - if (nfs_wait_on_sequence(data->arg.open_seqid, task) != 0) + if (nfs_wait_on_sequence(data->arg.open_seqid, task) != 0) { goto out_release_lock_seqid; + } data->arg.open_stateid = &state->stateid; data->arg.new_lock_owner = 1; data->res.open_seqid = data->arg.open_seqid; @@ -4574,13 +4562,7 @@ static void nfs4_lock_prepare(struct rpc_task *task, void *calldata) nfs_release_seqid(data->arg.open_seqid); out_release_lock_seqid: nfs_release_seqid(data->arg.lock_seqid); - dprintk("%s: done!, ret = %d\n", __func__, task->tk_status); -} - -static void nfs4_recover_lock_prepare(struct rpc_task *task, void *calldata) -{ - rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED); - nfs4_lock_prepare(task, calldata); + dprintk("%s: done!, ret = %d\n", __func__, data->rpc_status); } static void nfs4_lock_done(struct rpc_task *task, void *calldata) @@ -4635,12 +4617,6 @@ static const struct rpc_call_ops nfs4_lock_ops = { .rpc_release = nfs4_lock_release, }; -static const struct rpc_call_ops nfs4_recover_lock_ops = { - .rpc_call_prepare = nfs4_recover_lock_prepare, - .rpc_call_done = nfs4_lock_done, - .rpc_release = nfs4_lock_release, -}; - static void nfs4_handle_setlk_error(struct nfs_server *server, struct nfs4_lock_state *lsp, int new_lock_owner, int error) { switch (error) { @@ -4683,15 +4659,15 @@ static int _nfs4_do_setlk(struct 
nfs4_state *state, int cmd, struct file_lock *f return -ENOMEM; if (IS_SETLKW(cmd)) data->arg.block = 1; - if (recovery_type > NFS_LOCK_NEW) { - if (recovery_type == NFS_LOCK_RECLAIM) - data->arg.reclaim = NFS_LOCK_RECLAIM; - task_setup_data.callback_ops = &nfs4_recover_lock_ops; - } nfs41_init_sequence(&data->arg.seq_args, &data->res.seq_res, 1); msg.rpc_argp = &data->arg; msg.rpc_resp = &data->res; task_setup_data.callback_data = data; + if (recovery_type > NFS_LOCK_NEW) { + if (recovery_type == NFS_LOCK_RECLAIM) + data->arg.reclaim = NFS_LOCK_RECLAIM; + nfs4_set_sequence_privileged(&data->arg.seq_args); + } task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) return PTR_ERR(task); @@ -5432,7 +5408,6 @@ static void nfs4_get_lease_time_prepare(struct rpc_task *task, (struct nfs4_get_lease_time_data *)calldata; dprintk("--> %s\n", __func__); - rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED); /* just setup sequence, do not trigger session recovery since we're invoked within one */ nfs41_setup_sequence(data->clp->cl_session, @@ -5500,6 +5475,7 @@ int nfs4_proc_get_lease_time(struct nfs_client *clp, struct nfs_fsinfo *fsinfo) int status; nfs41_init_sequence(&args.la_seq_args, &res.lr_seq_res, 0); + nfs4_set_sequence_privileged(&args.la_seq_args); dprintk("--> %s\n", __func__); task = rpc_run_task(&task_setup); @@ -5775,26 +5751,15 @@ static void nfs41_sequence_prepare(struct rpc_task *task, void *data) nfs41_setup_sequence(clp->cl_session, args, res, task); } -static void nfs41_sequence_prepare_privileged(struct rpc_task *task, void *data) -{ - rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED); - nfs41_sequence_prepare(task, data); -} - static const struct rpc_call_ops nfs41_sequence_ops = { .rpc_call_done = nfs41_sequence_call_done, .rpc_call_prepare = nfs41_sequence_prepare, .rpc_release = nfs41_sequence_release, }; -static const struct rpc_call_ops nfs41_sequence_privileged_ops = { - .rpc_call_done = nfs41_sequence_call_done, - .rpc_call_prepare = 
nfs41_sequence_prepare_privileged, - .rpc_release = nfs41_sequence_release, -}; - -static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred, - const struct rpc_call_ops *seq_ops) +static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, + struct rpc_cred *cred, + bool is_privileged) { struct nfs4_sequence_data *calldata; struct rpc_message msg = { @@ -5804,7 +5769,7 @@ static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, struct rpc_ struct rpc_task_setup task_setup_data = { .rpc_client = clp->cl_rpcclient, .rpc_message = &msg, - .callback_ops = seq_ops, + .callback_ops = &nfs41_sequence_ops, .flags = RPC_TASK_ASYNC | RPC_TASK_SOFT, }; @@ -5816,6 +5781,8 @@ static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, struct rpc_ return ERR_PTR(-ENOMEM); } nfs41_init_sequence(&calldata->args, &calldata->res, 0); + if (is_privileged) + nfs4_set_sequence_privileged(&calldata->args); msg.rpc_argp = &calldata->args; msg.rpc_resp = &calldata->res; calldata->clp = clp; @@ -5831,7 +5798,7 @@ static int nfs41_proc_async_sequence(struct nfs_client *clp, struct rpc_cred *cr if ((renew_flags & NFS4_RENEW_TIMEOUT) == 0) return 0; - task = _nfs41_proc_sequence(clp, cred, &nfs41_sequence_ops); + task = _nfs41_proc_sequence(clp, cred, false); if (IS_ERR(task)) ret = PTR_ERR(task); else @@ -5845,7 +5812,7 @@ static int nfs4_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred) struct rpc_task *task; int ret; - task = _nfs41_proc_sequence(clp, cred, &nfs41_sequence_privileged_ops); + task = _nfs41_proc_sequence(clp, cred, true); if (IS_ERR(task)) { ret = PTR_ERR(task); goto out; @@ -5874,7 +5841,6 @@ static void nfs4_reclaim_complete_prepare(struct rpc_task *task, void *data) { struct nfs4_reclaim_complete_data *calldata = data; - rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED); nfs41_setup_sequence(calldata->clp->cl_session, &calldata->arg.seq_args, &calldata->res.seq_res, @@ -5955,6 +5921,7 @@ static 
int nfs41_proc_reclaim_complete(struct nfs_client *clp) calldata->arg.one_fs = 0; nfs41_init_sequence(&calldata->arg.seq_args, &calldata->res.seq_res, 0); + nfs4_set_sequence_privileged(&calldata->arg.seq_args); msg.rpc_argp = &calldata->arg; msg.rpc_resp = &calldata->res; task_setup_data.callback_data = calldata; @@ -6521,7 +6488,9 @@ static int _nfs41_test_stateid(struct nfs_server *server, nfs4_stateid *stateid) dprintk("NFS call test_stateid %p\n", stateid); nfs41_init_sequence(&args.seq_args, &res.seq_res, 0); - status = nfs4_call_sync_sequence(server->client, server, &msg, &args.seq_args, &res.seq_res, 1); + nfs4_set_sequence_privileged(&args.seq_args); + status = nfs4_call_sync_sequence(server->client, server, &msg, + &args.seq_args, &res.seq_res); if (status != NFS_OK) { dprintk("NFS reply test_stateid: failed, %d\n", status); return status; @@ -6568,8 +6537,9 @@ static int _nfs4_free_stateid(struct nfs_server *server, nfs4_stateid *stateid) dprintk("NFS call free_stateid %p\n", stateid); nfs41_init_sequence(&args.seq_args, &res.seq_res, 0); + nfs4_set_sequence_privileged(&args.seq_args); status = nfs4_call_sync_sequence(server->client, server, &msg, - &args.seq_args, &res.seq_res, 1); + &args.seq_args, &res.seq_res); dprintk("NFS reply free_stateid: %d\n", status); return status; } -- cgit v1.2.3 From 104287cd4ebb5484c654551c102c25c94227f717 Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Mon, 12 Nov 2012 14:13:13 -0500 Subject: NFS: Remove _nfs_call_sync_session All it does is pass its arguments through to another function. Let's cut out the middleman... 
Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 38a709d7859..7f8b4278133 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -644,16 +644,6 @@ static int nfs4_call_sync_sequence(struct rpc_clnt *clnt, return ret; } -static -int _nfs4_call_sync_session(struct rpc_clnt *clnt, - struct nfs_server *server, - struct rpc_message *msg, - struct nfs4_sequence_args *args, - struct nfs4_sequence_res *res) -{ - return nfs4_call_sync_sequence(clnt, server, msg, args, res); -} - #else static void nfs41_init_sequence(struct nfs4_sequence_args *args, @@ -6659,7 +6649,7 @@ static const struct nfs4_minor_version_ops nfs_v4_0_minor_ops = { #if defined(CONFIG_NFS_V4_1) static const struct nfs4_minor_version_ops nfs_v4_1_minor_ops = { .minor_version = 1, - .call_sync = _nfs4_call_sync_session, + .call_sync = nfs4_call_sync_sequence, .match_stateid = nfs41_match_stateid, .find_root_sec = nfs41_find_root_sec, .reboot_recovery_ops = &nfs41_reboot_recovery_ops, -- cgit v1.2.3 From 1e1093c7fd4951bb4272212c238d09cd7a22f5fc Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 1 Nov 2012 16:44:05 -0400 Subject: NFSv4.1: Don't mess with task priorities in nfs41_setup_sequence We want to preserve the rpc_task priority for things like writebacks, that may have differing levels of urgency. 
Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 7f8b4278133..99d99a5a3f6 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -536,8 +536,6 @@ int nfs41_setup_sequence(struct nfs4_session *session, } spin_unlock(&tbl->slot_tbl_lock); - rpc_task_set_priority(task, RPC_PRIORITY_NORMAL); - args->sa_slot = slot; dprintk("<-- %s slotid=%d seqid=%d\n", __func__, @@ -556,8 +554,10 @@ out_success: out_sleep: /* Privileged tasks are queued with top priority */ if (args->sa_privileged) - rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED); - rpc_sleep_on(&tbl->slot_tbl_waitq, task, NULL); + rpc_sleep_on_priority(&tbl->slot_tbl_waitq, task, + NULL, RPC_PRIORITY_PRIVILEGED); + else + rpc_sleep_on(&tbl->slot_tbl_waitq, task, NULL); spin_unlock(&tbl->slot_tbl_lock); return -EAGAIN; } -- cgit v1.2.3 From b75ad4cda5a6cd3431b1c65c2739c5ebd2c4b9da Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 29 Nov 2012 17:27:47 -0500 Subject: NFSv4.1: Ensure smooth handover of slots from one task to the next waiting Currently, we see a lot of bouncing for the value of highest_used_slotid due to the fact that slots are getting freed, instead of getting instantly transmitted to the next waiting task. 
Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 12 +++++++---- fs/nfs/nfs4session.c | 59 +++++++++++++++++++++++++++++++++++++++++++++++++--- fs/nfs/nfs4session.h | 4 ++++ fs/nfs/nfs4state.c | 6 +----- 4 files changed, 69 insertions(+), 12 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 99d99a5a3f6..992233561db 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -401,14 +401,15 @@ static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res) if (tbl->highest_used_slotid > tbl->target_highest_slotid) send_new_highest_used_slotid = true; + if (nfs41_wake_and_assign_slot(tbl, res->sr_slot)) { + send_new_highest_used_slotid = false; + goto out_unlock; + } nfs4_free_slot(tbl, res->sr_slot); if (tbl->highest_used_slotid != NFS4_NO_SLOT) send_new_highest_used_slotid = false; - if (!nfs4_session_draining(session)) { - if (rpc_wake_up_next(&tbl->slot_tbl_waitq) != NULL) - send_new_highest_used_slotid = false; - } +out_unlock: spin_unlock(&tbl->slot_tbl_lock); res->sr_slot = NULL; if (send_new_highest_used_slotid) @@ -1465,6 +1466,7 @@ unlock_no_action: rcu_read_unlock(); out_no_action: task->tk_action = NULL; + nfs4_sequence_done(task, &data->o_res.seq_res); } static void nfs4_open_done(struct rpc_task *task, void *calldata) @@ -2135,6 +2137,7 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) if (!call_close) { /* Note: exit _without_ calling nfs4_close_done */ task->tk_action = NULL; + nfs4_sequence_done(task, &calldata->res.seq_res); goto out; } @@ -4384,6 +4387,7 @@ static void nfs4_locku_prepare(struct rpc_task *task, void *data) if (test_bit(NFS_LOCK_INITIALIZED, &calldata->lsp->ls_flags) == 0) { /* Note: exit _without_ running nfs4_locku_done */ task->tk_action = NULL; + nfs4_sequence_done(task, &calldata->res.seq_res); return; } calldata->timestamp = jiffies; diff --git a/fs/nfs/nfs4session.c b/fs/nfs/nfs4session.c index 701170293ce..066cfa101b4 100644 --- a/fs/nfs/nfs4session.c +++ 
b/fs/nfs/nfs4session.c @@ -217,11 +217,65 @@ static void nfs4_destroy_slot_tables(struct nfs4_session *session) nfs4_shrink_slot_table(&session->bc_slot_table, 0); } +static bool nfs41_assign_slot(struct rpc_task *task, void *pslot) +{ + struct nfs4_sequence_args *args = task->tk_msg.rpc_argp; + struct nfs4_sequence_res *res = task->tk_msg.rpc_resp; + struct nfs4_slot *slot = pslot; + struct nfs4_slot_table *tbl = slot->table; + + if (nfs4_session_draining(tbl->session) && !args->sa_privileged) + return false; + slot->renewal_time = jiffies; + slot->generation = tbl->generation; + args->sa_slot = slot; + res->sr_slot = slot; + res->sr_status_flags = 0; + res->sr_status = 1; + return true; +} + +static bool __nfs41_wake_and_assign_slot(struct nfs4_slot_table *tbl, + struct nfs4_slot *slot) +{ + if (rpc_wake_up_first(&tbl->slot_tbl_waitq, nfs41_assign_slot, slot)) + return true; + return false; +} + +bool nfs41_wake_and_assign_slot(struct nfs4_slot_table *tbl, + struct nfs4_slot *slot) +{ + if (slot->slot_nr > tbl->max_slotid) + return false; + return __nfs41_wake_and_assign_slot(tbl, slot); +} + +static bool nfs41_try_wake_next_slot_table_entry(struct nfs4_slot_table *tbl) +{ + struct nfs4_slot *slot = nfs4_alloc_slot(tbl); + if (!IS_ERR(slot)) { + bool ret = __nfs41_wake_and_assign_slot(tbl, slot); + if (ret) + return ret; + nfs4_free_slot(tbl, slot); + } + return false; +} + +void nfs41_wake_slot_table(struct nfs4_slot_table *tbl) +{ + for (;;) { + if (!nfs41_try_wake_next_slot_table_entry(tbl)) + break; + } +} + /* Update the client's idea of target_highest_slotid */ static void nfs41_set_target_slotid_locked(struct nfs4_slot_table *tbl, u32 target_highest_slotid) { - unsigned int max_slotid, i; + unsigned int max_slotid; if (tbl->target_highest_slotid == target_highest_slotid) return; @@ -229,9 +283,8 @@ static void nfs41_set_target_slotid_locked(struct nfs4_slot_table *tbl, tbl->generation++; max_slotid = min(NFS4_MAX_SLOT_TABLE - 1, 
tbl->target_highest_slotid); - for (i = tbl->max_slotid + 1; i <= max_slotid; i++) - rpc_wake_up_next(&tbl->slot_tbl_waitq); tbl->max_slotid = max_slotid; + nfs41_wake_slot_table(tbl); } void nfs41_set_target_slotid(struct nfs4_slot_table *tbl, diff --git a/fs/nfs/nfs4session.h b/fs/nfs/nfs4session.h index bdd14a60722..7db73937016 100644 --- a/fs/nfs/nfs4session.h +++ b/fs/nfs/nfs4session.h @@ -94,6 +94,10 @@ static inline bool nfs4_session_draining(struct nfs4_session *session) return !!test_bit(NFS4_SESSION_DRAINING, &session->session_state); } +bool nfs41_wake_and_assign_slot(struct nfs4_slot_table *tbl, + struct nfs4_slot *slot); +void nfs41_wake_slot_table(struct nfs4_slot_table *tbl); + /* * Determine if sessions are in use. */ diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 7d73df5a05d..78e90a80fc3 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -255,17 +255,13 @@ static void nfs4_end_drain_session(struct nfs_client *clp) { struct nfs4_session *ses = clp->cl_session; struct nfs4_slot_table *tbl; - unsigned int i; if (ses == NULL) return; tbl = &ses->fc_slot_table; if (test_and_clear_bit(NFS4_SESSION_DRAINING, &ses->session_state)) { spin_lock(&tbl->slot_tbl_lock); - for (i = 0; i <= tbl->max_slotid; i++) { - if (rpc_wake_up_next(&tbl->slot_tbl_waitq) == NULL) - break; - } + nfs41_wake_slot_table(tbl); spin_unlock(&tbl->slot_tbl_lock); } } -- cgit v1.2.3 From 1fa8064429d0acbf5bbf3c8a53f65679fdacc75e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 2 Dec 2012 13:54:59 -0500 Subject: NFSv4.1: Try to eliminate outliers when updating target_highest_slotid Look for sudden changes in the first and second derivatives in order to eliminate outlier changes to target_highest_slotid (which are due to out-of-order RPC replies). 
Signed-off-by: Trond Myklebust --- fs/nfs/nfs4session.c | 63 +++++++++++++++++++++++++++++++++++++++++++++++----- fs/nfs/nfs4session.h | 2 ++ 2 files changed, 60 insertions(+), 5 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4session.c b/fs/nfs/nfs4session.c index 066cfa101b4..ed5aa9fa9c7 100644 --- a/fs/nfs/nfs4session.c +++ b/fs/nfs/nfs4session.c @@ -178,6 +178,8 @@ static void nfs4_reset_slot_table(struct nfs4_slot_table *tbl, tbl->highest_used_slotid = NFS4_NO_SLOT; tbl->target_highest_slotid = server_highest_slotid; tbl->server_highest_slotid = server_highest_slotid; + tbl->d_target_highest_slotid = 0; + tbl->d2_target_highest_slotid = 0; tbl->max_slotid = server_highest_slotid; } @@ -292,6 +294,8 @@ void nfs41_set_target_slotid(struct nfs4_slot_table *tbl, { spin_lock(&tbl->slot_tbl_lock); nfs41_set_target_slotid_locked(tbl, target_highest_slotid); + tbl->d_target_highest_slotid = 0; + tbl->d2_target_highest_slotid = 0; spin_unlock(&tbl->slot_tbl_lock); } @@ -307,16 +311,65 @@ static void nfs41_set_server_slotid_locked(struct nfs4_slot_table *tbl, tbl->server_highest_slotid = highest_slotid; } +static s32 nfs41_derivative_target_slotid(s32 s1, s32 s2) +{ + s1 -= s2; + if (s1 == 0) + return 0; + if (s1 < 0) + return (s1 - 1) >> 1; + return (s1 + 1) >> 1; +} + +static int nfs41_sign_s32(s32 s1) +{ + if (s1 > 0) + return 1; + if (s1 < 0) + return -1; + return 0; +} + +static bool nfs41_same_sign_or_zero_s32(s32 s1, s32 s2) +{ + if (!s1 || !s2) + return true; + return nfs41_sign_s32(s1) == nfs41_sign_s32(s2); +} + +/* Try to eliminate outliers by checking for sharp changes in the + * derivatives and second derivatives + */ +static bool nfs41_is_outlier_target_slotid(struct nfs4_slot_table *tbl, + u32 new_target) +{ + s32 d_target, d2_target; + bool ret = true; + + d_target = nfs41_derivative_target_slotid(new_target, + tbl->target_highest_slotid); + d2_target = nfs41_derivative_target_slotid(d_target, + tbl->d_target_highest_slotid); + /* Is first 
derivative same sign? */ + if (nfs41_same_sign_or_zero_s32(d_target, tbl->d_target_highest_slotid)) + ret = false; + /* Is second derivative same sign? */ + if (nfs41_same_sign_or_zero_s32(d2_target, tbl->d2_target_highest_slotid)) + ret = false; + tbl->d_target_highest_slotid = d_target; + tbl->d2_target_highest_slotid = d2_target; + return ret; +} + void nfs41_update_target_slotid(struct nfs4_slot_table *tbl, struct nfs4_slot *slot, struct nfs4_sequence_res *res) { spin_lock(&tbl->slot_tbl_lock); - if (tbl->generation != slot->generation) - goto out; - nfs41_set_server_slotid_locked(tbl, res->sr_highest_slotid); - nfs41_set_target_slotid_locked(tbl, res->sr_target_highest_slotid); -out: + if (!nfs41_is_outlier_target_slotid(tbl, res->sr_target_highest_slotid)) + nfs41_set_target_slotid_locked(tbl, res->sr_target_highest_slotid); + if (tbl->generation == slot->generation) + nfs41_set_server_slotid_locked(tbl, res->sr_highest_slotid); spin_unlock(&tbl->slot_tbl_lock); } diff --git a/fs/nfs/nfs4session.h b/fs/nfs/nfs4session.h index 7db73937016..04f834cab16 100644 --- a/fs/nfs/nfs4session.h +++ b/fs/nfs/nfs4session.h @@ -38,6 +38,8 @@ struct nfs4_slot_table { * op for dynamic resizing */ u32 target_highest_slotid; /* Server max_slot target */ u32 server_highest_slotid; /* Server highest slotid */ + s32 d_target_highest_slotid; /* Derivative */ + s32 d2_target_highest_slotid; /* 2nd derivative */ unsigned long generation; /* Generation counter for target_highest_slotid */ struct completion complete; -- cgit v1.2.3 From 7d3e91a89b7adbc2831334def9e494dd9892f9af Mon Sep 17 00:00:00 2001 From: Sven Wegener Date: Sat, 8 Dec 2012 15:30:18 +0100 Subject: NFSv4: Check for buffer length in __nfs4_get_acl_uncached Commit 1f1ea6c "NFSv4: Fix buffer overflow checking in __nfs4_get_acl_uncached" accidentally dropped the checking for too small result buffer length. 
If someone uses getxattr on "system.nfs4_acl" on an NFSv4 mount supporting ACLs, the ACL has not been cached and the buffer supplied is too short, we still copy the complete ACL, resulting in kernel and user space memory corruption. Signed-off-by: Sven Wegener Cc: stable@kernel.org Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 5eec4429970..05e5f6f9f2b 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3937,8 +3937,13 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu goto out_free; } nfs4_write_cached_acl(inode, pages, res.acl_data_offset, res.acl_len); - if (buf) + if (buf) { + if (res.acl_len > buflen) { + ret = -ERANGE; + goto out_free; + } _copy_from_pages(buf, pages, res.acl_data_offset, res.acl_len); + } out_ok: ret = res.acl_len; out_free: -- cgit v1.2.3 From 81d9bce5309288086b58b4d97a644e495fef75f2 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 10 Dec 2012 09:25:48 -0500 Subject: nfs: don't extend writes to cover entire page if pagecache is invalid Jian reported that the following sequence would leave "testfile" with corrupt data: # mount localhost:/export /mnt/nfs/ -o vers=3 # echo abc > /mnt/nfs/testfile; echo def >> /export/testfile; echo ghi >> /mnt/nfs/testfile # cat -v /export/testfile abc ^@^@^@^@ghi While there's no locking involved here, the operations are serialized, so CTO should prevent corruption. The first write to the file is fine and writes 4 bytes. The file is then extended on the server. When it's reopened a GETATTR is issued and the size change is noticed. This causes NFS_INO_INVALID_DATA to be set on the file. Because the file is opened for write only, nfs_want_read_modify_write() returns 0 to nfs_write_begin(). nfs_updatepage then calls nfs_write_pageuptodate() to see if it should extend the nfs_page to cover the whole page. 
NFS_INO_INVALID_DATA is still set on the file at that point, but that flag is ignored and nfs_pageuptodate erroneously extends the write to cover the whole page, with the write done on the server side filled in with zeroes. This patch just has that function check for NFS_INO_INVALID_DATA in addition to NFS_INO_REVAL_PAGECACHE. This fixes the bug, but looking over the code, I wonder if we might have a similar bug in nfs_revalidate_size(). The difference between those two flags is very subtle, so it seems like we ought to be checking for NFS_INO_INVALID_DATA in most of the places that we look for NFS_INO_REVAL_PAGECACHE. I believe this is a regression introduced by commit 8d197a568. The code did check for NFS_INO_INVALID_DATA prior to that patch. Original bug report is here: https://bugzilla.redhat.com/show_bug.cgi?id=885743 Cc: # 3.5+ Reported-by: Jian Li Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index f710e39f6ba..eecd8b879af 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -884,7 +884,7 @@ static bool nfs_write_pageuptodate(struct page *page, struct inode *inode) { if (nfs_have_delegated_attributes(inode)) goto out; - if (NFS_I(inode)->cache_validity & NFS_INO_REVAL_PAGECACHE) + if (NFS_I(inode)->cache_validity & (NFS_INO_INVALID_DATA|NFS_INO_REVAL_PAGECACHE)) return false; out: return PageUptodate(page) != 0; -- cgit v1.2.3 From 85563073741bd7935a6900d567ddaf907192270d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 11 Dec 2012 10:31:12 -0500 Subject: NFSv4.1: Handle NFS4ERR_BADSLOT errors correctly Most (all) NFS4ERR_BADSLOT errors are due to the client failing to respect the server's sr_highest_slotid limit. This mainly happens due to reordered RPC requests. The way to handle it is simply to drop the slot that we're using, and retry using the new highest_slotid limits. 
Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 92bd799eee0..a4692e97bc1 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -422,6 +422,7 @@ static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res * struct nfs4_slot *slot; unsigned long timestamp; struct nfs_client *clp; + int ret = 1; /* * sr_status remains 1 if an RPC level error occurred. The server @@ -462,6 +463,16 @@ static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res * slot->slot_nr, slot->seq_nr); goto out_retry; + case -NFS4ERR_BADSLOT: + /* + * The slot id we used was probably retired. Try again + * using a different slot id. + */ + if (rpc_restart_call_prepare(task)) { + task->tk_status = 0; + ret = 0; + } + break; default: /* Just update the slot sequence no. */ ++slot->seq_nr; @@ -470,7 +481,7 @@ out: /* The session may be reset by one of the error handlers. */ dprintk("%s: Error %d free the slot \n", __func__, res->sr_status); nfs41_sequence_free_slot(res); - return 1; + return ret; out_retry: if (!rpc_restart_call(task)) goto out; -- cgit v1.2.3 From b0ef9647a0cd6cfd63fed48fbbe6005e4ba92571 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 11 Dec 2012 12:10:14 -0500 Subject: NFSv4.1: Be conservative about the client highest slotid If the server sends us a target that looks like an outlier, but is lower than the existing target, then respect it anyway. However defer actually updating the generation counter until we get a target that doesn't look like an outlier. 
Signed-off-by: Trond Myklebust --- fs/nfs/nfs4session.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4session.c b/fs/nfs/nfs4session.c index ed5aa9fa9c7..1e6c87c443a 100644 --- a/fs/nfs/nfs4session.c +++ b/fs/nfs/nfs4session.c @@ -273,20 +273,28 @@ void nfs41_wake_slot_table(struct nfs4_slot_table *tbl) } } +static void nfs41_set_max_slotid_locked(struct nfs4_slot_table *tbl, + u32 target_highest_slotid) +{ + u32 max_slotid; + + max_slotid = min(NFS4_MAX_SLOT_TABLE - 1, target_highest_slotid); + if (max_slotid > tbl->server_highest_slotid) + max_slotid = tbl->server_highest_slotid; + if (max_slotid > tbl->target_highest_slotid) + max_slotid = tbl->target_highest_slotid; + tbl->max_slotid = max_slotid; + nfs41_wake_slot_table(tbl); +} + /* Update the client's idea of target_highest_slotid */ static void nfs41_set_target_slotid_locked(struct nfs4_slot_table *tbl, u32 target_highest_slotid) { - unsigned int max_slotid; - if (tbl->target_highest_slotid == target_highest_slotid) return; tbl->target_highest_slotid = target_highest_slotid; tbl->generation++; - - max_slotid = min(NFS4_MAX_SLOT_TABLE - 1, tbl->target_highest_slotid); - tbl->max_slotid = max_slotid; - nfs41_wake_slot_table(tbl); } void nfs41_set_target_slotid(struct nfs4_slot_table *tbl, @@ -296,6 +304,7 @@ void nfs41_set_target_slotid(struct nfs4_slot_table *tbl, nfs41_set_target_slotid_locked(tbl, target_highest_slotid); tbl->d_target_highest_slotid = 0; tbl->d2_target_highest_slotid = 0; + nfs41_set_max_slotid_locked(tbl, target_highest_slotid); spin_unlock(&tbl->slot_tbl_lock); } @@ -370,6 +379,7 @@ void nfs41_update_target_slotid(struct nfs4_slot_table *tbl, nfs41_set_target_slotid_locked(tbl, res->sr_target_highest_slotid); if (tbl->generation == slot->generation) nfs41_set_server_slotid_locked(tbl, res->sr_highest_slotid); + nfs41_set_max_slotid_locked(tbl, res->sr_target_highest_slotid); spin_unlock(&tbl->slot_tbl_lock); } 
-- cgit v1.2.3 From 67fad106a219e083c91c79695bd1807dde1bf7b9 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 12 Dec 2012 11:38:44 -0500 Subject: nfs: don't zero out the rest of the page if we hit the EOF on a DIO READ Eryu provided a test program that would segfault when attempting to read past the EOF on file that was opened O_DIRECT. The buffer given to the read() call was on the stack, and when he attempted to read past it it would scribble over the rest of the stack page. If we hit the end of the file on a DIO READ request, then we don't want to zero out the rest of the buffer. These aren't pagecache pages after all, and there's no guarantee that the buffers that were passed in represent entire pages. Cc: # v3.5+ Cc: Fred Isaman Reported-by: Eryu Guan Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 8 -------- 1 file changed, 8 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index cae26cbd59e..594f4e7e0b9 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -266,14 +266,6 @@ static void nfs_direct_read_completion(struct nfs_pgio_header *hdr) struct nfs_page *req = nfs_list_entry(hdr->pages.next); struct page *page = req->wb_page; - if (test_bit(NFS_IOHDR_EOF, &hdr->flags)) { - if (bytes > hdr->good_bytes) - zero_user(page, 0, PAGE_SIZE); - else if (hdr->good_bytes - bytes < PAGE_SIZE) - zero_user_segment(page, - hdr->good_bytes & ~PAGE_MASK, - PAGE_SIZE); - } if (!PageCompound(page)) { if (test_bit(NFS_IOHDR_ERROR, &hdr->flags)) { if (bytes < hdr->good_bytes) -- cgit v1.2.3 From be7e985804c610fcdcee8730cf42718b8a4e1c41 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 12 Dec 2012 12:36:31 -0500 Subject: nfs: fix page dirtying in NFS DIO read codepath The NFS DIO code will dirty pages that catch read responses in order to handle the case where someone is doing DIO reads into an mmapped buffer. 
The existing code doesn't really do the right thing though since it doesn't take into account the case where we might be attempting to read past the EOF. Fix the logic in that code to only dirty pages that ended up receiving data from the read. Note too that it really doesn't matter if NFS_IOHDR_ERROR is set or not. All that matters is if the page was altered by the read. Cc: Fred Isaman Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 594f4e7e0b9..0bd7a55a5f0 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -266,13 +266,8 @@ static void nfs_direct_read_completion(struct nfs_pgio_header *hdr) struct nfs_page *req = nfs_list_entry(hdr->pages.next); struct page *page = req->wb_page; - if (!PageCompound(page)) { - if (test_bit(NFS_IOHDR_ERROR, &hdr->flags)) { - if (bytes < hdr->good_bytes) - set_page_dirty(page); - } else - set_page_dirty(page); - } + if (!PageCompound(page) && bytes < hdr->good_bytes) + set_page_dirty(page); bytes += req->wb_bytes; nfs_list_remove_request(req); nfs_direct_readpage_release(req); -- cgit v1.2.3 From eb96d5c97b0825d542e9c4ba5e0a22b519355166 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Tue, 27 Nov 2012 10:34:19 -0500 Subject: SUNRPC handle EKEYEXPIRED in call_refreshresult Currently, when an RPCSEC_GSS context has expired or is non-existent and the users (Kerberos) credentials have also expired or are non-existent, the client receives the -EKEYEXPIRED error and tries to refresh the context forever. If an application is performing I/O, or other work against the share, the application hangs, and the user is not prompted to refresh/establish their credentials. This can result in a denial of service for other users. Users are expected to manage their Kerberos credential lifetimes to mitigate this issue. Move the -EKEYEXPIRED handling into the RPC layer. 
Try tk_cred_retry number of times to refresh the gss_context, and then return -EACCES to the application. Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust --- fs/nfs/nfs3proc.c | 6 +++--- fs/nfs/nfs4filelayout.c | 1 - fs/nfs/nfs4proc.c | 18 ------------------ fs/nfs/nfs4state.c | 23 ----------------------- fs/nfs/proc.c | 43 ------------------------------------------- 5 files changed, 3 insertions(+), 88 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 69322096c32..70efb63b1e4 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -24,14 +24,14 @@ #define NFSDBG_FACILITY NFSDBG_PROC -/* A wrapper to handle the EJUKEBOX and EKEYEXPIRED error messages */ +/* A wrapper to handle the EJUKEBOX error messages */ static int nfs3_rpc_wrapper(struct rpc_clnt *clnt, struct rpc_message *msg, int flags) { int res; do { res = rpc_call_sync(clnt, msg, flags); - if (res != -EJUKEBOX && res != -EKEYEXPIRED) + if (res != -EJUKEBOX) break; freezable_schedule_timeout_killable(NFS_JUKEBOX_RETRY_TIME); res = -ERESTARTSYS; @@ -44,7 +44,7 @@ nfs3_rpc_wrapper(struct rpc_clnt *clnt, struct rpc_message *msg, int flags) static int nfs3_async_handle_jukebox(struct rpc_task *task, struct inode *inode) { - if (task->tk_status != -EJUKEBOX && task->tk_status != -EKEYEXPIRED) + if (task->tk_status != -EJUKEBOX) return 0; if (task->tk_status == -EJUKEBOX) nfs_inc_stats(inode, NFSIOS_DELAY); diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 1e42413fab8..194c4841033 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -179,7 +179,6 @@ static int filelayout_async_handle_error(struct rpc_task *task, break; case -NFS4ERR_DELAY: case -NFS4ERR_GRACE: - case -EKEYEXPIRED: rpc_delay(task, FILELAYOUT_POLL_RETRY_MAX); break; case -NFS4ERR_RETRY_UNCACHED_REP: diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index a4692e97bc1..b0963aeceed 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -333,7 +333,6 @@ 
static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struc } case -NFS4ERR_GRACE: case -NFS4ERR_DELAY: - case -EKEYEXPIRED: ret = nfs4_delay(server->client, &exception->timeout); if (ret != 0) break; @@ -1343,13 +1342,6 @@ int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state nfs_inode_find_state_and_recover(state->inode, stateid); nfs4_schedule_stateid_recovery(server, state); - case -EKEYEXPIRED: - /* - * User RPCSEC_GSS context has expired. - * We cannot recover this stateid now, so - * skip it and allow recovery thread to - * proceed. - */ case -ENOMEM: err = 0; goto out; @@ -3946,7 +3938,6 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, case -NFS4ERR_DELAY: nfs_inc_server_stats(server, NFSIOS_DELAY); case -NFS4ERR_GRACE: - case -EKEYEXPIRED: rpc_delay(task, NFS4_POLL_RETRY_MAX); task->tk_status = 0; return -EAGAIN; @@ -4946,15 +4937,6 @@ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl) nfs4_schedule_stateid_recovery(server, state); err = 0; goto out; - case -EKEYEXPIRED: - /* - * User RPCSEC_GSS context has expired. - * We cannot recover this stateid now, so - * skip it and allow recovery thread to - * proceed. - */ - err = 0; - goto out; case -ENOMEM: case -NFS4ERR_DENIED: /* kill_proc(fl->fl_pid, SIGLOST, 1); */ diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 78e90a80fc3..8dcbd9a0367 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1437,14 +1437,6 @@ restart: /* Mark the file as being 'closed' */ state->state = 0; break; - case -EKEYEXPIRED: - /* - * User RPCSEC_GSS context has expired. - * We cannot recover this stateid now, so - * skip it and allow recovery thread to - * proceed. 
- */ - break; case -NFS4ERR_ADMIN_REVOKED: case -NFS4ERR_STALE_STATEID: case -NFS4ERR_BAD_STATEID: @@ -1597,14 +1589,6 @@ static void nfs4_state_start_reclaim_nograce(struct nfs_client *clp) nfs4_state_mark_reclaim_helper(clp, nfs4_state_mark_reclaim_nograce); } -static void nfs4_warn_keyexpired(const char *s) -{ - printk_ratelimited(KERN_WARNING "Error: state manager" - " encountered RPCSEC_GSS session" - " expired against NFSv4 server %s.\n", - s); -} - static int nfs4_recovery_handle_error(struct nfs_client *clp, int error) { switch (error) { @@ -1638,10 +1622,6 @@ static int nfs4_recovery_handle_error(struct nfs_client *clp, int error) case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: set_bit(NFS4CLNT_BIND_CONN_TO_SESSION, &clp->cl_state); break; - case -EKEYEXPIRED: - /* Nothing we can do */ - nfs4_warn_keyexpired(clp->cl_hostname); - break; default: dprintk("%s: failed to handle error %d for server %s\n", __func__, error, clp->cl_hostname); @@ -1758,8 +1738,6 @@ static int nfs4_handle_reclaim_lease_error(struct nfs_client *clp, int status) dprintk("%s: exit with error %d for server %s\n", __func__, -EPROTONOSUPPORT, clp->cl_hostname); return -EPROTONOSUPPORT; - case -EKEYEXPIRED: - nfs4_warn_keyexpired(clp->cl_hostname); case -NFS4ERR_NOT_SAME: /* FixMe: implement recovery * in nfs4_exchange_id */ default: @@ -1912,7 +1890,6 @@ again: break; case -EKEYEXPIRED: - nfs4_warn_keyexpired(clp->cl_hostname); case -NFS4ERR_NOT_SAME: /* FixMe: implement recovery * in nfs4_exchange_id */ status = -EKEYEXPIRED; diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index 50a88c3546e..f084dac948e 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -46,39 +46,6 @@ #define NFSDBG_FACILITY NFSDBG_PROC -/* - * wrapper to handle the -EKEYEXPIRED error message. This should generally - * only happen if using krb5 auth and a user's TGT expires. NFSv2 doesn't - * support the NFSERR_JUKEBOX error code, but we handle this situation in the - * same way that we handle that error with NFSv3. 
- */ -static int -nfs_rpc_wrapper(struct rpc_clnt *clnt, struct rpc_message *msg, int flags) -{ - int res; - do { - res = rpc_call_sync(clnt, msg, flags); - if (res != -EKEYEXPIRED) - break; - freezable_schedule_timeout_killable(NFS_JUKEBOX_RETRY_TIME); - res = -ERESTARTSYS; - } while (!fatal_signal_pending(current)); - return res; -} - -#define rpc_call_sync(clnt, msg, flags) nfs_rpc_wrapper(clnt, msg, flags) - -static int -nfs_async_handle_expired_key(struct rpc_task *task) -{ - if (task->tk_status != -EKEYEXPIRED) - return 0; - task->tk_status = 0; - rpc_restart_call(task); - rpc_delay(task, NFS_JUKEBOX_RETRY_TIME); - return 1; -} - /* * Bare-bones access to getattr: this is for nfs_read_super. */ @@ -364,8 +331,6 @@ static void nfs_proc_unlink_rpc_prepare(struct rpc_task *task, struct nfs_unlink static int nfs_proc_unlink_done(struct rpc_task *task, struct inode *dir) { - if (nfs_async_handle_expired_key(task)) - return 0; nfs_mark_for_revalidate(dir); return 1; } @@ -385,8 +350,6 @@ static int nfs_proc_rename_done(struct rpc_task *task, struct inode *old_dir, struct inode *new_dir) { - if (nfs_async_handle_expired_key(task)) - return 0; nfs_mark_for_revalidate(old_dir); nfs_mark_for_revalidate(new_dir); return 1; @@ -642,9 +605,6 @@ static int nfs_read_done(struct rpc_task *task, struct nfs_read_data *data) { struct inode *inode = data->header->inode; - if (nfs_async_handle_expired_key(task)) - return -EAGAIN; - nfs_invalidate_atime(inode); if (task->tk_status >= 0) { nfs_refresh_inode(inode, data->res.fattr); @@ -671,9 +631,6 @@ static int nfs_write_done(struct rpc_task *task, struct nfs_write_data *data) { struct inode *inode = data->header->inode; - if (nfs_async_handle_expired_key(task)) - return -EAGAIN; - if (task->tk_status >= 0) nfs_post_op_update_inode_force_wcc(inode, data->res.fattr); return 0; -- cgit v1.2.3 From f259613a1e4b44a0cf85a5dafd931be96ee7c9e5 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 13 Dec 2012 15:14:36 +1100 Subject: NFS: 
avoid NULL dereference in nfs_destroy_server In rare circumstances, nfs_clone_server() of a v2 or v3 server can get an error between setting server->destroy (to nfs_destroy_server), and calling nfs_start_lockd (which will set server->nlm_host). If this happens, nfs_clone_server will call nfs_free_server which will call nfs_destroy_server and thence nlmclnt_done(NULL). This causes the NULL to be dereferenced. So add a guard to only call nlmclnt_done() if ->nlm_host is not NULL. The other guards there are irrelevant as nlm_host can only be non-NULL if one of these flags is set - so remove those tests. (Thanks to Trond for this suggestion). This is suitable for any stable kernel since 2.6.25. Cc: stable@vger.kernel.org Signed-off-by: NeilBrown Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/client.c b/fs/nfs/client.c index c285e0a117e..9f3c66438d0 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -615,8 +615,7 @@ EXPORT_SYMBOL_GPL(nfs_create_rpc_client); */ static void nfs_destroy_server(struct nfs_server *server) { - if (!(server->flags & NFS_MOUNT_LOCAL_FLOCK) || - !(server->flags & NFS_MOUNT_LOCAL_FCNTL)) + if (server->nlm_host) nlmclnt_done(server->nlm_host); } -- cgit v1.2.3 From aaea7d2f78d008882524eddff0d78098c8fa9496 Mon Sep 17 00:00:00 2001 From: Yanchuan Nian Date: Thu, 13 Dec 2012 14:37:34 +0800 Subject: nfs: Remove duplicate function declaration in internal.h Remove duplicate function declaration in internal.h Signed-off-by: Yanchuan Nian [Trond: Added nfs_pageio_init_read, which suffered from the same problem] Signed-off-by: Trond Myklebust --- fs/nfs/internal.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index fb994471bd3..89c1ee4a432 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -363,9 +363,6 @@ extern int nfs_initiate_read(struct rpc_clnt *clnt, extern void 
nfs_read_prepare(struct rpc_task *task, void *calldata); extern int nfs_generic_pagein(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr); -extern void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, - struct inode *inode, - const struct nfs_pgio_completion_ops *compl_ops); extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio); extern void nfs_readdata_release(struct nfs_read_data *rdata); @@ -388,9 +385,6 @@ extern struct nfs_write_header *nfs_writehdr_alloc(void); extern void nfs_writehdr_free(struct nfs_pgio_header *hdr); extern int nfs_generic_flush(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr); -extern void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, - struct inode *inode, int ioflags, - const struct nfs_pgio_completion_ops *compl_ops); extern void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio); extern void nfs_writedata_release(struct nfs_write_data *wdata); extern void nfs_commit_free(struct nfs_commit_data *p); -- cgit v1.2.3 From 48d7a57693af660666c4afdc54c09b2f9655e260 Mon Sep 17 00:00:00 2001 From: Yanchuan Nian Date: Thu, 13 Dec 2012 14:37:52 +0800 Subject: nfs: Remove unused list nfs4_clientid_list This list was designed to store struct nfs4_client in the client side. But nfs4_client was obsolete and has been removed from the source code. So remove the unused list. 
Signed-off-by: Yanchuan Nian Signed-off-by: Trond Myklebust --- fs/nfs/nfs4state.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 8dcbd9a0367..9448c579d41 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -67,7 +67,6 @@ const nfs4_stateid zero_stateid; static DEFINE_MUTEX(nfs_clid_init_mutex); -static LIST_HEAD(nfs4_clientid_list); int nfs4_init_clientid(struct nfs_client *clp, struct rpc_cred *cred) { -- cgit v1.2.3 From eed9935745cc44071043ec8c4cde64c820b5c601 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 14 Dec 2012 14:36:36 -0500 Subject: NFS: Ensure that we always drop inodes that have been marked as stale There is no need to cache stale inodes. Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 6 ++++++ fs/nfs/internal.h | 1 + fs/nfs/nfs4super.c | 1 + fs/nfs/super.c | 1 + 4 files changed, 9 insertions(+) (limited to 'fs/nfs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 117183b1ee0..2faae14d89f 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -107,6 +107,12 @@ u64 nfs_compat_user_ino64(u64 fileid) return ino; } +int nfs_drop_inode(struct inode *inode) +{ + return NFS_STALE(inode) || generic_drop_inode(inode); +} +EXPORT_SYMBOL_GPL(nfs_drop_inode); + void nfs_clear_inode(struct inode *inode) { /* diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 89c1ee4a432..f0e6c7df1a0 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -296,6 +296,7 @@ extern struct workqueue_struct *nfsiod_workqueue; extern struct inode *nfs_alloc_inode(struct super_block *sb); extern void nfs_destroy_inode(struct inode *); extern int nfs_write_inode(struct inode *, struct writeback_control *); +extern int nfs_drop_inode(struct inode *); extern void nfs_clear_inode(struct inode *); extern void nfs_evict_inode(struct inode *); void nfs_zap_acl_cache(struct inode *inode); diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c index bd61221ad2c..84d2e9e2f31 100644 --- 
a/fs/nfs/nfs4super.c +++ b/fs/nfs/nfs4super.c @@ -51,6 +51,7 @@ static const struct super_operations nfs4_sops = { .alloc_inode = nfs_alloc_inode, .destroy_inode = nfs_destroy_inode, .write_inode = nfs4_write_inode, + .drop_inode = nfs_drop_inode, .put_super = nfs_put_super, .statfs = nfs_statfs, .evict_inode = nfs4_evict_inode, diff --git a/fs/nfs/super.c b/fs/nfs/super.c index e12cea4b36a..aa5315bb366 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -308,6 +308,7 @@ const struct super_operations nfs_sops = { .alloc_inode = nfs_alloc_inode, .destroy_inode = nfs_destroy_inode, .write_inode = nfs_write_inode, + .drop_inode = nfs_drop_inode, .put_super = nfs_put_super, .statfs = nfs_statfs, .evict_inode = nfs_evict_inode, -- cgit v1.2.3 From 1f018458b30b0d5c535c94e577aa0acbb92e1395 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 14 Dec 2012 16:38:46 -0500 Subject: NFS: Fix calls to drop_nlink() It is almost always wrong for NFS to call drop_nlink() after removing a file. What we really want is to mark the inode's attributes for revalidation, and we want to ensure that the VFS drops it if we're reasonably sure that this is the final unlink(). Do the former using the usual cache validity flags, and the latter by testing if inode->i_nlink == 1, and clearing it in that case. This also fixes the following warning reported by Neil Brown and Jeff Layton (among others). 
[634155.004438] WARNING: at /home/abuild/rpmbuild/BUILD/kernel-desktop-3.5.0/lin [634155.004442] Hardware name: Latitude E6510 [634155.004577] crc_itu_t crc32c_intel snd_hwdep snd_pcm snd_timer snd soundcor [634155.004609] Pid: 13402, comm: bash Tainted: G W 3.5.0-36-desktop # [634155.004611] Call Trace: [634155.004630] [] dump_trace+0xaa/0x2b0 [634155.004641] [] dump_stack+0x69/0x6f [634155.004653] [] warn_slowpath_common+0x7b/0xc0 [634155.004662] [] drop_nlink+0x34/0x40 [634155.004687] [] nfs_dentry_iput+0x33/0x70 [nfs] [634155.004714] [] dput+0x12e/0x230 [634155.004726] [] __fput+0x170/0x230 [634155.004735] [] filp_close+0x5f/0x90 [634155.004743] [] sys_close+0x97/0x100 [634155.004754] [] system_call_fastpath+0x16/0x1b [634155.004767] [<00007f2a73a0d110>] 0x7f2a73a0d10f Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org [3.3+] --- fs/nfs/dir.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index ce8cb926526..a46a7465448 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1155,11 +1155,14 @@ static int nfs_dentry_delete(const struct dentry *dentry) } +/* Ensure that we revalidate inode->i_nlink */ static void nfs_drop_nlink(struct inode *inode) { spin_lock(&inode->i_lock); - if (inode->i_nlink > 0) - drop_nlink(inode); + /* drop the inode if we're reasonably sure this is the last link */ + if (inode->i_nlink == 1) + clear_nlink(inode); + NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATTR; spin_unlock(&inode->i_lock); } @@ -1174,8 +1177,8 @@ static void nfs_dentry_iput(struct dentry *dentry, struct inode *inode) NFS_I(inode)->cache_validity |= NFS_INO_INVALID_DATA; if (dentry->d_flags & DCACHE_NFSFS_RENAMED) { - drop_nlink(inode); nfs_complete_unlink(dentry, inode); + nfs_drop_nlink(inode); } iput(inode); } @@ -1646,10 +1649,8 @@ static int nfs_safe_remove(struct dentry *dentry) if (inode != NULL) { NFS_PROTO(inode)->return_delegation(inode); error = NFS_PROTO(dir)->remove(dir, 
&dentry->d_name); - /* The VFS may want to delete this inode */ if (error == 0) nfs_drop_nlink(inode); - nfs_mark_for_revalidate(inode); } else error = NFS_PROTO(dir)->remove(dir, &dentry->d_name); if (error == -ENOENT) -- cgit v1.2.3 From 65a0c14954493802de01968a73b849f9fc4b4d1a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 14 Dec 2012 17:51:40 -0500 Subject: NFS: nfs_lookup_revalidate should not trust an inode with i_nlink == 0 If the inode has no links, then we should force a new lookup. Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index a46a7465448..d8e58ed3d45 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -978,10 +978,11 @@ static int nfs_is_exclusive_create(struct inode *dir, unsigned int flags) * particular file and the "nocto" mount flag is not set. * */ -static inline +static int nfs_lookup_verify_inode(struct inode *inode, unsigned int flags) { struct nfs_server *server = NFS_SERVER(inode); + int ret; if (IS_AUTOMOUNT(inode)) return 0; @@ -992,9 +993,13 @@ int nfs_lookup_verify_inode(struct inode *inode, unsigned int flags) if ((flags & LOOKUP_OPEN) && !(server->flags & NFS_MOUNT_NOCTO) && (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode))) goto out_force; - return 0; +out: + return (inode->i_nlink == 0) ? -ENOENT : 0; out_force: - return __nfs_revalidate_inode(server, inode); + ret = __nfs_revalidate_inode(server, inode); + if (ret != 0) + return ret; + goto out; } /* -- cgit v1.2.3 From e8794440849d1d15fa11251ef1622e6160614874 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 15 Dec 2012 13:56:18 -0500 Subject: NFSv4.1: Try to deal with NFS4ERR_SEQ_MISORDERED. If the server returns NFS4ERR_SEQ_MISORDERED, it could be a sign that the slot was retired at some point. Retry the attempt after reinitialising the slot sequence number to 1. Also add a handler for NFS4ERR_SEQ_FALSE_RETRY. 
Just bump the slot sequence number and retry... Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index b0963aeceed..9003b8f6b77 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -467,11 +467,19 @@ static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res * * The slot id we used was probably retired. Try again * using a different slot id. */ - if (rpc_restart_call_prepare(task)) { - task->tk_status = 0; - ret = 0; - } - break; + goto retry_nowait; + case -NFS4ERR_SEQ_MISORDERED: + /* + * Could this slot have been previously retired? + * If so, then the server may be expecting seq_nr = 1! + */ + if (slot->seq_nr == 1) + break; + slot->seq_nr = 1; + goto retry_nowait; + case -NFS4ERR_SEQ_FALSE_RETRY: + ++slot->seq_nr; + goto retry_nowait; default: /* Just update the slot sequence no. */ ++slot->seq_nr; @@ -481,6 +489,12 @@ out: dprintk("%s: Error %d free the slot \n", __func__, res->sr_status); nfs41_sequence_free_slot(res); return ret; +retry_nowait: + if (rpc_restart_call_prepare(task)) { + task->tk_status = 0; + ret = 0; + } + goto out; out_retry: if (!rpc_restart_call(task)) goto out; -- cgit v1.2.3 From 8e63b6a8adabb0551124c3b78f7f5f36912c3728 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 15 Dec 2012 15:21:52 -0500 Subject: NFSv4.1: Move the RPC timestamp out of the slot. Shave a few bytes off the slot table size by moving the RPC timestamp into the sequence results. 
Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 14 +++++++------- fs/nfs/nfs4session.c | 3 +-- fs/nfs/nfs4session.h | 1 - 3 files changed, 8 insertions(+), 10 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 9003b8f6b77..afb428e63b5 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -419,7 +419,6 @@ static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res * { struct nfs4_session *session; struct nfs4_slot *slot; - unsigned long timestamp; struct nfs_client *clp; int ret = 1; @@ -444,9 +443,8 @@ static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res * case 0: /* Update the slot's sequence and clientid lease timer */ ++slot->seq_nr; - timestamp = slot->renewal_time; clp = session->clp; - do_renew_lease(clp, timestamp); + do_renew_lease(clp, res->sr_timestamp); /* Check sequence flags */ if (res->sr_status_flags != 0) nfs4_schedule_lease_recovery(clp); @@ -473,10 +471,11 @@ static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res * * Could this slot have been previously retired? * If so, then the server may be expecting seq_nr = 1! 
*/ - if (slot->seq_nr == 1) - break; - slot->seq_nr = 1; - goto retry_nowait; + if (slot->seq_nr != 1) { + slot->seq_nr = 1; + goto retry_nowait; + } + break; case -NFS4ERR_SEQ_FALSE_RETRY: ++slot->seq_nr; goto retry_nowait; @@ -567,6 +566,7 @@ int nfs41_setup_sequence(struct nfs4_session *session, slot->slot_nr, slot->seq_nr); res->sr_slot = slot; + res->sr_timestamp = jiffies; res->sr_status_flags = 0; /* * sr_status is only set in decode_sequence, and so will remain diff --git a/fs/nfs/nfs4session.c b/fs/nfs/nfs4session.c index 1e6c87c443a..0e1cc1f4e51 100644 --- a/fs/nfs/nfs4session.c +++ b/fs/nfs/nfs4session.c @@ -143,7 +143,6 @@ struct nfs4_slot *nfs4_alloc_slot(struct nfs4_slot_table *tbl) if (slotid > tbl->highest_used_slotid || tbl->highest_used_slotid == NFS4_NO_SLOT) tbl->highest_used_slotid = slotid; - ret->renewal_time = jiffies; ret->generation = tbl->generation; out: @@ -228,9 +227,9 @@ static bool nfs41_assign_slot(struct rpc_task *task, void *pslot) if (nfs4_session_draining(tbl->session) && !args->sa_privileged) return false; - slot->renewal_time = jiffies; slot->generation = tbl->generation; args->sa_slot = slot; + res->sr_timestamp = jiffies; res->sr_slot = slot; res->sr_status_flags = 0; res->sr_status = 1; diff --git a/fs/nfs/nfs4session.h b/fs/nfs/nfs4session.h index 04f834cab16..d17b08091d4 100644 --- a/fs/nfs/nfs4session.h +++ b/fs/nfs/nfs4session.h @@ -19,7 +19,6 @@ struct nfs4_slot { struct nfs4_slot_table *table; struct nfs4_slot *next; unsigned long generation; - unsigned long renewal_time; u32 slot_nr; u32 seq_nr; }; -- cgit v1.2.3 From ac20d163fccf9fa6acec8b68f127003635e13b72 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 15 Dec 2012 15:36:07 -0500 Subject: NFSv4.1: Deal effectively with interrupted RPC calls. 
If an RPC call is interrupted, assume that the server hasn't processed the RPC call so that the next time we use the slot, we know that if we get a NFS4ERR_SEQ_MISORDERED or NFS4ERR_SEQ_FALSE_RETRY, we just have to bump the sequence number. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 32 +++++++++++++++++++++++--------- fs/nfs/nfs4session.c | 1 + fs/nfs/nfs4session.h | 1 + 3 files changed, 25 insertions(+), 9 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index afb428e63b5..493f0f41c55 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -420,17 +420,9 @@ static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res * struct nfs4_session *session; struct nfs4_slot *slot; struct nfs_client *clp; + bool interrupted = false; int ret = 1; - /* - * sr_status remains 1 if an RPC level error occurred. The server - * may or may not have processed the sequence operation.. - * Proceed as if the server received and processed the sequence - * operation. - */ - if (res->sr_status == 1) - res->sr_status = NFS_OK; - /* don't increment the sequence number if the task wasn't sent */ if (!RPC_WAS_SENT(task)) goto out; @@ -438,6 +430,11 @@ static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res * slot = res->sr_slot; session = slot->table->session; + if (slot->interrupted) { + slot->interrupted = 0; + interrupted = true; + } + /* Check the SEQUENCE operation status */ switch (res->sr_status) { case 0: @@ -450,6 +447,15 @@ static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res * nfs4_schedule_lease_recovery(clp); nfs41_update_target_slotid(slot->table, slot, res); break; + case 1: + /* + * sr_status remains 1 if an RPC level error occurred. + * The server may or may not have processed the sequence + * operation.. + * Mark the slot as having hosted an interrupted RPC call. 
+ */ + slot->interrupted = 1; + goto out; case -NFS4ERR_DELAY: /* The server detected a resend of the RPC call and * returned NFS4ERR_DELAY as per Section 2.10.6.2 @@ -467,6 +473,14 @@ static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res * */ goto retry_nowait; case -NFS4ERR_SEQ_MISORDERED: + /* + * Was the last operation on this sequence interrupted? + * If so, retry after bumping the sequence number. + */ + if (interrupted) { + ++slot->seq_nr; + goto retry_nowait; + } /* * Could this slot have been previously retired? * If so, then the server may be expecting seq_nr = 1! diff --git a/fs/nfs/nfs4session.c b/fs/nfs/nfs4session.c index 0e1cc1f4e51..ebda5f4a031 100644 --- a/fs/nfs/nfs4session.c +++ b/fs/nfs/nfs4session.c @@ -172,6 +172,7 @@ static void nfs4_reset_slot_table(struct nfs4_slot_table *tbl, p = &tbl->slots; while (*p) { (*p)->seq_nr = ivalue; + (*p)->interrupted = 0; p = &(*p)->next; } tbl->highest_used_slotid = NFS4_NO_SLOT; diff --git a/fs/nfs/nfs4session.h b/fs/nfs/nfs4session.h index d17b08091d4..6f3cb39386d 100644 --- a/fs/nfs/nfs4session.h +++ b/fs/nfs/nfs4session.h @@ -21,6 +21,7 @@ struct nfs4_slot { unsigned long generation; u32 slot_nr; u32 seq_nr; + unsigned int interrupted : 1; }; /* Sessions */ -- cgit v1.2.3 From ada8e20d044c0fa5610e504ce6fb4578ebd3edd9 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 15 Dec 2012 17:12:14 -0500 Subject: NFS: Don't use SetPageError in the NFS writeback code The writeback code is already capable of passing errors back to user space by means of the open_context->error. In the case of ENOSPC, Neil Brown is reporting seeing 2 errors being returned. Neil writes: "e.g. 
if /mnt2/ is an nfs mounted filesystem that has no space then strace dd if=/dev/zero conv=fsync >> /mnt2/afile count=1 reported Input/output error and the relevant parts of the strace output are: write(1, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 512) = 512 fsync(1) = -1 EIO (Input/output error) close(1) = -1 ENOSPC (No space left on device)" Neil then shows that the duplication of error messages appears to be due to the use of the PageError() mechanism, which causes filemap_fdatawait_range to return the extra EIO. The regression was introduced by commit 7b281ee026552f10862b617a2a51acf49c829554 (NFS: fsync() must exit with an error if page writeback failed). Fix this by removing the call to SetPageError(), and just relying on open_context->error reporting the ENOSPC back to fsync(). Reported-by: Neil Brown Tested-by: Neil Brown Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org [3.6+] --- fs/nfs/write.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index f608ca606b2..5209916e122 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -202,7 +202,6 @@ out: /* A writeback failed: mark the page as bad, and invalidate the page cache */ static void nfs_set_pageerror(struct page *page) { - SetPageError(page); nfs_zap_mapping(page_file_mapping(page)->host, page_file_mapping(page)); } -- cgit v1.2.3 From 965c8e59cfcf845ecde2265a1d1bfee5f011d302 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Mon, 17 Dec 2012 15:59:39 -0800 Subject: lseek: the "whence" argument is called "whence" But the kernel decided to call it "origin" instead. Fix most of the sites. 
Acked-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nfs/dir.c | 6 +++--- fs/nfs/file.c | 10 +++++----- 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index b9e66b7e0c1..1cc71f60b49 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -871,7 +871,7 @@ out: return res; } -static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int origin) +static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int whence) { struct dentry *dentry = filp->f_path.dentry; struct inode *inode = dentry->d_inode; @@ -880,10 +880,10 @@ static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int origin) dfprintk(FILE, "NFS: llseek dir(%s/%s, %lld, %d)\n", dentry->d_parent->d_name.name, dentry->d_name.name, - offset, origin); + offset, whence); mutex_lock(&inode->i_mutex); - switch (origin) { + switch (whence) { case 1: offset += filp->f_pos; case 0: diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 582bb886613..3c2b893665b 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -119,18 +119,18 @@ force_reval: return __nfs_revalidate_inode(server, inode); } -loff_t nfs_file_llseek(struct file *filp, loff_t offset, int origin) +loff_t nfs_file_llseek(struct file *filp, loff_t offset, int whence) { dprintk("NFS: llseek file(%s/%s, %lld, %d)\n", filp->f_path.dentry->d_parent->d_name.name, filp->f_path.dentry->d_name.name, - offset, origin); + offset, whence); /* - * origin == SEEK_END || SEEK_DATA || SEEK_HOLE => we must revalidate + * whence == SEEK_END || SEEK_DATA || SEEK_HOLE => we must revalidate * the cached file length */ - if (origin != SEEK_SET && origin != SEEK_CUR) { + if (whence != SEEK_SET && whence != SEEK_CUR) { struct inode *inode = filp->f_mapping->host; int retval = nfs_revalidate_file_size(inode, filp); @@ -138,7 +138,7 @@ loff_t nfs_file_llseek(struct file *filp, loff_t offset, int origin) return (loff_t)retval; } - return generic_file_llseek(filp, offset, origin); + 
return generic_file_llseek(filp, offset, whence); } EXPORT_SYMBOL_GPL(nfs_file_llseek); -- cgit v1.2.3 From de242c0b8b365a9e348bf53143e18e9d8c9cfae8 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 20 Dec 2012 21:52:38 +0000 Subject: NFS: Use FS-Cache invalidation Use the new FS-Cache invalidation facility from NFS to deal with foreign changes being detected on the server rather than attempting to retire the old cookie and get a new one. The problem with the old method was that NFS did not wait for all outstanding storage and retrieval ops on the cache to complete. There was no automatic wait between the calls to ->readpages() and calls to invalidate_inode_pages2() as the latter can only wait on locked pages that have been added to the pagecache (which they haven't yet on entry to ->readpages()). This was leading to oopses like the one below when an outstanding read got cut off from its cookie by a premature release. BUG: unable to handle kernel NULL pointer dereference at 00000000000000a8 IP: [] __fscache_read_or_alloc_pages+0x1dd/0x315 [fscache] PGD 15889067 PUD 15890067 PMD 0 Oops: 0000 [#1] SMP CPU 0 Modules linked in: cachefiles nfs fscache auth_rpcgss nfs_acl lockd sunrpc Pid: 4544, comm: tar Not tainted 3.1.0-rc4-fsdevel+ #1064 /DG965RY RIP: 0010:[] [] __fscache_read_or_alloc_pages+0x1dd/0x315 [fscache] RSP: 0018:ffff8800158799e8 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffff8800070d41e0 RCX: ffff8800083dc1b0 RDX: 0000000000000000 RSI: ffff880015879960 RDI: ffff88003e627b90 RBP: ffff880015879a28 R08: 0000000000000002 R09: 0000000000000002 R10: 0000000000000001 R11: ffff880015879950 R12: ffff880015879aa4 R13: 0000000000000000 R14: ffff8800083dc158 R15: ffff880015879be8 FS: 00007f671e9d87c0(0000) GS:ffff88003bc00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 00000000000000a8 CR3: 000000001587f000 CR4: 00000000000006f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 
00000000ffff0ff0 DR7: 0000000000000400 Process tar (pid: 4544, threadinfo ffff880015878000, task ffff880015875040) Stack: ffffffffa00b1759 ffff8800070dc158 ffff8800000213da ffff88002a286508 ffff880015879aa4 ffff880015879be8 0000000000000001 ffff88002a2866e8 ffff880015879a88 ffffffffa00b20be 00000000000200da ffff880015875040 Call Trace: [] ? nfs_fscache_wait_bit+0xd/0xd [nfs] [] __nfs_readpages_from_fscache+0x7e/0x13f [nfs] [] ? __alloc_pages_nodemask+0x156/0x662 [] nfs_readpages+0xee/0x187 [nfs] [] __do_page_cache_readahead+0x1be/0x267 [] ? __do_page_cache_readahead+0xa2/0x267 [] ra_submit+0x1c/0x20 [] ondemand_readahead+0x28b/0x29a [] page_cache_sync_readahead+0x38/0x3a [] generic_file_aio_read+0x2ab/0x67e [] nfs_file_read+0xa4/0xc9 [nfs] [] do_sync_read+0xba/0xfa [] ? might_fault+0x4e/0x9e [] ? security_file_permission+0x7b/0x84 [] ? rw_verify_area+0xab/0xc8 [] vfs_read+0xaa/0x13a [] sys_read+0x45/0x6c [] system_call_fastpath+0x16/0x1b Reported-by: Mark Moseley Signed-off-by: David Howells --- fs/nfs/fscache.h | 20 +++++++++++++++++++- fs/nfs/inode.c | 20 ++++++++++++++++---- fs/nfs/nfs4proc.c | 3 ++- 3 files changed, 37 insertions(+), 6 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/fscache.h b/fs/nfs/fscache.h index c5b11b53ff3..277b0278289 100644 --- a/fs/nfs/fscache.h +++ b/fs/nfs/fscache.h @@ -152,6 +152,22 @@ static inline void nfs_readpage_to_fscache(struct inode *inode, __nfs_readpage_to_fscache(inode, page, sync); } +/* + * Invalidate the contents of fscache for this inode. This will not sleep. + */ +static inline void nfs_fscache_invalidate(struct inode *inode) +{ + fscache_invalidate(NFS_I(inode)->fscache); +} + +/* + * Wait for an object to finish being invalidated. 
+ */ +static inline void nfs_fscache_wait_on_invalidate(struct inode *inode) +{ + fscache_wait_on_invalidate(NFS_I(inode)->fscache); +} + /* * indicate the client caching state as readable text */ @@ -162,7 +178,6 @@ static inline const char *nfs_server_fscache_state(struct nfs_server *server) return "no "; } - #else /* CONFIG_NFS_FSCACHE */ static inline int nfs_fscache_register(void) { return 0; } static inline void nfs_fscache_unregister(void) {} @@ -205,6 +220,9 @@ static inline int nfs_readpages_from_fscache(struct nfs_open_context *ctx, static inline void nfs_readpage_to_fscache(struct inode *inode, struct page *page, int sync) {} + +static inline void nfs_fscache_invalidate(struct inode *inode) {} + static inline const char *nfs_server_fscache_state(struct nfs_server *server) { return "no "; diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 2faae14d89f..ebeb94ce1b0 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -161,10 +161,12 @@ static void nfs_zap_caches_locked(struct inode *inode) nfsi->attrtimeo_timestamp = jiffies; memset(NFS_I(inode)->cookieverf, 0, sizeof(NFS_I(inode)->cookieverf)); - if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) + if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) { nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL|NFS_INO_REVAL_PAGECACHE; - else + nfs_fscache_invalidate(inode); + } else { nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL|NFS_INO_REVAL_PAGECACHE; + } } void nfs_zap_caches(struct inode *inode) @@ -179,6 +181,7 @@ void nfs_zap_mapping(struct inode *inode, struct address_space *mapping) if (mapping->nrpages != 0) { spin_lock(&inode->i_lock); NFS_I(inode)->cache_validity |= NFS_INO_INVALID_DATA; + nfs_fscache_invalidate(inode); spin_unlock(&inode->i_lock); } } @@ -881,7 +884,7 @@ static int nfs_invalidate_mapping(struct inode *inode, struct address_space *map memset(nfsi->cookieverf, 0, 
sizeof(nfsi->cookieverf)); spin_unlock(&inode->i_lock); nfs_inc_stats(inode, NFSIOS_DATAINVALIDATE); - nfs_fscache_reset_inode_cookie(inode); + nfs_fscache_wait_on_invalidate(inode); dfprintk(PAGECACHE, "NFS: (%s/%Ld) data cache invalidated\n", inode->i_sb->s_id, (long long)NFS_FILEID(inode)); return 0; @@ -957,6 +960,10 @@ static unsigned long nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr i_size_write(inode, nfs_size_to_loff_t(fattr->size)); ret |= NFS_INO_INVALID_ATTR; } + + if (nfsi->cache_validity & NFS_INO_INVALID_DATA) + nfs_fscache_invalidate(inode); + return ret; } @@ -1205,8 +1212,10 @@ static int nfs_post_op_update_inode_locked(struct inode *inode, struct nfs_fattr struct nfs_inode *nfsi = NFS_I(inode); nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; - if (S_ISDIR(inode->i_mode)) + if (S_ISDIR(inode->i_mode)) { nfsi->cache_validity |= NFS_INO_INVALID_DATA; + nfs_fscache_invalidate(inode); + } if ((fattr->valid & NFS_ATTR_FATTR) == 0) return 0; return nfs_refresh_inode_locked(inode, fattr); @@ -1494,6 +1503,9 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) (save_cache_validity & NFS_INO_REVAL_FORCED)) nfsi->cache_validity |= invalid; + if (invalid & NFS_INO_INVALID_DATA) + nfs_fscache_invalidate(inode); + return 0; out_err: /* diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 493f0f41c55..5d864fb3657 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -64,7 +64,7 @@ #include "pnfs.h" #include "netns.h" #include "nfs4session.h" - +#include "fscache.h" #define NFSDBG_FACILITY NFSDBG_PROC @@ -734,6 +734,7 @@ static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo) if (!cinfo->atomic || cinfo->before != dir->i_version) nfs_force_lookup_revalidate(dir); dir->i_version = cinfo->after; + nfs_fscache_invalidate(dir); spin_unlock(&dir->i_lock); } -- cgit v1.2.3 From 8c209ce721444a61b61d9e772746c721e4d8d1e8 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 5 
Dec 2012 13:34:49 +0000 Subject: NFS: nfs_migrate_page() does not wait for FS-Cache to finish with a page nfs_migrate_page() does not wait for FS-Cache to finish with a page, probably leading to the following bad-page-state: BUG: Bad page state in process python-bin pfn:17d39b page:ffffea00053649e8 flags:004000000000100c count:0 mapcount:0 mapping:(null) index:38686 (Tainted: G B ---------------- ) Pid: 31053, comm: python-bin Tainted: G B ---------------- 2.6.32-71.24.1.el6.x86_64 #1 Call Trace: [] bad_page+0x107/0x160 [] free_hot_cold_page+0x1c9/0x220 [] __pagevec_free+0x59/0xb0 [] ? flush_tlb_others_ipi+0x128/0x130 [] release_pages+0x21c/0x250 [] ? remove_migration_pte+0x28a/0x2b0 [] ? mem_cgroup_get_reclaim_stat_from_page+0x18/0x70 [] ____pagevec_lru_add+0x167/0x180 [] __lru_cache_add+0x58/0x70 [] lru_cache_add_lru+0x21/0x40 [] putback_lru_page+0x69/0x100 [] migrate_pages+0x13d/0x5d0 [] ? ____pagevec_lru_add+0x167/0x180 [] ? compaction_alloc+0x0/0x370 [] compact_zone+0x4cc/0x600 [] ? get_page_from_freelist+0x15c/0x820 [] ? check_preempt_wakeup+0x1c4/0x3c0 [] compact_zone_order+0x7e/0xb0 [] try_to_compact_pages+0x109/0x170 [] __alloc_pages_nodemask+0x5ed/0x850 [] ? thread_return+0x4e/0x778 [] alloc_pages_vma+0x93/0x150 [] do_huge_pmd_anonymous_page+0x135/0x340 [] ? rwsem_down_read_failed+0x26/0x30 [] handle_mm_fault+0x245/0x2b0 [] do_page_fault+0x123/0x3a0 [] page_fault+0x25/0x30 nfs_migrate_page() calls nfs_fscache_release_page() which doesn't actually wait - even if __GFP_WAIT is set. The reason that doesn't wait is that fscache_maybe_release_page() might deadlock the allocator as the work threads writing to the cache may all end up sleeping on memory allocation. However, I wonder if that is actually a problem. There are a number of things I can do to deal with this: (1) Make nfs_migrate_page() wait. (2) Make fscache_maybe_release_page() honour the __GFP_WAIT flag. (3) Set a timeout around the wait. 
(4) Make nfs_migrate_page() return an error if the page is still busy. For the moment, I'll select (2) and (4). Signed-off-by: David Howells Acked-by: Jeff Layton --- fs/nfs/write.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 5209916e122..b673be31590 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1794,7 +1794,8 @@ int nfs_migrate_page(struct address_space *mapping, struct page *newpage, if (PagePrivate(page)) return -EBUSY; - nfs_fscache_release_page(page, GFP_KERNEL); + if (!nfs_fscache_release_page(page, GFP_KERNEL)) + return -EBUSY; return migrate_page(mapping, newpage, page, mode); } -- cgit v1.2.3 From a4ff146881c2764d7c3e4ef710e7c27d521ddd51 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 5 Dec 2012 16:31:49 +0000 Subject: NFS4: Open files for fscaching nfs4_file_open() should open files for fscaching. Signed-off-by: David Howells --- fs/nfs/fscache.c | 1 + fs/nfs/nfs4file.c | 2 ++ 2 files changed, 3 insertions(+) (limited to 'fs/nfs') diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c index c817787fbdb..24d1d1c5fca 100644 --- a/fs/nfs/fscache.c +++ b/fs/nfs/fscache.c @@ -307,6 +307,7 @@ void nfs_fscache_set_inode_cookie(struct inode *inode, struct file *filp) nfs_fscache_inode_unlock(inode); } } +EXPORT_SYMBOL_GPL(nfs_fscache_set_inode_cookie); /* * Replace a per-inode cookie due to revalidation detecting a file having diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index e7699308364..08ddcccb888 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -5,6 +5,7 @@ */ #include #include "internal.h" +#include "fscache.h" #include "pnfs.h" #define NFSDBG_FACILITY NFSDBG_FILE @@ -74,6 +75,7 @@ nfs4_file_open(struct inode *inode, struct file *filp) nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); nfs_file_set_open_context(filp, ctx); + nfs_fscache_set_inode_cookie(inode, filp); err = 0; out_put_ctx: -- cgit v1.2.3 From 
c129c29347b6cf0d64bfe53848f68320286612ab Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 21 Dec 2012 12:15:05 +0000 Subject: NFS: Provide stub nfs_fscache_wait_on_invalidate() for when CONFIG_NFS_FSCACHE=n Provide a stub nfs_fscache_wait_on_invalidate() function for when CONFIG_NFS_FSCACHE=n lest the following error appear: fs/nfs/inode.c: In function 'nfs_invalidate_mapping': fs/nfs/inode.c:887:2: error: implicit declaration of function 'nfs_fscache_wait_on_invalidate' [-Werror=implicit-function-declaration] cc1: some warnings being treated as errors Reported-by: kbuild test robot Reported-by: Vineet Gupta Reported-by: Borislav Petkov Signed-off-by: David Howells Signed-off-by: Linus Torvalds --- fs/nfs/fscache.h | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/nfs') diff --git a/fs/nfs/fscache.h b/fs/nfs/fscache.h index 277b0278289..4ecb76652eb 100644 --- a/fs/nfs/fscache.h +++ b/fs/nfs/fscache.h @@ -222,6 +222,7 @@ static inline void nfs_readpage_to_fscache(struct inode *inode, static inline void nfs_fscache_invalidate(struct inode *inode) {} +static inline void nfs_fscache_wait_on_invalidate(struct inode *inode) {} static inline const char *nfs_server_fscache_state(struct nfs_server *server) { -- cgit v1.2.3 From c4271c6e37c32105492cbbed35f45330cb327b94 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 21 Dec 2012 11:02:32 -0500 Subject: NFS: Kill fscache warnings when mounting without -ofsc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The fscache code will currently bleat a "non-unique superblock keys" warning even if the user is mounting without the 'fsc' option. There should be no reason to even initialise the superblock cache cookie unless we're planning on using fscache for something, so ensure that we check for the NFS_OPTION_FSCACHE flag before calling into the fscache code. 
Reported-by: Paweł Sikora Signed-off-by: Trond Myklebust Cc: David Howells Acked-by: David Howells Signed-off-by: Linus Torvalds --- fs/nfs/super.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/super.c b/fs/nfs/super.c index aa5315bb366..c25cadf8f8c 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -2375,19 +2375,30 @@ static void nfs_get_cache_cookie(struct super_block *sb, struct nfs_parsed_mount_data *parsed, struct nfs_clone_mount *cloned) { + struct nfs_server *nfss = NFS_SB(sb); char *uniq = NULL; int ulen = 0; - if (parsed && parsed->fscache_uniq) { - uniq = parsed->fscache_uniq; - ulen = strlen(parsed->fscache_uniq); + nfss->fscache_key = NULL; + nfss->fscache = NULL; + + if (parsed) { + if (!(parsed->options & NFS_OPTION_FSCACHE)) + return; + if (parsed->fscache_uniq) { + uniq = parsed->fscache_uniq; + ulen = strlen(parsed->fscache_uniq); + } } else if (cloned) { struct nfs_server *mnt_s = NFS_SB(cloned->sb); + if (!(mnt_s->options & NFS_OPTION_FSCACHE)) + return; if (mnt_s->fscache_key) { uniq = mnt_s->fscache_key->key.uniquifier; ulen = mnt_s->fscache_key->key.uniq_len; }; - } + } else + return; nfs_fscache_get_super_cookie(sb, uniq, ulen); } -- cgit v1.2.3 From f8d9a897d4384b77f13781ea813156568f68b83e Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Thu, 3 Jan 2013 16:42:29 -0500 Subject: NFS: Fix access to suid/sgid executables nfs_open_permission_mask() should only check MAY_EXEC for files that are opened with __FMODE_EXEC. Also fix NFSv4 access-in-open path in a similar way -- openflags must be used because fmode will not always have FMODE_EXEC set. 
This patch fixes https://bugzilla.kernel.org/show_bug.cgi?id=49101 Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org --- fs/nfs/dir.c | 16 ++++++++++------ fs/nfs/nfs4proc.c | 18 +++++++++++------- 2 files changed, 21 insertions(+), 13 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 32e6c53520e..1b2d7eb9379 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -2153,12 +2153,16 @@ static int nfs_open_permission_mask(int openflags) { int mask = 0; - if ((openflags & O_ACCMODE) != O_WRONLY) - mask |= MAY_READ; - if ((openflags & O_ACCMODE) != O_RDONLY) - mask |= MAY_WRITE; - if (openflags & __FMODE_EXEC) - mask |= MAY_EXEC; + if (openflags & __FMODE_EXEC) { + /* ONLY check exec rights */ + mask = MAY_EXEC; + } else { + if ((openflags & O_ACCMODE) != O_WRONLY) + mask |= MAY_READ; + if ((openflags & O_ACCMODE) != O_RDONLY) + mask |= MAY_WRITE; + } + return mask; } diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 5d864fb3657..cf747ef8665 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1626,7 +1626,8 @@ static int _nfs4_recover_proc_open(struct nfs4_opendata *data) static int nfs4_opendata_access(struct rpc_cred *cred, struct nfs4_opendata *opendata, - struct nfs4_state *state, fmode_t fmode) + struct nfs4_state *state, fmode_t fmode, + int openflags) { struct nfs_access_entry cache; u32 mask; @@ -1638,11 +1639,14 @@ static int nfs4_opendata_access(struct rpc_cred *cred, mask = 0; /* don't check MAY_WRITE - a newly created file may not have - * write mode bits, but POSIX allows the creating process to write */ - if (fmode & FMODE_READ) - mask |= MAY_READ; - if (fmode & FMODE_EXEC) - mask |= MAY_EXEC; + * write mode bits, but POSIX allows the creating process to write. + * use openflags to check for exec, because fmode won't + * always have FMODE_EXEC set when file open for exec. 
*/ + if (openflags & __FMODE_EXEC) { + /* ONLY check for exec rights */ + mask = MAY_EXEC; + } else if (fmode & FMODE_READ) + mask = MAY_READ; cache.cred = cred; cache.jiffies = jiffies; @@ -1896,7 +1900,7 @@ static int _nfs4_do_open(struct inode *dir, if (server->caps & NFS_CAP_POSIX_LOCK) set_bit(NFS_STATE_POSIX_LOCKS, &state->flags); - status = nfs4_opendata_access(cred, opendata, state, fmode); + status = nfs4_opendata_access(cred, opendata, state, fmode, flags); if (status != 0) goto err_opendata_put; -- cgit v1.2.3 From 39e88fcfb1d5c6c4b1ff76ca2ab76cf449b850e8 Mon Sep 17 00:00:00 2001 From: Yanchuan Nian Date: Fri, 4 Jan 2013 20:19:49 +0800 Subject: pnfs: Increase the refcount when LAYOUTGET fails the first time The layout will be set unusable if LAYOUTGET fails. Is it reasonable to increase the refcount iff LAYOUTGET fails the first time? Signed-off-by: Yanchuan Nian Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org [>= 3.7] --- fs/nfs/pnfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index e7165d91536..d00260b0810 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -254,7 +254,7 @@ static void pnfs_layout_set_fail_bit(struct pnfs_layout_hdr *lo, int fail_bit) { lo->plh_retry_timestamp = jiffies; - if (test_and_set_bit(fail_bit, &lo->plh_flags)) + if (!test_and_set_bit(fail_bit, &lo->plh_flags)) atomic_inc(&lo->plh_refcount); } -- cgit v1.2.3 From e25fbe380c4e3c09afa98bcdcd9d3921443adab8 Mon Sep 17 00:00:00 2001 From: Xi Wang Date: Fri, 4 Jan 2013 03:22:57 -0500 Subject: nfs: fix null checking in nfs_get_option_str() The following null pointer check is broken. *option = match_strdup(args); return !option; The pointer `option' must be non-null, and thus `!option' is always false. Use `!*option' instead. The bug was introduced in commit c5cb09b6f8 ("Cleanup: Factor out some cut-and-paste code."). 
Signed-off-by: Xi Wang Cc: stable@vger.kernel.org Signed-off-by: Trond Myklebust --- fs/nfs/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/super.c b/fs/nfs/super.c index c25cadf8f8c..2e7e8c878e5 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1152,7 +1152,7 @@ static int nfs_get_option_str(substring_t args[], char **option) { kfree(*option); *option = match_strdup(args); - return !option; + return !*option; } static int nfs_get_option_ul(substring_t args[], unsigned long *option) -- cgit v1.2.3 From 6db6dd7d3fd8f7c765dabc376493d6791ab28bd6 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 4 Jan 2013 12:47:04 -0500 Subject: NFS: Ensure that we free the rpc_task after read and write cleanups are done This patch ensures that we free the rpc_task after the cleanup callbacks are done in order to avoid a deadlock problem that can be triggered if the callback needs to wait for another workqueue item to complete. Signed-off-by: Trond Myklebust Cc: Weston Andros Adamson Cc: Tejun Heo Cc: Bruce Fields Cc: stable@vger.kernel.org [>= 3.5] --- fs/nfs/read.c | 10 +++++++--- fs/nfs/write.c | 10 +++++++--- 2 files changed, 14 insertions(+), 6 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/read.c b/fs/nfs/read.c index b6bdb18e892..a5e5d9899d5 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -91,12 +91,16 @@ void nfs_readdata_release(struct nfs_read_data *rdata) put_nfs_open_context(rdata->args.context); if (rdata->pages.pagevec != rdata->pages.page_array) kfree(rdata->pages.pagevec); - if (rdata != &read_header->rpc_data) - kfree(rdata); - else + if (rdata == &read_header->rpc_data) { rdata->header = NULL; + rdata = NULL; + } if (atomic_dec_and_test(&hdr->refcnt)) hdr->completion_ops->completion(hdr); + /* Note: we only free the rpc_task after callbacks are done. 
+ * See the comment in rpc_free_task() for why + */ + kfree(rdata); } EXPORT_SYMBOL_GPL(nfs_readdata_release); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index b673be31590..c483cc50b82 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -126,12 +126,16 @@ void nfs_writedata_release(struct nfs_write_data *wdata) put_nfs_open_context(wdata->args.context); if (wdata->pages.pagevec != wdata->pages.page_array) kfree(wdata->pages.pagevec); - if (wdata != &write_header->rpc_data) - kfree(wdata); - else + if (wdata == &write_header->rpc_data) { wdata->header = NULL; + wdata = NULL; + } if (atomic_dec_and_test(&hdr->refcnt)) hdr->completion_ops->completion(hdr); + /* Note: we only free the rpc_task after callbacks are done. + * See the comment in rpc_free_task() for why + */ + kfree(wdata); } EXPORT_SYMBOL_GPL(nfs_writedata_release); -- cgit v1.2.3 From ecf0eb9edbb607d74f74b73c14af8b43f3729528 Mon Sep 17 00:00:00 2001 From: Nickolai Zeldovich Date: Sat, 5 Jan 2013 14:19:51 -0500 Subject: nfs: avoid dereferencing null pointer in initiate_bulk_draining Fix an inverted null pointer check in initiate_bulk_draining(). Signed-off-by: Nickolai Zeldovich Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org [>= 3.7] --- fs/nfs/callback_proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index c89b26bc975..264d1aa935f 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -206,7 +206,7 @@ static u32 initiate_bulk_draining(struct nfs_client *clp, list_for_each_entry(lo, &server->layouts, plh_layouts) { ino = igrab(lo->plh_inode); - if (ino) + if (!ino) continue; spin_lock(&ino->i_lock); /* Is this layout in the process of being freed? 
*/ -- cgit v1.2.3 From dee972b967ae111ad5705733de17a3bfc4632311 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 16 Jan 2013 15:05:44 -0500 Subject: NFS: Fix error reporting in nfs_xdev_mount Currently, nfs_xdev_mount converts all errors from clone_server() to ENOMEM, which can then leak to userspace (for instance to 'mount'). Fix that. Also ensure that if nfs_fs_mount_common() returns an error, we don't dprintk(0)... The regression originated in commit 3d176e3fe4f6dc379b252bf43e2e146a8f7caf01 (NFS: Use nfs_fs_mount_common() for xdev mounts) Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org [>= 3.5] --- fs/nfs/super.c | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 2e7e8c878e5..b056b162872 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -2589,27 +2589,23 @@ nfs_xdev_mount(struct file_system_type *fs_type, int flags, struct nfs_server *server; struct dentry *mntroot = ERR_PTR(-ENOMEM); struct nfs_subversion *nfs_mod = NFS_SB(data->sb)->nfs_client->cl_nfs_mod; - int error; - dprintk("--> nfs_xdev_mount_common()\n"); + dprintk("--> nfs_xdev_mount()\n"); mount_info.mntfh = mount_info.cloned->fh; /* create a new volume representation */ server = nfs_mod->rpc_ops->clone_server(NFS_SB(data->sb), data->fh, data->fattr, data->authflavor); - if (IS_ERR(server)) { - error = PTR_ERR(server); - goto out_err; - } - mntroot = nfs_fs_mount_common(server, flags, dev_name, &mount_info, nfs_mod); - dprintk("<-- nfs_xdev_mount_common() = 0\n"); -out: - return mntroot; + if (IS_ERR(server)) + mntroot = ERR_CAST(server); + else + mntroot = nfs_fs_mount_common(server, flags, + dev_name, &mount_info, nfs_mod); -out_err: - dprintk("<-- nfs_xdev_mount_common() = %d [error]\n", error); - goto out; + dprintk("<-- nfs_xdev_mount() = %ld\n", + IS_ERR(mntroot) ? 
PTR_ERR(mntroot) : 0L); + return mntroot; } #if IS_ENABLED(CONFIG_NFS_V4) -- cgit v1.2.3 From 4ae19c2dd713edb7b8ad3d4ab9d234ed5dcb6b98 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 18 Jan 2013 22:41:53 -0500 Subject: NFSv4: Fix NFSv4 reference counting for trunked sessions The reference counting in nfs4_init_client assumes wrongly that it is safe for nfs4_discover_server_trunking() to return a pointer to a nfs_client prior to bumping the reference count. Signed-off-by: Trond Myklebust Cc: Chuck Lever Cc: Ben Greear Cc: stable@vger.kernel.org [>=3.7] --- fs/nfs/nfs4client.c | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index acc34726812..65a290a7306 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -236,11 +236,10 @@ struct nfs_client *nfs4_init_client(struct nfs_client *clp, error = nfs4_discover_server_trunking(clp, &old); if (error < 0) goto error; + nfs_put_client(clp); if (clp != old) { clp->cl_preserve_clid = true; - nfs_put_client(clp); clp = old; - atomic_inc(&clp->cl_count); } return clp; @@ -306,7 +305,7 @@ int nfs40_walk_client_list(struct nfs_client *new, .clientid = new->cl_clientid, .confirm = new->cl_confirm, }; - int status; + int status = -NFS4ERR_STALE_CLIENTID; spin_lock(&nn->nfs_client_lock); list_for_each_entry_safe(pos, n, &nn->nfs_client_list, cl_share_link) { @@ -332,28 +331,28 @@ int nfs40_walk_client_list(struct nfs_client *new, if (prev) nfs_put_client(prev); + prev = pos; status = nfs4_proc_setclientid_confirm(pos, &clid, cred); - if (status == 0) { + switch (status) { + case -NFS4ERR_STALE_CLIENTID: + break; + case 0: nfs4_swap_callback_idents(pos, new); - nfs_put_client(pos); + prev = NULL; *result = pos; dprintk("NFS: <-- %s using nfs_client = %p ({%d})\n", __func__, pos, atomic_read(&pos->cl_count)); - return 0; - } - if (status != -NFS4ERR_STALE_CLIENTID) { - nfs_put_client(pos); - dprintk("NFS: 
<-- %s status = %d, no result\n", - __func__, status); - return status; + default: + goto out; } spin_lock(&nn->nfs_client_lock); - prev = pos; } + spin_unlock(&nn->nfs_client_lock); +out: /* * No matching nfs_client found. This should be impossible, * because the new nfs_client has already been added to @@ -363,9 +362,8 @@ int nfs40_walk_client_list(struct nfs_client *new, */ if (prev) nfs_put_client(prev); - spin_unlock(&nn->nfs_client_lock); - pr_err("NFS: %s Error: no matching nfs_client found\n", __func__); - return -NFS4ERR_STALE_CLIENTID; + dprintk("NFS: <-- %s status = %d\n", __func__, status); + return status; } #ifdef CONFIG_NFS_V4_1 @@ -473,6 +471,7 @@ int nfs41_walk_client_list(struct nfs_client *new, if (!nfs4_match_serverowners(pos, new)) continue; + atomic_inc(&pos->cl_count); spin_unlock(&nn->nfs_client_lock); dprintk("NFS: <-- %s using nfs_client = %p ({%d})\n", __func__, pos, atomic_read(&pos->cl_count)); -- cgit v1.2.3 From 202c312dba7d95b96493b412c606163a0cd83984 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 18 Jan 2013 22:56:23 -0500 Subject: NFSv4: Fix NFSv4 trunking discovery If walking the list in nfs4[01]_walk_client_list fails, then the most likely explanation is that the server dropped the clientid before we actually managed to confirm it. As long as our nfs_client is the very last one in the list to be tested, the caller can be assured that this is the case when the final return value is NFS4ERR_STALE_CLIENTID. 
Reported-by: Ben Greear Signed-off-by: Trond Myklebust Cc: Chuck Lever Cc: stable@vger.kernel.org [>=3.7] Tested-by: Ben Greear --- fs/nfs/nfs4client.c | 26 +++++++------------------- fs/nfs/nfs4state.c | 8 ++------ 2 files changed, 9 insertions(+), 25 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 65a290a7306..2f21f17fb16 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -352,14 +352,8 @@ int nfs40_walk_client_list(struct nfs_client *new, } spin_unlock(&nn->nfs_client_lock); + /* No match found. The server lost our clientid */ out: - /* - * No matching nfs_client found. This should be impossible, - * because the new nfs_client has already been added to - * nfs_client_list by nfs_get_client(). - * - * Don't BUG(), since the caller is holding a mutex. - */ if (prev) nfs_put_client(prev); dprintk("NFS: <-- %s status = %d\n", __func__, status); @@ -430,7 +424,7 @@ int nfs41_walk_client_list(struct nfs_client *new, { struct nfs_net *nn = net_generic(new->cl_net, nfs_net_id); struct nfs_client *pos, *n, *prev = NULL; - int error; + int status = -NFS4ERR_STALE_CLIENTID; spin_lock(&nn->nfs_client_lock); list_for_each_entry_safe(pos, n, &nn->nfs_client_list, cl_share_link) { @@ -446,8 +440,8 @@ int nfs41_walk_client_list(struct nfs_client *new, nfs_put_client(prev); prev = pos; - error = nfs_wait_client_init_complete(pos); - if (error < 0) { + status = nfs_wait_client_init_complete(pos); + if (status < 0) { nfs_put_client(pos); spin_lock(&nn->nfs_client_lock); continue; @@ -480,16 +474,10 @@ int nfs41_walk_client_list(struct nfs_client *new, return 0; } - /* - * No matching nfs_client found. This should be impossible, - * because the new nfs_client has already been added to - * nfs_client_list by nfs_get_client(). - * - * Don't BUG(), since the caller is holding a mutex. - */ + /* No matching nfs_client found. 
*/ spin_unlock(&nn->nfs_client_lock); - pr_err("NFS: %s Error: no matching nfs_client found\n", __func__); - return -NFS4ERR_STALE_CLIENTID; + dprintk("NFS: <-- %s status = %d\n", __func__, status); + return status; } #endif /* CONFIG_NFS_V4_1 */ diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 9448c579d41..f72561ca689 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -136,16 +136,11 @@ int nfs40_discover_server_trunking(struct nfs_client *clp, clp->cl_confirm = clid.confirm; status = nfs40_walk_client_list(clp, result, cred); - switch (status) { - case -NFS4ERR_STALE_CLIENTID: - set_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state); - case 0: + if (status == 0) { /* Sustain the lease, even if it's empty. If the clientid4 * goes stale it's of no use for trunking discovery. */ nfs4_schedule_state_renewal(*result); - break; } - out: return status; } @@ -1863,6 +1858,7 @@ again: case -ETIMEDOUT: case -EAGAIN: ssleep(1); + case -NFS4ERR_STALE_CLIENTID: dprintk("NFS: %s after status %d, retrying\n", __func__, status); goto again; -- cgit v1.2.3 From 65436ec0c8e344d9b23302b686e418f2a7b7cf7b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 18 Jan 2013 23:01:43 -0500 Subject: NFSv4.1: Ensure that nfs41_walk_client_list() does start lease recovery We do need to start the lease recovery thread prior to waiting for the client initialisation to complete in NFSv4.1. 
Signed-off-by: Trond Myklebust Cc: Chuck Lever Cc: Ben Greear Cc: stable@vger.kernel.org [>=3.7] --- fs/nfs/nfs4client.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 2f21f17fb16..2e9779b58b7 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -440,14 +440,17 @@ int nfs41_walk_client_list(struct nfs_client *new, nfs_put_client(prev); prev = pos; + nfs4_schedule_lease_recovery(pos); status = nfs_wait_client_init_complete(pos); if (status < 0) { nfs_put_client(pos); spin_lock(&nn->nfs_client_lock); continue; } - + status = pos->cl_cons_state; spin_lock(&nn->nfs_client_lock); + if (status < 0) + continue; } if (pos->rpc_ops != new->rpc_ops) -- cgit v1.2.3 From ab225417825963b6dc66be7ea80f94ac1378dfdf Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 22 Jan 2013 00:17:06 -0500 Subject: NFS: Don't silently fail setattr() requests on mountpoints Ensure that any setattr and getattr requests for junctions and/or mountpoints are sent to the server. Ever since commit 0ec26fd0698 (vfs: automount should ignore LOOKUP_FOLLOW), we have silently dropped any setattr requests to a server-side mountpoint. For referrals, we have silently dropped both getattr and setattr requests. This patch restores the original behaviour for setattr on mountpoints, and tries to do the same for referrals, provided that we have a filehandle... 
Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org --- fs/nfs/namespace.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'fs/nfs') diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index dd057bc6b65..fc8dc20fdeb 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -177,11 +177,31 @@ out_nofree: return mnt; } +static int +nfs_namespace_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) +{ + if (NFS_FH(dentry->d_inode)->size != 0) + return nfs_getattr(mnt, dentry, stat); + generic_fillattr(dentry->d_inode, stat); + return 0; +} + +static int +nfs_namespace_setattr(struct dentry *dentry, struct iattr *attr) +{ + if (NFS_FH(dentry->d_inode)->size != 0) + return nfs_setattr(dentry, attr); + return -EACCES; +} + const struct inode_operations nfs_mountpoint_inode_operations = { .getattr = nfs_getattr, + .setattr = nfs_setattr, }; const struct inode_operations nfs_referral_inode_operations = { + .getattr = nfs_namespace_getattr, + .setattr = nfs_namespace_setattr, }; static void nfs_expire_automounts(struct work_struct *work) -- cgit v1.2.3 From c489ee290bdbbace6bb63ebe6ebd4dd605819495 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 30 Jan 2013 13:04:10 -0500 Subject: NFSv4.1: Handle NFS4ERR_DELAY when resetting the NFSv4.1 session NFS4ERR_DELAY is a legal reply when we call DESTROY_SESSION. It usually means that the server is busy handling an unfinished RPC request. Just sleep for a second and then retry. We also need to be able to handle the NFS4ERR_BACK_CHAN_BUSY return value. If the NFS server has outstanding callbacks, we just want to similarly sleep & retry. 
Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org --- fs/nfs/nfs4state.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index f72561ca689..e61f68d5ef2 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -2018,8 +2018,18 @@ static int nfs4_reset_session(struct nfs_client *clp) nfs4_begin_drain_session(clp); cred = nfs4_get_exchange_id_cred(clp); status = nfs4_proc_destroy_session(clp->cl_session, cred); - if (status && status != -NFS4ERR_BADSESSION && - status != -NFS4ERR_DEADSESSION) { + switch (status) { + case 0: + case -NFS4ERR_BADSESSION: + case -NFS4ERR_DEADSESSION: + break; + case -NFS4ERR_BACK_CHAN_BUSY: + case -NFS4ERR_DELAY: + set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state); + status = 0; + ssleep(1); + goto out; + default: status = nfs4_recovery_handle_error(clp, status); goto out; } -- cgit v1.2.3