diff --git a/Documentation/filesystems/nfs/nfs41-server.txt b/Documentation/filesystems/nfs/nfs41-server.txt index 092fad92a3f..01c2db76979 100644 --- a/Documentation/filesystems/nfs/nfs41-server.txt +++ b/Documentation/filesystems/nfs/nfs41-server.txt @@ -39,21 +39,10 @@ interoperability problems with future clients. Known issues: from a linux client are possible, but we aren't really conformant with the spec (for example, we don't use kerberos on the backchannel correctly). - - Incomplete backchannel support: incomplete backchannel gss - support and no support for BACKCHANNEL_CTL mean that - callbacks (hence delegations and layouts) may not be - available and clients confused by the incomplete - implementation may fail. - We do not support SSV, which provides security for shared client-server state (thus preventing unauthorized tampering with locks and opens, for example). It is mandatory for servers to support this, though no clients use it yet. - - Mandatory operations which we do not support, such as - DESTROY_CLIENTID, are not currently used by clients, but will be - (and the spec recommends their uses in common cases), and - clients should not be expected to know how to recover from the - case where they are not supported. This will eventually cause - interoperability failures. 
In addition, some limitations are inherited from the current NFSv4 implementation: @@ -89,7 +78,7 @@ Operations | | MNI | or OPT) | | +----------------------+------------+--------------+----------------+ | ACCESS | REQ | | Section 18.1 | -NS | BACKCHANNEL_CTL | REQ | | Section 18.33 | +I | BACKCHANNEL_CTL | REQ | | Section 18.33 | I | BIND_CONN_TO_SESSION | REQ | | Section 18.34 | | CLOSE | REQ | | Section 18.2 | | COMMIT | REQ | | Section 18.3 | @@ -99,7 +88,7 @@ NS*| DELEGPURGE | OPT | FDELG (REQ) | Section 18.5 | | DELEGRETURN | OPT | FDELG, | Section 18.6 | | | | DDELG, pNFS | | | | | (REQ) | | -NS | DESTROY_CLIENTID | REQ | | Section 18.50 | +I | DESTROY_CLIENTID | REQ | | Section 18.50 | I | DESTROY_SESSION | REQ | | Section 18.37 | I | EXCHANGE_ID | REQ | | Section 18.35 | I | FREE_STATEID | REQ | | Section 18.38 | @@ -192,7 +181,6 @@ EXCHANGE_ID: CREATE_SESSION: * backchannel attributes are ignored -* backchannel security parameters are ignored SEQUENCE: * no support for dynamic slot table renegotiation (optional) @@ -202,7 +190,7 @@ Nonstandard compound limitations: ca_maxrequestsize request and a ca_maxresponsesize reply, so we may fail to live up to the promise we made in CREATE_SESSION fore channel negotiation. -* No more than one IO operation (read, write, readdir) allowed per - compound. +* No more than one read-like operation allowed per compound; encoding + replies that cross page boundaries (except for read data) not handled. See also http://wiki.linux-nfs.org/wiki/index.php/Server_4.0_and_4.1_issues. 
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index 606bb074c50..5df4bb4aab1 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c @@ -322,10 +322,10 @@ static int export_encode_fh(struct inode *inode, struct fid *fid, if (parent && (len < 4)) { *max_len = 4; - return 255; + return FILEID_INVALID; } else if (len < 2) { *max_len = 2; - return 255; + return FILEID_INVALID; } len = 2; diff --git a/fs/fhandle.c b/fs/fhandle.c index cccdc874bb5..999ff5c3cab 100644 --- a/fs/fhandle.c +++ b/fs/fhandle.c @@ -52,7 +52,7 @@ static long do_sys_name_to_handle(struct path *path, handle_bytes = handle_dwords * sizeof(u32); handle->handle_bytes = handle_bytes; if ((handle->handle_bytes > f_handle.handle_bytes) || - (retval == 255) || (retval == -ENOSPC)) { + (retval == FILEID_INVALID) || (retval == -ENOSPC)) { /* As per old exportfs_encode_fh documentation * we could return ENOSPC to indicate overflow * But file system returned 255 always. So handle diff --git a/fs/nfsd/fault_inject.c b/fs/nfsd/fault_inject.c index e6c38159622..e761ee95617 100644 --- a/fs/nfsd/fault_inject.c +++ b/fs/nfsd/fault_inject.c @@ -8,61 +8,144 @@ #include #include #include +#include +#include +#include #include "state.h" -#include "fault_inject.h" +#include "netns.h" struct nfsd_fault_inject_op { char *file; - void (*func)(u64); + u64 (*forget)(struct nfs4_client *, u64); + u64 (*print)(struct nfs4_client *, u64); }; static struct nfsd_fault_inject_op inject_ops[] = { { .file = "forget_clients", - .func = nfsd_forget_clients, + .forget = nfsd_forget_client, + .print = nfsd_print_client, }, { .file = "forget_locks", - .func = nfsd_forget_locks, + .forget = nfsd_forget_client_locks, + .print = nfsd_print_client_locks, }, { .file = "forget_openowners", - .func = nfsd_forget_openowners, + .forget = nfsd_forget_client_openowners, + .print = nfsd_print_client_openowners, }, { .file = "forget_delegations", - .func = nfsd_forget_delegations, + .forget = nfsd_forget_client_delegations, + .print = 
nfsd_print_client_delegations, }, { .file = "recall_delegations", - .func = nfsd_recall_delegations, + .forget = nfsd_recall_client_delegations, + .print = nfsd_print_client_delegations, }, }; static long int NUM_INJECT_OPS = sizeof(inject_ops) / sizeof(struct nfsd_fault_inject_op); static struct dentry *debug_dir; -static int nfsd_inject_set(void *op_ptr, u64 val) +static void nfsd_inject_set(struct nfsd_fault_inject_op *op, u64 val) { - struct nfsd_fault_inject_op *op = op_ptr; + u64 count = 0; if (val == 0) printk(KERN_INFO "NFSD Fault Injection: %s (all)", op->file); else printk(KERN_INFO "NFSD Fault Injection: %s (n = %llu)", op->file, val); - op->func(val); - return 0; + nfs4_lock_state(); + count = nfsd_for_n_state(val, op->forget); + nfs4_unlock_state(); + printk(KERN_INFO "NFSD: %s: found %llu", op->file, count); } -static int nfsd_inject_get(void *data, u64 *val) +static void nfsd_inject_set_client(struct nfsd_fault_inject_op *op, + struct sockaddr_storage *addr, + size_t addr_size) { - *val = 0; - return 0; + char buf[INET6_ADDRSTRLEN]; + struct nfs4_client *clp; + u64 count; + + nfs4_lock_state(); + clp = nfsd_find_client(addr, addr_size); + if (clp) { + count = op->forget(clp, 0); + rpc_ntop((struct sockaddr *)&clp->cl_addr, buf, sizeof(buf)); + printk(KERN_INFO "NFSD [%s]: Client %s had %llu state object(s)\n", op->file, buf, count); + } + nfs4_unlock_state(); } -DEFINE_SIMPLE_ATTRIBUTE(fops_nfsd, nfsd_inject_get, nfsd_inject_set, "%llu\n"); +static void nfsd_inject_get(struct nfsd_fault_inject_op *op, u64 *val) +{ + nfs4_lock_state(); + *val = nfsd_for_n_state(0, op->print); + nfs4_unlock_state(); +} + +static ssize_t fault_inject_read(struct file *file, char __user *buf, + size_t len, loff_t *ppos) +{ + static u64 val; + char read_buf[25]; + size_t size, ret; + loff_t pos = *ppos; + + if (!pos) + nfsd_inject_get(file->f_dentry->d_inode->i_private, &val); + size = scnprintf(read_buf, sizeof(read_buf), "%llu\n", val); + + if (pos < 0) + return 
-EINVAL; + if (pos >= size || !len) + return 0; + if (len > size - pos) + len = size - pos; + ret = copy_to_user(buf, read_buf + pos, len); + if (ret == len) + return -EFAULT; + len -= ret; + *ppos = pos + len; + return len; +} + +static ssize_t fault_inject_write(struct file *file, const char __user *buf, + size_t len, loff_t *ppos) +{ + char write_buf[INET6_ADDRSTRLEN]; + size_t size = min(sizeof(write_buf) - 1, len); + struct net *net = current->nsproxy->net_ns; + struct sockaddr_storage sa; + u64 val; + + if (copy_from_user(write_buf, buf, size)) + return -EFAULT; + write_buf[size] = '\0'; + + size = rpc_pton(net, write_buf, size, (struct sockaddr *)&sa, sizeof(sa)); + if (size > 0) + nfsd_inject_set_client(file->f_dentry->d_inode->i_private, &sa, size); + else { + val = simple_strtoll(write_buf, NULL, 0); + nfsd_inject_set(file->f_dentry->d_inode->i_private, val); + } + return len; /* on success, claim we got the whole input */ +} + +static const struct file_operations fops_nfsd = { + .owner = THIS_MODULE, + .read = fault_inject_read, + .write = fault_inject_write, +}; void nfsd_fault_inject_cleanup(void) { diff --git a/fs/nfsd/fault_inject.h b/fs/nfsd/fault_inject.h deleted file mode 100644 index 90bd0570956..00000000000 --- a/fs/nfsd/fault_inject.h +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Copyright (c) 2011 Bryan Schumaker - * - * Function definitions for fault injection - */ - -#ifndef LINUX_NFSD_FAULT_INJECT_H -#define LINUX_NFSD_FAULT_INJECT_H - -#ifdef CONFIG_NFSD_FAULT_INJECTION -int nfsd_fault_inject_init(void); -void nfsd_fault_inject_cleanup(void); -void nfsd_forget_clients(u64); -void nfsd_forget_locks(u64); -void nfsd_forget_openowners(u64); -void nfsd_forget_delegations(u64); -void nfsd_recall_delegations(u64); -#else /* CONFIG_NFSD_FAULT_INJECTION */ -static inline int nfsd_fault_inject_init(void) { return 0; } -static inline void nfsd_fault_inject_cleanup(void) {} -static inline void nfsd_forget_clients(u64 num) {} -static inline void 
nfsd_forget_locks(u64 num) {} -static inline void nfsd_forget_openowners(u64 num) {} -static inline void nfsd_forget_delegations(u64 num) {} -static inline void nfsd_recall_delegations(u64 num) {} -#endif /* CONFIG_NFSD_FAULT_INJECTION */ - -#endif /* LINUX_NFSD_FAULT_INJECT_H */ diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h index 65c2431ea32..1051bebff1b 100644 --- a/fs/nfsd/netns.h +++ b/fs/nfsd/netns.h @@ -24,7 +24,18 @@ #include #include +/* Hash tables for nfs4_clientid state */ +#define CLIENT_HASH_BITS 4 +#define CLIENT_HASH_SIZE (1 << CLIENT_HASH_BITS) +#define CLIENT_HASH_MASK (CLIENT_HASH_SIZE - 1) + +#define LOCKOWNER_INO_HASH_BITS 8 +#define LOCKOWNER_INO_HASH_SIZE (1 << LOCKOWNER_INO_HASH_BITS) + +#define SESSION_HASH_SIZE 512 + struct cld_net; +struct nfsd4_client_tracking_ops; struct nfsd_net { struct cld_net *cld_net; @@ -38,7 +49,62 @@ struct nfsd_net { struct lock_manager nfsd4_manager; bool grace_ended; time_t boot_time; + + /* + * reclaim_str_hashtbl[] holds known client info from previous reset/reboot + * used in reboot/reset lease grace period processing + * + * conf_id_hashtbl[], and conf_name_tree hold confirmed + * setclientid_confirmed info. + * + * unconf_str_hastbl[] and unconf_name_tree hold unconfirmed + * setclientid info. + */ + struct list_head *reclaim_str_hashtbl; + int reclaim_str_hashtbl_size; + struct list_head *conf_id_hashtbl; + struct rb_root conf_name_tree; + struct list_head *unconf_id_hashtbl; + struct rb_root unconf_name_tree; + struct list_head *ownerstr_hashtbl; + struct list_head *lockowner_ino_hashtbl; + struct list_head *sessionid_hashtbl; + /* + * client_lru holds client queue ordered by nfs4_client.cl_time + * for lease renewal. + * + * close_lru holds (open) stateowner queue ordered by nfs4_stateowner.so_time + * for last close replay. + * + * All of the above fields are protected by the client_mutex. 
+ */ + struct list_head client_lru; + struct list_head close_lru; + + struct delayed_work laundromat_work; + + /* client_lock protects the client lru list and session hash table */ + spinlock_t client_lock; + + struct file *rec_file; + bool in_grace; + struct nfsd4_client_tracking_ops *client_tracking_ops; + + time_t nfsd4_lease; + time_t nfsd4_grace; + + bool nfsd_net_up; + + /* + * Time of server startup + */ + struct timeval nfssvc_boot; + + struct svc_serv *nfsd_serv; }; +/* Simple check to find out if a given net was properly initialized */ +#define nfsd_netns_ready(nn) ((nn)->sessionid_hashtbl) + extern int nfsd_net_id; #endif /* __NFSD_NETNS_H__ */ diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c index b314888825d..9170861c804 100644 --- a/fs/nfsd/nfs2acl.c +++ b/fs/nfsd/nfs2acl.c @@ -253,7 +253,7 @@ static int nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p, (resp->mask & NFS_ACL) ? resp->acl_access : NULL, (resp->mask & NFS_DFACL) ? resp->acl_default : NULL); while (w > 0) { - if (!rqstp->rq_respages[rqstp->rq_resused++]) + if (!*(rqstp->rq_next_page++)) return 0; w -= PAGE_SIZE; } diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c index a596e9d987e..9cbc1a841f8 100644 --- a/fs/nfsd/nfs3acl.c +++ b/fs/nfsd/nfs3acl.c @@ -184,7 +184,7 @@ static int nfs3svc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p, (resp->mask & NFS_ACL) ? resp->acl_access : NULL, (resp->mask & NFS_DFACL) ? 
resp->acl_default : NULL); while (w > 0) { - if (!rqstp->rq_respages[rqstp->rq_resused++]) + if (!*(rqstp->rq_next_page++)) return 0; w -= PAGE_SIZE; } diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index 97d90d1c860..1fc02dfdc5c 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -460,7 +460,7 @@ nfsd3_proc_readdirplus(struct svc_rqst *rqstp, struct nfsd3_readdirargs *argp, __be32 nfserr; int count = 0; loff_t offset; - int i; + struct page **p; caddr_t page_addr = NULL; dprintk("nfsd: READDIR+(3) %s %d bytes at %d\n", @@ -484,8 +484,8 @@ nfsd3_proc_readdirplus(struct svc_rqst *rqstp, struct nfsd3_readdirargs *argp, &resp->common, nfs3svc_encode_entry_plus); memcpy(resp->verf, argp->verf, 8); - for (i=1; irq_resused ; i++) { - page_addr = page_address(rqstp->rq_respages[i]); + for (p = rqstp->rq_respages + 1; p < rqstp->rq_next_page; p++) { + page_addr = page_address(*p); if (((caddr_t)resp->buffer >= page_addr) && ((caddr_t)resp->buffer < page_addr + PAGE_SIZE)) { diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index 43f46cd9ede..324c0baf7cd 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -7,8 +7,10 @@ */ #include +#include #include "xdr3.h" #include "auth.h" +#include "netns.h" #define NFSDDBG_FACILITY NFSDDBG_XDR @@ -323,7 +325,7 @@ nfs3svc_decode_readargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_readargs *args) { unsigned int len; - int v,pn; + int v; u32 max_blocksize = svc_max_payload(rqstp); if (!(p = decode_fh(p, &args->fh))) @@ -338,8 +340,9 @@ nfs3svc_decode_readargs(struct svc_rqst *rqstp, __be32 *p, /* set up the kvec */ v=0; while (len > 0) { - pn = rqstp->rq_resused++; - rqstp->rq_vec[v].iov_base = page_address(rqstp->rq_respages[pn]); + struct page *p = *(rqstp->rq_next_page++); + + rqstp->rq_vec[v].iov_base = page_address(p); rqstp->rq_vec[v].iov_len = len < PAGE_SIZE? 
len : PAGE_SIZE; len -= rqstp->rq_vec[v].iov_len; v++; @@ -461,8 +464,7 @@ nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p, len = ntohl(*p++); if (len == 0 || len > NFS3_MAXPATHLEN || len >= PAGE_SIZE) return 0; - args->tname = new = - page_address(rqstp->rq_respages[rqstp->rq_resused++]); + args->tname = new = page_address(*(rqstp->rq_next_page++)); args->tlen = len; /* first copy and check from the first page */ old = (char*)p; @@ -533,8 +535,7 @@ nfs3svc_decode_readlinkargs(struct svc_rqst *rqstp, __be32 *p, { if (!(p = decode_fh(p, &args->fh))) return 0; - args->buffer = - page_address(rqstp->rq_respages[rqstp->rq_resused++]); + args->buffer = page_address(*(rqstp->rq_next_page++)); return xdr_argsize_check(rqstp, p); } @@ -565,8 +566,7 @@ nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p, if (args->count > PAGE_SIZE) args->count = PAGE_SIZE; - args->buffer = - page_address(rqstp->rq_respages[rqstp->rq_resused++]); + args->buffer = page_address(*(rqstp->rq_next_page++)); return xdr_argsize_check(rqstp, p); } @@ -575,7 +575,7 @@ int nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_readdirargs *args) { - int len, pn; + int len; u32 max_blocksize = svc_max_payload(rqstp); if (!(p = decode_fh(p, &args->fh))) @@ -590,9 +590,9 @@ nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, __be32 *p, args->count = len; while (len > 0) { - pn = rqstp->rq_resused++; + struct page *p = *(rqstp->rq_next_page++); if (!args->buffer) - args->buffer = page_address(rqstp->rq_respages[pn]); + args->buffer = page_address(p); len -= PAGE_SIZE; } @@ -720,12 +720,14 @@ int nfs3svc_encode_writeres(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_writeres *resp) { + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); + p = encode_wcc_data(rqstp, p, &resp->fh); if (resp->status == 0) { *p++ = htonl(resp->count); *p++ = htonl(resp->committed); - *p++ = htonl(nfssvc_boot.tv_sec); - *p++ = htonl(nfssvc_boot.tv_usec); + *p++ = 
htonl(nn->nfssvc_boot.tv_sec); + *p++ = htonl(nn->nfssvc_boot.tv_usec); } return xdr_ressize_check(rqstp, p); } @@ -876,7 +878,7 @@ encode_entry(struct readdir_cd *ccd, const char *name, int namlen, common); __be32 *p = cd->buffer; caddr_t curr_page_addr = NULL; - int pn; /* current page number */ + struct page ** page; int slen; /* string (name) length */ int elen; /* estimated entry length in words */ int num_entry_words = 0; /* actual number of words */ @@ -913,8 +915,9 @@ encode_entry(struct readdir_cd *ccd, const char *name, int namlen, } /* determine which page in rq_respages[] we are currently filling */ - for (pn=1; pn < cd->rqstp->rq_resused; pn++) { - curr_page_addr = page_address(cd->rqstp->rq_respages[pn]); + for (page = cd->rqstp->rq_respages + 1; + page < cd->rqstp->rq_next_page; page++) { + curr_page_addr = page_address(*page); if (((caddr_t)cd->buffer >= curr_page_addr) && ((caddr_t)cd->buffer < curr_page_addr + PAGE_SIZE)) @@ -929,14 +932,14 @@ encode_entry(struct readdir_cd *ccd, const char *name, int namlen, if (plus) p = encode_entryplus_baggage(cd, p, name, namlen); num_entry_words = p - cd->buffer; - } else if (cd->rqstp->rq_respages[pn+1] != NULL) { + } else if (*(page+1) != NULL) { /* temporarily encode entry into next page, then move back to * current and next page in rq_respages[] */ __be32 *p1, *tmp; int len1, len2; /* grab next page for temporary storage of entry */ - p1 = tmp = page_address(cd->rqstp->rq_respages[pn+1]); + p1 = tmp = page_address(*(page+1)); p1 = encode_entry_baggage(cd, p1, name, namlen, ino); @@ -1082,11 +1085,13 @@ int nfs3svc_encode_commitres(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_commitres *resp) { + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); + p = encode_wcc_data(rqstp, p, &resp->fh); /* Write verifier */ if (resp->status == 0) { - *p++ = htonl(nfssvc_boot.tv_sec); - *p++ = htonl(nfssvc_boot.tv_usec); + *p++ = htonl(nn->nfssvc_boot.tv_sec); + *p++ = htonl(nn->nfssvc_boot.tv_usec); } 
return xdr_ressize_check(rqstp, p); } diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index bdf29c96e4c..99bc85ff021 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -36,6 +36,7 @@ #include #include "nfsd.h" #include "state.h" +#include "netns.h" #define NFSDDBG_FACILITY NFSDDBG_PROC @@ -625,20 +626,46 @@ static const struct rpc_program cb_program = { .pipe_dir_name = "nfsd4_cb", }; -static int max_cb_time(void) +static int max_cb_time(struct net *net) { - return max(nfsd4_lease/10, (time_t)1) * HZ; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + return max(nn->nfsd4_lease/10, (time_t)1) * HZ; } +static struct rpc_cred *callback_cred; + +int set_callback_cred(void) +{ + if (callback_cred) + return 0; + callback_cred = rpc_lookup_machine_cred("nfs"); + if (!callback_cred) + return -ENOMEM; + return 0; +} + +static struct rpc_cred *get_backchannel_cred(struct nfs4_client *clp, struct rpc_clnt *client, struct nfsd4_session *ses) +{ + if (clp->cl_minorversion == 0) { + return get_rpccred(callback_cred); + } else { + struct rpc_auth *auth = client->cl_auth; + struct auth_cred acred = {}; + + acred.uid = ses->se_cb_sec.uid; + acred.gid = ses->se_cb_sec.gid; + return auth->au_ops->lookup_cred(client->cl_auth, &acred, 0); + } +} static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *conn, struct nfsd4_session *ses) { struct rpc_timeout timeparms = { - .to_initval = max_cb_time(), + .to_initval = max_cb_time(clp->net), .to_retries = 0, }; struct rpc_create_args args = { - .net = &init_net, + .net = clp->net, .address = (struct sockaddr *) &conn->cb_addr, .addrsize = conn->cb_addrlen, .saddress = (struct sockaddr *) &conn->cb_saddr, @@ -648,6 +675,7 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c .flags = (RPC_CLNT_CREATE_NOPING | RPC_CLNT_CREATE_QUIET), }; struct rpc_clnt *client; + struct rpc_cred *cred; if (clp->cl_minorversion == 0) { if (!clp->cl_cred.cr_principal && @@ 
-666,7 +694,7 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c args.bc_xprt = conn->cb_xprt; args.prognumber = clp->cl_cb_session->se_cb_prog; args.protocol = XPRT_TRANSPORT_BC_TCP; - args.authflavor = RPC_AUTH_UNIX; + args.authflavor = ses->se_cb_sec.flavor; } /* Create RPC client */ client = rpc_create(&args); @@ -675,9 +703,14 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c PTR_ERR(client)); return PTR_ERR(client); } + cred = get_backchannel_cred(clp, client, ses); + if (IS_ERR(cred)) { + rpc_shutdown_client(client); + return PTR_ERR(cred); + } clp->cl_cb_client = client; + clp->cl_cb_cred = cred; return 0; - } static void warn_no_callback_path(struct nfs4_client *clp, int reason) @@ -714,18 +747,6 @@ static const struct rpc_call_ops nfsd4_cb_probe_ops = { .rpc_call_done = nfsd4_cb_probe_done, }; -static struct rpc_cred *callback_cred; - -int set_callback_cred(void) -{ - if (callback_cred) - return 0; - callback_cred = rpc_lookup_machine_cred("nfs"); - if (!callback_cred) - return -ENOMEM; - return 0; -} - static struct workqueue_struct *callback_wq; static void run_nfsd4_cb(struct nfsd4_callback *cb) @@ -743,7 +764,6 @@ static void do_probe_callback(struct nfs4_client *clp) cb->cb_msg.rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL]; cb->cb_msg.rpc_argp = NULL; cb->cb_msg.rpc_resp = NULL; - cb->cb_msg.rpc_cred = callback_cred; cb->cb_ops = &nfsd4_cb_probe_ops; @@ -962,6 +982,8 @@ static void nfsd4_process_cb_update(struct nfsd4_callback *cb) if (clp->cl_cb_client) { rpc_shutdown_client(clp->cl_cb_client); clp->cl_cb_client = NULL; + put_rpccred(clp->cl_cb_cred); + clp->cl_cb_cred = NULL; } if (clp->cl_cb_conn.cb_xprt) { svc_xprt_put(clp->cl_cb_conn.cb_xprt); @@ -995,7 +1017,7 @@ static void nfsd4_process_cb_update(struct nfsd4_callback *cb) run_nfsd4_cb(cb); } -void nfsd4_do_callback_rpc(struct work_struct *w) +static void nfsd4_do_callback_rpc(struct work_struct *w) { struct 
nfsd4_callback *cb = container_of(w, struct nfsd4_callback, cb_work); struct nfs4_client *clp = cb->cb_clp; @@ -1010,10 +1032,16 @@ void nfsd4_do_callback_rpc(struct work_struct *w) nfsd4_release_cb(cb); return; } + cb->cb_msg.rpc_cred = clp->cl_cb_cred; rpc_call_async(clnt, &cb->cb_msg, RPC_TASK_SOFT | RPC_TASK_SOFTCONN, cb->cb_ops, cb); } +void nfsd4_init_callback(struct nfsd4_callback *cb) +{ + INIT_WORK(&cb->cb_work, nfsd4_do_callback_rpc); +} + void nfsd4_cb_recall(struct nfs4_delegation *dp) { struct nfsd4_callback *cb = &dp->dl_recall; @@ -1025,7 +1053,6 @@ void nfsd4_cb_recall(struct nfs4_delegation *dp) cb->cb_msg.rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_RECALL]; cb->cb_msg.rpc_argp = cb; cb->cb_msg.rpc_resp = cb; - cb->cb_msg.rpc_cred = callback_cred; cb->cb_ops = &nfsd4_cb_recall_ops; diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 6c9a4b291db..9d1c5dba2bb 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -40,6 +40,7 @@ #include "xdr4.h" #include "vfs.h" #include "current_stateid.h" +#include "netns.h" #define NFSDDBG_FACILITY NFSDDBG_PROC @@ -194,6 +195,7 @@ static __be32 do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open) { struct svc_fh *resfh; + int accmode; __be32 status; resfh = kmalloc(sizeof(struct svc_fh), GFP_KERNEL); @@ -253,9 +255,10 @@ do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_o /* set reply cache */ fh_copy_shallow(&open->op_openowner->oo_owner.so_replay.rp_openfh, &resfh->fh_handle); - if (!open->op_created) - status = do_open_permission(rqstp, resfh, open, - NFSD_MAY_NOP); + accmode = NFSD_MAY_NOP; + if (open->op_created) + accmode |= NFSD_MAY_OWNER_OVERRIDE; + status = do_open_permission(rqstp, resfh, open, accmode); set_change_info(&open->op_cinfo, current_fh); fh_dup2(current_fh, resfh); out: @@ -304,6 +307,8 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, { __be32 status; struct nfsd4_compoundres *resp; + 
struct net *net = SVC_NET(rqstp); + struct nfsd_net *nn = net_generic(net, nfsd_net_id); dprintk("NFSD: nfsd4_open filename %.*s op_openowner %p\n", (int)open->op_fname.len, open->op_fname.data, @@ -331,7 +336,7 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, /* check seqid for replay. set nfs4_owner */ resp = rqstp->rq_resp; - status = nfsd4_process_open1(&resp->cstate, open); + status = nfsd4_process_open1(&resp->cstate, open, nn); if (status == nfserr_replay_me) { struct nfs4_replay *rp = &open->op_openowner->oo_owner.so_replay; fh_put(&cstate->current_fh); @@ -354,10 +359,10 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, /* Openowner is now set, so sequence id will get bumped. Now we need * these checks before we do any creates: */ status = nfserr_grace; - if (locks_in_grace(SVC_NET(rqstp)) && open->op_claim_type != NFS4_OPEN_CLAIM_PREVIOUS) + if (locks_in_grace(net) && open->op_claim_type != NFS4_OPEN_CLAIM_PREVIOUS) goto out; status = nfserr_no_grace; - if (!locks_in_grace(SVC_NET(rqstp)) && open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS) + if (!locks_in_grace(net) && open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS) goto out; switch (open->op_claim_type) { @@ -370,7 +375,9 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, break; case NFS4_OPEN_CLAIM_PREVIOUS: open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED; - status = nfs4_check_open_reclaim(&open->op_clientid, cstate->minorversion); + status = nfs4_check_open_reclaim(&open->op_clientid, + cstate->minorversion, + nn); if (status) goto out; case NFS4_OPEN_CLAIM_FH: @@ -490,12 +497,13 @@ nfsd4_access(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, &access->ac_supported); } -static void gen_boot_verifier(nfs4_verifier *verifier) +static void gen_boot_verifier(nfs4_verifier *verifier, struct net *net) { __be32 verf[2]; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); - verf[0] = (__be32)nfssvc_boot.tv_sec; - 
verf[1] = (__be32)nfssvc_boot.tv_usec; + verf[0] = (__be32)nn->nfssvc_boot.tv_sec; + verf[1] = (__be32)nn->nfssvc_boot.tv_usec; memcpy(verifier->data, verf, sizeof(verifier->data)); } @@ -503,7 +511,7 @@ static __be32 nfsd4_commit(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_commit *commit) { - gen_boot_verifier(&commit->co_verf); + gen_boot_verifier(&commit->co_verf, SVC_NET(rqstp)); return nfsd_commit(rqstp, &cstate->current_fh, commit->co_offset, commit->co_count); } @@ -684,6 +692,17 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (read->rd_offset >= OFFSET_MAX) return nfserr_inval; + /* + * If we do a zero copy read, then a client will see read data + * that reflects the state of the file *after* performing the + * following compound. + * + * To ensure proper ordering, we therefore turn off zero copy if + * the client wants us to do more in this compound: + */ + if (!nfsd4_last_compound_op(rqstp)) + rqstp->rq_splice_ok = false; + nfs4_lock_state(); /* check stateid */ if ((status = nfs4_preprocess_stateid_op(SVC_NET(rqstp), @@ -876,6 +895,24 @@ out: return status; } +static int fill_in_write_vector(struct kvec *vec, struct nfsd4_write *write) +{ + int i = 1; + int buflen = write->wr_buflen; + + vec[0].iov_base = write->wr_head.iov_base; + vec[0].iov_len = min_t(int, buflen, write->wr_head.iov_len); + buflen -= vec[0].iov_len; + + while (buflen) { + vec[i].iov_base = page_address(write->wr_pagelist[i - 1]); + vec[i].iov_len = min_t(int, PAGE_SIZE, buflen); + buflen -= vec[i].iov_len; + i++; + } + return i; +} + static __be32 nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_write *write) @@ -884,6 +921,7 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct file *filp = NULL; __be32 status = nfs_ok; unsigned long cnt; + int nvecs; /* no need to check permission - this will be done in nfsd_write() */ @@ -904,10 +942,13 @@ nfsd4_write(struct 
svc_rqst *rqstp, struct nfsd4_compound_state *cstate, cnt = write->wr_buflen; write->wr_how_written = write->wr_stable_how; - gen_boot_verifier(&write->wr_verifier); + gen_boot_verifier(&write->wr_verifier, SVC_NET(rqstp)); + + nvecs = fill_in_write_vector(rqstp->rq_vec, write); + WARN_ON_ONCE(nvecs > ARRAY_SIZE(rqstp->rq_vec)); status = nfsd_write(rqstp, &cstate->current_fh, filp, - write->wr_offset, rqstp->rq_vec, write->wr_vlen, + write->wr_offset, rqstp->rq_vec, nvecs, &cnt, &write->wr_how_written); if (filp) fput(filp); @@ -1666,6 +1707,12 @@ static struct nfsd4_operation nfsd4_ops[] = { .op_name = "OP_EXCHANGE_ID", .op_rsize_bop = (nfsd4op_rsize)nfsd4_exchange_id_rsize, }, + [OP_BACKCHANNEL_CTL] = { + .op_func = (nfsd4op_func)nfsd4_backchannel_ctl, + .op_flags = ALLOWED_WITHOUT_FH | OP_MODIFIES_SOMETHING, + .op_name = "OP_BACKCHANNEL_CTL", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, + }, [OP_BIND_CONN_TO_SESSION] = { .op_func = (nfsd4op_func)nfsd4_bind_conn_to_session, .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP @@ -1719,6 +1766,7 @@ static struct nfsd4_operation nfsd4_ops[] = { .op_func = (nfsd4op_func)nfsd4_free_stateid, .op_flags = ALLOWED_WITHOUT_FH | OP_MODIFIES_SOMETHING, .op_name = "OP_FREE_STATEID", + .op_get_currentstateid = (stateid_getter)nfsd4_get_freestateid, .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, }, }; diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 43295d45cc2..ba6fdd4a045 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -58,13 +58,11 @@ struct nfsd4_client_tracking_ops { void (*create)(struct nfs4_client *); void (*remove)(struct nfs4_client *); int (*check)(struct nfs4_client *); - void (*grace_done)(struct net *, time_t); + void (*grace_done)(struct nfsd_net *, time_t); }; /* Globals */ -static struct file *rec_file; static char user_recovery_dirname[PATH_MAX] = "/var/lib/nfs/v4recovery"; -static struct nfsd4_client_tracking_ops *client_tracking_ops; static int 
nfs4_save_creds(const struct cred **original_creds) @@ -102,33 +100,39 @@ md5_to_hex(char *out, char *md5) *out = '\0'; } -__be32 -nfs4_make_rec_clidname(char *dname, struct xdr_netobj *clname) +static int +nfs4_make_rec_clidname(char *dname, const struct xdr_netobj *clname) { struct xdr_netobj cksum; struct hash_desc desc; struct scatterlist sg; - __be32 status = nfserr_jukebox; + int status; dprintk("NFSD: nfs4_make_rec_clidname for %.*s\n", clname->len, clname->data); desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP; desc.tfm = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC); - if (IS_ERR(desc.tfm)) + if (IS_ERR(desc.tfm)) { + status = PTR_ERR(desc.tfm); goto out_no_tfm; + } + cksum.len = crypto_hash_digestsize(desc.tfm); cksum.data = kmalloc(cksum.len, GFP_KERNEL); - if (cksum.data == NULL) + if (cksum.data == NULL) { + status = -ENOMEM; goto out; + } sg_init_one(&sg, clname->data, clname->len); - if (crypto_hash_digest(&desc, &sg, sg.length, cksum.data)) + status = crypto_hash_digest(&desc, &sg, sg.length, cksum.data); + if (status) goto out; md5_to_hex(dname, cksum.data); - status = nfs_ok; + status = 0; out: kfree(cksum.data); crypto_free_hash(desc.tfm); @@ -136,29 +140,61 @@ out_no_tfm: return status; } +/* + * If we had an error generating the recdir name for the legacy tracker + * then warn the admin. If the error doesn't appear to be transient, + * then disable recovery tracking. + */ +static void +legacy_recdir_name_error(int error) +{ + printk(KERN_ERR "NFSD: unable to generate recoverydir " + "name (%d).\n", error); + + /* + * if the algorithm just doesn't exist, then disable the recovery + * tracker altogether. The crypto libs will generally return this if + * FIPS is enabled as well. + */ + if (error == -ENOENT) { + printk(KERN_ERR "NFSD: disabling legacy clientid tracking. 
" + "Reboot recovery will not function correctly!\n"); + + /* the argument is ignored by the legacy exit function */ + nfsd4_client_tracking_exit(NULL); + } +} + static void nfsd4_create_clid_dir(struct nfs4_client *clp) { const struct cred *original_cred; - char *dname = clp->cl_recdir; + char dname[HEXDIR_LEN]; struct dentry *dir, *dentry; + struct nfs4_client_reclaim *crp; int status; + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); dprintk("NFSD: nfsd4_create_clid_dir for \"%s\"\n", dname); if (test_and_set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags)) return; - if (!rec_file) + if (!nn->rec_file) return; + + status = nfs4_make_rec_clidname(dname, &clp->cl_name); + if (status) + return legacy_recdir_name_error(status); + status = nfs4_save_creds(&original_cred); if (status < 0) return; - status = mnt_want_write_file(rec_file); + status = mnt_want_write_file(nn->rec_file); if (status) return; - dir = rec_file->f_path.dentry; + dir = nn->rec_file->f_path.dentry; /* lock the parent */ mutex_lock(&dir->d_inode->i_mutex); @@ -182,18 +218,24 @@ out_put: dput(dentry); out_unlock: mutex_unlock(&dir->d_inode->i_mutex); - if (status == 0) - vfs_fsync(rec_file, 0); - else + if (status == 0) { + if (nn->in_grace) { + crp = nfs4_client_to_reclaim(dname, nn); + if (crp) + crp->cr_clp = clp; + } + vfs_fsync(nn->rec_file, 0); + } else { printk(KERN_ERR "NFSD: failed to write recovery record" " (err %d); please check that %s exists" " and is writeable", status, user_recovery_dirname); - mnt_drop_write_file(rec_file); + } + mnt_drop_write_file(nn->rec_file); nfs4_reset_creds(original_cred); } -typedef int (recdir_func)(struct dentry *, struct dentry *); +typedef int (recdir_func)(struct dentry *, struct dentry *, struct nfsd_net *); struct name_list { char name[HEXDIR_LEN]; @@ -219,10 +261,10 @@ nfsd4_build_namelist(void *arg, const char *name, int namlen, } static int -nfsd4_list_rec_dir(recdir_func *f) +nfsd4_list_rec_dir(recdir_func *f, struct nfsd_net *nn) { const 
struct cred *original_cred; - struct dentry *dir = rec_file->f_path.dentry; + struct dentry *dir = nn->rec_file->f_path.dentry; LIST_HEAD(names); int status; @@ -230,13 +272,13 @@ nfsd4_list_rec_dir(recdir_func *f) if (status < 0) return status; - status = vfs_llseek(rec_file, 0, SEEK_SET); + status = vfs_llseek(nn->rec_file, 0, SEEK_SET); if (status < 0) { nfs4_reset_creds(original_cred); return status; } - status = vfs_readdir(rec_file, nfsd4_build_namelist, &names); + status = vfs_readdir(nn->rec_file, nfsd4_build_namelist, &names); mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT); while (!list_empty(&names)) { struct name_list *entry; @@ -248,7 +290,7 @@ nfsd4_list_rec_dir(recdir_func *f) status = PTR_ERR(dentry); break; } - status = f(dir, dentry); + status = f(dir, dentry, nn); dput(dentry); } list_del(&entry->list); @@ -260,14 +302,14 @@ nfsd4_list_rec_dir(recdir_func *f) } static int -nfsd4_unlink_clid_dir(char *name, int namlen) +nfsd4_unlink_clid_dir(char *name, int namlen, struct nfsd_net *nn) { struct dentry *dir, *dentry; int status; dprintk("NFSD: nfsd4_unlink_clid_dir. 
name %.*s\n", namlen, name); - dir = rec_file->f_path.dentry; + dir = nn->rec_file->f_path.dentry; mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT); dentry = lookup_one_len(name, dir, namlen); if (IS_ERR(dentry)) { @@ -289,37 +331,52 @@ static void nfsd4_remove_clid_dir(struct nfs4_client *clp) { const struct cred *original_cred; + struct nfs4_client_reclaim *crp; + char dname[HEXDIR_LEN]; int status; + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); - if (!rec_file || !test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags)) + if (!nn->rec_file || !test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags)) return; - status = mnt_want_write_file(rec_file); + status = nfs4_make_rec_clidname(dname, &clp->cl_name); + if (status) + return legacy_recdir_name_error(status); + + status = mnt_want_write_file(nn->rec_file); if (status) goto out; clear_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags); status = nfs4_save_creds(&original_cred); if (status < 0) - goto out; + goto out_drop_write; - status = nfsd4_unlink_clid_dir(clp->cl_recdir, HEXDIR_LEN-1); + status = nfsd4_unlink_clid_dir(dname, HEXDIR_LEN-1, nn); nfs4_reset_creds(original_cred); - if (status == 0) - vfs_fsync(rec_file, 0); - mnt_drop_write_file(rec_file); + if (status == 0) { + vfs_fsync(nn->rec_file, 0); + if (nn->in_grace) { + /* remove reclaim record */ + crp = nfsd4_find_reclaim_client(dname, nn); + if (crp) + nfs4_remove_reclaim_record(crp, nn); + } + } +out_drop_write: + mnt_drop_write_file(nn->rec_file); out: if (status) printk("NFSD: Failed to remove expired client state directory" - " %.*s\n", HEXDIR_LEN, clp->cl_recdir); + " %.*s\n", HEXDIR_LEN, dname); } static int -purge_old(struct dentry *parent, struct dentry *child) +purge_old(struct dentry *parent, struct dentry *child, struct nfsd_net *nn) { int status; - if (nfs4_has_reclaimed_state(child->d_name.name, false)) + if (nfs4_has_reclaimed_state(child->d_name.name, nn)) return 0; status = vfs_rmdir(parent->d_inode, child); @@ -331,27 +388,29 @@ 
purge_old(struct dentry *parent, struct dentry *child) } static void -nfsd4_recdir_purge_old(struct net *net, time_t boot_time) +nfsd4_recdir_purge_old(struct nfsd_net *nn, time_t boot_time) { int status; - if (!rec_file) + nn->in_grace = false; + if (!nn->rec_file) return; - status = mnt_want_write_file(rec_file); + status = mnt_want_write_file(nn->rec_file); if (status) goto out; - status = nfsd4_list_rec_dir(purge_old); + status = nfsd4_list_rec_dir(purge_old, nn); if (status == 0) - vfs_fsync(rec_file, 0); - mnt_drop_write_file(rec_file); + vfs_fsync(nn->rec_file, 0); + mnt_drop_write_file(nn->rec_file); out: + nfs4_release_reclaim(nn); if (status) printk("nfsd4: failed to purge old clients from recovery" - " directory %s\n", rec_file->f_path.dentry->d_name.name); + " directory %s\n", nn->rec_file->f_path.dentry->d_name.name); } static int -load_recdir(struct dentry *parent, struct dentry *child) +load_recdir(struct dentry *parent, struct dentry *child, struct nfsd_net *nn) { if (child->d_name.len != HEXDIR_LEN - 1) { printk("nfsd4: illegal name %s in recovery directory\n", @@ -359,21 +418,22 @@ load_recdir(struct dentry *parent, struct dentry *child) /* Keep trying; maybe the others are OK: */ return 0; } - nfs4_client_to_reclaim(child->d_name.name); + nfs4_client_to_reclaim(child->d_name.name, nn); return 0; } static int -nfsd4_recdir_load(void) { +nfsd4_recdir_load(struct net *net) { int status; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); - if (!rec_file) + if (!nn->rec_file) return 0; - status = nfsd4_list_rec_dir(load_recdir); + status = nfsd4_list_rec_dir(load_recdir, nn); if (status) printk("nfsd4: failed loading clients from recovery" - " directory %s\n", rec_file->f_path.dentry->d_name.name); + " directory %s\n", nn->rec_file->f_path.dentry->d_name.name); return status; } @@ -382,15 +442,16 @@ nfsd4_recdir_load(void) { */ static int -nfsd4_init_recdir(void) +nfsd4_init_recdir(struct net *net) { + struct nfsd_net *nn = net_generic(net, 
nfsd_net_id); const struct cred *original_cred; int status; printk("NFSD: Using %s as the NFSv4 state recovery directory\n", user_recovery_dirname); - BUG_ON(rec_file); + BUG_ON(nn->rec_file); status = nfs4_save_creds(&original_cred); if (status < 0) { @@ -400,23 +461,65 @@ nfsd4_init_recdir(void) return status; } - rec_file = filp_open(user_recovery_dirname, O_RDONLY | O_DIRECTORY, 0); - if (IS_ERR(rec_file)) { + nn->rec_file = filp_open(user_recovery_dirname, O_RDONLY | O_DIRECTORY, 0); + if (IS_ERR(nn->rec_file)) { printk("NFSD: unable to find recovery directory %s\n", user_recovery_dirname); - status = PTR_ERR(rec_file); - rec_file = NULL; + status = PTR_ERR(nn->rec_file); + nn->rec_file = NULL; } nfs4_reset_creds(original_cred); + if (!status) + nn->in_grace = true; return status; } + +static int +nfs4_legacy_state_init(struct net *net) +{ + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + int i; + + nn->reclaim_str_hashtbl = kmalloc(sizeof(struct list_head) * + CLIENT_HASH_SIZE, GFP_KERNEL); + if (!nn->reclaim_str_hashtbl) + return -ENOMEM; + + for (i = 0; i < CLIENT_HASH_SIZE; i++) + INIT_LIST_HEAD(&nn->reclaim_str_hashtbl[i]); + nn->reclaim_str_hashtbl_size = 0; + + return 0; +} + +static void +nfs4_legacy_state_shutdown(struct net *net) +{ + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + + kfree(nn->reclaim_str_hashtbl); +} + static int nfsd4_load_reboot_recovery_data(struct net *net) { int status; + status = nfsd4_init_recdir(net); + if (!status) + status = nfsd4_recdir_load(net); + if (status) + printk(KERN_ERR "NFSD: Failure reading reboot recovery data\n"); + return status; +} + +static int +nfsd4_legacy_tracking_init(struct net *net) +{ + int status; + /* XXX: The legacy code won't work in a container */ if (net != &init_net) { WARN(1, KERN_ERR "NFSD: attempt to initialize legacy client " @@ -424,30 +527,37 @@ nfsd4_load_reboot_recovery_data(struct net *net) return -EINVAL; } - nfs4_lock_state(); - status = nfsd4_init_recdir(); - if 
(!status) - status = nfsd4_recdir_load(); - nfs4_unlock_state(); + status = nfs4_legacy_state_init(net); if (status) - printk(KERN_ERR "NFSD: Failure reading reboot recovery data\n"); + return status; + + status = nfsd4_load_reboot_recovery_data(net); + if (status) + goto err; + return 0; + +err: + nfs4_legacy_state_shutdown(net); return status; } static void -nfsd4_shutdown_recdir(void) +nfsd4_shutdown_recdir(struct nfsd_net *nn) { - if (!rec_file) + if (!nn->rec_file) return; - fput(rec_file); - rec_file = NULL; + fput(nn->rec_file); + nn->rec_file = NULL; } static void nfsd4_legacy_tracking_exit(struct net *net) { - nfs4_release_reclaim(); - nfsd4_shutdown_recdir(); + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + + nfs4_release_reclaim(nn); + nfsd4_shutdown_recdir(nn); + nfs4_legacy_state_shutdown(net); } /* @@ -480,13 +590,26 @@ nfs4_recoverydir(void) static int nfsd4_check_legacy_client(struct nfs4_client *clp) { + int status; + char dname[HEXDIR_LEN]; + struct nfs4_client_reclaim *crp; + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + /* did we already find that this client is stable? 
*/ if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags)) return 0; + status = nfs4_make_rec_clidname(dname, &clp->cl_name); + if (status) { + legacy_recdir_name_error(status); + return status; + } + /* look for it in the reclaim hashtable otherwise */ - if (nfsd4_find_reclaim_client(clp)) { + crp = nfsd4_find_reclaim_client(dname, nn); + if (crp) { set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags); + crp->cr_clp = clp; return 0; } @@ -494,7 +617,7 @@ nfsd4_check_legacy_client(struct nfs4_client *clp) } static struct nfsd4_client_tracking_ops nfsd4_legacy_tracking_ops = { - .init = nfsd4_load_reboot_recovery_data, + .init = nfsd4_legacy_tracking_init, .exit = nfsd4_legacy_tracking_exit, .create = nfsd4_create_clid_dir, .remove = nfsd4_remove_clid_dir, @@ -785,8 +908,7 @@ nfsd4_cld_create(struct nfs4_client *clp) { int ret; struct cld_upcall *cup; - /* FIXME: determine net from clp */ - struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id); + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); struct cld_net *cn = nn->cld_net; /* Don't upcall if it's already stored */ @@ -823,8 +945,7 @@ nfsd4_cld_remove(struct nfs4_client *clp) { int ret; struct cld_upcall *cup; - /* FIXME: determine net from clp */ - struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id); + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); struct cld_net *cn = nn->cld_net; /* Don't upcall if it's already removed */ @@ -861,8 +982,7 @@ nfsd4_cld_check(struct nfs4_client *clp) { int ret; struct cld_upcall *cup; - /* FIXME: determine net from clp */ - struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id); + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); struct cld_net *cn = nn->cld_net; /* Don't upcall if one was already stored during this grace pd */ @@ -892,11 +1012,10 @@ nfsd4_cld_check(struct nfs4_client *clp) } static void -nfsd4_cld_grace_done(struct net *net, time_t boot_time) +nfsd4_cld_grace_done(struct nfsd_net *nn, time_t boot_time) { int ret; struct cld_upcall 
*cup; - struct nfsd_net *nn = net_generic(net, nfsd_net_id); struct cld_net *cn = nn->cld_net; cup = alloc_cld_upcall(cn); @@ -926,28 +1045,261 @@ static struct nfsd4_client_tracking_ops nfsd4_cld_tracking_ops = { .grace_done = nfsd4_cld_grace_done, }; +/* upcall via usermodehelper */ +static char cltrack_prog[PATH_MAX] = "/sbin/nfsdcltrack"; +module_param_string(cltrack_prog, cltrack_prog, sizeof(cltrack_prog), + S_IRUGO|S_IWUSR); +MODULE_PARM_DESC(cltrack_prog, "Path to the nfsdcltrack upcall program"); + +static bool cltrack_legacy_disable; +module_param(cltrack_legacy_disable, bool, S_IRUGO|S_IWUSR); +MODULE_PARM_DESC(cltrack_legacy_disable, + "Disable legacy recoverydir conversion. Default: false"); + +#define LEGACY_TOPDIR_ENV_PREFIX "NFSDCLTRACK_LEGACY_TOPDIR=" +#define LEGACY_RECDIR_ENV_PREFIX "NFSDCLTRACK_LEGACY_RECDIR=" + +static char * +nfsd4_cltrack_legacy_topdir(void) +{ + int copied; + size_t len; + char *result; + + if (cltrack_legacy_disable) + return NULL; + + len = strlen(LEGACY_TOPDIR_ENV_PREFIX) + + strlen(nfs4_recoverydir()) + 1; + + result = kmalloc(len, GFP_KERNEL); + if (!result) + return result; + + copied = snprintf(result, len, LEGACY_TOPDIR_ENV_PREFIX "%s", + nfs4_recoverydir()); + if (copied >= len) { + /* just return nothing if output was truncated */ + kfree(result); + return NULL; + } + + return result; +} + +static char * +nfsd4_cltrack_legacy_recdir(const struct xdr_netobj *name) +{ + int copied; + size_t len; + char *result; + + if (cltrack_legacy_disable) + return NULL; + + /* +1 is for '/' between "topdir" and "recdir" */ + len = strlen(LEGACY_RECDIR_ENV_PREFIX) + + strlen(nfs4_recoverydir()) + 1 + HEXDIR_LEN; + + result = kmalloc(len, GFP_KERNEL); + if (!result) + return result; + + copied = snprintf(result, len, LEGACY_RECDIR_ENV_PREFIX "%s/", + nfs4_recoverydir()); + if (copied > (len - HEXDIR_LEN)) { + /* just return nothing if output will be truncated */ + kfree(result); + return NULL; + } + + copied = 
nfs4_make_rec_clidname(result + copied, name); + if (copied) { + kfree(result); + return NULL; + } + + return result; +} + +static int +nfsd4_umh_cltrack_upcall(char *cmd, char *arg, char *legacy) +{ + char *envp[2]; + char *argv[4]; + int ret; + + if (unlikely(!cltrack_prog[0])) { + dprintk("%s: cltrack_prog is disabled\n", __func__); + return -EACCES; + } + + dprintk("%s: cmd: %s\n", __func__, cmd); + dprintk("%s: arg: %s\n", __func__, arg ? arg : "(null)"); + dprintk("%s: legacy: %s\n", __func__, legacy ? legacy : "(null)"); + + envp[0] = legacy; + envp[1] = NULL; + + argv[0] = (char *)cltrack_prog; + argv[1] = cmd; + argv[2] = arg; + argv[3] = NULL; + + ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC); + /* + * Disable the upcall mechanism if we're getting an ENOENT or EACCES + * error. The admin can re-enable it on the fly by using sysfs + * once the problem has been fixed. + */ + if (ret == -ENOENT || ret == -EACCES) { + dprintk("NFSD: %s was not found or isn't executable (%d). 
" + "Setting cltrack_prog to blank string!", + cltrack_prog, ret); + cltrack_prog[0] = '\0'; + } + dprintk("%s: %s return value: %d\n", __func__, cltrack_prog, ret); + + return ret; +} + +static char * +bin_to_hex_dup(const unsigned char *src, int srclen) +{ + int i; + char *buf, *hex; + + /* +1 for terminating NULL */ + buf = kmalloc((srclen * 2) + 1, GFP_KERNEL); + if (!buf) + return buf; + + hex = buf; + for (i = 0; i < srclen; i++) { + sprintf(hex, "%2.2x", *src++); + hex += 2; + } + return buf; +} + +static int +nfsd4_umh_cltrack_init(struct net __attribute__((unused)) *net) +{ + return nfsd4_umh_cltrack_upcall("init", NULL, NULL); +} + +static void +nfsd4_umh_cltrack_create(struct nfs4_client *clp) +{ + char *hexid; + + hexid = bin_to_hex_dup(clp->cl_name.data, clp->cl_name.len); + if (!hexid) { + dprintk("%s: can't allocate memory for upcall!\n", __func__); + return; + } + nfsd4_umh_cltrack_upcall("create", hexid, NULL); + kfree(hexid); +} + +static void +nfsd4_umh_cltrack_remove(struct nfs4_client *clp) +{ + char *hexid; + + hexid = bin_to_hex_dup(clp->cl_name.data, clp->cl_name.len); + if (!hexid) { + dprintk("%s: can't allocate memory for upcall!\n", __func__); + return; + } + nfsd4_umh_cltrack_upcall("remove", hexid, NULL); + kfree(hexid); +} + +static int +nfsd4_umh_cltrack_check(struct nfs4_client *clp) +{ + int ret; + char *hexid, *legacy; + + hexid = bin_to_hex_dup(clp->cl_name.data, clp->cl_name.len); + if (!hexid) { + dprintk("%s: can't allocate memory for upcall!\n", __func__); + return -ENOMEM; + } + legacy = nfsd4_cltrack_legacy_recdir(&clp->cl_name); + ret = nfsd4_umh_cltrack_upcall("check", hexid, legacy); + kfree(legacy); + kfree(hexid); + return ret; +} + +static void +nfsd4_umh_cltrack_grace_done(struct nfsd_net __attribute__((unused)) *nn, + time_t boot_time) +{ + char *legacy; + char timestr[22]; /* FIXME: better way to determine max size? 
*/ + + sprintf(timestr, "%ld", boot_time); + legacy = nfsd4_cltrack_legacy_topdir(); + nfsd4_umh_cltrack_upcall("gracedone", timestr, legacy); + kfree(legacy); +} + +static struct nfsd4_client_tracking_ops nfsd4_umh_tracking_ops = { + .init = nfsd4_umh_cltrack_init, + .exit = NULL, + .create = nfsd4_umh_cltrack_create, + .remove = nfsd4_umh_cltrack_remove, + .check = nfsd4_umh_cltrack_check, + .grace_done = nfsd4_umh_cltrack_grace_done, +}; + int nfsd4_client_tracking_init(struct net *net) { int status; struct path path; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); - if (!client_tracking_ops) { - client_tracking_ops = &nfsd4_cld_tracking_ops; - status = kern_path(nfs4_recoverydir(), LOOKUP_FOLLOW, &path); - if (!status) { - if (S_ISDIR(path.dentry->d_inode->i_mode)) - client_tracking_ops = - &nfsd4_legacy_tracking_ops; - path_put(&path); - } + /* just run the init if the method is already decided */ + if (nn->client_tracking_ops) + goto do_init; + + /* + * First, try a UMH upcall. It should succeed or fail quickly, so + * there's little harm in trying that first. + */ + nn->client_tracking_ops = &nfsd4_umh_tracking_ops; + status = nn->client_tracking_ops->init(net); + if (!status) + return status; + + /* + * See if the recoverydir exists and is a directory. If it is, + * then use the legacy ops. + */ + nn->client_tracking_ops = &nfsd4_legacy_tracking_ops; + status = kern_path(nfs4_recoverydir(), LOOKUP_FOLLOW, &path); + if (!status) { + status = S_ISDIR(path.dentry->d_inode->i_mode); + path_put(&path); + if (status) + goto do_init; } - status = client_tracking_ops->init(net); + /* Finally, try to use nfsdcld */ + nn->client_tracking_ops = &nfsd4_cld_tracking_ops; + printk(KERN_WARNING "NFSD: the nfsdcld client tracking upcall will be " + "removed in 3.10. Please transition to using " + "nfsdcltrack.\n"); +do_init: + status = nn->client_tracking_ops->init(net); if (status) { printk(KERN_WARNING "NFSD: Unable to initialize client " "recovery tracking! 
(%d)\n", status); - client_tracking_ops = NULL; + nn->client_tracking_ops = NULL; } return status; } @@ -955,40 +1307,49 @@ nfsd4_client_tracking_init(struct net *net) void nfsd4_client_tracking_exit(struct net *net) { - if (client_tracking_ops) { - client_tracking_ops->exit(net); - client_tracking_ops = NULL; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + + if (nn->client_tracking_ops) { + if (nn->client_tracking_ops->exit) + nn->client_tracking_ops->exit(net); + nn->client_tracking_ops = NULL; } } void nfsd4_client_record_create(struct nfs4_client *clp) { - if (client_tracking_ops) - client_tracking_ops->create(clp); + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + + if (nn->client_tracking_ops) + nn->client_tracking_ops->create(clp); } void nfsd4_client_record_remove(struct nfs4_client *clp) { - if (client_tracking_ops) - client_tracking_ops->remove(clp); + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + + if (nn->client_tracking_ops) + nn->client_tracking_ops->remove(clp); } int nfsd4_client_record_check(struct nfs4_client *clp) { - if (client_tracking_ops) - return client_tracking_ops->check(clp); + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + + if (nn->client_tracking_ops) + return nn->client_tracking_ops->check(clp); return -EOPNOTSUPP; } void -nfsd4_record_grace_done(struct net *net, time_t boot_time) +nfsd4_record_grace_done(struct nfsd_net *nn, time_t boot_time) { - if (client_tracking_ops) - client_tracking_ops->grace_done(net, boot_time); + if (nn->client_tracking_ops) + nn->client_tracking_ops->grace_done(nn, boot_time); } static int diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index d0237f872cc..ac8ed96c419 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -44,16 +44,11 @@ #include "xdr4.h" #include "vfs.h" #include "current_stateid.h" -#include "fault_inject.h" #include "netns.h" #define NFSDDBG_FACILITY NFSDDBG_PROC -/* Globals */ -time_t nfsd4_lease = 90; /* default lease time 
*/ -time_t nfsd4_grace = 90; - #define all_ones {{~0,~0},~0} static const stateid_t one_stateid = { .si_generation = ~0, @@ -176,8 +171,6 @@ static unsigned int ownerstr_hashval(u32 clientid, struct xdr_netobj *ownername) return ret & OWNER_HASH_MASK; } -static struct list_head ownerstr_hashtbl[OWNER_HASH_SIZE]; - /* hash table for nfs4_file */ #define FILE_HASH_BITS 8 #define FILE_HASH_SIZE (1 << FILE_HASH_BITS) @@ -192,7 +185,7 @@ static struct list_head file_hashtbl[FILE_HASH_SIZE]; static void __nfs4_file_get_access(struct nfs4_file *fp, int oflag) { - BUG_ON(!(fp->fi_fds[oflag] || fp->fi_fds[O_RDWR])); + WARN_ON_ONCE(!(fp->fi_fds[oflag] || fp->fi_fds[O_RDWR])); atomic_inc(&fp->fi_access[oflag]); } @@ -251,7 +244,7 @@ static inline int get_new_stid(struct nfs4_stid *stid) * preallocations that can exist at a time, but the state lock * prevents anyone from using ours before we get here: */ - BUG_ON(error); + WARN_ON_ONCE(error); /* * It shouldn't be a problem to reuse an opaque stateid value. * I don't think it is for 4.1. 
But with 4.0 I worry that, for @@ -340,7 +333,7 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_ol_stateid *stp, struct sv fh_copy_shallow(&dp->dl_fh, &current_fh->fh_handle); dp->dl_time = 0; atomic_set(&dp->dl_count, 1); - INIT_WORK(&dp->dl_recall.cb_work, nfsd4_do_callback_rpc); + nfsd4_init_callback(&dp->dl_recall); return dp; } @@ -390,14 +383,6 @@ unhash_delegation(struct nfs4_delegation *dp) * SETCLIENTID state */ -/* client_lock protects the client lru list and session hash table */ -static DEFINE_SPINLOCK(client_lock); - -/* Hash tables for nfs4_clientid state */ -#define CLIENT_HASH_BITS 4 -#define CLIENT_HASH_SIZE (1 << CLIENT_HASH_BITS) -#define CLIENT_HASH_MASK (CLIENT_HASH_SIZE - 1) - static unsigned int clientid_hashval(u32 id) { return id & CLIENT_HASH_MASK; @@ -408,31 +393,6 @@ static unsigned int clientstr_hashval(const char *name) return opaque_hashval(name, 8) & CLIENT_HASH_MASK; } -/* - * reclaim_str_hashtbl[] holds known client info from previous reset/reboot - * used in reboot/reset lease grace period processing - * - * conf_id_hashtbl[], and conf_str_hashtbl[] hold confirmed - * setclientid_confirmed info. - * - * unconf_str_hastbl[] and unconf_id_hashtbl[] hold unconfirmed - * setclientid info. - * - * client_lru holds client queue ordered by nfs4_client.cl_time - * for lease renewal. - * - * close_lru holds (open) stateowner queue ordered by nfs4_stateowner.so_time - * for last close replay. 
- */ -static struct list_head reclaim_str_hashtbl[CLIENT_HASH_SIZE]; -static int reclaim_str_hashtbl_size = 0; -static struct list_head conf_id_hashtbl[CLIENT_HASH_SIZE]; -static struct list_head conf_str_hashtbl[CLIENT_HASH_SIZE]; -static struct list_head unconf_str_hashtbl[CLIENT_HASH_SIZE]; -static struct list_head unconf_id_hashtbl[CLIENT_HASH_SIZE]; -static struct list_head client_lru; -static struct list_head close_lru; - /* * We store the NONE, READ, WRITE, and BOTH bits separately in the * st_{access,deny}_bmap field of the stateid, in order to track not @@ -526,7 +486,8 @@ static int nfs4_access_to_omode(u32 access) case NFS4_SHARE_ACCESS_BOTH: return O_RDWR; } - BUG(); + WARN_ON_ONCE(1); + return O_RDONLY; } /* release all access and file references for a given stateid */ @@ -652,9 +613,6 @@ static void release_openowner(struct nfs4_openowner *oo) nfs4_free_openowner(oo); } -#define SESSION_HASH_SIZE 512 -static struct list_head sessionid_hashtbl[SESSION_HASH_SIZE]; - static inline int hash_sessionid(struct nfs4_sessionid *sessionid) { @@ -785,9 +743,12 @@ out_free: return NULL; } -static void init_forechannel_attrs(struct nfsd4_channel_attrs *new, struct nfsd4_channel_attrs *req, int numslots, int slotsize) +static void init_forechannel_attrs(struct nfsd4_channel_attrs *new, + struct nfsd4_channel_attrs *req, + int numslots, int slotsize, + struct nfsd_net *nn) { - u32 maxrpc = nfsd_serv->sv_max_mesg; + u32 maxrpc = nn->nfsd_serv->sv_max_mesg; new->maxreqs = numslots; new->maxresp_cached = min_t(u32, req->maxresp_cached, @@ -906,21 +867,27 @@ static void __free_session(struct nfsd4_session *ses) static void free_session(struct kref *kref) { struct nfsd4_session *ses; + struct nfsd_net *nn; - lockdep_assert_held(&client_lock); ses = container_of(kref, struct nfsd4_session, se_ref); + nn = net_generic(ses->se_client->net, nfsd_net_id); + + lockdep_assert_held(&nn->client_lock); nfsd4_del_conns(ses); __free_session(ses); } void nfsd4_put_session(struct 
nfsd4_session *ses) { - spin_lock(&client_lock); + struct nfsd_net *nn = net_generic(ses->se_client->net, nfsd_net_id); + + spin_lock(&nn->client_lock); nfsd4_put_session_locked(ses); - spin_unlock(&client_lock); + spin_unlock(&nn->client_lock); } -static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fchan) +static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fchan, + struct nfsd_net *nn) { struct nfsd4_session *new; int numslots, slotsize; @@ -941,13 +908,14 @@ static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fchan) nfsd4_put_drc_mem(slotsize, fchan->maxreqs); return NULL; } - init_forechannel_attrs(&new->se_fchannel, fchan, numslots, slotsize); + init_forechannel_attrs(&new->se_fchannel, fchan, numslots, slotsize, nn); return new; } -static struct nfsd4_session *init_session(struct svc_rqst *rqstp, struct nfsd4_session *new, struct nfs4_client *clp, struct nfsd4_create_session *cses) +static void init_session(struct svc_rqst *rqstp, struct nfsd4_session *new, struct nfs4_client *clp, struct nfsd4_create_session *cses) { int idx; + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); new->se_client = clp; gen_sessionid(new); @@ -957,14 +925,15 @@ static struct nfsd4_session *init_session(struct svc_rqst *rqstp, struct nfsd4_s new->se_cb_seq_nr = 1; new->se_flags = cses->flags; new->se_cb_prog = cses->callback_prog; + new->se_cb_sec = cses->cb_sec; kref_init(&new->se_ref); idx = hash_sessionid(&new->se_sessionid); - spin_lock(&client_lock); - list_add(&new->se_hash, &sessionid_hashtbl[idx]); + spin_lock(&nn->client_lock); + list_add(&new->se_hash, &nn->sessionid_hashtbl[idx]); spin_lock(&clp->cl_lock); list_add(&new->se_perclnt, &clp->cl_sessions); spin_unlock(&clp->cl_lock); - spin_unlock(&client_lock); + spin_unlock(&nn->client_lock); if (cses->flags & SESSION4_BACK_CHAN) { struct sockaddr *sa = svc_addr(rqstp); @@ -978,20 +947,20 @@ static struct nfsd4_session *init_session(struct svc_rqst 
*rqstp, struct nfsd4_s rpc_copy_addr((struct sockaddr *)&clp->cl_cb_conn.cb_addr, sa); clp->cl_cb_conn.cb_addrlen = svc_addr_len(sa); } - return new; } /* caller must hold client_lock */ static struct nfsd4_session * -find_in_sessionid_hashtbl(struct nfs4_sessionid *sessionid) +find_in_sessionid_hashtbl(struct nfs4_sessionid *sessionid, struct net *net) { struct nfsd4_session *elem; int idx; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); dump_sessionid(__func__, sessionid); idx = hash_sessionid(sessionid); /* Search in the appropriate list */ - list_for_each_entry(elem, &sessionid_hashtbl[idx], se_hash) { + list_for_each_entry(elem, &nn->sessionid_hashtbl[idx], se_hash) { if (!memcmp(elem->se_sessionid.data, sessionid->data, NFS4_MAX_SESSIONID_LEN)) { return elem; @@ -1016,6 +985,8 @@ unhash_session(struct nfsd4_session *ses) static inline void renew_client_locked(struct nfs4_client *clp) { + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + if (is_client_expired(clp)) { WARN_ON(1); printk("%s: client (clientid %08x/%08x) already expired\n", @@ -1028,16 +999,18 @@ renew_client_locked(struct nfs4_client *clp) dprintk("renewing client (clientid %08x/%08x)\n", clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id); - list_move_tail(&clp->cl_lru, &client_lru); + list_move_tail(&clp->cl_lru, &nn->client_lru); clp->cl_time = get_seconds(); } static inline void renew_client(struct nfs4_client *clp) { - spin_lock(&client_lock); + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + + spin_lock(&nn->client_lock); renew_client_locked(clp); - spin_unlock(&client_lock); + spin_unlock(&nn->client_lock); } /* SETCLIENTID and SETCLIENTID_CONFIRM Helper functions */ @@ -1075,7 +1048,9 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name) static inline void free_client(struct nfs4_client *clp) { - lockdep_assert_held(&client_lock); + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + + lockdep_assert_held(&nn->client_lock); while 
(!list_empty(&clp->cl_sessions)) { struct nfsd4_session *ses; ses = list_entry(clp->cl_sessions.next, struct nfsd4_session, @@ -1092,15 +1067,16 @@ void release_session_client(struct nfsd4_session *session) { struct nfs4_client *clp = session->se_client; + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); - if (!atomic_dec_and_lock(&clp->cl_refcount, &client_lock)) + if (!atomic_dec_and_lock(&clp->cl_refcount, &nn->client_lock)) return; if (is_client_expired(clp)) { free_client(clp); session->se_client = NULL; } else renew_client_locked(clp); - spin_unlock(&client_lock); + spin_unlock(&nn->client_lock); } /* must be called under the client_lock */ @@ -1123,6 +1099,7 @@ destroy_client(struct nfs4_client *clp) struct nfs4_openowner *oo; struct nfs4_delegation *dp; struct list_head reaplist; + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); INIT_LIST_HEAD(&reaplist); spin_lock(&recall_lock); @@ -1144,12 +1121,15 @@ destroy_client(struct nfs4_client *clp) if (clp->cl_cb_conn.cb_xprt) svc_xprt_put(clp->cl_cb_conn.cb_xprt); list_del(&clp->cl_idhash); - list_del(&clp->cl_strhash); - spin_lock(&client_lock); + if (test_bit(NFSD4_CLIENT_CONFIRMED, &clp->cl_flags)) + rb_erase(&clp->cl_namenode, &nn->conf_name_tree); + else + rb_erase(&clp->cl_namenode, &nn->unconf_name_tree); + spin_lock(&nn->client_lock); unhash_client_locked(clp); if (atomic_read(&clp->cl_refcount) == 0) free_client(clp); - spin_unlock(&client_lock); + spin_unlock(&nn->client_lock); } static void expire_client(struct nfs4_client *clp) @@ -1187,6 +1167,17 @@ static int copy_cred(struct svc_cred *target, struct svc_cred *source) return 0; } +static long long +compare_blob(const struct xdr_netobj *o1, const struct xdr_netobj *o2) +{ + long long res; + + res = o1->len - o2->len; + if (res) + return res; + return (long long)memcmp(o1->data, o2->data, o1->len); +} + static int same_name(const char *n1, const char *n2) { return 0 == memcmp(n1, n2, HEXDIR_LEN); @@ -1247,10 +1238,9 @@ 
same_creds(struct svc_cred *cr1, struct svc_cred *cr2) return 0 == strcmp(cr1->cr_principal, cr2->cr_principal); } -static void gen_clid(struct nfs4_client *clp) +static void gen_clid(struct nfs4_client *clp, struct nfsd_net *nn) { static u32 current_clientid = 1; - struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id); clp->cl_clientid.cl_boot = nn->boot_time; clp->cl_clientid.cl_id = current_clientid++; @@ -1283,12 +1273,14 @@ static struct nfs4_stid *find_stateid_by_type(struct nfs4_client *cl, stateid_t return NULL; } -static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir, +static struct nfs4_client *create_client(struct xdr_netobj name, struct svc_rqst *rqstp, nfs4_verifier *verf) { struct nfs4_client *clp; struct sockaddr *sa = svc_addr(rqstp); int ret; + struct net *net = SVC_NET(rqstp); + struct nfsd_net *nn = net_generic(net, nfsd_net_id); clp = alloc_client(name); if (clp == NULL) @@ -1297,23 +1289,21 @@ static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir, INIT_LIST_HEAD(&clp->cl_sessions); ret = copy_cred(&clp->cl_cred, &rqstp->rq_cred); if (ret) { - spin_lock(&client_lock); + spin_lock(&nn->client_lock); free_client(clp); - spin_unlock(&client_lock); + spin_unlock(&nn->client_lock); return NULL; } idr_init(&clp->cl_stateids); - memcpy(clp->cl_recdir, recdir, HEXDIR_LEN); atomic_set(&clp->cl_refcount, 0); clp->cl_cb_state = NFSD4_CB_UNKNOWN; INIT_LIST_HEAD(&clp->cl_idhash); - INIT_LIST_HEAD(&clp->cl_strhash); INIT_LIST_HEAD(&clp->cl_openowners); INIT_LIST_HEAD(&clp->cl_delegations); INIT_LIST_HEAD(&clp->cl_lru); INIT_LIST_HEAD(&clp->cl_callbacks); spin_lock_init(&clp->cl_lock); - INIT_WORK(&clp->cl_cb_null.cb_work, nfsd4_do_callback_rpc); + nfsd4_init_callback(&clp->cl_cb_null); clp->cl_time = get_seconds(); clear_bit(0, &clp->cl_cb_slot_busy); rpc_init_wait_queue(&clp->cl_cb_waitq, "Backchannel slot table"); @@ -1321,17 +1311,60 @@ static struct nfs4_client *create_client(struct xdr_netobj name, char 
*recdir, rpc_copy_addr((struct sockaddr *) &clp->cl_addr, sa); gen_confirm(clp); clp->cl_cb_session = NULL; + clp->net = net; return clp; } static void -add_to_unconfirmed(struct nfs4_client *clp, unsigned int strhashval) +add_clp_to_name_tree(struct nfs4_client *new_clp, struct rb_root *root) +{ + struct rb_node **new = &(root->rb_node), *parent = NULL; + struct nfs4_client *clp; + + while (*new) { + clp = rb_entry(*new, struct nfs4_client, cl_namenode); + parent = *new; + + if (compare_blob(&clp->cl_name, &new_clp->cl_name) > 0) + new = &((*new)->rb_left); + else + new = &((*new)->rb_right); + } + + rb_link_node(&new_clp->cl_namenode, parent, new); + rb_insert_color(&new_clp->cl_namenode, root); +} + +static struct nfs4_client * +find_clp_in_name_tree(struct xdr_netobj *name, struct rb_root *root) +{ + long long cmp; + struct rb_node *node = root->rb_node; + struct nfs4_client *clp; + + while (node) { + clp = rb_entry(node, struct nfs4_client, cl_namenode); + cmp = compare_blob(&clp->cl_name, name); + if (cmp > 0) + node = node->rb_left; + else if (cmp < 0) + node = node->rb_right; + else + return clp; + } + return NULL; +} + +static void +add_to_unconfirmed(struct nfs4_client *clp) { unsigned int idhashval; + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); - list_add(&clp->cl_strhash, &unconf_str_hashtbl[strhashval]); + clear_bit(NFSD4_CLIENT_CONFIRMED, &clp->cl_flags); + add_clp_to_name_tree(clp, &nn->unconf_name_tree); idhashval = clientid_hashval(clp->cl_clientid.cl_id); - list_add(&clp->cl_idhash, &unconf_id_hashtbl[idhashval]); + list_add(&clp->cl_idhash, &nn->unconf_id_hashtbl[idhashval]); renew_client(clp); } @@ -1339,22 +1372,23 @@ static void move_to_confirmed(struct nfs4_client *clp) { unsigned int idhashval = clientid_hashval(clp->cl_clientid.cl_id); - unsigned int strhashval; + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); dprintk("NFSD: move_to_confirm nfs4_client %p\n", clp); - list_move(&clp->cl_idhash, 
&conf_id_hashtbl[idhashval]); - strhashval = clientstr_hashval(clp->cl_recdir); - list_move(&clp->cl_strhash, &conf_str_hashtbl[strhashval]); + list_move(&clp->cl_idhash, &nn->conf_id_hashtbl[idhashval]); + rb_erase(&clp->cl_namenode, &nn->unconf_name_tree); + add_clp_to_name_tree(clp, &nn->conf_name_tree); + set_bit(NFSD4_CLIENT_CONFIRMED, &clp->cl_flags); renew_client(clp); } static struct nfs4_client * -find_confirmed_client(clientid_t *clid, bool sessions) +find_confirmed_client(clientid_t *clid, bool sessions, struct nfsd_net *nn) { struct nfs4_client *clp; unsigned int idhashval = clientid_hashval(clid->cl_id); - list_for_each_entry(clp, &conf_id_hashtbl[idhashval], cl_idhash) { + list_for_each_entry(clp, &nn->conf_id_hashtbl[idhashval], cl_idhash) { if (same_clid(&clp->cl_clientid, clid)) { if ((bool)clp->cl_minorversion != sessions) return NULL; @@ -1366,12 +1400,12 @@ find_confirmed_client(clientid_t *clid, bool sessions) } static struct nfs4_client * -find_unconfirmed_client(clientid_t *clid, bool sessions) +find_unconfirmed_client(clientid_t *clid, bool sessions, struct nfsd_net *nn) { struct nfs4_client *clp; unsigned int idhashval = clientid_hashval(clid->cl_id); - list_for_each_entry(clp, &unconf_id_hashtbl[idhashval], cl_idhash) { + list_for_each_entry(clp, &nn->unconf_id_hashtbl[idhashval], cl_idhash) { if (same_clid(&clp->cl_clientid, clid)) { if ((bool)clp->cl_minorversion != sessions) return NULL; @@ -1387,27 +1421,15 @@ static bool clp_used_exchangeid(struct nfs4_client *clp) } static struct nfs4_client * -find_confirmed_client_by_str(const char *dname, unsigned int hashval) +find_confirmed_client_by_name(struct xdr_netobj *name, struct nfsd_net *nn) { - struct nfs4_client *clp; - - list_for_each_entry(clp, &conf_str_hashtbl[hashval], cl_strhash) { - if (same_name(clp->cl_recdir, dname)) - return clp; - } - return NULL; + return find_clp_in_name_tree(name, &nn->conf_name_tree); } static struct nfs4_client * -find_unconfirmed_client_by_str(const 
char *dname, unsigned int hashval) +find_unconfirmed_client_by_name(struct xdr_netobj *name, struct nfsd_net *nn) { - struct nfs4_client *clp; - - list_for_each_entry(clp, &unconf_str_hashtbl[hashval], cl_strhash) { - if (same_name(clp->cl_recdir, dname)) - return clp; - } - return NULL; + return find_clp_in_name_tree(name, &nn->unconf_name_tree); } static void @@ -1428,7 +1450,7 @@ gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se, struct svc_r else goto out_err; - conn->cb_addrlen = rpc_uaddr2sockaddr(&init_net, se->se_callback_addr_val, + conn->cb_addrlen = rpc_uaddr2sockaddr(clp->net, se->se_callback_addr_val, se->se_callback_addr_len, (struct sockaddr *)&conn->cb_addr, sizeof(conn->cb_addr)); @@ -1572,12 +1594,11 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, { struct nfs4_client *unconf, *conf, *new; __be32 status; - unsigned int strhashval; - char dname[HEXDIR_LEN]; char addr_str[INET6_ADDRSTRLEN]; nfs4_verifier verf = exid->verifier; struct sockaddr *sa = svc_addr(rqstp); bool update = exid->flags & EXCHGID4_FLAG_UPD_CONFIRMED_REC_A; + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); rpc_ntop(sa, addr_str, sizeof(addr_str)); dprintk("%s rqstp=%p exid=%p clname.len=%u clname.data=%p " @@ -1592,24 +1613,16 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, switch (exid->spa_how) { case SP4_NONE: break; + default: /* checked by xdr code */ + WARN_ON_ONCE(1); case SP4_SSV: - return nfserr_serverfault; - default: - BUG(); /* checked by xdr code */ case SP4_MACH_CRED: return nfserr_serverfault; /* no excuse :-/ */ } - status = nfs4_make_rec_clidname(dname, &exid->clname); - - if (status) - return status; - - strhashval = clientstr_hashval(dname); - /* Cases below refer to rfc 5661 section 18.35.4: */ nfs4_lock_state(); - conf = find_confirmed_client_by_str(dname, strhashval); + conf = find_confirmed_client_by_name(&exid->clname, nn); if (conf) { bool creds_match = same_creds(&conf->cl_cred, &rqstp->rq_cred); bool verfs_match = 
same_verf(&verf, &conf->cl_verifier); @@ -1654,21 +1667,21 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, goto out; } - unconf = find_unconfirmed_client_by_str(dname, strhashval); + unconf = find_unconfirmed_client_by_name(&exid->clname, nn); if (unconf) /* case 4, possible retry or client restart */ expire_client(unconf); /* case 1 (normal case) */ out_new: - new = create_client(exid->clname, dname, rqstp, &verf); + new = create_client(exid->clname, rqstp, &verf); if (new == NULL) { status = nfserr_jukebox; goto out; } new->cl_minorversion = 1; - gen_clid(new); - add_to_unconfirmed(new, strhashval); + gen_clid(new, nn); + add_to_unconfirmed(new); out_copy: exid->clientid.cl_boot = new->cl_clientid.cl_boot; exid->clientid.cl_id = new->cl_clientid.cl_id; @@ -1761,12 +1774,13 @@ nfsd4_create_session(struct svc_rqst *rqstp, struct nfsd4_conn *conn; struct nfsd4_clid_slot *cs_slot = NULL; __be32 status = 0; + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); if (cr_ses->flags & ~SESSION4_FLAG_MASK_A) return nfserr_inval; if (check_forechannel_attrs(cr_ses->fore_channel)) return nfserr_toosmall; - new = alloc_session(&cr_ses->fore_channel); + new = alloc_session(&cr_ses->fore_channel, nn); if (!new) return nfserr_jukebox; status = nfserr_jukebox; @@ -1775,8 +1789,8 @@ nfsd4_create_session(struct svc_rqst *rqstp, goto out_free_session; nfs4_lock_state(); - unconf = find_unconfirmed_client(&cr_ses->clientid, true); - conf = find_confirmed_client(&cr_ses->clientid, true); + unconf = find_unconfirmed_client(&cr_ses->clientid, true, nn); + conf = find_confirmed_client(&cr_ses->clientid, true, nn); if (conf) { cs_slot = &conf->cl_cs_slot; @@ -1789,7 +1803,6 @@ nfsd4_create_session(struct svc_rqst *rqstp, goto out_free_conn; } } else if (unconf) { - unsigned int hash; struct nfs4_client *old; if (!same_creds(&unconf->cl_cred, &rqstp->rq_cred) || !rpc_cmp_addr(sa, (struct sockaddr *) &unconf->cl_addr)) { @@ -1803,8 +1816,7 @@ nfsd4_create_session(struct svc_rqst 
*rqstp, status = nfserr_seq_misordered; goto out_free_conn; } - hash = clientstr_hashval(unconf->cl_recdir); - old = find_confirmed_client_by_str(unconf->cl_recdir, hash); + old = find_confirmed_client_by_name(&unconf->cl_name, nn); if (old) expire_client(old); move_to_confirmed(unconf); @@ -1843,14 +1855,6 @@ out_free_session: goto out; } -static bool nfsd4_last_compound_op(struct svc_rqst *rqstp) -{ - struct nfsd4_compoundres *resp = rqstp->rq_resp; - struct nfsd4_compoundargs *argp = rqstp->rq_argp; - - return argp->opcnt == resp->opcnt; -} - static __be32 nfsd4_map_bcts_dir(u32 *dir) { switch (*dir) { @@ -1865,24 +1869,40 @@ static __be32 nfsd4_map_bcts_dir(u32 *dir) return nfserr_inval; } +__be32 nfsd4_backchannel_ctl(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_backchannel_ctl *bc) +{ + struct nfsd4_session *session = cstate->session; + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); + + spin_lock(&nn->client_lock); + session->se_cb_prog = bc->bc_cb_program; + session->se_cb_sec = bc->bc_cb_sec; + spin_unlock(&nn->client_lock); + + nfsd4_probe_callback(session->se_client); + + return nfs_ok; +} + __be32 nfsd4_bind_conn_to_session(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_bind_conn_to_session *bcts) { __be32 status; struct nfsd4_conn *conn; + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); if (!nfsd4_last_compound_op(rqstp)) return nfserr_not_only_op; - spin_lock(&client_lock); - cstate->session = find_in_sessionid_hashtbl(&bcts->sessionid); + spin_lock(&nn->client_lock); + cstate->session = find_in_sessionid_hashtbl(&bcts->sessionid, SVC_NET(rqstp)); /* Sorta weird: we only need the refcnt'ing because new_conn acquires * client_lock iself: */ if (cstate->session) { nfsd4_get_session(cstate->session); atomic_inc(&cstate->session->se_client->cl_refcount); } - spin_unlock(&client_lock); + spin_unlock(&nn->client_lock); if (!cstate->session) return nfserr_badsession; @@ 
-1910,6 +1930,7 @@ nfsd4_destroy_session(struct svc_rqst *r, { struct nfsd4_session *ses; __be32 status = nfserr_badsession; + struct nfsd_net *nn = net_generic(SVC_NET(r), nfsd_net_id); /* Notes: * - The confirmed nfs4_client->cl_sessionid holds destroyed sessinid @@ -1923,24 +1944,24 @@ nfsd4_destroy_session(struct svc_rqst *r, return nfserr_not_only_op; } dump_sessionid(__func__, &sessionid->sessionid); - spin_lock(&client_lock); - ses = find_in_sessionid_hashtbl(&sessionid->sessionid); + spin_lock(&nn->client_lock); + ses = find_in_sessionid_hashtbl(&sessionid->sessionid, SVC_NET(r)); if (!ses) { - spin_unlock(&client_lock); + spin_unlock(&nn->client_lock); goto out; } unhash_session(ses); - spin_unlock(&client_lock); + spin_unlock(&nn->client_lock); nfs4_lock_state(); nfsd4_probe_callback_sync(ses->se_client); nfs4_unlock_state(); - spin_lock(&client_lock); + spin_lock(&nn->client_lock); nfsd4_del_conns(ses); nfsd4_put_session_locked(ses); - spin_unlock(&client_lock); + spin_unlock(&nn->client_lock); status = nfs_ok; out: dprintk("%s returns %d\n", __func__, ntohl(status)); @@ -2006,6 +2027,7 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_slot *slot; struct nfsd4_conn *conn; __be32 status; + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); if (resp->opcnt != 1) return nfserr_sequence_pos; @@ -2018,9 +2040,9 @@ nfsd4_sequence(struct svc_rqst *rqstp, if (!conn) return nfserr_jukebox; - spin_lock(&client_lock); + spin_lock(&nn->client_lock); status = nfserr_badsession; - session = find_in_sessionid_hashtbl(&seq->sessionid); + session = find_in_sessionid_hashtbl(&seq->sessionid, SVC_NET(rqstp)); if (!session) goto out; @@ -2094,7 +2116,7 @@ out: } } kfree(conn); - spin_unlock(&client_lock); + spin_unlock(&nn->client_lock); dprintk("%s: return %d\n", __func__, ntohl(status)); return status; } @@ -2104,10 +2126,11 @@ nfsd4_destroy_clientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *csta { struct nfs4_client *conf, *unconf, *clp; 
__be32 status = 0; + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); nfs4_lock_state(); - unconf = find_unconfirmed_client(&dc->clientid, true); - conf = find_confirmed_client(&dc->clientid, true); + unconf = find_unconfirmed_client(&dc->clientid, true, nn); + conf = find_confirmed_client(&dc->clientid, true, nn); if (conf) { clp = conf; @@ -2181,20 +2204,13 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, { struct xdr_netobj clname = setclid->se_name; nfs4_verifier clverifier = setclid->se_verf; - unsigned int strhashval; struct nfs4_client *conf, *unconf, *new; __be32 status; - char dname[HEXDIR_LEN]; - - status = nfs4_make_rec_clidname(dname, &clname); - if (status) - return status; - - strhashval = clientstr_hashval(dname); + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); /* Cases below refer to rfc 3530 section 14.2.33: */ nfs4_lock_state(); - conf = find_confirmed_client_by_str(dname, strhashval); + conf = find_confirmed_client_by_name(&clname, nn); if (conf) { /* case 0: */ status = nfserr_clid_inuse; @@ -2209,21 +2225,21 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, goto out; } } - unconf = find_unconfirmed_client_by_str(dname, strhashval); + unconf = find_unconfirmed_client_by_name(&clname, nn); if (unconf) expire_client(unconf); status = nfserr_jukebox; - new = create_client(clname, dname, rqstp, &clverifier); + new = create_client(clname, rqstp, &clverifier); if (new == NULL) goto out; if (conf && same_verf(&conf->cl_verifier, &clverifier)) /* case 1: probable callback update */ copy_clid(new, conf); else /* case 4 (new client) or cases 2, 3 (client reboot): */ - gen_clid(new); + gen_clid(new, nn); new->cl_minorversion = 0; gen_callback(new, setclid, rqstp); - add_to_unconfirmed(new, strhashval); + add_to_unconfirmed(new); setclid->se_clientid.cl_boot = new->cl_clientid.cl_boot; setclid->se_clientid.cl_id = new->cl_clientid.cl_id; 
memcpy(setclid->se_confirm.data, new->cl_confirm.data, sizeof(setclid->se_confirm.data)); @@ -2243,14 +2259,14 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, nfs4_verifier confirm = setclientid_confirm->sc_confirm; clientid_t * clid = &setclientid_confirm->sc_clientid; __be32 status; - struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id); + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); if (STALE_CLIENTID(clid, nn)) return nfserr_stale_clientid; nfs4_lock_state(); - conf = find_confirmed_client(clid, false); - unconf = find_unconfirmed_client(clid, false); + conf = find_confirmed_client(clid, false, nn); + unconf = find_unconfirmed_client(clid, false, nn); /* * We try hard to give out unique clientid's, so if we get an * attempt to confirm the same clientid with a different cred, @@ -2276,9 +2292,7 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, nfsd4_probe_callback(conf); expire_client(unconf); } else { /* case 3: normal case; new or rebooted client */ - unsigned int hash = clientstr_hashval(unconf->cl_recdir); - - conf = find_confirmed_client_by_str(unconf->cl_recdir, hash); + conf = find_confirmed_client_by_name(&unconf->cl_name, nn); if (conf) expire_client(conf); move_to_confirmed(unconf); @@ -2340,7 +2354,7 @@ nfsd4_init_slabs(void) if (openowner_slab == NULL) goto out_nomem; lockowner_slab = kmem_cache_create("nfsd4_lockowners", - sizeof(struct nfs4_openowner), 0, 0, NULL); + sizeof(struct nfs4_lockowner), 0, 0, NULL); if (lockowner_slab == NULL) goto out_nomem; file_slab = kmem_cache_create("nfsd4_files", @@ -2404,7 +2418,9 @@ static inline void *alloc_stateowner(struct kmem_cache *slab, struct xdr_netobj static void hash_openowner(struct nfs4_openowner *oo, struct nfs4_client *clp, unsigned int strhashval) { - list_add(&oo->oo_owner.so_strhash, &ownerstr_hashtbl[strhashval]); + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + + list_add(&oo->oo_owner.so_strhash, &nn->ownerstr_hashtbl[strhashval]); 
list_add(&oo->oo_perclient, &clp->cl_openowners); } @@ -2444,11 +2460,13 @@ static void init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp, } static void -move_to_close_lru(struct nfs4_openowner *oo) +move_to_close_lru(struct nfs4_openowner *oo, struct net *net) { + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + dprintk("NFSD: move_to_close_lru nfs4_openowner %p\n", oo); - list_move_tail(&oo->oo_close_lru, &close_lru); + list_move_tail(&oo->oo_close_lru, &nn->close_lru); oo->oo_time = get_seconds(); } @@ -2462,13 +2480,14 @@ same_owner_str(struct nfs4_stateowner *sop, struct xdr_netobj *owner, } static struct nfs4_openowner * -find_openstateowner_str(unsigned int hashval, struct nfsd4_open *open, bool sessions) +find_openstateowner_str(unsigned int hashval, struct nfsd4_open *open, + bool sessions, struct nfsd_net *nn) { struct nfs4_stateowner *so; struct nfs4_openowner *oo; struct nfs4_client *clp; - list_for_each_entry(so, &ownerstr_hashtbl[hashval], so_strhash) { + list_for_each_entry(so, &nn->ownerstr_hashtbl[hashval], so_strhash) { if (!so->so_is_open_owner) continue; if (same_owner_str(so, &open->op_owner, &open->op_clientid)) { @@ -2555,9 +2574,14 @@ static void nfsd_break_deleg_cb(struct file_lock *fl) struct nfs4_file *fp = (struct nfs4_file *)fl->fl_owner; struct nfs4_delegation *dp; - BUG_ON(!fp); - /* We assume break_lease is only called once per lease: */ - BUG_ON(fp->fi_had_conflict); + if (!fp) { + WARN(1, "(%p)->fl_owner NULL\n", fl); + return; + } + if (fp->fi_had_conflict) { + WARN(1, "duplicate break on %p\n", fp); + return; + } /* * We don't want the locks code to timeout the lease for us; * we'll remove it ourself if a delegation isn't returned @@ -2599,14 +2623,13 @@ static __be32 nfsd4_check_seqid(struct nfsd4_compound_state *cstate, struct nfs4 __be32 nfsd4_process_open1(struct nfsd4_compound_state *cstate, - struct nfsd4_open *open) + struct nfsd4_open *open, struct nfsd_net *nn) { clientid_t *clientid = 
&open->op_clientid; struct nfs4_client *clp = NULL; unsigned int strhashval; struct nfs4_openowner *oo = NULL; __be32 status; - struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id); if (STALE_CLIENTID(&open->op_clientid, nn)) return nfserr_stale_clientid; @@ -2619,10 +2642,11 @@ nfsd4_process_open1(struct nfsd4_compound_state *cstate, return nfserr_jukebox; strhashval = ownerstr_hashval(clientid->cl_id, &open->op_owner); - oo = find_openstateowner_str(strhashval, open, cstate->minorversion); + oo = find_openstateowner_str(strhashval, open, cstate->minorversion, nn); open->op_openowner = oo; if (!oo) { - clp = find_confirmed_client(clientid, cstate->minorversion); + clp = find_confirmed_client(clientid, cstate->minorversion, + nn); if (clp == NULL) return nfserr_expired; goto new_owner; @@ -2891,7 +2915,7 @@ static void nfsd4_open_deleg_none_ext(struct nfsd4_open *open, int status) open->op_why_no_deleg = WND4_CANCELLED; break; case NFS4_SHARE_WANT_NO_DELEG: - BUG(); /* not supposed to get here */ + WARN_ON_ONCE(1); } } } @@ -2959,6 +2983,7 @@ out: } return; out_free: + unhash_stid(&dp->dl_stid); nfs4_put_delegation(dp); out_no_deleg: flag = NFS4_OPEN_DELEGATE_NONE; @@ -3104,27 +3129,32 @@ void nfsd4_cleanup_open_state(struct nfsd4_open *open, __be32 status) free_generic_stateid(open->op_stp); } +static __be32 lookup_clientid(clientid_t *clid, bool session, struct nfsd_net *nn, struct nfs4_client **clp) +{ + struct nfs4_client *found; + + if (STALE_CLIENTID(clid, nn)) + return nfserr_stale_clientid; + found = find_confirmed_client(clid, session, nn); + if (clp) + *clp = found; + return found ? 
nfs_ok : nfserr_expired; +} + __be32 nfsd4_renew(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, clientid_t *clid) { struct nfs4_client *clp; __be32 status; - struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id); + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); nfs4_lock_state(); dprintk("process_renew(%08x/%08x): starting\n", clid->cl_boot, clid->cl_id); - status = nfserr_stale_clientid; - if (STALE_CLIENTID(clid, nn)) + status = lookup_clientid(clid, cstate->minorversion, nn, &clp); + if (status) goto out; - clp = find_confirmed_client(clid, cstate->minorversion); - status = nfserr_expired; - if (clp == NULL) { - /* We assume the client took too long to RENEW. */ - dprintk("nfsd4_renew: clientid not found!\n"); - goto out; - } status = nfserr_cb_path_down; if (!list_empty(&clp->cl_delegations) && clp->cl_cb_state != NFSD4_CB_UP) @@ -3136,44 +3166,42 @@ out: } static void -nfsd4_end_grace(struct net *net) +nfsd4_end_grace(struct nfsd_net *nn) { - struct nfsd_net *nn = net_generic(net, nfsd_net_id); - /* do nothing if grace period already ended */ if (nn->grace_ended) return; dprintk("NFSD: end of grace period\n"); nn->grace_ended = true; - nfsd4_record_grace_done(net, nn->boot_time); + nfsd4_record_grace_done(nn, nn->boot_time); locks_end_grace(&nn->nfsd4_manager); /* * Now that every NFSv4 client has had the chance to recover and * to see the (possibly new, possibly shorter) lease time, we * can safely set the next grace time to the current lease time: */ - nfsd4_grace = nfsd4_lease; + nn->nfsd4_grace = nn->nfsd4_lease; } static time_t -nfs4_laundromat(void) +nfs4_laundromat(struct nfsd_net *nn) { struct nfs4_client *clp; struct nfs4_openowner *oo; struct nfs4_delegation *dp; struct list_head *pos, *next, reaplist; - time_t cutoff = get_seconds() - nfsd4_lease; - time_t t, clientid_val = nfsd4_lease; - time_t u, test_val = nfsd4_lease; + time_t cutoff = get_seconds() - nn->nfsd4_lease; + time_t t, clientid_val = nn->nfsd4_lease; 
+ time_t u, test_val = nn->nfsd4_lease; nfs4_lock_state(); dprintk("NFSD: laundromat service - starting\n"); - nfsd4_end_grace(&init_net); + nfsd4_end_grace(nn); INIT_LIST_HEAD(&reaplist); - spin_lock(&client_lock); - list_for_each_safe(pos, next, &client_lru) { + spin_lock(&nn->client_lock); + list_for_each_safe(pos, next, &nn->client_lru) { clp = list_entry(pos, struct nfs4_client, cl_lru); if (time_after((unsigned long)clp->cl_time, (unsigned long)cutoff)) { t = clp->cl_time - cutoff; @@ -3189,7 +3217,7 @@ nfs4_laundromat(void) unhash_client_locked(clp); list_add(&clp->cl_lru, &reaplist); } - spin_unlock(&client_lock); + spin_unlock(&nn->client_lock); list_for_each_safe(pos, next, &reaplist) { clp = list_entry(pos, struct nfs4_client, cl_lru); dprintk("NFSD: purging unused client (clientid %08x)\n", @@ -3199,6 +3227,8 @@ nfs4_laundromat(void) spin_lock(&recall_lock); list_for_each_safe(pos, next, &del_recall_lru) { dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); + if (net_generic(dp->dl_stid.sc_client->net, nfsd_net_id) != nn) + continue; if (time_after((unsigned long)dp->dl_time, (unsigned long)cutoff)) { u = dp->dl_time - cutoff; if (test_val > u) @@ -3212,8 +3242,8 @@ nfs4_laundromat(void) dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); unhash_delegation(dp); } - test_val = nfsd4_lease; - list_for_each_safe(pos, next, &close_lru) { + test_val = nn->nfsd4_lease; + list_for_each_safe(pos, next, &nn->close_lru) { oo = container_of(pos, struct nfs4_openowner, oo_close_lru); if (time_after((unsigned long)oo->oo_time, (unsigned long)cutoff)) { u = oo->oo_time - cutoff; @@ -3231,16 +3261,19 @@ nfs4_laundromat(void) static struct workqueue_struct *laundry_wq; static void laundromat_main(struct work_struct *); -static DECLARE_DELAYED_WORK(laundromat_work, laundromat_main); static void -laundromat_main(struct work_struct *not_used) +laundromat_main(struct work_struct *laundry) { time_t t; + struct delayed_work *dwork = container_of(laundry, 
struct delayed_work, + work); + struct nfsd_net *nn = container_of(dwork, struct nfsd_net, + laundromat_work); - t = nfs4_laundromat(); + t = nfs4_laundromat(nn); dprintk("NFSD: laundromat_main - sleeping for %ld seconds\n", t); - queue_delayed_work(laundry_wq, &laundromat_work, t*HZ); + queue_delayed_work(laundry_wq, &nn->laundromat_work, t*HZ); } static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_ol_stateid *stp) @@ -3385,16 +3418,17 @@ static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid) return nfs_ok; } -static __be32 nfsd4_lookup_stateid(stateid_t *stateid, unsigned char typemask, struct nfs4_stid **s, bool sessions) +static __be32 nfsd4_lookup_stateid(stateid_t *stateid, unsigned char typemask, + struct nfs4_stid **s, bool sessions, + struct nfsd_net *nn) { struct nfs4_client *cl; - struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id); if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) return nfserr_bad_stateid; if (STALE_STATEID(stateid, nn)) return nfserr_stale_stateid; - cl = find_confirmed_client(&stateid->si_opaque.so_clid, sessions); + cl = find_confirmed_client(&stateid->si_opaque.so_clid, sessions, nn); if (!cl) return nfserr_expired; *s = find_stateid_by_type(cl, stateid, typemask); @@ -3416,6 +3450,7 @@ nfs4_preprocess_stateid_op(struct net *net, struct nfsd4_compound_state *cstate, struct nfs4_delegation *dp = NULL; struct svc_fh *current_fh = &cstate->current_fh; struct inode *ino = current_fh->fh_dentry->d_inode; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); __be32 status; if (filpp) @@ -3427,7 +3462,8 @@ nfs4_preprocess_stateid_op(struct net *net, struct nfsd4_compound_state *cstate, if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) return check_special_stateids(net, current_fh, stateid, flags); - status = nfsd4_lookup_stateid(stateid, NFS4_DELEG_STID|NFS4_OPEN_STID|NFS4_LOCK_STID, &s, cstate->minorversion); + status = nfsd4_lookup_stateid(stateid, 
NFS4_DELEG_STID|NFS4_OPEN_STID|NFS4_LOCK_STID, + &s, cstate->minorversion, nn); if (status) return status; status = check_stateid_generation(stateid, &s->sc_stateid, nfsd4_has_session(cstate)); @@ -3441,7 +3477,11 @@ nfs4_preprocess_stateid_op(struct net *net, struct nfsd4_compound_state *cstate, goto out; if (filpp) { *filpp = dp->dl_file->fi_deleg_file; - BUG_ON(!*filpp); + if (!*filpp) { + WARN_ON_ONCE(1); + status = nfserr_serverfault; + goto out; + } } break; case NFS4_OPEN_STID: @@ -3568,7 +3608,8 @@ static __be32 nfs4_seqid_op_checks(struct nfsd4_compound_state *cstate, stateid_ static __be32 nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid, stateid_t *stateid, char typemask, - struct nfs4_ol_stateid **stpp) + struct nfs4_ol_stateid **stpp, + struct nfsd_net *nn) { __be32 status; struct nfs4_stid *s; @@ -3577,7 +3618,8 @@ nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid, seqid, STATEID_VAL(stateid)); *stpp = NULL; - status = nfsd4_lookup_stateid(stateid, typemask, &s, cstate->minorversion); + status = nfsd4_lookup_stateid(stateid, typemask, &s, + cstate->minorversion, nn); if (status) return status; *stpp = openlockstateid(s); @@ -3586,13 +3628,14 @@ nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid, return nfs4_seqid_op_checks(cstate, stateid, seqid, *stpp); } -static __be32 nfs4_preprocess_confirmed_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid, stateid_t *stateid, struct nfs4_ol_stateid **stpp) +static __be32 nfs4_preprocess_confirmed_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid, + stateid_t *stateid, struct nfs4_ol_stateid **stpp, struct nfsd_net *nn) { __be32 status; struct nfs4_openowner *oo; status = nfs4_preprocess_seqid_op(cstate, seqid, stateid, - NFS4_OPEN_STID, stpp); + NFS4_OPEN_STID, stpp, nn); if (status) return status; oo = openowner((*stpp)->st_stateowner); @@ -3608,6 +3651,7 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state 
*cstate, __be32 status; struct nfs4_openowner *oo; struct nfs4_ol_stateid *stp; + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); dprintk("NFSD: nfsd4_open_confirm on file %.*s\n", (int)cstate->current_fh.fh_dentry->d_name.len, @@ -3621,7 +3665,7 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, status = nfs4_preprocess_seqid_op(cstate, oc->oc_seqid, &oc->oc_req_stateid, - NFS4_OPEN_STID, &stp); + NFS4_OPEN_STID, &stp, nn); if (status) goto out; oo = openowner(stp->st_stateowner); @@ -3664,7 +3708,7 @@ static inline void nfs4_stateid_downgrade(struct nfs4_ol_stateid *stp, u32 to_ac case NFS4_SHARE_ACCESS_BOTH: break; default: - BUG(); + WARN_ON_ONCE(1); } } @@ -3685,6 +3729,7 @@ nfsd4_open_downgrade(struct svc_rqst *rqstp, { __be32 status; struct nfs4_ol_stateid *stp; + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); dprintk("NFSD: nfsd4_open_downgrade on file %.*s\n", (int)cstate->current_fh.fh_dentry->d_name.len, @@ -3697,7 +3742,7 @@ nfsd4_open_downgrade(struct svc_rqst *rqstp, nfs4_lock_state(); status = nfs4_preprocess_confirmed_seqid_op(cstate, od->od_seqid, - &od->od_stateid, &stp); + &od->od_stateid, &stp, nn); if (status) goto out; status = nfserr_inval; @@ -3760,6 +3805,8 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, __be32 status; struct nfs4_openowner *oo; struct nfs4_ol_stateid *stp; + struct net *net = SVC_NET(rqstp); + struct nfsd_net *nn = net_generic(net, nfsd_net_id); dprintk("NFSD: nfsd4_close on file %.*s\n", (int)cstate->current_fh.fh_dentry->d_name.len, @@ -3769,7 +3816,7 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, status = nfs4_preprocess_seqid_op(cstate, close->cl_seqid, &close->cl_stateid, NFS4_OPEN_STID|NFS4_CLOSED_STID, - &stp); + &stp, nn); if (status) goto out; oo = openowner(stp->st_stateowner); @@ -3791,7 +3838,7 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, * little while to 
handle CLOSE replay. */ if (list_empty(&oo->oo_owner.so_stateids)) - move_to_close_lru(oo); + move_to_close_lru(oo, SVC_NET(rqstp)); } } out: @@ -3807,15 +3854,15 @@ nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfs4_delegation *dp; stateid_t *stateid = &dr->dr_stateid; struct nfs4_stid *s; - struct inode *inode; __be32 status; + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); if ((status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0))) return status; - inode = cstate->current_fh.fh_dentry->d_inode; nfs4_lock_state(); - status = nfsd4_lookup_stateid(stateid, NFS4_DELEG_STID, &s, cstate->minorversion); + status = nfsd4_lookup_stateid(stateid, NFS4_DELEG_STID, &s, + cstate->minorversion, nn); if (status) goto out; dp = delegstateid(s); @@ -3833,8 +3880,6 @@ out: #define LOFF_OVERFLOW(start, len) ((u64)(len) > ~(u64)(start)) -#define LOCKOWNER_INO_HASH_BITS 8 -#define LOCKOWNER_INO_HASH_SIZE (1 << LOCKOWNER_INO_HASH_BITS) #define LOCKOWNER_INO_HASH_MASK (LOCKOWNER_INO_HASH_SIZE - 1) static inline u64 @@ -3852,7 +3897,7 @@ last_byte_offset(u64 start, u64 len) { u64 end; - BUG_ON(!len); + WARN_ON_ONCE(!len); end = start + len; return end > start ? 
end - 1: NFS4_MAX_UINT64; } @@ -3864,8 +3909,6 @@ static unsigned int lockowner_ino_hashval(struct inode *inode, u32 cl_id, struct & LOCKOWNER_INO_HASH_MASK; } -static struct list_head lockowner_ino_hashtbl[LOCKOWNER_INO_HASH_SIZE]; - /* * TODO: Linux file offsets are _signed_ 64-bit quantities, which means that * we can't properly handle lock requests that go beyond the (2^63 - 1)-th @@ -3931,12 +3974,12 @@ static bool same_lockowner_ino(struct nfs4_lockowner *lo, struct inode *inode, c static struct nfs4_lockowner * find_lockowner_str(struct inode *inode, clientid_t *clid, - struct xdr_netobj *owner) + struct xdr_netobj *owner, struct nfsd_net *nn) { unsigned int hashval = lockowner_ino_hashval(inode, clid->cl_id, owner); struct nfs4_lockowner *lo; - list_for_each_entry(lo, &lockowner_ino_hashtbl[hashval], lo_owner_ino_hash) { + list_for_each_entry(lo, &nn->lockowner_ino_hashtbl[hashval], lo_owner_ino_hash) { if (same_lockowner_ino(lo, inode, clid, owner)) return lo; } @@ -3948,9 +3991,10 @@ static void hash_lockowner(struct nfs4_lockowner *lo, unsigned int strhashval, s struct inode *inode = open_stp->st_file->fi_inode; unsigned int inohash = lockowner_ino_hashval(inode, clp->cl_clientid.cl_id, &lo->lo_owner.so_owner); + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); - list_add(&lo->lo_owner.so_strhash, &ownerstr_hashtbl[strhashval]); - list_add(&lo->lo_owner_ino_hash, &lockowner_ino_hashtbl[inohash]); + list_add(&lo->lo_owner.so_strhash, &nn->ownerstr_hashtbl[strhashval]); + list_add(&lo->lo_owner_ino_hash, &nn->lockowner_ino_hashtbl[inohash]); list_add(&lo->lo_perstateid, &open_stp->st_lockowners); } @@ -4024,8 +4068,10 @@ static __be32 lookup_or_create_lock_state(struct nfsd4_compound_state *cstate, s struct nfs4_client *cl = oo->oo_owner.so_client; struct nfs4_lockowner *lo; unsigned int strhashval; + struct nfsd_net *nn = net_generic(cl->net, nfsd_net_id); - lo = find_lockowner_str(fi->fi_inode, &cl->cl_clientid, &lock->v.new.owner); + lo = 
find_lockowner_str(fi->fi_inode, &cl->cl_clientid, + &lock->v.new.owner, nn); if (lo) { if (!cstate->minorversion) return nfserr_bad_seqid; @@ -4065,7 +4111,8 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, bool new_state = false; int lkflg; int err; - struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id); + struct net *net = SVC_NET(rqstp); + struct nfsd_net *nn = net_generic(net, nfsd_net_id); dprintk("NFSD: nfsd4_lock: start=%Ld length=%Ld\n", (long long) lock->lk_offset, @@ -4099,7 +4146,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, status = nfs4_preprocess_confirmed_seqid_op(cstate, lock->lk_new_open_seqid, &lock->lk_new_open_stateid, - &open_stp); + &open_stp, nn); if (status) goto out; open_sop = openowner(open_stp->st_stateowner); @@ -4113,7 +4160,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, status = nfs4_preprocess_seqid_op(cstate, lock->lk_old_lock_seqid, &lock->lk_old_lock_stateid, - NFS4_LOCK_STID, &lock_stp); + NFS4_LOCK_STID, &lock_stp, nn); if (status) goto out; lock_sop = lockowner(lock_stp->st_stateowner); @@ -4124,10 +4171,10 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, goto out; status = nfserr_grace; - if (locks_in_grace(SVC_NET(rqstp)) && !lock->lk_reclaim) + if (locks_in_grace(net) && !lock->lk_reclaim) goto out; status = nfserr_no_grace; - if (!locks_in_grace(SVC_NET(rqstp)) && lock->lk_reclaim) + if (!locks_in_grace(net) && lock->lk_reclaim) goto out; file_lock = locks_alloc_lock(); @@ -4238,7 +4285,7 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct file_lock *file_lock = NULL; struct nfs4_lockowner *lo; __be32 status; - struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id); + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); if (locks_in_grace(SVC_NET(rqstp))) return nfserr_grace; @@ -4248,9 +4295,11 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state 
*cstate, nfs4_lock_state(); - status = nfserr_stale_clientid; - if (!nfsd4_has_session(cstate) && STALE_CLIENTID(&lockt->lt_clientid, nn)) - goto out; + if (!nfsd4_has_session(cstate)) { + status = lookup_clientid(&lockt->lt_clientid, false, nn, NULL); + if (status) + goto out; + } if ((status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0))) goto out; @@ -4278,7 +4327,7 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, goto out; } - lo = find_lockowner_str(inode, &lockt->lt_clientid, &lockt->lt_owner); + lo = find_lockowner_str(inode, &lockt->lt_clientid, &lockt->lt_owner, nn); if (lo) file_lock->fl_owner = (fl_owner_t)lo; file_lock->fl_pid = current->tgid; @@ -4313,7 +4362,8 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct file_lock *file_lock = NULL; __be32 status; int err; - + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); + dprintk("NFSD: nfsd4_locku: start=%Ld length=%Ld\n", (long long) locku->lu_offset, (long long) locku->lu_length); @@ -4324,7 +4374,8 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, nfs4_lock_state(); status = nfs4_preprocess_seqid_op(cstate, locku->lu_seqid, - &locku->lu_stateid, NFS4_LOCK_STID, &stp); + &locku->lu_stateid, NFS4_LOCK_STID, + &stp, nn); if (status) goto out; filp = find_any_file(stp->st_file); @@ -4414,23 +4465,21 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp, struct list_head matches; unsigned int hashval = ownerstr_hashval(clid->cl_id, owner); __be32 status; - struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id); + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); dprintk("nfsd4_release_lockowner clientid: (%08x/%08x):\n", clid->cl_boot, clid->cl_id); - /* XXX check for lease expiration */ - - status = nfserr_stale_clientid; - if (STALE_CLIENTID(clid, nn)) - return status; - nfs4_lock_state(); + status = lookup_clientid(clid, cstate->minorversion, nn, NULL); + if (status) + goto out; + status 
= nfserr_locks_held; INIT_LIST_HEAD(&matches); - list_for_each_entry(sop, &ownerstr_hashtbl[hashval], so_strhash) { + list_for_each_entry(sop, &nn->ownerstr_hashtbl[hashval], so_strhash) { if (sop->so_is_open_owner) continue; if (!same_owner_str(sop, owner, clid)) @@ -4466,73 +4515,74 @@ alloc_reclaim(void) return kmalloc(sizeof(struct nfs4_client_reclaim), GFP_KERNEL); } -int -nfs4_has_reclaimed_state(const char *name, bool use_exchange_id) +bool +nfs4_has_reclaimed_state(const char *name, struct nfsd_net *nn) { - unsigned int strhashval = clientstr_hashval(name); - struct nfs4_client *clp; + struct nfs4_client_reclaim *crp; - clp = find_confirmed_client_by_str(name, strhashval); - if (!clp) - return 0; - return test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags); + crp = nfsd4_find_reclaim_client(name, nn); + return (crp && crp->cr_clp); } /* * failure => all reset bets are off, nfserr_no_grace... */ -int -nfs4_client_to_reclaim(const char *name) +struct nfs4_client_reclaim * +nfs4_client_to_reclaim(const char *name, struct nfsd_net *nn) { unsigned int strhashval; - struct nfs4_client_reclaim *crp = NULL; + struct nfs4_client_reclaim *crp; dprintk("NFSD nfs4_client_to_reclaim NAME: %.*s\n", HEXDIR_LEN, name); crp = alloc_reclaim(); - if (!crp) - return 0; - strhashval = clientstr_hashval(name); - INIT_LIST_HEAD(&crp->cr_strhash); - list_add(&crp->cr_strhash, &reclaim_str_hashtbl[strhashval]); - memcpy(crp->cr_recdir, name, HEXDIR_LEN); - reclaim_str_hashtbl_size++; - return 1; + if (crp) { + strhashval = clientstr_hashval(name); + INIT_LIST_HEAD(&crp->cr_strhash); + list_add(&crp->cr_strhash, &nn->reclaim_str_hashtbl[strhashval]); + memcpy(crp->cr_recdir, name, HEXDIR_LEN); + crp->cr_clp = NULL; + nn->reclaim_str_hashtbl_size++; + } + return crp; } void -nfs4_release_reclaim(void) +nfs4_remove_reclaim_record(struct nfs4_client_reclaim *crp, struct nfsd_net *nn) +{ + list_del(&crp->cr_strhash); + kfree(crp); + nn->reclaim_str_hashtbl_size--; +} + +void 
+nfs4_release_reclaim(struct nfsd_net *nn) { struct nfs4_client_reclaim *crp = NULL; int i; for (i = 0; i < CLIENT_HASH_SIZE; i++) { - while (!list_empty(&reclaim_str_hashtbl[i])) { - crp = list_entry(reclaim_str_hashtbl[i].next, + while (!list_empty(&nn->reclaim_str_hashtbl[i])) { + crp = list_entry(nn->reclaim_str_hashtbl[i].next, struct nfs4_client_reclaim, cr_strhash); - list_del(&crp->cr_strhash); - kfree(crp); - reclaim_str_hashtbl_size--; + nfs4_remove_reclaim_record(crp, nn); } } - BUG_ON(reclaim_str_hashtbl_size); + WARN_ON_ONCE(nn->reclaim_str_hashtbl_size); } /* * called from OPEN, CLAIM_PREVIOUS with a new clientid. */ struct nfs4_client_reclaim * -nfsd4_find_reclaim_client(struct nfs4_client *clp) +nfsd4_find_reclaim_client(const char *recdir, struct nfsd_net *nn) { unsigned int strhashval; struct nfs4_client_reclaim *crp = NULL; - dprintk("NFSD: nfs4_find_reclaim_client for %.*s with recdir %s\n", - clp->cl_name.len, clp->cl_name.data, - clp->cl_recdir); + dprintk("NFSD: nfs4_find_reclaim_client for recdir %s\n", recdir); - /* find clp->cl_name in reclaim_str_hashtbl */ - strhashval = clientstr_hashval(clp->cl_recdir); - list_for_each_entry(crp, &reclaim_str_hashtbl[strhashval], cr_strhash) { - if (same_name(crp->cr_recdir, clp->cl_recdir)) { + strhashval = clientstr_hashval(recdir); + list_for_each_entry(crp, &nn->reclaim_str_hashtbl[strhashval], cr_strhash) { + if (same_name(crp->cr_recdir, recdir)) { return crp; } } @@ -4543,12 +4593,12 @@ nfsd4_find_reclaim_client(struct nfs4_client *clp) * Called from OPEN. Look for clientid in reclaim list. 
*/ __be32 -nfs4_check_open_reclaim(clientid_t *clid, bool sessions) +nfs4_check_open_reclaim(clientid_t *clid, bool sessions, struct nfsd_net *nn) { struct nfs4_client *clp; /* find clientid in conf_id_hashtbl */ - clp = find_confirmed_client(clid, sessions); + clp = find_confirmed_client(clid, sessions, nn); if (clp == NULL) return nfserr_reclaim_bad; @@ -4557,83 +4607,41 @@ nfs4_check_open_reclaim(clientid_t *clid, bool sessions) #ifdef CONFIG_NFSD_FAULT_INJECTION -void nfsd_forget_clients(u64 num) +u64 nfsd_forget_client(struct nfs4_client *clp, u64 max) { - struct nfs4_client *clp, *next; - int count = 0; - - nfs4_lock_state(); - list_for_each_entry_safe(clp, next, &client_lru, cl_lru) { - expire_client(clp); - if (++count == num) - break; - } - nfs4_unlock_state(); - - printk(KERN_INFO "NFSD: Forgot %d clients", count); + expire_client(clp); + return 1; } -static void release_lockowner_sop(struct nfs4_stateowner *sop) +u64 nfsd_print_client(struct nfs4_client *clp, u64 num) { - release_lockowner(lockowner(sop)); + char buf[INET6_ADDRSTRLEN]; + rpc_ntop((struct sockaddr *)&clp->cl_addr, buf, sizeof(buf)); + printk(KERN_INFO "NFS Client: %s\n", buf); + return 1; } -static void release_openowner_sop(struct nfs4_stateowner *sop) +static void nfsd_print_count(struct nfs4_client *clp, unsigned int count, + const char *type) { - release_openowner(openowner(sop)); + char buf[INET6_ADDRSTRLEN]; + rpc_ntop((struct sockaddr *)&clp->cl_addr, buf, sizeof(buf)); + printk(KERN_INFO "NFS Client: %s has %u %s\n", buf, count, type); } -static int nfsd_release_n_owners(u64 num, bool is_open_owner, - void (*release_sop)(struct nfs4_stateowner *)) +static u64 nfsd_foreach_client_lock(struct nfs4_client *clp, u64 max, void (*func)(struct nfs4_lockowner *)) { - int i, count = 0; - struct nfs4_stateowner *sop, *next; + struct nfs4_openowner *oop; + struct nfs4_lockowner *lop, *lo_next; + struct nfs4_ol_stateid *stp, *st_next; + u64 count = 0; - for (i = 0; i < OWNER_HASH_SIZE; i++) { 
- list_for_each_entry_safe(sop, next, &ownerstr_hashtbl[i], so_strhash) { - if (sop->so_is_open_owner != is_open_owner) - continue; - release_sop(sop); - if (++count == num) - return count; - } - } - return count; -} - -void nfsd_forget_locks(u64 num) -{ - int count; - - nfs4_lock_state(); - count = nfsd_release_n_owners(num, false, release_lockowner_sop); - nfs4_unlock_state(); - - printk(KERN_INFO "NFSD: Forgot %d locks", count); -} - -void nfsd_forget_openowners(u64 num) -{ - int count; - - nfs4_lock_state(); - count = nfsd_release_n_owners(num, true, release_openowner_sop); - nfs4_unlock_state(); - - printk(KERN_INFO "NFSD: Forgot %d open owners", count); -} - -static int nfsd_process_n_delegations(u64 num, struct list_head *list) -{ - int i, count = 0; - struct nfs4_file *fp, *fnext; - struct nfs4_delegation *dp, *dnext; - - for (i = 0; i < FILE_HASH_SIZE; i++) { - list_for_each_entry_safe(fp, fnext, &file_hashtbl[i], fi_hash) { - list_for_each_entry_safe(dp, dnext, &fp->fi_delegations, dl_perfile) { - list_move(&dp->dl_recall_lru, list); - if (++count == num) + list_for_each_entry(oop, &clp->cl_openowners, oo_perclient) { + list_for_each_entry_safe(stp, st_next, &oop->oo_owner.so_stateids, st_perstateowner) { + list_for_each_entry_safe(lop, lo_next, &stp->st_lockowners, lo_perstateid) { + if (func) + func(lop); + if (++count == max) return count; } } @@ -4642,39 +4650,134 @@ static int nfsd_process_n_delegations(u64 num, struct list_head *list) return count; } -void nfsd_forget_delegations(u64 num) +u64 nfsd_forget_client_locks(struct nfs4_client *clp, u64 max) { - unsigned int count; - LIST_HEAD(victims); - struct nfs4_delegation *dp, *dnext; - - spin_lock(&recall_lock); - count = nfsd_process_n_delegations(num, &victims); - spin_unlock(&recall_lock); - - nfs4_lock_state(); - list_for_each_entry_safe(dp, dnext, &victims, dl_recall_lru) - unhash_delegation(dp); - nfs4_unlock_state(); - - printk(KERN_INFO "NFSD: Forgot %d delegations", count); + return 
nfsd_foreach_client_lock(clp, max, release_lockowner); } -void nfsd_recall_delegations(u64 num) +u64 nfsd_print_client_locks(struct nfs4_client *clp, u64 max) { - unsigned int count; + u64 count = nfsd_foreach_client_lock(clp, max, NULL); + nfsd_print_count(clp, count, "locked files"); + return count; +} + +static u64 nfsd_foreach_client_open(struct nfs4_client *clp, u64 max, void (*func)(struct nfs4_openowner *)) +{ + struct nfs4_openowner *oop, *next; + u64 count = 0; + + list_for_each_entry_safe(oop, next, &clp->cl_openowners, oo_perclient) { + if (func) + func(oop); + if (++count == max) + break; + } + + return count; +} + +u64 nfsd_forget_client_openowners(struct nfs4_client *clp, u64 max) +{ + return nfsd_foreach_client_open(clp, max, release_openowner); +} + +u64 nfsd_print_client_openowners(struct nfs4_client *clp, u64 max) +{ + u64 count = nfsd_foreach_client_open(clp, max, NULL); + nfsd_print_count(clp, count, "open files"); + return count; +} + +static u64 nfsd_find_all_delegations(struct nfs4_client *clp, u64 max, + struct list_head *victims) +{ + struct nfs4_delegation *dp, *next; + u64 count = 0; + + list_for_each_entry_safe(dp, next, &clp->cl_delegations, dl_perclnt) { + if (victims) + list_move(&dp->dl_recall_lru, victims); + if (++count == max) + break; + } + return count; +} + +u64 nfsd_forget_client_delegations(struct nfs4_client *clp, u64 max) +{ + struct nfs4_delegation *dp, *next; LIST_HEAD(victims); - struct nfs4_delegation *dp, *dnext; + u64 count; spin_lock(&recall_lock); - count = nfsd_process_n_delegations(num, &victims); - list_for_each_entry_safe(dp, dnext, &victims, dl_recall_lru) { - list_del(&dp->dl_recall_lru); - nfsd_break_one_deleg(dp); - } + count = nfsd_find_all_delegations(clp, max, &victims); spin_unlock(&recall_lock); - printk(KERN_INFO "NFSD: Recalled %d delegations", count); + list_for_each_entry_safe(dp, next, &victims, dl_recall_lru) + unhash_delegation(dp); + + return count; +} + +u64 
nfsd_recall_client_delegations(struct nfs4_client *clp, u64 max) +{ + struct nfs4_delegation *dp, *next; + LIST_HEAD(victims); + u64 count; + + spin_lock(&recall_lock); + count = nfsd_find_all_delegations(clp, max, &victims); + list_for_each_entry_safe(dp, next, &victims, dl_recall_lru) + nfsd_break_one_deleg(dp); + spin_unlock(&recall_lock); + + return count; +} + +u64 nfsd_print_client_delegations(struct nfs4_client *clp, u64 max) +{ + u64 count = 0; + + spin_lock(&recall_lock); + count = nfsd_find_all_delegations(clp, max, NULL); + spin_unlock(&recall_lock); + + nfsd_print_count(clp, count, "delegations"); + return count; +} + +u64 nfsd_for_n_state(u64 max, u64 (*func)(struct nfs4_client *, u64)) +{ + struct nfs4_client *clp, *next; + u64 count = 0; + struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, nfsd_net_id); + + if (!nfsd_netns_ready(nn)) + return 0; + + list_for_each_entry_safe(clp, next, &nn->client_lru, cl_lru) { + count += func(clp, max - count); + if ((max != 0) && (count >= max)) + break; + } + + return count; +} + +struct nfs4_client *nfsd_find_client(struct sockaddr_storage *addr, size_t addr_size) +{ + struct nfs4_client *clp; + struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, nfsd_net_id); + + if (!nfsd_netns_ready(nn)) + return NULL; + + list_for_each_entry(clp, &nn->client_lru, cl_lru) { + if (memcmp(&clp->cl_addr, addr, addr_size) == 0) + return clp; + } + return NULL; } #endif /* CONFIG_NFSD_FAULT_INJECTION */ @@ -4686,27 +4789,10 @@ nfs4_state_init(void) { int i; - for (i = 0; i < CLIENT_HASH_SIZE; i++) { - INIT_LIST_HEAD(&conf_id_hashtbl[i]); - INIT_LIST_HEAD(&conf_str_hashtbl[i]); - INIT_LIST_HEAD(&unconf_str_hashtbl[i]); - INIT_LIST_HEAD(&unconf_id_hashtbl[i]); - INIT_LIST_HEAD(&reclaim_str_hashtbl[i]); - } - for (i = 0; i < SESSION_HASH_SIZE; i++) - INIT_LIST_HEAD(&sessionid_hashtbl[i]); for (i = 0; i < FILE_HASH_SIZE; i++) { INIT_LIST_HEAD(&file_hashtbl[i]); } - for (i = 0; i < OWNER_HASH_SIZE; i++) { - 
INIT_LIST_HEAD(&ownerstr_hashtbl[i]); - } - for (i = 0; i < LOCKOWNER_INO_HASH_SIZE; i++) - INIT_LIST_HEAD(&lockowner_ino_hashtbl[i]); - INIT_LIST_HEAD(&close_lru); - INIT_LIST_HEAD(&client_lru); INIT_LIST_HEAD(&del_recall_lru); - reclaim_str_hashtbl_size = 0; } /* @@ -4730,12 +4816,100 @@ set_max_delegations(void) max_delegations = nr_free_buffer_pages() >> (20 - 2 - PAGE_SHIFT); } -/* initialization to perform when the nfsd service is started: */ +static int nfs4_state_create_net(struct net *net) +{ + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + int i; + + nn->conf_id_hashtbl = kmalloc(sizeof(struct list_head) * + CLIENT_HASH_SIZE, GFP_KERNEL); + if (!nn->conf_id_hashtbl) + goto err; + nn->unconf_id_hashtbl = kmalloc(sizeof(struct list_head) * + CLIENT_HASH_SIZE, GFP_KERNEL); + if (!nn->unconf_id_hashtbl) + goto err_unconf_id; + nn->ownerstr_hashtbl = kmalloc(sizeof(struct list_head) * + OWNER_HASH_SIZE, GFP_KERNEL); + if (!nn->ownerstr_hashtbl) + goto err_ownerstr; + nn->lockowner_ino_hashtbl = kmalloc(sizeof(struct list_head) * + LOCKOWNER_INO_HASH_SIZE, GFP_KERNEL); + if (!nn->lockowner_ino_hashtbl) + goto err_lockowner_ino; + nn->sessionid_hashtbl = kmalloc(sizeof(struct list_head) * + SESSION_HASH_SIZE, GFP_KERNEL); + if (!nn->sessionid_hashtbl) + goto err_sessionid; + + for (i = 0; i < CLIENT_HASH_SIZE; i++) { + INIT_LIST_HEAD(&nn->conf_id_hashtbl[i]); + INIT_LIST_HEAD(&nn->unconf_id_hashtbl[i]); + } + for (i = 0; i < OWNER_HASH_SIZE; i++) + INIT_LIST_HEAD(&nn->ownerstr_hashtbl[i]); + for (i = 0; i < LOCKOWNER_INO_HASH_SIZE; i++) + INIT_LIST_HEAD(&nn->lockowner_ino_hashtbl[i]); + for (i = 0; i < SESSION_HASH_SIZE; i++) + INIT_LIST_HEAD(&nn->sessionid_hashtbl[i]); + nn->conf_name_tree = RB_ROOT; + nn->unconf_name_tree = RB_ROOT; + INIT_LIST_HEAD(&nn->client_lru); + INIT_LIST_HEAD(&nn->close_lru); + spin_lock_init(&nn->client_lock); + + INIT_DELAYED_WORK(&nn->laundromat_work, laundromat_main); + get_net(net); + + return 0; + +err_sessionid: + 
kfree(nn->lockowner_ino_hashtbl); +err_lockowner_ino: + kfree(nn->ownerstr_hashtbl); +err_ownerstr: + kfree(nn->unconf_id_hashtbl); +err_unconf_id: + kfree(nn->conf_id_hashtbl); +err: + return -ENOMEM; +} + +static void +nfs4_state_destroy_net(struct net *net) +{ + int i; + struct nfs4_client *clp = NULL; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + struct rb_node *node, *tmp; + + for (i = 0; i < CLIENT_HASH_SIZE; i++) { + while (!list_empty(&nn->conf_id_hashtbl[i])) { + clp = list_entry(nn->conf_id_hashtbl[i].next, struct nfs4_client, cl_idhash); + destroy_client(clp); + } + } + + node = rb_first(&nn->unconf_name_tree); + while (node != NULL) { + tmp = node; + node = rb_next(tmp); + clp = rb_entry(tmp, struct nfs4_client, cl_namenode); + rb_erase(tmp, &nn->unconf_name_tree); + destroy_client(clp); + } + + kfree(nn->sessionid_hashtbl); + kfree(nn->lockowner_ino_hashtbl); + kfree(nn->ownerstr_hashtbl); + kfree(nn->unconf_id_hashtbl); + kfree(nn->conf_id_hashtbl); + put_net(net); +} int -nfs4_state_start(void) +nfs4_state_start_net(struct net *net) { - struct net *net = &init_net; struct nfsd_net *nn = net_generic(net, nfsd_net_id); int ret; @@ -4746,18 +4920,32 @@ nfs4_state_start(void) * to that instead and then do most of the rest of this on a per-net * basis. 
*/ - get_net(net); + if (net != &init_net) + return -EINVAL; + + ret = nfs4_state_create_net(net); + if (ret) + return ret; nfsd4_client_tracking_init(net); nn->boot_time = get_seconds(); locks_start_grace(net, &nn->nfsd4_manager); nn->grace_ended = false; - printk(KERN_INFO "NFSD: starting %ld-second grace period\n", - nfsd4_grace); + printk(KERN_INFO "NFSD: starting %ld-second grace period (net %p)\n", + nn->nfsd4_grace, net); + queue_delayed_work(laundry_wq, &nn->laundromat_work, nn->nfsd4_grace * HZ); + return 0; +} + +/* initialization to perform when the nfsd service is started: */ + +int +nfs4_state_start(void) +{ + int ret; + ret = set_callback_cred(); - if (ret) { - ret = -ENOMEM; - goto out_recovery; - } + if (ret) + return -ENOMEM; laundry_wq = create_singlethread_workqueue("nfsd4"); if (laundry_wq == NULL) { ret = -ENOMEM; @@ -4766,39 +4954,34 @@ nfs4_state_start(void) ret = nfsd4_create_callback_queue(); if (ret) goto out_free_laundry; - queue_delayed_work(laundry_wq, &laundromat_work, nfsd4_grace * HZ); + set_max_delegations(); + return 0; + out_free_laundry: destroy_workqueue(laundry_wq); out_recovery: - nfsd4_client_tracking_exit(net); - put_net(net); return ret; } -static void -__nfs4_state_shutdown(void) +/* should be called with the state lock held */ +void +nfs4_state_shutdown_net(struct net *net) { - int i; - struct nfs4_client *clp = NULL; struct nfs4_delegation *dp = NULL; struct list_head *pos, *next, reaplist; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + + cancel_delayed_work_sync(&nn->laundromat_work); + locks_end_grace(&nn->nfsd4_manager); - for (i = 0; i < CLIENT_HASH_SIZE; i++) { - while (!list_empty(&conf_id_hashtbl[i])) { - clp = list_entry(conf_id_hashtbl[i].next, struct nfs4_client, cl_idhash); - destroy_client(clp); - } - while (!list_empty(&unconf_str_hashtbl[i])) { - clp = list_entry(unconf_str_hashtbl[i].next, struct nfs4_client, cl_strhash); - destroy_client(clp); - } - } INIT_LIST_HEAD(&reaplist); 
spin_lock(&recall_lock); list_for_each_safe(pos, next, &del_recall_lru) { dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); + if (dp->dl_stid.sc_client->net != net) + continue; list_move(&dp->dl_recall_lru, &reaplist); } spin_unlock(&recall_lock); @@ -4807,22 +4990,14 @@ __nfs4_state_shutdown(void) unhash_delegation(dp); } - nfsd4_client_tracking_exit(&init_net); - put_net(&init_net); + nfsd4_client_tracking_exit(net); + nfs4_state_destroy_net(net); } void nfs4_state_shutdown(void) { - struct net *net = &init_net; - struct nfsd_net *nn = net_generic(net, nfsd_net_id); - - cancel_delayed_work_sync(&laundromat_work); destroy_workqueue(laundry_wq); - locks_end_grace(&nn->nfsd4_manager); - nfs4_lock_state(); - __nfs4_state_shutdown(); - nfs4_unlock_state(); nfsd4_destroy_callback_queue(); } diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index fd548d15508..0dc11586682 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -53,6 +53,7 @@ #include "vfs.h" #include "state.h" #include "cache.h" +#include "netns.h" #define NFSDDBG_FACILITY NFSDDBG_XDR @@ -65,17 +66,17 @@ #define NFS4_REFERRAL_FSID_MINOR 0x8000000ULL static __be32 -check_filename(char *str, int len, __be32 err) +check_filename(char *str, int len) { int i; if (len == 0) return nfserr_inval; if (isdotent(str, len)) - return err; + return nfserr_badname; for (i = 0; i < len; i++) if (str[i] == '/') - return err; + return nfserr_badname; return 0; } @@ -422,6 +423,86 @@ nfsd4_decode_access(struct nfsd4_compoundargs *argp, struct nfsd4_access *access DECODE_TAIL; } +static __be32 nfsd4_decode_cb_sec(struct nfsd4_compoundargs *argp, struct nfsd4_cb_sec *cbs) +{ + DECODE_HEAD; + u32 dummy, uid, gid; + char *machine_name; + int i; + int nr_secflavs; + + /* callback_sec_params4 */ + READ_BUF(4); + READ32(nr_secflavs); + cbs->flavor = (u32)(-1); + for (i = 0; i < nr_secflavs; ++i) { + READ_BUF(4); + READ32(dummy); + switch (dummy) { + case RPC_AUTH_NULL: + /* Nothing to read */ + if (cbs->flavor == 
(u32)(-1)) + cbs->flavor = RPC_AUTH_NULL; + break; + case RPC_AUTH_UNIX: + READ_BUF(8); + /* stamp */ + READ32(dummy); + + /* machine name */ + READ32(dummy); + READ_BUF(dummy); + SAVEMEM(machine_name, dummy); + + /* uid, gid */ + READ_BUF(8); + READ32(uid); + READ32(gid); + + /* more gids */ + READ_BUF(4); + READ32(dummy); + READ_BUF(dummy * 4); + if (cbs->flavor == (u32)(-1)) { + cbs->uid = uid; + cbs->gid = gid; + cbs->flavor = RPC_AUTH_UNIX; + } + break; + case RPC_AUTH_GSS: + dprintk("RPC_AUTH_GSS callback secflavor " + "not supported!\n"); + READ_BUF(8); + /* gcbp_service */ + READ32(dummy); + /* gcbp_handle_from_server */ + READ32(dummy); + READ_BUF(dummy); + p += XDR_QUADLEN(dummy); + /* gcbp_handle_from_client */ + READ_BUF(4); + READ32(dummy); + READ_BUF(dummy); + break; + default: + dprintk("Illegal callback secflavor\n"); + return nfserr_inval; + } + } + DECODE_TAIL; +} + +static __be32 nfsd4_decode_backchannel_ctl(struct nfsd4_compoundargs *argp, struct nfsd4_backchannel_ctl *bc) +{ + DECODE_HEAD; + + READ_BUF(4); + READ32(bc->bc_cb_program); + nfsd4_decode_cb_sec(argp, &bc->bc_cb_sec); + + DECODE_TAIL; +} + static __be32 nfsd4_decode_bind_conn_to_session(struct nfsd4_compoundargs *argp, struct nfsd4_bind_conn_to_session *bcts) { DECODE_HEAD; @@ -490,7 +571,7 @@ nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create READ32(create->cr_namelen); READ_BUF(create->cr_namelen); SAVEMEM(create->cr_name, create->cr_namelen); - if ((status = check_filename(create->cr_name, create->cr_namelen, nfserr_inval))) + if ((status = check_filename(create->cr_name, create->cr_namelen))) return status; status = nfsd4_decode_fattr(argp, create->cr_bmval, &create->cr_iattr, @@ -522,7 +603,7 @@ nfsd4_decode_link(struct nfsd4_compoundargs *argp, struct nfsd4_link *link) READ32(link->li_namelen); READ_BUF(link->li_namelen); SAVEMEM(link->li_name, link->li_namelen); - if ((status = check_filename(link->li_name, link->li_namelen, nfserr_inval))) + if 
((status = check_filename(link->li_name, link->li_namelen))) return status; DECODE_TAIL; @@ -616,7 +697,7 @@ nfsd4_decode_lookup(struct nfsd4_compoundargs *argp, struct nfsd4_lookup *lookup READ32(lookup->lo_len); READ_BUF(lookup->lo_len); SAVEMEM(lookup->lo_name, lookup->lo_len); - if ((status = check_filename(lookup->lo_name, lookup->lo_len, nfserr_noent))) + if ((status = check_filename(lookup->lo_name, lookup->lo_len))) return status; DECODE_TAIL; @@ -780,7 +861,7 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open) READ32(open->op_fname.len); READ_BUF(open->op_fname.len); SAVEMEM(open->op_fname.data, open->op_fname.len); - if ((status = check_filename(open->op_fname.data, open->op_fname.len, nfserr_inval))) + if ((status = check_filename(open->op_fname.data, open->op_fname.len))) return status; break; case NFS4_OPEN_CLAIM_PREVIOUS: @@ -795,7 +876,7 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open) READ32(open->op_fname.len); READ_BUF(open->op_fname.len); SAVEMEM(open->op_fname.data, open->op_fname.len); - if ((status = check_filename(open->op_fname.data, open->op_fname.len, nfserr_inval))) + if ((status = check_filename(open->op_fname.data, open->op_fname.len))) return status; break; case NFS4_OPEN_CLAIM_FH: @@ -907,7 +988,7 @@ nfsd4_decode_remove(struct nfsd4_compoundargs *argp, struct nfsd4_remove *remove READ32(remove->rm_namelen); READ_BUF(remove->rm_namelen); SAVEMEM(remove->rm_name, remove->rm_namelen); - if ((status = check_filename(remove->rm_name, remove->rm_namelen, nfserr_noent))) + if ((status = check_filename(remove->rm_name, remove->rm_namelen))) return status; DECODE_TAIL; @@ -925,9 +1006,9 @@ nfsd4_decode_rename(struct nfsd4_compoundargs *argp, struct nfsd4_rename *rename READ32(rename->rn_tnamelen); READ_BUF(rename->rn_tnamelen); SAVEMEM(rename->rn_tname, rename->rn_tnamelen); - if ((status = check_filename(rename->rn_sname, rename->rn_snamelen, nfserr_noent))) + if ((status = 
check_filename(rename->rn_sname, rename->rn_snamelen))) return status; - if ((status = check_filename(rename->rn_tname, rename->rn_tnamelen, nfserr_inval))) + if ((status = check_filename(rename->rn_tname, rename->rn_tnamelen))) return status; DECODE_TAIL; @@ -954,8 +1035,7 @@ nfsd4_decode_secinfo(struct nfsd4_compoundargs *argp, READ32(secinfo->si_namelen); READ_BUF(secinfo->si_namelen); SAVEMEM(secinfo->si_name, secinfo->si_namelen); - status = check_filename(secinfo->si_name, secinfo->si_namelen, - nfserr_noent); + status = check_filename(secinfo->si_name, secinfo->si_namelen); if (status) return status; DECODE_TAIL; @@ -1026,31 +1106,14 @@ nfsd4_decode_setclientid_confirm(struct nfsd4_compoundargs *argp, struct nfsd4_s static __be32 nfsd4_decode_verify(struct nfsd4_compoundargs *argp, struct nfsd4_verify *verify) { -#if 0 - struct nfsd4_compoundargs save = { - .p = argp->p, - .end = argp->end, - .rqstp = argp->rqstp, - }; - u32 ve_bmval[2]; - struct iattr ve_iattr; /* request */ - struct nfs4_acl *ve_acl; /* request */ -#endif DECODE_HEAD; if ((status = nfsd4_decode_bitmap(argp, verify->ve_bmval))) goto out; /* For convenience's sake, we compare raw xdr'd attributes in - * nfsd4_proc_verify; however we still decode here just to return - * correct error in case of bad xdr. 
*/ -#if 0 - status = nfsd4_decode_fattr(ve_bmval, &ve_iattr, &ve_acl); - if (status == nfserr_inval) { - status = nfserrno(status); - goto out; - } -#endif + * nfsd4_proc_verify */ + READ_BUF(4); READ32(verify->ve_attrlen); READ_BUF(verify->ve_attrlen); @@ -1063,7 +1126,6 @@ static __be32 nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write) { int avail; - int v; int len; DECODE_HEAD; @@ -1087,27 +1149,26 @@ nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write) __FILE__, __LINE__); goto xdr_error; } - argp->rqstp->rq_vec[0].iov_base = p; - argp->rqstp->rq_vec[0].iov_len = avail; - v = 0; - len = write->wr_buflen; - while (len > argp->rqstp->rq_vec[v].iov_len) { - len -= argp->rqstp->rq_vec[v].iov_len; - v++; - argp->rqstp->rq_vec[v].iov_base = page_address(argp->pagelist[0]); - argp->pagelist++; - if (argp->pagelen >= PAGE_SIZE) { - argp->rqstp->rq_vec[v].iov_len = PAGE_SIZE; - argp->pagelen -= PAGE_SIZE; - } else { - argp->rqstp->rq_vec[v].iov_len = argp->pagelen; - argp->pagelen -= len; - } + write->wr_head.iov_base = p; + write->wr_head.iov_len = avail; + WARN_ON(avail != (XDR_QUADLEN(avail) << 2)); + write->wr_pagelist = argp->pagelist; + + len = XDR_QUADLEN(write->wr_buflen) << 2; + if (len >= avail) { + int pages; + + len -= avail; + + pages = len >> PAGE_SHIFT; + argp->pagelist += pages; + argp->pagelen -= pages * PAGE_SIZE; + len -= pages * PAGE_SIZE; + + argp->p = (__be32 *)page_address(argp->pagelist[0]); + argp->end = argp->p + XDR_QUADLEN(PAGE_SIZE); } - argp->end = (__be32*) (argp->rqstp->rq_vec[v].iov_base + argp->rqstp->rq_vec[v].iov_len); - argp->p = (__be32*) (argp->rqstp->rq_vec[v].iov_base + (XDR_QUADLEN(len) << 2)); - argp->rqstp->rq_vec[v].iov_len = len; - write->wr_vlen = v+1; + argp->p += XDR_QUADLEN(len); DECODE_TAIL; } @@ -1237,11 +1298,7 @@ nfsd4_decode_create_session(struct nfsd4_compoundargs *argp, struct nfsd4_create_session *sess) { DECODE_HEAD; - u32 dummy; - char *machine_name; - int 
i; - int nr_secflavs; READ_BUF(16); COPYMEM(&sess->clientid, 8); @@ -1282,58 +1339,9 @@ nfsd4_decode_create_session(struct nfsd4_compoundargs *argp, goto xdr_error; } - READ_BUF(8); + READ_BUF(4); READ32(sess->callback_prog); - - /* callback_sec_params4 */ - READ32(nr_secflavs); - for (i = 0; i < nr_secflavs; ++i) { - READ_BUF(4); - READ32(dummy); - switch (dummy) { - case RPC_AUTH_NULL: - /* Nothing to read */ - break; - case RPC_AUTH_UNIX: - READ_BUF(8); - /* stamp */ - READ32(dummy); - - /* machine name */ - READ32(dummy); - READ_BUF(dummy); - SAVEMEM(machine_name, dummy); - - /* uid, gid */ - READ_BUF(8); - READ32(sess->uid); - READ32(sess->gid); - - /* more gids */ - READ_BUF(4); - READ32(dummy); - READ_BUF(dummy * 4); - break; - case RPC_AUTH_GSS: - dprintk("RPC_AUTH_GSS callback secflavor " - "not supported!\n"); - READ_BUF(8); - /* gcbp_service */ - READ32(dummy); - /* gcbp_handle_from_server */ - READ32(dummy); - READ_BUF(dummy); - p += XDR_QUADLEN(dummy); - /* gcbp_handle_from_client */ - READ_BUF(4); - READ32(dummy); - READ_BUF(dummy); - break; - default: - dprintk("Illegal callback secflavor\n"); - return nfserr_inval; - } - } + nfsd4_decode_cb_sec(argp, &sess->cb_sec); DECODE_TAIL; } @@ -1528,7 +1536,7 @@ static nfsd4_dec nfsd41_dec_ops[] = { [OP_RELEASE_LOCKOWNER] = (nfsd4_dec)nfsd4_decode_notsupp, /* new operations for NFSv4.1 */ - [OP_BACKCHANNEL_CTL] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_BACKCHANNEL_CTL] = (nfsd4_dec)nfsd4_decode_backchannel_ctl, [OP_BIND_CONN_TO_SESSION]= (nfsd4_dec)nfsd4_decode_bind_conn_to_session, [OP_EXCHANGE_ID] = (nfsd4_dec)nfsd4_decode_exchange_id, [OP_CREATE_SESSION] = (nfsd4_dec)nfsd4_decode_create_session, @@ -1568,12 +1576,6 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) bool cachethis = false; int i; - /* - * XXX: According to spec, we should check the tag - * for UTF-8 compliance. I'm postponing this for - * now because it seems that some clients do use - * binary tags. 
- */ READ_BUF(4); READ32(argp->taglen); READ_BUF(argp->taglen + 8); @@ -1603,38 +1605,8 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) op = &argp->ops[i]; op->replay = NULL; - /* - * We can't use READ_BUF() here because we need to handle - * a missing opcode as an OP_WRITE + 1. So we need to check - * to see if we're truly at the end of our buffer or if there - * is another page we need to flip to. - */ - - if (argp->p == argp->end) { - if (argp->pagelen < 4) { - /* There isn't an opcode still on the wire */ - op->opnum = OP_WRITE + 1; - op->status = nfserr_bad_xdr; - argp->opcnt = i+1; - break; - } - - /* - * False alarm. We just hit a page boundary, but there - * is still data available. Move pointer across page - * boundary. *snip from READ_BUF* - */ - argp->p = page_address(argp->pagelist[0]); - argp->pagelist++; - if (argp->pagelen < PAGE_SIZE) { - argp->end = argp->p + (argp->pagelen>>2); - argp->pagelen = 0; - } else { - argp->end = argp->p + (PAGE_SIZE>>2); - argp->pagelen -= PAGE_SIZE; - } - } - op->opnum = ntohl(*argp->p++); + READ_BUF(4); + READ32(op->opnum); if (op->opnum >= FIRST_NFS4_OP && op->opnum <= LAST_NFS4_OP) op->status = ops->decoders[op->opnum](argp, &op->u); @@ -2014,6 +1986,22 @@ static __be32 fattr_handle_absent_fs(u32 *bmval0, u32 *bmval1, u32 *rdattr_err) return 0; } + +static int get_parent_attributes(struct svc_export *exp, struct kstat *stat) +{ + struct path path = exp->ex_path; + int err; + + path_get(&path); + while (follow_up(&path)) { + if (path.dentry != path.mnt->mnt_root) + break; + } + err = vfs_getattr(path.mnt, path.dentry, stat); + path_put(&path); + return err; +} + /* * Note: @fhp can be NULL; in this case, we might have to compose the filehandle * ourselves. 
@@ -2048,6 +2036,7 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, .mnt = exp->ex_path.mnt, .dentry = dentry, }; + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); BUG_ON(bmval1 & NFSD_WRITEONLY_ATTRS_WORD1); BUG_ON(bmval0 & ~nfsd_suppattrs0(minorversion)); @@ -2208,7 +2197,7 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, if (bmval0 & FATTR4_WORD0_LEASE_TIME) { if ((buflen -= 4) < 0) goto out_resource; - WRITE32(nfsd4_lease); + WRITE32(nn->nfsd4_lease); } if (bmval0 & FATTR4_WORD0_RDATTR_ERROR) { if ((buflen -= 4) < 0) @@ -2430,18 +2419,8 @@ out_acl: * and this is the root of a cross-mounted filesystem. */ if (ignore_crossmnt == 0 && - dentry == exp->ex_path.mnt->mnt_root) { - struct path path = exp->ex_path; - path_get(&path); - while (follow_up(&path)) { - if (path.dentry != path.mnt->mnt_root) - break; - } - err = vfs_getattr(path.mnt, path.dentry, &stat); - path_put(&path); - if (err) - goto out_nfserr; - } + dentry == exp->ex_path.mnt->mnt_root) + get_parent_attributes(exp, &stat); WRITE64(stat.ino); } if (bmval2 & FATTR4_WORD2_SUPPATTR_EXCLCREAT) { @@ -2927,7 +2906,8 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_read *read) { u32 eof; - int v, pn; + int v; + struct page *page; unsigned long maxcount; long len; __be32 *p; @@ -2946,11 +2926,15 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, len = maxcount; v = 0; while (len > 0) { - pn = resp->rqstp->rq_resused++; - resp->rqstp->rq_vec[v].iov_base = - page_address(resp->rqstp->rq_respages[pn]); + page = *(resp->rqstp->rq_next_page); + if (!page) { /* ran out of pages */ + maxcount -= len; + break; + } + resp->rqstp->rq_vec[v].iov_base = page_address(page); resp->rqstp->rq_vec[v].iov_len = len < PAGE_SIZE ? 
len : PAGE_SIZE; + resp->rqstp->rq_next_page++; v++; len -= PAGE_SIZE; } @@ -2996,8 +2980,10 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd return nfserr; if (resp->xbuf->page_len) return nfserr_resource; + if (!*resp->rqstp->rq_next_page) + return nfserr_resource; - page = page_address(resp->rqstp->rq_respages[resp->rqstp->rq_resused++]); + page = page_address(*(resp->rqstp->rq_next_page++)); maxcount = PAGE_SIZE; RESERVE_SPACE(4); @@ -3045,6 +3031,8 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 return nfserr; if (resp->xbuf->page_len) return nfserr_resource; + if (!*resp->rqstp->rq_next_page) + return nfserr_resource; RESERVE_SPACE(NFS4_VERIFIER_SIZE); savep = p; @@ -3071,7 +3059,7 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 goto err_no_verf; } - page = page_address(resp->rqstp->rq_respages[resp->rqstp->rq_resused++]); + page = page_address(*(resp->rqstp->rq_next_page++)); readdir->common.err = 0; readdir->buflen = maxcount; readdir->buffer = page; @@ -3094,8 +3082,8 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 p = readdir->buffer; *p++ = 0; /* no more entries */ *p++ = htonl(readdir->common.err == nfserr_eof); - resp->xbuf->page_len = ((char*)p) - (char*)page_address( - resp->rqstp->rq_respages[resp->rqstp->rq_resused-1]); + resp->xbuf->page_len = ((char*)p) - + (char*)page_address(*(resp->rqstp->rq_next_page-1)); /* Use rest of head for padding and remaining ops: */ resp->xbuf->tail[0].iov_base = tailbase; diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index dab350dfc37..74934284d9a 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -19,7 +19,7 @@ #include "idmap.h" #include "nfsd.h" #include "cache.h" -#include "fault_inject.h" +#include "state.h" #include "netns.h" /* @@ -186,9 +186,6 @@ static struct file_operations supported_enctypes_ops = { }; #endif /* CONFIG_SUNRPC_GSS or CONFIG_SUNRPC_GSS_MODULE 
*/ -extern int nfsd_pool_stats_open(struct inode *inode, struct file *file); -extern int nfsd_pool_stats_release(struct inode *inode, struct file *file); - static const struct file_operations pool_stats_operations = { .open = nfsd_pool_stats_open, .read = seq_read, @@ -399,6 +396,8 @@ static ssize_t write_threads(struct file *file, char *buf, size_t size) { char *mesg = buf; int rv; + struct net *net = &init_net; + if (size > 0) { int newthreads; rv = get_int(&mesg, &newthreads); @@ -406,11 +405,11 @@ static ssize_t write_threads(struct file *file, char *buf, size_t size) return rv; if (newthreads < 0) return -EINVAL; - rv = nfsd_svc(newthreads); + rv = nfsd_svc(newthreads, net); if (rv < 0) return rv; } else - rv = nfsd_nrthreads(); + rv = nfsd_nrthreads(net); return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%d\n", rv); } @@ -448,9 +447,10 @@ static ssize_t write_pool_threads(struct file *file, char *buf, size_t size) int len; int npools; int *nthreads; + struct net *net = &init_net; mutex_lock(&nfsd_mutex); - npools = nfsd_nrpools(); + npools = nfsd_nrpools(net); if (npools == 0) { /* * NFS is shut down. 
The admin can start it by @@ -478,12 +478,12 @@ static ssize_t write_pool_threads(struct file *file, char *buf, size_t size) if (nthreads[i] < 0) goto out_free; } - rv = nfsd_set_nrthreads(i, nthreads); + rv = nfsd_set_nrthreads(i, nthreads, net); if (rv) goto out_free; } - rv = nfsd_get_nrthreads(npools, nthreads); + rv = nfsd_get_nrthreads(npools, nthreads, net); if (rv) goto out_free; @@ -510,11 +510,13 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size) unsigned minor; ssize_t tlen = 0; char *sep; + struct net *net = &init_net; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); if (size>0) { - if (nfsd_serv) + if (nn->nfsd_serv) /* Cannot change versions without updating - * nfsd_serv->sv_xdrsize, and reallocing + * nn->nfsd_serv->sv_xdrsize, and reallocing * rq_argp and rq_resp */ return -EBUSY; @@ -645,11 +647,13 @@ static ssize_t write_versions(struct file *file, char *buf, size_t size) * Zero-length write. Return a list of NFSD's current listener * transports. */ -static ssize_t __write_ports_names(char *buf) +static ssize_t __write_ports_names(char *buf, struct net *net) { - if (nfsd_serv == NULL) + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + + if (nn->nfsd_serv == NULL) return 0; - return svc_xprt_names(nfsd_serv, buf, SIMPLE_TRANSACTION_LIMIT); + return svc_xprt_names(nn->nfsd_serv, buf, SIMPLE_TRANSACTION_LIMIT); } /* @@ -657,28 +661,28 @@ static ssize_t __write_ports_names(char *buf) * a socket of a supported family/protocol, and we use it as an * nfsd listener. 
*/ -static ssize_t __write_ports_addfd(char *buf) +static ssize_t __write_ports_addfd(char *buf, struct net *net) { char *mesg = buf; int fd, err; - struct net *net = &init_net; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); err = get_int(&mesg, &fd); if (err != 0 || fd < 0) return -EINVAL; - err = nfsd_create_serv(); + err = nfsd_create_serv(net); if (err != 0) return err; - err = svc_addsock(nfsd_serv, fd, buf, SIMPLE_TRANSACTION_LIMIT); + err = svc_addsock(nn->nfsd_serv, fd, buf, SIMPLE_TRANSACTION_LIMIT); if (err < 0) { nfsd_destroy(net); return err; } /* Decrease the count, but don't shut down the service */ - nfsd_serv->sv_nrthreads--; + nn->nfsd_serv->sv_nrthreads--; return err; } @@ -686,12 +690,12 @@ static ssize_t __write_ports_addfd(char *buf) * A transport listener is added by writing it's transport name and * a port number. */ -static ssize_t __write_ports_addxprt(char *buf) +static ssize_t __write_ports_addxprt(char *buf, struct net *net) { char transport[16]; struct svc_xprt *xprt; int port, err; - struct net *net = &init_net; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); if (sscanf(buf, "%15s %5u", transport, &port) != 2) return -EINVAL; @@ -699,25 +703,25 @@ static ssize_t __write_ports_addxprt(char *buf) if (port < 1 || port > USHRT_MAX) return -EINVAL; - err = nfsd_create_serv(); + err = nfsd_create_serv(net); if (err != 0) return err; - err = svc_create_xprt(nfsd_serv, transport, net, + err = svc_create_xprt(nn->nfsd_serv, transport, net, PF_INET, port, SVC_SOCK_ANONYMOUS); if (err < 0) goto out_err; - err = svc_create_xprt(nfsd_serv, transport, net, + err = svc_create_xprt(nn->nfsd_serv, transport, net, PF_INET6, port, SVC_SOCK_ANONYMOUS); if (err < 0 && err != -EAFNOSUPPORT) goto out_close; /* Decrease the count, but don't shut down the service */ - nfsd_serv->sv_nrthreads--; + nn->nfsd_serv->sv_nrthreads--; return 0; out_close: - xprt = svc_find_xprt(nfsd_serv, transport, net, PF_INET, port); + xprt = 
svc_find_xprt(nn->nfsd_serv, transport, net, PF_INET, port); if (xprt != NULL) { svc_close_xprt(xprt); svc_xprt_put(xprt); @@ -727,16 +731,17 @@ out_err: return err; } -static ssize_t __write_ports(struct file *file, char *buf, size_t size) +static ssize_t __write_ports(struct file *file, char *buf, size_t size, + struct net *net) { if (size == 0) - return __write_ports_names(buf); + return __write_ports_names(buf, net); if (isdigit(buf[0])) - return __write_ports_addfd(buf); + return __write_ports_addfd(buf, net); if (isalpha(buf[0])) - return __write_ports_addxprt(buf); + return __write_ports_addxprt(buf, net); return -EINVAL; } @@ -787,9 +792,10 @@ static ssize_t __write_ports(struct file *file, char *buf, size_t size) static ssize_t write_ports(struct file *file, char *buf, size_t size) { ssize_t rv; + struct net *net = &init_net; mutex_lock(&nfsd_mutex); - rv = __write_ports(file, buf, size); + rv = __write_ports(file, buf, size, net); mutex_unlock(&nfsd_mutex); return rv; } @@ -821,6 +827,9 @@ int nfsd_max_blksize; static ssize_t write_maxblksize(struct file *file, char *buf, size_t size) { char *mesg = buf; + struct net *net = &init_net; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + if (size > 0) { int bsize; int rv = get_int(&mesg, &bsize); @@ -835,7 +844,7 @@ static ssize_t write_maxblksize(struct file *file, char *buf, size_t size) bsize = NFSSVC_MAXBLKSIZE; bsize &= ~(1024-1); mutex_lock(&nfsd_mutex); - if (nfsd_serv) { + if (nn->nfsd_serv) { mutex_unlock(&nfsd_mutex); return -EBUSY; } @@ -848,13 +857,14 @@ static ssize_t write_maxblksize(struct file *file, char *buf, size_t size) } #ifdef CONFIG_NFSD_V4 -static ssize_t __nfsd4_write_time(struct file *file, char *buf, size_t size, time_t *time) +static ssize_t __nfsd4_write_time(struct file *file, char *buf, size_t size, + time_t *time, struct nfsd_net *nn) { char *mesg = buf; int rv, i; if (size > 0) { - if (nfsd_serv) + if (nn->nfsd_serv) return -EBUSY; rv = get_int(&mesg, &i); if (rv) @@ 
-879,12 +889,13 @@ static ssize_t __nfsd4_write_time(struct file *file, char *buf, size_t size, tim return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%ld\n", *time); } -static ssize_t nfsd4_write_time(struct file *file, char *buf, size_t size, time_t *time) +static ssize_t nfsd4_write_time(struct file *file, char *buf, size_t size, + time_t *time, struct nfsd_net *nn) { ssize_t rv; mutex_lock(&nfsd_mutex); - rv = __nfsd4_write_time(file, buf, size, time); + rv = __nfsd4_write_time(file, buf, size, time, nn); mutex_unlock(&nfsd_mutex); return rv; } @@ -912,7 +923,8 @@ static ssize_t nfsd4_write_time(struct file *file, char *buf, size_t size, time_ */ static ssize_t write_leasetime(struct file *file, char *buf, size_t size) { - return nfsd4_write_time(file, buf, size, &nfsd4_lease); + struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id); + return nfsd4_write_time(file, buf, size, &nn->nfsd4_lease, nn); } /** @@ -927,17 +939,19 @@ static ssize_t write_leasetime(struct file *file, char *buf, size_t size) */ static ssize_t write_gracetime(struct file *file, char *buf, size_t size) { - return nfsd4_write_time(file, buf, size, &nfsd4_grace); + struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id); + return nfsd4_write_time(file, buf, size, &nn->nfsd4_grace, nn); } -static ssize_t __write_recoverydir(struct file *file, char *buf, size_t size) +static ssize_t __write_recoverydir(struct file *file, char *buf, size_t size, + struct nfsd_net *nn) { char *mesg = buf; char *recdir; int len, status; if (size > 0) { - if (nfsd_serv) + if (nn->nfsd_serv) return -EBUSY; if (size > PATH_MAX || buf[size-1] != '\n') return -EINVAL; @@ -981,9 +995,10 @@ static ssize_t __write_recoverydir(struct file *file, char *buf, size_t size) static ssize_t write_recoverydir(struct file *file, char *buf, size_t size) { ssize_t rv; + struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id); mutex_lock(&nfsd_mutex); - rv = __write_recoverydir(file, buf, size); + rv = __write_recoverydir(file, 
buf, size, nn); mutex_unlock(&nfsd_mutex); return rv; } @@ -1063,6 +1078,7 @@ int nfsd_net_id; static __net_init int nfsd_init_net(struct net *net) { int retval; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); retval = nfsd_export_init(net); if (retval) @@ -1070,6 +1086,8 @@ static __net_init int nfsd_init_net(struct net *net) retval = nfsd_idmap_init(net); if (retval) goto out_idmap_error; + nn->nfsd4_lease = 90; /* default lease time */ + nn->nfsd4_grace = 90; return 0; out_idmap_error: diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 80d5ce40aad..de23db255c6 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -55,7 +55,6 @@ extern struct svc_version nfsd_version2, nfsd_version3, nfsd_version4; extern u32 nfsd_supported_minorversion; extern struct mutex nfsd_mutex; -extern struct svc_serv *nfsd_serv; extern spinlock_t nfsd_drc_lock; extern unsigned int nfsd_drc_max_mem; extern unsigned int nfsd_drc_mem_used; @@ -65,26 +64,17 @@ extern const struct seq_operations nfs_exports_op; /* * Function prototypes. 
*/ -int nfsd_svc(int nrservs); +int nfsd_svc(int nrservs, struct net *net); int nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp); -int nfsd_nrthreads(void); -int nfsd_nrpools(void); -int nfsd_get_nrthreads(int n, int *); -int nfsd_set_nrthreads(int n, int *); +int nfsd_nrthreads(struct net *); +int nfsd_nrpools(struct net *); +int nfsd_get_nrthreads(int n, int *, struct net *); +int nfsd_set_nrthreads(int n, int *, struct net *); int nfsd_pool_stats_open(struct inode *, struct file *); int nfsd_pool_stats_release(struct inode *, struct file *); -static inline void nfsd_destroy(struct net *net) -{ - int destroy = (nfsd_serv->sv_nrthreads == 1); - - if (destroy) - svc_shutdown_net(nfsd_serv, net); - svc_destroy(nfsd_serv); - if (destroy) - nfsd_serv = NULL; -} +void nfsd_destroy(struct net *net); #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) #ifdef CONFIG_NFSD_V2_ACL @@ -103,7 +93,7 @@ enum vers_op {NFSD_SET, NFSD_CLEAR, NFSD_TEST, NFSD_AVAIL }; int nfsd_vers(int vers, enum vers_op change); int nfsd_minorversion(u32 minorversion, enum vers_op change); void nfsd_reset_versions(void); -int nfsd_create_serv(void); +int nfsd_create_serv(struct net *net); extern int nfsd_max_blksize; @@ -121,7 +111,9 @@ void nfs4_state_init(void); int nfsd4_init_slabs(void); void nfsd4_free_slabs(void); int nfs4_state_start(void); +int nfs4_state_start_net(struct net *net); void nfs4_state_shutdown(void); +void nfs4_state_shutdown_net(struct net *net); void nfs4_reset_lease(time_t leasetime); int nfs4_reset_recoverydir(char *recdir); char * nfs4_recoverydir(void); @@ -130,7 +122,9 @@ static inline void nfs4_state_init(void) { } static inline int nfsd4_init_slabs(void) { return 0; } static inline void nfsd4_free_slabs(void) { } static inline int nfs4_state_start(void) { return 0; } +static inline int nfs4_state_start_net(struct net *net) { return 0; } static inline void nfs4_state_shutdown(void) { } +static inline void nfs4_state_shutdown_net(struct net *net) { } 
static inline void nfs4_reset_lease(time_t leasetime) { } static inline int nfs4_reset_recoverydir(char *recdir) { return 0; } static inline char * nfs4_recoverydir(void) {return NULL; } @@ -265,16 +259,8 @@ void nfsd_lockd_shutdown(void); /* Check for dir entries '.' and '..' */ #define isdotent(n, l) (l < 3 && n[0] == '.' && (l == 1 || n[1] == '.')) -/* - * Time of server startup - */ -extern struct timeval nfssvc_boot; - #ifdef CONFIG_NFSD_V4 -extern time_t nfsd4_lease; -extern time_t nfsd4_grace; - /* before processing a COMPOUND operation, we have to check that there * is enough space in the buffer for XDR encode to succeed. otherwise, * we might process an operation with side effects, and be unable to diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index 032af381b3a..814afaa4458 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -572,7 +572,7 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, if (inode) _fh_update(fhp, exp, dentry); - if (fhp->fh_handle.fh_fileid_type == 255) { + if (fhp->fh_handle.fh_fileid_type == FILEID_INVALID) { fh_put(fhp); return nfserr_opnotsupp; } @@ -603,7 +603,7 @@ fh_update(struct svc_fh *fhp) goto out; _fh_update(fhp, fhp->fh_export, dentry); - if (fhp->fh_handle.fh_fileid_type == 255) + if (fhp->fh_handle.fh_fileid_type == FILEID_INVALID) return nfserr_opnotsupp; } out: diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 2013aa001da..cee62ab9d4a 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -11,7 +11,6 @@ #include #include #include -#include #include #include @@ -22,19 +21,19 @@ #include "nfsd.h" #include "cache.h" #include "vfs.h" +#include "netns.h" #define NFSDDBG_FACILITY NFSDDBG_SVC extern struct svc_program nfsd_program; static int nfsd(void *vrqstp); -struct timeval nfssvc_boot; /* - * nfsd_mutex protects nfsd_serv -- both the pointer itself and the members + * nfsd_mutex protects nn->nfsd_serv -- both the pointer itself and the members * of the svc_serv struct. 
In particular, ->sv_nrthreads but also to some * extent ->sv_temp_socks and ->sv_permsocks. It also protects nfsdstats.th_cnt * - * If (out side the lock) nfsd_serv is non-NULL, then it must point to a + * If (out side the lock) nn->nfsd_serv is non-NULL, then it must point to a * properly initialised 'struct svc_serv' with ->sv_nrthreads > 0. That number * of nfsd threads must exist and each must listed in ->sp_all_threads in each * entry of ->sv_pools[]. @@ -52,7 +51,6 @@ struct timeval nfssvc_boot; * nfsd_versions */ DEFINE_MUTEX(nfsd_mutex); -struct svc_serv *nfsd_serv; /* * nfsd_drc_lock protects nfsd_drc_max_pages and nfsd_drc_pages_used. @@ -173,28 +171,32 @@ int nfsd_minorversion(u32 minorversion, enum vers_op change) */ #define NFSD_MAXSERVS 8192 -int nfsd_nrthreads(void) +int nfsd_nrthreads(struct net *net) { int rv = 0; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + mutex_lock(&nfsd_mutex); - if (nfsd_serv) - rv = nfsd_serv->sv_nrthreads; + if (nn->nfsd_serv) + rv = nn->nfsd_serv->sv_nrthreads; mutex_unlock(&nfsd_mutex); return rv; } -static int nfsd_init_socks(void) +static int nfsd_init_socks(struct net *net) { int error; - if (!list_empty(&nfsd_serv->sv_permsocks)) + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + + if (!list_empty(&nn->nfsd_serv->sv_permsocks)) return 0; - error = svc_create_xprt(nfsd_serv, "udp", &init_net, PF_INET, NFS_PORT, + error = svc_create_xprt(nn->nfsd_serv, "udp", net, PF_INET, NFS_PORT, SVC_SOCK_DEFAULTS); if (error < 0) return error; - error = svc_create_xprt(nfsd_serv, "tcp", &init_net, PF_INET, NFS_PORT, + error = svc_create_xprt(nn->nfsd_serv, "tcp", net, PF_INET, NFS_PORT, SVC_SOCK_DEFAULTS); if (error < 0) return error; @@ -202,14 +204,15 @@ static int nfsd_init_socks(void) return 0; } -static bool nfsd_up = false; +static int nfsd_users = 0; -static int nfsd_startup(int nrservs) +static int nfsd_startup_generic(int nrservs) { int ret; - if (nfsd_up) + if (nfsd_users++) return 0; + /* * Readahead 
param cache - will no-op if it already exists. * (Note therefore results will be suboptimal if number of @@ -218,43 +221,79 @@ static int nfsd_startup(int nrservs) ret = nfsd_racache_init(2*nrservs); if (ret) return ret; - ret = nfsd_init_socks(); - if (ret) - goto out_racache; - ret = lockd_up(&init_net); - if (ret) - goto out_racache; ret = nfs4_state_start(); if (ret) - goto out_lockd; - nfsd_up = true; + goto out_racache; return 0; -out_lockd: - lockd_down(&init_net); + out_racache: nfsd_racache_shutdown(); return ret; } -static void nfsd_shutdown(void) +static void nfsd_shutdown_generic(void) { + if (--nfsd_users) + return; + + nfs4_state_shutdown(); + nfsd_racache_shutdown(); +} + +static int nfsd_startup_net(int nrservs, struct net *net) +{ + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + int ret; + + if (nn->nfsd_net_up) + return 0; + + ret = nfsd_startup_generic(nrservs); + if (ret) + return ret; + ret = nfsd_init_socks(net); + if (ret) + goto out_socks; + ret = lockd_up(net); + if (ret) + goto out_socks; + ret = nfs4_state_start_net(net); + if (ret) + goto out_lockd; + + nn->nfsd_net_up = true; + return 0; + +out_lockd: + lockd_down(net); +out_socks: + nfsd_shutdown_generic(); + return ret; +} + +static void nfsd_shutdown_net(struct net *net) +{ + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + + nfs4_state_shutdown_net(net); + lockd_down(net); + nn->nfsd_net_up = false; + nfsd_shutdown_generic(); +} + +static void nfsd_last_thread(struct svc_serv *serv, struct net *net) +{ + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + /* * write_ports can create the server without actually starting * any threads--if we get shut down before any threads are * started, then nfsd_last_thread will be run before any of this * other initialization has been done. 
*/ - if (!nfsd_up) + if (!nn->nfsd_net_up) return; - nfs4_state_shutdown(); - lockd_down(&init_net); - nfsd_racache_shutdown(); - nfsd_up = false; -} - -static void nfsd_last_thread(struct svc_serv *serv, struct net *net) -{ - nfsd_shutdown(); + nfsd_shutdown_net(net); svc_rpcb_cleanup(serv, net); @@ -327,69 +366,84 @@ static int nfsd_get_default_max_blksize(void) return ret; } -int nfsd_create_serv(void) +int nfsd_create_serv(struct net *net) { int error; - struct net *net = current->nsproxy->net_ns; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); WARN_ON(!mutex_is_locked(&nfsd_mutex)); - if (nfsd_serv) { - svc_get(nfsd_serv); + if (nn->nfsd_serv) { + svc_get(nn->nfsd_serv); return 0; } if (nfsd_max_blksize == 0) nfsd_max_blksize = nfsd_get_default_max_blksize(); nfsd_reset_versions(); - nfsd_serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize, + nn->nfsd_serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize, nfsd_last_thread, nfsd, THIS_MODULE); - if (nfsd_serv == NULL) + if (nn->nfsd_serv == NULL) return -ENOMEM; - error = svc_bind(nfsd_serv, net); + error = svc_bind(nn->nfsd_serv, net); if (error < 0) { - svc_destroy(nfsd_serv); + svc_destroy(nn->nfsd_serv); return error; } set_max_drc(); - do_gettimeofday(&nfssvc_boot); /* record boot time */ + do_gettimeofday(&nn->nfssvc_boot); /* record boot time */ return 0; } -int nfsd_nrpools(void) +int nfsd_nrpools(struct net *net) { - if (nfsd_serv == NULL) + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + + if (nn->nfsd_serv == NULL) return 0; else - return nfsd_serv->sv_nrpools; + return nn->nfsd_serv->sv_nrpools; } -int nfsd_get_nrthreads(int n, int *nthreads) +int nfsd_get_nrthreads(int n, int *nthreads, struct net *net) { int i = 0; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); - if (nfsd_serv != NULL) { - for (i = 0; i < nfsd_serv->sv_nrpools && i < n; i++) - nthreads[i] = nfsd_serv->sv_pools[i].sp_nrthreads; + if (nn->nfsd_serv != NULL) { + for (i = 0; i < nn->nfsd_serv->sv_nrpools 
&& i < n; i++) + nthreads[i] = nn->nfsd_serv->sv_pools[i].sp_nrthreads; } return 0; } -int nfsd_set_nrthreads(int n, int *nthreads) +void nfsd_destroy(struct net *net) +{ + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + int destroy = (nn->nfsd_serv->sv_nrthreads == 1); + + if (destroy) + svc_shutdown_net(nn->nfsd_serv, net); + svc_destroy(nn->nfsd_serv); + if (destroy) + nn->nfsd_serv = NULL; +} + +int nfsd_set_nrthreads(int n, int *nthreads, struct net *net) { int i = 0; int tot = 0; int err = 0; - struct net *net = &init_net; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); WARN_ON(!mutex_is_locked(&nfsd_mutex)); - if (nfsd_serv == NULL || n <= 0) + if (nn->nfsd_serv == NULL || n <= 0) return 0; - if (n > nfsd_serv->sv_nrpools) - n = nfsd_serv->sv_nrpools; + if (n > nn->nfsd_serv->sv_nrpools) + n = nn->nfsd_serv->sv_nrpools; /* enforce a global maximum number of threads */ tot = 0; @@ -419,9 +473,9 @@ int nfsd_set_nrthreads(int n, int *nthreads) nthreads[0] = 1; /* apply the new numbers */ - svc_get(nfsd_serv); + svc_get(nn->nfsd_serv); for (i = 0; i < n; i++) { - err = svc_set_num_threads(nfsd_serv, &nfsd_serv->sv_pools[i], + err = svc_set_num_threads(nn->nfsd_serv, &nn->nfsd_serv->sv_pools[i], nthreads[i]); if (err) break; @@ -436,11 +490,11 @@ int nfsd_set_nrthreads(int n, int *nthreads) * this is the first time nrservs is nonzero. 
*/ int -nfsd_svc(int nrservs) +nfsd_svc(int nrservs, struct net *net) { int error; bool nfsd_up_before; - struct net *net = &init_net; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); mutex_lock(&nfsd_mutex); dprintk("nfsd: creating service\n"); @@ -449,29 +503,29 @@ nfsd_svc(int nrservs) if (nrservs > NFSD_MAXSERVS) nrservs = NFSD_MAXSERVS; error = 0; - if (nrservs == 0 && nfsd_serv == NULL) + if (nrservs == 0 && nn->nfsd_serv == NULL) goto out; - error = nfsd_create_serv(); + error = nfsd_create_serv(net); if (error) goto out; - nfsd_up_before = nfsd_up; + nfsd_up_before = nn->nfsd_net_up; - error = nfsd_startup(nrservs); + error = nfsd_startup_net(nrservs, net); if (error) goto out_destroy; - error = svc_set_num_threads(nfsd_serv, NULL, nrservs); + error = svc_set_num_threads(nn->nfsd_serv, NULL, nrservs); if (error) goto out_shutdown; - /* We are holding a reference to nfsd_serv which + /* We are holding a reference to nn->nfsd_serv which * we don't want to count in the return value, * so subtract 1 */ - error = nfsd_serv->sv_nrthreads - 1; + error = nn->nfsd_serv->sv_nrthreads - 1; out_shutdown: if (error < 0 && !nfsd_up_before) - nfsd_shutdown(); + nfsd_shutdown_net(net); out_destroy: nfsd_destroy(net); /* Release server */ out: @@ -487,6 +541,8 @@ static int nfsd(void *vrqstp) { struct svc_rqst *rqstp = (struct svc_rqst *) vrqstp; + struct svc_xprt *perm_sock = list_entry(rqstp->rq_server->sv_permsocks.next, typeof(struct svc_xprt), xpt_list); + struct net *net = perm_sock->xpt_net; int err; /* Lock module and set up kernel thread */ @@ -551,7 +607,7 @@ out: /* Release the thread */ svc_exit_thread(rqstp); - nfsd_destroy(&init_net); + nfsd_destroy(net); /* Release module */ mutex_unlock(&nfsd_mutex); @@ -640,21 +696,24 @@ nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp) } /* Store reply in cache. 
*/ - nfsd_cache_update(rqstp, proc->pc_cachetype, statp + 1); + nfsd_cache_update(rqstp, rqstp->rq_cachetype, statp + 1); return 1; } int nfsd_pool_stats_open(struct inode *inode, struct file *file) { int ret; + struct net *net = &init_net; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + mutex_lock(&nfsd_mutex); - if (nfsd_serv == NULL) { + if (nn->nfsd_serv == NULL) { mutex_unlock(&nfsd_mutex); return -ENODEV; } /* bump up the psudo refcount while traversing */ - svc_get(nfsd_serv); - ret = svc_pool_stats_open(nfsd_serv, file); + svc_get(nn->nfsd_serv); + ret = svc_pool_stats_open(nn->nfsd_serv, file); mutex_unlock(&nfsd_mutex); return ret; } diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c index 65ec595e222..979b4210697 100644 --- a/fs/nfsd/nfsxdr.c +++ b/fs/nfsd/nfsxdr.c @@ -246,7 +246,7 @@ nfssvc_decode_readargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd_readargs *args) { unsigned int len; - int v,pn; + int v; if (!(p = decode_fh(p, &args->fh))) return 0; @@ -262,8 +262,9 @@ nfssvc_decode_readargs(struct svc_rqst *rqstp, __be32 *p, */ v=0; while (len > 0) { - pn = rqstp->rq_resused++; - rqstp->rq_vec[v].iov_base = page_address(rqstp->rq_respages[pn]); + struct page *p = *(rqstp->rq_next_page++); + + rqstp->rq_vec[v].iov_base = page_address(p); rqstp->rq_vec[v].iov_len = len < PAGE_SIZE?len:PAGE_SIZE; len -= rqstp->rq_vec[v].iov_len; v++; @@ -355,7 +356,7 @@ nfssvc_decode_readlinkargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd_readli { if (!(p = decode_fh(p, &args->fh))) return 0; - args->buffer = page_address(rqstp->rq_respages[rqstp->rq_resused++]); + args->buffer = page_address(*(rqstp->rq_next_page++)); return xdr_argsize_check(rqstp, p); } @@ -396,7 +397,7 @@ nfssvc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p, if (args->count > PAGE_SIZE) args->count = PAGE_SIZE; - args->buffer = page_address(rqstp->rq_respages[rqstp->rq_resused++]); + args->buffer = page_address(*(rqstp->rq_next_page++)); return xdr_argsize_check(rqstp, p); } diff 
--git a/fs/nfsd/state.h b/fs/nfsd/state.h index e036894bce5..d1c229feed5 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -150,6 +150,12 @@ struct nfsd4_channel_attrs { u32 rdma_attrs; }; +struct nfsd4_cb_sec { + u32 flavor; /* (u32)(-1) used to mean "no valid flavor" */ + u32 uid; + u32 gid; +}; + struct nfsd4_create_session { clientid_t clientid; struct nfs4_sessionid sessionid; @@ -158,8 +164,12 @@ struct nfsd4_create_session { struct nfsd4_channel_attrs fore_channel; struct nfsd4_channel_attrs back_channel; u32 callback_prog; - u32 uid; - u32 gid; + struct nfsd4_cb_sec cb_sec; +}; + +struct nfsd4_backchannel_ctl { + u32 bc_cb_program; + struct nfsd4_cb_sec bc_cb_sec; }; struct nfsd4_bind_conn_to_session { @@ -192,6 +202,7 @@ struct nfsd4_session { struct nfs4_sessionid se_sessionid; struct nfsd4_channel_attrs se_fchannel; struct nfsd4_channel_attrs se_bchannel; + struct nfsd4_cb_sec se_cb_sec; struct list_head se_conns; u32 se_cb_prog; u32 se_cb_seq_nr; @@ -221,13 +232,12 @@ struct nfsd4_sessionid { */ struct nfs4_client { struct list_head cl_idhash; /* hash by cl_clientid.id */ - struct list_head cl_strhash; /* hash by cl_name */ + struct rb_node cl_namenode; /* link into by-name trees */ struct list_head cl_openowners; struct idr cl_stateids; /* stateid lookup */ struct list_head cl_delegations; struct list_head cl_lru; /* tail queue */ struct xdr_netobj cl_name; /* id generated by client */ - char cl_recdir[HEXDIR_LEN]; /* recovery dir */ nfs4_verifier cl_verifier; /* generated by client */ time_t cl_time; /* time of last lease renewal */ struct sockaddr_storage cl_addr; /* client ipaddress */ @@ -242,9 +252,11 @@ struct nfs4_client { #define NFSD4_CLIENT_CB_KILL (1) #define NFSD4_CLIENT_STABLE (2) /* client on stable storage */ #define NFSD4_CLIENT_RECLAIM_COMPLETE (3) /* reclaim_complete done */ +#define NFSD4_CLIENT_CONFIRMED (4) /* client is confirmed */ #define NFSD4_CLIENT_CB_FLAG_MASK (1 << NFSD4_CLIENT_CB_UPDATE | \ 1 << NFSD4_CLIENT_CB_KILL) 
unsigned long cl_flags; + struct rpc_cred *cl_cb_cred; struct rpc_clnt *cl_cb_client; u32 cl_cb_ident; #define NFSD4_CB_UP 0 @@ -271,6 +283,7 @@ struct nfs4_client { unsigned long cl_cb_slot_busy; struct rpc_wait_queue cl_cb_waitq; /* backchannel callers may */ /* wait here for slots */ + struct net *net; }; static inline void @@ -292,6 +305,7 @@ is_client_expired(struct nfs4_client *clp) */ struct nfs4_client_reclaim { struct list_head cr_strhash; /* hash by cr_name */ + struct nfs4_client *cr_clp; /* pointer to associated clp */ char cr_recdir[HEXDIR_LEN]; /* recover dir */ }; @@ -452,25 +466,26 @@ extern __be32 nfs4_preprocess_stateid_op(struct net *net, stateid_t *stateid, int flags, struct file **filp); extern void nfs4_lock_state(void); extern void nfs4_unlock_state(void); -extern int nfs4_in_grace(void); -extern void nfs4_release_reclaim(void); -extern struct nfs4_client_reclaim *nfsd4_find_reclaim_client(struct nfs4_client *crp); -extern __be32 nfs4_check_open_reclaim(clientid_t *clid, bool sessions); +void nfs4_remove_reclaim_record(struct nfs4_client_reclaim *, struct nfsd_net *); +extern void nfs4_release_reclaim(struct nfsd_net *); +extern struct nfs4_client_reclaim *nfsd4_find_reclaim_client(const char *recdir, + struct nfsd_net *nn); +extern __be32 nfs4_check_open_reclaim(clientid_t *clid, bool sessions, struct nfsd_net *nn); extern void nfs4_free_openowner(struct nfs4_openowner *); extern void nfs4_free_lockowner(struct nfs4_lockowner *); extern int set_callback_cred(void); +extern void nfsd4_init_callback(struct nfsd4_callback *); extern void nfsd4_probe_callback(struct nfs4_client *clp); extern void nfsd4_probe_callback_sync(struct nfs4_client *clp); extern void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *); -extern void nfsd4_do_callback_rpc(struct work_struct *); extern void nfsd4_cb_recall(struct nfs4_delegation *dp); extern int nfsd4_create_callback_queue(void); extern void nfsd4_destroy_callback_queue(void); extern void 
nfsd4_shutdown_callback(struct nfs4_client *); extern void nfs4_put_delegation(struct nfs4_delegation *dp); -extern __be32 nfs4_make_rec_clidname(char *clidname, struct xdr_netobj *clname); -extern int nfs4_client_to_reclaim(const char *name); -extern int nfs4_has_reclaimed_state(const char *name, bool use_exchange_id); +extern struct nfs4_client_reclaim *nfs4_client_to_reclaim(const char *name, + struct nfsd_net *nn); +extern bool nfs4_has_reclaimed_state(const char *name, struct nfsd_net *nn); extern void release_session_client(struct nfsd4_session *); extern void nfsd4_purge_closed_stateid(struct nfs4_stateowner *); @@ -480,5 +495,28 @@ extern void nfsd4_client_tracking_exit(struct net *net); extern void nfsd4_client_record_create(struct nfs4_client *clp); extern void nfsd4_client_record_remove(struct nfs4_client *clp); extern int nfsd4_client_record_check(struct nfs4_client *clp); -extern void nfsd4_record_grace_done(struct net *net, time_t boot_time); +extern void nfsd4_record_grace_done(struct nfsd_net *nn, time_t boot_time); + +/* nfs fault injection functions */ +#ifdef CONFIG_NFSD_FAULT_INJECTION +int nfsd_fault_inject_init(void); +void nfsd_fault_inject_cleanup(void); +u64 nfsd_for_n_state(u64, u64 (*)(struct nfs4_client *, u64)); +struct nfs4_client *nfsd_find_client(struct sockaddr_storage *, size_t); + +u64 nfsd_forget_client(struct nfs4_client *, u64); +u64 nfsd_forget_client_locks(struct nfs4_client*, u64); +u64 nfsd_forget_client_openowners(struct nfs4_client *, u64); +u64 nfsd_forget_client_delegations(struct nfs4_client *, u64); +u64 nfsd_recall_client_delegations(struct nfs4_client *, u64); + +u64 nfsd_print_client(struct nfs4_client *, u64); +u64 nfsd_print_client_locks(struct nfs4_client *, u64); +u64 nfsd_print_client_openowners(struct nfs4_client *, u64); +u64 nfsd_print_client_delegations(struct nfs4_client *, u64); +#else /* CONFIG_NFSD_FAULT_INJECTION */ +static inline int nfsd_fault_inject_init(void) { return 0; } +static inline void 
nfsd_fault_inject_cleanup(void) {} +#endif /* CONFIG_NFSD_FAULT_INJECTION */ + #endif /* NFSD4_STATE_H */ diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index c120b48ec30..f0a6d88d7ff 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -886,7 +886,7 @@ nfsd_splice_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf, struct splice_desc *sd) { struct svc_rqst *rqstp = sd->u.data; - struct page **pp = rqstp->rq_respages + rqstp->rq_resused; + struct page **pp = rqstp->rq_next_page; struct page *page = buf->page; size_t size; @@ -894,17 +894,15 @@ nfsd_splice_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf, if (rqstp->rq_res.page_len == 0) { get_page(page); - put_page(*pp); - *pp = page; - rqstp->rq_resused++; + put_page(*rqstp->rq_next_page); + *(rqstp->rq_next_page++) = page; rqstp->rq_res.page_base = buf->offset; rqstp->rq_res.page_len = size; } else if (page != pp[-1]) { get_page(page); - if (*pp) - put_page(*pp); - *pp = page; - rqstp->rq_resused++; + if (*rqstp->rq_next_page) + put_page(*rqstp->rq_next_page); + *(rqstp->rq_next_page++) = page; rqstp->rq_res.page_len += size; } else rqstp->rq_res.page_len += size; @@ -936,7 +934,8 @@ nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, .u.data = rqstp, }; - rqstp->rq_resused = 1; + WARN_ON_ONCE(rqstp->rq_next_page != rqstp->rq_respages + 1); + rqstp->rq_next_page = rqstp->rq_respages + 1; host_err = splice_direct_to_actor(file, &sd, nfsd_direct_splice_actor); } else { oldfs = get_fs(); @@ -1020,28 +1019,10 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, inode = dentry->d_inode; exp = fhp->fh_export; - /* - * Request sync writes if - * - the sync export option has been set, or - * - the client requested O_SYNC behavior (NFSv3 feature). - * - The file system doesn't support fsync(). - * When NFSv2 gathered writes have been configured for this volume, - * flushing the data to disk is handled separately below. 
- */ use_wgather = (rqstp->rq_vers == 2) && EX_WGATHER(exp); - if (!file->f_op->fsync) {/* COMMIT3 cannot work */ - stable = 2; - *stablep = 2; /* FILE_SYNC */ - } - if (!EX_ISSYNC(exp)) stable = 0; - if (stable && !use_wgather) { - spin_lock(&file->f_lock); - file->f_flags |= O_SYNC; - spin_unlock(&file->f_lock); - } /* Write the data. */ oldfs = get_fs(); set_fs(KERNEL_DS); @@ -1057,8 +1038,12 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, if (inode->i_mode & (S_ISUID | S_ISGID)) kill_suid(dentry); - if (stable && use_wgather) - host_err = wait_for_concurrent_writes(file); + if (stable) { + if (use_wgather) + host_err = wait_for_concurrent_writes(file); + else + host_err = vfs_fsync_range(file, offset, offset+*cnt, 0); + } out_nfserr: dprintk("nfsd: write complete host_err=%d\n", host_err); @@ -1485,13 +1470,19 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, case NFS3_CREATE_EXCLUSIVE: if ( dchild->d_inode->i_mtime.tv_sec == v_mtime && dchild->d_inode->i_atime.tv_sec == v_atime - && dchild->d_inode->i_size == 0 ) + && dchild->d_inode->i_size == 0 ) { + if (created) + *created = 1; break; + } case NFS4_CREATE_EXCLUSIVE4_1: if ( dchild->d_inode->i_mtime.tv_sec == v_mtime && dchild->d_inode->i_atime.tv_sec == v_atime - && dchild->d_inode->i_size == 0 ) + && dchild->d_inode->i_size == 0 ) { + if (created) + *created = 1; goto set_attr; + } /* fallthru */ case NFS3_CREATE_GUARDED: err = nfserr_exist; diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index acd127d4ee8..0889bfb43dc 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -385,7 +385,8 @@ struct nfsd4_write { u64 wr_offset; /* request */ u32 wr_stable_how; /* request */ u32 wr_buflen; /* request */ - int wr_vlen; + struct kvec wr_head; + struct page ** wr_pagelist; /* request */ u32 wr_bytes_written; /* response */ u32 wr_how_written; /* response */ @@ -462,6 +463,7 @@ struct nfsd4_op { /* NFSv4.1 */ struct nfsd4_exchange_id exchange_id; + struct 
nfsd4_backchannel_ctl backchannel_ctl; struct nfsd4_bind_conn_to_session bind_conn_to_session; struct nfsd4_create_session create_session; struct nfsd4_destroy_session destroy_session; @@ -526,6 +528,14 @@ static inline bool nfsd4_not_cached(struct nfsd4_compoundres *resp) || nfsd4_is_solo_sequence(resp); } +static inline bool nfsd4_last_compound_op(struct svc_rqst *rqstp) +{ + struct nfsd4_compoundres *resp = rqstp->rq_resp; + struct nfsd4_compoundargs *argp = rqstp->rq_argp; + + return argp->opcnt == resp->opcnt; +} + #define NFS4_SVC_XDRSIZE sizeof(struct nfsd4_compoundargs) static inline void @@ -566,6 +576,7 @@ extern __be32 nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp, struct nfsd4_sequence *seq); extern __be32 nfsd4_exchange_id(struct svc_rqst *rqstp, struct nfsd4_compound_state *, struct nfsd4_exchange_id *); +extern __be32 nfsd4_backchannel_ctl(struct svc_rqst *, struct nfsd4_compound_state *, struct nfsd4_backchannel_ctl *); extern __be32 nfsd4_bind_conn_to_session(struct svc_rqst *, struct nfsd4_compound_state *, struct nfsd4_bind_conn_to_session *); extern __be32 nfsd4_create_session(struct svc_rqst *, struct nfsd4_compound_state *, @@ -579,7 +590,7 @@ extern __be32 nfsd4_destroy_session(struct svc_rqst *, extern __be32 nfsd4_destroy_clientid(struct svc_rqst *, struct nfsd4_compound_state *, struct nfsd4_destroy_clientid *); __be32 nfsd4_reclaim_complete(struct svc_rqst *, struct nfsd4_compound_state *, struct nfsd4_reclaim_complete *); extern __be32 nfsd4_process_open1(struct nfsd4_compound_state *, - struct nfsd4_open *open); + struct nfsd4_open *open, struct nfsd_net *nn); extern __be32 nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open); extern void nfsd4_cleanup_open_state(struct nfsd4_open *open, __be32 status); diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index c7e6b6392ab..5b9b5b31718 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -83,6 +83,11 @@ 
enum fid_type { * 64 bit parent inode number. */ FILEID_NILFS_WITH_PARENT = 0x62, + + /* + * Filesystems must not use 0xff file ID. + */ + FILEID_INVALID = 0xff, }; struct fid { diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index f792794f663..5dc9ee4d616 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -217,6 +217,8 @@ extern int qword_get(char **bpp, char *dest, int bufsize); static inline int get_int(char **bpp, int *anint) { char buf[50]; + char *ep; + int rv; int len = qword_get(bpp, buf, sizeof(buf)); if (len < 0) @@ -224,9 +226,11 @@ static inline int get_int(char **bpp, int *anint) if (len == 0) return -ENOENT; - if (kstrtoint(buf, 0, anint)) + rv = simple_strtol(buf, &ep, 0); + if (*ep) return -EINVAL; + *anint = rv; return 0; } diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index d83db800fe0..676ddf53b3e 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -243,6 +243,7 @@ struct svc_rqst { struct page * rq_pages[RPCSVC_MAXPAGES]; struct page * *rq_respages; /* points into rq_pages */ int rq_resused; /* number of pages used for result */ + struct page * *rq_next_page; /* next reply page to use */ struct kvec rq_vec[RPCSVC_MAXPAGES]; /* generally useful.. 
*/ @@ -338,9 +339,8 @@ xdr_ressize_check(struct svc_rqst *rqstp, __be32 *p) static inline void svc_free_res_pages(struct svc_rqst *rqstp) { - while (rqstp->rq_resused) { - struct page **pp = (rqstp->rq_respages + - --rqstp->rq_resused); + while (rqstp->rq_next_page != rqstp->rq_respages) { + struct page **pp = --rqstp->rq_next_page; if (*pp) { put_page(*pp); *pp = NULL; diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h index 92ad02f0dcc..62fd1b756e9 100644 --- a/include/linux/sunrpc/svcsock.h +++ b/include/linux/sunrpc/svcsock.h @@ -26,11 +26,28 @@ struct svc_sock { void (*sk_owspace)(struct sock *); /* private TCP part */ - u32 sk_reclen; /* length of record */ - u32 sk_tcplen; /* current read length */ + /* On-the-wire fragment header: */ + __be32 sk_reclen; + /* As we receive a record, this includes the length received so + * far (including the fragment header): */ + u32 sk_tcplen; + /* Total length of the data (not including fragment headers) + * received so far in the fragments making up this rpc: */ + u32 sk_datalen; + struct page * sk_pages[RPCSVC_MAXPAGES]; /* received data */ }; +static inline u32 svc_sock_reclen(struct svc_sock *svsk) +{ + return ntohl(svsk->sk_reclen) & RPC_FRAGMENT_SIZE_MASK; +} + +static inline u32 svc_sock_final_rec(struct svc_sock *svsk) +{ + return ntohl(svsk->sk_reclen) & RPC_LAST_STREAM_FRAGMENT; +} + /* * Function prototypes. 
*/ diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 411f332de0b..795a0f4e920 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -23,7 +23,6 @@ #include #include #include -#include #include #include diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index dfa4ba69ff4..dbf12ac5ecb 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -20,7 +20,6 @@ #include #include #include -#include #include #include @@ -1041,7 +1040,7 @@ static void svc_unregister(const struct svc_serv *serv, struct net *net) } /* - * Printk the given error with the address of the client that caused it. + * dprintk the given error with the address of the client that caused it. */ static __printf(2, 3) void svc_printk(struct svc_rqst *rqstp, const char *fmt, ...) @@ -1055,8 +1054,7 @@ void svc_printk(struct svc_rqst *rqstp, const char *fmt, ...) vaf.fmt = fmt; vaf.va = &args; - net_warn_ratelimited("svc: %s: %pV", - svc_print_addr(rqstp, buf, sizeof(buf)), &vaf); + dprintk("svc: %s: %pV", svc_print_addr(rqstp, buf, sizeof(buf)), &vaf); va_end(args); } @@ -1305,7 +1303,7 @@ svc_process(struct svc_rqst *rqstp) * Setup response xdr_buf. 
* Initially it has just one page */ - rqstp->rq_resused = 1; + rqstp->rq_next_page = &rqstp->rq_respages[1]; resv->iov_base = page_address(rqstp->rq_respages[0]); resv->iov_len = 0; rqstp->rq_res.pages = rqstp->rq_respages + 1; diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index cc3020d1678..0a148c9d2a5 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -605,6 +605,7 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp) rqstp->rq_respages = rqstp->rq_pages + 1 + DIV_ROUND_UP(rqstp->rq_arg.page_len, PAGE_SIZE); } + rqstp->rq_next_page = rqstp->rq_respages+1; if (serv->sv_stats) serv->sv_stats->netudpcnt++; @@ -878,9 +879,9 @@ static unsigned int svc_tcp_restore_pages(struct svc_sock *svsk, struct svc_rqst { unsigned int i, len, npages; - if (svsk->sk_tcplen <= sizeof(rpc_fraghdr)) + if (svsk->sk_datalen == 0) return 0; - len = svsk->sk_tcplen - sizeof(rpc_fraghdr); + len = svsk->sk_datalen; npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; for (i = 0; i < npages; i++) { if (rqstp->rq_pages[i] != NULL) @@ -897,9 +898,9 @@ static void svc_tcp_save_pages(struct svc_sock *svsk, struct svc_rqst *rqstp) { unsigned int i, len, npages; - if (svsk->sk_tcplen <= sizeof(rpc_fraghdr)) + if (svsk->sk_datalen == 0) return; - len = svsk->sk_tcplen - sizeof(rpc_fraghdr); + len = svsk->sk_datalen; npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; for (i = 0; i < npages; i++) { svsk->sk_pages[i] = rqstp->rq_pages[i]; @@ -911,9 +912,9 @@ static void svc_tcp_clear_pages(struct svc_sock *svsk) { unsigned int i, len, npages; - if (svsk->sk_tcplen <= sizeof(rpc_fraghdr)) + if (svsk->sk_datalen == 0) goto out; - len = svsk->sk_tcplen - sizeof(rpc_fraghdr); + len = svsk->sk_datalen; npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; for (i = 0; i < npages; i++) { BUG_ON(svsk->sk_pages[i] == NULL); @@ -922,13 +923,12 @@ static void svc_tcp_clear_pages(struct svc_sock *svsk) } out: svsk->sk_tcplen = 0; + svsk->sk_datalen = 0; } /* - * Receive data. 
+ * Receive fragment record header. * If we haven't gotten the record length yet, get the next four bytes. - * Otherwise try to gobble up as much as possible up to the complete - * record length. */ static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp) { @@ -954,32 +954,16 @@ static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp) return -EAGAIN; } - svsk->sk_reclen = ntohl(svsk->sk_reclen); - if (!(svsk->sk_reclen & RPC_LAST_STREAM_FRAGMENT)) { - /* FIXME: technically, a record can be fragmented, - * and non-terminal fragments will not have the top - * bit set in the fragment length header. - * But apparently no known nfs clients send fragmented - * records. */ - net_notice_ratelimited("RPC: multiple fragments per record not supported\n"); - goto err_delete; - } - - svsk->sk_reclen &= RPC_FRAGMENT_SIZE_MASK; - dprintk("svc: TCP record, %d bytes\n", svsk->sk_reclen); - if (svsk->sk_reclen > serv->sv_max_mesg) { - net_notice_ratelimited("RPC: fragment too large: 0x%08lx\n", - (unsigned long)svsk->sk_reclen); + dprintk("svc: TCP record, %d bytes\n", svc_sock_reclen(svsk)); + if (svc_sock_reclen(svsk) + svsk->sk_datalen > + serv->sv_max_mesg) { + net_notice_ratelimited("RPC: fragment too large: %d\n", + svc_sock_reclen(svsk)); goto err_delete; } } - if (svsk->sk_reclen < 8) - goto err_delete; /* client is nuts. */ - - len = svsk->sk_reclen; - - return len; + return svc_sock_reclen(svsk); error: dprintk("RPC: TCP recv_record got %d\n", len); return len; @@ -1023,7 +1007,7 @@ static int receive_cb_reply(struct svc_sock *svsk, struct svc_rqst *rqstp) if (dst->iov_len < src->iov_len) return -EAGAIN; /* whatever; just giving up. 
*/ memcpy(dst->iov_base, src->iov_base, src->iov_len); - xprt_complete_rqst(req->rq_task, svsk->sk_reclen); + xprt_complete_rqst(req->rq_task, rqstp->rq_arg.len); rqstp->rq_arg.len = 0; return 0; } @@ -1042,6 +1026,17 @@ static int copy_pages_to_kvecs(struct kvec *vec, struct page **pages, int len) return i; } +static void svc_tcp_fragment_received(struct svc_sock *svsk) +{ + /* If we have more data, signal svc_xprt_enqueue() to try again */ + if (svc_recv_available(svsk) > sizeof(rpc_fraghdr)) + set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); + dprintk("svc: TCP %s record (%d bytes)\n", + svc_sock_final_rec(svsk) ? "final" : "nonfinal", + svc_sock_reclen(svsk)); + svsk->sk_tcplen = 0; + svsk->sk_reclen = 0; +} /* * Receive data from a TCP socket. @@ -1068,29 +1063,39 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) goto error; base = svc_tcp_restore_pages(svsk, rqstp); - want = svsk->sk_reclen - base; + want = svc_sock_reclen(svsk) - (svsk->sk_tcplen - sizeof(rpc_fraghdr)); vec = rqstp->rq_vec; pnum = copy_pages_to_kvecs(&vec[0], &rqstp->rq_pages[0], - svsk->sk_reclen); + svsk->sk_datalen + want); rqstp->rq_respages = &rqstp->rq_pages[pnum]; + rqstp->rq_next_page = rqstp->rq_respages + 1; /* Now receive data */ len = svc_partial_recvfrom(rqstp, vec, pnum, want, base); - if (len >= 0) + if (len >= 0) { svsk->sk_tcplen += len; - if (len != want) { + svsk->sk_datalen += len; + } + if (len != want || !svc_sock_final_rec(svsk)) { svc_tcp_save_pages(svsk, rqstp); if (len < 0 && len != -EAGAIN) - goto err_other; - dprintk("svc: incomplete TCP record (%d of %d)\n", - svsk->sk_tcplen, svsk->sk_reclen); + goto err_delete; + if (len == want) + svc_tcp_fragment_received(svsk); + else + dprintk("svc: incomplete TCP record (%d of %d)\n", + (int)(svsk->sk_tcplen - sizeof(rpc_fraghdr)), + svc_sock_reclen(svsk)); goto err_noclose; } - rqstp->rq_arg.len = svsk->sk_reclen; + if (svc_sock_reclen(svsk) < 8) + goto err_delete; /* client is nuts. 
*/ + + rqstp->rq_arg.len = svsk->sk_datalen; rqstp->rq_arg.page_base = 0; if (rqstp->rq_arg.len <= rqstp->rq_arg.head[0].iov_len) { rqstp->rq_arg.head[0].iov_len = rqstp->rq_arg.len; @@ -1107,11 +1112,8 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) len = receive_cb_reply(svsk, rqstp); /* Reset TCP read info */ - svsk->sk_reclen = 0; - svsk->sk_tcplen = 0; - /* If we have more data, signal svc_xprt_enqueue() to try again */ - if (svc_recv_available(svsk) > sizeof(rpc_fraghdr)) - set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); + svsk->sk_datalen = 0; + svc_tcp_fragment_received(svsk); if (len < 0) goto error; @@ -1120,15 +1122,14 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) if (serv->sv_stats) serv->sv_stats->nettcpcnt++; - dprintk("svc: TCP complete record (%d bytes)\n", rqstp->rq_arg.len); return rqstp->rq_arg.len; error: if (len != -EAGAIN) - goto err_other; + goto err_delete; dprintk("RPC: TCP recvfrom got EAGAIN\n"); return 0; -err_other: +err_delete: printk(KERN_NOTICE "%s: recvfrom returned errno %d\n", svsk->sk_xprt.xpt_server->sv_name, -len); set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); @@ -1305,6 +1306,7 @@ static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv) svsk->sk_reclen = 0; svsk->sk_tcplen = 0; + svsk->sk_datalen = 0; memset(&svsk->sk_pages[0], 0, sizeof(svsk->sk_pages)); tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF; diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index 41cb63b623d..0ce75524ed2 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -521,11 +521,11 @@ next_sge: rqstp->rq_pages[ch_no] = NULL; /* - * Detach res pages. svc_release must see a resused count of - * zero or it will attempt to put them. + * Detach res pages. If svc_release sees any it will attempt to + * put them. 
*/ - while (rqstp->rq_resused) - rqstp->rq_respages[--rqstp->rq_resused] = NULL; + while (rqstp->rq_next_page != rqstp->rq_respages) + *(--rqstp->rq_next_page) = NULL; return err; } @@ -550,7 +550,7 @@ static int rdma_read_complete(struct svc_rqst *rqstp, /* rq_respages starts after the last arg page */ rqstp->rq_respages = &rqstp->rq_arg.pages[page_no]; - rqstp->rq_resused = 0; + rqstp->rq_next_page = &rqstp->rq_arg.pages[page_no]; /* Rebuild rq_arg head and tail. */ rqstp->rq_arg.head[0] = head->arg.head[0]; diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index 42eb7ba0b90..c1d124dc772 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -548,6 +548,7 @@ static int send_reply(struct svcxprt_rdma *rdma, int sge_no; int sge_bytes; int page_no; + int pages; int ret; /* Post a recv buffer to handle another request. */ @@ -611,7 +612,8 @@ static int send_reply(struct svcxprt_rdma *rdma, * respages array. They are our pages until the I/O * completes. */ - for (page_no = 0; page_no < rqstp->rq_resused; page_no++) { + pages = rqstp->rq_next_page - rqstp->rq_respages; + for (page_no = 0; page_no < pages; page_no++) { ctxt->pages[page_no+1] = rqstp->rq_respages[page_no]; ctxt->count++; rqstp->rq_respages[page_no] = NULL;