aboutsummaryrefslogtreecommitdiffstats
path: root/fs/namei.c
diff options
context:
space:
mode:
authorPatrick McHardy <kaber@trash.net>2012-08-08 21:03:47 +0200
committerPatrick McHardy <kaber@trash.net>2012-08-08 21:03:47 +0200
commitd53b4ed072d9779cdf53582c46436dec06d0961f (patch)
treeac95ecab33e31cd79aae69c475e8348adac51230 /fs/namei.c
parent5d4dff7f1011a81a693a9c7b1f6a0b9c842eb60c (diff)
parent28a33cbc24e4256c143dce96c7d93bf423229f92 (diff)
Merge tag 'v3.5' of 192.168.0.154:/repos/git/linux-2.6
Conflicts: drivers/Kconfig Signed-off-by: Patrick McHardy <kaber@trash.net>
Diffstat (limited to 'fs/namei.c')
-rw-r--r--fs/namei.c723
1 files changed, 445 insertions, 278 deletions
diff --git a/fs/namei.c b/fs/namei.c
index 5008f01787f..7d694194024 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -15,7 +15,8 @@
*/
#include <linux/init.h>
-#include <linux/module.h>
+#include <linux/export.h>
+#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/fs.h>
#include <linux/namei.h>
@@ -36,6 +37,7 @@
#include <asm/uaccess.h>
#include "internal.h"
+#include "mount.h"
/* [Feb-1997 T. Schoebel-Theuer]
* Fundamental changes in the pathname lookup mechanisms (namei)
@@ -115,54 +117,42 @@
* POSIX.1 2.4: an empty pathname is invalid (ENOENT).
* PATH_MAX includes the nul terminator --RR.
*/
-static int do_getname(const char __user *filename, char *page)
+static char *getname_flags(const char __user *filename, int flags, int *empty)
{
- int retval;
- unsigned long len = PATH_MAX;
-
- if (!segment_eq(get_fs(), KERNEL_DS)) {
- if ((unsigned long) filename >= TASK_SIZE)
- return -EFAULT;
- if (TASK_SIZE - (unsigned long) filename < PATH_MAX)
- len = TASK_SIZE - (unsigned long) filename;
- }
+ char *result = __getname(), *err;
+ int len;
- retval = strncpy_from_user(page, filename, len);
- if (retval > 0) {
- if (retval < len)
- return 0;
- return -ENAMETOOLONG;
- } else if (!retval)
- retval = -ENOENT;
- return retval;
-}
+ if (unlikely(!result))
+ return ERR_PTR(-ENOMEM);
-static char *getname_flags(const char __user *filename, int flags, int *empty)
-{
- char *tmp, *result;
+ len = strncpy_from_user(result, filename, PATH_MAX);
+ err = ERR_PTR(len);
+ if (unlikely(len < 0))
+ goto error;
- result = ERR_PTR(-ENOMEM);
- tmp = __getname();
- if (tmp) {
- int retval = do_getname(filename, tmp);
+ /* The empty path is special. */
+ if (unlikely(!len)) {
+ if (empty)
+ *empty = 1;
+ err = ERR_PTR(-ENOENT);
+ if (!(flags & LOOKUP_EMPTY))
+ goto error;
+ }
- result = tmp;
- if (retval < 0) {
- if (retval == -ENOENT && empty)
- *empty = 1;
- if (retval != -ENOENT || !(flags & LOOKUP_EMPTY)) {
- __putname(tmp);
- result = ERR_PTR(retval);
- }
- }
+ err = ERR_PTR(-ENAMETOOLONG);
+ if (likely(len < PATH_MAX)) {
+ audit_getname(result);
+ return result;
}
- audit_getname(result);
- return result;
+
+error:
+ __putname(result);
+ return err;
}
char *getname(const char __user * filename)
{
- return getname_flags(filename, 0, 0);
+ return getname_flags(filename, 0, NULL);
}
#ifdef CONFIG_AUDITSYSCALL
@@ -229,10 +219,7 @@ static int acl_permission_check(struct inode *inode, int mask)
{
unsigned int mode = inode->i_mode;
- if (current_user_ns() != inode_userns(inode))
- goto other_perms;
-
- if (likely(current_fsuid() == inode->i_uid))
+ if (likely(uid_eq(current_fsuid(), inode->i_uid)))
mode >>= 6;
else {
if (IS_POSIXACL(inode) && (mode & S_IRWXG)) {
@@ -245,7 +232,6 @@ static int acl_permission_check(struct inode *inode, int mask)
mode >>= 3;
}
-other_perms:
/*
* If the DACs are ok we don't need any capability check.
*/
@@ -281,10 +267,10 @@ int generic_permission(struct inode *inode, int mask)
if (S_ISDIR(inode->i_mode)) {
/* DACs are overridable for directories */
- if (ns_capable(inode_userns(inode), CAP_DAC_OVERRIDE))
+ if (inode_capable(inode, CAP_DAC_OVERRIDE))
return 0;
if (!(mask & MAY_WRITE))
- if (ns_capable(inode_userns(inode), CAP_DAC_READ_SEARCH))
+ if (inode_capable(inode, CAP_DAC_READ_SEARCH))
return 0;
return -EACCES;
}
@@ -294,7 +280,7 @@ int generic_permission(struct inode *inode, int mask)
* at least one exec bit set.
*/
if (!(mask & MAY_EXEC) || (inode->i_mode & S_IXUGO))
- if (ns_capable(inode_userns(inode), CAP_DAC_OVERRIDE))
+ if (inode_capable(inode, CAP_DAC_OVERRIDE))
return 0;
/*
@@ -302,7 +288,7 @@ int generic_permission(struct inode *inode, int mask)
*/
mask &= MAY_READ | MAY_WRITE | MAY_EXEC;
if (mask == MAY_READ)
- if (ns_capable(inode_userns(inode), CAP_DAC_READ_SEARCH))
+ if (inode_capable(inode, CAP_DAC_READ_SEARCH))
return 0;
return -EACCES;
@@ -463,7 +449,7 @@ static int unlazy_walk(struct nameidata *nd, struct dentry *dentry)
mntget(nd->path.mnt);
rcu_read_unlock();
- br_read_unlock(vfsmount_lock);
+ br_read_unlock(&vfsmount_lock);
nd->flags &= ~LOOKUP_RCU;
return 0;
@@ -521,14 +507,14 @@ static int complete_walk(struct nameidata *nd)
if (unlikely(!__d_rcu_to_refcount(dentry, nd->seq))) {
spin_unlock(&dentry->d_lock);
rcu_read_unlock();
- br_read_unlock(vfsmount_lock);
+ br_read_unlock(&vfsmount_lock);
return -ECHILD;
}
BUG_ON(nd->inode != dentry->d_inode);
spin_unlock(&dentry->d_lock);
mntget(nd->path.mnt);
rcu_read_unlock();
- br_read_unlock(vfsmount_lock);
+ br_read_unlock(&vfsmount_lock);
}
if (likely(!(nd->flags & LOOKUP_JUMPED)))
@@ -643,7 +629,7 @@ follow_link(struct path *link, struct nameidata *nd, void **p)
cond_resched();
current->total_link_count++;
- touch_atime(link->mnt, dentry);
+ touch_atime(link);
nd_set_link(nd, NULL);
error = security_inode_follow_link(link->dentry, nd);
@@ -676,36 +662,38 @@ follow_link(struct path *link, struct nameidata *nd, void **p)
static int follow_up_rcu(struct path *path)
{
- struct vfsmount *parent;
+ struct mount *mnt = real_mount(path->mnt);
+ struct mount *parent;
struct dentry *mountpoint;
- parent = path->mnt->mnt_parent;
- if (parent == path->mnt)
+ parent = mnt->mnt_parent;
+ if (&parent->mnt == path->mnt)
return 0;
- mountpoint = path->mnt->mnt_mountpoint;
+ mountpoint = mnt->mnt_mountpoint;
path->dentry = mountpoint;
- path->mnt = parent;
+ path->mnt = &parent->mnt;
return 1;
}
int follow_up(struct path *path)
{
- struct vfsmount *parent;
+ struct mount *mnt = real_mount(path->mnt);
+ struct mount *parent;
struct dentry *mountpoint;
- br_read_lock(vfsmount_lock);
- parent = path->mnt->mnt_parent;
- if (parent == path->mnt) {
- br_read_unlock(vfsmount_lock);
+ br_read_lock(&vfsmount_lock);
+ parent = mnt->mnt_parent;
+ if (&parent->mnt == path->mnt) {
+ br_read_unlock(&vfsmount_lock);
return 0;
}
- mntget(parent);
- mountpoint = dget(path->mnt->mnt_mountpoint);
- br_read_unlock(vfsmount_lock);
+ mntget(&parent->mnt);
+ mountpoint = dget(mnt->mnt_mountpoint);
+ br_read_unlock(&vfsmount_lock);
dput(path->dentry);
path->dentry = mountpoint;
mntput(path->mnt);
- path->mnt = parent;
+ path->mnt = &parent->mnt;
return 1;
}
@@ -884,7 +872,7 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path,
struct inode **inode)
{
for (;;) {
- struct vfsmount *mounted;
+ struct mount *mounted;
/*
* Don't forget we might have a non-mountpoint managed dentry
* that wants to block transit.
@@ -898,8 +886,8 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path,
mounted = __lookup_mnt(path->mnt, path->dentry, 1);
if (!mounted)
break;
- path->mnt = mounted;
- path->dentry = mounted->mnt_root;
+ path->mnt = &mounted->mnt;
+ path->dentry = mounted->mnt.mnt_root;
nd->flags |= LOOKUP_JUMPED;
nd->seq = read_seqcount_begin(&path->dentry->d_seq);
/*
@@ -915,12 +903,12 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path,
static void follow_mount_rcu(struct nameidata *nd)
{
while (d_mountpoint(nd->path.dentry)) {
- struct vfsmount *mounted;
+ struct mount *mounted;
mounted = __lookup_mnt(nd->path.mnt, nd->path.dentry, 1);
if (!mounted)
break;
- nd->path.mnt = mounted;
- nd->path.dentry = mounted->mnt_root;
+ nd->path.mnt = &mounted->mnt;
+ nd->path.dentry = mounted->mnt.mnt_root;
nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq);
}
}
@@ -959,7 +947,7 @@ failed:
if (!(nd->flags & LOOKUP_ROOT))
nd->root.mnt = NULL;
rcu_read_unlock();
- br_read_unlock(vfsmount_lock);
+ br_read_unlock(&vfsmount_lock);
return -ECHILD;
}
@@ -1053,51 +1041,65 @@ static void follow_dotdot(struct nameidata *nd)
}
/*
- * Allocate a dentry with name and parent, and perform a parent
- * directory ->lookup on it. Returns the new dentry, or ERR_PTR
- * on error. parent->d_inode->i_mutex must be held. d_lookup must
- * have verified that no child exists while under i_mutex.
+ * This looks up the name in dcache, possibly revalidates the old dentry and
+ * allocates a new one if not found or not valid. On return, *need_lookup
+ * tells whether i_op->lookup is necessary.
+ *
+ * dir->d_inode->i_mutex must be held
*/
-static struct dentry *d_alloc_and_lookup(struct dentry *parent,
- struct qstr *name, struct nameidata *nd)
+static struct dentry *lookup_dcache(struct qstr *name, struct dentry *dir,
+ struct nameidata *nd, bool *need_lookup)
{
- struct inode *inode = parent->d_inode;
struct dentry *dentry;
- struct dentry *old;
+ int error;
- /* Don't create child dentry for a dead directory. */
- if (unlikely(IS_DEADDIR(inode)))
- return ERR_PTR(-ENOENT);
+ *need_lookup = false;
+ dentry = d_lookup(dir, name);
+ if (dentry) {
+ if (d_need_lookup(dentry)) {
+ *need_lookup = true;
+ } else if (dentry->d_flags & DCACHE_OP_REVALIDATE) {
+ error = d_revalidate(dentry, nd);
+ if (unlikely(error <= 0)) {
+ if (error < 0) {
+ dput(dentry);
+ return ERR_PTR(error);
+ } else if (!d_invalidate(dentry)) {
+ dput(dentry);
+ dentry = NULL;
+ }
+ }
+ }
+ }
- dentry = d_alloc(parent, name);
- if (unlikely(!dentry))
- return ERR_PTR(-ENOMEM);
+ if (!dentry) {
+ dentry = d_alloc(dir, name);
+ if (unlikely(!dentry))
+ return ERR_PTR(-ENOMEM);
- old = inode->i_op->lookup(inode, dentry, nd);
- if (unlikely(old)) {
- dput(dentry);
- dentry = old;
+ *need_lookup = true;
}
return dentry;
}
/*
- * We already have a dentry, but require a lookup to be performed on the parent
- * directory to fill in d_inode. Returns the new dentry, or ERR_PTR on error.
- * parent->d_inode->i_mutex must be held. d_lookup must have verified that no
- * child exists while under i_mutex.
+ * Call i_op->lookup on the dentry. The dentry must be negative but may be
+ * hashed if it was populated with DCACHE_NEED_LOOKUP.
+ *
+ * dir->d_inode->i_mutex must be held
*/
-static struct dentry *d_inode_lookup(struct dentry *parent, struct dentry *dentry,
- struct nameidata *nd)
+static struct dentry *lookup_real(struct inode *dir, struct dentry *dentry,
+ struct nameidata *nd)
{
- struct inode *inode = parent->d_inode;
struct dentry *old;
/* Don't create child dentry for a dead directory. */
- if (unlikely(IS_DEADDIR(inode)))
+ if (unlikely(IS_DEADDIR(dir))) {
+ dput(dentry);
return ERR_PTR(-ENOENT);
+ }
- old = inode->i_op->lookup(inode, dentry, nd);
+ old = dir->i_op->lookup(dir, dentry, nd);
if (unlikely(old)) {
dput(dentry);
dentry = old;
@@ -1105,13 +1107,26 @@ static struct dentry *d_inode_lookup(struct dentry *parent, struct dentry *dentr
return dentry;
}
+static struct dentry *__lookup_hash(struct qstr *name,
+ struct dentry *base, struct nameidata *nd)
+{
+ bool need_lookup;
+ struct dentry *dentry;
+
+ dentry = lookup_dcache(name, base, nd, &need_lookup);
+ if (!need_lookup)
+ return dentry;
+
+ return lookup_real(base->d_inode, dentry, nd);
+}
+
/*
* It's more convoluted than I'd like it to be, but... it's still fairly
* small and for now I'd prefer to have fast path as straight as possible.
* It _is_ time-critical.
*/
-static int do_lookup(struct nameidata *nd, struct qstr *name,
- struct path *path, struct inode **inode)
+static int lookup_fast(struct nameidata *nd, struct qstr *name,
+ struct path *path, struct inode **inode)
{
struct vfsmount *mnt = nd->path.mnt;
struct dentry *dentry, *parent = nd->path.dentry;
@@ -1126,16 +1141,31 @@ static int do_lookup(struct nameidata *nd, struct qstr *name,
*/
if (nd->flags & LOOKUP_RCU) {
unsigned seq;
- *inode = nd->inode;
- dentry = __d_lookup_rcu(parent, name, &seq, inode);
+ dentry = __d_lookup_rcu(parent, name, &seq, nd->inode);
if (!dentry)
goto unlazy;
- /* Memory barrier in read_seqcount_begin of child is enough */
+ /*
+ * This sequence count validates that the inode matches
+ * the dentry name information from lookup.
+ */
+ *inode = dentry->d_inode;
+ if (read_seqcount_retry(&dentry->d_seq, seq))
+ return -ECHILD;
+
+ /*
+ * This sequence count validates that the parent had no
+ * changes while we did the lookup of the dentry above.
+ *
+ * The memory barrier in read_seqcount_begin of child is
+ * enough, we can use __read_seqcount_retry here.
+ */
if (__read_seqcount_retry(&parent->d_seq, nd->seq))
return -ECHILD;
nd->seq = seq;
+ if (unlikely(d_need_lookup(dentry)))
+ goto unlazy;
if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) {
status = d_revalidate(dentry, nd);
if (unlikely(status <= 0)) {
@@ -1144,8 +1174,6 @@ static int do_lookup(struct nameidata *nd, struct qstr *name,
goto unlazy;
}
}
- if (unlikely(d_need_lookup(dentry)))
- goto unlazy;
path->mnt = mnt;
path->dentry = dentry;
if (unlikely(!__follow_mount_rcu(nd, path, inode)))
@@ -1160,38 +1188,14 @@ unlazy:
dentry = __d_lookup(parent, name);
}
- if (dentry && unlikely(d_need_lookup(dentry))) {
+ if (unlikely(!dentry))
+ goto need_lookup;
+
+ if (unlikely(d_need_lookup(dentry))) {
dput(dentry);
- dentry = NULL;
- }
-retry:
- if (unlikely(!dentry)) {
- struct inode *dir = parent->d_inode;
- BUG_ON(nd->inode != dir);
-
- mutex_lock(&dir->i_mutex);
- dentry = d_lookup(parent, name);
- if (likely(!dentry)) {
- dentry = d_alloc_and_lookup(parent, name, nd);
- if (IS_ERR(dentry)) {
- mutex_unlock(&dir->i_mutex);
- return PTR_ERR(dentry);
- }
- /* known good */
- need_reval = 0;
- status = 1;
- } else if (unlikely(d_need_lookup(dentry))) {
- dentry = d_inode_lookup(parent, dentry, nd);
- if (IS_ERR(dentry)) {
- mutex_unlock(&dir->i_mutex);
- return PTR_ERR(dentry);
- }
- /* known good */
- need_reval = 0;
- status = 1;
- }
- mutex_unlock(&dir->i_mutex);
+ goto need_lookup;
}
+
if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE) && need_reval)
status = d_revalidate(dentry, nd);
if (unlikely(status <= 0)) {
@@ -1201,9 +1205,7 @@ retry:
}
if (!d_invalidate(dentry)) {
dput(dentry);
- dentry = NULL;
- need_reval = 1;
- goto retry;
+ goto need_lookup;
}
}
@@ -1218,6 +1220,36 @@ retry:
nd->flags |= LOOKUP_JUMPED;
*inode = path->dentry->d_inode;
return 0;
+
+need_lookup:
+ return 1;
+}
+
+/* Fast lookup failed, do it the slow way */
+static int lookup_slow(struct nameidata *nd, struct qstr *name,
+ struct path *path)
+{
+ struct dentry *dentry, *parent;
+ int err;
+
+ parent = nd->path.dentry;
+ BUG_ON(nd->inode != parent->d_inode);
+
+ mutex_lock(&parent->d_inode->i_mutex);
+ dentry = __lookup_hash(name, parent, nd);
+ mutex_unlock(&parent->d_inode->i_mutex);
+ if (IS_ERR(dentry))
+ return PTR_ERR(dentry);
+ path->mnt = nd->path.mnt;
+ path->dentry = dentry;
+ err = follow_managed(path, nd->flags);
+ if (unlikely(err < 0)) {
+ path_put_conditional(path, nd);
+ return err;
+ }
+ if (err)
+ nd->flags |= LOOKUP_JUMPED;
+ return 0;
}
static inline int may_lookup(struct nameidata *nd)
@@ -1253,7 +1285,7 @@ static void terminate_walk(struct nameidata *nd)
if (!(nd->flags & LOOKUP_ROOT))
nd->root.mnt = NULL;
rcu_read_unlock();
- br_read_unlock(vfsmount_lock);
+ br_read_unlock(&vfsmount_lock);
}
}
@@ -1289,21 +1321,26 @@ static inline int walk_component(struct nameidata *nd, struct path *path,
*/
if (unlikely(type != LAST_NORM))
return handle_dots(nd, type);
- err = do_lookup(nd, name, path, &inode);
+ err = lookup_fast(nd, name, path, &inode);
if (unlikely(err)) {
- terminate_walk(nd);
- return err;
- }
- if (!inode) {
- path_to_nameidata(path, nd);
- terminate_walk(nd);
- return -ENOENT;
+ if (err < 0)
+ goto out_err;
+
+ err = lookup_slow(nd, name, path);
+ if (err < 0)
+ goto out_err;
+
+ inode = path->dentry->d_inode;
}
+ err = -ENOENT;
+ if (!inode)
+ goto out_path_put;
+
if (should_follow_link(inode, follow)) {
if (nd->flags & LOOKUP_RCU) {
if (unlikely(unlazy_walk(nd, path->dentry))) {
- terminate_walk(nd);
- return -ECHILD;
+ err = -ECHILD;
+ goto out_err;
}
}
BUG_ON(inode != path->dentry->d_inode);
@@ -1312,6 +1349,12 @@ static inline int walk_component(struct nameidata *nd, struct path *path,
path_to_nameidata(path, nd);
nd->inode = inode;
return 0;
+
+out_path_put:
+ path_to_nameidata(path, nd);
+out_err:
+ terminate_walk(nd);
+ return err;
}
/*
@@ -1372,6 +1415,130 @@ static inline int can_lookup(struct inode *inode)
}
/*
+ * We can do the critical dentry name comparison and hashing
+ * operations one word at a time, but we are limited to:
+ *
+ * - Architectures with fast unaligned word accesses. We could
+ * do a "get_unaligned()" if this helps and is sufficiently
+ * fast.
+ *
+ * - Little-endian machines (so that we can generate the mask
+ * of low bytes efficiently). Again, we *could* do a byte
+ * swapping load on big-endian architectures if that is not
+ * expensive enough to make the optimization worthless.
+ *
+ * - non-CONFIG_DEBUG_PAGEALLOC configurations (so that we
+ * do not trap on the (extremely unlikely) case of a page
+ * crossing operation).
+ *
+ * - Furthermore, we need an efficient 64-bit compile for the
+ * 64-bit case in order to generate the "number of bytes in
+ * the final mask". Again, that could be replaced with an
+ * efficient population count instruction or similar.
+ */
+#ifdef CONFIG_DCACHE_WORD_ACCESS
+
+#include <asm/word-at-a-time.h>
+
+#ifdef CONFIG_64BIT
+
+static inline unsigned int fold_hash(unsigned long hash)
+{
+ hash += hash >> (8*sizeof(int));
+ return hash;
+}
+
+#else /* 32-bit case */
+
+#define fold_hash(x) (x)
+
+#endif
+
+unsigned int full_name_hash(const unsigned char *name, unsigned int len)
+{
+ unsigned long a, mask;
+ unsigned long hash = 0;
+
+ for (;;) {
+ a = load_unaligned_zeropad(name);
+ if (len < sizeof(unsigned long))
+ break;
+ hash += a;
+ hash *= 9;
+ name += sizeof(unsigned long);
+ len -= sizeof(unsigned long);
+ if (!len)
+ goto done;
+ }
+ mask = ~(~0ul << len*8);
+ hash += mask & a;
+done:
+ return fold_hash(hash);
+}
+EXPORT_SYMBOL(full_name_hash);
+
+/*
+ * Calculate the length and hash of the path component, and
+ * return the length of the component;
+ */
+static inline unsigned long hash_name(const char *name, unsigned int *hashp)
+{
+ unsigned long a, b, adata, bdata, mask, hash, len;
+ const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS;
+
+ hash = a = 0;
+ len = -sizeof(unsigned long);
+ do {
+ hash = (hash + a) * 9;
+ len += sizeof(unsigned long);
+ a = load_unaligned_zeropad(name+len);
+ b = a ^ REPEAT_BYTE('/');
+ } while (!(has_zero(a, &adata, &constants) | has_zero(b, &bdata, &constants)));
+
+ adata = prep_zero_mask(a, adata, &constants);
+ bdata = prep_zero_mask(b, bdata, &constants);
+
+ mask = create_zero_mask(adata | bdata);
+
+ hash += a & zero_bytemask(mask);
+ *hashp = fold_hash(hash);
+
+ return len + find_zero(mask);
+}
+
+#else
+
+unsigned int full_name_hash(const unsigned char *name, unsigned int len)
+{
+ unsigned long hash = init_name_hash();
+ while (len--)
+ hash = partial_name_hash(*name++, hash);
+ return end_name_hash(hash);
+}
+EXPORT_SYMBOL(full_name_hash);
+
+/*
+ * We know there's a real path component here of at least
+ * one character.
+ */
+static inline unsigned long hash_name(const char *name, unsigned int *hashp)
+{
+ unsigned long hash = init_name_hash();
+ unsigned long len = 0, c;
+
+ c = (unsigned char)*name;
+ do {
+ len++;
+ hash = partial_name_hash(c, hash);
+ c = (unsigned char)name[len];
+ } while (c && c != '/');
+ *hashp = end_name_hash(hash);
+ return len;
+}
+
+#endif
+
+/*
* Name resolution.
* This is the basic name resolution function, turning a pathname into
* the final dentry. We expect 'base' to be positive and a directory.
@@ -1391,31 +1558,22 @@ static int link_path_walk(const char *name, struct nameidata *nd)
/* At this point we know we have a real path component. */
for(;;) {
- unsigned long hash;
struct qstr this;
- unsigned int c;
+ long len;
int type;
err = may_lookup(nd);
if (err)
break;
+ len = hash_name(name, &this.hash);
this.name = name;
- c = *(const unsigned char *)name;
-
- hash = init_name_hash();
- do {
- name++;
- hash = partial_name_hash(c, hash);
- c = *(const unsigned char *)name;
- } while (c && (c != '/'));
- this.len = name - (const char *) this.name;
- this.hash = end_name_hash(hash);
+ this.len = len;
type = LAST_NORM;
- if (this.name[0] == '.') switch (this.len) {
+ if (name[0] == '.') switch (len) {
case 2:
- if (this.name[1] == '.') {
+ if (name[1] == '.') {
type = LAST_DOTDOT;
nd->flags |= LOOKUP_JUMPED;
}
@@ -1434,12 +1592,18 @@ static int link_path_walk(const char *name, struct nameidata *nd)
}
}
- /* remove trailing slashes? */
- if (!c)
+ if (!name[len])
goto last_component;
- while (*++name == '/');
- if (!*name)
+ /*
+ * If it wasn't NUL, we know it was '/'. Skip that
+ * slash, and continue until no more slashes.
+ */
+ do {
+ len++;
+ } while (unlikely(name[len] == '/'));
+ if (!name[len])
goto last_component;
+ name += len;
err = walk_component(nd, &next, &this, type, LOOKUP_FOLLOW);
if (err < 0)
@@ -1487,7 +1651,7 @@ static int path_init(int dfd, const char *name, unsigned int flags,
nd->path = nd->root;
nd->inode = inode;
if (flags & LOOKUP_RCU) {
- br_read_lock(vfsmount_lock);
+ br_read_lock(&vfsmount_lock);
rcu_read_lock();
nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
} else {
@@ -1500,7 +1664,7 @@ static int path_init(int dfd, const char *name, unsigned int flags,
if (*name=='/') {
if (flags & LOOKUP_RCU) {
- br_read_lock(vfsmount_lock);
+ br_read_lock(&vfsmount_lock);
rcu_read_lock();
set_root_rcu(nd);
} else {
@@ -1513,7 +1677,7 @@ static int path_init(int dfd, const char *name, unsigned int flags,
struct fs_struct *fs = current->fs;
unsigned seq;
- br_read_lock(vfsmount_lock);
+ br_read_lock(&vfsmount_lock);
rcu_read_lock();
do {
@@ -1549,7 +1713,7 @@ static int path_init(int dfd, const char *name, unsigned int flags,
if (fput_needed)
*fp = file;
nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
- br_read_lock(vfsmount_lock);
+ br_read_lock(&vfsmount_lock);
rcu_read_lock();
} else {
path_get(&file->f_path);
@@ -1695,59 +1859,6 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt,
return err;
}
-static struct dentry *__lookup_hash(struct qstr *name,
- struct dentry *base, struct nameidata *nd)
-{
- struct inode *inode = base->d_inode;
- struct dentry *dentry;
- int err;
-
- err = inode_permission(inode, MAY_EXEC);
- if (err)
- return ERR_PTR(err);
-
- /*
- * Don't bother with __d_lookup: callers are for creat as
- * well as unlink, so a lot of the time it would cost
- * a double lookup.
- */
- dentry = d_lookup(base, name);
-
- if (dentry && d_need_lookup(dentry)) {
- /*
- * __lookup_hash is called with the parent dir's i_mutex already
- * held, so we are good to go here.
- */
- dentry = d_inode_lookup(base, dentry, nd);
- if (IS_ERR(dentry))
- return dentry;
- }
-
- if (dentry && (dentry->d_flags & DCACHE_OP_REVALIDATE)) {
- int status = d_revalidate(dentry, nd);
- if (unlikely(status <= 0)) {
- /*
- * The dentry failed validation.
- * If d_revalidate returned 0 attempt to invalidate
- * the dentry otherwise d_revalidate is asking us
- * to return a fail status.
- */
- if (status < 0) {
- dput(dentry);
- return ERR_PTR(status);
- } else if (!d_invalidate(dentry)) {
- dput(dentry);
- dentry = NULL;
- }
- }
- }
-
- if (!dentry)
- dentry = d_alloc_and_lookup(base, name, nd);
-
- return dentry;
-}
-
/*
* Restricted form of lookup. Doesn't follow links, single-component only,
* needs parent already locked. Doesn't follow mounts.
@@ -1772,24 +1883,22 @@ static struct dentry *lookup_hash(struct nameidata *nd)
struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
{
struct qstr this;
- unsigned long hash;
unsigned int c;
+ int err;
WARN_ON_ONCE(!mutex_is_locked(&base->d_inode->i_mutex));
this.name = name;
this.len = len;
+ this.hash = full_name_hash(name, len);
if (!len)
return ERR_PTR(-EACCES);
- hash = init_name_hash();
while (len--) {
c = *(const unsigned char *)name++;
if (c == '/' || c == '\0')
return ERR_PTR(-EACCES);
- hash = partial_name_hash(c, hash);
}
- this.hash = end_name_hash(hash);
/*
* See if the low-level filesystem might want
* to use its own hash..
@@ -1800,6 +1909,10 @@ struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
return ERR_PTR(err);
}
+ err = inode_permission(base->d_inode, MAY_EXEC);
+ if (err)
+ return ERR_PTR(err);
+
return __lookup_hash(&this, base, NULL);
}
@@ -1824,7 +1937,7 @@ int user_path_at_empty(int dfd, const char __user *name, unsigned flags,
int user_path_at(int dfd, const char __user *name, unsigned flags,
struct path *path)
{
- return user_path_at_empty(dfd, name, flags, path, 0);
+ return user_path_at_empty(dfd, name, flags, path, NULL);
}
static int user_path_parent(int dfd, const char __user *path,
@@ -1851,19 +1964,15 @@ static int user_path_parent(int dfd, const char __user *path,
*/
static inline int check_sticky(struct inode *dir, struct inode *inode)
{
- uid_t fsuid = current_fsuid();
+ kuid_t fsuid = current_fsuid();
if (!(dir->i_mode & S_ISVTX))
return 0;
- if (current_user_ns() != inode_userns(inode))
- goto other_userns;
- if (inode->i_uid == fsuid)
+ if (uid_eq(inode->i_uid, fsuid))
return 0;
- if (dir->i_uid == fsuid)
+ if (uid_eq(dir->i_uid, fsuid))
return 0;
-
-other_userns:
- return !ns_capable(inode_userns(inode), CAP_FOWNER);
+ return !inode_capable(inode, CAP_FOWNER);
}
/*
@@ -1976,7 +2085,7 @@ void unlock_rename(struct dentry *p1, struct dentry *p2)
}
}
-int vfs_create(struct inode *dir, struct dentry *dentry, int mode,
+int vfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
struct nameidata *nd)
{
int error = may_create(dir, dentry);
@@ -2091,6 +2200,10 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
int want_write = 0;
int acc_mode = op->acc_mode;
struct file *filp;
+ struct inode *inode;
+ int symlink_ok = 0;
+ struct path save_parent = { .dentry = NULL, .mnt = NULL };
+ bool retried = false;
int error;
nd->flags &= ~LOOKUP_PARENT;
@@ -2122,30 +2235,23 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
}
if (!(open_flag & O_CREAT)) {
- int symlink_ok = 0;
if (nd->last.name[nd->last.len])
nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
if (open_flag & O_PATH && !(nd->flags & LOOKUP_FOLLOW))
symlink_ok = 1;
/* we _can_ be in RCU mode here */
- error = walk_component(nd, path, &nd->last, LAST_NORM,
- !symlink_ok);
- if (error < 0)
- return ERR_PTR(error);
- if (error) /* symlink */
- return NULL;
- /* sayonara */
- error = complete_walk(nd);
- if (error)
- return ERR_PTR(-ECHILD);
+ error = lookup_fast(nd, &nd->last, path, &inode);
+ if (unlikely(error)) {
+ if (error < 0)
+ goto exit;
- error = -ENOTDIR;
- if (nd->flags & LOOKUP_DIRECTORY) {
- if (!nd->inode->i_op->lookup)
+ error = lookup_slow(nd, &nd->last, path);
+ if (error < 0)
goto exit;
+
+ inode = path->dentry->d_inode;
}
- audit_inode(pathname, nd->path.dentry);
- goto ok;
+ goto finish_lookup;
}
/* create side of things */
@@ -2163,6 +2269,7 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
if (nd->last.name[nd->last.len])
goto exit;
+retry_lookup:
mutex_lock(&dir->d_inode->i_mutex);
dentry = lookup_hash(nd);
@@ -2177,7 +2284,7 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
/* Negative dentry, just create the file */
if (!dentry->d_inode) {
- int mode = op->mode;
+ umode_t mode = op->mode;
if (!IS_POSIXACL(dir->d_inode))
mode &= ~current_umask();
/*
@@ -2224,22 +2331,49 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
if (error)
nd->flags |= LOOKUP_JUMPED;
+ BUG_ON(nd->flags & LOOKUP_RCU);
+ inode = path->dentry->d_inode;
+finish_lookup:
+ /* we _can_ be in RCU mode here */
error = -ENOENT;
- if (!path->dentry->d_inode)
- goto exit_dput;
+ if (!inode) {
+ path_to_nameidata(path, nd);
+ goto exit;
+ }
- if (path->dentry->d_inode->i_op->follow_link)
+ if (should_follow_link(inode, !symlink_ok)) {
+ if (nd->flags & LOOKUP_RCU) {
+ if (unlikely(unlazy_walk(nd, path->dentry))) {
+ error = -ECHILD;
+ goto exit;
+ }
+ }
+ BUG_ON(inode != path->dentry->d_inode);
return NULL;
+ }
- path_to_nameidata(path, nd);
- nd->inode = path->dentry->d_inode;
+ if ((nd->flags & LOOKUP_RCU) || nd->path.mnt != path->mnt) {
+ path_to_nameidata(path, nd);
+ } else {
+ save_parent.dentry = nd->path.dentry;
+ save_parent.mnt = mntget(path->mnt);
+ nd->path.dentry = path->dentry;
+
+ }
+ nd->inode = inode;
/* Why this, you ask? _Now_ we might have grown LOOKUP_JUMPED... */
error = complete_walk(nd);
- if (error)
- goto exit;
+ if (error) {
+ path_put(&save_parent);
+ return ERR_PTR(error);
+ }
error = -EISDIR;
- if (S_ISDIR(nd->inode->i_mode))
+ if ((open_flag & O_CREAT) && S_ISDIR(nd->inode->i_mode))
+ goto exit;
+ error = -ENOTDIR;
+ if ((nd->flags & LOOKUP_DIRECTORY) && !nd->inode->i_op->lookup)
goto exit;
+ audit_inode(pathname, nd->path.dentry);
ok:
if (!S_ISREG(nd->inode->i_mode))
will_truncate = 0;
@@ -2255,6 +2389,20 @@ common:
if (error)
goto exit;
filp = nameidata_to_filp(nd);
+ if (filp == ERR_PTR(-EOPENSTALE) && save_parent.dentry && !retried) {
+ BUG_ON(save_parent.dentry != dir);
+ path_put(&nd->path);
+ nd->path = save_parent;
+ nd->inode = dir->d_inode;
+ save_parent.mnt = NULL;
+ save_parent.dentry = NULL;
+ if (want_write) {
+ mnt_drop_write(nd->path.mnt);
+ want_write = 0;
+ }
+ retried = true;
+ goto retry_lookup;
+ }
if (!IS_ERR(filp)) {
error = ima_file_check(filp, op->acc_mode);
if (error) {
@@ -2274,7 +2422,8 @@ common:
out:
if (want_write)
mnt_drop_write(nd->path.mnt);
- path_put(&nd->path);
+ path_put(&save_parent);
+ terminate_walk(nd);
return filp;
exit_mutex_unlock:
@@ -2337,6 +2486,12 @@ out:
if (base)
fput(base);
release_open_intent(nd);
+ if (filp == ERR_PTR(-EOPENSTALE)) {
+ if (flags & LOOKUP_RCU)
+ filp = ERR_PTR(-ECHILD);
+ else
+ filp = ERR_PTR(-ESTALE);
+ }
return filp;
out_filp:
@@ -2444,15 +2599,14 @@ struct dentry *user_path_create(int dfd, const char __user *pathname, struct pat
}
EXPORT_SYMBOL(user_path_create);
-int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
+int vfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev)
{
int error = may_create(dir, dentry);
if (error)
return error;
- if ((S_ISCHR(mode) || S_ISBLK(mode)) &&
- !ns_capable(inode_userns(dir), CAP_MKNOD))
+ if ((S_ISCHR(mode) || S_ISBLK(mode)) && !capable(CAP_MKNOD))
return -EPERM;
if (!dir->i_op->mknod)
@@ -2472,7 +2626,7 @@ int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
return error;
}
-static int may_mknod(mode_t mode)
+static int may_mknod(umode_t mode)
{
switch (mode & S_IFMT) {
case S_IFREG:
@@ -2489,7 +2643,7 @@ static int may_mknod(mode_t mode)
}
}
-SYSCALL_DEFINE4(mknodat, int, dfd, const char __user *, filename, int, mode,
+SYSCALL_DEFINE4(mknodat, int, dfd, const char __user *, filename, umode_t, mode,
unsigned, dev)
{
struct dentry *dentry;
@@ -2536,14 +2690,15 @@ out_dput:
return error;
}
-SYSCALL_DEFINE3(mknod, const char __user *, filename, int, mode, unsigned, dev)
+SYSCALL_DEFINE3(mknod, const char __user *, filename, umode_t, mode, unsigned, dev)
{
return sys_mknodat(AT_FDCWD, filename, mode, dev);
}
-int vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+int vfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
{
int error = may_create(dir, dentry);
+ unsigned max_links = dir->i_sb->s_max_links;
if (error)
return error;
@@ -2556,13 +2711,16 @@ int vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
if (error)
return error;
+ if (max_links && dir->i_nlink >= max_links)
+ return -EMLINK;
+
error = dir->i_op->mkdir(dir, dentry, mode);
if (!error)
fsnotify_mkdir(dir, dentry);
return error;
}
-SYSCALL_DEFINE3(mkdirat, int, dfd, const char __user *, pathname, int, mode)
+SYSCALL_DEFINE3(mkdirat, int, dfd, const char __user *, pathname, umode_t, mode)
{
struct dentry *dentry;
struct path path;
@@ -2590,14 +2748,14 @@ out_dput:
return error;
}
-SYSCALL_DEFINE2(mkdir, const char __user *, pathname, int, mode)
+SYSCALL_DEFINE2(mkdir, const char __user *, pathname, umode_t, mode)
{
return sys_mkdirat(AT_FDCWD, pathname, mode);
}
/*
* The dentry_unhash() helper will try to drop the dentry early: we
- * should have a usage count of 2 if we're the only user of this
+ * should have a usage count of 1 if we're the only user of this
* dentry, and if that is true (possibly after pruning the dcache),
* then we drop the dentry now.
*
@@ -2886,6 +3044,7 @@ SYSCALL_DEFINE2(symlink, const char __user *, oldname, const char __user *, newn
int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry)
{
struct inode *inode = old_dentry->d_inode;
+ unsigned max_links = dir->i_sb->s_max_links;
int error;
if (!inode)
@@ -2916,6 +3075,8 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de
/* Make sure we don't allow creating hardlink to an unlinked file */
if (inode->i_nlink == 0)
error = -ENOENT;
+ else if (max_links && inode->i_nlink >= max_links)
+ error = -EMLINK;
else
error = dir->i_op->link(old_dentry, dir, new_dentry);
mutex_unlock(&inode->i_mutex);
@@ -3025,6 +3186,7 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
{
int error = 0;
struct inode *target = new_dentry->d_inode;
+ unsigned max_links = new_dir->i_sb->s_max_links;
/*
* If we are going to change the parent - check write permissions,
@@ -3048,6 +3210,11 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
if (d_mountpoint(old_dentry) || d_mountpoint(new_dentry))
goto out;
+ error = -EMLINK;
+ if (max_links && !target && new_dir != old_dir &&
+ new_dir->i_nlink >= max_links)
+ goto out;
+
if (target)
shrink_dcache_parent(new_dentry);
error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
@@ -3346,9 +3513,9 @@ retry:
if (err)
goto fail;
- kaddr = kmap_atomic(page, KM_USER0);
+ kaddr = kmap_atomic(page);
memcpy(kaddr, symname, len-1);
- kunmap_atomic(kaddr, KM_USER0);
+ kunmap_atomic(kaddr);
err = pagecache_write_end(NULL, mapping, 0, len-1, len-1,
page, fsdata);