From a118084432d642eeccb961c7c8cc61525a941fcb Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 20 May 2016 22:13:45 +0200 Subject: vfs: add d_real_inode() helper Needed by the following fix. Signed-off-by: Miklos Szeredi Cc: <stable@vger.kernel.org> --- include/linux/dcache.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux/dcache.h') diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 7e9422cb5989..ad5d582f9b14 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -576,5 +576,17 @@ static inline struct inode *vfs_select_inode(struct dentry *dentry, return inode; } +/** + * d_real_inode - Return the real inode + * @dentry: The dentry to query + * + * If dentry is on an union/overlay, then return the underlying, real inode. + * Otherwise return d_inode(). + */ +static inline struct inode *d_real_inode(struct dentry *dentry) +{ + return d_backing_inode(d_real(dentry)); +} + #endif /* __LINUX_DCACHE_H */ -- cgit v1.2.3 From f4bcbe792b8f434e32487cff9d9e30ab45a3ce02 Mon Sep 17 00:00:00 2001 From: George Spelvin Date: Fri, 20 May 2016 07:26:00 -0400 Subject: Pull out string hash to <linux/stringhash.h> ... so they can be used without the rest of <linux/dcache.h> The hashlen_* macros will make sense next patch.
Signed-off-by: George Spelvin --- include/linux/dcache.h | 27 +---------------- include/linux/stringhash.h | 72 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 73 insertions(+), 26 deletions(-) create mode 100644 include/linux/stringhash.h (limited to 'include/linux/dcache.h') diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 7e9422cb5989..0f9a977c334f 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -10,6 +10,7 @@ #include <linux/cache.h> #include <linux/rcupdate.h> #include <linux/lockref.h> +#include <linux/stringhash.h> struct path; struct vfsmount; @@ -52,9 +53,6 @@ struct qstr { }; #define QSTR_INIT(n,l) { { { .len = l } }, .name = n } -#define hashlen_hash(hashlen) ((u32) (hashlen)) -#define hashlen_len(hashlen) ((u32)((hashlen) >> 32)) -#define hashlen_create(hash,len) (((u64)(len)<<32)|(u32)(hash)) struct dentry_stat_t { long nr_dentry; @@ -65,29 +63,6 @@ struct dentry_stat_t { }; extern struct dentry_stat_t dentry_stat; -/* Name hashing routines. Initial hash value */ -/* Hash courtesy of the R5 hash in reiserfs modulo sign bits */ -#define init_name_hash() 0 - -/* partial hash update function. Assume roughly 4 bits per character */ -static inline unsigned long -partial_name_hash(unsigned long c, unsigned long prevhash) -{ - return (prevhash + (c << 4) + (c >> 4)) * 11; -} - -/* - * Finally: cut down the number of bits to a int value (and try to avoid - * losing bits) - */ -static inline unsigned long end_name_hash(unsigned long hash) -{ - return (unsigned int) hash; -} - -/* Compute the hash for a name string.
*/ -extern unsigned int full_name_hash(const unsigned char *, unsigned int); - /* * Try to keep struct dentry aligned on 64 byte cachelines (this will * give reasonable cacheline footprint with larger lines without the diff --git a/include/linux/stringhash.h b/include/linux/stringhash.h new file mode 100644 index 000000000000..2eaaaf6d2776 --- /dev/null +++ b/include/linux/stringhash.h @@ -0,0 +1,72 @@ +#ifndef __LINUX_STRINGHASH_H +#define __LINUX_STRINGHASH_H + +#include <linux/types.h> + +/* + * Routines for hashing strings of bytes to a 32-bit hash value. + * + * These hash functions are NOT GUARANTEED STABLE between kernel + * versions, architectures, or even repeated boots of the same kernel. + * (E.g. they may depend on boot-time hardware detection or be + * deliberately randomized.) + * + * They are also not intended to be secure against collisions caused by + * malicious inputs; much slower hash functions are required for that. + * + * They are optimized for pathname components, meaning short strings. + * Even if a majority of files have longer names, the dynamic profile of + * pathname components skews short due to short directory names. + * (E.g. /usr/lib/libsesquipedalianism.so.3.141.) + */ + +/* + * Version 1: one byte at a time. Example of use: + * + * unsigned long hash = init_name_hash; + * while (*p) + * hash = partial_name_hash(tolower(*p++), hash); + * hash = end_name_hash(hash); + * + * Although this is designed for bytes, fs/hfsplus/unicode.c + * abuses it to hash 16-bit values. + */ + +/* Hash courtesy of the R5 hash in reiserfs modulo sign bits */ +#define init_name_hash() 0 + +/* partial hash update function.
Assume roughly 4 bits per character */ +static inline unsigned long +partial_name_hash(unsigned long c, unsigned long prevhash) +{ + return (prevhash + (c << 4) + (c >> 4)) * 11; +} + +/* + * Finally: cut down the number of bits to a int value (and try to avoid + * losing bits) + */ +static inline unsigned long end_name_hash(unsigned long hash) +{ + return (unsigned int)hash; +} + +/* + * Version 2: One word (32 or 64 bits) at a time. + * If CONFIG_DCACHE_WORD_ACCESS is defined (meaning <asm/word-at-a-time.h> + * exists, which describes major Linux platforms like x86 and ARM), then + * this computes a different hash function much faster. + * + * If not set, this falls back to a wrapper around the preceding. + */ +extern unsigned int full_name_hash(const unsigned char *, unsigned int); + +/* + * A hash_len is a u64 with the hash of a string in the low + * half and the length in the high half. + */ +#define hashlen_hash(hashlen) ((u32)(hashlen)) +#define hashlen_len(hashlen) ((u32)((hashlen) >> 32)) +#define hashlen_create(hash, len) ((u64)(len)<<32 | (u32)(hash)) + +#endif /* __LINUX_STRINGHASH_H */ -- cgit v1.2.3 From ba65dc5ef16f82fba77869cecf7a7d515f61446b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 10 Jun 2016 11:32:47 -0400 Subject: much milder d_walk() race d_walk() relies upon the tree not getting rearranged under it without rename_lock being touched. And we do grab rename_lock around the places that change the tree topology. Unfortunately, branch reordering is just as bad from d_walk() POV and we have two places that do it without touching rename_lock - one in handling of cursors (for ramfs-style directories) and another in autofs. autofs one is a separate story; this commit deals with the cursors. * mark cursor dentries explicitly at allocation time * make __dentry_kill() leave ->d_child.next pointing to the next non-cursor sibling, making sure that it won't be moved around unnoticed before the parent is relocked on ascend-to-parent path in d_walk().
* make d_walk() skip cursors explicitly; strictly speaking it's not necessary (all callbacks we pass to d_walk() are no-ops on cursors), but it makes analysis easier. Signed-off-by: Al Viro --- fs/dcache.c | 58 ++++++++++++++++++++++++++++++++++++++++++++------ fs/internal.h | 1 + fs/libfs.c | 4 +--- include/linux/dcache.h | 1 + 4 files changed, 55 insertions(+), 9 deletions(-) (limited to 'include/linux/dcache.h') diff --git a/fs/dcache.c b/fs/dcache.c index 817c243c1ff1..b7eddfd35aa5 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -507,6 +507,44 @@ void d_drop(struct dentry *dentry) } EXPORT_SYMBOL(d_drop); +static inline void dentry_unlist(struct dentry *dentry, struct dentry *parent) +{ + struct dentry *next; + /* + * Inform d_walk() and shrink_dentry_list() that we are no longer + * attached to the dentry tree + */ + dentry->d_flags |= DCACHE_DENTRY_KILLED; + if (unlikely(list_empty(&dentry->d_child))) + return; + __list_del_entry(&dentry->d_child); + /* + * Cursors can move around the list of children. While we'd been + * a normal list member, it didn't matter - ->d_child.next would've + * been updated. However, from now on it won't be and for the + * things like d_walk() it might end up with a nasty surprise. + * Normally d_walk() doesn't care about cursors moving around - + * ->d_lock on parent prevents that and since a cursor has no children + * of its own, we get through it without ever unlocking the parent. + * There is one exception, though - if we ascend from a child that + * gets killed as soon as we unlock it, the next sibling is found + * using the value left in its ->d_child.next. And if _that_ + * pointed to a cursor, and cursor got moved (e.g. by lseek()) + * before d_walk() regains parent->d_lock, we'll end up skipping + * everything the cursor had been moved past. + * + * Solution: make sure that the pointer left behind in ->d_child.next + * points to something that won't be moving around. I.e. skip the + * cursors. 
+ */ + while (dentry->d_child.next != &parent->d_subdirs) { + next = list_entry(dentry->d_child.next, struct dentry, d_child); + if (likely(!(next->d_flags & DCACHE_DENTRY_CURSOR))) + break; + dentry->d_child.next = next->d_child.next; + } +} + static void __dentry_kill(struct dentry *dentry) { struct dentry *parent = NULL; @@ -532,12 +570,7 @@ static void __dentry_kill(struct dentry *dentry) } /* if it was on the hash then remove it */ __d_drop(dentry); - __list_del_entry(&dentry->d_child); - /* - * Inform d_walk() that we are no longer attached to the - * dentry tree - */ - dentry->d_flags |= DCACHE_DENTRY_KILLED; + dentry_unlist(dentry, parent); if (parent) spin_unlock(&parent->d_lock); dentry_iput(dentry); @@ -1203,6 +1236,9 @@ resume: struct dentry *dentry = list_entry(tmp, struct dentry, d_child); next = tmp->next; + if (unlikely(dentry->d_flags & DCACHE_DENTRY_CURSOR)) + continue; + spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); ret = enter(data, dentry); @@ -1651,6 +1687,16 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name) } EXPORT_SYMBOL(d_alloc); +struct dentry *d_alloc_cursor(struct dentry * parent) +{ + struct dentry *dentry = __d_alloc(parent->d_sb, NULL); + if (dentry) { + dentry->d_flags |= DCACHE_RCUACCESS | DCACHE_DENTRY_CURSOR; + dentry->d_parent = dget(parent); + } + return dentry; +} + /** * d_alloc_pseudo - allocate a dentry (for lookup-less filesystems) * @sb: the superblock diff --git a/fs/internal.h b/fs/internal.h index b71deeecea17..f57ced528cde 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -130,6 +130,7 @@ extern int invalidate_inodes(struct super_block *, bool); extern struct dentry *__d_alloc(struct super_block *, const struct qstr *); extern int d_set_mounted(struct dentry *dentry); extern long prune_dcache_sb(struct super_block *sb, struct shrink_control *sc); +extern struct dentry *d_alloc_cursor(struct dentry *); /* * read_write.c diff --git a/fs/libfs.c b/fs/libfs.c index 
3db2721144c2..cedeacbae303 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -71,9 +71,7 @@ EXPORT_SYMBOL(simple_lookup); int dcache_dir_open(struct inode *inode, struct file *file) { - static struct qstr cursor_name = QSTR_INIT(".", 1); - - file->private_data = d_alloc(file->f_path.dentry, &cursor_name); + file->private_data = d_alloc_cursor(file->f_path.dentry); return file->private_data ? 0 : -ENOMEM; } diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 484c8792da82..bcd0c64e3ed8 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -212,6 +212,7 @@ struct dentry_operations { #define DCACHE_OP_REAL 0x08000000 #define DCACHE_PAR_LOOKUP 0x10000000 /* being looked up (with parent locked shared) */ +#define DCACHE_DENTRY_CURSOR 0x20000000 extern seqlock_t rename_lock; -- cgit v1.2.3