Merge branch 'vfs-scale-working' of git://git.kernel.org/pub/scm/linux/kernel/git/npiggin/linux-npiggin

* 'vfs-scale-working' of git://git.kernel.org/pub/scm/linux/kernel/git/npiggin/linux-npiggin: (57 commits)
  fs: scale mntget/mntput
  fs: rename vfsmount counter helpers
  fs: implement faster dentry memcmp
  fs: prefetch inode data in dcache lookup
  fs: improve scalability of pseudo filesystems
  fs: dcache per-inode inode alias locking
  fs: dcache per-bucket dcache hash locking
  bit_spinlock: add required includes
  kernel: add bl_list
  xfs: provide simple rcu-walk ACL implementation
  btrfs: provide simple rcu-walk ACL implementation
  ext2,3,4: provide simple rcu-walk ACL implementation
  fs: provide simple rcu-walk generic_check_acl implementation
  fs: provide rcu-walk aware permission i_ops
  fs: rcu-walk aware d_revalidate method
  fs: cache optimise dentry and inode for rcu-walk
  fs: dcache reduce branches in lookup path
  fs: dcache remove d_mounted
  fs: fs_struct use seqlock
  fs: rcu-walk for path lookup
  ...
Committer: Linus Torvalds, 2011-01-07 08:56:33 -08:00
commit b4a45f5fe8
212 changed files with 4778 additions and 2018 deletions

include/linux/bit_spinlock.h

@@ -1,6 +1,10 @@
#ifndef __LINUX_BIT_SPINLOCK_H
#define __LINUX_BIT_SPINLOCK_H
#include <linux/kernel.h>
#include <linux/preempt.h>
#include <asm/atomic.h>
/*
* bit-based spin_lock()
*

include/linux/coda_linux.h

@@ -37,7 +37,7 @@ extern const struct file_operations coda_ioctl_operations;
/* operations shared over more than one file */
int coda_open(struct inode *i, struct file *f);
int coda_release(struct inode *i, struct file *f);
int coda_permission(struct inode *inode, int mask);
int coda_permission(struct inode *inode, int mask, unsigned int flags);
int coda_revalidate_inode(struct dentry *);
int coda_getattr(struct vfsmount *, struct dentry *, struct kstat *);
int coda_setattr(struct dentry *, struct iattr *);
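
The extra flags argument is the rcu-walk plumbing: ->permission implementations can now detect that they are being called during a lockless walk. The convention in this series is that a method which cannot decide without blocking returns -ECHILD, and the VFS retries in ref-walk mode. A minimal sketch of that pattern, assuming the IPERM_FLAG_RCU flag introduced later in this diff (example_permission is an illustrative name, not Coda's implementation):

/*
 * Sketch: an rcu-walk aware ->permission method. Under IPERM_FLAG_RCU
 * no blocking or reference-taking is allowed; returning -ECHILD tells
 * the VFS to drop out of rcu-walk and retry with references held.
 */
static int example_permission(struct inode *inode, int mask, unsigned int flags)
{
	if (flags & IPERM_FLAG_RCU)
		return -ECHILD;	/* cannot check locklessly */
	return generic_permission(inode, mask, flags, NULL);
}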

include/linux/dcache.h

@@ -4,7 +4,9 @@
#include <asm/atomic.h>
#include <linux/list.h>
#include <linux/rculist.h>
#include <linux/rculist_bl.h>
#include <linux/spinlock.h>
#include <linux/seqlock.h>
#include <linux/cache.h>
#include <linux/rcupdate.h>
@@ -45,6 +47,27 @@ struct dentry_stat_t {
};
extern struct dentry_stat_t dentry_stat;
/*
* Compare 2 name strings, return 0 if they match, otherwise non-zero.
* The strings are both count bytes long, and count is non-zero.
*/
static inline int dentry_cmp(const unsigned char *cs, size_t scount,
const unsigned char *ct, size_t tcount)
{
int ret;
if (scount != tcount)
return 1;
do {
ret = (*cs != *ct);
if (ret)
break;
cs++;
ct++;
tcount--;
} while (tcount);
return ret;
}
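
The open-coded loop replaces memcmp because dentry names are short and usually differ in length, so the scount != tcount test decides most comparisons before a single byte is loaded, and the remaining byte loop avoids memcmp call overhead. A self-contained userspace check of the same logic (hypothetical test harness, not kernel code):

#include <stddef.h>
#include <stdio.h>

/* Userspace copy of the new dentry_cmp loop, for illustration. */
static int dentry_cmp(const unsigned char *cs, size_t scount,
		      const unsigned char *ct, size_t tcount)
{
	int ret;
	if (scount != tcount)
		return 1;
	do {
		ret = (*cs != *ct);
		if (ret)
			break;
		cs++;
		ct++;
		tcount--;
	} while (tcount);
	return ret;
}

int main(void)
{
	/* 0 means match, non-zero means mismatch. */
	printf("%d\n", dentry_cmp((const unsigned char *)"foo", 3,
				  (const unsigned char *)"foo", 3));  /* 0 */
	printf("%d\n", dentry_cmp((const unsigned char *)"foo", 3,
				  (const unsigned char *)"bar", 3));  /* 1 */
	printf("%d\n", dentry_cmp((const unsigned char *)"foo", 3,
				  (const unsigned char *)"fool", 4)); /* 1 */
	return 0;
}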
/* Name hashing routines. Initial hash value */
/* Hash courtesy of the R5 hash in reiserfs modulo sign bits */
#define init_name_hash() 0
@@ -81,25 +104,33 @@ full_name_hash(const unsigned char *name, unsigned int len)
* large memory footprint increase).
*/
#ifdef CONFIG_64BIT
#define DNAME_INLINE_LEN_MIN 32 /* 192 bytes */
# define DNAME_INLINE_LEN 32 /* 192 bytes */
#else
#define DNAME_INLINE_LEN_MIN 40 /* 128 bytes */
# ifdef CONFIG_SMP
# define DNAME_INLINE_LEN 36 /* 128 bytes */
# else
# define DNAME_INLINE_LEN 40 /* 128 bytes */
# endif
#endif
struct dentry {
atomic_t d_count;
/* RCU lookup touched fields */
unsigned int d_flags; /* protected by d_lock */
spinlock_t d_lock; /* per dentry lock */
int d_mounted;
struct inode *d_inode; /* Where the name belongs to - NULL is
* negative */
/*
* The next three fields are touched by __d_lookup. Place them here
* so they all fit in a cache line.
*/
struct hlist_node d_hash; /* lookup hash list */
seqcount_t d_seq; /* per dentry seqlock */
struct hlist_bl_node d_hash; /* lookup hash list */
struct dentry *d_parent; /* parent directory */
struct qstr d_name;
struct inode *d_inode; /* Where the name belongs to - NULL is
* negative */
unsigned char d_iname[DNAME_INLINE_LEN]; /* small names */
/* Ref lookup also touches following */
unsigned int d_count; /* protected by d_lock */
spinlock_t d_lock; /* per dentry lock */
const struct dentry_operations *d_op;
struct super_block *d_sb; /* The root of the dentry tree */
unsigned long d_time; /* used by d_revalidate */
void *d_fsdata; /* fs-specific data */
struct list_head d_lru; /* LRU list */
/*
@@ -111,12 +142,6 @@ struct dentry {
} d_u;
struct list_head d_subdirs; /* our children */
struct list_head d_alias; /* inode alias list */
unsigned long d_time; /* used by d_revalidate */
const struct dentry_operations *d_op;
struct super_block *d_sb; /* The root of the dentry tree */
void *d_fsdata; /* fs-specific data */
unsigned char d_iname[DNAME_INLINE_LEN_MIN]; /* small names */
};
/*
@@ -133,97 +158,62 @@ enum dentry_d_lock_class
struct dentry_operations {
int (*d_revalidate)(struct dentry *, struct nameidata *);
int (*d_hash) (struct dentry *, struct qstr *);
int (*d_compare) (struct dentry *, struct qstr *, struct qstr *);
int (*d_delete)(struct dentry *);
int (*d_hash)(const struct dentry *, const struct inode *,
struct qstr *);
int (*d_compare)(const struct dentry *, const struct inode *,
const struct dentry *, const struct inode *,
unsigned int, const char *, const struct qstr *);
int (*d_delete)(const struct dentry *);
void (*d_release)(struct dentry *);
void (*d_iput)(struct dentry *, struct inode *);
char *(*d_dname)(struct dentry *, char *, int);
};
/* the dentry parameter passed to d_hash and d_compare is the parent
* directory of the entries to be compared. It is used in case these
* functions need any directory specific information for determining
* equivalency classes. Using the dentry itself might not work, as it
* might be a negative dentry which has no information associated with
* it */
} ____cacheline_aligned;
/*
locking rules:
big lock dcache_lock d_lock may block
d_revalidate: no no no yes
d_hash no no no yes
d_compare: no yes yes no
d_delete: no yes no no
d_release: no no no yes
d_iput: no no no yes
* Locking rules for dentry_operations callbacks are to be found in
* Documentation/filesystems/Locking. Keep it updated!
*
* Further descriptions are found in Documentation/filesystems/vfs.txt.
* Keep it updated too!
*/
/* d_flags entries */
#define DCACHE_AUTOFS_PENDING 0x0001 /* autofs: "under construction" */
#define DCACHE_NFSFS_RENAMED 0x0002 /* this dentry has been "silly
* renamed" and has to be
* deleted on the last dput()
*/
#define DCACHE_DISCONNECTED 0x0004
/* This dentry is possibly not currently connected to the dcache tree,
* in which case its parent will either be itself, or will have this
* flag as well. nfsd will not use a dentry with this bit set, but will
* first endeavour to clear the bit either by discovering that it is
* connected, or by performing lookup operations. Any filesystem which
* supports nfsd_operations MUST have a lookup function which, if it finds
* a directory inode with a DCACHE_DISCONNECTED dentry, will d_move
* that dentry into place and return that dentry rather than the passed one,
* typically using d_splice_alias.
*/
#define DCACHE_NFSFS_RENAMED 0x0002
/* this dentry has been "silly renamed" and has to be deleted on the last
* dput() */
#define DCACHE_DISCONNECTED 0x0004
/* This dentry is possibly not currently connected to the dcache tree, in
* which case its parent will either be itself, or will have this flag as
* well. nfsd will not use a dentry with this bit set, but will first
* endeavour to clear the bit either by discovering that it is connected,
* or by performing lookup operations. Any filesystem which supports
* nfsd_operations MUST have a lookup function which, if it finds a
* directory inode with a DCACHE_DISCONNECTED dentry, will d_move that
* dentry into place and return that dentry rather than the passed one,
* typically using d_splice_alias. */
#define DCACHE_REFERENCED 0x0008 /* Recently used, don't discard. */
#define DCACHE_UNHASHED 0x0010
#define DCACHE_INOTIFY_PARENT_WATCHED 0x0020 /* Parent inode is watched by inotify */
#define DCACHE_INOTIFY_PARENT_WATCHED 0x0020
/* Parent inode is watched by inotify */
#define DCACHE_COOKIE 0x0040 /* For use by dcookie subsystem */
#define DCACHE_FSNOTIFY_PARENT_WATCHED 0x0080 /* Parent inode is watched by some fsnotify listener */
#define DCACHE_FSNOTIFY_PARENT_WATCHED 0x0080
/* Parent inode is watched by some fsnotify listener */
#define DCACHE_CANT_MOUNT 0x0100
#define DCACHE_GENOCIDE 0x0200
#define DCACHE_MOUNTED 0x0400 /* is a mountpoint */
#define DCACHE_OP_HASH 0x1000
#define DCACHE_OP_COMPARE 0x2000
#define DCACHE_OP_REVALIDATE 0x4000
#define DCACHE_OP_DELETE 0x8000
extern spinlock_t dcache_lock;
extern seqlock_t rename_lock;
/**
* d_drop - drop a dentry
* @dentry: dentry to drop
*
* d_drop() unhashes the entry from the parent dentry hashes, so that it won't
* be found through a VFS lookup any more. Note that this is different from
* deleting the dentry - d_delete will try to mark the dentry negative if
* possible, giving a successful _negative_ lookup, while d_drop will
* just make the cache lookup fail.
*
* d_drop() is used mainly for stuff that wants to invalidate a dentry for some
* reason (NFS timeouts or autofs deletes).
*
* __d_drop requires dentry->d_lock.
*/
static inline void __d_drop(struct dentry *dentry)
{
if (!(dentry->d_flags & DCACHE_UNHASHED)) {
dentry->d_flags |= DCACHE_UNHASHED;
hlist_del_rcu(&dentry->d_hash);
}
}
static inline void d_drop(struct dentry *dentry)
{
spin_lock(&dcache_lock);
spin_lock(&dentry->d_lock);
__d_drop(dentry);
spin_unlock(&dentry->d_lock);
spin_unlock(&dcache_lock);
}
static inline int dname_external(struct dentry *dentry)
{
return dentry->d_name.name != dentry->d_iname;
@@ -235,10 +225,14 @@ static inline int dname_external(struct dentry *dentry)
extern void d_instantiate(struct dentry *, struct inode *);
extern struct dentry * d_instantiate_unique(struct dentry *, struct inode *);
extern struct dentry * d_materialise_unique(struct dentry *, struct inode *);
extern void __d_drop(struct dentry *dentry);
extern void d_drop(struct dentry *dentry);
extern void d_delete(struct dentry *);
extern void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op);
/* allocate/de-allocate */
extern struct dentry * d_alloc(struct dentry *, const struct qstr *);
extern struct dentry * d_alloc_pseudo(struct super_block *, const struct qstr *);
extern struct dentry * d_splice_alias(struct inode *, struct dentry *);
extern struct dentry * d_add_ci(struct dentry *, struct inode *, struct qstr *);
extern struct dentry * d_obtain_alias(struct inode *);
@@ -296,14 +290,40 @@ static inline struct dentry *d_add_unique(struct dentry *entry, struct inode *in
return res;
}
extern void dentry_update_name_case(struct dentry *, struct qstr *);
/* used for rename() and baskets */
extern void d_move(struct dentry *, struct dentry *);
extern struct dentry *d_ancestor(struct dentry *, struct dentry *);
/* appendix may either be NULL or be used for transname suffixes */
extern struct dentry * d_lookup(struct dentry *, struct qstr *);
extern struct dentry * __d_lookup(struct dentry *, struct qstr *);
extern struct dentry * d_hash_and_lookup(struct dentry *, struct qstr *);
extern struct dentry *d_lookup(struct dentry *, struct qstr *);
extern struct dentry *d_hash_and_lookup(struct dentry *, struct qstr *);
extern struct dentry *__d_lookup(struct dentry *, struct qstr *);
extern struct dentry *__d_lookup_rcu(struct dentry *parent, struct qstr *name,
unsigned *seq, struct inode **inode);
/**
* __d_rcu_to_refcount - take a refcount on dentry if sequence check is ok
* @dentry: dentry to take a ref on
* @seq: seqcount to verify against
* Returns: 0 on failure, else 1.
*
* __d_rcu_to_refcount operates on a dentry,seq pair that was returned
* by __d_lookup_rcu, to get a reference on an rcu-walk dentry.
*/
static inline int __d_rcu_to_refcount(struct dentry *dentry, unsigned seq)
{
int ret = 0;
assert_spin_locked(&dentry->d_lock);
if (!read_seqcount_retry(&dentry->d_seq, seq)) {
ret = 1;
dentry->d_count++;
}
return ret;
}
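
This helper is the rcu-walk to ref-walk handoff: with d_lock held, the sequence sampled by __d_lookup_rcu is re-checked, and a real reference is taken only if the dentry was not renamed or moved in the meantime. A sketch of the calling pattern (illustrative; in this series the actual callers live in fs/namei.c, and the goto label is hypothetical):

spin_lock(&dentry->d_lock);
if (!__d_rcu_to_refcount(dentry, seq)) {
	/* dentry changed since the lockless lookup: retry in ref-walk */
	spin_unlock(&dentry->d_lock);
	goto drop_out_of_rcu_walk;
}
spin_unlock(&dentry->d_lock);
/* d_count now pins the dentry; rcu_read_unlock() is safe after this */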
/* validate "insecure" dentry pointer */
extern int d_validate(struct dentry *, struct dentry *);
@@ -316,34 +336,37 @@ extern char *dynamic_dname(struct dentry *, char *, int, const char *, ...);
extern char *__d_path(const struct path *path, struct path *root, char *, int);
extern char *d_path(const struct path *, char *, int);
extern char *d_path_with_unreachable(const struct path *, char *, int);
extern char *__dentry_path(struct dentry *, char *, int);
extern char *dentry_path_raw(struct dentry *, char *, int);
extern char *dentry_path(struct dentry *, char *, int);
/* Allocation counts.. */
/**
* dget, dget_locked - get a reference to a dentry
* dget, dget_dlock - get a reference to a dentry
* @dentry: dentry to get a reference to
*
* Given a dentry or %NULL pointer increment the reference count
* if appropriate and return the dentry. A dentry will not be
* destroyed when it has references. dget() should never be
* called for dentries with zero reference counter. For these cases
* (preferably none, functions in dcache.c are sufficient for normal
* needs and they take necessary precautions) you should hold dcache_lock
* and call dget_locked() instead of dget().
* destroyed when it has references.
*/
static inline struct dentry *dget_dlock(struct dentry *dentry)
{
if (dentry)
dentry->d_count++;
return dentry;
}
static inline struct dentry *dget(struct dentry *dentry)
{
if (dentry) {
BUG_ON(!atomic_read(&dentry->d_count));
atomic_inc(&dentry->d_count);
spin_lock(&dentry->d_lock);
dget_dlock(dentry);
spin_unlock(&dentry->d_lock);
}
return dentry;
}
extern struct dentry * dget_locked(struct dentry *);
extern struct dentry *dget_parent(struct dentry *dentry);
/**
* d_unhashed - is dentry hashed
@@ -374,21 +397,11 @@ static inline void dont_mount(struct dentry *dentry)
spin_unlock(&dentry->d_lock);
}
static inline struct dentry *dget_parent(struct dentry *dentry)
{
struct dentry *ret;
spin_lock(&dentry->d_lock);
ret = dget(dentry->d_parent);
spin_unlock(&dentry->d_lock);
return ret;
}
extern void dput(struct dentry *);
static inline int d_mountpoint(struct dentry *dentry)
{
return dentry->d_mounted;
return dentry->d_flags & DCACHE_MOUNTED;
}
extern struct vfsmount *lookup_mnt(struct path *);

include/linux/fs.h

@@ -392,6 +392,7 @@ struct inodes_stat_t {
#include <linux/capability.h>
#include <linux/semaphore.h>
#include <linux/fiemap.h>
#include <linux/rculist_bl.h>
#include <asm/atomic.h>
#include <asm/byteorder.h>
@@ -733,16 +734,31 @@ struct posix_acl;
#define ACL_NOT_CACHED ((void *)(-1))
struct inode {
/* RCU path lookup touches following: */
umode_t i_mode;
uid_t i_uid;
gid_t i_gid;
const struct inode_operations *i_op;
struct super_block *i_sb;
spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */
unsigned int i_flags;
struct mutex i_mutex;
unsigned long i_state;
unsigned long dirtied_when; /* jiffies of first dirtying */
struct hlist_node i_hash;
struct list_head i_wb_list; /* backing dev IO list */
struct list_head i_lru; /* inode LRU list */
struct list_head i_sb_list;
struct list_head i_dentry;
union {
struct list_head i_dentry;
struct rcu_head i_rcu;
};
unsigned long i_ino;
atomic_t i_count;
unsigned int i_nlink;
uid_t i_uid;
gid_t i_gid;
dev_t i_rdev;
unsigned int i_blkbits;
u64 i_version;
@@ -755,13 +771,8 @@ struct inode {
struct timespec i_ctime;
blkcnt_t i_blocks;
unsigned short i_bytes;
umode_t i_mode;
spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */
struct mutex i_mutex;
struct rw_semaphore i_alloc_sem;
const struct inode_operations *i_op;
const struct file_operations *i_fop; /* former ->i_op->default_file_ops */
struct super_block *i_sb;
struct file_lock *i_flock;
struct address_space *i_mapping;
struct address_space i_data;
@@ -782,11 +793,6 @@ struct inode {
struct hlist_head i_fsnotify_marks;
#endif
unsigned long i_state;
unsigned long dirtied_when; /* jiffies of first dirtying */
unsigned int i_flags;
#ifdef CONFIG_IMA
/* protected by i_lock */
unsigned int i_readcount; /* struct files open RO */
@@ -1372,13 +1378,13 @@ struct super_block {
const struct xattr_handler **s_xattr;
struct list_head s_inodes; /* all inodes */
struct hlist_head s_anon; /* anonymous dentries for (nfs) exporting */
struct hlist_bl_head s_anon; /* anonymous dentries for (nfs) exporting */
#ifdef CONFIG_SMP
struct list_head __percpu *s_files;
#else
struct list_head s_files;
#endif
/* s_dentry_lru and s_nr_dentry_unused are protected by dcache_lock */
/* s_dentry_lru, s_nr_dentry_unused protected by dcache.c lru locks */
struct list_head s_dentry_lru; /* unused dentry lru */
int s_nr_dentry_unused; /* # of dentry on lru */
@@ -1545,9 +1551,18 @@ struct file_operations {
int (*setlease)(struct file *, long, struct file_lock **);
};
#define IPERM_FLAG_RCU 0x0001
struct inode_operations {
int (*create) (struct inode *,struct dentry *,int, struct nameidata *);
struct dentry * (*lookup) (struct inode *,struct dentry *, struct nameidata *);
void * (*follow_link) (struct dentry *, struct nameidata *);
int (*permission) (struct inode *, int, unsigned int);
int (*check_acl)(struct inode *, int, unsigned int);
int (*readlink) (struct dentry *, char __user *,int);
void (*put_link) (struct dentry *, struct nameidata *, void *);
int (*create) (struct inode *,struct dentry *,int, struct nameidata *);
int (*link) (struct dentry *,struct inode *,struct dentry *);
int (*unlink) (struct inode *,struct dentry *);
int (*symlink) (struct inode *,struct dentry *,const char *);
@@ -1556,12 +1571,7 @@ struct inode_operations {
int (*mknod) (struct inode *,struct dentry *,int,dev_t);
int (*rename) (struct inode *, struct dentry *,
struct inode *, struct dentry *);
int (*readlink) (struct dentry *, char __user *,int);
void * (*follow_link) (struct dentry *, struct nameidata *);
void (*put_link) (struct dentry *, struct nameidata *, void *);
void (*truncate) (struct inode *);
int (*permission) (struct inode *, int);
int (*check_acl)(struct inode *, int);
int (*setattr) (struct dentry *, struct iattr *);
int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *);
int (*setxattr) (struct dentry *, const char *,const void *,size_t,int);
@@ -1573,7 +1583,7 @@ struct inode_operations {
loff_t len);
int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start,
u64 len);
};
} ____cacheline_aligned;
struct seq_file;
@@ -2158,8 +2168,8 @@ extern sector_t bmap(struct inode *, sector_t);
#endif
extern int notify_change(struct dentry *, struct iattr *);
extern int inode_permission(struct inode *, int);
extern int generic_permission(struct inode *, int,
int (*check_acl)(struct inode *, int));
extern int generic_permission(struct inode *, int, unsigned int,
int (*check_acl)(struct inode *, int, unsigned int));
static inline bool execute_ok(struct inode *inode)
{
@@ -2230,6 +2240,7 @@ extern void iget_failed(struct inode *);
extern void end_writeback(struct inode *);
extern void __destroy_inode(struct inode *);
extern struct inode *new_inode(struct super_block *);
extern void free_inode_nonrcu(struct inode *inode);
extern int should_remove_suid(struct dentry *);
extern int file_remove_suid(struct file *);
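
The i_dentry/i_rcu union above and free_inode_nonrcu exist because rcu-walk can touch an inode with no references held, so inodes must now be freed only after an RCU grace period. A hedged sketch of the pattern the per-filesystem conversions in this series follow (all example_* names and EXAMPLE_I are illustrative):

struct example_inode_info {
	/* fs-private fields would live here */
	struct inode vfs_inode;
};
#define EXAMPLE_I(inode) container_of(inode, struct example_inode_info, vfs_inode)

static struct kmem_cache *example_inode_cachep;

/* RCU callback: actually free the inode once lockless readers are done. */
static void example_i_callback(struct rcu_head *head)
{
	struct inode *inode = container_of(head, struct inode, i_rcu);
	kmem_cache_free(example_inode_cachep, EXAMPLE_I(inode));
}

static void example_destroy_inode(struct inode *inode)
{
	call_rcu(&inode->i_rcu, example_i_callback);
}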
@@ -2446,6 +2457,10 @@ static inline ino_t parent_ino(struct dentry *dentry)
{
ino_t res;
/*
* Don't strictly need d_lock here? If the parent ino could change
* then surely we'd have a deeper race in the caller?
*/
spin_lock(&dentry->d_lock);
res = dentry->d_parent->d_inode->i_ino;
spin_unlock(&dentry->d_lock);

include/linux/fs_struct.h

@@ -2,10 +2,13 @@
#define _LINUX_FS_STRUCT_H
#include <linux/path.h>
#include <linux/spinlock.h>
#include <linux/seqlock.h>
struct fs_struct {
int users;
spinlock_t lock;
seqcount_t seq;
int umask;
int in_exec;
struct path root, pwd;
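
fs_struct gains a seqcount so rcu-walk can sample root and pwd without taking fs->lock: writers bump fs->seq around updates, and lockless readers retry if it changed underneath them. A sketch of the read side (illustrative; the real users are in fs/namei.c), valid only inside an RCU read-side section since no path reference is taken:

struct fs_struct *fs = current->fs;
struct path root;
unsigned seq;

do {
	seq = read_seqcount_begin(&fs->seq);
	root = fs->root;	/* copied, not referenced */
} while (read_seqcount_retry(&fs->seq, seq));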

include/linux/fsnotify.h

@@ -17,7 +17,6 @@
/*
* fsnotify_d_instantiate - instantiate a dentry for inode
* Called with dcache_lock held.
*/
static inline void fsnotify_d_instantiate(struct dentry *dentry,
struct inode *inode)
@@ -62,7 +61,6 @@ static inline int fsnotify_perm(struct file *file, int mask)
/*
* fsnotify_d_move - dentry has been moved
* Called with dcache_lock and dentry->d_lock held.
*/
static inline void fsnotify_d_move(struct dentry *dentry)
{

include/linux/fsnotify_backend.h

@@ -329,9 +329,15 @@ static inline void __fsnotify_update_dcache_flags(struct dentry *dentry)
{
struct dentry *parent;
assert_spin_locked(&dcache_lock);
assert_spin_locked(&dentry->d_lock);
/*
* Serialisation of setting PARENT_WATCHED on the dentries is provided
* by d_lock. If inotify_inode_watched changes after we have taken
* d_lock, the following __fsnotify_update_child_dentry_flags call will
* find our entry, so it will spin until we complete here, and update
* us with the new state.
*/
parent = dentry->d_parent;
if (parent->d_inode && fsnotify_inode_watches_children(parent->d_inode))
dentry->d_flags |= DCACHE_FSNOTIFY_PARENT_WATCHED;
@@ -341,15 +347,12 @@ static inline void __fsnotify_update_dcache_flags(struct dentry *dentry)
/*
* fsnotify_d_instantiate - instantiate a dentry for inode
* Called with dcache_lock held.
*/
static inline void __fsnotify_d_instantiate(struct dentry *dentry, struct inode *inode)
{
if (!inode)
return;
assert_spin_locked(&dcache_lock);
spin_lock(&dentry->d_lock);
__fsnotify_update_dcache_flags(dentry);
spin_unlock(&dentry->d_lock);

include/linux/generic_acl.h

@@ -10,6 +10,6 @@ extern const struct xattr_handler generic_acl_default_handler;
int generic_acl_init(struct inode *, struct inode *);
int generic_acl_chmod(struct inode *);
int generic_check_acl(struct inode *inode, int mask);
int generic_check_acl(struct inode *inode, int mask, unsigned int flags);
#endif /* LINUX_GENERIC_ACL_H */

include/linux/list_bl.h (new file, 144 lines)

@@ -0,0 +1,144 @@
#ifndef _LINUX_LIST_BL_H
#define _LINUX_LIST_BL_H
#include <linux/list.h>
/*
* Special version of lists, where head of the list has a lock in the lowest
* bit. This is useful for scalable hash tables without increasing memory
* footprint overhead.
*
* For modification operations, the 0 bit of hlist_bl_head->first
* pointer must be set.
*
* With some small modifications, this can easily be adapted to store several
* arbitrary bits (not just a single lock bit), if the need arises to store
* some fast and compact auxiliary data.
*/
#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
#define LIST_BL_LOCKMASK 1UL
#else
#define LIST_BL_LOCKMASK 0UL
#endif
#ifdef CONFIG_DEBUG_LIST
#define LIST_BL_BUG_ON(x) BUG_ON(x)
#else
#define LIST_BL_BUG_ON(x)
#endif
struct hlist_bl_head {
struct hlist_bl_node *first;
};
struct hlist_bl_node {
struct hlist_bl_node *next, **pprev;
};
#define INIT_HLIST_BL_HEAD(ptr) \
((ptr)->first = NULL)
static inline void INIT_HLIST_BL_NODE(struct hlist_bl_node *h)
{
h->next = NULL;
h->pprev = NULL;
}
#define hlist_bl_entry(ptr, type, member) container_of(ptr,type,member)
static inline int hlist_bl_unhashed(const struct hlist_bl_node *h)
{
return !h->pprev;
}
static inline struct hlist_bl_node *hlist_bl_first(struct hlist_bl_head *h)
{
return (struct hlist_bl_node *)
((unsigned long)h->first & ~LIST_BL_LOCKMASK);
}
static inline void hlist_bl_set_first(struct hlist_bl_head *h,
struct hlist_bl_node *n)
{
LIST_BL_BUG_ON((unsigned long)n & LIST_BL_LOCKMASK);
LIST_BL_BUG_ON(!((unsigned long)h->first & LIST_BL_LOCKMASK));
h->first = (struct hlist_bl_node *)((unsigned long)n | LIST_BL_LOCKMASK);
}
static inline int hlist_bl_empty(const struct hlist_bl_head *h)
{
return !((unsigned long)h->first & ~LIST_BL_LOCKMASK);
}
static inline void hlist_bl_add_head(struct hlist_bl_node *n,
struct hlist_bl_head *h)
{
struct hlist_bl_node *first = hlist_bl_first(h);
n->next = first;
if (first)
first->pprev = &n->next;
n->pprev = &h->first;
hlist_bl_set_first(h, n);
}
static inline void __hlist_bl_del(struct hlist_bl_node *n)
{
struct hlist_bl_node *next = n->next;
struct hlist_bl_node **pprev = n->pprev;
LIST_BL_BUG_ON((unsigned long)n & LIST_BL_LOCKMASK);
/* pprev may be `first`, so be careful not to lose the lock bit */
*pprev = (struct hlist_bl_node *)
((unsigned long)next |
((unsigned long)*pprev & LIST_BL_LOCKMASK));
if (next)
next->pprev = pprev;
}
static inline void hlist_bl_del(struct hlist_bl_node *n)
{
__hlist_bl_del(n);
n->next = LIST_POISON1;
n->pprev = LIST_POISON2;
}
static inline void hlist_bl_del_init(struct hlist_bl_node *n)
{
if (!hlist_bl_unhashed(n)) {
__hlist_bl_del(n);
INIT_HLIST_BL_NODE(n);
}
}
/**
* hlist_bl_for_each_entry - iterate over list of given type
* @tpos: the type * to use as a loop cursor.
* @pos: the &struct hlist_node to use as a loop cursor.
* @head: the head for your list.
* @member: the name of the hlist_node within the struct.
*
*/
#define hlist_bl_for_each_entry(tpos, pos, head, member) \
for (pos = hlist_bl_first(head); \
pos && \
({ tpos = hlist_bl_entry(pos, typeof(*tpos), member); 1;}); \
pos = pos->next)
/**
* hlist_bl_for_each_entry_safe - iterate over list of given type safe against removal of list entry
* @tpos: the type * to use as a loop cursor.
* @pos: the &struct hlist_node to use as a loop cursor.
* @n: another &struct hlist_node to use as temporary storage
* @head: the head for your list.
* @member: the name of the hlist_node within the struct.
*/
#define hlist_bl_for_each_entry_safe(tpos, pos, n, head, member) \
for (pos = hlist_bl_first(head); \
pos && ({ n = pos->next; 1; }) && \
({ tpos = hlist_bl_entry(pos, typeof(*tpos), member); 1;}); \
pos = n)
#endif
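
The lock is the low bit of ->first, taken with bit_spin_lock — which is why bit_spinlock.h gained its missing includes earlier in this series. A sketch of how a user locks a chain around modification (the hlist_bl_lock/hlist_bl_unlock wrappers here are illustrative; the dcache defines its own equivalents):

#include <linux/bit_spinlock.h>
#include <linux/list_bl.h>

/* Bit 0 of head->first doubles as the per-chain lock bit. */
static inline void hlist_bl_lock(struct hlist_bl_head *h)
{
	bit_spin_lock(0, (unsigned long *)&h->first);
}

static inline void hlist_bl_unlock(struct hlist_bl_head *h)
{
	__bit_spin_unlock(0, (unsigned long *)&h->first);
}

static void example_insert(struct hlist_bl_head *head,
			   struct hlist_bl_node *node)
{
	hlist_bl_lock(head);
	hlist_bl_add_head(node, head);	/* hlist_bl_set_first preserves the lock bit */
	hlist_bl_unlock(head);
}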

include/linux/mount.h

@@ -13,6 +13,7 @@
#include <linux/list.h>
#include <linux/nodemask.h>
#include <linux/spinlock.h>
#include <linux/seqlock.h>
#include <asm/atomic.h>
struct super_block;
@@ -46,12 +47,24 @@ struct mnt_namespace;
#define MNT_INTERNAL 0x4000
struct mnt_pcp {
int mnt_count;
int mnt_writers;
};
struct vfsmount {
struct list_head mnt_hash;
struct vfsmount *mnt_parent; /* fs we are mounted on */
struct dentry *mnt_mountpoint; /* dentry of mountpoint */
struct dentry *mnt_root; /* root of the mounted tree */
struct super_block *mnt_sb; /* pointer to superblock */
#ifdef CONFIG_SMP
struct mnt_pcp __percpu *mnt_pcp;
atomic_t mnt_longrefs;
#else
int mnt_count;
int mnt_writers;
#endif
struct list_head mnt_mounts; /* list of children, anchored here */
struct list_head mnt_child; /* and going through their mnt_child */
int mnt_flags;
@@ -70,57 +83,25 @@ struct vfsmount {
struct mnt_namespace *mnt_ns; /* containing namespace */
int mnt_id; /* mount identifier */
int mnt_group_id; /* peer group identifier */
/*
* We put mnt_count & mnt_expiry_mark at the end of struct vfsmount
* to let these frequently modified fields in a separate cache line
* (so that reads of mnt_flags wont ping-pong on SMP machines)
*/
atomic_t mnt_count;
int mnt_expiry_mark; /* true if marked for expiry */
int mnt_pinned;
int mnt_ghosts;
#ifdef CONFIG_SMP
int __percpu *mnt_writers;
#else
int mnt_writers;
#endif
};
static inline int *get_mnt_writers_ptr(struct vfsmount *mnt)
{
#ifdef CONFIG_SMP
return mnt->mnt_writers;
#else
return &mnt->mnt_writers;
#endif
}
static inline struct vfsmount *mntget(struct vfsmount *mnt)
{
if (mnt)
atomic_inc(&mnt->mnt_count);
return mnt;
}
struct file; /* forward dec */
extern int mnt_want_write(struct vfsmount *mnt);
extern int mnt_want_write_file(struct file *file);
extern int mnt_clone_write(struct vfsmount *mnt);
extern void mnt_drop_write(struct vfsmount *mnt);
extern void mntput_no_expire(struct vfsmount *mnt);
extern void mntput(struct vfsmount *mnt);
extern struct vfsmount *mntget(struct vfsmount *mnt);
extern void mntput_long(struct vfsmount *mnt);
extern struct vfsmount *mntget_long(struct vfsmount *mnt);
extern void mnt_pin(struct vfsmount *mnt);
extern void mnt_unpin(struct vfsmount *mnt);
extern int __mnt_is_readonly(struct vfsmount *mnt);
static inline void mntput(struct vfsmount *mnt)
{
if (mnt) {
mnt->mnt_expiry_mark = 0;
mntput_no_expire(mnt);
}
}
extern struct vfsmount *do_kern_mount(const char *fstype, int flags,
const char *name, void *data);
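
With the shared atomic gone, mntget/mntput touch only a per-cpu counter in the common case; code that needs a true reference total (e.g. at unmount) must sum all per-cpu counts while holding the appropriate lock. A sketch of that summation, modelled on what the writer side has to do (example_mnt_get_count is an illustrative name):

static int example_mnt_get_count(struct vfsmount *mnt)
{
#ifdef CONFIG_SMP
	int count = 0;
	int cpu;

	/* Per-cpu increments/decrements only make sense as a total. */
	for_each_possible_cpu(cpu)
		count += per_cpu_ptr(mnt->mnt_pcp, cpu)->mnt_count;
	return count;
#else
	return mnt->mnt_count;
#endif
}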

include/linux/namei.h

@@ -19,7 +19,10 @@ struct nameidata {
struct path path;
struct qstr last;
struct path root;
struct file *file;
struct inode *inode; /* path.dentry.d_inode */
unsigned int flags;
unsigned seq;
int last_type;
unsigned depth;
char *saved_names[MAX_NESTED_LINKS + 1];
@@ -41,14 +44,15 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT, LAST_BIND};
* - require a directory
* - ending slashes ok even for nonexistent files
* - internal "there are more path components" flag
* - locked when lookup done with dcache_lock held
* - dentry cache is untrusted; force a real lookup
*/
#define LOOKUP_FOLLOW 1
#define LOOKUP_DIRECTORY 2
#define LOOKUP_CONTINUE 4
#define LOOKUP_PARENT 16
#define LOOKUP_REVAL 64
#define LOOKUP_FOLLOW 0x0001
#define LOOKUP_DIRECTORY 0x0002
#define LOOKUP_CONTINUE 0x0004
#define LOOKUP_PARENT 0x0010
#define LOOKUP_REVAL 0x0020
#define LOOKUP_RCU 0x0040
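
LOOKUP_RCU marks a nameidata that is mid rcu-walk; any per-filesystem hook invoked under it must not sleep or take references. The rcu-walk aware d_revalidate patch in this series uses the same -ECHILD escape as ->permission. A hedged sketch (example_d_revalidate is an illustrative name):

/* Sketch: a d_revalidate method that bails out of rcu-walk. */
static int example_d_revalidate(struct dentry *dentry, struct nameidata *nd)
{
	if (nd->flags & LOOKUP_RCU)
		return -ECHILD;	/* needs to block: redo this dentry in ref-walk */
	/* ... normal, possibly sleeping revalidation ... */
	return 1;
}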
/*
* Intent data
*/

include/linux/ncp_fs.h

@@ -184,13 +184,13 @@ struct ncp_entry_info {
__u8 file_handle[6];
};
static inline struct ncp_server *NCP_SBP(struct super_block *sb)
static inline struct ncp_server *NCP_SBP(const struct super_block *sb)
{
return sb->s_fs_info;
}
#define NCP_SERVER(inode) NCP_SBP((inode)->i_sb)
static inline struct ncp_inode_info *NCP_FINFO(struct inode *inode)
static inline struct ncp_inode_info *NCP_FINFO(const struct inode *inode)
{
return container_of(inode, struct ncp_inode_info, vfs_inode);
}

include/linux/nfs_fs.h

@@ -351,7 +351,7 @@ extern int nfs_refresh_inode(struct inode *, struct nfs_fattr *);
extern int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr);
extern int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fattr);
extern int nfs_getattr(struct vfsmount *, struct dentry *, struct kstat *);
extern int nfs_permission(struct inode *, int);
extern int nfs_permission(struct inode *, int, unsigned int);
extern int nfs_open(struct inode *, struct file *);
extern int nfs_release(struct inode *, struct file *);
extern int nfs_attribute_timeout(struct inode *inode);

include/linux/path.h

@@ -10,7 +10,9 @@ struct path {
};
extern void path_get(struct path *);
extern void path_get_long(struct path *);
extern void path_put(struct path *);
extern void path_put_long(struct path *);
static inline int path_equal(const struct path *path1, const struct path *path2)
{

include/linux/posix_acl.h

@@ -108,6 +108,25 @@ static inline struct posix_acl *get_cached_acl(struct inode *inode, int type)
return acl;
}
static inline int negative_cached_acl(struct inode *inode, int type)
{
struct posix_acl **p, *acl;
switch (type) {
case ACL_TYPE_ACCESS:
p = &inode->i_acl;
break;
case ACL_TYPE_DEFAULT:
p = &inode->i_default_acl;
break;
default:
BUG();
}
acl = ACCESS_ONCE(*p);
if (acl)
return 0;
return 1;
}
static inline void set_cached_acl(struct inode *inode,
int type,
struct posix_acl *acl)
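
negative_cached_acl is what makes the "simple rcu-walk ACL implementation" patches possible: if the cache provably holds no ACL, an rcu-walk permission check can fall through to plain mode-bit checks without touching the ACL; any other state forces a drop to ref-walk. A sketch of the rcu branch, following the shape of generic_check_acl from this series (example_check_acl is an illustrative name):

static int example_check_acl(struct inode *inode, int mask, unsigned int flags)
{
	if (flags & IPERM_FLAG_RCU) {
		/* A present (or unknown) ACL cannot be used locklessly. */
		if (!negative_cached_acl(inode, ACL_TYPE_ACCESS))
			return -ECHILD;
		return -EAGAIN;	/* no ACL: caller falls back to mode bits */
	}
	/* ref-walk path: get_cached_acl()/posix_acl_permission() as usual */
	return -EAGAIN;
}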

include/linux/rculist_bl.h (new file, 127 lines)

@@ -0,0 +1,127 @@
#ifndef _LINUX_RCULIST_BL_H
#define _LINUX_RCULIST_BL_H
/*
* RCU-protected bl list version. See include/linux/list_bl.h.
*/
#include <linux/list_bl.h>
#include <linux/rcupdate.h>
static inline void hlist_bl_set_first_rcu(struct hlist_bl_head *h,
struct hlist_bl_node *n)
{
LIST_BL_BUG_ON((unsigned long)n & LIST_BL_LOCKMASK);
LIST_BL_BUG_ON(!((unsigned long)h->first & LIST_BL_LOCKMASK));
rcu_assign_pointer(h->first,
(struct hlist_bl_node *)((unsigned long)n | LIST_BL_LOCKMASK));
}
static inline struct hlist_bl_node *hlist_bl_first_rcu(struct hlist_bl_head *h)
{
return (struct hlist_bl_node *)
((unsigned long)rcu_dereference(h->first) & ~LIST_BL_LOCKMASK);
}
/**
* hlist_bl_del_init_rcu - deletes entry from hash list with re-initialization
* @n: the element to delete from the hash list.
*
* Note: hlist_bl_unhashed() on the node returns true after this. It is
* useful for RCU based read lockfree traversal if the writer side
* must know if the list entry is still hashed or already unhashed.
*
* In particular, it means that we can not poison the forward pointers
* that may still be used for walking the hash list and we can only
* zero the pprev pointer so list_unhashed() will return true after
* this.
*
* The caller must take whatever precautions are necessary (such as
* holding appropriate locks) to avoid racing with another
* list-mutation primitive, such as hlist_bl_add_head_rcu() or
* hlist_bl_del_rcu(), running on this same list. However, it is
* perfectly legal to run concurrently with the _rcu list-traversal
* primitives, such as hlist_bl_for_each_entry_rcu().
*/
static inline void hlist_bl_del_init_rcu(struct hlist_bl_node *n)
{
if (!hlist_bl_unhashed(n)) {
__hlist_bl_del(n);
n->pprev = NULL;
}
}
/**
* hlist_bl_del_rcu - deletes entry from hash list without re-initialization
* @n: the element to delete from the hash list.
*
* Note: hlist_bl_unhashed() on entry does not return true after this,
* the entry is in an undefined state. It is useful for RCU based
* lockfree traversal.
*
* In particular, it means that we can not poison the forward
* pointers that may still be used for walking the hash list.
*
* The caller must take whatever precautions are necessary
* (such as holding appropriate locks) to avoid racing
* with another list-mutation primitive, such as hlist_bl_add_head_rcu()
* or hlist_bl_del_rcu(), running on this same list.
* However, it is perfectly legal to run concurrently with
* the _rcu list-traversal primitives, such as
* hlist_bl_for_each_entry().
*/
static inline void hlist_bl_del_rcu(struct hlist_bl_node *n)
{
__hlist_bl_del(n);
n->pprev = LIST_POISON2;
}
/**
* hlist_bl_add_head_rcu
* @n: the element to add to the hash list.
* @h: the list to add to.
*
* Description:
* Adds the specified element to the specified hlist_bl,
* while permitting racing traversals.
*
* The caller must take whatever precautions are necessary
* (such as holding appropriate locks) to avoid racing
* with another list-mutation primitive, such as hlist_bl_add_head_rcu()
* or hlist_bl_del_rcu(), running on this same list.
* However, it is perfectly legal to run concurrently with
* the _rcu list-traversal primitives, such as
* hlist_bl_for_each_entry_rcu(), used to prevent memory-consistency
* problems on Alpha CPUs. Regardless of the type of CPU, the
* list-traversal primitive must be guarded by rcu_read_lock().
*/
static inline void hlist_bl_add_head_rcu(struct hlist_bl_node *n,
struct hlist_bl_head *h)
{
struct hlist_bl_node *first;
/* don't need hlist_bl_first_rcu because we're under lock */
first = hlist_bl_first(h);
n->next = first;
if (first)
first->pprev = &n->next;
n->pprev = &h->first;
/* need _rcu because we can have concurrent lock free readers */
hlist_bl_set_first_rcu(h, n);
}
/**
* hlist_bl_for_each_entry_rcu - iterate over rcu list of given type
* @tpos: the type * to use as a loop cursor.
* @pos: the &struct hlist_bl_node to use as a loop cursor.
* @head: the head for your list.
* @member: the name of the hlist_bl_node within the struct.
*
*/
#define hlist_bl_for_each_entry_rcu(tpos, pos, head, member) \
for (pos = hlist_bl_first_rcu(head); \
pos && \
({ tpos = hlist_bl_entry(pos, typeof(*tpos), member); 1; }); \
pos = rcu_dereference_raw(pos->next))
#endif
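
Readers traverse a bit-locked chain entirely locklessly; only the head pointer's lock bit has to be masked off, which hlist_bl_first_rcu does. A sketch of a lookup over such a chain (struct item and its fields are illustrative):

struct item {
	int key;
	struct hlist_bl_node node;
};

static bool example_contains(struct hlist_bl_head *head, int key)
{
	struct item *it;
	struct hlist_bl_node *pos;
	bool found = false;

	rcu_read_lock();
	hlist_bl_for_each_entry_rcu(it, pos, head, node) {
		if (it->key == key) {
			found = true;
			break;
		}
	}
	rcu_read_unlock();
	return found;
}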

include/linux/reiserfs_xattr.h

@@ -41,7 +41,7 @@ int reiserfs_xattr_init(struct super_block *sb, int mount_flags);
int reiserfs_lookup_privroot(struct super_block *sb);
int reiserfs_delete_xattrs(struct inode *inode);
int reiserfs_chown_xattrs(struct inode *inode, struct iattr *attrs);
int reiserfs_permission(struct inode *inode, int mask);
int reiserfs_permission(struct inode *inode, int mask, unsigned int flags);
#ifdef CONFIG_REISERFS_FS_XATTR
#define has_xattr_dir(inode) (REISERFS_I(inode)->i_flags & i_has_xattr_dir)

include/linux/security.h

@@ -457,7 +457,6 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
* called when the actual read/write operations are performed.
* @inode contains the inode structure to check.
* @mask contains the permission mask.
* @nd contains the nameidata (may be NULL).
* Return 0 if permission is granted.
* @inode_setattr:
* Check permission before setting file attributes. Note that the kernel
@@ -1713,6 +1712,7 @@ int security_inode_rename(struct inode *old_dir, struct dentry *old_dentry,
int security_inode_readlink(struct dentry *dentry);
int security_inode_follow_link(struct dentry *dentry, struct nameidata *nd);
int security_inode_permission(struct inode *inode, int mask);
int security_inode_exec_permission(struct inode *inode, unsigned int flags);
int security_inode_setattr(struct dentry *dentry, struct iattr *attr);
int security_inode_getattr(struct vfsmount *mnt, struct dentry *dentry);
int security_inode_setxattr(struct dentry *dentry, const char *name,
@@ -2102,6 +2102,12 @@ static inline int security_inode_permission(struct inode *inode, int mask)
return 0;
}
static inline int security_inode_exec_permission(struct inode *inode,
unsigned int flags)
{
return 0;
}
static inline int security_inode_setattr(struct dentry *dentry,
struct iattr *attr)
{

include/linux/seqlock.h

@@ -107,7 +107,7 @@ static __always_inline int read_seqretry(const seqlock_t *sl, unsigned start)
{
smp_rmb();
return (sl->sequence != start);
return unlikely(sl->sequence != start);
}
@@ -125,14 +125,25 @@ typedef struct seqcount {
#define SEQCNT_ZERO { 0 }
#define seqcount_init(x) do { *(x) = (seqcount_t) SEQCNT_ZERO; } while (0)
/* Start of read using pointer to a sequence counter only. */
static inline unsigned read_seqcount_begin(const seqcount_t *s)
/**
* __read_seqcount_begin - begin a seq-read critical section (without barrier)
* @s: pointer to seqcount_t
* Returns: count to be passed to read_seqcount_retry
*
* __read_seqcount_begin is like read_seqcount_begin, but has no smp_rmb()
* barrier. Callers should ensure that smp_rmb() or equivalent ordering is
* provided before actually loading any of the variables that are to be
* protected in this critical section.
*
* Use carefully, only in critical code, and comment how the barrier is
* provided.
*/
static inline unsigned __read_seqcount_begin(const seqcount_t *s)
{
unsigned ret;
repeat:
ret = s->sequence;
smp_rmb();
if (unlikely(ret & 1)) {
cpu_relax();
goto repeat;
@@ -140,14 +151,56 @@ repeat:
return ret;
}
/*
* Test if reader processed invalid data because sequence number has changed.
/**
* read_seqcount_begin - begin a seq-read critical section
* @s: pointer to seqcount_t
* Returns: count to be passed to read_seqcount_retry
*
* read_seqcount_begin opens a read critical section of the given seqcount.
* Validity of the critical section is tested by checking read_seqcount_retry
* function.
*/
static inline unsigned read_seqcount_begin(const seqcount_t *s)
{
unsigned ret = __read_seqcount_begin(s);
smp_rmb();
return ret;
}
/**
* __read_seqcount_retry - end a seq-read critical section (without barrier)
* @s: pointer to seqcount_t
* @start: count, from read_seqcount_begin
* Returns: 1 if retry is required, else 0
*
* __read_seqcount_retry is like read_seqcount_retry, but has no smp_rmb()
* barrier. Callers should ensure that smp_rmb() or equivalent ordering is
* provided before actually loading any of the variables that are to be
* protected in this critical section.
*
* Use carefully, only in critical code, and comment how the barrier is
* provided.
*/
static inline int __read_seqcount_retry(const seqcount_t *s, unsigned start)
{
return unlikely(s->sequence != start);
}
/**
* read_seqcount_retry - end a seq-read critical section
* @s: pointer to seqcount_t
* @start: count, from read_seqcount_begin
* Returns: 1 if retry is required, else 0
*
* read_seqcount_retry closes a read critical section of the given seqcount.
* If the critical section was invalid, it must be ignored (and typically
* retried).
*/
static inline int read_seqcount_retry(const seqcount_t *s, unsigned start)
{
smp_rmb();
return s->sequence != start;
return __read_seqcount_retry(s, start);
}
@@ -167,6 +220,19 @@ static inline void write_seqcount_end(seqcount_t *s)
s->sequence++;
}
/**
* write_seqcount_barrier - invalidate in-progress read-side seq operations
* @s: pointer to seqcount_t
*
* After write_seqcount_barrier, no read-side seq operations will complete
* successfully and see data older than this.
*/
static inline void write_seqcount_barrier(seqcount_t *s)
{
smp_wmb();
s->sequence += 2;
}
/*
* Possible sw/hw IRQ protected versions of the interfaces.
*/
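
The split into __read_seqcount_begin/__read_seqcount_retry lets hot paths such as the dcache fold the smp_rmb() into ordering they already provide; the barrier-full pair keeps the classic usage, sketched below (example_* names are illustrative; the writer is assumed to be serialized externally, e.g. by a spinlock):

static seqcount_t example_seq = SEQCNT_ZERO;
static int example_a, example_b;

static void example_write(int a, int b)
{
	write_seqcount_begin(&example_seq);
	example_a = a;
	example_b = b;
	write_seqcount_end(&example_seq);
}

static void example_read(int *a, int *b)
{
	unsigned seq;

	do {
		seq = read_seqcount_begin(&example_seq);
		*a = example_a;
		*b = example_b;
	} while (read_seqcount_retry(&example_seq, seq));
}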

include/linux/slab.h

@@ -106,8 +106,6 @@ int kmem_cache_shrink(struct kmem_cache *);
void kmem_cache_free(struct kmem_cache *, void *);
unsigned int kmem_cache_size(struct kmem_cache *);
const char *kmem_cache_name(struct kmem_cache *);
int kern_ptr_validate(const void *ptr, unsigned long size);
int kmem_ptr_validate(struct kmem_cache *cachep, const void *ptr);
/*
* Please use this macro to create slab caches. Simply specify the