Diffstat (limited to 'fs/inode.c')
-rw-r--r-- | fs/inode.c | 431
1 file changed, 176 insertions(+), 255 deletions(-)
diff --git a/fs/inode.c b/fs/inode.c
index 66f76f927..55eddfde8 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -10,6 +10,7 @@
 #include <linux/dcache.h>
 #include <linux/init.h>
 #include <linux/quotaops.h>
+#include <linux/slab.h>
 
 /*
  * New inode.c implementation.
@@ -21,6 +22,8 @@
  * Famous last words.
  */
 
+/* inode dynamic allocation 1999, Andrea Arcangeli <andrea@suse.de> */
+
 #define INODE_PARANOIA 1
 /* #define INODE_DEBUG 1 */
 
@@ -28,7 +31,7 @@
  * Inode lookup is no longer as critical as it used to be:
  * most of the lookups are going to be through the dcache.
  */
-#define HASH_BITS 8
+#define HASH_BITS 14
 #define HASH_SIZE (1UL << HASH_BITS)
 #define HASH_MASK (HASH_SIZE-1)
 
@@ -36,9 +39,9 @@
  * Each inode can be on two separate lists. One is
  * the hash list of the inode, used for lookups. The
  * other linked list is the "type" list:
- * "in_use" - valid inode, hashed if i_nlink > 0
- * "dirty" - valid inode, hashed if i_nlink > 0, dirty.
- * "unused" - ready to be re-used. Not hashed.
+ * "in_use" - valid inode, i_count > 0, i_nlink > 0
+ * "dirty" - as "in_use" but also dirty
+ * "unused" - valid inode, i_count = 0
  *
  * A "dirty" list is maintained for each super block,
  * allowing for low-overhead inode sync() operations.
@@ -61,11 +64,36 @@ spinlock_t inode_lock = SPIN_LOCK_UNLOCKED;
  */
 struct {
        int nr_inodes;
-       int nr_free_inodes;
+       int nr_unused;
        int dummy[5];
 } inodes_stat = {0, 0,};
 
-int max_inodes;
+static kmem_cache_t * inode_cachep;
+
+#define alloc_inode() \
+       ((struct inode *) kmem_cache_alloc(inode_cachep, SLAB_KERNEL))
+#define destroy_inode(inode) kmem_cache_free(inode_cachep, (inode))
+
+/*
+ * These are initializations that only need to be done
+ * once, because the fields are idempotent across use
+ * of the inode, so let the slab aware of that.
+ */
+static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
+{
+       struct inode * inode = (struct inode *) foo;
+
+       if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
+           SLAB_CTOR_CONSTRUCTOR)
+       {
+               memset(inode, 0, sizeof(*inode));
+               init_waitqueue_head(&inode->i_wait);
+               INIT_LIST_HEAD(&inode->i_hash);
+               INIT_LIST_HEAD(&inode->i_dentry);
+               sema_init(&inode->i_sem, 1);
+               spin_lock_init(&inode->i_shared_lock);
+       }
+}
 
 /*
  * Put the inode on the super block's dirty list.
@@ -118,20 +146,6 @@ static inline void wait_on_inode(struct inode *inode)
                __wait_on_inode(inode);
 }
 
-/*
- * These are initializations that only need to be done
- * once, because the fields are idempotent across use
- * of the inode..
- */
-static inline void init_once(struct inode * inode)
-{
-       memset(inode, 0, sizeof(*inode));
-       init_waitqueue_head(&inode->i_wait);
-       INIT_LIST_HEAD(&inode->i_hash);
-       INIT_LIST_HEAD(&inode->i_dentry);
-       sema_init(&inode->i_sem, 1);
-       spin_lock_init(&inode->i_shared_lock);
-}
 
 static inline void write_inode(struct inode *inode)
 {
@@ -147,7 +161,8 @@ static inline void sync_one(struct inode *inode)
                spin_lock(&inode_lock);
        } else {
                list_del(&inode->i_list);
-               list_add(&inode->i_list, &inode_in_use);
+               list_add(&inode->i_list,
+                        inode->i_count ? &inode_in_use : &inode_unused);
                /* Set I_LOCK, reset I_DIRTY */
                inode->i_state ^= I_DIRTY | I_LOCK;
                spin_unlock(&inode_lock);
@@ -233,6 +248,8 @@ void clear_inode(struct inode *inode)
 {
        if (inode->i_nrpages)
                BUG();
+       if (!(inode->i_state & I_FREEING))
+               BUG();
        wait_on_inode(inode);
        if (IS_QUOTAINIT(inode))
                DQUOT_DROP(inode);
@@ -243,35 +260,24 @@ void clear_inode(struct inode *inode)
 }
 
 /*
- * Dispose-list gets a local list, so it doesn't need to
- * worry about list corruption. It releases the inode lock
- * while clearing the inodes.
+ * Dispose-list gets a local list with local inodes in it, so it doesn't
+ * need to worry about list corruption and SMP locks.
  */
 static void dispose_list(struct list_head * head)
 {
-       struct list_head *next;
-       int count = 0;
+       struct list_head * inode_entry;
+       struct inode * inode;
 
-       spin_unlock(&inode_lock);
-       next = head->next;
-       for (;;) {
-               struct list_head * tmp = next;
-               struct inode * inode;
+       while ((inode_entry = head->next) != head)
+       {
+               list_del(inode_entry);
 
-               next = next->next;
-               if (tmp == head)
-                       break;
-               inode = list_entry(tmp, struct inode, i_list);
+               inode = list_entry(inode_entry, struct inode, i_list);
                if (inode->i_nrpages)
                        truncate_inode_pages(inode, 0);
                clear_inode(inode);
-               count++;
+               destroy_inode(inode);
        }
-
-       /* Add them all to the unused list in one fell swoop */
-       spin_lock(&inode_lock);
-       list_splice(head, &inode_unused);
-       inodes_stat.nr_free_inodes += count;
 }
 
 /*
@@ -280,7 +286,7 @@ static void dispose_list(struct list_head * head)
 static int invalidate_list(struct list_head *head, struct super_block * sb, struct list_head * dispose)
 {
        struct list_head *next;
-       int busy = 0;
+       int busy = 0, count = 0;
 
        next = head->next;
        for (;;) {
@@ -299,10 +305,13 @@ static int invalidate_list(struct list_head *head, struct super_block * sb, stru
                        list_del(&inode->i_list);
                        list_add(&inode->i_list, dispose);
                        inode->i_state |= I_FREEING;
+                       count++;
                        continue;
                }
                busy = 1;
        }
+       /* only unused inodes may be cached with i_count zero */
+       inodes_stat.nr_unused -= count;
        return busy;
 }
 
@@ -320,10 +329,12 @@ int invalidate_inodes(struct super_block * sb)
 
        spin_lock(&inode_lock);
        busy = invalidate_list(&inode_in_use, sb, &throw_away);
+       busy |= invalidate_list(&inode_unused, sb, &throw_away);
        busy |= invalidate_list(&sb->s_dirty, sb, &throw_away);
-       dispose_list(&throw_away);
        spin_unlock(&inode_lock);
 
+       dispose_list(&throw_away);
+
        return busy;
 }
 
@@ -339,155 +350,84 @@ int invalidate_inodes(struct super_block * sb)
  * dispose_list.
  */
 #define CAN_UNUSE(inode) \
-       (((inode)->i_count | (inode)->i_state | (inode)->i_nrpages) == 0)
+       (((inode)->i_state | (inode)->i_nrpages) == 0)
 #define INODE(entry)   (list_entry(entry, struct inode, i_list))
 
-static int free_inodes(void)
+void prune_icache(int goal)
 {
-       struct list_head list, *entry, *freeable = &list;
-       int found = 0;
+       LIST_HEAD(list);
+       struct list_head *entry, *freeable = &list;
+       int count = 0;
+       struct inode * inode;
+
+       spin_lock(&inode_lock);
+       /* go simple and safe syncing everything before starting */
+       sync_all_inodes();
 
-       INIT_LIST_HEAD(freeable);
-       entry = inode_in_use.next;
-       while (entry != &inode_in_use) {
+       entry = inode_unused.prev;
+       while (entry != &inode_unused)
+       {
                struct list_head *tmp = entry;
 
-               entry = entry->next;
-               if (!CAN_UNUSE(INODE(tmp)))
+               entry = entry->prev;
+               inode = INODE(tmp);
+               if (!CAN_UNUSE(inode))
                        continue;
+               if (inode->i_count)
+                       BUG();
                list_del(tmp);
-               list_del(&INODE(tmp)->i_hash);
-               INIT_LIST_HEAD(&INODE(tmp)->i_hash);
+               list_del(&inode->i_hash);
+               INIT_LIST_HEAD(&inode->i_hash);
                list_add(tmp, freeable);
-               list_entry(tmp, struct inode, i_list)->i_state = I_FREEING;
-               found = 1;
+               inode->i_state |= I_FREEING;
+               count++;
+               if (!--goal)
+                       break;
        }
+       inodes_stat.nr_unused -= count;
+       spin_unlock(&inode_lock);
 
-       if (found)
-               dispose_list(freeable);
-
-       return found;
+       dispose_list(freeable);
 }
 
-/*
- * Searches the inodes list for freeable inodes,
- * shrinking the dcache before (and possible after,
- * if we're low)
- */
-static void try_to_free_inodes(int goal)
+int shrink_icache_memory(int priority, int gfp_mask)
 {
-       /*
-        * First stry to just get rid of unused inodes.
-        *
-        * If we can't reach our goal that way, we'll have
-        * to try to shrink the dcache and sync existing
-        * inodes..
-        */
-       free_inodes();
-       goal -= inodes_stat.nr_free_inodes;
-       if (goal > 0) {
-               spin_unlock(&inode_lock);
-               select_dcache(goal, 0);
-               prune_dcache(goal);
-               spin_lock(&inode_lock);
-               sync_all_inodes();
-               free_inodes();
+       if (gfp_mask & __GFP_IO)
+       {
+               int count = 0;
+
+               if (priority)
+                       count = inodes_stat.nr_unused / priority;
+               prune_icache(count);
+               /* FIXME: kmem_cache_shrink here should tell us
+                  the number of pages freed, and it should
+                  work in a __GFP_DMA/__GFP_BIGMEM behaviour
+                  to free only the interesting pages in
+                  function of the needs of the current allocation. */
+               kmem_cache_shrink(inode_cachep);
        }
-}
 
-/*
- * This is the externally visible routine for
- * inode memory management.
- */
-void free_inode_memory(int goal)
-{
-       spin_lock(&inode_lock);
-       free_inodes();
-       spin_unlock(&inode_lock);
+       return 0;
 }
-
-/*
- * This is called with the spinlock held, but releases
- * the lock when freeing or allocating inodes.
- * Look out! This returns with the inode lock held if
- * it got an inode..
- *
- * We do inode allocations two pages at a time to reduce
- * fragmentation.
- */
-#define INODE_PAGE_ORDER 1
-#define INODE_ALLOCATION_SIZE (PAGE_SIZE << INODE_PAGE_ORDER)
-#define INODES_PER_ALLOCATION (INODE_ALLOCATION_SIZE/sizeof(struct inode))
-
-static struct inode * grow_inodes(void)
+static inline void __iget(struct inode * inode)
 {
-       struct inode * inode;
-
-       /*
-        * Check whether to restock the unused list.
-        */
-       if (inodes_stat.nr_inodes > max_inodes) {
-               struct list_head *tmp;
-               try_to_free_inodes(inodes_stat.nr_inodes >> 2);
-               tmp = inode_unused.next;
-               if (tmp != &inode_unused) {
-                       inodes_stat.nr_free_inodes--;
-                       list_del(tmp);
-                       inode = list_entry(tmp, struct inode, i_list);
-                       return inode;
-               }
-       }
-
-       spin_unlock(&inode_lock);
-       inode = (struct inode *)__get_free_pages(GFP_KERNEL,INODE_PAGE_ORDER);
-       if (inode) {
-               int size;
-               struct inode * tmp;
-
-               size = INODE_ALLOCATION_SIZE - 2*sizeof(struct inode);
-               tmp = inode;
-               spin_lock(&inode_lock);
-               do {
-                       tmp++;
-                       init_once(tmp);
-                       list_add(&tmp->i_list, &inode_unused);
-                       size -= sizeof(struct inode);
-               } while (size >= 0);
-               init_once(inode);
-               /*
-                * Update the inode statistics
-                */
-               inodes_stat.nr_inodes += INODES_PER_ALLOCATION;
-               inodes_stat.nr_free_inodes += INODES_PER_ALLOCATION - 1;
-               return inode;
-       }
-
-       /*
-        * If the allocation failed, do an extensive pruning of
-        * the dcache and then try again to free some inodes.
-        */
-       prune_dcache(inodes_stat.nr_inodes >> 2);
-
-       spin_lock(&inode_lock);
-       free_inodes();
+       if (!inode->i_count++)
        {
-               struct list_head *tmp = inode_unused.next;
-               if (tmp != &inode_unused) {
-                       inodes_stat.nr_free_inodes--;
-                       list_del(tmp);
-                       inode = list_entry(tmp, struct inode, i_list);
-                       return inode;
+               if (!(inode->i_state & I_DIRTY))
+               {
+                       list_del(&inode->i_list);
+                       list_add(&inode->i_list, &inode_in_use);
                }
+               inodes_stat.nr_unused--;
        }
-       spin_unlock(&inode_lock);
-
-       printk("grow_inodes: allocation failed\n");
-       return NULL;
 }
 
 /*
  * Called with the inode lock held.
+ * NOTE: we are not increasing the inode-refcount, you must call __iget()
+ * by hand after calling find_inode now! This simplify iunique and won't
+ * add any additional branch in the common code.
  */
 static struct inode * find_inode(struct super_block * sb, unsigned long ino, struct list_head *head)
 {
@@ -505,7 +445,6 @@ static struct inode * find_inode(struct super_block * sb, unsigned long ino, str
                        continue;
                if (inode->i_ino != ino)
                        continue;
-               inode->i_count++;
                break;
        }
        return inode;
@@ -518,7 +457,7 @@ static struct inode * find_inode(struct super_block * sb, unsigned long ino, str
  * i_sb, i_ino, i_count, i_state and the lists have
  * been initialized elsewhere..
  */
-void clean_inode(struct inode *inode)
+static void clean_inode(struct inode *inode)
 {
        memset(&inode->u, 0, sizeof(inode->u));
        inode->i_sock = 0;
@@ -528,7 +467,6 @@ void clean_inode(struct inode *inode)
        inode->i_size = 0;
        inode->i_generation = 0;
        memset(&inode->i_dquot, 0, sizeof(inode->i_dquot));
-       sema_init(&inode->i_sem, 1);
        inode->i_pipe = NULL;
 }
 
@@ -542,15 +480,11 @@ struct inode * get_empty_inode(void)
 {
        static unsigned long last_ino = 0;
        struct inode * inode;
-       struct list_head * tmp;
 
-       spin_lock(&inode_lock);
-       tmp = inode_unused.next;
-       if (tmp != &inode_unused) {
-               list_del(tmp);
-               inodes_stat.nr_free_inodes--;
-               inode = list_entry(tmp, struct inode, i_list);
-add_new_inode:
+       inode = alloc_inode();
+       if (inode)
+       {
+               spin_lock(&inode_lock);
                list_add(&inode->i_list, &inode_in_use);
                inode->i_sb = NULL;
                inode->i_dev = 0;
@@ -560,22 +494,12 @@ add_new_inode:
                inode->i_state = 0;
                spin_unlock(&inode_lock);
                clean_inode(inode);
-               return inode;
        }
-
-       /*
-        * Warning: if this succeeded, we will now
-        * return with the inode lock.
-        */
-       inode = grow_inodes();
-       if (inode)
-               goto add_new_inode;
-
        return inode;
 }
 
 /*
- * This is called with the inode lock held.. Be careful.
+ * This is called without the inode lock held.. Be careful.
  *
  * We no longer cache the sb_flags in i_flags - see fs.h
  * -- rmk@arm.uk.linux.org
  */
@@ -583,56 +507,47 @@ static struct inode * get_new_inode(struct super_block *sb, unsigned long ino, struct list_head *head)
 {
        struct inode * inode;
-       struct list_head * tmp = inode_unused.next;
-
-       if (tmp != &inode_unused) {
-               list_del(tmp);
-               inodes_stat.nr_free_inodes--;
-               inode = list_entry(tmp, struct inode, i_list);
-add_new_inode:
-               list_add(&inode->i_list, &inode_in_use);
-               list_add(&inode->i_hash, head);
-               inode->i_sb = sb;
-               inode->i_dev = sb->s_dev;
-               inode->i_ino = ino;
-               inode->i_flags = 0;
-               inode->i_count = 1;
-               inode->i_state = I_LOCK;
-               spin_unlock(&inode_lock);
-               clean_inode(inode);
-               sb->s_op->read_inode(inode);
-
-               /*
-                * This is special! We do not need the spinlock
-                * when clearing I_LOCK, because we're guaranteed
-                * that nobody else tries to do anything about the
-                * state of the inode when it is locked, as we
-                * just created it (so there can be no old holders
-                * that haven't tested I_LOCK).
-                */
-               inode->i_state &= ~I_LOCK;
-               wake_up(&inode->i_wait);
-
-               return inode;
-       }
-
-       /*
-        * We need to expand. Note that "grow_inodes()" will
-        * release the spinlock, but will return with the lock
-        * held again if the allocation succeeded.
-        */
-       inode = grow_inodes();
+       inode = alloc_inode();
        if (inode) {
+               struct inode * old;
+
+               spin_lock(&inode_lock);
                /* We released the lock, so.. */
-               struct inode * old = find_inode(sb, ino, head);
+               old = find_inode(sb, ino, head);
                if (!old)
-                       goto add_new_inode;
-               list_add(&inode->i_list, &inode_unused);
-               inodes_stat.nr_free_inodes++;
+               {
+                       list_add(&inode->i_list, &inode_in_use);
+                       list_add(&inode->i_hash, head);
+                       inode->i_sb = sb;
+                       inode->i_dev = sb->s_dev;
+                       inode->i_ino = ino;
+                       inode->i_flags = 0;
+                       inode->i_count = 1;
+                       inode->i_state = I_LOCK;
+                       spin_unlock(&inode_lock);
+
+                       clean_inode(inode);
+                       sb->s_op->read_inode(inode);
+
+                       /*
+                        * This is special! We do not need the spinlock
+                        * when clearing I_LOCK, because we're guaranteed
+                        * that nobody else tries to do anything about the
+                        * state of the inode when it is locked, as we
+                        * just created it (so there can be no old holders
+                        * that haven't tested I_LOCK).
+                        */
+                       inode->i_state &= ~I_LOCK;
+                       wake_up(&inode->i_wait);
+
+                       return inode;
+               }
+               __iget(old);
                spin_unlock(&inode_lock);
-               wait_on_inode(old);
-               return old;
+               destroy_inode(inode);
+               inode = old;
+               wait_on_inode(inode);
        }
        return inode;
 }
@@ -660,7 +575,6 @@ retry:
                        spin_unlock(&inode_lock);
                        return res;
                }
-               inode->i_count--; /* compensate find_inode() */
        } else {
                counter = max_reserved + 1;
        }
@@ -671,10 +585,10 @@ retry:
 struct inode *igrab(struct inode *inode)
 {
        spin_lock(&inode_lock);
-       if (inode->i_state & I_FREEING)
-               inode = NULL;
+       if (!(inode->i_state & I_FREEING))
+               __iget(inode);
        else
-               inode->i_count++;
+               inode = NULL;
        spin_unlock(&inode_lock);
        if (inode)
                wait_on_inode(inode);
@@ -689,14 +603,16 @@ struct inode *iget(struct super_block *sb, unsigned long ino)
        spin_lock(&inode_lock);
        inode = find_inode(sb, ino, head);
        if (inode) {
+               __iget(inode);
                spin_unlock(&inode_lock);
                wait_on_inode(inode);
                return inode;
        }
+       spin_unlock(&inode_lock);
+
        /*
-        * get_new_inode() will do the right thing, releasing
-        * the inode lock and re-trying the search in case it
-        * had to block at any point.
+        * get_new_inode() will do the right thing, re-trying the search
+        * in case it had to block at any point.
         */
        return get_new_inode(sb, ino, head);
 }
@@ -721,6 +637,7 @@ void iput(struct inode *inode)
 {
        if (inode) {
                struct super_operations *op = NULL;
+               int destroy = 0;
 
                if (inode->i_sb && inode->i_sb->s_op)
                        op = inode->i_sb->s_op;
@@ -750,13 +667,17 @@ void iput(struct inode *inode)
                                inode->i_state|=I_FREEING;
                                spin_unlock(&inode_lock);
                                clear_inode(inode);
+                               destroy = 1;
                                spin_lock(&inode_lock);
-                               list_add(&inode->i_list, &inode_unused);
-                               inodes_stat.nr_free_inodes++;
                        }
-                       else if (!(inode->i_state & I_DIRTY)) {
-                               list_del(&inode->i_list);
-                               list_add(&inode->i_list, &inode_in_use);
+                       else
+                       {
+                               if (!(inode->i_state & I_DIRTY)) {
+                                       list_del(&inode->i_list);
+                                       list_add(&inode->i_list,
+                                                &inode_unused);
+                               }
+                               inodes_stat.nr_unused++;
                        }
 #ifdef INODE_PARANOIA
                        if (inode->i_flock)
@@ -778,6 +699,8 @@ kdevname(inode->i_dev), inode->i_ino, atomic_read(&inode->i_sem.count));
 kdevname(inode->i_dev), inode->i_ino);
                }
                spin_unlock(&inode_lock);
+               if (destroy)
+                       destroy_inode(inode);
        }
 }
 
@@ -795,14 +718,11 @@ int bmap(struct inode * inode, int block)
 }
 
 /*
- * Initialize the hash tables and default
- * value for max inodes
+ * Initialize the hash tables.
  */
-#define MAX_INODE (16384)
-
 void __init inode_init(void)
 {
-       int i, max;
+       int i;
        struct list_head *head = inode_hashtable;
 
        i = HASH_SIZE;
@@ -812,11 +732,12 @@ void __init inode_init(void)
                i--;
        } while (i);
 
-       /* Initial guess at reasonable inode number */
-       max = num_physpages >> 1;
-       if (max > MAX_INODE)
-               max = MAX_INODE;
-       max_inodes = max;
+       /* inode slab cache */
+       inode_cachep = kmem_cache_create("inode_cache", sizeof(struct inode),
+                                        0, SLAB_HWCACHE_ALIGN, init_once,
+                                        NULL);
+       if (!inode_cachep)
+               panic("cannot create inode slab cache");
 }
 
 void update_atime (struct inode *inode)
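
The heart of this patch is replacing the fixed-budget, page-order-1 inode allocator with a slab cache whose constructor (init_once) runs only when fresh pages enter the cache: the wait queue, hash/dentry list heads, i_sem and i_shared_lock are left in their initialized state when an inode is freed, so a recycled object skips re-construction entirely and only clean_inode() has to touch the type-specific fields. Below is a minimal userspace C sketch of that construct-once caching pattern; every name in it (obj_cache, cache_create, toy_inode, ...) is a hypothetical stand-in for illustration, not the 2.3 kernel slab API.

/*
 * Sketch of the "construct once" slab idea, userspace analogue.
 * Hypothetical names; not the kernel's kmem_cache_* interface.
 */
#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>

struct obj_cache {
        size_t size;
        void (*ctor)(void *);   /* runs once per object, not per allocation */
        void **free_list;       /* previously freed, still-constructed objects */
        size_t nr_free, cap;
};

static struct obj_cache *cache_create(size_t size, void (*ctor)(void *))
{
        struct obj_cache *c = malloc(sizeof(*c));

        if (!c)
                return NULL;
        c->size = size;
        c->ctor = ctor;
        c->nr_free = 0;
        c->cap = 64;
        c->free_list = malloc(c->cap * sizeof(void *));
        if (!c->free_list) {
                free(c);
                return NULL;
        }
        return c;
}

static void *cache_alloc(struct obj_cache *c)
{
        void *obj;

        if (c->nr_free)                 /* recycled: constructor already ran */
                return c->free_list[--c->nr_free];
        obj = malloc(c->size);          /* fresh memory: construct it once */
        if (obj)
                c->ctor(obj);
        return obj;
}

static void cache_free(struct obj_cache *c, void *obj)
{
        /*
         * The caller must hand the object back in its constructed state
         * (the kernel analogue: iput()/clear_inode() leave the list heads
         * and semaphore reinitialized), so nothing is torn down here.
         */
        if (c->nr_free < c->cap)
                c->free_list[c->nr_free++] = obj;
        else
                free(obj);
}

/* Toy stand-in for struct inode with one expensive-to-initialize field. */
struct toy_inode {
        pthread_mutex_t i_sem;
        unsigned long i_ino;
};

static void toy_inode_ctor(void *p)
{
        struct toy_inode *inode = p;

        pthread_mutex_init(&inode->i_sem, NULL);
        inode->i_ino = 0;
}

int main(void)
{
        struct obj_cache *cachep;
        struct toy_inode *a, *b;

        cachep = cache_create(sizeof(struct toy_inode), toy_inode_ctor);
        if (!cachep)
                return 1;
        a = cache_alloc(cachep);        /* constructor runs here */
        cache_free(cachep, a);
        b = cache_alloc(cachep);        /* same memory, constructor skipped */
        printf("object recycled: %s\n", a == b ? "yes" : "no");
        return 0;
}

The patch gets the same payoff: the per-allocation cost in get_empty_inode()/get_new_inode() shrinks to clean_inode() plus list manipulation, the idempotent initialization is amortized over the life of the slab page, and under memory pressure shrink_icache_memory() can prune the unused list and hand whole pages back through kmem_cache_shrink() instead of obeying a static max_inodes ceiling.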