diff options
Diffstat (limited to 'fs/inode.c')
-rw-r--r-- | fs/inode.c | 888 |
1 files changed, 446 insertions, 442 deletions
diff --git a/fs/inode.c b/fs/inode.c index 724e8c4cd..b1d9bda4e 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1,229 +1,234 @@ /* - * linux/fs/inode.c + * linux/fs/inode.c: Keeping track of inodes. * - * Copyright (C) 1991, 1992 Linus Torvalds + * Copyright (C) 1991, 1992 Linus Torvalds + * Copyright (C) 1997 David S. Miller */ -#include <linux/stat.h> -#include <linux/sched.h> #include <linux/kernel.h> +#include <linux/sched.h> #include <linux/mm.h> +#include <linux/slab.h> #include <linux/string.h> -#include <asm/system.h> - -#define NR_IHASH 512 - -/* - * Be VERY careful when you access the inode hash table. There - * are some rather scary race conditions you need to take care of: - * - P1 tries to open file "xx", calls "iget()" with the proper - * inode number, but blocks because it's not on the list. - * - P2 deletes file "xx", gets the inode (which P1 has just read, - * but P1 hasn't woken up to the fact yet) - * - P2 iput()'s the inode, which now has i_nlink = 0 - * - P1 wakes up and has the inode, but now P2 has made that - * inode invalid (but P1 has no way of knowing that). - * - * The "updating" counter makes sure that when P1 blocks on the - * iget(), P2 can't delete the inode from under it because P2 - * will wait until P1 has been able to update the inode usage - * count so that the inode will stay in use until everybody has - * closed it.. - */ -static struct inode_hash_entry { - struct inode * inode; - int updating; -} hash_table[NR_IHASH]; - -static struct inode * first_inode; -static struct wait_queue * inode_wait = NULL; -/* Keep these next two contiguous in memory for sysctl.c */ int nr_inodes = 0, nr_free_inodes = 0; int max_inodes = NR_INODE; -static inline int const hashfn(kdev_t dev, unsigned int i) -{ - return (HASHDEV(dev) ^ i) % NR_IHASH; -} +#define INODE_HASHSZ 1024 -static inline struct inode_hash_entry * const hash(kdev_t dev, int i) -{ - return hash_table + hashfn(dev, i); -} +static struct inode *inode_hash[INODE_HASHSZ]; -static inline void insert_inode_free(struct inode *inode) -{ - struct inode * prev, * next = first_inode; +/* All the details of hashing and lookup. */ +#define hashfn(dev, i) ((HASHDEV(dev) + ((i) ^ ((i) >> 10))) & (INODE_HASHSZ - 1)) - first_inode = inode; - prev = next->i_prev; - inode->i_next = next; - inode->i_prev = prev; - prev->i_next = inode; - next->i_prev = inode; +__inline__ void insert_inode_hash(struct inode *inode) +{ + struct inode **htable = &inode_hash[hashfn(inode->i_dev, inode->i_ino)]; + if((inode->i_hash_next = *htable) != NULL) + (*htable)->i_hash_pprev = &inode->i_hash_next; + *htable = inode; + inode->i_hash_pprev = htable; } -static inline void remove_inode_free(struct inode *inode) +#define hash_inode(inode) insert_inode_hash(inode) + +static inline void unhash_inode(struct inode *inode) { - if (first_inode == inode) - first_inode = first_inode->i_next; - if (inode->i_next) - inode->i_next->i_prev = inode->i_prev; - if (inode->i_prev) - inode->i_prev->i_next = inode->i_next; - inode->i_next = inode->i_prev = NULL; + if(inode->i_hash_pprev) { + if(inode->i_hash_next) + inode->i_hash_next->i_hash_pprev = inode->i_hash_pprev; + *(inode->i_hash_pprev) = inode->i_hash_next; + inode->i_hash_pprev = NULL; + } } -void insert_inode_hash(struct inode *inode) +static inline struct inode *find_inode(unsigned int hashent, + kdev_t dev, unsigned long ino) { - struct inode_hash_entry *h; - h = hash(inode->i_dev, inode->i_ino); + struct inode *inode; - inode->i_hash_next = h->inode; - inode->i_hash_prev = NULL; - if (inode->i_hash_next) - inode->i_hash_next->i_hash_prev = inode; - h->inode = inode; + for(inode = inode_hash[hashent]; inode; inode = inode->i_hash_next) + if(inode->i_dev == dev && inode->i_ino == ino) + break; + return inode; } -static inline void remove_inode_hash(struct inode *inode) -{ - struct inode_hash_entry *h; - h = hash(inode->i_dev, inode->i_ino); +/* Free list queue and management. */ +static struct free_inode_queue { + struct inode *head; + struct inode **last; +} free_inodes = { NULL, &free_inodes.head }; - if (h->inode == inode) - h->inode = inode->i_hash_next; - if (inode->i_hash_next) - inode->i_hash_next->i_hash_prev = inode->i_hash_prev; - if (inode->i_hash_prev) - inode->i_hash_prev->i_hash_next = inode->i_hash_next; - inode->i_hash_prev = inode->i_hash_next = NULL; +static inline void put_inode_head(struct inode *inode) +{ + if((inode->i_next = free_inodes.head) != NULL) + free_inodes.head->i_pprev = &inode->i_next; + else + free_inodes.last = &inode->i_next; + free_inodes.head = inode; + inode->i_pprev = &free_inodes.head; + nr_free_inodes++; } -static inline void put_last_free(struct inode *inode) +static inline void put_inode_last(struct inode *inode) { - remove_inode_free(inode); - inode->i_prev = first_inode->i_prev; - inode->i_prev->i_next = inode; - inode->i_next = first_inode; - inode->i_next->i_prev = inode; + inode->i_next = NULL; + inode->i_pprev = free_inodes.last; + *free_inodes.last = inode; + free_inodes.last = &inode->i_next; + nr_free_inodes++; } -int grow_inodes(void) +static inline void remove_free_inode(struct inode *inode) { - struct inode * inode; - int i; - - if (!(inode = (struct inode*) get_free_page(GFP_KERNEL))) - return -ENOMEM; - - i=PAGE_SIZE / sizeof(struct inode); - nr_inodes += i; - nr_free_inodes += i; + if(inode->i_pprev) { + if(inode->i_next) + inode->i_next->i_pprev = inode->i_pprev; + else + free_inodes.last = inode->i_pprev; + *inode->i_pprev = inode->i_next; + inode->i_pprev = NULL; + nr_free_inodes--; + } +} - if (!first_inode) - inode->i_next = inode->i_prev = first_inode = inode++, i--; +/* This is the in-use queue, if i_count > 0 (as far as we can tell) + * the sucker is here. + */ +static struct inode *inuse_list = NULL; - for ( ; i ; i-- ) - insert_inode_free(inode++); - return 0; +static inline void put_inuse(struct inode *inode) +{ + if((inode->i_next = inuse_list) != NULL) + inuse_list->i_pprev = &inode->i_next; + inuse_list = inode; + inode->i_pprev = &inuse_list; } -unsigned long inode_init(unsigned long start, unsigned long end) +static inline void remove_inuse(struct inode *inode) { - memset(hash_table, 0, sizeof(hash_table)); - first_inode = NULL; - return start; + if(inode->i_pprev) { + if(inode->i_next) + inode->i_next->i_pprev = inode->i_pprev; + *inode->i_pprev = inode->i_next; + inode->i_pprev = NULL; + } } +/* Locking and unlocking inodes, plus waiting for locks to clear. */ static void __wait_on_inode(struct inode *); -static inline void wait_on_inode(struct inode * inode) +static inline void wait_on_inode(struct inode *inode) { - if (inode->i_lock) + if(inode->i_lock) __wait_on_inode(inode); } -static inline void lock_inode(struct inode * inode) +static inline void lock_inode(struct inode *inode) { - wait_on_inode(inode); + if(inode->i_lock) + __wait_on_inode(inode); inode->i_lock = 1; } -static inline void unlock_inode(struct inode * inode) +static inline void unlock_inode(struct inode *inode) { inode->i_lock = 0; wake_up(&inode->i_wait); } -/* - * Note that we don't want to disturb any wait-queues when we discard - * an inode. - * - * Argghh. Got bitten by a gcc problem with inlining: no way to tell - * the compiler that the inline asm function 'memset' changes 'inode'. - * I've been searching for the bug for days, and was getting desperate. - * Finally looked at the assembler output... Grrr. - * - * The solution is the weird use of 'volatile'. Ho humm. Have to report - * it to the gcc lists, and hope we can do this more cleanly some day.. - */ -void clear_inode(struct inode * inode) +static void __wait_on_inode(struct inode * inode) { - struct wait_queue * wait; + struct wait_queue wait = { current, NULL }; + + add_wait_queue(&inode->i_wait, &wait); +repeat: + current->state = TASK_UNINTERRUPTIBLE; + if (inode->i_lock) { + schedule(); + goto repeat; + } + remove_wait_queue(&inode->i_wait, &wait); + current->state = TASK_RUNNING; +} + +/* Clear an inode of all it's identity, this is exported to the world. */ +void clear_inode(struct inode *inode) +{ + struct wait_queue *wait; + + /* So we don't disappear. */ + inode->i_count++; truncate_inode_pages(inode, 0); wait_on_inode(inode); - if (IS_WRITABLE(inode)) { - if (inode->i_sb && inode->i_sb->dq_op) - inode->i_sb->dq_op->drop(inode); - } - remove_inode_hash(inode); - remove_inode_free(inode); - wait = ((volatile struct inode *) inode)->i_wait; - if (inode->i_count) - nr_free_inodes++; - memset(inode,0,sizeof(*inode)); - ((volatile struct inode *) inode)->i_wait = wait; - insert_inode_free(inode); + if(IS_WRITABLE(inode) && inode->i_sb && inode->i_sb->dq_op) + inode->i_sb->dq_op->drop(inode); + + if(--inode->i_count > 0) + remove_inuse(inode); + else + remove_free_inode(inode); + unhash_inode(inode); + wait = inode->i_wait; + memset(inode, 0, sizeof(*inode)); barrier(); + inode->i_wait = wait; + put_inode_head(inode); /* Pages zapped, put at the front. */ } +/* These check the validity of a mount/umount type operation, we essentially + * check if there are any inodes hanging around which prevent this operation + * from occurring. We also clear out clean inodes referencing this device. + */ int fs_may_mount(kdev_t dev) { - struct inode * inode, * next; - int i; + struct inode *inode; + int pass = 0; - next = first_inode; - for (i = nr_inodes ; i > 0 ; i--) { - inode = next; - next = inode->i_next; /* clear_inode() changes the queues.. */ - if (inode->i_dev != dev) - continue; - if (inode->i_count || inode->i_dirt || inode->i_lock) + inode = free_inodes.head; +repeat: + while(inode) { + struct inode *next = inode->i_next; + if(inode->i_dev != dev) + goto next; + if(inode->i_count || inode->i_dirt || inode->i_lock) return 0; clear_inode(inode); + next: + inode = next; + } + if(pass == 0) { + inode = inuse_list; + pass = 1; + goto repeat; } - return 1; + return 1; /* Tis' cool bro. */ } -int fs_may_umount(kdev_t dev, struct inode * mount_root) +int fs_may_umount(kdev_t dev, struct inode *iroot) { - struct inode * inode; - int i; + struct inode *inode; + int pass = 0; - inode = first_inode; - for (i=0 ; i < nr_inodes ; i++, inode = inode->i_next) { - if (inode->i_dev != dev || !inode->i_count) + inode = free_inodes.head; +repeat: + for(; inode; inode = inode->i_next) { + if(inode->i_dev != dev || !inode->i_count) continue; - if (inode == mount_root && inode->i_count == - (inode->i_mount != inode ? 1 : 2)) + if(inode == iroot && + (inode->i_count == (inode->i_mount == inode ? 2 : 1))) continue; return 0; } - return 1; + if(pass == 0) { + inode = inuse_list; + pass = 1; + goto repeat; + } + return 1; /* Tis' cool bro. */ } +/* This belongs in file_table.c, not here... */ int fs_may_remount_ro(kdev_t dev) { struct file * file; @@ -237,79 +242,70 @@ int fs_may_remount_ro(kdev_t dev) if (S_ISREG(file->f_inode->i_mode) && (file->f_mode & 2)) return 0; } - return 1; + return 1; /* Tis' cool bro. */ } -static void write_inode(struct inode * inode) +/* Reading/writing inodes. */ +static void write_inode(struct inode *inode) { - if (!inode->i_dirt) - return; - wait_on_inode(inode); - if (!inode->i_dirt) - return; - if (!inode->i_sb || !inode->i_sb->s_op || !inode->i_sb->s_op->write_inode) { - inode->i_dirt = 0; - return; + if(inode->i_dirt) { + wait_on_inode(inode); + if(inode->i_dirt) { + if(inode->i_sb && + inode->i_sb->s_op && + inode->i_sb->s_op->write_inode) { + inode->i_lock = 1; + inode->i_sb->s_op->write_inode(inode); + unlock_inode(inode); + } else { + inode->i_dirt = 0; + } + } } - inode->i_lock = 1; - inode->i_sb->s_op->write_inode(inode); - unlock_inode(inode); } -static inline void read_inode(struct inode * inode) +static inline void read_inode(struct inode *inode) { - lock_inode(inode); - if (inode->i_sb && inode->i_sb->s_op && inode->i_sb->s_op->read_inode) + if(inode->i_sb && + inode->i_sb->s_op && + inode->i_sb->s_op->read_inode) { + lock_inode(inode); inode->i_sb->s_op->read_inode(inode); - unlock_inode(inode); + unlock_inode(inode); + } } -/* POSIX UID/GID verification for setting inode attributes */ int inode_change_ok(struct inode *inode, struct iattr *attr) { - /* - * If force is set do it anyway. - */ - - if (attr->ia_valid & ATTR_FORCE) - return 0; + if(!(attr->ia_valid & ATTR_FORCE)) { + unsigned short fsuid = current->fsuid; + uid_t iuid = inode->i_uid; + int not_fsuser = !fsuser(); - /* Make sure a caller can chown */ - if ((attr->ia_valid & ATTR_UID) && - (current->fsuid != inode->i_uid || - attr->ia_uid != inode->i_uid) && !fsuser()) - return -EPERM; + if(((attr->ia_valid & ATTR_UID) && + ((fsuid != iuid) || + (attr->ia_uid != iuid)) && not_fsuser) || - /* Make sure caller can chgrp */ - if ((attr->ia_valid & ATTR_GID) && - (!in_group_p(attr->ia_gid) && attr->ia_gid != inode->i_gid) && - !fsuser()) - return -EPERM; + ((attr->ia_valid & ATTR_GID) && + (!in_group_p(attr->ia_gid) && + (attr->ia_gid != inode->i_gid))) || - /* Make sure a caller can chmod */ - if (attr->ia_valid & ATTR_MODE) { - if ((current->fsuid != inode->i_uid) && !fsuser()) + ((attr->ia_valid & (ATTR_ATIME_SET | ATTR_MTIME_SET)) && + (fsuid != iuid) && not_fsuser)) return -EPERM; - /* Also check the setgid bit! */ - if (!fsuser() && !in_group_p((attr->ia_valid & ATTR_GID) ? attr->ia_gid : - inode->i_gid)) - attr->ia_mode &= ~S_ISGID; - } - /* Check for setting the inode time */ - if ((attr->ia_valid & ATTR_ATIME_SET) && - ((current->fsuid != inode->i_uid) && !fsuser())) - return -EPERM; - if ((attr->ia_valid & ATTR_MTIME_SET) && - ((current->fsuid != inode->i_uid) && !fsuser())) - return -EPERM; + if(attr->ia_valid & ATTR_MODE) { + gid_t grp; + if(fsuid != iuid && not_fsuser) + return -EPERM; + grp = attr->ia_valid & ATTR_GID ? attr->ia_gid : inode->i_gid; + if(not_fsuser && !in_group_p(grp)) + attr->ia_mode &= ~S_ISGID; + } + } return 0; } -/* - * Set the appropriate attributes from an attribute structure into - * the inode structure. - */ void inode_setattr(struct inode *inode, struct iattr *attr) { if (attr->ia_valid & ATTR_UID) @@ -332,17 +328,8 @@ void inode_setattr(struct inode *inode, struct iattr *attr) inode->i_dirt = 1; } -/* - * notify_change is called for inode-changing operations such as - * chown, chmod, utime, and truncate. It is guaranteed (unlike - * write_inode) to be called from the context of the user requesting - * the change. - */ - -int notify_change(struct inode * inode, struct iattr *attr) +int notify_change(struct inode *inode, struct iattr *attr) { - int retval; - attr->ia_ctime = CURRENT_TIME; if (attr->ia_valid & (ATTR_ATIME | ATTR_MTIME)) { if (!(attr->ia_valid & ATTR_ATIME_SET)) @@ -351,303 +338,320 @@ int notify_change(struct inode * inode, struct iattr *attr) attr->ia_mtime = attr->ia_ctime; } - if (inode->i_sb && inode->i_sb->s_op && + if (inode->i_sb && + inode->i_sb->s_op && inode->i_sb->s_op->notify_change) return inode->i_sb->s_op->notify_change(inode, attr); - if ((retval = inode_change_ok(inode, attr)) != 0) - return retval; + if(inode_change_ok(inode, attr) != 0) + return -EPERM; inode_setattr(inode, attr); return 0; } -/* - * bmap is needed for demand-loading and paging: if this function - * doesn't exist for a filesystem, then those things are impossible: - * executables cannot be run from the filesystem etc... - * - * This isn't as bad as it sounds: the read-routines might still work, - * so the filesystem would be otherwise ok (for example, you might have - * a DOS filesystem, which doesn't lend itself to bmap very well, but - * you could still transfer files to/from the filesystem) - */ -int bmap(struct inode * inode, int block) +int bmap(struct inode *inode, int block) { - if (inode->i_op && inode->i_op->bmap) - return inode->i_op->bmap(inode,block); + if(inode->i_op && inode->i_op->bmap) + return inode->i_op->bmap(inode, block); return 0; } void invalidate_inodes(kdev_t dev) { - struct inode * inode, * next; - int i; + struct inode *inode; + int pass = 0; - next = first_inode; - for(i = nr_inodes ; i > 0 ; i--) { - inode = next; - next = inode->i_next; /* clear_inode() changes the queues.. */ - if (inode->i_dev != dev) - continue; - if (inode->i_count || inode->i_dirt || inode->i_lock) { - printk("VFS: inode busy on removed device %s\n", - kdevname(dev)); - continue; - } + inode = free_inodes.head; +repeat: + while(inode) { + struct inode *next = inode->i_next; + if(inode->i_dev != dev) + goto next; clear_inode(inode); + next: + inode = next; + } + if(pass == 0) { + inode = inuse_list; + pass = 1; + goto repeat; } } void sync_inodes(kdev_t dev) { - int i; - struct inode * inode; + struct inode *inode; + int pass = 0; - inode = first_inode; - for(i = 0; i < nr_inodes*2; i++, inode = inode->i_next) { - if (dev && inode->i_dev != dev) - continue; + inode = free_inodes.head; +repeat: + while(inode) { + struct inode *next = inode->i_next; + if(dev && inode->i_dev != dev) + goto next; wait_on_inode(inode); - if (inode->i_dirt) - write_inode(inode); + write_inode(inode); + next: + inode = next; + } + if(pass == 0) { + inode = inuse_list; + pass = 1; + goto repeat; } } -void iput(struct inode * inode) +static struct wait_queue *inode_wait, *update_wait; + +void iput(struct inode *inode) { - if (!inode) + if(!inode) return; wait_on_inode(inode); - if (!inode->i_count) { - printk("VFS: iput: trying to free free inode\n"); - printk("VFS: device %s, inode %lu, mode=0%07o\n", - kdevname(inode->i_rdev), inode->i_ino, (int) inode->i_mode); + if(!inode->i_count) { + printk("VFS: Freeing free inode, tell DaveM\n"); return; } - if (inode->i_pipe) + if(inode->i_pipe) wake_up_interruptible(&PIPE_WAIT(*inode)); -repeat: - if (inode->i_count>1) { +we_slept: + if(inode->i_count > 1) { inode->i_count--; - return; + } else { + wake_up(&inode_wait); + if(inode->i_pipe) { + free_page((unsigned long)PIPE_BASE(*inode)); + PIPE_BASE(*inode) = NULL; + } + if(inode->i_sb && + inode->i_sb->s_op && + inode->i_sb->s_op->put_inode) { + inode->i_sb->s_op->put_inode(inode); + if(!inode->i_nlink) + return; + } + if(inode->i_dirt) { + write_inode(inode); + wait_on_inode(inode); + goto we_slept; + } + if(IS_WRITABLE(inode) && + inode->i_sb && + inode->i_sb->dq_op) { + inode->i_lock = 1; + inode->i_sb->dq_op->drop(inode); + unlock_inode(inode); + goto we_slept; + } + /* There is a serious race leading to here, watch out. */ + if(--inode->i_count == 0) { + remove_inuse(inode); + put_inode_last(inode); /* Place at end of LRU free queue */ + } } +} - wake_up(&inode_wait); - if (inode->i_pipe) { - unsigned long page = (unsigned long) PIPE_BASE(*inode); - PIPE_BASE(*inode) = NULL; - free_page(page); - } +static kmem_cache_t *inode_cachep; - if (inode->i_sb && inode->i_sb->s_op && inode->i_sb->s_op->put_inode) { - inode->i_sb->s_op->put_inode(inode); - if (!inode->i_nlink) - return; - } +static void grow_inodes(void) +{ + int i = 16; - if (inode->i_dirt) { - write_inode(inode); /* we can sleep - so do again */ - wait_on_inode(inode); - goto repeat; + while(i--) { + struct inode *inode; + + inode = kmem_cache_alloc(inode_cachep, SLAB_KERNEL); + if(!inode) + return; + memset(inode, 0, sizeof(*inode)); + put_inode_head(inode); + nr_inodes++; } +} - if (IS_WRITABLE(inode)) { - if (inode->i_sb && inode->i_sb->dq_op) { - /* Here we can sleep also. Let's do it again - * Dmitry Gorodchanin 02/11/96 - */ - inode->i_lock = 1; - inode->i_sb->dq_op->drop(inode); - unlock_inode(inode); - goto repeat; - } +/* We have to be really careful, it's really easy to run yourself into + * inefficient sequences of events. The first problem is that when you + * steal a non-referenced inode you run the risk of zaping a considerable + * number of page cache entries, which might get refernced once again. + * But if you are growing the inode set to quickly, you suck up ram + * and cause other problems. + * + * We approach the problem in the following way, we take two things into + * consideration. Firstly we take a look at how much we have "committed" + * to this inode already (i_nrpages), this accounts for the cost of getting + * those pages back if someone should reference that inode soon. We also + * attempt to factor in i_blocks, which says "how much of a problem could + * this potentially be". It still needs some tuning though. -DaveM + */ +#define BLOCK_FACTOR_SHIFT 5 /* It is not factored in as much. */ +static struct inode *find_best_candidate_weighted(struct inode *inode) +{ + struct inode *best = NULL; + + if(inode) { + unsigned long bestscore = 1000; + int limit = nr_free_inodes >> 2; + do { + if(!(inode->i_lock | inode->i_dirt)) { + int myscore = inode->i_nrpages; + + myscore += (inode->i_blocks >> BLOCK_FACTOR_SHIFT); + if(myscore < bestscore) { + bestscore = myscore; + best = inode; + } + } + inode = inode->i_next; + } while(inode && --limit); } - - inode->i_count--; + return best; +} - if (inode->i_mmap) { - printk("iput: inode %lu on device %s still has mappings.\n", - inode->i_ino, kdevname(inode->i_dev)); - inode->i_mmap = NULL; +static inline struct inode *find_best_free(struct inode *inode) +{ + if(inode) { + int limit = nr_free_inodes >> 5; + do { + if(!inode->i_nrpages) + return inode; + inode = inode->i_next; + } while(inode && --limit); } - - nr_free_inodes++; - return; + return NULL; } -struct inode * get_empty_inode(void) +struct inode *get_empty_inode(void) { static int ino = 0; - struct inode * inode, * best; - unsigned long badness; - int i; + struct inode *inode; - if (nr_inodes < max_inodes && nr_free_inodes < (nr_inodes >> 1)) - grow_inodes(); repeat: - inode = first_inode; - best = NULL; - badness = 1000; - for (i = nr_inodes/2; i > 0; i--,inode = inode->i_next) { - if (!inode->i_count) { - unsigned long i = 999; - if (!(inode->i_lock | inode->i_dirt)) - i = inode->i_nrpages; - if (i < badness) { - best = inode; - if (!i) - goto found_good; - badness = i; - } - } - } - if (nr_inodes < max_inodes) { - if (grow_inodes() == 0) - goto repeat; - best = NULL; - } - if (!best) { - printk("VFS: No free inodes - contact Linus\n"); - sleep_on(&inode_wait); + inode = find_best_free(free_inodes.head); + if(!inode) + goto pressure; +got_it: + inode->i_count++; + truncate_inode_pages(inode, 0); + wait_on_inode(inode); + if(IS_WRITABLE(inode) && inode->i_sb && inode->i_sb->dq_op) + inode->i_sb->dq_op->drop(inode); + unhash_inode(inode); + remove_free_inode(inode); + + memset(inode, 0, sizeof(*inode)); + inode->i_count = 1; + inode->i_nlink = 1; + inode->i_version = ++event; + sema_init(&inode->i_sem, 1); + inode->i_ino = ++ino; + inode->i_dev = 0; + put_inuse(inode); + return inode; +pressure: + if(nr_inodes < max_inodes) { + grow_inodes(); goto repeat; } - if (best->i_lock) { - wait_on_inode(best); + inode = find_best_candidate_weighted(free_inodes.head); + if(!inode) { + printk("VFS: No free inodes, contact DaveM\n"); + sleep_on(&inode_wait); goto repeat; } - if (best->i_dirt) { - write_inode(best); + if(inode->i_lock) { + wait_on_inode(inode); goto repeat; - } - if (best->i_count) + } else if(inode->i_dirt) { + write_inode(inode); goto repeat; -found_good: - clear_inode(best); - best->i_count = 1; - best->i_nlink = 1; - best->i_version = ++event; - best->i_sem.count = 1; - best->i_ino = ++ino; - best->i_dev = 0; - nr_free_inodes--; - if (nr_free_inodes < 0) { - printk ("VFS: get_empty_inode: bad free inode count.\n"); - nr_free_inodes = 0; } - return best; + goto got_it; } -struct inode * get_pipe_inode(void) +struct inode *get_pipe_inode(void) { - struct inode * inode; extern struct inode_operations pipe_inode_operations; - - if (!(inode = get_empty_inode())) - return NULL; - if (!(PIPE_BASE(*inode) = (char*) __get_free_page(GFP_USER))) { - iput(inode); - return NULL; + struct inode *inode = get_empty_inode(); + + if(inode) { + unsigned long page = __get_free_page(GFP_USER); + if(!page) { + iput(inode); + inode = NULL; + } else { + PIPE_BASE(*inode) = (char *) page; + inode->i_op = &pipe_inode_operations; + inode->i_count = 2; + PIPE_WAIT(*inode) = NULL; + PIPE_START(*inode) = PIPE_LEN(*inode) = 0; + PIPE_RD_OPENERS(*inode) = PIPE_WR_OPENERS(*inode) = 0; + PIPE_READERS(*inode) = PIPE_WRITERS(*inode) = 1; + PIPE_LOCK(*inode) = 0; + inode->i_pipe = 1; + inode->i_mode |= S_IFIFO | S_IRUSR | S_IWUSR; + inode->i_uid = current->fsuid; + inode->i_gid = current->fsgid; + inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; + inode->i_blksize = PAGE_SIZE; + } } - inode->i_op = &pipe_inode_operations; - inode->i_count = 2; /* sum of readers/writers */ - PIPE_WAIT(*inode) = NULL; - PIPE_START(*inode) = PIPE_LEN(*inode) = 0; - PIPE_RD_OPENERS(*inode) = PIPE_WR_OPENERS(*inode) = 0; - PIPE_READERS(*inode) = PIPE_WRITERS(*inode) = 1; - PIPE_LOCK(*inode) = 0; - inode->i_pipe = 1; - inode->i_mode |= S_IFIFO | S_IRUSR | S_IWUSR; - inode->i_uid = current->fsuid; - inode->i_gid = current->fsgid; - inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; - inode->i_blksize = PAGE_SIZE; return inode; } -struct inode *__iget(struct super_block * sb, int nr, int crossmntp) -{ - static struct wait_queue * update_wait = NULL; - struct inode_hash_entry * h; - struct inode * inode; - struct inode * empty = NULL; +static int inode_updating[INODE_HASHSZ]; - if (!sb) - panic("VFS: iget with sb==NULL"); - h = hash(sb->s_dev, nr); -repeat: - for (inode = h->inode; inode ; inode = inode->i_hash_next) - if (inode->i_dev == sb->s_dev && inode->i_ino == nr) - goto found_it; - if (!empty) { - /* - * If we sleep here before we have found an inode - * we need to make sure nobody does anything bad - * to the inode while we sleep, because otherwise - * we may return an inode that is not valid any - * more when we wake up.. - */ - h->updating++; - empty = get_empty_inode(); - if (!--h->updating) - wake_up(&update_wait); - if (empty) - goto repeat; - return (NULL); - } - inode = empty; - inode->i_sb = sb; - inode->i_dev = sb->s_dev; - inode->i_ino = nr; - inode->i_flags = sb->s_flags; - put_last_free(inode); - insert_inode_hash(inode); - read_inode(inode); - goto return_it; - -found_it: - if (!inode->i_count) - nr_free_inodes--; - inode->i_count++; - wait_on_inode(inode); - if (inode->i_dev != sb->s_dev || inode->i_ino != nr) { - printk("Whee.. inode changed from under us. Tell Linus\n"); - iput(inode); - goto repeat; - } - if (crossmntp && inode->i_mount) { - struct inode * tmp = inode->i_mount; - tmp->i_count++; - iput(inode); - inode = tmp; +struct inode *__iget(struct super_block *sb, int nr, int crossmntp) +{ + unsigned int hashent = hashfn(sb->s_dev, nr); + struct inode *inode, *empty = NULL; + +we_slept: + if((inode = find_inode(hashent, sb->s_dev, nr)) == NULL) { + if(empty == NULL) { + inode_updating[hashent]++; + empty = get_empty_inode(); + if(!--inode_updating[hashent]) + wake_up(&update_wait); + goto we_slept; + } + inode = empty; + inode->i_sb = sb; + inode->i_dev = sb->s_dev; + inode->i_ino = nr; + inode->i_flags = sb->s_flags; + hash_inode(inode); + read_inode(inode); + } else { + if(!inode->i_count++) { + remove_free_inode(inode); + put_inuse(inode); + } wait_on_inode(inode); + if(crossmntp && inode->i_mount) { + struct inode *mp = inode->i_mount; + mp->i_count++; + iput(inode); + wait_on_inode(inode = mp); + } + if(empty) + iput(empty); } - if (empty) - iput(empty); - -return_it: - while (h->updating) + while(inode_updating[hashent]) sleep_on(&update_wait); return inode; } -/* - * The "new" scheduling primitives (new as of 0.97 or so) allow this to - * be done without disabling interrupts (other than in the actual queue - * updating things: only a couple of 386 instructions). This should be - * much better for interrupt latency. - */ -static void __wait_on_inode(struct inode * inode) +void inode_init(void) { - struct wait_queue wait = { current, NULL }; + int i; - add_wait_queue(&inode->i_wait, &wait); -repeat: - current->state = TASK_UNINTERRUPTIBLE; - if (inode->i_lock) { - schedule(); - goto repeat; - } - remove_wait_queue(&inode->i_wait, &wait); - current->state = TASK_RUNNING; + inode_cachep = kmem_cache_create("inode", sizeof(struct inode), + sizeof(unsigned long) * 4, + SLAB_HWCACHE_ALIGN, NULL, NULL); + if(!inode_cachep) + panic("Cannot create inode SLAB cache\n"); + + for(i = 0; i < INODE_HASHSZ; i++) + inode_hash[i] = NULL; } |