Diffstat (limited to 'fs/inode.c')
-rw-r--r--  fs/inode.c  888
1 files changed, 446 insertions, 442 deletions
diff --git a/fs/inode.c b/fs/inode.c
index 724e8c4cd..b1d9bda4e 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1,229 +1,234 @@
/*
- * linux/fs/inode.c
+ * linux/fs/inode.c: Keeping track of inodes.
*
- * Copyright (C) 1991, 1992 Linus Torvalds
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ * Copyright (C) 1997 David S. Miller
*/
-#include <linux/stat.h>
-#include <linux/sched.h>
#include <linux/kernel.h>
+#include <linux/sched.h>
#include <linux/mm.h>
+#include <linux/slab.h>
#include <linux/string.h>
-#include <asm/system.h>
-
-#define NR_IHASH 512
-
-/*
- * Be VERY careful when you access the inode hash table. There
- * are some rather scary race conditions you need to take care of:
- * - P1 tries to open file "xx", calls "iget()" with the proper
- * inode number, but blocks because it's not on the list.
- * - P2 deletes file "xx", gets the inode (which P1 has just read,
- * but P1 hasn't woken up to the fact yet)
- * - P2 iput()'s the inode, which now has i_nlink = 0
- * - P1 wakes up and has the inode, but now P2 has made that
- * inode invalid (but P1 has no way of knowing that).
- *
- * The "updating" counter makes sure that when P1 blocks on the
- * iget(), P2 can't delete the inode from under it because P2
- * will wait until P1 has been able to update the inode usage
- * count so that the inode will stay in use until everybody has
- * closed it..
- */
-static struct inode_hash_entry {
- struct inode * inode;
- int updating;
-} hash_table[NR_IHASH];
-
-static struct inode * first_inode;
-static struct wait_queue * inode_wait = NULL;
-/* Keep these next two contiguous in memory for sysctl.c */
int nr_inodes = 0, nr_free_inodes = 0;
int max_inodes = NR_INODE;
-static inline int const hashfn(kdev_t dev, unsigned int i)
-{
- return (HASHDEV(dev) ^ i) % NR_IHASH;
-}
+#define INODE_HASHSZ 1024
-static inline struct inode_hash_entry * const hash(kdev_t dev, int i)
-{
- return hash_table + hashfn(dev, i);
-}
+static struct inode *inode_hash[INODE_HASHSZ];
-static inline void insert_inode_free(struct inode *inode)
-{
- struct inode * prev, * next = first_inode;
+/* All the details of hashing and lookup. */
+#define hashfn(dev, i) ((HASHDEV(dev) + ((i) ^ ((i) >> 10))) & (INODE_HASHSZ - 1))
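
/*
 * Illustrative userspace sketch of the hash fold above (not part of the
 * patch): HASHDEV() and kdev_t are kernel macros/types, so a plain integer
 * stands in for the device here.  The (i ^ (i >> 10)) fold keeps inode
 * numbers that differ only in the high bits from sharing a bucket.  The
 * names demo_hashfn, HASHSZ and the sample device number are assumptions
 * made up for this demo.
 */
#include <stdio.h>

#define HASHSZ 1024

static unsigned int demo_hashfn(unsigned int dev, unsigned long ino)
{
	return (dev + (ino ^ (ino >> 10))) & (HASHSZ - 1);
}

int main(void)
{
	unsigned int dev = 0x0301;	/* hypothetical device number */

	/* Inodes 1 and 1025 differ only in bit 10; without the fold they
	 * would land in the same bucket, with it they do not.
	 */
	printf("ino 1 -> %u, ino 2 -> %u, ino 1025 -> %u\n",
	       demo_hashfn(dev, 1), demo_hashfn(dev, 2),
	       demo_hashfn(dev, 1025));
	return 0;
}
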
- first_inode = inode;
- prev = next->i_prev;
- inode->i_next = next;
- inode->i_prev = prev;
- prev->i_next = inode;
- next->i_prev = inode;
+__inline__ void insert_inode_hash(struct inode *inode)
+{
+ struct inode **htable = &inode_hash[hashfn(inode->i_dev, inode->i_ino)];
+ if((inode->i_hash_next = *htable) != NULL)
+ (*htable)->i_hash_pprev = &inode->i_hash_next;
+ *htable = inode;
+ inode->i_hash_pprev = htable;
}
-static inline void remove_inode_free(struct inode *inode)
+#define hash_inode(inode) insert_inode_hash(inode)
+
+static inline void unhash_inode(struct inode *inode)
{
- if (first_inode == inode)
- first_inode = first_inode->i_next;
- if (inode->i_next)
- inode->i_next->i_prev = inode->i_prev;
- if (inode->i_prev)
- inode->i_prev->i_next = inode->i_next;
- inode->i_next = inode->i_prev = NULL;
+ if(inode->i_hash_pprev) {
+ if(inode->i_hash_next)
+ inode->i_hash_next->i_hash_pprev = inode->i_hash_pprev;
+ *(inode->i_hash_pprev) = inode->i_hash_next;
+ inode->i_hash_pprev = NULL;
+ }
}
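
/*
 * Illustrative userspace sketch (not from the patch) of the i_hash_next /
 * i_hash_pprev scheme used by insert_inode_hash() and unhash_inode() above:
 * each node stores a pointer to whatever pointer points at it, so unlinking
 * never needs to know which bucket the node lives in.  The toy names node,
 * table and HASHSZ are assumptions for the demo.
 */
#include <stdio.h>

#define HASHSZ 8

struct node {
	unsigned long key;
	struct node *next;	/* like i_hash_next  */
	struct node **pprev;	/* like i_hash_pprev */
};

static struct node *table[HASHSZ];

static void hash_insert(struct node *n)
{
	struct node **head = &table[n->key & (HASHSZ - 1)];

	if ((n->next = *head) != NULL)
		(*head)->pprev = &n->next;
	*head = n;
	n->pprev = head;
}

static void hash_remove(struct node *n)
{
	if (n->pprev) {
		if (n->next)
			n->next->pprev = n->pprev;
		*n->pprev = n->next;
		n->pprev = NULL;
	}
}

int main(void)
{
	struct node a = { .key = 1 }, b = { .key = 9 };	/* same bucket */

	hash_insert(&a);
	hash_insert(&b);
	hash_remove(&a);	/* no bucket scan needed */
	printf("bucket 1 head key: %lu\n", table[1]->key);
	return 0;
}
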
-void insert_inode_hash(struct inode *inode)
+static inline struct inode *find_inode(unsigned int hashent,
+ kdev_t dev, unsigned long ino)
{
- struct inode_hash_entry *h;
- h = hash(inode->i_dev, inode->i_ino);
+ struct inode *inode;
- inode->i_hash_next = h->inode;
- inode->i_hash_prev = NULL;
- if (inode->i_hash_next)
- inode->i_hash_next->i_hash_prev = inode;
- h->inode = inode;
+ for(inode = inode_hash[hashent]; inode; inode = inode->i_hash_next)
+ if(inode->i_dev == dev && inode->i_ino == ino)
+ break;
+ return inode;
}
-static inline void remove_inode_hash(struct inode *inode)
-{
- struct inode_hash_entry *h;
- h = hash(inode->i_dev, inode->i_ino);
+/* Free list queue and management. */
+static struct free_inode_queue {
+ struct inode *head;
+ struct inode **last;
+} free_inodes = { NULL, &free_inodes.head };
- if (h->inode == inode)
- h->inode = inode->i_hash_next;
- if (inode->i_hash_next)
- inode->i_hash_next->i_hash_prev = inode->i_hash_prev;
- if (inode->i_hash_prev)
- inode->i_hash_prev->i_hash_next = inode->i_hash_next;
- inode->i_hash_prev = inode->i_hash_next = NULL;
+static inline void put_inode_head(struct inode *inode)
+{
+ if((inode->i_next = free_inodes.head) != NULL)
+ free_inodes.head->i_pprev = &inode->i_next;
+ else
+ free_inodes.last = &inode->i_next;
+ free_inodes.head = inode;
+ inode->i_pprev = &free_inodes.head;
+ nr_free_inodes++;
}
-static inline void put_last_free(struct inode *inode)
+static inline void put_inode_last(struct inode *inode)
{
- remove_inode_free(inode);
- inode->i_prev = first_inode->i_prev;
- inode->i_prev->i_next = inode;
- inode->i_next = first_inode;
- inode->i_next->i_prev = inode;
+ inode->i_next = NULL;
+ inode->i_pprev = free_inodes.last;
+ *free_inodes.last = inode;
+ free_inodes.last = &inode->i_next;
+ nr_free_inodes++;
}
-int grow_inodes(void)
+static inline void remove_free_inode(struct inode *inode)
{
- struct inode * inode;
- int i;
-
- if (!(inode = (struct inode*) get_free_page(GFP_KERNEL)))
- return -ENOMEM;
-
- i=PAGE_SIZE / sizeof(struct inode);
- nr_inodes += i;
- nr_free_inodes += i;
+ if(inode->i_pprev) {
+ if(inode->i_next)
+ inode->i_next->i_pprev = inode->i_pprev;
+ else
+ free_inodes.last = inode->i_pprev;
+ *inode->i_pprev = inode->i_next;
+ inode->i_pprev = NULL;
+ nr_free_inodes--;
+ }
+}
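
/*
 * Illustrative userspace sketch (not from the patch) of the free-list queue
 * above: keeping both a head pointer and a 'last' pointer-to-pointer allows
 * O(1) insertion at either end, so freshly cleared inodes go to the front
 * while inodes retired by iput() go to the back, giving a cheap LRU order.
 * The toy type 'item' and the queue name 'freeq' are assumptions.
 */
#include <stdio.h>

struct item {
	int id;
	struct item *next;
	struct item **pprev;
};

static struct {
	struct item *head;
	struct item **last;
} freeq = { NULL, &freeq.head };

static void put_head(struct item *it)	/* cheap to reclaim: front */
{
	if ((it->next = freeq.head) != NULL)
		freeq.head->pprev = &it->next;
	else
		freeq.last = &it->next;
	freeq.head = it;
	it->pprev = &freeq.head;
}

static void put_last(struct item *it)	/* recently used: back */
{
	it->next = NULL;
	it->pprev = freeq.last;
	*freeq.last = it;
	freeq.last = &it->next;
}

int main(void)
{
	struct item a = { .id = 1 }, b = { .id = 2 }, c = { .id = 3 };

	put_last(&a);
	put_last(&b);
	put_head(&c);
	for (struct item *it = freeq.head; it; it = it->next)
		printf("%d ", it->id);	/* prints: 3 1 2 */
	printf("\n");
	return 0;
}
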
- if (!first_inode)
- inode->i_next = inode->i_prev = first_inode = inode++, i--;
+/* This is the in-use queue; if i_count > 0 (as far as we can tell)

+ * the sucker is here.
+ */
+static struct inode *inuse_list = NULL;
- for ( ; i ; i-- )
- insert_inode_free(inode++);
- return 0;
+static inline void put_inuse(struct inode *inode)
+{
+ if((inode->i_next = inuse_list) != NULL)
+ inuse_list->i_pprev = &inode->i_next;
+ inuse_list = inode;
+ inode->i_pprev = &inuse_list;
}
-unsigned long inode_init(unsigned long start, unsigned long end)
+static inline void remove_inuse(struct inode *inode)
{
- memset(hash_table, 0, sizeof(hash_table));
- first_inode = NULL;
- return start;
+ if(inode->i_pprev) {
+ if(inode->i_next)
+ inode->i_next->i_pprev = inode->i_pprev;
+ *inode->i_pprev = inode->i_next;
+ inode->i_pprev = NULL;
+ }
}
+/* Locking and unlocking inodes, plus waiting for locks to clear. */
static void __wait_on_inode(struct inode *);
-static inline void wait_on_inode(struct inode * inode)
+static inline void wait_on_inode(struct inode *inode)
{
- if (inode->i_lock)
+ if(inode->i_lock)
__wait_on_inode(inode);
}
-static inline void lock_inode(struct inode * inode)
+static inline void lock_inode(struct inode *inode)
{
- wait_on_inode(inode);
+ if(inode->i_lock)
+ __wait_on_inode(inode);
inode->i_lock = 1;
}
-static inline void unlock_inode(struct inode * inode)
+static inline void unlock_inode(struct inode *inode)
{
inode->i_lock = 0;
wake_up(&inode->i_wait);
}
-/*
- * Note that we don't want to disturb any wait-queues when we discard
- * an inode.
- *
- * Argghh. Got bitten by a gcc problem with inlining: no way to tell
- * the compiler that the inline asm function 'memset' changes 'inode'.
- * I've been searching for the bug for days, and was getting desperate.
- * Finally looked at the assembler output... Grrr.
- *
- * The solution is the weird use of 'volatile'. Ho humm. Have to report
- * it to the gcc lists, and hope we can do this more cleanly some day..
- */
-void clear_inode(struct inode * inode)
+static void __wait_on_inode(struct inode * inode)
{
- struct wait_queue * wait;
+ struct wait_queue wait = { current, NULL };
+
+ add_wait_queue(&inode->i_wait, &wait);
+repeat:
+ current->state = TASK_UNINTERRUPTIBLE;
+ if (inode->i_lock) {
+ schedule();
+ goto repeat;
+ }
+ remove_wait_queue(&inode->i_wait, &wait);
+ current->state = TASK_RUNNING;
+}
+
+/* Clear an inode of all its identity; this is exported to the world. */
+void clear_inode(struct inode *inode)
+{
+ struct wait_queue *wait;
+
+ /* So we don't disappear. */
+ inode->i_count++;
truncate_inode_pages(inode, 0);
wait_on_inode(inode);
- if (IS_WRITABLE(inode)) {
- if (inode->i_sb && inode->i_sb->dq_op)
- inode->i_sb->dq_op->drop(inode);
- }
- remove_inode_hash(inode);
- remove_inode_free(inode);
- wait = ((volatile struct inode *) inode)->i_wait;
- if (inode->i_count)
- nr_free_inodes++;
- memset(inode,0,sizeof(*inode));
- ((volatile struct inode *) inode)->i_wait = wait;
- insert_inode_free(inode);
+ if(IS_WRITABLE(inode) && inode->i_sb && inode->i_sb->dq_op)
+ inode->i_sb->dq_op->drop(inode);
+
+ if(--inode->i_count > 0)
+ remove_inuse(inode);
+ else
+ remove_free_inode(inode);
+ unhash_inode(inode);
+ wait = inode->i_wait;
+ memset(inode, 0, sizeof(*inode)); barrier();
+ inode->i_wait = wait;
+ put_inode_head(inode); /* Pages zapped, put at the front. */
}
+/* These check the validity of a mount/umount type operation; we essentially
+ * check if there are any inodes hanging around which prevent this operation
+ * from occurring. We also clear out clean inodes referencing this device.
+ */
int fs_may_mount(kdev_t dev)
{
- struct inode * inode, * next;
- int i;
+ struct inode *inode;
+ int pass = 0;
- next = first_inode;
- for (i = nr_inodes ; i > 0 ; i--) {
- inode = next;
- next = inode->i_next; /* clear_inode() changes the queues.. */
- if (inode->i_dev != dev)
- continue;
- if (inode->i_count || inode->i_dirt || inode->i_lock)
+ inode = free_inodes.head;
+repeat:
+ while(inode) {
+ struct inode *next = inode->i_next;
+ if(inode->i_dev != dev)
+ goto next;
+ if(inode->i_count || inode->i_dirt || inode->i_lock)
return 0;
clear_inode(inode);
+ next:
+ inode = next;
+ }
+ if(pass == 0) {
+ inode = inuse_list;
+ pass = 1;
+ goto repeat;
}
- return 1;
+ return 1; /* Tis' cool bro. */
}
-int fs_may_umount(kdev_t dev, struct inode * mount_root)
+int fs_may_umount(kdev_t dev, struct inode *iroot)
{
- struct inode * inode;
- int i;
+ struct inode *inode;
+ int pass = 0;
- inode = first_inode;
- for (i=0 ; i < nr_inodes ; i++, inode = inode->i_next) {
- if (inode->i_dev != dev || !inode->i_count)
+ inode = free_inodes.head;
+repeat:
+ for(; inode; inode = inode->i_next) {
+ if(inode->i_dev != dev || !inode->i_count)
continue;
- if (inode == mount_root && inode->i_count ==
- (inode->i_mount != inode ? 1 : 2))
+ if(inode == iroot &&
+ (inode->i_count == (inode->i_mount == inode ? 2 : 1)))
continue;
return 0;
}
- return 1;
+ if(pass == 0) {
+ inode = inuse_list;
+ pass = 1;
+ goto repeat;
+ }
+ return 1; /* Tis' cool bro. */
}
+/* This belongs in file_table.c, not here... */
int fs_may_remount_ro(kdev_t dev)
{
struct file * file;
@@ -237,79 +242,70 @@ int fs_may_remount_ro(kdev_t dev)
if (S_ISREG(file->f_inode->i_mode) && (file->f_mode & 2))
return 0;
}
- return 1;
+ return 1; /* Tis' cool bro. */
}
-static void write_inode(struct inode * inode)
+/* Reading/writing inodes. */
+static void write_inode(struct inode *inode)
{
- if (!inode->i_dirt)
- return;
- wait_on_inode(inode);
- if (!inode->i_dirt)
- return;
- if (!inode->i_sb || !inode->i_sb->s_op || !inode->i_sb->s_op->write_inode) {
- inode->i_dirt = 0;
- return;
+ if(inode->i_dirt) {
+ wait_on_inode(inode);
+ if(inode->i_dirt) {
+ if(inode->i_sb &&
+ inode->i_sb->s_op &&
+ inode->i_sb->s_op->write_inode) {
+ inode->i_lock = 1;
+ inode->i_sb->s_op->write_inode(inode);
+ unlock_inode(inode);
+ } else {
+ inode->i_dirt = 0;
+ }
+ }
}
- inode->i_lock = 1;
- inode->i_sb->s_op->write_inode(inode);
- unlock_inode(inode);
}
-static inline void read_inode(struct inode * inode)
+static inline void read_inode(struct inode *inode)
{
- lock_inode(inode);
- if (inode->i_sb && inode->i_sb->s_op && inode->i_sb->s_op->read_inode)
+ if(inode->i_sb &&
+ inode->i_sb->s_op &&
+ inode->i_sb->s_op->read_inode) {
+ lock_inode(inode);
inode->i_sb->s_op->read_inode(inode);
- unlock_inode(inode);
+ unlock_inode(inode);
+ }
}
-/* POSIX UID/GID verification for setting inode attributes */
int inode_change_ok(struct inode *inode, struct iattr *attr)
{
- /*
- * If force is set do it anyway.
- */
-
- if (attr->ia_valid & ATTR_FORCE)
- return 0;
+ if(!(attr->ia_valid & ATTR_FORCE)) {
+ unsigned short fsuid = current->fsuid;
+ uid_t iuid = inode->i_uid;
+ int not_fsuser = !fsuser();
- /* Make sure a caller can chown */
- if ((attr->ia_valid & ATTR_UID) &&
- (current->fsuid != inode->i_uid ||
- attr->ia_uid != inode->i_uid) && !fsuser())
- return -EPERM;
+ if(((attr->ia_valid & ATTR_UID) &&
+ ((fsuid != iuid) ||
+ (attr->ia_uid != iuid)) && not_fsuser) ||
- /* Make sure caller can chgrp */
- if ((attr->ia_valid & ATTR_GID) &&
- (!in_group_p(attr->ia_gid) && attr->ia_gid != inode->i_gid) &&
- !fsuser())
- return -EPERM;
+ ((attr->ia_valid & ATTR_GID) &&
+ (!in_group_p(attr->ia_gid) &&
+ (attr->ia_gid != inode->i_gid))) ||
- /* Make sure a caller can chmod */
- if (attr->ia_valid & ATTR_MODE) {
- if ((current->fsuid != inode->i_uid) && !fsuser())
+ ((attr->ia_valid & (ATTR_ATIME_SET | ATTR_MTIME_SET)) &&
+ (fsuid != iuid) && not_fsuser))
return -EPERM;
- /* Also check the setgid bit! */
- if (!fsuser() && !in_group_p((attr->ia_valid & ATTR_GID) ? attr->ia_gid :
- inode->i_gid))
- attr->ia_mode &= ~S_ISGID;
- }
- /* Check for setting the inode time */
- if ((attr->ia_valid & ATTR_ATIME_SET) &&
- ((current->fsuid != inode->i_uid) && !fsuser()))
- return -EPERM;
- if ((attr->ia_valid & ATTR_MTIME_SET) &&
- ((current->fsuid != inode->i_uid) && !fsuser()))
- return -EPERM;
+ if(attr->ia_valid & ATTR_MODE) {
+ gid_t grp;
+ if(fsuid != iuid && not_fsuser)
+ return -EPERM;
+ grp = attr->ia_valid & ATTR_GID ? attr->ia_gid : inode->i_gid;
+ if(not_fsuser && !in_group_p(grp))
+ attr->ia_mode &= ~S_ISGID;
+ }
+ }
return 0;
}
-/*
- * Set the appropriate attributes from an attribute structure into
- * the inode structure.
- */
void inode_setattr(struct inode *inode, struct iattr *attr)
{
if (attr->ia_valid & ATTR_UID)
@@ -332,17 +328,8 @@ void inode_setattr(struct inode *inode, struct iattr *attr)
inode->i_dirt = 1;
}
-/*
- * notify_change is called for inode-changing operations such as
- * chown, chmod, utime, and truncate. It is guaranteed (unlike
- * write_inode) to be called from the context of the user requesting
- * the change.
- */
-
-int notify_change(struct inode * inode, struct iattr *attr)
+int notify_change(struct inode *inode, struct iattr *attr)
{
- int retval;
-
attr->ia_ctime = CURRENT_TIME;
if (attr->ia_valid & (ATTR_ATIME | ATTR_MTIME)) {
if (!(attr->ia_valid & ATTR_ATIME_SET))
@@ -351,303 +338,320 @@ int notify_change(struct inode * inode, struct iattr *attr)
attr->ia_mtime = attr->ia_ctime;
}
- if (inode->i_sb && inode->i_sb->s_op &&
+ if (inode->i_sb &&
+ inode->i_sb->s_op &&
inode->i_sb->s_op->notify_change)
return inode->i_sb->s_op->notify_change(inode, attr);
- if ((retval = inode_change_ok(inode, attr)) != 0)
- return retval;
+ if(inode_change_ok(inode, attr) != 0)
+ return -EPERM;
inode_setattr(inode, attr);
return 0;
}
-/*
- * bmap is needed for demand-loading and paging: if this function
- * doesn't exist for a filesystem, then those things are impossible:
- * executables cannot be run from the filesystem etc...
- *
- * This isn't as bad as it sounds: the read-routines might still work,
- * so the filesystem would be otherwise ok (for example, you might have
- * a DOS filesystem, which doesn't lend itself to bmap very well, but
- * you could still transfer files to/from the filesystem)
- */
-int bmap(struct inode * inode, int block)
+int bmap(struct inode *inode, int block)
{
- if (inode->i_op && inode->i_op->bmap)
- return inode->i_op->bmap(inode,block);
+ if(inode->i_op && inode->i_op->bmap)
+ return inode->i_op->bmap(inode, block);
return 0;
}
void invalidate_inodes(kdev_t dev)
{
- struct inode * inode, * next;
- int i;
+ struct inode *inode;
+ int pass = 0;
- next = first_inode;
- for(i = nr_inodes ; i > 0 ; i--) {
- inode = next;
- next = inode->i_next; /* clear_inode() changes the queues.. */
- if (inode->i_dev != dev)
- continue;
- if (inode->i_count || inode->i_dirt || inode->i_lock) {
- printk("VFS: inode busy on removed device %s\n",
- kdevname(dev));
- continue;
- }
+ inode = free_inodes.head;
+repeat:
+ while(inode) {
+ struct inode *next = inode->i_next;
+ if(inode->i_dev != dev)
+ goto next;
clear_inode(inode);
+ next:
+ inode = next;
+ }
+ if(pass == 0) {
+ inode = inuse_list;
+ pass = 1;
+ goto repeat;
}
}
void sync_inodes(kdev_t dev)
{
- int i;
- struct inode * inode;
+ struct inode *inode;
+ int pass = 0;
- inode = first_inode;
- for(i = 0; i < nr_inodes*2; i++, inode = inode->i_next) {
- if (dev && inode->i_dev != dev)
- continue;
+ inode = free_inodes.head;
+repeat:
+ while(inode) {
+ struct inode *next = inode->i_next;
+ if(dev && inode->i_dev != dev)
+ goto next;
wait_on_inode(inode);
- if (inode->i_dirt)
- write_inode(inode);
+ write_inode(inode);
+ next:
+ inode = next;
+ }
+ if(pass == 0) {
+ inode = inuse_list;
+ pass = 1;
+ goto repeat;
}
}
-void iput(struct inode * inode)
+static struct wait_queue *inode_wait, *update_wait;
+
+void iput(struct inode *inode)
{
- if (!inode)
+ if(!inode)
return;
wait_on_inode(inode);
- if (!inode->i_count) {
- printk("VFS: iput: trying to free free inode\n");
- printk("VFS: device %s, inode %lu, mode=0%07o\n",
- kdevname(inode->i_rdev), inode->i_ino, (int) inode->i_mode);
+ if(!inode->i_count) {
+ printk("VFS: Freeing free inode, tell DaveM\n");
return;
}
- if (inode->i_pipe)
+ if(inode->i_pipe)
wake_up_interruptible(&PIPE_WAIT(*inode));
-repeat:
- if (inode->i_count>1) {
+we_slept:
+ if(inode->i_count > 1) {
inode->i_count--;
- return;
+ } else {
+ wake_up(&inode_wait);
+ if(inode->i_pipe) {
+ free_page((unsigned long)PIPE_BASE(*inode));
+ PIPE_BASE(*inode) = NULL;
+ }
+ if(inode->i_sb &&
+ inode->i_sb->s_op &&
+ inode->i_sb->s_op->put_inode) {
+ inode->i_sb->s_op->put_inode(inode);
+ if(!inode->i_nlink)
+ return;
+ }
+ if(inode->i_dirt) {
+ write_inode(inode);
+ wait_on_inode(inode);
+ goto we_slept;
+ }
+ if(IS_WRITABLE(inode) &&
+ inode->i_sb &&
+ inode->i_sb->dq_op) {
+ inode->i_lock = 1;
+ inode->i_sb->dq_op->drop(inode);
+ unlock_inode(inode);
+ goto we_slept;
+ }
+ /* There is a serious race leading to here, watch out. */
+ if(--inode->i_count == 0) {
+ remove_inuse(inode);
+ put_inode_last(inode); /* Place at end of LRU free queue */
+ }
}
+}
- wake_up(&inode_wait);
- if (inode->i_pipe) {
- unsigned long page = (unsigned long) PIPE_BASE(*inode);
- PIPE_BASE(*inode) = NULL;
- free_page(page);
- }
+static kmem_cache_t *inode_cachep;
- if (inode->i_sb && inode->i_sb->s_op && inode->i_sb->s_op->put_inode) {
- inode->i_sb->s_op->put_inode(inode);
- if (!inode->i_nlink)
- return;
- }
+static void grow_inodes(void)
+{
+ int i = 16;
- if (inode->i_dirt) {
- write_inode(inode); /* we can sleep - so do again */
- wait_on_inode(inode);
- goto repeat;
+ while(i--) {
+ struct inode *inode;
+
+ inode = kmem_cache_alloc(inode_cachep, SLAB_KERNEL);
+ if(!inode)
+ return;
+ memset(inode, 0, sizeof(*inode));
+ put_inode_head(inode);
+ nr_inodes++;
}
+}
- if (IS_WRITABLE(inode)) {
- if (inode->i_sb && inode->i_sb->dq_op) {
- /* Here we can sleep also. Let's do it again
- * Dmitry Gorodchanin 02/11/96
- */
- inode->i_lock = 1;
- inode->i_sb->dq_op->drop(inode);
- unlock_inode(inode);
- goto repeat;
- }
+/* We have to be really careful; it's really easy to run yourself into
+ * inefficient sequences of events. The first problem is that when you
+ * steal a non-referenced inode you run the risk of zapping a considerable
+ * number of page cache entries, which might get referenced once again.
+ * But if you grow the inode set too quickly, you suck up RAM
+ * and cause other problems.
+ *
+ * We approach the problem in the following way: we take two things into
+ * consideration. Firstly we look at how much we have "committed"
+ * to this inode already (i_nrpages); this accounts for the cost of getting
+ * those pages back if someone should reference that inode soon. We also
+ * attempt to factor in i_blocks, which says "how much of a problem could
+ * this potentially be". It still needs some tuning, though. -DaveM
+ */
+#define BLOCK_FACTOR_SHIFT 5 /* It is not factored in as much. */
+static struct inode *find_best_candidate_weighted(struct inode *inode)
+{
+ struct inode *best = NULL;
+
+ if(inode) {
+ unsigned long bestscore = 1000;
+ int limit = nr_free_inodes >> 2;
+ do {
+ if(!(inode->i_lock | inode->i_dirt)) {
+ int myscore = inode->i_nrpages;
+
+ myscore += (inode->i_blocks >> BLOCK_FACTOR_SHIFT);
+ if(myscore < bestscore) {
+ bestscore = myscore;
+ best = inode;
+ }
+ }
+ inode = inode->i_next;
+ } while(inode && --limit);
}
-
- inode->i_count--;
+ return best;
+}
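
/*
 * Worked example (not from the patch) of the weighting used above: the score
 * is i_nrpages + (i_blocks >> BLOCK_FACTOR_SHIFT), i.e. one point per cached
 * page plus one point per 32 blocks, and lower means cheaper to steal.  The
 * real code also only scans the first quarter of the free list and ignores
 * anything scoring 1000 or more.  The sample numbers below are assumptions.
 */
#include <stdio.h>

#define BLOCK_FACTOR_SHIFT 5

static unsigned long steal_score(unsigned long nrpages, unsigned long blocks)
{
	return nrpages + (blocks >> BLOCK_FACTOR_SHIFT);
}

int main(void)
{
	/* An inode with 4 cached pages and 64 blocks scores 4 + 2 = 6; one
	 * with 10 cached pages and 8 blocks scores 10 + 0 = 10, so the
	 * first is the better candidate to recycle.
	 */
	printf("a: %lu, b: %lu\n", steal_score(4, 64), steal_score(10, 8));
	return 0;
}
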
- if (inode->i_mmap) {
- printk("iput: inode %lu on device %s still has mappings.\n",
- inode->i_ino, kdevname(inode->i_dev));
- inode->i_mmap = NULL;
+static inline struct inode *find_best_free(struct inode *inode)
+{
+ if(inode) {
+ int limit = nr_free_inodes >> 5;
+ do {
+ if(!inode->i_nrpages)
+ return inode;
+ inode = inode->i_next;
+ } while(inode && --limit);
}
-
- nr_free_inodes++;
- return;
+ return NULL;
}
-struct inode * get_empty_inode(void)
+struct inode *get_empty_inode(void)
{
static int ino = 0;
- struct inode * inode, * best;
- unsigned long badness;
- int i;
+ struct inode *inode;
- if (nr_inodes < max_inodes && nr_free_inodes < (nr_inodes >> 1))
- grow_inodes();
repeat:
- inode = first_inode;
- best = NULL;
- badness = 1000;
- for (i = nr_inodes/2; i > 0; i--,inode = inode->i_next) {
- if (!inode->i_count) {
- unsigned long i = 999;
- if (!(inode->i_lock | inode->i_dirt))
- i = inode->i_nrpages;
- if (i < badness) {
- best = inode;
- if (!i)
- goto found_good;
- badness = i;
- }
- }
- }
- if (nr_inodes < max_inodes) {
- if (grow_inodes() == 0)
- goto repeat;
- best = NULL;
- }
- if (!best) {
- printk("VFS: No free inodes - contact Linus\n");
- sleep_on(&inode_wait);
+ inode = find_best_free(free_inodes.head);
+ if(!inode)
+ goto pressure;
+got_it:
+ inode->i_count++;
+ truncate_inode_pages(inode, 0);
+ wait_on_inode(inode);
+ if(IS_WRITABLE(inode) && inode->i_sb && inode->i_sb->dq_op)
+ inode->i_sb->dq_op->drop(inode);
+ unhash_inode(inode);
+ remove_free_inode(inode);
+
+ memset(inode, 0, sizeof(*inode));
+ inode->i_count = 1;
+ inode->i_nlink = 1;
+ inode->i_version = ++event;
+ sema_init(&inode->i_sem, 1);
+ inode->i_ino = ++ino;
+ inode->i_dev = 0;
+ put_inuse(inode);
+ return inode;
+pressure:
+ if(nr_inodes < max_inodes) {
+ grow_inodes();
goto repeat;
}
- if (best->i_lock) {
- wait_on_inode(best);
+ inode = find_best_candidate_weighted(free_inodes.head);
+ if(!inode) {
+ printk("VFS: No free inodes, contact DaveM\n");
+ sleep_on(&inode_wait);
goto repeat;
}
- if (best->i_dirt) {
- write_inode(best);
+ if(inode->i_lock) {
+ wait_on_inode(inode);
goto repeat;
- }
- if (best->i_count)
+ } else if(inode->i_dirt) {
+ write_inode(inode);
goto repeat;
-found_good:
- clear_inode(best);
- best->i_count = 1;
- best->i_nlink = 1;
- best->i_version = ++event;
- best->i_sem.count = 1;
- best->i_ino = ++ino;
- best->i_dev = 0;
- nr_free_inodes--;
- if (nr_free_inodes < 0) {
- printk ("VFS: get_empty_inode: bad free inode count.\n");
- nr_free_inodes = 0;
}
- return best;
+ goto got_it;
}
-struct inode * get_pipe_inode(void)
+struct inode *get_pipe_inode(void)
{
- struct inode * inode;
extern struct inode_operations pipe_inode_operations;
-
- if (!(inode = get_empty_inode()))
- return NULL;
- if (!(PIPE_BASE(*inode) = (char*) __get_free_page(GFP_USER))) {
- iput(inode);
- return NULL;
+ struct inode *inode = get_empty_inode();
+
+ if(inode) {
+ unsigned long page = __get_free_page(GFP_USER);
+ if(!page) {
+ iput(inode);
+ inode = NULL;
+ } else {
+ PIPE_BASE(*inode) = (char *) page;
+ inode->i_op = &pipe_inode_operations;
+ inode->i_count = 2;
+ PIPE_WAIT(*inode) = NULL;
+ PIPE_START(*inode) = PIPE_LEN(*inode) = 0;
+ PIPE_RD_OPENERS(*inode) = PIPE_WR_OPENERS(*inode) = 0;
+ PIPE_READERS(*inode) = PIPE_WRITERS(*inode) = 1;
+ PIPE_LOCK(*inode) = 0;
+ inode->i_pipe = 1;
+ inode->i_mode |= S_IFIFO | S_IRUSR | S_IWUSR;
+ inode->i_uid = current->fsuid;
+ inode->i_gid = current->fsgid;
+ inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+ inode->i_blksize = PAGE_SIZE;
+ }
}
- inode->i_op = &pipe_inode_operations;
- inode->i_count = 2; /* sum of readers/writers */
- PIPE_WAIT(*inode) = NULL;
- PIPE_START(*inode) = PIPE_LEN(*inode) = 0;
- PIPE_RD_OPENERS(*inode) = PIPE_WR_OPENERS(*inode) = 0;
- PIPE_READERS(*inode) = PIPE_WRITERS(*inode) = 1;
- PIPE_LOCK(*inode) = 0;
- inode->i_pipe = 1;
- inode->i_mode |= S_IFIFO | S_IRUSR | S_IWUSR;
- inode->i_uid = current->fsuid;
- inode->i_gid = current->fsgid;
- inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
- inode->i_blksize = PAGE_SIZE;
return inode;
}
-struct inode *__iget(struct super_block * sb, int nr, int crossmntp)
-{
- static struct wait_queue * update_wait = NULL;
- struct inode_hash_entry * h;
- struct inode * inode;
- struct inode * empty = NULL;
+static int inode_updating[INODE_HASHSZ];
- if (!sb)
- panic("VFS: iget with sb==NULL");
- h = hash(sb->s_dev, nr);
-repeat:
- for (inode = h->inode; inode ; inode = inode->i_hash_next)
- if (inode->i_dev == sb->s_dev && inode->i_ino == nr)
- goto found_it;
- if (!empty) {
- /*
- * If we sleep here before we have found an inode
- * we need to make sure nobody does anything bad
- * to the inode while we sleep, because otherwise
- * we may return an inode that is not valid any
- * more when we wake up..
- */
- h->updating++;
- empty = get_empty_inode();
- if (!--h->updating)
- wake_up(&update_wait);
- if (empty)
- goto repeat;
- return (NULL);
- }
- inode = empty;
- inode->i_sb = sb;
- inode->i_dev = sb->s_dev;
- inode->i_ino = nr;
- inode->i_flags = sb->s_flags;
- put_last_free(inode);
- insert_inode_hash(inode);
- read_inode(inode);
- goto return_it;
-
-found_it:
- if (!inode->i_count)
- nr_free_inodes--;
- inode->i_count++;
- wait_on_inode(inode);
- if (inode->i_dev != sb->s_dev || inode->i_ino != nr) {
- printk("Whee.. inode changed from under us. Tell Linus\n");
- iput(inode);
- goto repeat;
- }
- if (crossmntp && inode->i_mount) {
- struct inode * tmp = inode->i_mount;
- tmp->i_count++;
- iput(inode);
- inode = tmp;
+struct inode *__iget(struct super_block *sb, int nr, int crossmntp)
+{
+ unsigned int hashent = hashfn(sb->s_dev, nr);
+ struct inode *inode, *empty = NULL;
+
+we_slept:
+ if((inode = find_inode(hashent, sb->s_dev, nr)) == NULL) {
+ if(empty == NULL) {
+ inode_updating[hashent]++;
+ empty = get_empty_inode();
+ if(!--inode_updating[hashent])
+ wake_up(&update_wait);
+ goto we_slept;
+ }
+ inode = empty;
+ inode->i_sb = sb;
+ inode->i_dev = sb->s_dev;
+ inode->i_ino = nr;
+ inode->i_flags = sb->s_flags;
+ hash_inode(inode);
+ read_inode(inode);
+ } else {
+ if(!inode->i_count++) {
+ remove_free_inode(inode);
+ put_inuse(inode);
+ }
wait_on_inode(inode);
+ if(crossmntp && inode->i_mount) {
+ struct inode *mp = inode->i_mount;
+ mp->i_count++;
+ iput(inode);
+ wait_on_inode(inode = mp);
+ }
+ if(empty)
+ iput(empty);
}
- if (empty)
- iput(empty);
-
-return_it:
- while (h->updating)
+ while(inode_updating[hashent])
sleep_on(&update_wait);
return inode;
}
-/*
- * The "new" scheduling primitives (new as of 0.97 or so) allow this to
- * be done without disabling interrupts (other than in the actual queue
- * updating things: only a couple of 386 instructions). This should be
- * much better for interrupt latency.
- */
-static void __wait_on_inode(struct inode * inode)
+void inode_init(void)
{
- struct wait_queue wait = { current, NULL };
+ int i;
- add_wait_queue(&inode->i_wait, &wait);
-repeat:
- current->state = TASK_UNINTERRUPTIBLE;
- if (inode->i_lock) {
- schedule();
- goto repeat;
- }
- remove_wait_queue(&inode->i_wait, &wait);
- current->state = TASK_RUNNING;
+ inode_cachep = kmem_cache_create("inode", sizeof(struct inode),
+ sizeof(unsigned long) * 4,
+ SLAB_HWCACHE_ALIGN, NULL, NULL);
+ if(!inode_cachep)
+ panic("Cannot create inode SLAB cache\n");
+
+ for(i = 0; i < INODE_HASHSZ; i++)
+ inode_hash[i] = NULL;
}