summaryrefslogtreecommitdiffstats
path: root/fs/dcache.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/dcache.c')
-rw-r--r--fs/dcache.c1162
1 files changed, 959 insertions, 203 deletions
diff --git a/fs/dcache.c b/fs/dcache.c
index f6ab04693..0472487e0 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1,283 +1,1039 @@
/*
- * linux/fs/dcache.c
+ * fs/dcache.c
*
- * (C) Copyright 1994 Linus Torvalds
+ * Complete reimplementation
+ * (C) 1997 Thomas Schoebel-Theuer
*/
-/* Speeded up searches a bit and threaded the mess. -DaveM */
+/* The new dcache is exclusively called from the VFS, not from
+ * the specific fs'es any more. Despite having the same name as in the
+ * old code, it has less to do with it.
+ *
+ * It serves many purposes:
+ *
+ * 1) Any inode that has been retrieved with lookup() and is in use
+ * (i_count>0), has access to its full absolute path name, by going
+ * to inode->i_dentry and then recursively following the entry->d_parent
+ * chain. Use d_path() as predefined method for that.
+ * You may find out the corresponding inode belonging to
+ * a dentry by calling d_inode(). This can be used as an easy way for
+ * determining .. and its absolute pathname, an old UNIX problem that
+ * deserved a solution for a long time.
+ * Note that hardlinked inodes may have multiple dentries assigned to
+ * (via the d_next chain), reflecting multiple alias pathnames.
+ *
+ * 2) If not disabled by filesystem types specifying FS_NO_DCACHE,
+ * the dentries of unused (aged) inodes are retained for speeding up
+ * lookup()s, by allowing hashed inquiry starting from the dentry of
+ * the parent directory.
+ *
+ * 3) It can remember so-called "negative entries", that is dentries for
+ * pathnames that are known to *not* exist, so unnecessary repeated
+ * lookup()s for non-existent names can be saved.
+ *
+ * 4) It provides a means for keeping deleted files (inode->i_nlink==0)
+ * accessible in the so-called *basket*. Inodes in the basket have been
+ * removed with unlink() while being in use (i_count>0), so they would
+ * normally use up space on the disk and be accessible through their
+ * filedescriptor, but would not be accessible for lookup() any more.
+ * The basket simply keeps such files in the dcache (for potential
+ * dcache lookup) until they are either eventually removed completely,
+ * or transferred to the second-level basket, the so-called *ibasket*.
+ * The ibasket is implemented in the new inode code, on request of
+ * filesystem types that have the flag FS_IBASKET set, and proliferates
+ * the unlinked files when i_count has gone to zero, at least as long
+ * as there is space on the disk and enough inodes remain available
+ * and no umount() has started.
+ *
+ * 5) Preliminary dentries can be added by readdir(). While normal dentries
+ * directly point to the inode via u.d_inode, only the inode number is
+ * known from readdir(), but not more. They can be converted to
+ * normal dentries by using d_inode().
+ */
/*
- * The directory cache is a "two-level" cache, each level doing LRU on
- * its entries. Adding new entries puts them at the end of the LRU
- * queue on the first-level cache, while the second-level cache is
- * fed by any cache hits.
+ * Notes on the allocation strategy:
*
- * The idea is that new additions (from readdir(), for example) will not
- * flush the cache of entries that have really been used.
- *
- * There is a global hash-table over both caches that hashes the entries
- * based on the directory inode number and device as well as on a
- * string-hash computed over the name.
+ * The dcache is a full slave cache of the inodes. Whenever an inode
+ * is cleared, all the dentries associated with it will recursively
+ * disappear. dentries have no own reference counting; this has to
+ * be obeyed for SMP.
+ * If directories could go out of inode cache while
+ * successors are alive, this would interrupt the d_parent chain of
+ * the live successors. To prevent this without using zombies, all
+ * directories are thus prevented from __iput() as long as successors
+ * are alive.
*/
-#include <linux/fs.h>
+#include <linux/config.h>
#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/fs.h>
+#include <linux/dalloc.h>
+#include <linux/dlists.h>
-#include <asm/unaligned.h>
-#include <asm/spinlock.h>
+/* this should be removed after the beta phase */
+/* #define DEBUG */
+/*#undef DEBUG*/
+/* #define DEBUG_DDIR_COUNT */
-spinlock_t dcache_lock = SPIN_LOCK_UNLOCKED;
+#define D_HASHSIZE 64
-/*
- * Don't bother caching long names.. They just take up space in the cache, and
- * for a name cache you just want to cache the "normal" names anyway which tend
- * to be short.
+/* local flags for d_flag */
+#define D_DIR 32
+#define D_HASHED 64
+#define D_ZOMBIE 128
+#define D_PRELIMINARY 256
+#define D_INC_DDIR 512
+
+/* local flags for d_del() */
+#define D_RECURSIVE 4
+#define D_NO_FREE 8
+
+/* adjust these constants if you know a probability distribution ... */
+#define D_SMALL 16
+#define D_MEDIUM 64
+#define D_LARGE 256
+#define D_HUGE D_MAXLEN
+
+#define BASE_DHEADER(x) (struct dheader*)((unsigned long)(x) & ~(PAGE_SIZE-1))
+#define BYTE_ADD(x,n) (void*)((char*)(x) + (n))
+#define BYTE_SUB(x,n) (void*)((char*)(x) - (n))
+
+/* This is for global allocation of dentries. Remove this when
+ * converting to SLAB.
*/
-#define DCACHE_NAME_LEN 15
-#define DCACHE_SIZE 1024
-#define DCACHE_HASH_QUEUES 256 /* keep this a pow2 */
+struct dheader {
+ struct dentry * emptylist;
+ short free, maxfree;
+ struct dheader * next;
+ struct dheader * prev;
+};
-/*
- * The dir_cache_entry must be in this order: we do ugly things with the pointers
+struct anchors {
+ struct dheader * free; /* each contains at least 1 empty dentry */
+ struct dheader * full; /* all the used up ones */
+ struct dheader * dir_free;
+ struct dheader * dir_full;
+};
+
+/* This is only used for directory dentries. Think of it as an extension
+ * of the dentry.
+ * It is defined as separate struct, so it uses up space only
+ * where necessary.
*/
-struct dir_cache_entry {
- struct dir_cache_entry *next;
- struct dir_cache_entry **pprev;
- kdev_t dc_dev;
- unsigned long dir;
- unsigned long version;
- unsigned long ino;
- unsigned char name_len;
- char name[DCACHE_NAME_LEN];
- struct dir_cache_entry ** lru_head;
- struct dir_cache_entry * next_lru, * prev_lru;
+struct ddir {
+ struct dentry * dd_hashtable[D_HASHSIZE];
+ struct dentry * dd_neglist;
+ struct dentry * dd_basketlist;
+ struct dentry * dd_zombielist;
+ unsigned short dd_alloced; /* # d_alloc()ed, but not yet d_add()ed */
+ unsigned short dd_hashed; /* # of entries in hashtable */
+ unsigned short dd_true_hashed; /* # non-preliminaries in hashtable */
+ unsigned short dd_negs; /* # of negative entries */
};
-#define dcache_offset(x) ((unsigned long)&((struct dir_cache_entry*)0)->x)
-#define dcache_datalen (dcache_offset(lru_head) - dcache_offset(dc_dev))
+DEF_INSERT(header,struct dheader,next,prev)
+DEF_REMOVE(header,struct dheader,next,prev)
-#define COPYDATA(de, newde) \
-memcpy((void *) &newde->dc_dev, (void *) &de->dc_dev, dcache_datalen)
+DEF_INSERT(alias,struct dentry,d_next,d_prev)
+DEF_REMOVE(alias,struct dentry,d_next,d_prev)
-static struct dir_cache_entry level1_cache[DCACHE_SIZE];
-static struct dir_cache_entry level2_cache[DCACHE_SIZE];
+DEF_INSERT(hash,struct dentry,d_hash_next,d_hash_prev)
+DEF_REMOVE(hash,struct dentry,d_hash_next,d_hash_prev)
-/*
- * The LRU-lists are doubly-linked circular lists, and do not change in size
- * so these pointers always have something to point to (after _init)
- */
-static struct dir_cache_entry * level1_head;
-static struct dir_cache_entry * level2_head;
+DEF_INSERT(basket,struct dentry,d_basket_next,d_basket_prev)
+DEF_REMOVE(basket,struct dentry,d_basket_next,d_basket_prev)
-/* The hash queues are layed out in a slightly different manner. */
-static struct dir_cache_entry *hash_table[DCACHE_HASH_QUEUES];
+static struct anchors anchors[4];
-#define hash_fn(dev,dir,namehash) \
- ((HASHDEV(dev) ^ (dir) ^ (namehash)) & (DCACHE_HASH_QUEUES - 1))
+struct dentry * the_root = NULL;
-/*
- * Stupid name"hash" algorithm. Write something better if you want to,
- * but I doubt it matters that much.
+unsigned long name_cache_init(unsigned long mem_start, unsigned long mem_end)
+{
+ memset(anchors, 0, sizeof(anchors));
+ return mem_start;
+}
+
+#ifdef DEBUG
+/* throw this away after the beta phase */
+/*************************************************************************/
+extern void xcheck(char * txt, struct inode * p);
+
+static int x_alloc = 0;
+static int x_freed = 0;
+static int x_free = 0;
+
+static void * tst[20000];
+static int cnt = 0;
+
+static void ins(void* ptr)
+{
+ extern int inodes_stat;
+ tst[cnt++] = ptr;
+ if(cnt % 1000 == 0)
+ printk("------%d allocated: %d: %d %d %d\n", inodes_stat, cnt,
+ x_alloc, x_freed, x_free);
+ if(cnt>=20000) panic("stop");
+}
+
+#if 0
+static inline int search(void* ptr)
+{
+ int i;
+ for(i = cnt-1; i>=0; i--)
+ if(tst[i] == ptr)
+ return i;
+ return -1;
+}
+
+#define TST(n,x) if(search(x)<0) printk("%s bad ptr %p line %d\n", n, x, __LINE__)
+#else
+#define TST(n,x) /*nothing*/
+#endif
+
+void LOG(char * txt, struct dentry * entry)
+{
+ static int count = 0;
+ if(entry) {
+ TST(txt,entry);
+ }
+ if(count) {
+ count--;
+ printk("%s: entry=%p\n", txt, entry);
+ }
+}
+
+#ifdef DEBUG_DDIR_COUNT
+static struct ddir * d_dir(struct dentry * entry);
+void recursive_test(struct dentry * entry)
+{
+ int i;
+ struct ddir * ddir = d_dir(entry);
+ int sons = 0;
+
+ if(ddir->dd_zombielist)
+ sons++;
+ for(i=0; i < D_HASHSIZE; i++) {
+ struct dentry ** base = &ddir->dd_hashtable[i];
+ struct dentry * tmp = *base;
+ if(tmp) do {
+ TST("__clear",tmp);
+ if(!(tmp->d_flag & D_HASHED)) {
+ printk("VFS: dcache entry not hashed!\n");
+ printpath(*base); printk("\n");
+ printpath(tmp);
+ }
+ if(!(tmp->d_flag & D_PRELIMINARY))
+ sons++;
+ if(tmp->d_flag & D_DIR)
+ recursive_test(tmp);
+ tmp = tmp->d_hash_next;
+ } while(tmp && tmp != *base);
+ }
+ if(!sons && !(entry->d_flag & D_PRELIMINARY) && entry->u.d_inode) {
+ struct inode * inode = entry->u.d_inode;
+ if(!atomic_read(&inode->i_count)) {
+ if(!(inode->i_status & 1/*ST_AGED*/)) {
+ printpath(entry);
+ printk(" is not aged!\n");
+ }
+ if(inode->i_ddir_count) {
+ printpath(entry);
+ printk(" has ddir_count blockage!\n");
+ }
+ }
+ }
+}
+#else
+#define recursive_test(e) /*nothing*/
+#endif
+#else
+#define TST(n,x) /*nothing*/
+#define LOG(n,x) /*nothing*/
+#define xcheck(t,i) /*nothing*/
+#define recursive_test(e) /*nothing*/
+/*****************************************************************************/
+#endif
+
+void printpath(struct dentry * entry)
+{
+ if(!IS_ROOT(entry))
+ printpath(entry->d_parent);
+ printk("/%s", entry->d_name);
+}
+
+static inline long has_sons(struct ddir * ddir)
+{
+ return ((ddir->dd_alloced | ddir->dd_hashed) ||
+ ddir->dd_neglist ||
+ ddir->dd_basketlist ||
+ ddir->dd_zombielist);
+}
+
+static inline int has_true_sons(struct ddir * ddir)
+{
+ return (ddir->dd_alloced | ddir->dd_true_hashed);
+}
+
+/* Only hold the i_ddir_count pseudo refcount when necessary (i.e. when
+ * they have true_sons), to prevent keeping too much dir inodes in use.
*/
-static unsigned long namehash(const char * name, int len)
+static inline void inc_ddir(struct dentry * entry, struct inode * inode)
{
- unsigned long hash = 0;
+ if(!(entry->d_flag & D_INC_DDIR)) {
+ entry->d_flag |= D_INC_DDIR;
+#ifdef DEBUG
+ if(inode->i_ddir_count) {
+ printpath(entry);
+ printk(" ddir_count=%d\n", inode->i_ddir_count);
+ }
+#endif
+ inode->i_ddir_count++;
+ _get_inode(inode);
+ }
+}
- while ((len -= sizeof(unsigned long)) > 0) {
- hash += get_unaligned((unsigned long *)name);
- name += sizeof(unsigned long);
+static inline blocking void dec_ddir(struct dentry * entry, struct inode * inode)
+{
+ if(entry->d_flag & D_INC_DDIR) {
+ entry->d_flag &= ~D_INC_DDIR;
+ inode->i_ddir_count--;
+ if(!inode->i_ddir_count)
+ __iput(inode);
}
- return hash +
- (get_unaligned((unsigned long *)name) &
- ~(~0UL << ((len + sizeof(unsigned long)) << 3)));
}
-static inline struct dir_cache_entry **get_hlist(struct inode *dir,
- const char *name, int len)
+/* Do not inline this many times. */
+static void d_panic(void)
{
- return hash_table + hash_fn(dir->i_dev, dir->i_ino, namehash(name, len));
+ panic("VFS: dcache directory corruption");
}
-static inline void remove_lru(struct dir_cache_entry * de)
+static inline struct ddir * d_dir(struct dentry * entry)
{
- struct dir_cache_entry * next = de->next_lru;
- struct dir_cache_entry * prev = de->prev_lru;
+ struct ddir * res = BYTE_SUB(entry, sizeof(struct ddir));
- next->prev_lru = prev;
- prev->next_lru = next;
+ if(!(entry->d_flag & D_DIR))
+ d_panic();
+#ifdef DEBUG
+ if(!entry)
+ panic("entry NULL!");
+ if(BASE_DHEADER(res) != BASE_DHEADER(entry))
+ printk("Scheisse!!!\n");
+#endif
+ return res;
}
-static inline void add_lru(struct dir_cache_entry * de, struct dir_cache_entry *head)
+static /*inline*/ struct dheader * dinit(int isdir, int size)
{
- struct dir_cache_entry * prev = head->prev_lru;
+ struct dheader * res = (struct dheader*)__get_free_page(GFP_KERNEL);
+ int restlen = PAGE_SIZE - sizeof(struct dheader);
+ struct dentry * ptr = BYTE_ADD(res, sizeof(struct dheader));
- de->next_lru = head;
- de->prev_lru = prev;
- prev->next_lru = de;
- head->prev_lru = de;
+ if(!res)
+ return NULL;
+ memset(res, 0, sizeof(struct dheader));
+ if(isdir) {
+ ptr = BYTE_ADD(ptr, sizeof(struct ddir));
+ size += sizeof(struct ddir);
+ }
+ if(BASE_DHEADER(ptr) != res)
+ panic("Bad kernel page alignment");
+ size += sizeof(struct dentry) - D_MAXLEN;
+ res->emptylist = NULL;
+ res->free = 0;
+ while(restlen >= size) {
+#ifdef DEBUG
+ ins(ptr);
+ if(BASE_DHEADER(ptr) != res)
+ panic("Wrong dinit!");
+#endif
+ ptr->d_next = res->emptylist;
+ res->emptylist = ptr;
+ ptr = BYTE_ADD(ptr, size);
+ res->free++;
+ restlen -= size;
+ }
+ res->maxfree = res->free;
+ return res;
}
-static inline void update_lru(struct dir_cache_entry * de)
+static /*inline*/ struct dentry * __dalloc(struct anchors * anchor,
+ struct dentry * parent, int isdir,
+ int len, int size)
{
- if (de == *de->lru_head)
- *de->lru_head = de->next_lru;
- else {
- remove_lru(de);
- add_lru(de,*de->lru_head);
+ struct dheader ** free = isdir ? &anchor->dir_free : &anchor->free;
+ struct dheader ** full = isdir ? &anchor->dir_full : &anchor->full;
+ struct dheader * base = *free;
+ struct dentry * res;
+
+ if(!base) {
+ base = dinit(isdir, size);
+ if(!base)
+ return NULL;
+ insert_header(free, base);
+ }
+ base->free--;
+ res = base->emptylist;
+ if(!(base->emptylist = res->d_next)) {
+ remove_header(free, base);
+ insert_header(full, base);
+ }
+ memset(res, 0, sizeof(struct dentry) - D_MAXLEN);
+ if(isdir) {
+ res->d_flag = D_DIR;
+ memset(d_dir(res), 0, sizeof(struct ddir));
}
+ res->d_len = len;
+ res->d_parent = parent;
+ if(parent) {
+ struct ddir * pdir = d_dir(parent);
+#ifdef DEBUG
+ if(pdir->dd_alloced > 1 && !IS_ROOT(parent)) {
+ printpath(parent);
+ printk(" dd_alloced=%d\n", pdir->dd_alloced);
+ }
+#endif
+ pdir->dd_alloced++;
+ }
+#ifdef DEBUG
+ x_alloc++;
+#endif
+ return res;
}
-/*
- * Hash queue manipulation. Look out for the casts..
- *
- * What casts? 8-) -DaveM
- */
-static inline void remove_hash(struct dir_cache_entry * de)
+struct dentry * d_alloc(struct dentry * parent, int len, int isdir)
{
- if(de->pprev) {
- if(de->next)
- de->next->pprev = de->pprev;
- *de->pprev = de->next;
- de->pprev = NULL;
+ int i, size;
+
+#ifdef DEBUG
+ if(the_root)
+ recursive_test(the_root);
+ LOG("d_alloc", parent);
+#endif
+ if(len >= D_MEDIUM) {
+ if(len >= D_LARGE) {
+ i = 3;
+ size = D_HUGE;
+ } else {
+ i = 2;
+ size = D_LARGE;
+ }
+ } else if(len >= D_SMALL) {
+ i = 1;
+ size = D_MEDIUM;
+ } else {
+ i = 0;
+ size = D_SMALL;
}
+ return __dalloc(&anchors[i], parent, isdir, len, size);
}
-static inline void add_hash(struct dir_cache_entry * de, struct dir_cache_entry ** hash)
+extern blocking struct dentry * d_alloc_root(struct inode * root_inode)
{
- if((de->next = *hash) != NULL)
- (*hash)->pprev = &de->next;
- *hash = de;
- de->pprev = hash;
+ struct dentry * res = the_root;
+
+ if(res) {
+ d_del(res, D_NO_CLEAR_INODE); /* invalidate everything beyond */
+ } else {
+ struct ddir * ddir;
+
+ the_root = res = d_alloc(NULL, 0, 1);
+ LOG("d_alloc_root", res);
+ res->d_parent = res;
+ res->d_name[0]='\0';
+ ddir = d_dir(res);
+ ddir->dd_alloced = 999; /* protect from deletion */
+ }
+ insert_alias(&root_inode->i_dentry, res);
+ root_inode->i_dent_count++;
+ root_inode->i_ddir_count++;
+ res->u.d_inode = root_inode;
+ return res;
}
-/*
- * Find a directory cache entry given all the necessary info.
- */
-static inline struct dir_cache_entry * find_entry(struct inode * dir, const char * name, unsigned char len, struct dir_cache_entry ** hash)
+static inline unsigned long d_hash(char first, char last)
+{
+ return ((unsigned long)first ^ ((unsigned long)last << 4)) & (D_HASHSIZE-1);
+}
+
+static inline struct dentry ** d_base_entry(struct ddir * pdir, struct dentry * entry)
+{
+ return &pdir->dd_hashtable[d_hash(entry->d_name[0],
+ entry->d_name[entry->d_len-1])];
+}
+
+static inline struct dentry ** d_base_qstr(struct ddir * pdir,
+ struct qstr * s1,
+ struct qstr * s2)
{
- struct dir_cache_entry *de;
+ unsigned long hash;
- de = *hash;
- goto inside;
- for (;;) {
- de = de->next;
-inside:
- if (!de)
- break;
- if((de->name_len == (unsigned char) len) &&
- (de->dc_dev == dir->i_dev) &&
- (de->dir == dir->i_ino) &&
- (de->version == dir->i_version) &&
- (!memcmp(de->name, name, len)))
- break;
+ if(s2 && s2->len) {
+ hash = d_hash(s1->name[0], s2->name[s2->len-1]);
+ } else {
+ hash = d_hash(s1->name[0], s1->name[s1->len-1]);
}
- return de;
+ return &pdir->dd_hashtable[hash];
}
-/*
- * Move a successfully used entry to level2. If already at level2,
- * move it to the end of the LRU queue..
+
+static /*inline*/ blocking void _d_remove_from_parent(struct dentry * entry,
+ struct ddir * pdir,
+ struct inode * inode,
+ int flags)
+{
+ if(entry->d_flag & D_HASHED) {
+ struct dentry ** base = d_base_entry(pdir, entry);
+
+ remove_hash(base, entry);
+ entry->d_flag &= ~D_HASHED;
+ pdir->dd_hashed--;
+ if(!(entry->d_flag & D_PRELIMINARY)) {
+ pdir->dd_true_hashed--;
+ if(!inode) {
+#ifdef DEBUG
+ if(!entry->d_next || !entry->d_prev) {
+ printpath(entry);
+ printk(" flags=%x d_flag=%x negs=%d "
+ "hashed=%d\n", flags, entry->d_flag,
+ pdir->dd_negs, pdir->dd_hashed);
+ }
+#endif
+ remove_alias(&pdir->dd_neglist, entry);
+ pdir->dd_negs--;
+ }
+ }
+ } else if(!(entry->d_flag & D_ZOMBIE)) {
+#ifdef DEBUG
+ if(!pdir->dd_alloced) printk("dd_alloced is 0!\n");
+#endif
+ pdir->dd_alloced--;
+ }
+ if(entry->d_flag & D_BASKET) {
+ remove_basket(&pdir->dd_basketlist, entry);
+ entry->d_flag &= ~D_BASKET;
+ }
+}
+
+/* Theoretically, zombies should never or extremely seldom appear,
+ * so this code is nearly superfluous.
+ * A way to get zombies is while using inodes (i_count>0), unlink()
+ * them as well as rmdir() the parent dir => the parent dir becomes a zombie.
+ * Zombies are *not* in the hashtable, because somebody could re-creat()
+ * that filename in its parent dir again.
+ * Besides coding errors during beta phase, when forcing an umount()
+ * (e.g. at shutdown time), inodes could be in use such that the parent
+ * dir is cleared, resulting also in zombies.
*/
-static inline void move_to_level2(struct dir_cache_entry * old_de, struct dir_cache_entry ** hash)
+static /*inline*/ void _d_handle_zombie(struct dentry * entry,
+ struct ddir * ddir,
+ struct ddir * pdir)
{
- struct dir_cache_entry * de;
+ if(entry->d_flag & D_DIR) {
+ if(entry->d_flag & D_ZOMBIE) {
+ if(!has_sons(ddir)) {
+ entry->d_flag &= ~D_ZOMBIE;
+ remove_hash(&pdir->dd_zombielist, entry);
+ if(!pdir->dd_zombielist &&
+ (entry->d_parent->d_flag & D_ZOMBIE)) {
+ d_del(entry->d_parent, D_NORMAL);
+ }
+ }
+ } else if(has_sons(ddir)) {
+ entry->d_flag |= D_ZOMBIE;
+ insert_hash(&pdir->dd_zombielist, entry);
- if (old_de->lru_head == &level2_head) {
- update_lru(old_de);
- return;
- }
- de = level2_head;
- level2_head = de->next_lru;
- remove_hash(de);
- COPYDATA(old_de, de);
- add_hash(de, hash);
+ /* This condition is no longer a bug, with the removal
+ * of recursive_clear() this happens naturally during
+ * an unmount attempt of a filesystem which is busy.
+ */
+#if 0
+ /* Not sure when this message should show up... */
+ if(!IS_ROOT(entry)) {
+ printk("VFS: clearing dcache directory "
+ "with successors\n");
+#ifdef DEBUG
+ printpath(entry);
+ printk(" d_flag=%x alloced=%d negs=%d hashed=%d "
+ "basket=%p zombies=%p\n",
+ entry->d_flag, ddir->dd_alloced,
+ ddir->dd_negs, ddir->dd_hashed,
+ ddir->dd_basketlist, ddir->dd_zombielist);
+#endif
+ }
+#endif
+ }
+ }
}
-int dcache_lookup(struct inode * dir, const char * name, int len, unsigned long * ino)
+static /*inline*/ blocking void _d_del(struct dentry * entry,
+ struct anchors * anchor,
+ int flags)
{
- int ret = 0;
+ struct dheader ** free;
+ struct dheader ** full;
+ struct dheader * base = BASE_DHEADER(entry);
+ struct ddir * ddir = NULL;
+ struct ddir * pdir;
+ struct inode * inode = entry->d_flag & D_PRELIMINARY ? NULL : entry->u.d_inode;
- if(len <= DCACHE_NAME_LEN) {
- struct dir_cache_entry **hash = get_hlist(dir, name, len);
- struct dir_cache_entry *de;
+#ifdef DEBUG
+ if(inode)
+ xcheck("_d_del", inode);
+#endif
+ if(!entry->d_parent) {
+ printk("VFS: dcache parent is NULL\n");
+ return;
+ }
+ if(entry->d_flag & D_DIR) {
+ free = &anchor->dir_free;
+ full = &anchor->dir_full;
+ } else {
+ free = &anchor->free;
+ full = &anchor->full;
+ }
+ pdir = d_dir(entry->d_parent);
+ if(!IS_ROOT(entry))
+ _d_remove_from_parent(entry, pdir, inode, flags);
+
+ /* This may block, be careful! _d_remove_from_parent() is
+ * thus called before.
+ */
+ if(entry->d_flag & D_DIR)
+ ddir = d_dir(entry);
+ if(IS_ROOT(entry))
+ return;
+
+ if(flags & D_NO_FREE) {
+ /* Make it re-d_add()able */
+ pdir->dd_alloced++;
+ entry->d_flag &= D_DIR;
+ } else
+ _d_handle_zombie(entry, ddir, pdir);
- spin_lock(&dcache_lock);
- de = find_entry(dir, name, (unsigned char) len, hash);
- if(de) {
- *ino = de->ino;
- move_to_level2(de, hash);
- ret = 1;
+ /* This dec_ddir() must occur after zombie handling. */
+ if(!has_true_sons(pdir))
+ dec_ddir(entry->d_parent, entry->d_parent->u.d_inode);
+
+ entry->u.d_inode = NULL;
+ if(inode) {
+ remove_alias(&inode->i_dentry, entry);
+ inode->i_dent_count--;
+ if (entry->d_flag & D_DIR)
+ dec_ddir(entry, inode);
+
+ if(!(flags & D_NO_CLEAR_INODE) &&
+ !(atomic_read(&inode->i_count) +
+ inode->i_ddir_count +
+ inode->i_dent_count)) {
+#ifdef DEBUG
+ printk("#");
+#endif
+ /* This may block also. */
+ _clear_inode(inode, 0, 0);
+ }
+ }
+ if(!(flags & D_NO_FREE) && !(entry->d_flag & D_ZOMBIE)) {
+ base->free++;
+ if(base->free == base->maxfree) {
+#ifndef DEBUG
+ remove_header(free, base);
+ free_page((unsigned long)base);
+ goto done;
+#endif
+ }
+ entry->d_next = base->emptylist;
+ base->emptylist = entry;
+ if(!entry->d_next) {
+ remove_header(full, base);
+ insert_header(free, base);
}
- spin_unlock(&dcache_lock);
+#ifdef DEBUG
+ x_freed++;
+#endif
}
- return ret;
+#ifndef DEBUG
+done:
+#else
+ x_free++;
+#endif
}
-void dcache_add(struct inode * dir, const char * name, int len, unsigned long ino)
+blocking void d_del(struct dentry * entry, int flags)
{
- if (len <= DCACHE_NAME_LEN) {
- struct dir_cache_entry **hash = get_hlist(dir, name, len);
- struct dir_cache_entry *de;
+ int i;
- spin_lock(&dcache_lock);
- de = find_entry(dir, name, (unsigned char) len, hash);
- if (de) {
- de->ino = ino;
- update_lru(de);
+ if(!entry)
+ return;
+ LOG("d_clear", entry);
+ if(entry->d_len >= D_MEDIUM) {
+ if(entry->d_len >= D_LARGE) {
+ i = 3;
} else {
- de = level1_head;
- level1_head = de->next_lru;
- remove_hash(de);
- de->dc_dev = dir->i_dev;
- de->dir = dir->i_ino;
- de->version = dir->i_version;
- de->ino = ino;
- de->name_len = len;
- memcpy(de->name, name, len);
- add_hash(de, hash);
+ i = 2;
+ }
+ } else if(entry->d_len >= D_SMALL) {
+ i = 1;
+ } else {
+ i = 0;
+ }
+ _d_del(entry, &anchors[i], flags);
+}
+
+static inline struct dentry * __dlookup(struct dentry ** base,
+ struct qstr * name,
+ struct qstr * appendix)
+{
+ struct dentry * tmp = *base;
+
+ if(tmp && name->len) {
+ int totallen = name->len;
+
+ if(appendix)
+ totallen += appendix->len;
+ do {
+ if(tmp->d_len == totallen &&
+ !(tmp->d_flag & D_DUPLICATE) &&
+ !strncmp(tmp->d_name, name->name, name->len) &&
+ (!appendix || !strncmp(tmp->d_name+name->len,
+ appendix->name, appendix->len)))
+ return tmp;
+ tmp = tmp->d_hash_next;
+ } while(tmp != *base);
+ }
+ return NULL;
+}
+
+struct dentry * d_lookup(struct inode * dir,
+ struct qstr * name,
+ struct qstr * appendix)
+{
+ if(dir->i_dentry) {
+ struct ddir * ddir = d_dir(dir->i_dentry);
+ struct dentry ** base = d_base_qstr(ddir, name, appendix);
+
+ return __dlookup(base, name, appendix);
+ }
+ return NULL;
+}
+
+static /*inline*/ blocking void _d_insert_to_parent(struct dentry * entry,
+ struct ddir * pdir,
+ struct inode * inode,
+ struct qstr * ininame,
+ int flags)
+{
+ struct dentry ** base;
+ struct dentry * parent = entry->d_parent;
+
+#ifdef DEBUG
+ if(!pdir->dd_alloced)
+ printk("dd_alloced is 0!\n");
+#endif
+ base = d_base_qstr(pdir, ininame, NULL);
+ if(!(flags & (D_NOCHECKDUP|D_DUPLICATE)) &&
+ __dlookup(base, ininame, NULL)) {
+ d_del(entry, D_NO_CLEAR_INODE);
+ return;
+ }
+ if(entry->d_flag & D_HASHED) {
+ printk("VFS: dcache entry is already hashed\n");
+ return;
+ }
+ if(!(flags & D_PRELIMINARY))
+ pdir->dd_true_hashed++;
+ pdir->dd_hashed++;
+ insert_hash(base, entry);
+ entry->d_flag |= D_HASHED;
+ pdir->dd_alloced--;
+ if(flags & D_BASKET)
+ insert_basket(&pdir->dd_basketlist, entry);
+
+#ifdef DEBUG
+ if(inode && inode->i_dentry && (entry->d_flag & D_DIR)) {
+ struct dentry * tmp = inode->i_dentry;
+ printk("Auweia inode=%p entry=%p (%p %p %s)\n",
+ inode, entry, parent->u.d_inode, parent, parent->d_name);
+ printk("entry path="); printpath(entry); printk("\n");
+ do {
+ TST("auweia",tmp);
+ printk("alias path="); printpath(tmp); printk("\n");
+ tmp = tmp->d_next;
+ } while(tmp != inode->i_dentry);
+ printk("\n");
+ }
+#endif
+ if(has_true_sons(pdir))
+ inc_ddir(parent, parent->u.d_inode);
+ if(!inode && !(flags & D_PRELIMINARY)) {
+ insert_alias(&pdir->dd_neglist, entry);
+ pdir->dd_negs++;
+
+ /* Don't allow the negative list to grow too much ... */
+ while(pdir->dd_negs > (pdir->dd_true_hashed >> 1) + 5)
+ d_del(pdir->dd_neglist->d_prev, D_REMOVE);
+ }
+}
+
+blocking void d_add(struct dentry * entry, struct inode * inode,
+ struct qstr * ininame, int flags)
+{
+ struct dentry * parent = entry->d_parent;
+ struct qstr dummy;
+ struct ddir * pdir;
+
+#ifdef DEBUG
+ if(inode)
+ xcheck("d_add", inode);
+ if(IS_ROOT(entry)) {
+ printk("VFS: d_add for root dentry ");
+ printpath(entry);
+ printk(" -> ");
+ if(ininame)
+ printk("%s", ininame->name);
+ printk("\n");
+ return;
+ }
+ if(!parent)
+ panic("d_add with parent==NULL");
+ LOG("d_add", entry);
+#endif
+ if(ininame) {
+ if(ininame->len != entry->d_len) {
+ printk("VFS: d_add with wrong string length");
+ entry->d_len = ininame->len; /* kludge */
+ }
+ memcpy(entry->d_name, ininame->name, ininame->len);
+ entry->d_name[ininame->len] = '\0';
+ } else {
+ dummy.name = entry->d_name;
+ dummy.len = entry->d_len;
+ ininame = &dummy;
+ }
+ if(entry->d_flag & D_HASHED)
+ printk("VFS: d_add of already added dcache entry\n");
+
+ pdir = d_dir(parent);
+ _d_insert_to_parent(entry, pdir, inode, ininame, flags);
+ entry->d_flag |= flags;
+ if(inode && !(flags & D_PRELIMINARY)) {
+ if(entry->d_flag & D_DIR) {
+ if(inode->i_dentry) {
+ printk("VFS: creating dcache directory alias\n");
+ return;
+ }
}
- spin_unlock(&dcache_lock);
+ insert_alias(&inode->i_dentry, entry);
+ inode->i_dent_count++;
}
+ entry->u.d_inode = inode;
}
-unsigned long name_cache_init(unsigned long mem_start, unsigned long mem_end)
+blocking struct dentry * d_entry(struct dentry * parent,
+ struct qstr * name,
+ struct inode * inode)
{
- int i;
- struct dir_cache_entry * p;
+ struct ddir * pdir = d_dir(parent);
+ struct dentry ** base = d_base_qstr(pdir, name, NULL);
+ struct dentry * found = __dlookup(base, name, NULL);
- /*
- * Init level1 LRU lists..
- */
- p = level1_cache;
- do {
- p[1].prev_lru = p;
- p[0].next_lru = p+1;
- p[0].lru_head = &level1_head;
- } while (++p < level1_cache + DCACHE_SIZE-1);
- level1_cache[0].prev_lru = p;
- p[0].next_lru = &level1_cache[0];
- p[0].lru_head = &level1_head;
- level1_head = level1_cache;
-
- /*
- * Init level2 LRU lists..
- */
- p = level2_cache;
- do {
- p[1].prev_lru = p;
- p[0].next_lru = p+1;
- p[0].lru_head = &level2_head;
- } while (++p < level2_cache + DCACHE_SIZE-1);
- level2_cache[0].prev_lru = p;
- p[0].next_lru = &level2_cache[0];
- p[0].lru_head = &level2_head;
- level2_head = level2_cache;
-
- /*
- * Empty hash queues..
- */
- for (i = 0 ; i < DCACHE_HASH_QUEUES ; i++)
- hash_table[i] = NULL;
+ if(!found) {
+ int isdir = (inode && S_ISDIR(inode->i_mode));
- return mem_start;
+ found = d_alloc(parent, name->len, isdir);
+ if(found) {
+ d_add(found, inode, name,
+ isdir ? (D_DIR|D_NOCHECKDUP) : D_NOCHECKDUP);
+ } else
+ printk("VFS: problem with d_alloc\n");
+ }
+ return found;
+}
+
+blocking void d_entry_preliminary(struct dentry * parent,
+ struct qstr * name,
+ unsigned long ino)
+{
+ struct ddir * pdir = d_dir(parent);
+ struct dentry ** base = d_base_qstr(pdir, name, NULL);
+ struct dentry * found = __dlookup(base, name, NULL);
+
+ if(!found && ino) {
+ struct dentry * new = d_alloc(parent, name->len, 0);
+
+ if(new) {
+ d_add(new, NULL, name, D_PRELIMINARY|D_NOCHECKDUP);
+ new->u.d_ino = ino;
+ } else
+ printk("VFS: problem with d_alloc\n");
+ }
+}
+
+blocking void d_move(struct dentry * entry, struct inode * newdir,
+ struct qstr * newname, struct qstr * newapp)
+{
+ struct ddir tmp;
+ struct dentry * new;
+ struct inode * inode;
+ int len;
+ int flags;
+
+ if(!entry)
+ return;
+ inode = entry->u.d_inode;
+ flags = entry->d_flag;
+ if((flags & D_PRELIMINARY) || !inode) {
+ if(!(flags & D_PRELIMINARY))
+ printk("VFS: trying to move negative dcache entry\n");
+ d_del(entry, D_NO_CLEAR_INODE);
+ return;
+ }
+#if 0
+printk("d_move %p '%s' -> '%s%s' dent_count=%d\n", inode, entry->d_name,
+ newname->name, newapp ? newapp->name : "", inode->i_dent_count);
+#endif
+ if(flags & D_ZOMBIE) {
+ printk("VFS: moving zombie entry\n");
+ }
+ if(flags & D_DIR) {
+ struct ddir * ddir = d_dir(entry);
+
+ memcpy(&tmp, ddir, sizeof(struct ddir));
+
+ /* Simulate empty dir for d_del(). */
+ memset(ddir, 0, sizeof(struct ddir));
+ }
+ len = newname->len;
+ if(newapp) {
+ len += newapp->len;
+ flags |= D_BASKET;
+ } else
+ flags &= ~D_BASKET;
+ new = d_alloc(newdir->i_dentry, len, flags & D_DIR);
+ memcpy(new->d_name, newname->name, newname->len);
+ if(newapp)
+ memcpy(new->d_name+newname->len, newapp->name, newapp->len);
+ new->d_name[len] = '\0';
+ d_del(entry, D_NO_CLEAR_INODE);
+ d_add(new, inode, NULL, flags & (D_DIR|D_BASKET));
+ if(flags & D_DIR) {
+ struct ddir * ddir = d_dir(new);
+
+ memcpy(ddir, &tmp, sizeof(struct ddir));
+ }
+}
+
+int d_path(struct dentry * entry, struct inode * chroot, char * buf)
+{
+ if(IS_ROOT(entry) || (chroot && entry->u.d_inode == chroot &&
+ !(entry->d_flag & D_PRELIMINARY))) {
+ *buf = '/';
+ return 1;
+ } else {
+ int len = d_path(entry->d_parent, chroot, buf);
+
+ buf += len;
+ if(len > 1) {
+ *buf++ = '/';
+ len++;
+ }
+ memcpy(buf, entry->d_name, entry->d_len);
+ return len + entry->d_len;
+ }
+}
+
+struct dentry * d_basket(struct dentry * dir_entry)
+{
+ if(dir_entry && (dir_entry->d_flag & D_DIR)) {
+ struct ddir * ddir = d_dir(dir_entry);
+
+ return ddir->dd_basketlist;
+ } else
+ return NULL;
+}
+
+int d_isbasket(struct dentry * entry)
+{
+ return entry->d_flag & D_BASKET;
+}
+
+blocking struct inode * d_inode(struct dentry ** changing_entry)
+{
+ struct dentry * entry = *changing_entry;
+ struct inode * inode;
+
+#ifdef CONFIG_DCACHE_PRELOAD
+ if(entry->d_flag & D_PRELIMINARY) {
+ struct qstr name = { entry->d_name, entry->d_len };
+ struct ddir * pdir = d_dir(entry->d_parent);
+ struct dentry ** base = d_base_qstr(pdir, &name, NULL);
+ struct dentry * found;
+ unsigned long ino;
+ struct inode * dir = entry->d_parent->u.d_inode;
+ TST("d_inode",entry);
+ ino = entry->u.d_ino;
+ if(!dir)
+ d_panic();
+
+ /* Prevent concurrent d_lookup()s or d_inode()s before
+ * giving up vfs_lock. This just removes from the parent,
+ * but does not deallocate it.
+ */
+
+ /* !!!!!!! Aiee, here is an unresolved race if somebody
+ * unlink()s the inode during the iget(). The problem is
+ * that we need to synchronize externally. Proposed solution:
+ * put a rw_lock (read-mode) on the parent dir for each
+ * iget(), lookup() and so on, and a write-mode lock for
+ * everything that changes the dir (e.g. unlink()), and do
+ * this consistently everywhere in the generic VFS (not in
+ * the concrete filesystems). This should kill similar
+ * races everywhere, with a single clean concept.
+ * Later, the synchronization stuff can be cleaned out
+ * of the concrete fs'es.
+ */
+ d_del(entry, D_NO_CLEAR_INODE|D_NO_FREE);
+ vfs_unlock();
+
+ /* This circumvents the normal lookup() of pathnames.
+ * Therefore, preliminary entries must not be used
+ * (see FS_NO_DCACHE and FS_NO_PRELIM) if the fs does not
+ * permit fetching *valid* inodes with plain iget().
+ */
+ inode = __iget(dir->i_sb, ino, 0);
+ vfs_lock();
+ if(!inode) {
+ printk("VFS: preliminary dcache entry was invalid\n");
+ *changing_entry = NULL;
+ return NULL;
+ }
+ xcheck("d_inode iget()", inode);
+ if((found = __dlookup(base, &name, NULL))) {
+ d_del(entry, D_NO_CLEAR_INODE);
+ *changing_entry = found;
+ } else if(S_ISDIR(inode->i_mode)) {
+ struct dentry * new = d_alloc(entry->d_parent, entry->d_len, 1);
+ if(new)
+ d_add(new, inode, &name, D_DIR);
+ *changing_entry = new;
+
+ /* Finally deallocate old entry. */
+ d_del(entry, D_NO_CLEAR_INODE);
+ } else {
+ /* Re-insert to the parent, but now as normal dentry. */
+ d_add(entry, inode, NULL, 0);
+ }
+ return inode;
+ }
+#endif
+ inode = entry->u.d_inode;
+ if(inode) {
+#ifdef DEBUG
+ xcheck("d_inode", inode);
+#endif
+ iinc_zero(inode);
+ }
+ return inode;
}