diff options
Diffstat (limited to 'fs/buffer.c')
-rw-r--r-- | fs/buffer.c | 1248 |
1 files changed, 413 insertions, 835 deletions
diff --git a/fs/buffer.c b/fs/buffer.c index 043e35b6c..27950290a 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -10,13 +10,16 @@ * data, of course), but instead letting the caller do it. */ -/* - * NOTE! There is one discordant note here: checking floppies for - * disk change. This is where it fits best, I think, as it should - * invalidate changed floppy-disk-caches. - */ - /* Some bdflush() changes for the dynamic ramdisk - Paul Gortmaker, 12/94 */ +/* Start bdflush() with kernel_thread not syscall - Paul Gortmaker, 12/95 */ + +/* Removed a lot of unnecessary code and simplified things now that + * the buffer cache isn't our primary cache - Andrew Tridgell 12/96 + */ + +/* Speed up hash, lru, and free list operations. Use gfp() for allocating + * hash table, use SLAB cache for buffer heads. -DaveM + */ #include <linux/sched.h> #include <linux/kernel.h> @@ -25,12 +28,14 @@ #include <linux/locks.h> #include <linux/errno.h> #include <linux/malloc.h> +#include <linux/slab.h> #include <linux/pagemap.h> #include <linux/swap.h> #include <linux/swapctl.h> #include <linux/smp.h> #include <linux/smp_lock.h> #include <linux/vmalloc.h> +#include <linux/blkdev.h> #include <asm/system.h> #include <asm/uaccess.h> @@ -40,39 +45,37 @@ #define NR_SIZES 5 static char buffersize_index[17] = {-1, 0, 1, -1, 2, -1, -1, -1, 3, -1, -1, -1, -1, -1, -1, -1, 4}; -static short int bufferindex_size[NR_SIZES] = {512, 1024, 2048, 4096, 8192}; #define BUFSIZE_INDEX(X) ((int) buffersize_index[(X)>>9]) #define MAX_BUF_PER_PAGE (PAGE_SIZE / 512) +#define MAX_UNUSED_BUFFERS 30 /* don't ever have more than this number of + unused buffer heads */ +#define HASH_PAGES 4 /* number of pages to use for the hash table */ +#define HASH_PAGES_ORDER 2 +#define NR_HASH (HASH_PAGES*PAGE_SIZE/sizeof(struct buffer_head *)) +#define HASH_MASK (NR_HASH-1) static int grow_buffers(int pri, int size); -static int shrink_specific_buffers(unsigned int priority, int size); -static int maybe_shrink_lav_buffers(int); -static int nr_hash = 0; /* Size of hash table */ static struct buffer_head ** hash_table; static struct buffer_head * lru_list[NR_LIST] = {NULL, }; -/* next_to_age is an array of pointers into the lru lists, used to - cycle through the buffers aging their contents when deciding which - buffers to discard when more memory is needed */ -static struct buffer_head * next_to_age[NR_LIST] = {NULL, }; static struct buffer_head * free_list[NR_SIZES] = {NULL, }; +static kmem_cache_t *bh_cachep; + static struct buffer_head * unused_list = NULL; -struct buffer_head * reuse_list = NULL; +static struct buffer_head * reuse_list = NULL; static struct wait_queue * buffer_wait = NULL; -int nr_buffers = 0; -int nr_buffers_type[NR_LIST] = {0,}; -int nr_buffers_size[NR_SIZES] = {0,}; -int nr_buffers_st[NR_SIZES][NR_LIST] = {{0,},}; -int buffer_usage[NR_SIZES] = {0,}; /* Usage counts used to determine load average */ -int buffers_lav[NR_SIZES] = {0,}; /* Load average of buffer usage */ -int nr_free[NR_SIZES] = {0,}; +static int nr_buffers = 0; +static int nr_buffers_type[NR_LIST] = {0,}; +static int nr_buffer_heads = 0; +static int nr_unused_buffer_heads = 0; +static int refilled = 0; /* Set NZ when a buffer freelist is refilled + this is used by the loop device */ + +/* This is used by some architectures to estimate available memory. */ int buffermem = 0; -int nr_buffer_heads = 0; -int refilled = 0; /* Set NZ when a buffer freelist is refilled */ -extern int *blksize_size[]; /* Here is the parameter block for the bdflush process. If you add or * remove any of the parameters, make sure to update kernel/sysctl.c. @@ -81,8 +84,10 @@ extern int *blksize_size[]; static void wakeup_bdflush(int); #define N_PARAM 9 -#define LAV +/* The dummy values in this structure are left in there for compatibility + * with old programs that play with the /proc entries. + */ union bdflush_param{ struct { int nfract; /* Percentage of buffer cache dirty to @@ -93,26 +98,17 @@ union bdflush_param{ each time we call refill */ int nref_dirt; /* Dirty buffer threshold for activating bdflush when trying to refill buffers. */ - int clu_nfract; /* Percentage of buffer cache to scan to - search for free clusters */ + int dummy1; /* unused */ int age_buffer; /* Time for normal buffer to age before we flush it */ int age_super; /* Time for superblock to age before we flush it */ - int lav_const; /* Constant used for load average (time - constant */ - int lav_ratio; /* Used to determine how low a lav for a - particular size can go before we start to - trim back the buffers */ + int dummy2; /* unused */ + int dummy3; /* unused */ } b_un; unsigned int data[N_PARAM]; } bdf_prm = {{60, 500, 64, 256, 15, 30*HZ, 5*HZ, 1884, 2}}; -/* The lav constant is set for 1 minute, as long as the update process runs - every 5 seconds. If you change the frequency of update, the time - constant will also change. */ - - /* These are the min and max parameter values that we will allow to be assigned */ int bdflush_min[N_PARAM] = { 0, 10, 5, 25, 0, 100, 100, 1, 1}; int bdflush_max[N_PARAM] = {100,5000, 2000, 2000,100, 60000, 60000, 2047, 5}; @@ -145,29 +141,31 @@ repeat: } /* Call sync_buffers with wait!=0 to ensure that the call does not - return until all buffer writes have completed. Sync() may return - before the writes have finished; fsync() may not. */ - + * return until all buffer writes have completed. Sync() may return + * before the writes have finished; fsync() may not. + */ /* Godamity-damn. Some buffers (bitmaps for filesystems) - spontaneously dirty themselves without ever brelse being called. - We will ultimately want to put these in a separate list, but for - now we search all of the lists for dirty buffers */ - + * spontaneously dirty themselves without ever brelse being called. + * We will ultimately want to put these in a separate list, but for + * now we search all of the lists for dirty buffers. + */ static int sync_buffers(kdev_t dev, int wait) { int i, retry, pass = 0, err = 0; struct buffer_head * bh, *next; /* One pass for no-wait, three for wait: - 0) write out all dirty, unlocked buffers; - 1) write out all dirty buffers, waiting if locked; - 2) wait for completion by waiting for all buffers to unlock. */ + * 0) write out all dirty, unlocked buffers; + * 1) write out all dirty buffers, waiting if locked; + * 2) wait for completion by waiting for all buffers to unlock. + */ do { retry = 0; repeat: - /* We search all lists as a failsafe mechanism, not because we expect - there to be dirty buffers on any of the other lists. */ + /* We search all lists as a failsafe mechanism, not because we expect + * there to be dirty buffers on any of the other lists. + */ bh = lru_list[BUF_DIRTY]; if (!bh) goto repeat2; @@ -181,7 +179,8 @@ repeat: continue; if (buffer_locked(bh)) { /* Buffer is locked; skip it unless wait is - requested AND pass > 0. */ + * requested AND pass > 0. + */ if (!wait || !pass) { retry = 1; continue; @@ -189,18 +188,27 @@ repeat: wait_on_buffer (bh); goto repeat; } + /* If an unlocked buffer is not uptodate, there has - been an IO error. Skip it. */ + * been an IO error. Skip it. + */ if (wait && buffer_req(bh) && !buffer_locked(bh) && !buffer_dirty(bh) && !buffer_uptodate(bh)) { err = 1; continue; } + /* Don't write clean buffers. Don't write ANY buffers - on the third pass. */ + * on the third pass. + */ if (!buffer_dirty(bh) || pass >= 2) continue; - /* don't bother about locked buffers */ + + /* Don't bother about locked buffers. + * + * XXX We checked if it was locked above and there is no + * XXX way we could have slept in between. -DaveM + */ if (buffer_locked(bh)) continue; bh->b_count++; @@ -226,7 +234,8 @@ repeat: continue; if (buffer_locked(bh)) { /* Buffer is locked; skip it unless wait is - requested AND pass > 0. */ + * requested AND pass > 0. + */ if (!wait || !pass) { retry = 1; continue; @@ -236,10 +245,11 @@ repeat: } } - /* If we are waiting for the sync to succeed, and if any dirty - blocks were written, then repeat; on the second pass, only - wait for buffers being written (do not pass to write any - more buffers on the second pass). */ + /* If we are waiting for the sync to succeed, and if any dirty + * blocks were written, then repeat; on the second pass, only + * wait for buffers being written (do not pass to write any + * more buffers on the second pass). + */ } while (wait && retry && ++pass<=2); return err; } @@ -264,7 +274,9 @@ int fsync_dev(kdev_t dev) asmlinkage int sys_sync(void) { + lock_kernel(); fsync_dev(0); + unlock_kernel(); return 0; } @@ -277,29 +289,39 @@ asmlinkage int sys_fsync(unsigned int fd) { struct file * file; struct inode * inode; + int err = 0; + lock_kernel(); if (fd>=NR_OPEN || !(file=current->files->fd[fd]) || !(inode=file->f_inode)) - return -EBADF; - if (!file->f_op || !file->f_op->fsync) - return -EINVAL; - if (file->f_op->fsync(inode,file)) - return -EIO; - return 0; + err = -EBADF; + else if (!file->f_op || !file->f_op->fsync) + err = -EINVAL; + else if (file->f_op->fsync(inode,file)) + err = -EIO; + unlock_kernel(); + return err; } asmlinkage int sys_fdatasync(unsigned int fd) { struct file * file; struct inode * inode; + int err = -EBADF; + lock_kernel(); if (fd>=NR_OPEN || !(file=current->files->fd[fd]) || !(inode=file->f_inode)) - return -EBADF; + goto out; + err = -EINVAL; if (!file->f_op || !file->f_op->fsync) - return -EINVAL; + goto out; /* this needs further work, at the moment it is identical to fsync() */ if (file->f_op->fsync(inode,file)) - return -EIO; - return 0; + err = -EIO; + else + err = 0; +out: + unlock_kernel(); + return err; } void invalidate_buffers(kdev_t dev) @@ -327,18 +349,17 @@ void invalidate_buffers(kdev_t dev) } } -#define _hashfn(dev,block) (((unsigned)(HASHDEV(dev)^block))%nr_hash) +#define _hashfn(dev,block) (((unsigned)(HASHDEV(dev)^block))&HASH_MASK) #define hash(dev,block) hash_table[_hashfn(dev,block)] static inline void remove_from_hash_queue(struct buffer_head * bh) { - if (bh->b_next) - bh->b_next->b_prev = bh->b_prev; - if (bh->b_prev) - bh->b_prev->b_next = bh->b_next; - if (hash(bh->b_dev,bh->b_blocknr) == bh) - hash(bh->b_dev,bh->b_blocknr) = bh->b_next; - bh->b_next = bh->b_prev = NULL; + if (bh->b_pprev) { + if(bh->b_next) + bh->b_next->b_pprev = bh->b_pprev; + *bh->b_pprev = bh->b_next; + bh->b_pprev = NULL; + } } static inline void remove_from_lru_list(struct buffer_head * bh) @@ -354,11 +375,6 @@ static inline void remove_from_lru_list(struct buffer_head * bh) lru_list[bh->b_list] = bh->b_next_free; if (lru_list[bh->b_list] == bh) lru_list[bh->b_list] = NULL; - if (next_to_age[bh->b_list] == bh) - next_to_age[bh->b_list] = bh->b_next_free; - if (next_to_age[bh->b_list] == bh) - next_to_age[bh->b_list] = NULL; - bh->b_next_free = bh->b_prev_free = NULL; } @@ -371,7 +387,6 @@ static inline void remove_from_free_list(struct buffer_head * bh) panic("Free list corrupted"); if(!free_list[isize]) panic("Free list empty"); - nr_free[isize]--; if(bh->b_next_free == bh) free_list[isize] = NULL; else { @@ -391,58 +406,55 @@ static inline void remove_from_queues(struct buffer_head * bh) return; } nr_buffers_type[bh->b_list]--; - nr_buffers_st[BUFSIZE_INDEX(bh->b_size)][bh->b_list]--; remove_from_hash_queue(bh); remove_from_lru_list(bh); } static inline void put_last_lru(struct buffer_head * bh) { - if (!bh) - return; - if (bh == lru_list[bh->b_list]) { - lru_list[bh->b_list] = bh->b_next_free; - if (next_to_age[bh->b_list] == bh) - next_to_age[bh->b_list] = bh->b_next_free; - return; - } - if(bh->b_dev == B_FREE) - panic("Wrong block for lru list"); - remove_from_lru_list(bh); -/* add to back of free list */ + if (bh) { + struct buffer_head **bhp = &lru_list[bh->b_list]; - if(!lru_list[bh->b_list]) { - lru_list[bh->b_list] = bh; - lru_list[bh->b_list]->b_prev_free = bh; - } - if (!next_to_age[bh->b_list]) - next_to_age[bh->b_list] = bh; + if (bh == *bhp) { + *bhp = bh->b_next_free; + return; + } + + if(bh->b_dev == B_FREE) + panic("Wrong block for lru list"); + + /* Add to back of free list. */ + remove_from_lru_list(bh); + if(!*bhp) { + *bhp = bh; + (*bhp)->b_prev_free = bh; + } - bh->b_next_free = lru_list[bh->b_list]; - bh->b_prev_free = lru_list[bh->b_list]->b_prev_free; - lru_list[bh->b_list]->b_prev_free->b_next_free = bh; - lru_list[bh->b_list]->b_prev_free = bh; + bh->b_next_free = *bhp; + bh->b_prev_free = (*bhp)->b_prev_free; + (*bhp)->b_prev_free->b_next_free = bh; + (*bhp)->b_prev_free = bh; + } } static inline void put_last_free(struct buffer_head * bh) { - int isize; - if (!bh) - return; + if (bh) { + struct buffer_head **bhp = &free_list[BUFSIZE_INDEX(bh->b_size)]; - isize = BUFSIZE_INDEX(bh->b_size); - bh->b_dev = B_FREE; /* So it is obvious we are on the free list */ - /* add to back of free list */ - if(!free_list[isize]) { - free_list[isize] = bh; - bh->b_prev_free = bh; - } + bh->b_dev = B_FREE; /* So it is obvious we are on the free list. */ - nr_free[isize]++; - bh->b_next_free = free_list[isize]; - bh->b_prev_free = free_list[isize]->b_prev_free; - free_list[isize]->b_prev_free->b_next_free = bh; - free_list[isize]->b_prev_free = bh; + /* Add to back of free list. */ + if(!*bhp) { + *bhp = bh; + bh->b_prev_free = bh; + } + + bh->b_next_free = *bhp; + bh->b_prev_free = (*bhp)->b_prev_free; + (*bhp)->b_prev_free->b_next_free = bh; + (*bhp)->b_prev_free = bh; + } } static inline void insert_into_queues(struct buffer_head * bh) @@ -450,30 +462,34 @@ static inline void insert_into_queues(struct buffer_head * bh) /* put at end of free list */ if(bh->b_dev == B_FREE) { put_last_free(bh); - return; - } - if(!lru_list[bh->b_list]) { - lru_list[bh->b_list] = bh; - bh->b_prev_free = bh; + } else { + struct buffer_head **bhp = &lru_list[bh->b_list]; + + if(!*bhp) { + *bhp = bh; + bh->b_prev_free = bh; + } + + if (bh->b_next_free) + panic("VFS: buffer LRU pointers corrupted"); + + bh->b_next_free = *bhp; + bh->b_prev_free = (*bhp)->b_prev_free; + (*bhp)->b_prev_free->b_next_free = bh; + (*bhp)->b_prev_free = bh; + + nr_buffers_type[bh->b_list]++; + + /* Put the buffer in new hash-queue if it has a device. */ + if (bh->b_dev) { + struct buffer_head **bhp = &hash(bh->b_dev, bh->b_blocknr); + if((bh->b_next = *bhp) != NULL) + (*bhp)->b_pprev = &bh->b_next; + *bhp = bh; + bh->b_pprev = bhp; /* Exists in bh hashes. */ + } else + bh->b_pprev = NULL; /* Not in bh hashes. */ } - if (!next_to_age[bh->b_list]) - next_to_age[bh->b_list] = bh; - if (bh->b_next_free) panic("VFS: buffer LRU pointers corrupted"); - bh->b_next_free = lru_list[bh->b_list]; - bh->b_prev_free = lru_list[bh->b_list]->b_prev_free; - lru_list[bh->b_list]->b_prev_free->b_next_free = bh; - lru_list[bh->b_list]->b_prev_free = bh; - nr_buffers_type[bh->b_list]++; - nr_buffers_st[BUFSIZE_INDEX(bh->b_size)][bh->b_list]++; -/* put the buffer in new hash-queue if it has a device */ - bh->b_prev = NULL; - bh->b_next = NULL; - if (!(bh->b_dev)) - return; - bh->b_next = hash(bh->b_dev,bh->b_blocknr); - hash(bh->b_dev,bh->b_blocknr) = bh; - if (bh->b_next) - bh->b_next->b_prev = bh; } static inline struct buffer_head * find_buffer(kdev_t dev, int block, int size) @@ -481,14 +497,14 @@ static inline struct buffer_head * find_buffer(kdev_t dev, int block, int size) struct buffer_head * tmp; for (tmp = hash(dev,block) ; tmp != NULL ; tmp = tmp->b_next) - if (tmp->b_blocknr == block && tmp->b_dev == dev) + if (tmp->b_blocknr == block && tmp->b_dev == dev) { if (tmp->b_size == size) return tmp; - else { - printk("VFS: Wrong blocksize on device %s\n", - kdevname(dev)); - return NULL; - } + + printk("VFS: Wrong blocksize on device %s\n", + kdevname(dev)); + return NULL; + } return NULL; } @@ -508,15 +524,36 @@ struct buffer_head * get_hash_table(kdev_t dev, int block, int size) return NULL; bh->b_count++; wait_on_buffer(bh); - if (bh->b_dev == dev && bh->b_blocknr == block - && bh->b_size == size) + if (bh->b_dev == dev && + bh->b_blocknr == block && + bh->b_size == size) return bh; bh->b_count--; } } +unsigned int get_hardblocksize(kdev_t dev) +{ + /* + * Get the hard sector size for the given device. If we don't know + * what it is, return 0. + */ + if (hardsect_size[MAJOR(dev)] != NULL) { + int blksize = hardsect_size[MAJOR(dev)][MINOR(dev)]; + if (blksize != 0) + return blksize; + } + + /* + * We don't know what the hardware sector size for this device is. + * Return 0 indicating that we don't know. + */ + return 0; +} + void set_blocksize(kdev_t dev, int size) { + extern int *blksize_size[]; int i, nlist; struct buffer_head * bh, *bhnext; @@ -540,13 +577,15 @@ void set_blocksize(kdev_t dev, int size) sync_buffers(dev, 2); blksize_size[MAJOR(dev)][MINOR(dev)] = size; - /* We need to be quite careful how we do this - we are moving entries - around on the free list, and we can get in a loop if we are not careful.*/ - + /* We need to be quite careful how we do this - we are moving entries + * around on the free list, and we can get in a loop if we are not careful. + */ for(nlist = 0; nlist < NR_LIST; nlist++) { bh = lru_list[nlist]; for (i = nr_buffers_type[nlist]*2 ; --i > 0 ; bh = bhnext) { - if(!bh) break; + if(!bh) + break; + bhnext = bh->b_next_free; if (bh->b_dev != dev) continue; @@ -565,191 +604,154 @@ void set_blocksize(kdev_t dev, int size) } } -#define BADNESS(bh) (buffer_dirty(bh) || buffer_locked(bh)) - -void refill_freelist(int size) +/* Check if a buffer is OK to be reclaimed. */ +static inline int can_reclaim(struct buffer_head *bh, int size) { - struct buffer_head * bh, * tmp; - struct buffer_head * candidate[NR_LIST]; - unsigned int best_time, winner; - int isize = BUFSIZE_INDEX(size); - int buffers[NR_LIST]; - int i; - int needed; + if (bh->b_count || + buffer_protected(bh) || + buffer_locked(bh)) + return 0; + + if (atomic_read(&mem_map[MAP_NR((unsigned long) bh->b_data)].count) != 1 || + buffer_dirty(bh)) { + refile_buffer(bh); + return 0; + } - /* First see if we even need this. Sometimes it is advantageous - to request some blocks in a filesystem that we know that we will - be needing ahead of time. */ + if (bh->b_size != size) + return 0; - if (nr_free[isize] > 100) - return; + return 1; +} - ++refilled; - /* If there are too many dirty buffers, we wake up the update process - now so as to ensure that there are still clean buffers available - for user processes to use (and dirty) */ +/* Find a candidate buffer to be reclaimed. */ +static struct buffer_head *find_candidate(struct buffer_head *list, + int *list_len, int size) +{ + struct buffer_head *bh; - /* We are going to try to locate this much memory */ - needed =bdf_prm.b_un.nrefill * size; + for (bh = list; + bh && (*list_len) > 0; + bh = bh->b_next_free, (*list_len)--) { + if (size != bh->b_size) { + /* This provides a mechanism for freeing blocks + * of other sizes, this is necessary now that we + * no longer have the lav code. + */ + try_to_free_buffer(bh,&bh,1); + if (!bh) + break; + continue; + } - while (nr_free_pages > min_free_pages*2 && needed > 0 && - grow_buffers(GFP_BUFFER, size)) { - needed -= PAGE_SIZE; + if (buffer_locked(bh) && + (bh->b_list == BUF_LOCKED || bh->b_list == BUF_LOCKED1)) { + /* Buffers are written in the order they are placed + * on the locked list. If we encounter a locked + * buffer here, this means that the rest of them + * are also locked. + */ + (*list_len) = 0; + return NULL; + } + + if (can_reclaim(bh,size)) + return bh; } - if(needed <= 0) return; + return NULL; +} + +static void refill_freelist(int size) +{ + struct buffer_head * bh; + struct buffer_head * candidate[BUF_DIRTY]; + unsigned int best_time, winner; + int buffers[BUF_DIRTY]; + int i; + int needed; - /* See if there are too many buffers of a different size. - If so, victimize them */ + refilled = 1; + /* If there are too many dirty buffers, we wake up the update process + * now so as to ensure that there are still clean buffers available + * for user processes to use (and dirty). + */ + + /* We are going to try to locate this much memory. */ + needed = bdf_prm.b_un.nrefill * size; - while(maybe_shrink_lav_buffers(size)) - { - if(!grow_buffers(GFP_BUFFER, size)) break; - needed -= PAGE_SIZE; - if(needed <= 0) return; - }; + while ((nr_free_pages > min_free_pages*2) && + (needed > 0) && + grow_buffers(GFP_BUFFER, size)) + needed -= PAGE_SIZE; +repeat: /* OK, we cannot grow the buffer cache, now try to get some - from the lru list */ + * from the lru list. + * + * First set the candidate pointers to usable buffers. This + * should be quick nearly all of the time. + */ - /* First set the candidate pointers to usable buffers. This - should be quick nearly all of the time. */ + if(needed <= 0) + return; -repeat0: - for(i=0; i<NR_LIST; i++){ - if(i == BUF_DIRTY || i == BUF_SHARED || - nr_buffers_type[i] == 0) { - candidate[i] = NULL; - buffers[i] = 0; - continue; - } + for(i=0; i<BUF_DIRTY; i++) { buffers[i] = nr_buffers_type[i]; - for (bh = lru_list[i]; buffers[i] > 0; bh = tmp, buffers[i]--) - { - if(buffers[i] < 0) panic("Here is the problem"); - tmp = bh->b_next_free; - if (!bh) break; - - if (mem_map[MAP_NR((unsigned long) bh->b_data)].count != 1 || - buffer_dirty(bh)) { - refile_buffer(bh); - continue; - } - - if (bh->b_count || buffer_protected(bh) || bh->b_size != size) - continue; - - /* Buffers are written in the order they are placed - on the locked list. If we encounter a locked - buffer here, this means that the rest of them - are also locked */ - if (buffer_locked(bh) && (i == BUF_LOCKED || i == BUF_LOCKED1)) { - buffers[i] = 0; - break; - } - - if (BADNESS(bh)) continue; - break; - }; - if(!buffers[i]) candidate[i] = NULL; /* Nothing on this list */ - else candidate[i] = bh; - if(candidate[i] && candidate[i]->b_count) panic("Here is the problem"); + candidate[i] = find_candidate(lru_list[i], &buffers[i], size); } - repeat: - if(needed <= 0) return; - - /* Now see which candidate wins the election */ + /* Now see which candidate wins the election. */ winner = best_time = UINT_MAX; - for(i=0; i<NR_LIST; i++){ - if(!candidate[i]) continue; - if(candidate[i]->b_lru_time < best_time){ + for(i=0; i<BUF_DIRTY; i++) { + if(!candidate[i]) + continue; + if(candidate[i]->b_lru_time < best_time) { best_time = candidate[i]->b_lru_time; winner = i; } } - /* If we have a winner, use it, and then get a new candidate from that list */ + /* If we have a winner, use it, and then get a new candidate from that list. */ if(winner != UINT_MAX) { i = winner; - bh = candidate[i]; - candidate[i] = bh->b_next_free; - if(candidate[i] == bh) candidate[i] = NULL; /* Got last one */ - if (bh->b_count || bh->b_size != size) - panic("Busy buffer in candidate list\n"); - if (mem_map[MAP_NR((unsigned long) bh->b_data)].count != 1) - panic("Shared buffer in candidate list\n"); - if (buffer_protected(bh)) - panic("Protected buffer in candidate list\n"); - if (BADNESS(bh)) panic("Buffer in candidate list with BADNESS != 0\n"); + while (needed>0 && (bh=candidate[i])) { + candidate[i] = bh->b_next_free; + if(candidate[i] == bh) + candidate[i] = NULL; /* Got last one */ + remove_from_queues(bh); + bh->b_dev = B_FREE; + put_last_free(bh); + needed -= bh->b_size; + buffers[i]--; + if(buffers[i] == 0) + candidate[i] = NULL; - if(bh->b_dev == B_FREE) - panic("Wrong list"); - remove_from_queues(bh); - bh->b_dev = B_FREE; - put_last_free(bh); - needed -= bh->b_size; - buffers[i]--; - if(buffers[i] < 0) panic("Here is the problem"); - - if(buffers[i] == 0) candidate[i] = NULL; - - /* Now all we need to do is advance the candidate pointer - from the winner list to the next usable buffer */ - if(candidate[i] && buffers[i] > 0){ - if(buffers[i] <= 0) panic("Here is another problem"); - for (bh = candidate[i]; buffers[i] > 0; bh = tmp, buffers[i]--) { - if(buffers[i] < 0) panic("Here is the problem"); - tmp = bh->b_next_free; - if (!bh) break; - - if (mem_map[MAP_NR((unsigned long) bh->b_data)].count != 1 || - buffer_dirty(bh)) { - refile_buffer(bh); - continue; - }; - - if (bh->b_count || buffer_protected(bh) || bh->b_size != size) - continue; - - /* Buffers are written in the order they are - placed on the locked list. If we encounter - a locked buffer here, this means that the - rest of them are also locked */ - if (buffer_locked(bh) && (i == BUF_LOCKED || i == BUF_LOCKED1)) { - buffers[i] = 0; - break; - } - - if (BADNESS(bh)) continue; - break; - }; - if(!buffers[i]) candidate[i] = NULL; /* Nothing here */ - else candidate[i] = bh; - if(candidate[i] && candidate[i]->b_count) - panic("Here is the problem"); + if (candidate[i] && !can_reclaim(candidate[i],size)) + candidate[i] = find_candidate(candidate[i], + &buffers[i], size); } - - goto repeat; + if (needed >= 0) + goto repeat; } - if(needed <= 0) return; + if(needed <= 0) + return; /* Too bad, that was not enough. Try a little harder to grow some. */ - if (nr_free_pages > min_free_pages + 5) { if (grow_buffers(GFP_BUFFER, size)) { needed -= PAGE_SIZE; - goto repeat0; - }; + goto repeat; + } } - /* and repeat until we find something good */ + /* And repeat until we find something good. */ if (!grow_buffers(GFP_ATOMIC, size)) wakeup_bdflush(1); needed -= PAGE_SIZE; - goto repeat0; + goto repeat; } /* @@ -767,12 +769,10 @@ struct buffer_head * getblk(kdev_t dev, int block, int size) struct buffer_head * bh; int isize = BUFSIZE_INDEX(size); - /* Update this for the buffer size lav. */ - buffer_usage[isize]++; - /* If there are too many dirty buffers, we wake up the update process - now so as to ensure that there are still clean buffers available - for user processes to use (and dirty) */ + * now so as to ensure that there are still clean buffers available + * for user processes to use (and dirty). + */ repeat: bh = get_hash_table(dev, block, size); if (bh) { @@ -785,7 +785,8 @@ repeat: return bh; } - while(!free_list[isize]) refill_freelist(size); + while(!free_list[isize]) + refill_freelist(size); if (find_buffer(dev,block,size)) goto repeat; @@ -793,8 +794,9 @@ repeat: bh = free_list[isize]; remove_from_free_list(bh); -/* OK, FINALLY we know that this buffer is the only one of its kind, */ -/* and that it's unused (b_count=0), unlocked (buffer_locked=0), and clean */ + /* OK, FINALLY we know that this buffer is the only one of its kind, + * and that it's unused (b_count=0), unlocked (buffer_locked=0), and clean. + */ bh->b_count=1; bh->b_flushtime=0; bh->b_state=(1<<BH_Touched); @@ -809,7 +811,7 @@ void set_writetime(struct buffer_head * buf, int flag) int newtime; if (buffer_dirty(buf)) { - /* Move buffer to dirty list if jiffies is clear */ + /* Move buffer to dirty list if jiffies is clear. */ newtime = jiffies + (flag ? bdf_prm.b_un.age_super : bdf_prm.b_un.age_buffer); if(!buf->b_flushtime || buf->b_flushtime > newtime) @@ -827,7 +829,6 @@ void set_writetime(struct buffer_head * buf, int flag) void refile_buffer(struct buffer_head * buf) { int dispose; - int isize; if(buf->b_dev == B_FREE) { printk("Attempt to refile free buffer\n"); @@ -835,17 +836,14 @@ void refile_buffer(struct buffer_head * buf) } if (buffer_dirty(buf)) dispose = BUF_DIRTY; - else if ((mem_map[MAP_NR((unsigned long) buf->b_data)].count > 1) || buffer_protected(buf)) - dispose = BUF_SHARED; else if (buffer_locked(buf)) dispose = BUF_LOCKED; - else if (buf->b_list == BUF_SHARED) - dispose = BUF_UNSHARED; else dispose = BUF_CLEAN; - if(dispose == BUF_CLEAN) buf->b_lru_time = jiffies; - if(dispose != buf->b_list) { - if(dispose == BUF_DIRTY || dispose == BUF_UNSHARED) + if(dispose == BUF_CLEAN) + buf->b_lru_time = jiffies; + if(dispose != buf->b_list) { + if(dispose == BUF_DIRTY) buf->b_lru_time = jiffies; if(dispose == BUF_LOCKED && (buf->b_flushtime - buf->b_lru_time) <= bdf_prm.b_un.age_super) @@ -854,19 +852,21 @@ void refile_buffer(struct buffer_head * buf) buf->b_list = dispose; insert_into_queues(buf); if (dispose == BUF_DIRTY) { - /* This buffer is dirty, maybe we need to start flushing. */ - /* If too high a percentage of the buffers are dirty... */ - if (nr_buffers_type[BUF_DIRTY] > - (nr_buffers - nr_buffers_type[BUF_SHARED]) * - bdf_prm.b_un.nfract/100) - wakeup_bdflush(0); - /* If this is a loop device, and - * more than half of the buffers of this size are dirty... */ - /* (Prevents no-free-buffers deadlock with loop device.) */ - isize = BUFSIZE_INDEX(buf->b_size); - if (MAJOR(buf->b_dev) == LOOP_MAJOR && - nr_buffers_st[isize][BUF_DIRTY]*2>nr_buffers_size[isize]) - wakeup_bdflush(1); + int too_many = (nr_buffers * bdf_prm.b_un.nfract/100); + + /* This buffer is dirty, maybe we need to start flushing. + * If too high a percentage of the buffers are dirty... + */ + if (nr_buffers_type[BUF_DIRTY] > too_many) + wakeup_bdflush(0); + + /* If this is a loop device, and + * more than half of the buffers are dirty... + * (Prevents no-free-buffers deadlock with loop device.) + */ + if (MAJOR(buf->b_dev) == LOOP_MAJOR && + nr_buffers_type[BUF_DIRTY]*2>nr_buffers) + wakeup_bdflush(1); } } } @@ -878,7 +878,7 @@ void __brelse(struct buffer_head * buf) { wait_on_buffer(buf); - /* If dirty, mark the time this buffer should be written back */ + /* If dirty, mark the time this buffer should be written back. */ set_writetime(buf, 0); refile_buffer(buf); @@ -977,13 +977,13 @@ struct buffer_head * breada(kdev_t dev, int block, int bufsize, else bhlist[j++] = bh; } - /* Request the read for these buffers, and then release them */ + /* Request the read for these buffers, and then release them. */ if (j>1) ll_rw_block(READA, (j-1), bhlist+1); for(i=1; i<j; i++) brelse(bhlist[i]); - /* Wait for this buffer, and then continue on */ + /* Wait for this buffer, and then continue on. */ bh = bhlist[0]; wait_on_buffer(bh); if (buffer_uptodate(bh)) @@ -992,11 +992,15 @@ struct buffer_head * breada(kdev_t dev, int block, int bufsize, return NULL; } -/* - * See fs/inode.c for the weird use of volatile.. - */ static void put_unused_buffer_head(struct buffer_head * bh) { + if (nr_unused_buffer_heads >= MAX_UNUSED_BUFFERS) { + nr_buffer_heads--; + kmem_cache_free(bh_cachep, bh); + return; + } + memset(bh,0,sizeof(*bh)); + nr_unused_buffer_heads++; bh->b_next_free = unused_list; unused_list = bh; wake_up(&buffer_wait); @@ -1004,24 +1008,20 @@ static void put_unused_buffer_head(struct buffer_head * bh) static void get_more_buffer_heads(void) { - int i; struct buffer_head * bh; - for (;;) { - if (unused_list) - return; - - /* - * This is critical. We can't swap out pages to get + while (!unused_list) { + /* This is critical. We can't swap out pages to get * more buffer heads, because the swap-out may need - * more buffer-heads itself. Thus GFP_ATOMIC. + * more buffer-heads itself. Thus SLAB_ATOMIC. */ - bh = (struct buffer_head *) get_free_page(GFP_ATOMIC); - if (bh) - break; + if((bh = kmem_cache_alloc(bh_cachep, SLAB_ATOMIC)) != NULL) { + put_unused_buffer_head(bh); + nr_buffer_heads++; + return; + } - /* - * Uhhuh. We're _really_ low on memory. Now we just + /* Uhhuh. We're _really_ low on memory. Now we just * wait for old buffer heads to become free due to * finishing IO.. */ @@ -1029,10 +1029,6 @@ static void get_more_buffer_heads(void) sleep_on(&buffer_wait); } - for (nr_buffer_heads+=i=PAGE_SIZE/sizeof*bh ; i>0; i--) { - bh->b_next_free = unused_list; /* only make link */ - unused_list = bh++; - } } /* @@ -1051,17 +1047,15 @@ static void get_more_buffer_heads(void) static inline void recover_reusable_buffer_heads(void) { if (reuse_list) { - struct buffer_head *bh; - unsigned long flags; + struct buffer_head *head; + + head = xchg(&reuse_list, NULL); - save_flags(flags); do { - cli(); - bh = reuse_list; - reuse_list = bh->b_next_free; - restore_flags(flags); + struct buffer_head *bh = head; + head = head->b_next_free; put_unused_buffer_head(bh); - } while (reuse_list); + } while (head); } } @@ -1075,6 +1069,7 @@ static struct buffer_head * get_unused_buffer_head(void) return NULL; bh = unused_list; unused_list = bh->b_next_free; + nr_unused_buffer_heads--; return bh; } @@ -1351,7 +1346,7 @@ int generic_readpage(struct inode * inode, struct page * page) int *p, nr[PAGE_SIZE/512]; int i; - page->count++; + atomic_inc(&page->count); set_bit(PG_locked, &page->flags); set_bit(PG_free_after, &page->flags); @@ -1400,7 +1395,6 @@ static int grow_buffers(int pri, int size) tmp = bh; while (1) { - nr_free[isize]++; if (insert_point) { tmp->b_next_free = insert_point->b_next_free; tmp->b_prev_free = insert_point; @@ -1412,7 +1406,6 @@ static int grow_buffers(int pri, int size) } insert_point = tmp; ++nr_buffers; - ++nr_buffers_size[isize]; if (tmp->b_this_page) tmp = tmp->b_this_page; else @@ -1442,7 +1435,6 @@ int try_to_free_buffer(struct buffer_head * bh, struct buffer_head ** bhp, { unsigned long page; struct buffer_head * tmp, * p; - int isize = BUFSIZE_INDEX(bh->b_size); *bhp = bh; page = (unsigned long) bh->b_data; @@ -1464,193 +1456,20 @@ int try_to_free_buffer(struct buffer_head * bh, struct buffer_head ** bhp, p = tmp; tmp = tmp->b_this_page; nr_buffers--; - nr_buffers_size[isize]--; - if (p == *bhp) - { - *bhp = p->b_prev_free; - if (p == *bhp) /* Was this the last in the list? */ - *bhp = NULL; - } + if (p == *bhp) { + *bhp = p->b_prev_free; + if (p == *bhp) /* Was this the last in the list? */ + *bhp = NULL; + } remove_from_queues(p); put_unused_buffer_head(p); } while (tmp != bh); buffermem -= PAGE_SIZE; mem_map[MAP_NR(page)].buffers = NULL; free_page(page); - return !mem_map[MAP_NR(page)].count; + return !atomic_read(&mem_map[MAP_NR(page)].count); } -/* Age buffers on a given page, according to whether they have been - visited recently or not. */ -static inline void age_buffer(struct buffer_head *bh) -{ - struct buffer_head *tmp = bh; - int touched = 0; - - /* - * When we age a page, we mark all other buffers in the page - * with the "has_aged" flag. Then, when these aliased buffers - * come up for aging, we skip them until next pass. This - * ensures that a page full of multiple buffers only gets aged - * once per pass through the lru lists. - */ - if (clear_bit(BH_Has_aged, &bh->b_state)) - return; - - do { - touched |= clear_bit(BH_Touched, &tmp->b_state); - tmp = tmp->b_this_page; - set_bit(BH_Has_aged, &tmp->b_state); - } while (tmp != bh); - clear_bit(BH_Has_aged, &bh->b_state); - - if (touched) - touch_page(mem_map + MAP_NR((unsigned long) bh->b_data)); - else - age_page(mem_map + MAP_NR((unsigned long) bh->b_data)); -} - -/* - * Consult the load average for buffers and decide whether or not - * we should shrink the buffers of one size or not. If we decide yes, - * do it and return 1. Else return 0. Do not attempt to shrink size - * that is specified. - * - * I would prefer not to use a load average, but the way things are now it - * seems unavoidable. The way to get rid of it would be to force clustering - * universally, so that when we reclaim buffers we always reclaim an entire - * page. Doing this would mean that we all need to move towards QMAGIC. - */ - -static int maybe_shrink_lav_buffers(int size) -{ - int nlist; - int isize; - int total_lav, total_n_buffers, n_sizes; - - /* Do not consider the shared buffers since they would not tend - to have getblk called very often, and this would throw off - the lav. They are not easily reclaimable anyway (let the swapper - make the first move). */ - - total_lav = total_n_buffers = n_sizes = 0; - for(nlist = 0; nlist < NR_SIZES; nlist++) - { - total_lav += buffers_lav[nlist]; - if(nr_buffers_size[nlist]) n_sizes++; - total_n_buffers += nr_buffers_size[nlist]; - total_n_buffers -= nr_buffers_st[nlist][BUF_SHARED]; - } - - /* See if we have an excessive number of buffers of a particular - size - if so, victimize that bunch. */ - - isize = (size ? BUFSIZE_INDEX(size) : -1); - - if (n_sizes > 1) - for(nlist = 0; nlist < NR_SIZES; nlist++) - { - if(nlist == isize) continue; - if(nr_buffers_size[nlist] && - bdf_prm.b_un.lav_const * buffers_lav[nlist]*total_n_buffers < - total_lav * (nr_buffers_size[nlist] - nr_buffers_st[nlist][BUF_SHARED])) - if(shrink_specific_buffers(6, bufferindex_size[nlist])) - return 1; - } - return 0; -} - -/* - * Try to free up some pages by shrinking the buffer-cache - * - * Priority tells the routine how hard to try to shrink the - * buffers: 6 means "don't bother too much", while a value - * of 0 means "we'd better get some free pages now". - * - * "limit" is meant to limit the shrink-action only to pages - * that are in the 0 - limit address range, for DMA re-allocations. - * We ignore that right now. - */ - -static int shrink_specific_buffers(unsigned int priority, int size) -{ - struct buffer_head *bh; - int nlist; - int i, isize, isize1; - -#ifdef DEBUG - if(size) printk("Shrinking buffers of size %d\n", size); -#endif - /* First try the free lists, and see if we can get a complete page - from here */ - isize1 = (size ? BUFSIZE_INDEX(size) : -1); - - for(isize = 0; isize<NR_SIZES; isize++){ - if(isize1 != -1 && isize1 != isize) continue; - bh = free_list[isize]; - if(!bh) continue; - for (i=0 ; !i || bh != free_list[isize]; bh = bh->b_next_free, i++) { - if (bh->b_count || buffer_protected(bh) || - !bh->b_this_page) - continue; - if (!age_of((unsigned long) bh->b_data) && - try_to_free_buffer(bh, &bh, 6)) - return 1; - if(!bh) break; - /* Some interrupt must have used it after we - freed the page. No big deal - keep looking */ - } - } - - /* Not enough in the free lists, now try the lru list */ - - for(nlist = 0; nlist < NR_LIST; nlist++) { - repeat1: - if(priority > 2 && nlist == BUF_SHARED) continue; - i = nr_buffers_type[nlist]; - i = ((BUFFEROUT_WEIGHT * i) >> 10) >> priority; - for ( ; i > 0; i-- ) { - bh = next_to_age[nlist]; - if (!bh) - break; - next_to_age[nlist] = bh->b_next_free; - - /* First, age the buffer. */ - age_buffer(bh); - /* We may have stalled while waiting for I/O - to complete. */ - if(bh->b_list != nlist) goto repeat1; - if (bh->b_count || buffer_protected(bh) || - !bh->b_this_page) - continue; - if(size && bh->b_size != size) continue; - if (buffer_locked(bh)) - if (priority) - continue; - else - wait_on_buffer(bh); - if (buffer_dirty(bh)) { - bh->b_count++; - bh->b_flushtime = 0; - ll_rw_block(WRITEA, 1, &bh); - bh->b_count--; - continue; - } - /* At priority 6, only consider really old - (age==0) buffers for reclaiming. At - priority 0, consider any buffers. */ - if ((age_of((unsigned long) bh->b_data) >> - (6-priority)) > 0) - continue; - if (try_to_free_buffer(bh, &bh, 0)) - return 1; - if(!bh) break; - } - } - return 0; -} - - /* ================== Debugging =================== */ void show_buffers(void) @@ -1658,17 +1477,18 @@ void show_buffers(void) struct buffer_head * bh; int found = 0, locked = 0, dirty = 0, used = 0, lastused = 0; int protected = 0; - int shared; - int nlist, isize; + int nlist; + static char *buf_types[NR_LIST] = {"CLEAN","LOCKED","LOCKED1","DIRTY"}; printk("Buffer memory: %6dkB\n",buffermem>>10); printk("Buffer heads: %6d\n",nr_buffer_heads); printk("Buffer blocks: %6d\n",nr_buffers); for(nlist = 0; nlist < NR_LIST; nlist++) { - shared = found = locked = dirty = used = lastused = protected = 0; + found = locked = dirty = used = lastused = protected = 0; bh = lru_list[nlist]; if(!bh) continue; + do { found++; if (buffer_locked(bh)) @@ -1677,260 +1497,42 @@ void show_buffers(void) protected++; if (buffer_dirty(bh)) dirty++; - if (mem_map[MAP_NR(((unsigned long) bh->b_data))].count != 1) - shared++; if (bh->b_count) used++, lastused = found; bh = bh->b_next_free; } while (bh != lru_list[nlist]); - printk("Buffer[%d] mem: %d buffers, %d used (last=%d), " - "%d locked, %d protected, %d dirty %d shrd\n", - nlist, found, used, lastused, - locked, protected, dirty, shared); - }; - printk("Size [LAV] Free Clean Unshar Lck Lck1 Dirty Shared \n"); - for(isize = 0; isize<NR_SIZES; isize++){ - printk("%5d [%5d]: %7d ", bufferindex_size[isize], - buffers_lav[isize], nr_free[isize]); - for(nlist = 0; nlist < NR_LIST; nlist++) - printk("%7d ", nr_buffers_st[isize][nlist]); - printk("\n"); - } -} - - -/* ====================== Cluster patches for ext2 ==================== */ - -/* - * try_to_reassign() checks if all the buffers on this particular page - * are unused, and reassign to a new cluster them if this is true. - */ -static inline int try_to_reassign(struct buffer_head * bh, struct buffer_head ** bhp, - kdev_t dev, unsigned int starting_block) -{ - unsigned long page; - struct buffer_head * tmp, * p; - - *bhp = bh; - page = (unsigned long) bh->b_data; - page &= PAGE_MASK; - if(mem_map[MAP_NR(page)].count != 1) return 0; - tmp = bh; - do { - if (!tmp) - return 0; - - if (tmp->b_count || buffer_protected(tmp) || - buffer_dirty(tmp) || buffer_locked(tmp)) - return 0; - tmp = tmp->b_this_page; - } while (tmp != bh); - tmp = bh; - - while((unsigned long) tmp->b_data & (PAGE_SIZE - 1)) - tmp = tmp->b_this_page; - - /* This is the buffer at the head of the page */ - bh = tmp; - do { - p = tmp; - tmp = tmp->b_this_page; - remove_from_queues(p); - p->b_dev = dev; - mark_buffer_uptodate(p, 0); - clear_bit(BH_Req, &p->b_state); - p->b_blocknr = starting_block++; - insert_into_queues(p); - } while (tmp != bh); - return 1; -} - -/* - * Try to find a free cluster by locating a page where - * all of the buffers are unused. We would like this function - * to be atomic, so we do not call anything that might cause - * the process to sleep. The priority is somewhat similar to - * the priority used in shrink_buffers. - * - * My thinking is that the kernel should end up using whole - * pages for the buffer cache as much of the time as possible. - * This way the other buffers on a particular page are likely - * to be very near each other on the free list, and we will not - * be expiring data prematurely. For now we only cannibalize buffers - * of the same size to keep the code simpler. - */ -static int reassign_cluster(kdev_t dev, - unsigned int starting_block, int size) -{ - struct buffer_head *bh; - int isize = BUFSIZE_INDEX(size); - int i; - - /* We want to give ourselves a really good shot at generating - a cluster, and since we only take buffers from the free - list, we "overfill" it a little. */ - - while(nr_free[isize] < 32) refill_freelist(size); - - bh = free_list[isize]; - if(bh) - for (i=0 ; !i || bh != free_list[isize] ; bh = bh->b_next_free, i++) { - if (!bh->b_this_page) continue; - if (try_to_reassign(bh, &bh, dev, starting_block)) - return 4; - } - return 0; -} - -/* This function tries to generate a new cluster of buffers - * from a new page in memory. We should only do this if we have - * not expanded the buffer cache to the maximum size that we allow. - */ -static unsigned long try_to_generate_cluster(kdev_t dev, int block, int size) -{ - struct buffer_head * bh, * tmp, * arr[MAX_BUF_PER_PAGE]; - int isize = BUFSIZE_INDEX(size); - unsigned long offset; - unsigned long page; - int nblock; - - page = get_free_page(GFP_NOBUFFER); - if(!page) return 0; - - bh = create_buffers(page, size); - if (!bh) { - free_page(page); - return 0; - }; - nblock = block; - for (offset = 0 ; offset < PAGE_SIZE ; offset += size) { - if (find_buffer(dev, nblock++, size)) - goto not_aligned; - } - tmp = bh; - nblock = 0; - while (1) { - arr[nblock++] = bh; - bh->b_count = 1; - bh->b_flushtime = 0; - bh->b_state = 0; - bh->b_dev = dev; - bh->b_list = BUF_CLEAN; - bh->b_blocknr = block++; - nr_buffers++; - nr_buffers_size[isize]++; - insert_into_queues(bh); - if (bh->b_this_page) - bh = bh->b_this_page; - else - break; - } - buffermem += PAGE_SIZE; - mem_map[MAP_NR(page)].buffers = bh; - bh->b_this_page = tmp; - while (nblock-- > 0) - brelse(arr[nblock]); - return 4; /* ?? */ -not_aligned: - while ((tmp = bh) != NULL) { - bh = bh->b_this_page; - put_unused_buffer_head(tmp); - } - free_page(page); - return 0; -} - -unsigned long generate_cluster(kdev_t dev, int b[], int size) -{ - int i, offset; - - for (i = 0, offset = 0 ; offset < PAGE_SIZE ; i++, offset += size) { - if(i && b[i]-1 != b[i-1]) return 0; /* No need to cluster */ - if(find_buffer(dev, b[i], size)) return 0; + printk("%8s: %d buffers, %d used (last=%d), " + "%d locked, %d protected, %d dirty\n", + buf_types[nlist], found, used, lastused, + locked, protected, dirty); }; - - /* OK, we have a candidate for a new cluster */ - - /* See if one size of buffer is over-represented in the buffer cache, - if so reduce the numbers of buffers */ - if(maybe_shrink_lav_buffers(size)) - { - int retval; - retval = try_to_generate_cluster(dev, b[0], size); - if(retval) return retval; - }; - - if (nr_free_pages > min_free_pages*2) - return try_to_generate_cluster(dev, b[0], size); - else - return reassign_cluster(dev, b[0], size); } -unsigned long generate_cluster_swab32(kdev_t dev, int b[], int size) -{ - int i, offset; - - for (i = 0, offset = 0 ; offset < PAGE_SIZE ; i++, offset += size) { - if(i && le32_to_cpu(b[i])-1 != - le32_to_cpu(b[i-1])) return 0; /* No need to cluster */ - if(find_buffer(dev, le32_to_cpu(b[i]), size)) return 0; - }; - - /* OK, we have a candidate for a new cluster */ - - /* See if one size of buffer is over-represented in the buffer cache, - if so reduce the numbers of buffers */ - if(maybe_shrink_lav_buffers(size)) - { - int retval; - retval = try_to_generate_cluster(dev, le32_to_cpu(b[0]), size); - if(retval) return retval; - }; - - if (nr_free_pages > min_free_pages*2) - return try_to_generate_cluster(dev, le32_to_cpu(b[0]), size); - else - return reassign_cluster(dev, le32_to_cpu(b[0]), size); -} /* ===================== Init ======================= */ /* - * This initializes the initial buffer free list. nr_buffers_type is set - * to one less the actual number of buffers, as a sop to backwards - * compatibility --- the old code did this (I think unintentionally, - * but I'm not sure), and programs in the ps package expect it. - * - TYT 8/30/92 + * allocate the hash table and init the free list + * Use gfp() for the hash table to decrease TLB misses, use + * SLAB cache for buffer heads. */ void buffer_init(void) { - int i; - int isize = BUFSIZE_INDEX(BLOCK_SIZE); - long memsize = max_mapnr << PAGE_SHIFT; - - if (memsize >= 64*1024*1024) - nr_hash = 65521; - else if (memsize >= 32*1024*1024) - nr_hash = 32749; - else if (memsize >= 16*1024*1024) - nr_hash = 16381; - else if (memsize >= 8*1024*1024) - nr_hash = 8191; - else if (memsize >= 4*1024*1024) - nr_hash = 4093; - else nr_hash = 997; - - hash_table = (struct buffer_head **) vmalloc(nr_hash * - sizeof(struct buffer_head *)); - + hash_table = (struct buffer_head **) + __get_free_pages(GFP_ATOMIC, HASH_PAGES_ORDER, 0); + if (!hash_table) + panic("Failed to allocate buffer hash table\n"); + memset(hash_table,0,NR_HASH*sizeof(struct buffer_head *)); + + bh_cachep = kmem_cache_create("buffer_head", + sizeof(struct buffer_head), + sizeof(unsigned long) * 4, + SLAB_HWCACHE_ALIGN, NULL, NULL); + if(!bh_cachep) + panic("Cannot create buffer head SLAB cache\n"); - for (i = 0 ; i < nr_hash ; i++) - hash_table[i] = NULL; lru_list[BUF_CLEAN] = 0; grow_buffers(GFP_KERNEL, BLOCK_SIZE); - if (!free_list[isize]) - panic("VFS: Unable to initialize buffer free list!"); - return; } @@ -1966,7 +1568,7 @@ static void wakeup_bdflush(int wait) asmlinkage int sync_old_buffers(void) { - int i, isize; + int i; int ndirty, nwritten; int nlist; int ncount; @@ -1985,6 +1587,7 @@ asmlinkage int sync_old_buffers(void) ndirty = 0; nwritten = 0; repeat: + bh = lru_list[nlist]; if(bh) for (i = nr_buffers_type[nlist]; i-- > 0; bh = next) { @@ -2022,13 +1625,6 @@ asmlinkage int sync_old_buffers(void) printk("Wrote %d/%d buffers\n", nwritten, ndirty); #endif - /* We assume that we only come through here on a regular - schedule, like every 5 seconds. Now update load averages. - Shift usage counts to prevent overflow. */ - for(isize = 0; isize<NR_SIZES; isize++){ - CALC_LOAD(buffers_lav[isize], bdf_prm.b_un.lav_const, buffer_usage[isize]); - buffer_usage[isize] = 0; - } return 0; } @@ -2040,37 +1636,42 @@ asmlinkage int sync_old_buffers(void) asmlinkage int sys_bdflush(int func, long data) { - int i, error; + int i, error = -EPERM; + lock_kernel(); if (!suser()) - return -EPERM; + goto out; - if (func == 1) - return sync_old_buffers(); + if (func == 1) { + error = sync_old_buffers(); + goto out; + } /* Basically func 1 means read param 1, 2 means write param 1, etc */ if (func >= 2) { i = (func-2) >> 1; + error = -EINVAL; if (i < 0 || i >= N_PARAM) - return -EINVAL; + goto out; if((func & 1) == 0) { - error = verify_area(VERIFY_WRITE, (void *) data, sizeof(int)); - if (error) - return error; - put_user(bdf_prm.data[i], (int*)data); - return 0; - }; + error = put_user(bdf_prm.data[i], (int*)data); + goto out; + } if (data < bdflush_min[i] || data > bdflush_max[i]) - return -EINVAL; + goto out; bdf_prm.data[i] = data; - return 0; + error = 0; + goto out; }; /* Having func 0 used to launch the actual bdflush and then never - return (unless explicitly killed). We return zero here to - remain semi-compatible with present update(8) programs. */ - - return 0; + * return (unless explicitly killed). We return zero here to + * remain semi-compatible with present update(8) programs. + */ + error = 0; +out: + unlock_kernel(); + return error; } /* This is the actual bdflush daemon itself. It used to be started from @@ -2111,11 +1712,7 @@ int bdflush(void * unused) * and other internals and thus be subject to the SMP locking * rules. (On a uniprocessor box this does nothing). */ - -#ifdef __SMP__ lock_kernel(); - syscall_count++; -#endif for (;;) { #ifdef DEBUG @@ -2132,6 +1729,7 @@ int bdflush(void * unused) ndirty = 0; refilled = 0; repeat: + bh = lru_list[nlist]; if(bh) for (i = nr_buffers_type[nlist]; i-- > 0 && ndirty < bdf_prm.b_un.ndirty; @@ -2192,29 +1790,9 @@ int bdflush(void * unused) /* If there are still a lot of dirty buffers around, skip the sleep and flush some more */ - - if(nr_buffers_type[BUF_DIRTY] <= (nr_buffers - nr_buffers_type[BUF_SHARED]) * - bdf_prm.b_un.nfract/100) { + if(nr_buffers_type[BUF_DIRTY] <= nr_buffers * bdf_prm.b_un.nfract/100) { current->signal = 0; interruptible_sleep_on(&bdflush_wait); } } } - - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-indent-level: 8 - * c-brace-imaginary-offset: 0 - * c-brace-offset: -8 - * c-argdecl-indent: 8 - * c-label-offset: -8 - * c-continued-statement-offset: 8 - * c-continued-brace-offset: 0 - * End: - */ |