Diffstat (limited to 'fs/buffer.c')
-rw-r--r--   fs/buffer.c | 1248
1 file changed, 413 insertions(+), 835 deletions(-)
diff --git a/fs/buffer.c b/fs/buffer.c
index 043e35b6c..27950290a 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -10,13 +10,16 @@
* data, of course), but instead letting the caller do it.
*/
-/*
- * NOTE! There is one discordant note here: checking floppies for
- * disk change. This is where it fits best, I think, as it should
- * invalidate changed floppy-disk-caches.
- */
-
/* Some bdflush() changes for the dynamic ramdisk - Paul Gortmaker, 12/94 */
+/* Start bdflush() with kernel_thread not syscall - Paul Gortmaker, 12/95 */
+
+/* Removed a lot of unnecessary code and simplified things now that
+ * the buffer cache isn't our primary cache - Andrew Tridgell 12/96
+ */
+
+/* Speed up hash, lru, and free list operations. Use gfp() for allocating
+ * hash table, use SLAB cache for buffer heads. -DaveM
+ */
#include <linux/sched.h>
#include <linux/kernel.h>
@@ -25,12 +28,14 @@
#include <linux/locks.h>
#include <linux/errno.h>
#include <linux/malloc.h>
+#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/swap.h>
#include <linux/swapctl.h>
#include <linux/smp.h>
#include <linux/smp_lock.h>
#include <linux/vmalloc.h>
+#include <linux/blkdev.h>
#include <asm/system.h>
#include <asm/uaccess.h>
@@ -40,39 +45,37 @@
#define NR_SIZES 5
static char buffersize_index[17] =
{-1, 0, 1, -1, 2, -1, -1, -1, 3, -1, -1, -1, -1, -1, -1, -1, 4};
-static short int bufferindex_size[NR_SIZES] = {512, 1024, 2048, 4096, 8192};
#define BUFSIZE_INDEX(X) ((int) buffersize_index[(X)>>9])
#define MAX_BUF_PER_PAGE (PAGE_SIZE / 512)
+#define MAX_UNUSED_BUFFERS 30 /* don't ever have more than this number of
+ unused buffer heads */
+#define HASH_PAGES 4 /* number of pages to use for the hash table */
+#define HASH_PAGES_ORDER 2
+#define NR_HASH (HASH_PAGES*PAGE_SIZE/sizeof(struct buffer_head *))
+#define HASH_MASK (NR_HASH-1)
static int grow_buffers(int pri, int size);
-static int shrink_specific_buffers(unsigned int priority, int size);
-static int maybe_shrink_lav_buffers(int);
-static int nr_hash = 0; /* Size of hash table */
static struct buffer_head ** hash_table;
static struct buffer_head * lru_list[NR_LIST] = {NULL, };
-/* next_to_age is an array of pointers into the lru lists, used to
- cycle through the buffers aging their contents when deciding which
- buffers to discard when more memory is needed */
-static struct buffer_head * next_to_age[NR_LIST] = {NULL, };
static struct buffer_head * free_list[NR_SIZES] = {NULL, };
+static kmem_cache_t *bh_cachep;
+
static struct buffer_head * unused_list = NULL;
-struct buffer_head * reuse_list = NULL;
+static struct buffer_head * reuse_list = NULL;
static struct wait_queue * buffer_wait = NULL;
-int nr_buffers = 0;
-int nr_buffers_type[NR_LIST] = {0,};
-int nr_buffers_size[NR_SIZES] = {0,};
-int nr_buffers_st[NR_SIZES][NR_LIST] = {{0,},};
-int buffer_usage[NR_SIZES] = {0,}; /* Usage counts used to determine load average */
-int buffers_lav[NR_SIZES] = {0,}; /* Load average of buffer usage */
-int nr_free[NR_SIZES] = {0,};
+static int nr_buffers = 0;
+static int nr_buffers_type[NR_LIST] = {0,};
+static int nr_buffer_heads = 0;
+static int nr_unused_buffer_heads = 0;
+static int refilled = 0; /* Set NZ when a buffer freelist is refilled
+ this is used by the loop device */
+
+/* This is used by some architectures to estimate available memory. */
int buffermem = 0;
-int nr_buffer_heads = 0;
-int refilled = 0; /* Set NZ when a buffer freelist is refilled */
-extern int *blksize_size[];
/* Here is the parameter block for the bdflush process. If you add or
* remove any of the parameters, make sure to update kernel/sysctl.c.
@@ -81,8 +84,10 @@ extern int *blksize_size[];
static void wakeup_bdflush(int);
#define N_PARAM 9
-#define LAV
+/* The dummy values in this structure are left in there for compatibility
+ * with old programs that play with the /proc entries.
+ */
union bdflush_param{
struct {
int nfract; /* Percentage of buffer cache dirty to
@@ -93,26 +98,17 @@ union bdflush_param{
each time we call refill */
int nref_dirt; /* Dirty buffer threshold for activating bdflush
when trying to refill buffers. */
- int clu_nfract; /* Percentage of buffer cache to scan to
- search for free clusters */
+ int dummy1; /* unused */
int age_buffer; /* Time for normal buffer to age before
we flush it */
int age_super; /* Time for superblock to age before we
flush it */
- int lav_const; /* Constant used for load average (time
- constant */
- int lav_ratio; /* Used to determine how low a lav for a
- particular size can go before we start to
- trim back the buffers */
+ int dummy2; /* unused */
+ int dummy3; /* unused */
} b_un;
unsigned int data[N_PARAM];
} bdf_prm = {{60, 500, 64, 256, 15, 30*HZ, 5*HZ, 1884, 2}};
-/* The lav constant is set for 1 minute, as long as the update process runs
- every 5 seconds. If you change the frequency of update, the time
- constant will also change. */
-
-
/* These are the min and max parameter values that we will allow to be assigned */
int bdflush_min[N_PARAM] = { 0, 10, 5, 25, 0, 100, 100, 1, 1};
int bdflush_max[N_PARAM] = {100,5000, 2000, 2000,100, 60000, 60000, 2047, 5};
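The union above overlays the named tuning fields and the flat data[N_PARAM] array on the same storage, which is why the obsolete slots are kept as dummies: programs that poke the /proc entries index data[] by position. A minimal stand-alone sketch of that aliasing, assuming HZ is 100 so the default initializer can be written out numerically; the field names match the patch but the program itself is purely illustrative:

#include <stdio.h>

#define N_PARAM 9

union bdflush_param {
        struct {
                int nfract, ndirty, nrefill, nref_dirt;
                int dummy1, age_buffer, age_super, dummy2, dummy3;
        } b_un;
        unsigned int data[N_PARAM];
} prm = {{60, 500, 64, 256, 15, 30*100, 5*100, 1884, 2}};  /* HZ assumed 100 */

int main(void)
{
        /* data[0] and b_un.nfract are the same storage, so an old
         * /proc-style reader asking for parameter 0 still sees nfract. */
        printf("nfract via field: %d, via data[0]: %u\n",
               prm.b_un.nfract, prm.data[0]);
        /* Slot 4 is now a dummy, but it still occupies position 4. */
        printf("data[4] (dummy1): %u\n", prm.data[4]);
        return 0;
}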
@@ -145,29 +141,31 @@ repeat:
}
/* Call sync_buffers with wait!=0 to ensure that the call does not
- return until all buffer writes have completed. Sync() may return
- before the writes have finished; fsync() may not. */
-
+ * return until all buffer writes have completed. Sync() may return
+ * before the writes have finished; fsync() may not.
+ */
/* Godamity-damn. Some buffers (bitmaps for filesystems)
- spontaneously dirty themselves without ever brelse being called.
- We will ultimately want to put these in a separate list, but for
- now we search all of the lists for dirty buffers */
-
+ * spontaneously dirty themselves without ever brelse being called.
+ * We will ultimately want to put these in a separate list, but for
+ * now we search all of the lists for dirty buffers.
+ */
static int sync_buffers(kdev_t dev, int wait)
{
int i, retry, pass = 0, err = 0;
struct buffer_head * bh, *next;
/* One pass for no-wait, three for wait:
- 0) write out all dirty, unlocked buffers;
- 1) write out all dirty buffers, waiting if locked;
- 2) wait for completion by waiting for all buffers to unlock. */
+ * 0) write out all dirty, unlocked buffers;
+ * 1) write out all dirty buffers, waiting if locked;
+ * 2) wait for completion by waiting for all buffers to unlock.
+ */
do {
retry = 0;
repeat:
- /* We search all lists as a failsafe mechanism, not because we expect
- there to be dirty buffers on any of the other lists. */
+ /* We search all lists as a failsafe mechanism, not because we expect
+ * there to be dirty buffers on any of the other lists.
+ */
bh = lru_list[BUF_DIRTY];
if (!bh)
goto repeat2;
@@ -181,7 +179,8 @@ repeat:
continue;
if (buffer_locked(bh)) {
/* Buffer is locked; skip it unless wait is
- requested AND pass > 0. */
+ * requested AND pass > 0.
+ */
if (!wait || !pass) {
retry = 1;
continue;
@@ -189,18 +188,27 @@ repeat:
wait_on_buffer (bh);
goto repeat;
}
+
/* If an unlocked buffer is not uptodate, there has
- been an IO error. Skip it. */
+ * been an IO error. Skip it.
+ */
if (wait && buffer_req(bh) && !buffer_locked(bh) &&
!buffer_dirty(bh) && !buffer_uptodate(bh)) {
err = 1;
continue;
}
+
/* Don't write clean buffers. Don't write ANY buffers
- on the third pass. */
+ * on the third pass.
+ */
if (!buffer_dirty(bh) || pass >= 2)
continue;
- /* don't bother about locked buffers */
+
+ /* Don't bother about locked buffers.
+ *
+ * XXX We checked if it was locked above and there is no
+ * XXX way we could have slept in between. -DaveM
+ */
if (buffer_locked(bh))
continue;
bh->b_count++;
@@ -226,7 +234,8 @@ repeat:
continue;
if (buffer_locked(bh)) {
/* Buffer is locked; skip it unless wait is
- requested AND pass > 0. */
+ * requested AND pass > 0.
+ */
if (!wait || !pass) {
retry = 1;
continue;
@@ -236,10 +245,11 @@ repeat:
}
}
- /* If we are waiting for the sync to succeed, and if any dirty
- blocks were written, then repeat; on the second pass, only
- wait for buffers being written (do not pass to write any
- more buffers on the second pass). */
+ /* If we are waiting for the sync to succeed, and if any dirty
+ * blocks were written, then repeat; on the second pass, only
+ * wait for buffers being written (do not pass to write any
+ * more buffers on the second pass).
+ */
} while (wait && retry && ++pass<=2);
return err;
}
@@ -264,7 +274,9 @@ int fsync_dev(kdev_t dev)
asmlinkage int sys_sync(void)
{
+ lock_kernel();
fsync_dev(0);
+ unlock_kernel();
return 0;
}
@@ -277,29 +289,39 @@ asmlinkage int sys_fsync(unsigned int fd)
{
struct file * file;
struct inode * inode;
+ int err = 0;
+ lock_kernel();
if (fd>=NR_OPEN || !(file=current->files->fd[fd]) || !(inode=file->f_inode))
- return -EBADF;
- if (!file->f_op || !file->f_op->fsync)
- return -EINVAL;
- if (file->f_op->fsync(inode,file))
- return -EIO;
- return 0;
+ err = -EBADF;
+ else if (!file->f_op || !file->f_op->fsync)
+ err = -EINVAL;
+ else if (file->f_op->fsync(inode,file))
+ err = -EIO;
+ unlock_kernel();
+ return err;
}
asmlinkage int sys_fdatasync(unsigned int fd)
{
struct file * file;
struct inode * inode;
+ int err = -EBADF;
+ lock_kernel();
if (fd>=NR_OPEN || !(file=current->files->fd[fd]) || !(inode=file->f_inode))
- return -EBADF;
+ goto out;
+ err = -EINVAL;
if (!file->f_op || !file->f_op->fsync)
- return -EINVAL;
+ goto out;
/* this needs further work, at the moment it is identical to fsync() */
if (file->f_op->fsync(inode,file))
- return -EIO;
- return 0;
+ err = -EIO;
+ else
+ err = 0;
+out:
+ unlock_kernel();
+ return err;
}
void invalidate_buffers(kdev_t dev)
@@ -327,18 +349,17 @@ void invalidate_buffers(kdev_t dev)
}
}
-#define _hashfn(dev,block) (((unsigned)(HASHDEV(dev)^block))%nr_hash)
+#define _hashfn(dev,block) (((unsigned)(HASHDEV(dev)^block))&HASH_MASK)
#define hash(dev,block) hash_table[_hashfn(dev,block)]
static inline void remove_from_hash_queue(struct buffer_head * bh)
{
- if (bh->b_next)
- bh->b_next->b_prev = bh->b_prev;
- if (bh->b_prev)
- bh->b_prev->b_next = bh->b_next;
- if (hash(bh->b_dev,bh->b_blocknr) == bh)
- hash(bh->b_dev,bh->b_blocknr) = bh->b_next;
- bh->b_next = bh->b_prev = NULL;
+ if (bh->b_pprev) {
+ if(bh->b_next)
+ bh->b_next->b_pprev = bh->b_pprev;
+ *bh->b_pprev = bh->b_next;
+ bh->b_pprev = NULL;
+ }
}
static inline void remove_from_lru_list(struct buffer_head * bh)
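The hash function above now masks with HASH_MASK instead of taking a modulo, which is only equivalent because NR_HASH (HASH_PAGES * PAGE_SIZE / sizeof(struct buffer_head *)) is a power of two. A minimal user-space sketch of the arithmetic, assuming 4 KB pages and 32-bit pointers so NR_HASH works out to 4096; the macro names mirror the patch but the program is only an illustration:

#include <assert.h>
#include <stdio.h>

#define PAGE_SIZE   4096u               /* assumption: 4 KB pages */
#define HASH_PAGES  4u
#define PTR_SIZE    4u                  /* assumption: 32-bit pointers */
#define NR_HASH     (HASH_PAGES * PAGE_SIZE / PTR_SIZE)
#define HASH_MASK   (NR_HASH - 1)

/* Mask-based hash: valid only when NR_HASH is a power of two. */
static unsigned hashfn(unsigned dev, unsigned block)
{
        return (dev ^ block) & HASH_MASK;
}

int main(void)
{
        /* The mask trick requires a power-of-two table size. */
        assert((NR_HASH & (NR_HASH - 1)) == 0);

        unsigned dev = 0x0301, block = 12345;
        /* For a power-of-two table, AND with the mask equals modulo. */
        assert(hashfn(dev, block) == (dev ^ block) % NR_HASH);
        printf("NR_HASH=%u bucket=%u\n", NR_HASH, hashfn(dev, block));
        return 0;
}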
@@ -354,11 +375,6 @@ static inline void remove_from_lru_list(struct buffer_head * bh)
lru_list[bh->b_list] = bh->b_next_free;
if (lru_list[bh->b_list] == bh)
lru_list[bh->b_list] = NULL;
- if (next_to_age[bh->b_list] == bh)
- next_to_age[bh->b_list] = bh->b_next_free;
- if (next_to_age[bh->b_list] == bh)
- next_to_age[bh->b_list] = NULL;
-
bh->b_next_free = bh->b_prev_free = NULL;
}
@@ -371,7 +387,6 @@ static inline void remove_from_free_list(struct buffer_head * bh)
panic("Free list corrupted");
if(!free_list[isize])
panic("Free list empty");
- nr_free[isize]--;
if(bh->b_next_free == bh)
free_list[isize] = NULL;
else {
@@ -391,58 +406,55 @@ static inline void remove_from_queues(struct buffer_head * bh)
return;
}
nr_buffers_type[bh->b_list]--;
- nr_buffers_st[BUFSIZE_INDEX(bh->b_size)][bh->b_list]--;
remove_from_hash_queue(bh);
remove_from_lru_list(bh);
}
static inline void put_last_lru(struct buffer_head * bh)
{
- if (!bh)
- return;
- if (bh == lru_list[bh->b_list]) {
- lru_list[bh->b_list] = bh->b_next_free;
- if (next_to_age[bh->b_list] == bh)
- next_to_age[bh->b_list] = bh->b_next_free;
- return;
- }
- if(bh->b_dev == B_FREE)
- panic("Wrong block for lru list");
- remove_from_lru_list(bh);
-/* add to back of free list */
+ if (bh) {
+ struct buffer_head **bhp = &lru_list[bh->b_list];
- if(!lru_list[bh->b_list]) {
- lru_list[bh->b_list] = bh;
- lru_list[bh->b_list]->b_prev_free = bh;
- }
- if (!next_to_age[bh->b_list])
- next_to_age[bh->b_list] = bh;
+ if (bh == *bhp) {
+ *bhp = bh->b_next_free;
+ return;
+ }
+
+ if(bh->b_dev == B_FREE)
+ panic("Wrong block for lru list");
+
+ /* Add to back of free list. */
+ remove_from_lru_list(bh);
+ if(!*bhp) {
+ *bhp = bh;
+ (*bhp)->b_prev_free = bh;
+ }
- bh->b_next_free = lru_list[bh->b_list];
- bh->b_prev_free = lru_list[bh->b_list]->b_prev_free;
- lru_list[bh->b_list]->b_prev_free->b_next_free = bh;
- lru_list[bh->b_list]->b_prev_free = bh;
+ bh->b_next_free = *bhp;
+ bh->b_prev_free = (*bhp)->b_prev_free;
+ (*bhp)->b_prev_free->b_next_free = bh;
+ (*bhp)->b_prev_free = bh;
+ }
}
static inline void put_last_free(struct buffer_head * bh)
{
- int isize;
- if (!bh)
- return;
+ if (bh) {
+ struct buffer_head **bhp = &free_list[BUFSIZE_INDEX(bh->b_size)];
- isize = BUFSIZE_INDEX(bh->b_size);
- bh->b_dev = B_FREE; /* So it is obvious we are on the free list */
- /* add to back of free list */
- if(!free_list[isize]) {
- free_list[isize] = bh;
- bh->b_prev_free = bh;
- }
+ bh->b_dev = B_FREE; /* So it is obvious we are on the free list. */
- nr_free[isize]++;
- bh->b_next_free = free_list[isize];
- bh->b_prev_free = free_list[isize]->b_prev_free;
- free_list[isize]->b_prev_free->b_next_free = bh;
- free_list[isize]->b_prev_free = bh;
+ /* Add to back of free list. */
+ if(!*bhp) {
+ *bhp = bh;
+ bh->b_prev_free = bh;
+ }
+
+ bh->b_next_free = *bhp;
+ bh->b_prev_free = (*bhp)->b_prev_free;
+ (*bhp)->b_prev_free->b_next_free = bh;
+ (*bhp)->b_prev_free = bh;
+ }
}
static inline void insert_into_queues(struct buffer_head * bh)
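put_last_lru() and put_last_free() above both append to a circular doubly linked list in which the head's b_prev_free is the tail, so "add to back" is a constant-time splice just in front of the head. A small sketch of that splice on a toy list, assuming next/prev fields standing in for b_next_free/b_prev_free:

#include <assert.h>
#include <stddef.h>

struct buf {
        struct buf *next, *prev;        /* circular: head->prev is the tail */
};

static void put_last(struct buf **headp, struct buf *b)
{
        if (!*headp) {                  /* empty list: b becomes head and tail */
                *headp = b;
                b->prev = b;
        }
        /* Splice b in just before the head, i.e. at the back. */
        b->next = *headp;
        b->prev = (*headp)->prev;
        (*headp)->prev->next = b;
        (*headp)->prev = b;
}

int main(void)
{
        struct buf *head = NULL, a, b, c;
        put_last(&head, &a);
        put_last(&head, &b);
        put_last(&head, &c);
        assert(head == &a);             /* insertion order preserved: a, b, c */
        assert(head->prev == &c && c.next == &a && a.next == &b);
        return 0;
}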
@@ -450,30 +462,34 @@ static inline void insert_into_queues(struct buffer_head * bh)
/* put at end of free list */
if(bh->b_dev == B_FREE) {
put_last_free(bh);
- return;
- }
- if(!lru_list[bh->b_list]) {
- lru_list[bh->b_list] = bh;
- bh->b_prev_free = bh;
+ } else {
+ struct buffer_head **bhp = &lru_list[bh->b_list];
+
+ if(!*bhp) {
+ *bhp = bh;
+ bh->b_prev_free = bh;
+ }
+
+ if (bh->b_next_free)
+ panic("VFS: buffer LRU pointers corrupted");
+
+ bh->b_next_free = *bhp;
+ bh->b_prev_free = (*bhp)->b_prev_free;
+ (*bhp)->b_prev_free->b_next_free = bh;
+ (*bhp)->b_prev_free = bh;
+
+ nr_buffers_type[bh->b_list]++;
+
+ /* Put the buffer in new hash-queue if it has a device. */
+ if (bh->b_dev) {
+ struct buffer_head **bhp = &hash(bh->b_dev, bh->b_blocknr);
+ if((bh->b_next = *bhp) != NULL)
+ (*bhp)->b_pprev = &bh->b_next;
+ *bhp = bh;
+ bh->b_pprev = bhp; /* Exists in bh hashes. */
+ } else
+ bh->b_pprev = NULL; /* Not in bh hashes. */
}
- if (!next_to_age[bh->b_list])
- next_to_age[bh->b_list] = bh;
- if (bh->b_next_free) panic("VFS: buffer LRU pointers corrupted");
- bh->b_next_free = lru_list[bh->b_list];
- bh->b_prev_free = lru_list[bh->b_list]->b_prev_free;
- lru_list[bh->b_list]->b_prev_free->b_next_free = bh;
- lru_list[bh->b_list]->b_prev_free = bh;
- nr_buffers_type[bh->b_list]++;
- nr_buffers_st[BUFSIZE_INDEX(bh->b_size)][bh->b_list]++;
-/* put the buffer in new hash-queue if it has a device */
- bh->b_prev = NULL;
- bh->b_next = NULL;
- if (!(bh->b_dev))
- return;
- bh->b_next = hash(bh->b_dev,bh->b_blocknr);
- hash(bh->b_dev,bh->b_blocknr) = bh;
- if (bh->b_next)
- bh->b_next->b_prev = bh;
}
static inline struct buffer_head * find_buffer(kdev_t dev, int block, int size)
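The hash queues now carry a b_pprev back-pointer to whatever link points at the buffer (the table slot or the previous node's b_next), so insert_into_queues() above and remove_from_hash_queue() earlier never have to special-case the chain head or recompute the hash. A self-contained sketch of the same idiom, using an invented toy node type rather than struct buffer_head:

#include <assert.h>
#include <stddef.h>

struct node {
        struct node *next;
        struct node **pprev;    /* points at whatever points at us */
};

static void hash_insert(struct node **head, struct node *n)
{
        if ((n->next = *head) != NULL)
                (*head)->pprev = &n->next;
        *head = n;
        n->pprev = head;
}

static void hash_remove(struct node *n)
{
        if (n->pprev) {                 /* only if actually hashed */
                if (n->next)
                        n->next->pprev = n->pprev;
                *n->pprev = n->next;
                n->pprev = NULL;
        }
}

int main(void)
{
        struct node *bucket = NULL, a, b;
        hash_insert(&bucket, &a);
        hash_insert(&bucket, &b);       /* bucket -> b -> a */
        hash_remove(&b);                /* removing the head needs no special case */
        assert(bucket == &a && a.pprev == &bucket);
        hash_remove(&a);
        assert(bucket == NULL);
        return 0;
}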
@@ -481,14 +497,14 @@ static inline struct buffer_head * find_buffer(kdev_t dev, int block, int size)
struct buffer_head * tmp;
for (tmp = hash(dev,block) ; tmp != NULL ; tmp = tmp->b_next)
- if (tmp->b_blocknr == block && tmp->b_dev == dev)
+ if (tmp->b_blocknr == block && tmp->b_dev == dev) {
if (tmp->b_size == size)
return tmp;
- else {
- printk("VFS: Wrong blocksize on device %s\n",
- kdevname(dev));
- return NULL;
- }
+
+ printk("VFS: Wrong blocksize on device %s\n",
+ kdevname(dev));
+ return NULL;
+ }
return NULL;
}
@@ -508,15 +524,36 @@ struct buffer_head * get_hash_table(kdev_t dev, int block, int size)
return NULL;
bh->b_count++;
wait_on_buffer(bh);
- if (bh->b_dev == dev && bh->b_blocknr == block
- && bh->b_size == size)
+ if (bh->b_dev == dev &&
+ bh->b_blocknr == block &&
+ bh->b_size == size)
return bh;
bh->b_count--;
}
}
+unsigned int get_hardblocksize(kdev_t dev)
+{
+ /*
+ * Get the hard sector size for the given device. If we don't know
+ * what it is, return 0.
+ */
+ if (hardsect_size[MAJOR(dev)] != NULL) {
+ int blksize = hardsect_size[MAJOR(dev)][MINOR(dev)];
+ if (blksize != 0)
+ return blksize;
+ }
+
+ /*
+ * We don't know what the hardware sector size for this device is.
+ * Return 0 indicating that we don't know.
+ */
+ return 0;
+}
+
void set_blocksize(kdev_t dev, int size)
{
+ extern int *blksize_size[];
int i, nlist;
struct buffer_head * bh, *bhnext;
@@ -540,13 +577,15 @@ void set_blocksize(kdev_t dev, int size)
sync_buffers(dev, 2);
blksize_size[MAJOR(dev)][MINOR(dev)] = size;
- /* We need to be quite careful how we do this - we are moving entries
- around on the free list, and we can get in a loop if we are not careful.*/
-
+ /* We need to be quite careful how we do this - we are moving entries
+ * around on the free list, and we can get in a loop if we are not careful.
+ */
for(nlist = 0; nlist < NR_LIST; nlist++) {
bh = lru_list[nlist];
for (i = nr_buffers_type[nlist]*2 ; --i > 0 ; bh = bhnext) {
- if(!bh) break;
+ if(!bh)
+ break;
+
bhnext = bh->b_next_free;
if (bh->b_dev != dev)
continue;
@@ -565,191 +604,154 @@ void set_blocksize(kdev_t dev, int size)
}
}
-#define BADNESS(bh) (buffer_dirty(bh) || buffer_locked(bh))
-
-void refill_freelist(int size)
+/* Check if a buffer is OK to be reclaimed. */
+static inline int can_reclaim(struct buffer_head *bh, int size)
{
- struct buffer_head * bh, * tmp;
- struct buffer_head * candidate[NR_LIST];
- unsigned int best_time, winner;
- int isize = BUFSIZE_INDEX(size);
- int buffers[NR_LIST];
- int i;
- int needed;
+ if (bh->b_count ||
+ buffer_protected(bh) ||
+ buffer_locked(bh))
+ return 0;
+
+ if (atomic_read(&mem_map[MAP_NR((unsigned long) bh->b_data)].count) != 1 ||
+ buffer_dirty(bh)) {
+ refile_buffer(bh);
+ return 0;
+ }
- /* First see if we even need this. Sometimes it is advantageous
- to request some blocks in a filesystem that we know that we will
- be needing ahead of time. */
+ if (bh->b_size != size)
+ return 0;
- if (nr_free[isize] > 100)
- return;
+ return 1;
+}
- ++refilled;
- /* If there are too many dirty buffers, we wake up the update process
- now so as to ensure that there are still clean buffers available
- for user processes to use (and dirty) */
+/* Find a candidate buffer to be reclaimed. */
+static struct buffer_head *find_candidate(struct buffer_head *list,
+ int *list_len, int size)
+{
+ struct buffer_head *bh;
- /* We are going to try to locate this much memory */
- needed =bdf_prm.b_un.nrefill * size;
+ for (bh = list;
+ bh && (*list_len) > 0;
+ bh = bh->b_next_free, (*list_len)--) {
+ if (size != bh->b_size) {
+ /* This provides a mechanism for freeing blocks
+ * of other sizes, this is necessary now that we
+ * no longer have the lav code.
+ */
+ try_to_free_buffer(bh,&bh,1);
+ if (!bh)
+ break;
+ continue;
+ }
- while (nr_free_pages > min_free_pages*2 && needed > 0 &&
- grow_buffers(GFP_BUFFER, size)) {
- needed -= PAGE_SIZE;
+ if (buffer_locked(bh) &&
+ (bh->b_list == BUF_LOCKED || bh->b_list == BUF_LOCKED1)) {
+ /* Buffers are written in the order they are placed
+ * on the locked list. If we encounter a locked
+ * buffer here, this means that the rest of them
+ * are also locked.
+ */
+ (*list_len) = 0;
+ return NULL;
+ }
+
+ if (can_reclaim(bh,size))
+ return bh;
}
- if(needed <= 0) return;
+ return NULL;
+}
+
+static void refill_freelist(int size)
+{
+ struct buffer_head * bh;
+ struct buffer_head * candidate[BUF_DIRTY];
+ unsigned int best_time, winner;
+ int buffers[BUF_DIRTY];
+ int i;
+ int needed;
- /* See if there are too many buffers of a different size.
- If so, victimize them */
+ refilled = 1;
+ /* If there are too many dirty buffers, we wake up the update process
+ * now so as to ensure that there are still clean buffers available
+ * for user processes to use (and dirty).
+ */
+
+ /* We are going to try to locate this much memory. */
+ needed = bdf_prm.b_un.nrefill * size;
- while(maybe_shrink_lav_buffers(size))
- {
- if(!grow_buffers(GFP_BUFFER, size)) break;
- needed -= PAGE_SIZE;
- if(needed <= 0) return;
- };
+ while ((nr_free_pages > min_free_pages*2) &&
+ (needed > 0) &&
+ grow_buffers(GFP_BUFFER, size))
+ needed -= PAGE_SIZE;
+repeat:
/* OK, we cannot grow the buffer cache, now try to get some
- from the lru list */
+ * from the lru list.
+ *
+ * First set the candidate pointers to usable buffers. This
+ * should be quick nearly all of the time.
+ */
- /* First set the candidate pointers to usable buffers. This
- should be quick nearly all of the time. */
+ if(needed <= 0)
+ return;
-repeat0:
- for(i=0; i<NR_LIST; i++){
- if(i == BUF_DIRTY || i == BUF_SHARED ||
- nr_buffers_type[i] == 0) {
- candidate[i] = NULL;
- buffers[i] = 0;
- continue;
- }
+ for(i=0; i<BUF_DIRTY; i++) {
buffers[i] = nr_buffers_type[i];
- for (bh = lru_list[i]; buffers[i] > 0; bh = tmp, buffers[i]--)
- {
- if(buffers[i] < 0) panic("Here is the problem");
- tmp = bh->b_next_free;
- if (!bh) break;
-
- if (mem_map[MAP_NR((unsigned long) bh->b_data)].count != 1 ||
- buffer_dirty(bh)) {
- refile_buffer(bh);
- continue;
- }
-
- if (bh->b_count || buffer_protected(bh) || bh->b_size != size)
- continue;
-
- /* Buffers are written in the order they are placed
- on the locked list. If we encounter a locked
- buffer here, this means that the rest of them
- are also locked */
- if (buffer_locked(bh) && (i == BUF_LOCKED || i == BUF_LOCKED1)) {
- buffers[i] = 0;
- break;
- }
-
- if (BADNESS(bh)) continue;
- break;
- };
- if(!buffers[i]) candidate[i] = NULL; /* Nothing on this list */
- else candidate[i] = bh;
- if(candidate[i] && candidate[i]->b_count) panic("Here is the problem");
+ candidate[i] = find_candidate(lru_list[i], &buffers[i], size);
}
- repeat:
- if(needed <= 0) return;
-
- /* Now see which candidate wins the election */
+ /* Now see which candidate wins the election. */
winner = best_time = UINT_MAX;
- for(i=0; i<NR_LIST; i++){
- if(!candidate[i]) continue;
- if(candidate[i]->b_lru_time < best_time){
+ for(i=0; i<BUF_DIRTY; i++) {
+ if(!candidate[i])
+ continue;
+ if(candidate[i]->b_lru_time < best_time) {
best_time = candidate[i]->b_lru_time;
winner = i;
}
}
- /* If we have a winner, use it, and then get a new candidate from that list */
+ /* If we have a winner, use it, and then get a new candidate from that list. */
if(winner != UINT_MAX) {
i = winner;
- bh = candidate[i];
- candidate[i] = bh->b_next_free;
- if(candidate[i] == bh) candidate[i] = NULL; /* Got last one */
- if (bh->b_count || bh->b_size != size)
- panic("Busy buffer in candidate list\n");
- if (mem_map[MAP_NR((unsigned long) bh->b_data)].count != 1)
- panic("Shared buffer in candidate list\n");
- if (buffer_protected(bh))
- panic("Protected buffer in candidate list\n");
- if (BADNESS(bh)) panic("Buffer in candidate list with BADNESS != 0\n");
+ while (needed>0 && (bh=candidate[i])) {
+ candidate[i] = bh->b_next_free;
+ if(candidate[i] == bh)
+ candidate[i] = NULL; /* Got last one */
+ remove_from_queues(bh);
+ bh->b_dev = B_FREE;
+ put_last_free(bh);
+ needed -= bh->b_size;
+ buffers[i]--;
+ if(buffers[i] == 0)
+ candidate[i] = NULL;
- if(bh->b_dev == B_FREE)
- panic("Wrong list");
- remove_from_queues(bh);
- bh->b_dev = B_FREE;
- put_last_free(bh);
- needed -= bh->b_size;
- buffers[i]--;
- if(buffers[i] < 0) panic("Here is the problem");
-
- if(buffers[i] == 0) candidate[i] = NULL;
-
- /* Now all we need to do is advance the candidate pointer
- from the winner list to the next usable buffer */
- if(candidate[i] && buffers[i] > 0){
- if(buffers[i] <= 0) panic("Here is another problem");
- for (bh = candidate[i]; buffers[i] > 0; bh = tmp, buffers[i]--) {
- if(buffers[i] < 0) panic("Here is the problem");
- tmp = bh->b_next_free;
- if (!bh) break;
-
- if (mem_map[MAP_NR((unsigned long) bh->b_data)].count != 1 ||
- buffer_dirty(bh)) {
- refile_buffer(bh);
- continue;
- };
-
- if (bh->b_count || buffer_protected(bh) || bh->b_size != size)
- continue;
-
- /* Buffers are written in the order they are
- placed on the locked list. If we encounter
- a locked buffer here, this means that the
- rest of them are also locked */
- if (buffer_locked(bh) && (i == BUF_LOCKED || i == BUF_LOCKED1)) {
- buffers[i] = 0;
- break;
- }
-
- if (BADNESS(bh)) continue;
- break;
- };
- if(!buffers[i]) candidate[i] = NULL; /* Nothing here */
- else candidate[i] = bh;
- if(candidate[i] && candidate[i]->b_count)
- panic("Here is the problem");
+ if (candidate[i] && !can_reclaim(candidate[i],size))
+ candidate[i] = find_candidate(candidate[i],
+ &buffers[i], size);
}
-
- goto repeat;
+ if (needed >= 0)
+ goto repeat;
}
- if(needed <= 0) return;
+ if(needed <= 0)
+ return;
/* Too bad, that was not enough. Try a little harder to grow some. */
-
if (nr_free_pages > min_free_pages + 5) {
if (grow_buffers(GFP_BUFFER, size)) {
needed -= PAGE_SIZE;
- goto repeat0;
- };
+ goto repeat;
+ }
}
- /* and repeat until we find something good */
+ /* And repeat until we find something good. */
if (!grow_buffers(GFP_ATOMIC, size))
wakeup_bdflush(1);
needed -= PAGE_SIZE;
- goto repeat0;
+ goto repeat;
}
/*
@@ -767,12 +769,10 @@ struct buffer_head * getblk(kdev_t dev, int block, int size)
struct buffer_head * bh;
int isize = BUFSIZE_INDEX(size);
- /* Update this for the buffer size lav. */
- buffer_usage[isize]++;
-
/* If there are too many dirty buffers, we wake up the update process
- now so as to ensure that there are still clean buffers available
- for user processes to use (and dirty) */
+ * now so as to ensure that there are still clean buffers available
+ * for user processes to use (and dirty).
+ */
repeat:
bh = get_hash_table(dev, block, size);
if (bh) {
@@ -785,7 +785,8 @@ repeat:
return bh;
}
- while(!free_list[isize]) refill_freelist(size);
+ while(!free_list[isize])
+ refill_freelist(size);
if (find_buffer(dev,block,size))
goto repeat;
@@ -793,8 +794,9 @@ repeat:
bh = free_list[isize];
remove_from_free_list(bh);
-/* OK, FINALLY we know that this buffer is the only one of its kind, */
-/* and that it's unused (b_count=0), unlocked (buffer_locked=0), and clean */
+ /* OK, FINALLY we know that this buffer is the only one of its kind,
+ * and that it's unused (b_count=0), unlocked (buffer_locked=0), and clean.
+ */
bh->b_count=1;
bh->b_flushtime=0;
bh->b_state=(1<<BH_Touched);
@@ -809,7 +811,7 @@ void set_writetime(struct buffer_head * buf, int flag)
int newtime;
if (buffer_dirty(buf)) {
- /* Move buffer to dirty list if jiffies is clear */
+ /* Move buffer to dirty list if jiffies is clear. */
newtime = jiffies + (flag ? bdf_prm.b_un.age_super :
bdf_prm.b_un.age_buffer);
if(!buf->b_flushtime || buf->b_flushtime > newtime)
@@ -827,7 +829,6 @@ void set_writetime(struct buffer_head * buf, int flag)
void refile_buffer(struct buffer_head * buf)
{
int dispose;
- int isize;
if(buf->b_dev == B_FREE) {
printk("Attempt to refile free buffer\n");
@@ -835,17 +836,14 @@ void refile_buffer(struct buffer_head * buf)
}
if (buffer_dirty(buf))
dispose = BUF_DIRTY;
- else if ((mem_map[MAP_NR((unsigned long) buf->b_data)].count > 1) || buffer_protected(buf))
- dispose = BUF_SHARED;
else if (buffer_locked(buf))
dispose = BUF_LOCKED;
- else if (buf->b_list == BUF_SHARED)
- dispose = BUF_UNSHARED;
else
dispose = BUF_CLEAN;
- if(dispose == BUF_CLEAN) buf->b_lru_time = jiffies;
- if(dispose != buf->b_list) {
- if(dispose == BUF_DIRTY || dispose == BUF_UNSHARED)
+ if(dispose == BUF_CLEAN)
+ buf->b_lru_time = jiffies;
+ if(dispose != buf->b_list) {
+ if(dispose == BUF_DIRTY)
buf->b_lru_time = jiffies;
if(dispose == BUF_LOCKED &&
(buf->b_flushtime - buf->b_lru_time) <= bdf_prm.b_un.age_super)
@@ -854,19 +852,21 @@ void refile_buffer(struct buffer_head * buf)
buf->b_list = dispose;
insert_into_queues(buf);
if (dispose == BUF_DIRTY) {
- /* This buffer is dirty, maybe we need to start flushing. */
- /* If too high a percentage of the buffers are dirty... */
- if (nr_buffers_type[BUF_DIRTY] >
- (nr_buffers - nr_buffers_type[BUF_SHARED]) *
- bdf_prm.b_un.nfract/100)
- wakeup_bdflush(0);
- /* If this is a loop device, and
- * more than half of the buffers of this size are dirty... */
- /* (Prevents no-free-buffers deadlock with loop device.) */
- isize = BUFSIZE_INDEX(buf->b_size);
- if (MAJOR(buf->b_dev) == LOOP_MAJOR &&
- nr_buffers_st[isize][BUF_DIRTY]*2>nr_buffers_size[isize])
- wakeup_bdflush(1);
+ int too_many = (nr_buffers * bdf_prm.b_un.nfract/100);
+
+ /* This buffer is dirty, maybe we need to start flushing.
+ * If too high a percentage of the buffers are dirty...
+ */
+ if (nr_buffers_type[BUF_DIRTY] > too_many)
+ wakeup_bdflush(0);
+
+ /* If this is a loop device, and
+ * more than half of the buffers are dirty...
+ * (Prevents no-free-buffers deadlock with loop device.)
+ */
+ if (MAJOR(buf->b_dev) == LOOP_MAJOR &&
+ nr_buffers_type[BUF_DIRTY]*2>nr_buffers)
+ wakeup_bdflush(1);
}
}
}
@@ -878,7 +878,7 @@ void __brelse(struct buffer_head * buf)
{
wait_on_buffer(buf);
- /* If dirty, mark the time this buffer should be written back */
+ /* If dirty, mark the time this buffer should be written back. */
set_writetime(buf, 0);
refile_buffer(buf);
@@ -977,13 +977,13 @@ struct buffer_head * breada(kdev_t dev, int block, int bufsize,
else bhlist[j++] = bh;
}
- /* Request the read for these buffers, and then release them */
+ /* Request the read for these buffers, and then release them. */
if (j>1)
ll_rw_block(READA, (j-1), bhlist+1);
for(i=1; i<j; i++)
brelse(bhlist[i]);
- /* Wait for this buffer, and then continue on */
+ /* Wait for this buffer, and then continue on. */
bh = bhlist[0];
wait_on_buffer(bh);
if (buffer_uptodate(bh))
@@ -992,11 +992,15 @@ struct buffer_head * breada(kdev_t dev, int block, int bufsize,
return NULL;
}
-/*
- * See fs/inode.c for the weird use of volatile..
- */
static void put_unused_buffer_head(struct buffer_head * bh)
{
+ if (nr_unused_buffer_heads >= MAX_UNUSED_BUFFERS) {
+ nr_buffer_heads--;
+ kmem_cache_free(bh_cachep, bh);
+ return;
+ }
+ memset(bh,0,sizeof(*bh));
+ nr_unused_buffer_heads++;
bh->b_next_free = unused_list;
unused_list = bh;
wake_up(&buffer_wait);
@@ -1004,24 +1008,20 @@ static void put_unused_buffer_head(struct buffer_head * bh)
static void get_more_buffer_heads(void)
{
- int i;
struct buffer_head * bh;
- for (;;) {
- if (unused_list)
- return;
-
- /*
- * This is critical. We can't swap out pages to get
+ while (!unused_list) {
+ /* This is critical. We can't swap out pages to get
* more buffer heads, because the swap-out may need
- * more buffer-heads itself. Thus GFP_ATOMIC.
+ * more buffer-heads itself. Thus SLAB_ATOMIC.
*/
- bh = (struct buffer_head *) get_free_page(GFP_ATOMIC);
- if (bh)
- break;
+ if((bh = kmem_cache_alloc(bh_cachep, SLAB_ATOMIC)) != NULL) {
+ put_unused_buffer_head(bh);
+ nr_buffer_heads++;
+ return;
+ }
- /*
- * Uhhuh. We're _really_ low on memory. Now we just
+ /* Uhhuh. We're _really_ low on memory. Now we just
* wait for old buffer heads to become free due to
* finishing IO..
*/
@@ -1029,10 +1029,6 @@ static void get_more_buffer_heads(void)
sleep_on(&buffer_wait);
}
- for (nr_buffer_heads+=i=PAGE_SIZE/sizeof*bh ; i>0; i--) {
- bh->b_next_free = unused_list; /* only make link */
- unused_list = bh++;
- }
}
/*
@@ -1051,17 +1047,15 @@ static void get_more_buffer_heads(void)
static inline void recover_reusable_buffer_heads(void)
{
if (reuse_list) {
- struct buffer_head *bh;
- unsigned long flags;
+ struct buffer_head *head;
+
+ head = xchg(&reuse_list, NULL);
- save_flags(flags);
do {
- cli();
- bh = reuse_list;
- reuse_list = bh->b_next_free;
- restore_flags(flags);
+ struct buffer_head *bh = head;
+ head = head->b_next_free;
put_unused_buffer_head(bh);
- } while (reuse_list);
+ } while (head);
}
}
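recover_reusable_buffer_heads() above now detaches the whole reuse_list with a single xchg() instead of popping entries one at a time under cli(); once the chain has been swapped out it is privately owned and can be walked without further locking. A user-space sketch of the same pattern, substituting C11 atomics for the kernel's xchg(); the node type and helper names are invented for illustration:

#include <assert.h>
#include <stdatomic.h>
#include <stddef.h>

struct node {
        struct node *next;
};

/* Shared list head updated by producers (stands in for reuse_list). */
static _Atomic(struct node *) reuse_list = NULL;

static void producer_push(struct node *n)
{
        n->next = atomic_load(&reuse_list);
        while (!atomic_compare_exchange_weak(&reuse_list, &n->next, n))
                ;                       /* retry with the updated snapshot */
}

static int consumer_recover(void)
{
        /* Grab the entire chain atomically, like xchg(&reuse_list, NULL). */
        struct node *head = atomic_exchange(&reuse_list, NULL);
        int n = 0;

        while (head) {                  /* now private: no locking needed */
                struct node *cur = head;
                head = head->next;
                /* put_unused_buffer_head(cur) would go here */
                (void)cur;
                n++;
        }
        return n;
}

int main(void)
{
        struct node a, b;
        producer_push(&a);
        producer_push(&b);
        assert(consumer_recover() == 2);
        assert(consumer_recover() == 0);
        return 0;
}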
@@ -1075,6 +1069,7 @@ static struct buffer_head * get_unused_buffer_head(void)
return NULL;
bh = unused_list;
unused_list = bh->b_next_free;
+ nr_unused_buffer_heads--;
return bh;
}
@@ -1351,7 +1346,7 @@ int generic_readpage(struct inode * inode, struct page * page)
int *p, nr[PAGE_SIZE/512];
int i;
- page->count++;
+ atomic_inc(&page->count);
set_bit(PG_locked, &page->flags);
set_bit(PG_free_after, &page->flags);
@@ -1400,7 +1395,6 @@ static int grow_buffers(int pri, int size)
tmp = bh;
while (1) {
- nr_free[isize]++;
if (insert_point) {
tmp->b_next_free = insert_point->b_next_free;
tmp->b_prev_free = insert_point;
@@ -1412,7 +1406,6 @@ static int grow_buffers(int pri, int size)
}
insert_point = tmp;
++nr_buffers;
- ++nr_buffers_size[isize];
if (tmp->b_this_page)
tmp = tmp->b_this_page;
else
@@ -1442,7 +1435,6 @@ int try_to_free_buffer(struct buffer_head * bh, struct buffer_head ** bhp,
{
unsigned long page;
struct buffer_head * tmp, * p;
- int isize = BUFSIZE_INDEX(bh->b_size);
*bhp = bh;
page = (unsigned long) bh->b_data;
@@ -1464,193 +1456,20 @@ int try_to_free_buffer(struct buffer_head * bh, struct buffer_head ** bhp,
p = tmp;
tmp = tmp->b_this_page;
nr_buffers--;
- nr_buffers_size[isize]--;
- if (p == *bhp)
- {
- *bhp = p->b_prev_free;
- if (p == *bhp) /* Was this the last in the list? */
- *bhp = NULL;
- }
+ if (p == *bhp) {
+ *bhp = p->b_prev_free;
+ if (p == *bhp) /* Was this the last in the list? */
+ *bhp = NULL;
+ }
remove_from_queues(p);
put_unused_buffer_head(p);
} while (tmp != bh);
buffermem -= PAGE_SIZE;
mem_map[MAP_NR(page)].buffers = NULL;
free_page(page);
- return !mem_map[MAP_NR(page)].count;
+ return !atomic_read(&mem_map[MAP_NR(page)].count);
}
-/* Age buffers on a given page, according to whether they have been
- visited recently or not. */
-static inline void age_buffer(struct buffer_head *bh)
-{
- struct buffer_head *tmp = bh;
- int touched = 0;
-
- /*
- * When we age a page, we mark all other buffers in the page
- * with the "has_aged" flag. Then, when these aliased buffers
- * come up for aging, we skip them until next pass. This
- * ensures that a page full of multiple buffers only gets aged
- * once per pass through the lru lists.
- */
- if (clear_bit(BH_Has_aged, &bh->b_state))
- return;
-
- do {
- touched |= clear_bit(BH_Touched, &tmp->b_state);
- tmp = tmp->b_this_page;
- set_bit(BH_Has_aged, &tmp->b_state);
- } while (tmp != bh);
- clear_bit(BH_Has_aged, &bh->b_state);
-
- if (touched)
- touch_page(mem_map + MAP_NR((unsigned long) bh->b_data));
- else
- age_page(mem_map + MAP_NR((unsigned long) bh->b_data));
-}
-
-/*
- * Consult the load average for buffers and decide whether or not
- * we should shrink the buffers of one size or not. If we decide yes,
- * do it and return 1. Else return 0. Do not attempt to shrink size
- * that is specified.
- *
- * I would prefer not to use a load average, but the way things are now it
- * seems unavoidable. The way to get rid of it would be to force clustering
- * universally, so that when we reclaim buffers we always reclaim an entire
- * page. Doing this would mean that we all need to move towards QMAGIC.
- */
-
-static int maybe_shrink_lav_buffers(int size)
-{
- int nlist;
- int isize;
- int total_lav, total_n_buffers, n_sizes;
-
- /* Do not consider the shared buffers since they would not tend
- to have getblk called very often, and this would throw off
- the lav. They are not easily reclaimable anyway (let the swapper
- make the first move). */
-
- total_lav = total_n_buffers = n_sizes = 0;
- for(nlist = 0; nlist < NR_SIZES; nlist++)
- {
- total_lav += buffers_lav[nlist];
- if(nr_buffers_size[nlist]) n_sizes++;
- total_n_buffers += nr_buffers_size[nlist];
- total_n_buffers -= nr_buffers_st[nlist][BUF_SHARED];
- }
-
- /* See if we have an excessive number of buffers of a particular
- size - if so, victimize that bunch. */
-
- isize = (size ? BUFSIZE_INDEX(size) : -1);
-
- if (n_sizes > 1)
- for(nlist = 0; nlist < NR_SIZES; nlist++)
- {
- if(nlist == isize) continue;
- if(nr_buffers_size[nlist] &&
- bdf_prm.b_un.lav_const * buffers_lav[nlist]*total_n_buffers <
- total_lav * (nr_buffers_size[nlist] - nr_buffers_st[nlist][BUF_SHARED]))
- if(shrink_specific_buffers(6, bufferindex_size[nlist]))
- return 1;
- }
- return 0;
-}
-
-/*
- * Try to free up some pages by shrinking the buffer-cache
- *
- * Priority tells the routine how hard to try to shrink the
- * buffers: 6 means "don't bother too much", while a value
- * of 0 means "we'd better get some free pages now".
- *
- * "limit" is meant to limit the shrink-action only to pages
- * that are in the 0 - limit address range, for DMA re-allocations.
- * We ignore that right now.
- */
-
-static int shrink_specific_buffers(unsigned int priority, int size)
-{
- struct buffer_head *bh;
- int nlist;
- int i, isize, isize1;
-
-#ifdef DEBUG
- if(size) printk("Shrinking buffers of size %d\n", size);
-#endif
- /* First try the free lists, and see if we can get a complete page
- from here */
- isize1 = (size ? BUFSIZE_INDEX(size) : -1);
-
- for(isize = 0; isize<NR_SIZES; isize++){
- if(isize1 != -1 && isize1 != isize) continue;
- bh = free_list[isize];
- if(!bh) continue;
- for (i=0 ; !i || bh != free_list[isize]; bh = bh->b_next_free, i++) {
- if (bh->b_count || buffer_protected(bh) ||
- !bh->b_this_page)
- continue;
- if (!age_of((unsigned long) bh->b_data) &&
- try_to_free_buffer(bh, &bh, 6))
- return 1;
- if(!bh) break;
- /* Some interrupt must have used it after we
- freed the page. No big deal - keep looking */
- }
- }
-
- /* Not enough in the free lists, now try the lru list */
-
- for(nlist = 0; nlist < NR_LIST; nlist++) {
- repeat1:
- if(priority > 2 && nlist == BUF_SHARED) continue;
- i = nr_buffers_type[nlist];
- i = ((BUFFEROUT_WEIGHT * i) >> 10) >> priority;
- for ( ; i > 0; i-- ) {
- bh = next_to_age[nlist];
- if (!bh)
- break;
- next_to_age[nlist] = bh->b_next_free;
-
- /* First, age the buffer. */
- age_buffer(bh);
- /* We may have stalled while waiting for I/O
- to complete. */
- if(bh->b_list != nlist) goto repeat1;
- if (bh->b_count || buffer_protected(bh) ||
- !bh->b_this_page)
- continue;
- if(size && bh->b_size != size) continue;
- if (buffer_locked(bh))
- if (priority)
- continue;
- else
- wait_on_buffer(bh);
- if (buffer_dirty(bh)) {
- bh->b_count++;
- bh->b_flushtime = 0;
- ll_rw_block(WRITEA, 1, &bh);
- bh->b_count--;
- continue;
- }
- /* At priority 6, only consider really old
- (age==0) buffers for reclaiming. At
- priority 0, consider any buffers. */
- if ((age_of((unsigned long) bh->b_data) >>
- (6-priority)) > 0)
- continue;
- if (try_to_free_buffer(bh, &bh, 0))
- return 1;
- if(!bh) break;
- }
- }
- return 0;
-}
-
-
/* ================== Debugging =================== */
void show_buffers(void)
@@ -1658,17 +1477,18 @@ void show_buffers(void)
struct buffer_head * bh;
int found = 0, locked = 0, dirty = 0, used = 0, lastused = 0;
int protected = 0;
- int shared;
- int nlist, isize;
+ int nlist;
+ static char *buf_types[NR_LIST] = {"CLEAN","LOCKED","LOCKED1","DIRTY"};
printk("Buffer memory: %6dkB\n",buffermem>>10);
printk("Buffer heads: %6d\n",nr_buffer_heads);
printk("Buffer blocks: %6d\n",nr_buffers);
for(nlist = 0; nlist < NR_LIST; nlist++) {
- shared = found = locked = dirty = used = lastused = protected = 0;
+ found = locked = dirty = used = lastused = protected = 0;
bh = lru_list[nlist];
if(!bh) continue;
+
do {
found++;
if (buffer_locked(bh))
@@ -1677,260 +1497,42 @@ void show_buffers(void)
protected++;
if (buffer_dirty(bh))
dirty++;
- if (mem_map[MAP_NR(((unsigned long) bh->b_data))].count != 1)
- shared++;
if (bh->b_count)
used++, lastused = found;
bh = bh->b_next_free;
} while (bh != lru_list[nlist]);
- printk("Buffer[%d] mem: %d buffers, %d used (last=%d), "
- "%d locked, %d protected, %d dirty %d shrd\n",
- nlist, found, used, lastused,
- locked, protected, dirty, shared);
- };
- printk("Size [LAV] Free Clean Unshar Lck Lck1 Dirty Shared \n");
- for(isize = 0; isize<NR_SIZES; isize++){
- printk("%5d [%5d]: %7d ", bufferindex_size[isize],
- buffers_lav[isize], nr_free[isize]);
- for(nlist = 0; nlist < NR_LIST; nlist++)
- printk("%7d ", nr_buffers_st[isize][nlist]);
- printk("\n");
- }
-}
-
-
-/* ====================== Cluster patches for ext2 ==================== */
-
-/*
- * try_to_reassign() checks if all the buffers on this particular page
- * are unused, and reassign to a new cluster them if this is true.
- */
-static inline int try_to_reassign(struct buffer_head * bh, struct buffer_head ** bhp,
- kdev_t dev, unsigned int starting_block)
-{
- unsigned long page;
- struct buffer_head * tmp, * p;
-
- *bhp = bh;
- page = (unsigned long) bh->b_data;
- page &= PAGE_MASK;
- if(mem_map[MAP_NR(page)].count != 1) return 0;
- tmp = bh;
- do {
- if (!tmp)
- return 0;
-
- if (tmp->b_count || buffer_protected(tmp) ||
- buffer_dirty(tmp) || buffer_locked(tmp))
- return 0;
- tmp = tmp->b_this_page;
- } while (tmp != bh);
- tmp = bh;
-
- while((unsigned long) tmp->b_data & (PAGE_SIZE - 1))
- tmp = tmp->b_this_page;
-
- /* This is the buffer at the head of the page */
- bh = tmp;
- do {
- p = tmp;
- tmp = tmp->b_this_page;
- remove_from_queues(p);
- p->b_dev = dev;
- mark_buffer_uptodate(p, 0);
- clear_bit(BH_Req, &p->b_state);
- p->b_blocknr = starting_block++;
- insert_into_queues(p);
- } while (tmp != bh);
- return 1;
-}
-
-/*
- * Try to find a free cluster by locating a page where
- * all of the buffers are unused. We would like this function
- * to be atomic, so we do not call anything that might cause
- * the process to sleep. The priority is somewhat similar to
- * the priority used in shrink_buffers.
- *
- * My thinking is that the kernel should end up using whole
- * pages for the buffer cache as much of the time as possible.
- * This way the other buffers on a particular page are likely
- * to be very near each other on the free list, and we will not
- * be expiring data prematurely. For now we only cannibalize buffers
- * of the same size to keep the code simpler.
- */
-static int reassign_cluster(kdev_t dev,
- unsigned int starting_block, int size)
-{
- struct buffer_head *bh;
- int isize = BUFSIZE_INDEX(size);
- int i;
-
- /* We want to give ourselves a really good shot at generating
- a cluster, and since we only take buffers from the free
- list, we "overfill" it a little. */
-
- while(nr_free[isize] < 32) refill_freelist(size);
-
- bh = free_list[isize];
- if(bh)
- for (i=0 ; !i || bh != free_list[isize] ; bh = bh->b_next_free, i++) {
- if (!bh->b_this_page) continue;
- if (try_to_reassign(bh, &bh, dev, starting_block))
- return 4;
- }
- return 0;
-}
-
-/* This function tries to generate a new cluster of buffers
- * from a new page in memory. We should only do this if we have
- * not expanded the buffer cache to the maximum size that we allow.
- */
-static unsigned long try_to_generate_cluster(kdev_t dev, int block, int size)
-{
- struct buffer_head * bh, * tmp, * arr[MAX_BUF_PER_PAGE];
- int isize = BUFSIZE_INDEX(size);
- unsigned long offset;
- unsigned long page;
- int nblock;
-
- page = get_free_page(GFP_NOBUFFER);
- if(!page) return 0;
-
- bh = create_buffers(page, size);
- if (!bh) {
- free_page(page);
- return 0;
- };
- nblock = block;
- for (offset = 0 ; offset < PAGE_SIZE ; offset += size) {
- if (find_buffer(dev, nblock++, size))
- goto not_aligned;
- }
- tmp = bh;
- nblock = 0;
- while (1) {
- arr[nblock++] = bh;
- bh->b_count = 1;
- bh->b_flushtime = 0;
- bh->b_state = 0;
- bh->b_dev = dev;
- bh->b_list = BUF_CLEAN;
- bh->b_blocknr = block++;
- nr_buffers++;
- nr_buffers_size[isize]++;
- insert_into_queues(bh);
- if (bh->b_this_page)
- bh = bh->b_this_page;
- else
- break;
- }
- buffermem += PAGE_SIZE;
- mem_map[MAP_NR(page)].buffers = bh;
- bh->b_this_page = tmp;
- while (nblock-- > 0)
- brelse(arr[nblock]);
- return 4; /* ?? */
-not_aligned:
- while ((tmp = bh) != NULL) {
- bh = bh->b_this_page;
- put_unused_buffer_head(tmp);
- }
- free_page(page);
- return 0;
-}
-
-unsigned long generate_cluster(kdev_t dev, int b[], int size)
-{
- int i, offset;
-
- for (i = 0, offset = 0 ; offset < PAGE_SIZE ; i++, offset += size) {
- if(i && b[i]-1 != b[i-1]) return 0; /* No need to cluster */
- if(find_buffer(dev, b[i], size)) return 0;
+ printk("%8s: %d buffers, %d used (last=%d), "
+ "%d locked, %d protected, %d dirty\n",
+ buf_types[nlist], found, used, lastused,
+ locked, protected, dirty);
};
-
- /* OK, we have a candidate for a new cluster */
-
- /* See if one size of buffer is over-represented in the buffer cache,
- if so reduce the numbers of buffers */
- if(maybe_shrink_lav_buffers(size))
- {
- int retval;
- retval = try_to_generate_cluster(dev, b[0], size);
- if(retval) return retval;
- };
-
- if (nr_free_pages > min_free_pages*2)
- return try_to_generate_cluster(dev, b[0], size);
- else
- return reassign_cluster(dev, b[0], size);
}
-unsigned long generate_cluster_swab32(kdev_t dev, int b[], int size)
-{
- int i, offset;
-
- for (i = 0, offset = 0 ; offset < PAGE_SIZE ; i++, offset += size) {
- if(i && le32_to_cpu(b[i])-1 !=
- le32_to_cpu(b[i-1])) return 0; /* No need to cluster */
- if(find_buffer(dev, le32_to_cpu(b[i]), size)) return 0;
- };
-
- /* OK, we have a candidate for a new cluster */
-
- /* See if one size of buffer is over-represented in the buffer cache,
- if so reduce the numbers of buffers */
- if(maybe_shrink_lav_buffers(size))
- {
- int retval;
- retval = try_to_generate_cluster(dev, le32_to_cpu(b[0]), size);
- if(retval) return retval;
- };
-
- if (nr_free_pages > min_free_pages*2)
- return try_to_generate_cluster(dev, le32_to_cpu(b[0]), size);
- else
- return reassign_cluster(dev, le32_to_cpu(b[0]), size);
-}
/* ===================== Init ======================= */
/*
- * This initializes the initial buffer free list. nr_buffers_type is set
- * to one less the actual number of buffers, as a sop to backwards
- * compatibility --- the old code did this (I think unintentionally,
- * but I'm not sure), and programs in the ps package expect it.
- * - TYT 8/30/92
+ * allocate the hash table and init the free list
+ * Use gfp() for the hash table to decrease TLB misses, use
+ * SLAB cache for buffer heads.
*/
void buffer_init(void)
{
- int i;
- int isize = BUFSIZE_INDEX(BLOCK_SIZE);
- long memsize = max_mapnr << PAGE_SHIFT;
-
- if (memsize >= 64*1024*1024)
- nr_hash = 65521;
- else if (memsize >= 32*1024*1024)
- nr_hash = 32749;
- else if (memsize >= 16*1024*1024)
- nr_hash = 16381;
- else if (memsize >= 8*1024*1024)
- nr_hash = 8191;
- else if (memsize >= 4*1024*1024)
- nr_hash = 4093;
- else nr_hash = 997;
-
- hash_table = (struct buffer_head **) vmalloc(nr_hash *
- sizeof(struct buffer_head *));
-
+ hash_table = (struct buffer_head **)
+ __get_free_pages(GFP_ATOMIC, HASH_PAGES_ORDER, 0);
+ if (!hash_table)
+ panic("Failed to allocate buffer hash table\n");
+ memset(hash_table,0,NR_HASH*sizeof(struct buffer_head *));
+
+ bh_cachep = kmem_cache_create("buffer_head",
+ sizeof(struct buffer_head),
+ sizeof(unsigned long) * 4,
+ SLAB_HWCACHE_ALIGN, NULL, NULL);
+ if(!bh_cachep)
+ panic("Cannot create buffer head SLAB cache\n");
- for (i = 0 ; i < nr_hash ; i++)
- hash_table[i] = NULL;
lru_list[BUF_CLEAN] = 0;
grow_buffers(GFP_KERNEL, BLOCK_SIZE);
- if (!free_list[isize])
- panic("VFS: Unable to initialize buffer free list!");
- return;
}
@@ -1966,7 +1568,7 @@ static void wakeup_bdflush(int wait)
asmlinkage int sync_old_buffers(void)
{
- int i, isize;
+ int i;
int ndirty, nwritten;
int nlist;
int ncount;
@@ -1985,6 +1587,7 @@ asmlinkage int sync_old_buffers(void)
ndirty = 0;
nwritten = 0;
repeat:
+
bh = lru_list[nlist];
if(bh)
for (i = nr_buffers_type[nlist]; i-- > 0; bh = next) {
@@ -2022,13 +1625,6 @@ asmlinkage int sync_old_buffers(void)
printk("Wrote %d/%d buffers\n", nwritten, ndirty);
#endif
- /* We assume that we only come through here on a regular
- schedule, like every 5 seconds. Now update load averages.
- Shift usage counts to prevent overflow. */
- for(isize = 0; isize<NR_SIZES; isize++){
- CALC_LOAD(buffers_lav[isize], bdf_prm.b_un.lav_const, buffer_usage[isize]);
- buffer_usage[isize] = 0;
- }
return 0;
}
@@ -2040,37 +1636,42 @@ asmlinkage int sync_old_buffers(void)
asmlinkage int sys_bdflush(int func, long data)
{
- int i, error;
+ int i, error = -EPERM;
+ lock_kernel();
if (!suser())
- return -EPERM;
+ goto out;
- if (func == 1)
- return sync_old_buffers();
+ if (func == 1) {
+ error = sync_old_buffers();
+ goto out;
+ }
/* Basically func 1 means read param 1, 2 means write param 1, etc */
if (func >= 2) {
i = (func-2) >> 1;
+ error = -EINVAL;
if (i < 0 || i >= N_PARAM)
- return -EINVAL;
+ goto out;
if((func & 1) == 0) {
- error = verify_area(VERIFY_WRITE, (void *) data, sizeof(int));
- if (error)
- return error;
- put_user(bdf_prm.data[i], (int*)data);
- return 0;
- };
+ error = put_user(bdf_prm.data[i], (int*)data);
+ goto out;
+ }
if (data < bdflush_min[i] || data > bdflush_max[i])
- return -EINVAL;
+ goto out;
bdf_prm.data[i] = data;
- return 0;
+ error = 0;
+ goto out;
};
/* Having func 0 used to launch the actual bdflush and then never
- return (unless explicitly killed). We return zero here to
- remain semi-compatible with present update(8) programs. */
-
- return 0;
+ * return (unless explicitly killed). We return zero here to
+ * remain semi-compatible with present update(8) programs.
+ */
+ error = 0;
+out:
+ unlock_kernel();
+ return error;
}
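sys_bdflush() above packs a parameter index and a read/write flag into func: for func >= 2 the index is (func - 2) >> 1 and bit 0 selects write (odd) versus read (even), so func 2 reads parameter 0, func 3 writes it, func 4 reads parameter 1, and so on. A tiny worked sketch of that decoding, with printf() standing in for the put_user()/assignment paths:

#include <stdio.h>

#define N_PARAM 9

static void decode(int func)
{
        if (func == 1) {
                printf("func %d: flush old buffers\n", func);
        } else if (func >= 2) {
                int i = (func - 2) >> 1;        /* parameter index */
                const char *op = (func & 1) ? "write" : "read";

                if (i < 0 || i >= N_PARAM)
                        printf("func %d: -EINVAL\n", func);
                else
                        printf("func %d: %s bdf_prm.data[%d]\n", func, op, i);
        } else {
                printf("func %d: no-op (old update(8) compatibility)\n", func);
        }
}

int main(void)
{
        /* func 2 reads param 0, func 3 writes param 0, func 4 reads param 1... */
        for (int f = 0; f <= 6; f++)
                decode(f);
        return 0;
}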
/* This is the actual bdflush daemon itself. It used to be started from
@@ -2111,11 +1712,7 @@ int bdflush(void * unused)
* and other internals and thus be subject to the SMP locking
* rules. (On a uniprocessor box this does nothing).
*/
-
-#ifdef __SMP__
lock_kernel();
- syscall_count++;
-#endif
for (;;) {
#ifdef DEBUG
@@ -2132,6 +1729,7 @@ int bdflush(void * unused)
ndirty = 0;
refilled = 0;
repeat:
+
bh = lru_list[nlist];
if(bh)
for (i = nr_buffers_type[nlist]; i-- > 0 && ndirty < bdf_prm.b_un.ndirty;
@@ -2192,29 +1790,9 @@ int bdflush(void * unused)
/* If there are still a lot of dirty buffers around, skip the sleep
and flush some more */
-
- if(nr_buffers_type[BUF_DIRTY] <= (nr_buffers - nr_buffers_type[BUF_SHARED]) *
- bdf_prm.b_un.nfract/100) {
+ if(nr_buffers_type[BUF_DIRTY] <= nr_buffers * bdf_prm.b_un.nfract/100) {
current->signal = 0;
interruptible_sleep_on(&bdflush_wait);
}
}
}
-
-
-/*
- * Overrides for Emacs so that we follow Linus's tabbing style.
- * Emacs will notice this stuff at the end of the file and automatically
- * adjust the settings for this buffer only. This must remain at the end
- * of the file.
- * ---------------------------------------------------------------------------
- * Local variables:
- * c-indent-level: 8
- * c-brace-imaginary-offset: 0
- * c-brace-offset: -8
- * c-argdecl-indent: 8
- * c-label-offset: -8
- * c-continued-statement-offset: 8
- * c-continued-brace-offset: 0
- * End:
- */