author     Ralf Baechle <ralf@linux-mips.org>   1997-12-06 23:51:34 +0000
committer  Ralf Baechle <ralf@linux-mips.org>   1997-12-06 23:51:34 +0000
commit     230e5ab6a084ed50470f101934782dbf54b0d06b
tree       5dd821c8d33f450470588e7a543f74bf74306e9e   /mm
parent     c9b1c8a64c6444d189856f1e26bdcb8b4cd0113a
Merge with Linux 2.1.67.
Diffstat (limited to 'mm')
-rw-r--r--  mm/filemap.c      127
-rw-r--r--  mm/mmap.c          73
-rw-r--r--  mm/swap_state.c    49
-rw-r--r--  mm/swapfile.c     114
-rw-r--r--  mm/vmscan.c        52
5 files changed, 256 insertions(+), 159 deletions(-)
diff --git a/mm/filemap.c b/mm/filemap.c
index c02a31696..b597bdde7 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -24,6 +24,7 @@
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/smp_lock.h>
+#include <linux/blkdev.h>
#include <asm/system.h>
#include <asm/pgtable.h>
@@ -445,12 +446,20 @@ static void profile_readahead(int async, struct file *filp)
#define MIN_READAHEAD PageAlignSize(4096*3)
#endif
+static inline int get_max_readahead(struct inode * inode)
+{
+ if (!inode->i_dev || !max_readahead[MAJOR(inode->i_dev)])
+ return MAX_READAHEAD;
+ return max_readahead[MAJOR(inode->i_dev)][MINOR(inode->i_dev)];
+}
+
static inline unsigned long generic_file_readahead(int reada_ok, struct file * filp, struct inode * inode,
unsigned long ppos, struct page * page,
unsigned long page_cache)
{
unsigned long max_ahead, ahead;
unsigned long raend;
+ int max_readahead = get_max_readahead(inode);
raend = filp->f_raend & PAGE_MASK;
max_ahead = 0;
@@ -534,8 +543,8 @@ static inline unsigned long generic_file_readahead(int reada_ok, struct file * f
filp->f_ramax += filp->f_ramax;
- if (filp->f_ramax > MAX_READAHEAD)
- filp->f_ramax = MAX_READAHEAD;
+ if (filp->f_ramax > max_readahead)
+ filp->f_ramax = max_readahead;
#ifdef PROFILE_READAHEAD
profile_readahead((reada_ok == 2), filp);
@@ -555,12 +564,14 @@ static inline unsigned long generic_file_readahead(int reada_ok, struct file * f
* of the logic when it comes to error handling etc.
*/
-long generic_file_read(struct inode * inode, struct file * filp,
- char * buf, unsigned long count)
+ssize_t generic_file_read(struct file * filp, char * buf,
+ size_t count, loff_t *ppos)
{
- int error, read;
- unsigned long pos, ppos, page_cache;
+ struct inode *inode = filp->f_dentry->d_inode;
+ ssize_t error, read;
+ size_t pos, pgpos, page_cache;
int reada_ok;
+ int max_readahead = get_max_readahead(inode);
if (!access_ok(VERIFY_WRITE, buf, count))
return -EFAULT;
@@ -570,8 +581,8 @@ long generic_file_read(struct inode * inode, struct file * filp,
read = 0;
page_cache = 0;
- pos = filp->f_pos;
- ppos = pos & PAGE_MASK;
+ pos = *ppos;
+ pgpos = pos & PAGE_MASK;
/*
* If the current position is outside the previous read-ahead window,
* we reset the current read-ahead context and set read ahead max to zero
@@ -579,7 +590,7 @@ long generic_file_read(struct inode * inode, struct file * filp,
* otherwise, we assume that the file accesses are sequential enough to
* continue read-ahead.
*/
- if (ppos > filp->f_raend || ppos + filp->f_rawin < filp->f_raend) {
+ if (pgpos > filp->f_raend || pgpos + filp->f_rawin < filp->f_raend) {
reada_ok = 0;
filp->f_raend = 0;
filp->f_ralen = 0;
@@ -600,15 +611,15 @@ long generic_file_read(struct inode * inode, struct file * filp,
} else {
unsigned long needed;
- needed = ((pos + count) & PAGE_MASK) - ppos;
+ needed = ((pos + count) & PAGE_MASK) - pgpos;
if (filp->f_ramax < needed)
filp->f_ramax = needed;
if (reada_ok && filp->f_ramax < MIN_READAHEAD)
filp->f_ramax = MIN_READAHEAD;
- if (filp->f_ramax > MAX_READAHEAD)
- filp->f_ramax = MAX_READAHEAD;
+ if (filp->f_ramax > max_readahead)
+ filp->f_ramax = max_readahead;
}
for (;;) {
@@ -736,7 +747,7 @@ page_read_error:
break;
}
- filp->f_pos = pos;
+ *ppos = pos;
filp->f_reada = 1;
if (page_cache)
free_page(page_cache);
@@ -911,7 +922,8 @@ static inline int do_write_page(struct inode * inode, struct file * file,
old_fs = get_fs();
set_fs(KERNEL_DS);
retval = -EIO;
- if (size == file->f_op->write(inode, file, (const char *) page, size))
+ if (size == file->f_op->write(file, (const char *) page,
+ size, &file->f_pos))
retval = 0;
set_fs(old_fs);
return retval;
@@ -1210,9 +1222,7 @@ int generic_file_mmap(struct file * file, struct vm_area_struct * vma)
static int msync_interval(struct vm_area_struct * vma,
unsigned long start, unsigned long end, int flags)
{
- if (!vma->vm_dentry)
- return 0;
- if (vma->vm_ops->sync) {
+ if (vma->vm_dentry && vma->vm_ops && vma->vm_ops->sync) {
int error;
error = vma->vm_ops->sync(vma, start, end-start, flags);
if (!error && (flags & MS_SYNC)) {
@@ -1301,21 +1311,23 @@ out:
* file system has to do this all by itself, unfortunately.
* okir@monad.swb.de
*/
-long
-generic_file_write(struct inode *inode, struct file *file, const char *buf, unsigned long count)
+ssize_t
+generic_file_write(struct file *file, const char *buf,
+ size_t count, loff_t *ppos)
{
+ struct inode *inode = file->f_dentry->d_inode;
struct page *page, **hash;
unsigned long page_cache = 0;
- unsigned long ppos, offset;
- unsigned int bytes, written;
+ unsigned long pgpos, offset;
+ unsigned long bytes, written;
unsigned long pos;
- int status, sync, didread;
+ long status, sync, didread;
if (!inode->i_op || !inode->i_op->updatepage)
return -EIO;
sync = file->f_flags & O_SYNC;
- pos = file->f_pos;
+ pos = *ppos;
written = 0;
status = 0;
@@ -1328,13 +1340,13 @@ generic_file_write(struct inode *inode, struct file *file, const char *buf, unsi
* allocate a free page.
*/
offset = (pos & ~PAGE_MASK);
- ppos = pos & PAGE_MASK;
+ pgpos = pos & PAGE_MASK;
if ((bytes = PAGE_SIZE - offset) > count)
bytes = count;
- hash = page_hash(inode, ppos);
- if (!(page = __find_page(inode, ppos, *hash))) {
+ hash = page_hash(inode, pgpos);
+ if (!(page = __find_page(inode, pgpos, *hash))) {
if (!page_cache) {
page_cache = __get_free_page(GFP_KERNEL);
if (!page_cache) {
@@ -1344,7 +1356,7 @@ generic_file_write(struct inode *inode, struct file *file, const char *buf, unsi
continue;
}
page = mem_map + MAP_NR(page_cache);
- add_to_page_cache(page, inode, ppos, hash);
+ add_to_page_cache(page, inode, pgpos, hash);
page_cache = 0;
}
@@ -1364,7 +1376,7 @@ page_wait:
* after the current end of file.
*/
if (!PageUptodate(page)) {
- if (bytes < PAGE_SIZE && ppos < inode->i_size) {
+ if (bytes < PAGE_SIZE && pgpos < inode->i_size) {
if (didread < 2)
status = inode->i_op->readpage(inode, page);
else
@@ -1390,7 +1402,7 @@ done_with_page:
pos += status;
buf += status;
}
- file->f_pos = pos;
+ *ppos = pos;
if (pos > inode->i_size)
inode->i_size = pos;
@@ -1400,3 +1412,60 @@ done_with_page:
return written;
return status;
}
+
+/*
+ * Support routines for directory cacheing using the page cache.
+ */
+
+/*
+ * Finds the page at the specified offset, installing a new page
+ * if requested. The count is incremented and the page is locked.
+ *
+ * Note: we don't have to worry about races here, as the caller
+ * is holding the inode semaphore.
+ */
+unsigned long get_cached_page(struct inode * inode, unsigned long offset,
+ int new)
+{
+ struct page * page;
+ struct page ** hash;
+ unsigned long page_cache;
+
+ hash = page_hash(inode, offset);
+ page = __find_page(inode, offset, *hash);
+ if (!page) {
+ if (!new)
+ goto out;
+ page_cache = get_free_page(GFP_KERNEL);
+ if (!page_cache)
+ goto out;
+ page = mem_map + MAP_NR(page_cache);
+ add_to_page_cache(page, inode, offset, hash);
+ }
+ if (atomic_read(&page->count) != 2)
+ printk("get_cached_page: page count=%d\n",
+ atomic_read(&page->count));
+ if (test_bit(PG_locked, &page->flags))
+ printk("get_cached_page: page already locked!\n");
+ set_bit(PG_locked, &page->flags);
+
+out:
+ return page_address(page);
+}
+
+/*
+ * Unlock and free a page.
+ */
+void put_cached_page(unsigned long addr)
+{
+ struct page * page = mem_map + MAP_NR(addr);
+
+ if (!test_bit(PG_locked, &page->flags))
+ printk("put_cached_page: page not locked!\n");
+ if (atomic_read(&page->count) != 2)
+ printk("put_cached_page: page count=%d\n",
+ atomic_read(&page->count));
+ clear_bit(PG_locked, &page->flags);
+ wake_up(&page->wait);
+ __free_page(page);
+}
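
The filemap.c hunks above add get_max_readahead(), which looks up a per-device readahead limit and falls back to the global maximum, and then clamp filp->f_ramax against that value instead of MAX_READAHEAD. The standalone sketch below mimics that lookup-and-clamp in plain userspace C; the table, device numbers, and sizes are invented for illustration and are not the kernel's data structures.

#include <stdio.h>

#define DEFAULT_MAX_READAHEAD (31 * 4096)  /* fallback when no per-device limit is set */

/* hypothetical per-device table; 0 means "no limit configured" */
static unsigned long max_readahead_for[4] = { 0, 16 * 4096, 0, 64 * 4096 };

static unsigned long get_max_readahead(int dev)
{
    if (dev < 0 || dev >= 4 || !max_readahead_for[dev])
        return DEFAULT_MAX_READAHEAD;
    return max_readahead_for[dev];
}

int main(void)
{
    unsigned long f_ramax = 256 * 4096;     /* readahead window grown by doubling */
    unsigned long limit = get_max_readahead(1);

    /* the same clamp the patch applies to filp->f_ramax */
    if (f_ramax > limit)
        f_ramax = limit;
    printf("readahead window clamped to %lu bytes\n", f_ramax);
    return 0;
}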
diff --git a/mm/mmap.c b/mm/mmap.c
index 10481ef95..501b31913 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -92,25 +92,21 @@ asmlinkage unsigned long sys_brk(unsigned long brk)
struct mm_struct *mm = current->mm;
lock_kernel();
- retval = mm->brk;
if (brk < mm->end_code)
goto out;
newbrk = PAGE_ALIGN(brk);
oldbrk = PAGE_ALIGN(mm->brk);
- if (oldbrk == newbrk) {
- retval = mm->brk = brk;
- goto out;
- }
+ if (oldbrk == newbrk)
+ goto set_brk;
/* Always allow shrinking brk. */
if (brk <= mm->brk) {
- retval = mm->brk = brk;
- do_munmap(newbrk, oldbrk-newbrk);
+ if (!do_munmap(newbrk, oldbrk-newbrk))
+ goto set_brk;
goto out;
}
/* Check against rlimit and stack.. */
- retval = mm->brk;
rlim = current->rlim[RLIMIT_DATA].rlim_cur;
if (rlim >= RLIM_INFINITY)
rlim = ~0;
@@ -126,12 +122,14 @@ asmlinkage unsigned long sys_brk(unsigned long brk)
goto out;
/* Ok, looks good - let it rip. */
- if(do_mmap(NULL, oldbrk, newbrk-oldbrk,
+ if (do_mmap(NULL, oldbrk, newbrk-oldbrk,
PROT_READ|PROT_WRITE|PROT_EXEC,
- MAP_FIXED|MAP_PRIVATE, 0) == oldbrk)
- mm->brk = brk;
- retval = mm->brk;
+ MAP_FIXED|MAP_PRIVATE, 0) != oldbrk)
+ goto out;
+set_brk:
+ mm->brk = brk;
out:
+ retval = mm->brk;
unlock_kernel();
return retval;
}
@@ -163,7 +161,7 @@ unsigned long do_mmap(struct file * file, unsigned long addr, unsigned long len,
{
struct mm_struct * mm = current->mm;
struct vm_area_struct * vma;
- int correct_wcount = 0;
+ int correct_wcount = 0, error;
if ((len = PAGE_ALIGN(len)) == 0)
return addr;
@@ -262,26 +260,24 @@ unsigned long do_mmap(struct file * file, unsigned long addr, unsigned long len,
vma->vm_dentry = NULL;
vma->vm_pte = 0;
- do_munmap(addr, len); /* Clear old maps */
+ /* Clear old maps */
+ error = -ENOMEM;
+ if (do_munmap(addr, len))
+ goto free_vma;
/* Check against address space limit. */
if ((mm->total_vm << PAGE_SHIFT) + len
- > current->rlim[RLIMIT_AS].rlim_cur) {
- kmem_cache_free(vm_area_cachep, vma);
- return -ENOMEM;
- }
+ > current->rlim[RLIMIT_AS].rlim_cur)
+ goto free_vma;
/* Private writable mapping? Check memory availability.. */
- if ((vma->vm_flags & (VM_SHARED | VM_WRITE)) == VM_WRITE) {
- if (!(flags & MAP_NORESERVE) &&
- !vm_enough_memory(len >> PAGE_SHIFT)) {
- kmem_cache_free(vm_area_cachep, vma);
- return -ENOMEM;
- }
- }
+ if ((vma->vm_flags & (VM_SHARED | VM_WRITE)) == VM_WRITE &&
+ !(flags & MAP_NORESERVE) &&
+ !vm_enough_memory(len >> PAGE_SHIFT))
+ goto free_vma;
+ error = 0;
if (file) {
- int error = 0;
if (vma->vm_flags & VM_DENYWRITE) {
if (file->f_dentry->d_inode->i_writecount > 0)
error = -ETXTBSY;
@@ -298,23 +294,22 @@ unsigned long do_mmap(struct file * file, unsigned long addr, unsigned long len,
if (!error)
error = file->f_op->mmap(file, vma);
- if (error) {
- if (correct_wcount)
- file->f_dentry->d_inode->i_writecount++;
- kmem_cache_free(vm_area_cachep, vma);
- return error;
- }
}
+ /* Fix up the count if necessary, then check for an error */
+ if (correct_wcount)
+ file->f_dentry->d_inode->i_writecount++;
+ if (error)
+ goto free_vma;
+ /*
+ * merge_segments may merge our vma, so we can't refer to it
+ * after the call. Save the values we need now ...
+ */
flags = vma->vm_flags;
+ addr = vma->vm_start; /* can addr have changed?? */
insert_vm_struct(mm, vma);
- if (correct_wcount)
- file->f_dentry->d_inode->i_writecount++;
merge_segments(mm, vma->vm_start, vma->vm_end);
- addr = vma->vm_start;
-
- /* merge_segments might have merged our vma, so we can't use it any more */
mm->total_vm += len >> PAGE_SHIFT;
if ((flags & VM_LOCKED) && !(flags & VM_IO)) {
unsigned long start = addr;
@@ -328,6 +323,10 @@ unsigned long do_mmap(struct file * file, unsigned long addr, unsigned long len,
} while (len > 0);
}
return addr;
+
+free_vma:
+ kmem_cache_free(vm_area_cachep, vma);
+ return error;
}
/* Get an address range which is currently unmapped.
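
The do_mmap() rework above replaces the scattered kmem_cache_free()/return pairs with a single free_vma exit label. The fragment below is a small userspace stand-in for that pattern (allocate once, jump to one cleanup label on any failure); the struct, failure causes, and function name are invented for the example.

#include <stdlib.h>
#include <errno.h>

struct vm_area { unsigned long start, end; };

/* returns the mapped address on success, a negative errno on failure */
static long map_region(unsigned long addr, unsigned long len,
                       int unmap_ok, int within_rlimit)
{
    struct vm_area *vma;
    long error;

    vma = malloc(sizeof(*vma));
    if (!vma)
        return -ENOMEM;

    error = -ENOMEM;
    if (!unmap_ok)            /* e.g. clearing the old maps failed */
        goto free_vma;
    if (!within_rlimit)       /* e.g. over the address-space limit */
        goto free_vma;

    vma->start = addr;
    vma->end = addr + len;
    /* ... insert_vm_struct()/merge_segments() would run here ... */
    free(vma);                /* the sketch has no mm to hand the vma to */
    return addr;

free_vma:                     /* single cleanup point, as in the patch */
    free(vma);
    return error;
}

int main(void)
{
    return map_region(0x40000000UL, 4096, 1, 1) < 0;
}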
diff --git a/mm/swap_state.c b/mm/swap_state.c
index e0cfe1fef..75f284124 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -18,9 +18,6 @@
#include <linux/swapctl.h>
#include <linux/init.h>
-#include <asm/dma.h>
-#include <asm/system.h> /* for cli()/sti() */
-#include <asm/uaccess.h> /* for cop_to/from_user */
#include <asm/bitops.h>
#include <asm/pgtable.h>
@@ -60,31 +57,47 @@ int add_to_swap_cache(struct page *page, unsigned long entry)
return 0;
}
+/*
+ * If swap_map[] reaches 127, the entries are treated as "permanent".
+ */
void swap_duplicate(unsigned long entry)
{
struct swap_info_struct * p;
unsigned long offset, type;
if (!entry)
- return;
- offset = SWP_OFFSET(entry);
+ goto out;
type = SWP_TYPE(entry);
if (type & SHM_SWP_TYPE)
- return;
- if (type >= nr_swapfiles) {
- printk("Trying to duplicate nonexistent swap-page\n");
- return;
- }
+ goto out;
+ if (type >= nr_swapfiles)
+ goto bad_file;
p = type + swap_info;
- if (offset >= p->max) {
- printk("swap_duplicate: weirdness\n");
- return;
- }
- if (!p->swap_map[offset]) {
- printk("swap_duplicate: trying to duplicate unused page\n");
- return;
+ offset = SWP_OFFSET(entry);
+ if (offset >= p->max)
+ goto bad_offset;
+ if (!p->swap_map[offset])
+ goto bad_unused;
+ if (p->swap_map[offset] < 126)
+ p->swap_map[offset]++;
+ else {
+ static int overflow = 0;
+ if (overflow++ < 5)
+ printk("swap_duplicate: entry %08lx map count=%d\n",
+ entry, p->swap_map[offset]);
+ p->swap_map[offset] = 127;
}
- p->swap_map[offset]++;
+out:
return;
+
+bad_file:
+ printk("swap_duplicate: Trying to duplicate nonexistent swap-page\n");
+ goto out;
+bad_offset:
+ printk("swap_duplicate: offset exceeds max\n");
+ goto out;
+bad_unused:
+ printk("swap_duplicate: unused page\n");
+ goto out;
}
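
The rewritten swap_duplicate() above caps the per-entry use count: a count below 126 is incremented normally, anything higher is pinned at 127 and treated as permanent. A tiny standalone model of that saturation rule, with invented starting values:

#include <stdio.h>

#define SWAP_MAP_MAX 127       /* the "permanent" value from the patch */

static unsigned char swap_map_dup(unsigned char count)
{
    if (count < SWAP_MAP_MAX - 1)
        return count + 1;
    return SWAP_MAP_MAX;       /* overflow: pin the entry at 127 */
}

int main(void)
{
    unsigned char count = 125;
    int i;

    for (i = 0; i < 4; i++) {
        count = swap_map_dup(count);
        printf("use count = %u\n", count);   /* 126, 127, 127, 127 */
    }
    return 0;
}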
diff --git a/mm/swapfile.c b/mm/swapfile.c
index b76b34237..13d2436ba 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -21,11 +21,7 @@
#include <linux/malloc.h>
#include <linux/blkdev.h> /* for blk_size */
#include <linux/vmalloc.h>
-#include <linux/dcache.h>
-#include <asm/dma.h>
-#include <asm/system.h> /* for cli()/sti() */
-#include <asm/uaccess.h> /* for copy_to/from_user */
#include <asm/bitops.h>
#include <asm/pgtable.h>
@@ -122,52 +118,60 @@ unsigned long get_swap_page(void)
}
}
+/*
+ * If the swap count overflows (swap_map[] == 127), the entry is considered
+ * "permanent" and can't be reclaimed until the swap device is closed.
+ */
void swap_free(unsigned long entry)
{
struct swap_info_struct * p;
unsigned long offset, type;
if (!entry)
- return;
+ goto out;
type = SWP_TYPE(entry);
if (type & SHM_SWP_TYPE)
- return;
- if (type >= nr_swapfiles) {
- printk("Trying to free nonexistent swap-page\n");
- return;
- }
+ goto out;
+ if (type >= nr_swapfiles)
+ goto bad_nofile;
p = & swap_info[type];
+ if (!(p->flags & SWP_USED))
+ goto bad_device;
+ if (p->prio > swap_info[swap_list.next].prio)
+ swap_list.next = swap_list.head;
offset = SWP_OFFSET(entry);
- if (offset >= p->max) {
- printk("swap_free: weirdness\n");
- return;
- }
- if (!(p->flags & SWP_USED)) {
- printk("Trying to free swap from unused swap-device\n");
- return;
- }
+ if (offset >= p->max)
+ goto bad_offset;
if (offset < p->lowest_bit)
p->lowest_bit = offset;
if (offset > p->highest_bit)
p->highest_bit = offset;
if (!p->swap_map[offset])
- printk("swap_free: swap-space map bad (entry %08lx)\n",entry);
- else
+ goto bad_free;
+ if (p->swap_map[offset] < 127) {
if (!--p->swap_map[offset])
nr_swap_pages++;
- if (p->prio > swap_info[swap_list.next].prio) {
- swap_list.next = swap_list.head;
}
+out:
+ return;
+
+bad_nofile:
+ printk("swap_free: Trying to free nonexistent swap-page\n");
+ goto out;
+bad_device:
+ printk("swap_free: Trying to free swap from unused swap-device\n");
+ goto out;
+bad_offset:
+ printk("swap_free: offset exceeds max\n");
+ goto out;
+bad_free:
+ printk("swap_free: swap-space map bad (entry %08lx)\n",entry);
+ goto out;
}
/*
- * Trying to stop swapping from a file is fraught with races, so
- * we repeat quite a bit here when we have to pause. swapoff()
- * isn't exactly timing-critical, so who cares (but this is /really/
- * inefficient, ugh).
- *
- * We return 1 after having slept, which makes the process start over
- * from the beginning for this process..
+ * The swap entry has been read in advance, and we return 1 to indicate
+ * that the page has been used or is no longer needed.
*/
static inline int unuse_pte(struct vm_area_struct * vma, unsigned long address,
pte_t *dir, unsigned long entry, unsigned long page)
@@ -198,9 +202,8 @@ static inline int unuse_pte(struct vm_area_struct * vma, unsigned long address,
if (pte_val(pte) != entry)
return 0;
set_pte(dir, pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot))));
- flush_tlb_page(vma, address);
++vma->vm_mm->rss;
- swap_free(pte_val(pte));
+ swap_free(entry);
return 1;
}
@@ -296,18 +299,6 @@ static int unuse_process(struct mm_struct * mm, unsigned long entry,
return 0;
}
-static unsigned long find_swap_entry(int type)
-{
- struct swap_info_struct * p = &swap_info[type];
- int i;
-
- for (i = 1 ; i < p->max ; i++) {
- if (p->swap_map[i] > 0 && p->swap_map[i] != 0x80)
- return SWP_ENTRY(type, i);
- }
- return 0;
-}
-
/*
* We completely avoid races by reading each swap page in advance,
* and then search for the process using it. All the necessary
@@ -315,14 +306,13 @@ static unsigned long find_swap_entry(int type)
*/
static int try_to_unuse(unsigned int type)
{
- unsigned long page = 0;
+ struct swap_info_struct * si = &swap_info[type];
struct task_struct *p;
+ unsigned long page = 0;
unsigned long entry;
+ int i;
- /*
- * Find all swap entries in use ...
- */
- while ((entry = find_swap_entry(type)) != 0) {
+ while (1) {
if (!page) {
page = __get_free_page(GFP_KERNEL);
if (!page)
@@ -330,8 +320,16 @@ static int try_to_unuse(unsigned int type)
}
/*
- * Read in the page, and then free the swap page.
- */
+ * Find a swap page in use and read it in.
+ */
+ for (i = 1 , entry = 0; i < si->max ; i++) {
+ if (si->swap_map[i] > 0 && si->swap_map[i] != 0x80) {
+ entry = SWP_ENTRY(type, i);
+ break;
+ }
+ }
+ if (!entry)
+ break;
read_swap_page(entry, (char *) page);
read_lock(&tasklist_lock);
@@ -344,9 +342,19 @@ static int try_to_unuse(unsigned int type)
unlock:
read_unlock(&tasklist_lock);
if (page) {
- printk("try_to_unuse: didn't find entry %8lx\n",
- entry);
- swap_free(entry);
+ /*
+ * If we couldn't find an entry, there are several
+ * possible reasons: someone else freed it first,
+ * we freed the last reference to an overflowed entry,
+ * or the system has lost track of the use counts.
+ */
+ if (si->swap_map[i] != 0) {
+ if (si->swap_map[i] != 127)
+ printk("try_to_unuse: entry %08lx "
+ "not in use\n", entry);
+ si->swap_map[i] = 0;
+ nr_swap_pages++;
+ }
}
}
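
try_to_unuse() now scans the swap map directly instead of calling the removed find_swap_entry() helper: it skips unused slots and the 0x80 marker, reads the page back in, and clears whatever count is left, including overflowed 127 entries. A rough userspace model of that scan loop; the map contents below are made up:

#include <stdio.h>

int main(void)
{
    /* invented swap map: 0 = unused, 0x80 = marker the scan must skip, else a use count */
    unsigned char map[8] = { 0, 0, 0x80, 0, 3, 0, 127, 0 };
    int max = 8;
    int i;

    for (;;) {
        int entry = 0;

        /* the inline scan that replaces find_swap_entry() */
        for (i = 1; i < max; i++) {
            if (map[i] > 0 && map[i] != 0x80) {
                entry = i;
                break;
            }
        }
        if (!entry)
            break;

        /* the kernel would read the page in and walk every process here;
         * whatever count remains (even an overflowed 127) is then cleared */
        printf("unusing slot %d (count %u)\n", entry, map[entry]);
        map[entry] = 0;
    }
    return 0;
}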
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 8f1ab1fae..53c4e58bf 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -17,16 +17,12 @@
#include <linux/kernel_stat.h>
#include <linux/errno.h>
#include <linux/string.h>
-#include <linux/stat.h>
#include <linux/swap.h>
-#include <linux/fs.h>
#include <linux/swapctl.h>
#include <linux/smp_lock.h>
#include <linux/slab.h>
+#include <linux/dcache.h>
-#include <asm/dma.h>
-#include <asm/system.h> /* for cli()/sti() */
-#include <asm/uaccess.h> /* for copy_to/from_user */
#include <asm/bitops.h>
#include <asm/pgtable.h>
@@ -355,6 +351,11 @@ static inline int do_try_to_free_page(int priority, int dma, int wait)
int i=6;
int stop;
+ /* Let the dcache know we're looking for memory ... */
+ shrink_dcache_memory();
+ /* Always trim SLAB caches when memory gets low. */
+ (void) kmem_cache_reap(0, dma, wait);
+
/* we don't try as hard if we're not waiting.. */
stop = 3;
if (wait)
@@ -366,19 +367,9 @@ static inline int do_try_to_free_page(int priority, int dma, int wait)
return 1;
state = 1;
case 1:
- /*
- * We shouldn't have a priority here:
- * If we're low on memory we should
- * unconditionally throw away _all_
- * kmalloc caches!
- */
- if (kmem_cache_reap(0, dma, wait))
- return 1;
- state = 2;
- case 2:
if (shm_swap(i, dma))
return 1;
- state = 3;
+ state = 2;
default:
if (swap_out(i, dma, wait))
return 1;
@@ -425,6 +416,7 @@ void kswapd_setup(void)
printk ("Starting kswapd v%.*s\n", i, s);
}
+#define MAX_SWAP_FAIL 3
/*
* The background pageout daemon.
* Started as a kernel thread from the init process.
@@ -452,6 +444,8 @@ int kswapd(void *unused)
init_swap_timer();
while (1) {
+ int fail;
+
kswapd_awake = 0;
current->signal = 0;
run_task_queue(&tq_disk);
@@ -462,13 +456,27 @@ int kswapd(void *unused)
* We now only swap out as many pages as needed.
* When we are truly low on memory, we swap out
* synchronously (WAIT == 1). -- Rik.
+ * If we've had too many consecutive failures,
+ * go back to sleep to let other tasks run.
+ */
+ for (fail = 0; fail++ < MAX_SWAP_FAIL;) {
+ int pages, wait;
+
+ pages = nr_free_pages;
+ if (nr_free_pages >= min_free_pages)
+ pages += atomic_read(&nr_async_pages);
+ if (pages >= free_pages_high)
+ break;
+ wait = (pages < free_pages_low);
+ if (try_to_free_page(GFP_KERNEL, 0, wait))
+ fail = 0;
+ }
+ /*
+ * Report failure if we couldn't reach the minimum goal.
*/
- while(nr_free_pages < min_free_pages)
- try_to_free_page(GFP_KERNEL, 0, 1);
- while((nr_free_pages + atomic_read(&nr_async_pages)) < free_pages_low)
- try_to_free_page(GFP_KERNEL, 0, 1);
- while((nr_free_pages + atomic_read(&nr_async_pages)) < free_pages_high)
- try_to_free_page(GFP_KERNEL, 0, 0);
+ if (nr_free_pages < min_free_pages)
+ printk("kswapd: failed, got %d of %d\n",
+ nr_free_pages, min_free_pages);
}
}
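
The kswapd change above replaces the three unconditional while loops with a single attempt loop that stops at the high watermark or after MAX_SWAP_FAIL consecutive failures, resetting the failure count on every success. The fragment below models that control flow in userspace; the watermarks and the fake try_to_free_page() are placeholders, not the kernel's values.

#include <stdio.h>
#include <stdlib.h>

#define MAX_SWAP_FAIL   3
#define MIN_FREE_PAGES  16
#define FREE_PAGES_LOW  32
#define FREE_PAGES_HIGH 64

static int nr_free_pages = 10;

static int try_to_free_page(int wait)
{
    (void) wait;               /* synchronous vs. async does not matter in the model */
    if (rand() & 1) {          /* pretend about half of the attempts succeed */
        nr_free_pages++;
        return 1;
    }
    return 0;
}

int main(void)
{
    int fail;

    for (fail = 0; fail++ < MAX_SWAP_FAIL;) {
        int wait;

        if (nr_free_pages >= FREE_PAGES_HIGH)
            break;
        wait = (nr_free_pages < FREE_PAGES_LOW);  /* swap synchronously when low */
        if (try_to_free_page(wait))
            fail = 0;                             /* success resets the failure count */
    }

    if (nr_free_pages < MIN_FREE_PAGES)
        printf("kswapd: failed, got %d of %d\n",
               nr_free_pages, MIN_FREE_PAGES);
    return 0;
}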