author     Ralf Baechle <ralf@linux-mips.org>   1997-12-06 23:51:34 +0000
committer  Ralf Baechle <ralf@linux-mips.org>   1997-12-06 23:51:34 +0000
commit     230e5ab6a084ed50470f101934782dbf54b0d06b
tree       5dd821c8d33f450470588e7a543f74bf74306e9e   /mm
parent     c9b1c8a64c6444d189856f1e26bdcb8b4cd0113a
Merge with Linux 2.1.67.
Diffstat (limited to 'mm')
-rw-r--r--  mm/filemap.c      127
-rw-r--r--  mm/mmap.c          73
-rw-r--r--  mm/swap_state.c    49
-rw-r--r--  mm/swapfile.c     114
-rw-r--r--  mm/vmscan.c        52
5 files changed, 256 insertions(+), 159 deletions(-)
diff --git a/mm/filemap.c b/mm/filemap.c
index c02a31696..b597bdde7 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -24,6 +24,7 @@
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/smp_lock.h>
+#include <linux/blkdev.h>
#include <asm/system.h>
#include <asm/pgtable.h>
@@ -445,12 +446,20 @@ static void profile_readahead(int async, struct file *filp)
#define MIN_READAHEAD PageAlignSize(4096*3)
#endif
+static inline int get_max_readahead(struct inode * inode)
+{
+ if (!inode->i_dev || !max_readahead[MAJOR(inode->i_dev)])
+ return MAX_READAHEAD;
+ return max_readahead[MAJOR(inode->i_dev)][MINOR(inode->i_dev)];
+}
+
static inline unsigned long generic_file_readahead(int reada_ok, struct file * filp, struct inode * inode,
unsigned long ppos, struct page * page,
unsigned long page_cache)
{
unsigned long max_ahead, ahead;
unsigned long raend;
+ int max_readahead = get_max_readahead(inode);
raend = filp->f_raend & PAGE_MASK;
max_ahead = 0;
@@ -534,8 +543,8 @@ static inline unsigned long generic_file_readahead(int reada_ok, struct file * f
filp->f_ramax += filp->f_ramax;
- if (filp->f_ramax > MAX_READAHEAD)
- filp->f_ramax = MAX_READAHEAD;
+ if (filp->f_ramax > max_readahead)
+ filp->f_ramax = max_readahead;
#ifdef PROFILE_READAHEAD
profile_readahead((reada_ok == 2), filp);
@@ -555,12 +564,14 @@ static inline unsigned long generic_file_readahead(int reada_ok, struct file * f
* of the logic when it comes to error handling etc.
*/
-long generic_file_read(struct inode * inode, struct file * filp,
- char * buf, unsigned long count)
+ssize_t generic_file_read(struct file * filp, char * buf,
+ size_t count, loff_t *ppos)
{
- int error, read;
- unsigned long pos, ppos, page_cache;
+ struct inode *inode = filp->f_dentry->d_inode;
+ ssize_t error, read;
+ size_t pos, pgpos, page_cache;
int reada_ok;
+ int max_readahead = get_max_readahead(inode);
if (!access_ok(VERIFY_WRITE, buf, count))
return -EFAULT;
@@ -570,8 +581,8 @@ long generic_file_read(struct inode * inode, struct file * filp,
read = 0;
page_cache = 0;
- pos = filp->f_pos;
- ppos = pos & PAGE_MASK;
+ pos = *ppos;
+ pgpos = pos & PAGE_MASK;
/*
* If the current position is outside the previous read-ahead window,
* we reset the current read-ahead context and set read ahead max to zero
@@ -579,7 +590,7 @@ long generic_file_read(struct inode * inode, struct file * filp,
* otherwise, we assume that the file accesses are sequential enough to
* continue read-ahead.
*/
- if (ppos > filp->f_raend || ppos + filp->f_rawin < filp->f_raend) {
+ if (pgpos > filp->f_raend || pgpos + filp->f_rawin < filp->f_raend) {
reada_ok = 0;
filp->f_raend = 0;
filp->f_ralen = 0;
@@ -600,15 +611,15 @@ long generic_file_read(struct inode * inode, struct file * filp,
} else {
unsigned long needed;
- needed = ((pos + count) & PAGE_MASK) - ppos;
+ needed = ((pos + count) & PAGE_MASK) - pgpos;
if (filp->f_ramax < needed)
filp->f_ramax = needed;
if (reada_ok && filp->f_ramax < MIN_READAHEAD)
filp->f_ramax = MIN_READAHEAD;
- if (filp->f_ramax > MAX_READAHEAD)
- filp->f_ramax = MAX_READAHEAD;
+ if (filp->f_ramax > max_readahead)
+ filp->f_ramax = max_readahead;
}
for (;;) {
@@ -736,7 +747,7 @@ page_read_error:
break;
}
- filp->f_pos = pos;
+ *ppos = pos;
filp->f_reada = 1;
if (page_cache)
free_page(page_cache);
@@ -911,7 +922,8 @@ static inline int do_write_page(struct inode * inode, struct file * file,
old_fs = get_fs();
set_fs(KERNEL_DS);
retval = -EIO;
- if (size == file->f_op->write(inode, file, (const char *) page, size))
+ if (size == file->f_op->write(file, (const char *) page,
+ size, &file->f_pos))
retval = 0;
set_fs(old_fs);
return retval;
@@ -1210,9 +1222,7 @@ int generic_file_mmap(struct file * file, struct vm_area_struct * vma)
static int msync_interval(struct vm_area_struct * vma,
unsigned long start, unsigned long end, int flags)
{
- if (!vma->vm_dentry)
- return 0;
- if (vma->vm_ops->sync) {
+ if (vma->vm_dentry && vma->vm_ops && vma->vm_ops->sync) {
int error;
error = vma->vm_ops->sync(vma, start, end-start, flags);
if (!error && (flags & MS_SYNC)) {
@@ -1301,21 +1311,23 @@ out:
* file system has to do this all by itself, unfortunately.
* okir@monad.swb.de
*/
-long
-generic_file_write(struct inode *inode, struct file *file, const char *buf, unsigned long count)
+ssize_t
+generic_file_write(struct file *file, const char *buf,
+ size_t count, loff_t *ppos)
{
+ struct inode *inode = file->f_dentry->d_inode;
struct page *page, **hash;
unsigned long page_cache = 0;
- unsigned long ppos, offset;
- unsigned int bytes, written;
+ unsigned long pgpos, offset;
+ unsigned long bytes, written;
unsigned long pos;
- int status, sync, didread;
+ long status, sync, didread;
if (!inode->i_op || !inode->i_op->updatepage)
return -EIO;
sync = file->f_flags & O_SYNC;
- pos = file->f_pos;
+ pos = *ppos;
written = 0;
status = 0;
@@ -1328,13 +1340,13 @@ generic_file_write(struct inode *inode, struct file *file, const char *buf, unsi
* allocate a free page.
*/
offset = (pos & ~PAGE_MASK);
- ppos = pos & PAGE_MASK;
+ pgpos = pos & PAGE_MASK;
if ((bytes = PAGE_SIZE - offset) > count)
bytes = count;
- hash = page_hash(inode, ppos);
- if (!(page = __find_page(inode, ppos, *hash))) {
+ hash = page_hash(inode, pgpos);
+ if (!(page = __find_page(inode, pgpos, *hash))) {
if (!page_cache) {
page_cache = __get_free_page(GFP_KERNEL);
if (!page_cache) {
@@ -1344,7 +1356,7 @@ generic_file_write(struct inode *inode, struct file *file, const char *buf, unsi
continue;
}
page = mem_map + MAP_NR(page_cache);
- add_to_page_cache(page, inode, ppos, hash);
+ add_to_page_cache(page, inode, pgpos, hash);
page_cache = 0;
}
@@ -1364,7 +1376,7 @@ page_wait:
* after the current end of file.
*/
if (!PageUptodate(page)) {
- if (bytes < PAGE_SIZE && ppos < inode->i_size) {
+ if (bytes < PAGE_SIZE && pgpos < inode->i_size) {
if (didread < 2)
status = inode->i_op->readpage(inode, page);
else
@@ -1390,7 +1402,7 @@ done_with_page:
pos += status;
buf += status;
}
- file->f_pos = pos;
+ *ppos = pos;
if (pos > inode->i_size)
inode->i_size = pos;
@@ -1400,3 +1412,60 @@ done_with_page:
return written;
return status;
}
+
+/*
+ * Support routines for directory cacheing using the page cache.
+ */
+
+/*
+ * Finds the page at the specified offset, installing a new page
+ * if requested. The count is incremented and the page is locked.
+ *
+ * Note: we don't have to worry about races here, as the caller
+ * is holding the inode semaphore.
+ */
+unsigned long get_cached_page(struct inode * inode, unsigned long offset,
+ int new)
+{
+ struct page * page;
+ struct page ** hash;
+ unsigned long page_cache;
+
+ hash = page_hash(inode, offset);
+ page = __find_page(inode, offset, *hash);
+ if (!page) {
+ if (!new)
+ goto out;
+ page_cache = get_free_page(GFP_KERNEL);
+ if (!page_cache)
+ goto out;
+ page = mem_map + MAP_NR(page_cache);
+ add_to_page_cache(page, inode, offset, hash);
+ }
+ if (atomic_read(&page->count) != 2)
+ printk("get_cached_page: page count=%d\n",
+ atomic_read(&page->count));
+ if (test_bit(PG_locked, &page->flags))
+ printk("get_cached_page: page already locked!\n");
+ set_bit(PG_locked, &page->flags);
+
+out:
+ return page_address(page);
+}
+
+/*
+ * Unlock and free a page.
+ */
+void put_cached_page(unsigned long addr)
+{
+ struct page * page = mem_map + MAP_NR(addr);
+
+ if (!test_bit(PG_locked, &page->flags))
+ printk("put_cached_page: page not locked!\n");
+ if (atomic_read(&page->count) != 2)
+ printk("put_cached_page: page count=%d\n",
+ atomic_read(&page->count));
+ clear_bit(PG_locked, &page->flags);
+ wake_up(&page->wait);
+ __free_page(page);
+}
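
The filemap.c hunks above add get_max_readahead(), which looks up a per-device readahead limit and falls back to the global maximum, and then clamp filp->f_ramax against that value instead of MAX_READAHEAD. The standalone sketch below mimics that lookup-and-clamp in plain userspace C; the table, device numbers, and sizes are invented for illustration and are not the kernel's data structures.

#include <stdio.h>

#define DEFAULT_MAX_READAHEAD (31 * 4096)  /* fallback when no per-device limit is set */

/* hypothetical per-device table; 0 means "no limit configured" */
static unsigned long max_readahead_for[4] = { 0, 16 * 4096, 0, 64 * 4096 };

static unsigned long get_max_readahead(int dev)
{
    if (dev < 0 || dev >= 4 || !max_readahead_for[dev])
        return DEFAULT_MAX_READAHEAD;
    return max_readahead_for[dev];
}

int main(void)
{
    unsigned long f_ramax = 256 * 4096;     /* readahead window grown by doubling */
    unsigned long limit = get_max_readahead(1);

    /* the same clamp the patch applies to filp->f_ramax */
    if (f_ramax > limit)
        f_ramax = limit;
    printf("readahead window clamped to %lu bytes\n", f_ramax);
    return 0;
}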
diff --git a/mm/mmap.c b/mm/mmap.c
index 10481ef95..501b31913 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -92,25 +92,21 @@ asmlinkage unsigned long sys_brk(unsigned long brk)
struct mm_struct *mm = current->mm;
lock_kernel();
- retval = mm->brk;
if (brk < mm->end_code)
goto out;
newbrk = PAGE_ALIGN(brk);
oldbrk = PAGE_ALIGN(mm->brk);
- if (oldbrk == newbrk) {
- retval = mm->brk = brk;
- goto out;
- }
+ if (oldbrk == newbrk)
+ goto set_brk;
/* Always allow shrinking brk. */
if (brk <= mm->brk) {
- retval = mm->brk = brk;
- do_munmap(newbrk, oldbrk-newbrk);
+ if (!do_munmap(newbrk, oldbrk-newbrk))
+ goto set_brk;
goto out;
}
/* Check against rlimit and stack.. */
- retval = mm->brk;
rlim = current->rlim[RLIMIT_DATA].rlim_cur;
if (rlim >= RLIM_INFINITY)
rlim = ~0;
@@ -126,12 +122,14 @@ asmlinkage unsigned long sys_brk(unsigned long brk)
goto out;
/* Ok, looks good - let it rip. */
- if(do_mmap(NULL, oldbrk, newbrk-oldbrk,
+ if (do_mmap(NULL, oldbrk, newbrk-oldbrk,
PROT_READ|PROT_WRITE|PROT_EXEC,
- MAP_FIXED|MAP_PRIVATE, 0) == oldbrk)
- mm->brk = brk;
- retval = mm->brk;
+ MAP_FIXED|MAP_PRIVATE, 0) != oldbrk)
+ goto out;
+set_brk:
+ mm->brk = brk;
out:
+ retval = mm->brk;
unlock_kernel();
return retval;
}
@@ -163,7 +161,7 @@ unsigned long do_mmap(struct file * file, unsigned long addr, unsigned long len,
{
struct mm_struct * mm = current->mm;
struct vm_area_struct * vma;
- int correct_wcount = 0;
+ int correct_wcount = 0, error;
if ((len = PAGE_ALIGN(len)) == 0)
return addr;
@@ -262,26 +260,24 @@ unsigned long do_mmap(struct file * file, unsigned long addr, unsigned long len,
vma->vm_dentry = NULL;
vma->vm_pte = 0;
- do_munmap(addr, len); /* Clear old maps */
+ /* Clear old maps */
+ error = -ENOMEM;
+ if (do_munmap(addr, len))
+ goto free_vma;
/* Check against address space limit. */
if ((mm->total_vm << PAGE_SHIFT) + len
- > current->rlim[RLIMIT_AS].rlim_cur) {
- kmem_cache_free(vm_area_cachep, vma);
- return -ENOMEM;
- }
+ > current->rlim[RLIMIT_AS].rlim_cur)
+ goto free_vma;
/* Private writable mapping? Check memory availability.. */
- if ((vma->vm_flags & (VM_SHARED | VM_WRITE)) == VM_WRITE) {
- if (!(flags & MAP_NORESERVE) &&
- !vm_enough_memory(len >> PAGE_SHIFT)) {
- kmem_cache_free(vm_area_cachep, vma);
- return -ENOMEM;
- }
- }
+ if ((vma->vm_flags & (VM_SHARED | VM_WRITE)) == VM_WRITE &&
+ !(flags & MAP_NORESERVE) &&
+ !vm_enough_memory(len >> PAGE_SHIFT))
+ goto free_vma;
+ error = 0;
if (file) {
- int error = 0;
if (vma->vm_flags & VM_DENYWRITE) {
if (file->f_dentry->d_inode->i_writecount > 0)
error = -ETXTBSY;
@@ -298,23 +294,22 @@ unsigned long do_mmap(struct file * file, unsigned long addr, unsigned long len,
if (!error)
error = file->f_op->mmap(file, vma);
- if (error) {
- if (correct_wcount)
- file->f_dentry->d_inode->i_writecount++;
- kmem_cache_free(vm_area_cachep, vma);
- return error;
- }
}
+ /* Fix up the count if necessary, then check for an error */
+ if (correct_wcount)
+ file->f_dentry->d_inode->i_writecount++;
+ if (error)
+ goto free_vma;
+ /*
+ * merge_segments may merge our vma, so we can't refer to it
+ * after the call. Save the values we need now ...
+ */
flags = vma->vm_flags;
+ addr = vma->vm_start; /* can addr have changed?? */
insert_vm_struct(mm, vma);
- if (correct_wcount)
- file->f_dentry->d_inode->i_writecount++;
merge_segments(mm, vma->vm_start, vma->vm_end);
- addr = vma->vm_start;
-
- /* merge_segments might have merged our vma, so we can't use it any more */
mm->total_vm += len >> PAGE_SHIFT;
if ((flags & VM_LOCKED) && !(flags & VM_IO)) {
unsigned long start = addr;
@@ -328,6 +323,10 @@ unsigned long do_mmap(struct file * file, unsigned long addr, unsigned long len,
} while (len > 0);
}
return addr;
+
+free_vma:
+ kmem_cache_free(vm_area_cachep, vma);
+ return error;
}
/* Get an address range which is currently unmapped.
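
The do_mmap() rework above replaces the scattered kmem_cache_free()/return pairs with a single free_vma exit label. The fragment below is a small userspace stand-in for that pattern (allocate once, jump to one cleanup label on any failure); the struct, failure causes, and function name are invented for the example.

#include <stdlib.h>
#include <errno.h>

struct vm_area { unsigned long start, end; };

/* returns the mapped address on success, a negative errno on failure */
static long map_region(unsigned long addr, unsigned long len,
                       int unmap_ok, int within_rlimit)
{
    struct vm_area *vma;
    long error;

    vma = malloc(sizeof(*vma));
    if (!vma)
        return -ENOMEM;

    error = -ENOMEM;
    if (!unmap_ok)            /* e.g. clearing the old maps failed */
        goto free_vma;
    if (!within_rlimit)       /* e.g. over the address-space limit */
        goto free_vma;

    vma->start = addr;
    vma->end = addr + len;
    /* ... insert_vm_struct()/merge_segments() would run here ... */
    free(vma);                /* the sketch has no mm to hand the vma to */
    return addr;

free_vma:                     /* single cleanup point, as in the patch */
    free(vma);
    return error;
}

int main(void)
{
    return map_region(0x40000000UL, 4096, 1, 1) < 0;
}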
diff --git a/mm/swap_state.c b/mm/swap_state.c
index e0cfe1fef..75f284124 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -18,9 +18,6 @@
#include <linux/swapctl.h>
#include <linux/init.h>
-#include <asm/dma.h>
-#include <asm/system.h> /* for cli()/sti() */
-#include <asm/uaccess.h> /* for cop_to/from_user */
#include <asm/bitops.h>
#include <asm/pgtable.h>
@@ -60,31 +57,47 @@ int add_to_swap_cache(struct page *page, unsigned long entry)
return 0;
}
+/*
+ * If swap_map[] reaches 127, the entries are treated as "permanent".
+ */
void swap_duplicate(unsigned long entry)
{
struct swap_info_struct * p;
unsigned long offset, type;
if (!entry)
- return;
- offset = SWP_OFFSET(entry);
+ goto out;
type = SWP_TYPE(entry);
if (type & SHM_SWP_TYPE)
- return;
- if (type >= nr_swapfiles) {
- printk("Trying to duplicate nonexistent swap-page\n");
- return;
- }
+ goto out;
+ if (type >= nr_swapfiles)
+ goto bad_file;
p = type + swap_info;
- if (offset >= p->max) {
- printk("swap_duplicate: weirdness\n");
- return;
- }
- if (!p->swap_map[offset]) {
- printk("swap_duplicate: trying to duplicate unused page\n");
- return;
+ offset = SWP_OFFSET(entry);
+ if (offset >= p->max)
+ goto bad_offset;
+ if (!p->swap_map[offset])
+ goto bad_unused;
+ if (p->swap_map[offset] < 126)
+ p->swap_map[offset]++;
+ else {
+ static int overflow = 0;
+ if (overflow++ < 5)
+ printk("swap_duplicate: entry %08lx map count=%d\n",
+ entry, p->swap_map[offset]);
+ p->swap_map[offset] = 127;
}
- p->swap_map[offset]++;
+out:
return;
+
+bad_file:
+ printk("swap_duplicate: Trying to duplicate nonexistent swap-page\n");
+ goto out;
+bad_offset:
+ printk("swap_duplicate: offset exceeds max\n");
+ goto out;
+bad_unused:
+ printk("swap_duplicate: unused page\n");
+ goto out;
}
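
The rewritten swap_duplicate() above caps the per-entry use count: a count below 126 is incremented normally, anything higher is pinned at 127 and treated as permanent. A tiny standalone model of that saturation rule, with invented starting values:

#include <stdio.h>

#define SWAP_MAP_MAX 127       /* the "permanent" value from the patch */

static unsigned char swap_map_dup(unsigned char count)
{
    if (count < SWAP_MAP_MAX - 1)
        return count + 1;
    return SWAP_MAP_MAX;       /* overflow: pin the entry at 127 */
}

int main(void)
{
    unsigned char count = 125;
    int i;

    for (i = 0; i < 4; i++) {
        count = swap_map_dup(count);
        printf("use count = %u\n", count);   /* 126, 127, 127, 127 */
    }
    return 0;
}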
diff --git a/mm/swapfile.c b/mm/swapfile.c
index b76b34237..13d2436ba 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -21,11 +21,7 @@
#include <linux/malloc.h>
#include <linux/blkdev.h> /* for blk_size */
#include <linux/vmalloc.h>
-#include <linux/dcache.h>
-#include <asm/dma.h>
-#include <asm/system.h> /* for cli()/sti() */
-#include <asm/uaccess.h> /* for copy_to/from_user */
#include <asm/bitops.h>
#include <asm/pgtable.h>
@@ -122,52 +118,60 @@ unsigned long get_swap_page(void)
}
}
+/*
+ * If the swap count overflows (swap_map[] == 127), the entry is considered
+ * "permanent" and can't be reclaimed until the swap device is closed.
+ */
void swap_free(unsigned long entry)
{
struct swap_info_struct * p;
unsigned long offset, type;
if (!entry)
- return;
+ goto out;
type = SWP_TYPE(entry);
if (type & SHM_SWP_TYPE)
- return;
- if (type >= nr_swapfiles) {
- printk("Trying to free nonexistent swap-page\n");
- return;
- }
+ goto out;
+ if (type >= nr_swapfiles)
+ goto bad_nofile;
p = & swap_info[type];
+ if (!(p->flags & SWP_USED))
+ goto bad_device;
+ if (p->prio > swap_info[swap_list.next].prio)
+ swap_list.next = swap_list.head;
offset = SWP_OFFSET(entry);
- if (offset >= p->max) {
- printk("swap_free: weirdness\n");
- return;
- }
- if (!(p->flags & SWP_USED)) {
- printk("Trying to free swap from unused swap-device\n");
- return;
- }
+ if (offset >= p->max)
+ goto bad_offset;
if (offset < p->lowest_bit)
p->lowest_bit = offset;
if (offset > p->highest_bit)
p->highest_bit = offset;
if (!p->swap_map[offset])
- printk("swap_free: swap-space map bad (entry %08lx)\n",entry);
- else
+ goto bad_free;
+ if (p->swap_map[offset] < 127) {
if (!--p->swap_map[offset])
nr_swap_pages++;
- if (p->prio > swap_info[swap_list.next].prio) {
- swap_list.next = swap_list.head;
}
+out:
+ return;
+
+bad_nofile:
+ printk("swap_free: Trying to free nonexistent swap-page\n");
+ goto out;
+bad_device:
+ printk("swap_free: Trying to free swap from unused swap-device\n");
+ goto out;
+bad_offset:
+ printk("swap_free: offset exceeds max\n");
+ goto out;
+bad_free:
+ printk("swap_free: swap-space map bad (entry %08lx)\n",entry);
+ goto out;
}
/*
- * Trying to stop swapping from a file is fraught with races, so
- * we repeat quite a bit here when we have to pause. swapoff()
- * isn't exactly timing-critical, so who cares (but this is /really/
- * inefficient, ugh).
- *
- * We return 1 after having slept, which makes the process start over
- * from the beginning for this process..
+ * The swap entry has been read in advance, and we return 1 to indicate
+ * that the page has been used or is no longer needed.
*/
static inline int unuse_pte(struct vm_area_struct * vma, unsigned long address,
pte_t *dir, unsigned long entry, unsigned long page)
@@ -198,9 +202,8 @@ static inline int unuse_pte(struct vm_area_struct * vma, unsigned long address,
if (pte_val(pte) != entry)
return 0;
set_pte(dir, pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot))));
- flush_tlb_page(vma, address);
++vma->vm_mm->rss;
- swap_free(pte_val(pte));
+ swap_free(entry);
return 1;
}
@@ -296,18 +299,6 @@ static int unuse_process(struct mm_struct * mm, unsigned long entry,
return 0;
}
-static unsigned long find_swap_entry(int type)
-{
- struct swap_info_struct * p = &swap_info[type];
- int i;
-
- for (i = 1 ; i < p->max ; i++) {
- if (p->swap_map[i] > 0 && p->swap_map[i] != 0x80)
- return SWP_ENTRY(type, i);
- }
- return 0;
-}
-
/*
* We completely avoid races by reading each swap page in advance,
* and then search for the process using it. All the necessary
@@ -315,14 +306,13 @@ static unsigned long find_swap_entry(int type)
*/
static int try_to_unuse(unsigned int type)
{
- unsigned long page = 0;
+ struct swap_info_struct * si = &swap_info[type];
struct task_struct *p;
+ unsigned long page = 0;
unsigned long entry;
+ int i;
- /*
- * Find all swap entries in use ...
- */
- while ((entry = find_swap_entry(type)) != 0) {
+ while (1) {
if (!page) {
page = __get_free_page(GFP_KERNEL);
if (!page)
@@ -330,8 +320,16 @@ static int try_to_unuse(unsigned int type)
}
/*
- * Read in the page, and then free the swap page.
- */
+ * Find a swap page in use and read it in.
+ */
+ for (i = 1 , entry = 0; i < si->max ; i++) {
+ if (si->swap_map[i] > 0 && si->swap_map[i] != 0x80) {
+ entry = SWP_ENTRY(type, i);
+ break;
+ }
+ }
+ if (!entry)
+ break;
read_swap_page(entry, (char *) page);
read_lock(&tasklist_lock);
@@ -344,9 +342,19 @@ static int try_to_unuse(unsigned int type)
unlock:
read_unlock(&tasklist_lock);
if (page) {
- printk("try_to_unuse: didn't find entry %8lx\n",
- entry);
- swap_free(entry);
+ /*
+ * If we couldn't find an entry, there are several
+ * possible reasons: someone else freed it first,
+ * we freed the last reference to an overflowed entry,
+ * or the system has lost track of the use counts.
+ */
+ if (si->swap_map[i] != 0) {
+ if (si->swap_map[i] != 127)
+ printk("try_to_unuse: entry %08lx "
+ "not in use\n", entry);
+ si->swap_map[i] = 0;
+ nr_swap_pages++;
+ }
}
}
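
try_to_unuse() now scans the swap map directly instead of calling the removed find_swap_entry() helper: it skips unused slots and the 0x80 marker, reads the page back in, and clears whatever count is left, including overflowed 127 entries. A rough userspace model of that scan loop; the map contents below are made up:

#include <stdio.h>

int main(void)
{
    /* invented swap map: 0 = unused, 0x80 = marker the scan must skip, else a use count */
    unsigned char map[8] = { 0, 0, 0x80, 0, 3, 0, 127, 0 };
    int max = 8;
    int i;

    for (;;) {
        int entry = 0;

        /* the inline scan that replaces find_swap_entry() */
        for (i = 1; i < max; i++) {
            if (map[i] > 0 && map[i] != 0x80) {
                entry = i;
                break;
            }
        }
        if (!entry)
            break;

        /* the kernel would read the page in and walk every process here;
         * whatever count remains (even an overflowed 127) is then cleared */
        printf("unusing slot %d (count %u)\n", entry, map[entry]);
        map[entry] = 0;
    }
    return 0;
}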
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 8f1ab1fae..53c4e58bf 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -17,16 +17,12 @@
#include <linux/kernel_stat.h>
#include <linux/errno.h>
#include <linux/string.h>
-#include <linux/stat.h>
#include <linux/swap.h>
-#include <linux/fs.h>
#include <linux/swapctl.h>
#include <linux/smp_lock.h>
#include <linux/slab.h>
+#include <linux/dcache.h>
-#include <asm/dma.h>
-#include <asm/system.h> /* for cli()/sti() */
-#include <asm/uaccess.h> /* for copy_to/from_user */
#include <asm/bitops.h>
#include <asm/pgtable.h>
@@ -355,6 +351,11 @@ static inline int do_try_to_free_page(int priority, int dma, int wait)
int i=6;
int stop;
+ /* Let the dcache know we're looking for memory ... */
+ shrink_dcache_memory();
+ /* Always trim SLAB caches when memory gets low. */
+ (void) kmem_cache_reap(0, dma, wait);
+
/* we don't try as hard if we're not waiting.. */
stop = 3;
if (wait)
@@ -366,19 +367,9 @@ static inline int do_try_to_free_page(int priority, int dma, int wait)
return 1;
state = 1;
case 1:
- /*
- * We shouldn't have a priority here:
- * If we're low on memory we should
- * unconditionally throw away _all_
- * kmalloc caches!
- */
- if (kmem_cache_reap(0, dma, wait))
- return 1;
- state = 2;
- case 2:
if (shm_swap(i, dma))
return 1;
- state = 3;
+ state = 2;
default:
if (swap_out(i, dma, wait))
return 1;
@@ -425,6 +416,7 @@ void kswapd_setup(void)
printk ("Starting kswapd v%.*s\n", i, s);
}
+#define MAX_SWAP_FAIL 3
/*
* The background pageout daemon.
* Started as a kernel thread from the init process.
@@ -452,6 +444,8 @@ int kswapd(void *unused)
init_swap_timer();
while (1) {
+ int fail;
+
kswapd_awake = 0;
current->signal = 0;
run_task_queue(&tq_disk);
@@ -462,13 +456,27 @@ int kswapd(void *unused)
* We now only swap out as many pages as needed.
* When we are truly low on memory, we swap out
* synchronously (WAIT == 1). -- Rik.
+ * If we've had too many consecutive failures,
+ * go back to sleep to let other tasks run.
+ */
+ for (fail = 0; fail++ < MAX_SWAP_FAIL;) {
+ int pages, wait;
+
+ pages = nr_free_pages;
+ if (nr_free_pages >= min_free_pages)
+ pages += atomic_read(&nr_async_pages);
+ if (pages >= free_pages_high)
+ break;
+ wait = (pages < free_pages_low);
+ if (try_to_free_page(GFP_KERNEL, 0, wait))
+ fail = 0;
+ }
+ /*
+ * Report failure if we couldn't reach the minimum goal.
*/
- while(nr_free_pages < min_free_pages)
- try_to_free_page(GFP_KERNEL, 0, 1);
- while((nr_free_pages + atomic_read(&nr_async_pages)) < free_pages_low)
- try_to_free_page(GFP_KERNEL, 0, 1);
- while((nr_free_pages + atomic_read(&nr_async_pages)) < free_pages_high)
- try_to_free_page(GFP_KERNEL, 0, 0);
+ if (nr_free_pages < min_free_pages)
+ printk("kswapd: failed, got %d of %d\n",
+ nr_free_pages, min_free_pages);
}
}
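
The kswapd change above replaces the three unconditional while loops with a single attempt loop that stops at the high watermark or after MAX_SWAP_FAIL consecutive failures, resetting the failure count on every success. The fragment below models that control flow in userspace; the watermarks and the fake try_to_free_page() are placeholders, not the kernel's values.

#include <stdio.h>
#include <stdlib.h>

#define MAX_SWAP_FAIL   3
#define MIN_FREE_PAGES  16
#define FREE_PAGES_LOW  32
#define FREE_PAGES_HIGH 64

static int nr_free_pages = 10;

static int try_to_free_page(int wait)
{
    (void) wait;               /* synchronous vs. async does not matter in the model */
    if (rand() & 1) {          /* pretend about half of the attempts succeed */
        nr_free_pages++;
        return 1;
    }
    return 0;
}

int main(void)
{
    int fail;

    for (fail = 0; fail++ < MAX_SWAP_FAIL;) {
        int wait;

        if (nr_free_pages >= FREE_PAGES_HIGH)
            break;
        wait = (nr_free_pages < FREE_PAGES_LOW);  /* swap synchronously when low */
        if (try_to_free_page(wait))
            fail = 0;                             /* success resets the failure count */
    }

    if (nr_free_pages < MIN_FREE_PAGES)
        printf("kswapd: failed, got %d of %d\n",
               nr_free_pages, MIN_FREE_PAGES);
    return 0;
}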