author     Ralf Baechle <ralf@linux-mips.org>    2001-04-05 04:55:58 +0000
committer  Ralf Baechle <ralf@linux-mips.org>    2001-04-05 04:55:58 +0000
commit     74a9f2e1b4d3ab45a9f72cb5b556c9f521524ab3 (patch)
tree       7c4cdb103ab1b388c9852a88bd6fb1e73eba0b5c /mm
parent     ee6374c8b0d333c08061c6a97bc77090d7461225 (diff)
Merge with Linux 2.4.3.
Note that mingetty no longer works with a serial console; you have to
switch to another getty such as getty_ps. This commit also includes a
fix for a setitimer bug that prevented getty_ps from working on
older kernels.
Diffstat (limited to 'mm')
-rw-r--r--  mm/filemap.c     |  27
-rw-r--r--  mm/memory.c      | 269
-rw-r--r--  mm/mlock.c       |  16
-rw-r--r--  mm/mmap.c        | 148
-rw-r--r--  mm/mprotect.c    |   5
-rw-r--r--  mm/mremap.c      |  12
-rw-r--r--  mm/page_alloc.c  |   3
-rw-r--r--  mm/shmem.c       |   4
-rw-r--r--  mm/slab.c        |  25
-rw-r--r--  mm/swapfile.c    |   4
-rw-r--r--  mm/vmalloc.c     |  12
-rw-r--r--  mm/vmscan.c      |  21
12 files changed, 331 insertions, 215 deletions
diff --git a/mm/filemap.c b/mm/filemap.c index 156ef6010..733d8667b 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -559,7 +559,7 @@ static inline int page_cache_read(struct file * file, unsigned long offset) if (page) return 0; - page = page_cache_alloc(); + page = page_cache_alloc(mapping); if (!page) return -ENOMEM; @@ -659,7 +659,7 @@ void lock_page(struct page *page) /* * a rather lightweight function, finding and getting a reference to a - * hashed page atomically, waiting for it if it's locked. + * hashed page atomically. */ struct page * __find_get_page(struct address_space *mapping, unsigned long offset, struct page **hash) @@ -679,7 +679,8 @@ struct page * __find_get_page(struct address_space *mapping, } /* - * Get the lock to a page atomically. + * Same as the above, but lock the page too, verifying that + * it's still valid once we own it. */ struct page * __find_lock_page (struct address_space *mapping, unsigned long offset, struct page **hash) @@ -1174,7 +1175,7 @@ no_cached_page: */ if (!cached_page) { spin_unlock(&pagecache_lock); - cached_page = page_cache_alloc(); + cached_page = page_cache_alloc(mapping); if (!cached_page) { desc->error = -ENOMEM; break; @@ -1474,7 +1475,7 @@ success: */ old_page = page; if (no_share) { - struct page *new_page = page_cache_alloc(); + struct page *new_page = alloc_page(GFP_HIGHUSER); if (new_page) { copy_user_highpage(new_page, old_page, address); @@ -1752,7 +1753,7 @@ asmlinkage long sys_msync(unsigned long start, size_t len, int flags) struct vm_area_struct * vma; int unmapped_error, error = -EINVAL; - down(¤t->mm->mmap_sem); + down_read(¤t->mm->mmap_sem); if (start & ~PAGE_MASK) goto out; len = (len + ~PAGE_MASK) & PAGE_MASK; @@ -1798,7 +1799,7 @@ asmlinkage long sys_msync(unsigned long start, size_t len, int flags) vma = vma->vm_next; } out: - up(¤t->mm->mmap_sem); + up_read(¤t->mm->mmap_sem); return error; } @@ -2097,7 +2098,7 @@ asmlinkage long sys_madvise(unsigned long start, size_t len, int behavior) int unmapped_error = 0; int error = -EINVAL; - down(¤t->mm->mmap_sem); + down_write(¤t->mm->mmap_sem); if (start & ~PAGE_MASK) goto out; @@ -2148,7 +2149,7 @@ asmlinkage long sys_madvise(unsigned long start, size_t len, int behavior) } out: - up(¤t->mm->mmap_sem); + up_write(¤t->mm->mmap_sem); return error; } @@ -2250,7 +2251,7 @@ asmlinkage long sys_mincore(unsigned long start, size_t len, int unmapped_error = 0; long error = -EINVAL; - down(¤t->mm->mmap_sem); + down_read(¤t->mm->mmap_sem); if (start & ~PAGE_CACHE_MASK) goto out; @@ -2302,7 +2303,7 @@ asmlinkage long sys_mincore(unsigned long start, size_t len, } out: - up(¤t->mm->mmap_sem); + up_read(¤t->mm->mmap_sem); return error; } @@ -2319,7 +2320,7 @@ repeat: page = __find_get_page(mapping, index, hash); if (!page) { if (!cached_page) { - cached_page = page_cache_alloc(); + cached_page = page_cache_alloc(mapping); if (!cached_page) return ERR_PTR(-ENOMEM); } @@ -2382,7 +2383,7 @@ repeat: page = __find_lock_page(mapping, index, hash); if (!page) { if (!*cached_page) { - *cached_page = page_cache_alloc(); + *cached_page = page_cache_alloc(mapping); if (!*cached_page) return NULL; } diff --git a/mm/memory.c b/mm/memory.c index 242981f72..c6287eae8 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -42,11 +42,11 @@ #include <linux/smp_lock.h> #include <linux/swapctl.h> #include <linux/iobuf.h> -#include <asm/uaccess.h> -#include <asm/pgalloc.h> #include <linux/highmem.h> #include <linux/pagemap.h> +#include <asm/pgalloc.h> +#include <asm/uaccess.h> unsigned long max_mapnr; unsigned long 
num_physpages; @@ -160,6 +160,7 @@ int copy_page_range(struct mm_struct *dst, struct mm_struct *src, src_pgd = pgd_offset(src, address)-1; dst_pgd = pgd_offset(dst, address)-1; + spin_lock(&dst->page_table_lock); for (;;) { pmd_t * src_pmd, * dst_pmd; @@ -177,13 +178,11 @@ skip_copy_pmd_range: address = (address + PGDIR_SIZE) & PGDIR_MASK; goto out; continue; } - if (pgd_none(*dst_pgd)) { - if (!pmd_alloc(dst_pgd, 0)) - goto nomem; - } - + src_pmd = pmd_offset(src_pgd, address); - dst_pmd = pmd_offset(dst_pgd, address); + dst_pmd = pmd_alloc(dst, dst_pgd, address); + if (!dst_pmd) + goto nomem; do { pte_t * src_pte, * dst_pte; @@ -200,13 +199,11 @@ skip_copy_pte_range: address = (address + PMD_SIZE) & PMD_MASK; goto out; goto cont_copy_pmd_range; } - if (pmd_none(*dst_pmd)) { - if (!pte_alloc(dst_pmd, 0)) - goto nomem; - } - + src_pte = pte_offset(src_pmd, address); - dst_pte = pte_offset(dst_pmd, address); + dst_pte = pte_alloc(dst, dst_pmd, address); + if (!dst_pte) + goto nomem; spin_lock(&src->page_table_lock); do { @@ -251,14 +248,14 @@ cont_copy_pmd_range: src_pmd++; dst_pmd++; } while ((unsigned long)src_pmd & PMD_TABLE_MASK); } -out: - return 0; - out_unlock: spin_unlock(&src->page_table_lock); +out: + spin_unlock(&dst->page_table_lock); return 0; nomem: + spin_unlock(&dst->page_table_lock); return -ENOMEM; } @@ -377,7 +374,6 @@ void zap_page_range(struct mm_struct *mm, unsigned long address, unsigned long s address = (address + PGDIR_SIZE) & PGDIR_MASK; dir++; } while (address && (address < end)); - spin_unlock(&mm->page_table_lock); /* * Update rss for the mm_struct (not necessarily current->mm) * Notice that rss is an unsigned long. @@ -386,6 +382,7 @@ void zap_page_range(struct mm_struct *mm, unsigned long address, unsigned long s mm->rss -= freed; else mm->rss = 0; + spin_unlock(&mm->page_table_lock); } @@ -450,7 +447,7 @@ int map_user_kiobuf(int rw, struct kiobuf *iobuf, unsigned long va, size_t len) if (err) return err; - down(&mm->mmap_sem); + down_write(&mm->mmap_sem); err = -EFAULT; iobuf->locked = 0; @@ -501,12 +498,12 @@ int map_user_kiobuf(int rw, struct kiobuf *iobuf, unsigned long va, size_t len) ptr += PAGE_SIZE; } - up(&mm->mmap_sem); + up_write(&mm->mmap_sem); dprintk ("map_user_kiobuf: end OK\n"); return 0; out_unlock: - up(&mm->mmap_sem); + up_write(&mm->mmap_sem); unmap_kiobuf(iobuf); dprintk ("map_user_kiobuf: end %d\n", err); return err; @@ -658,7 +655,7 @@ static inline void zeromap_pte_range(pte_t * pte, unsigned long address, } while (address && (address < end)); } -static inline int zeromap_pmd_range(pmd_t * pmd, unsigned long address, +static inline int zeromap_pmd_range(struct mm_struct *mm, pmd_t * pmd, unsigned long address, unsigned long size, pgprot_t prot) { unsigned long end; @@ -668,7 +665,7 @@ static inline int zeromap_pmd_range(pmd_t * pmd, unsigned long address, if (end > PGDIR_SIZE) end = PGDIR_SIZE; do { - pte_t * pte = pte_alloc(pmd, address); + pte_t * pte = pte_alloc(mm, pmd, address); if (!pte) return -ENOMEM; zeromap_pte_range(pte, address, end - address, prot); @@ -684,23 +681,27 @@ int zeromap_page_range(unsigned long address, unsigned long size, pgprot_t prot) pgd_t * dir; unsigned long beg = address; unsigned long end = address + size; + struct mm_struct *mm = current->mm; - dir = pgd_offset(current->mm, address); - flush_cache_range(current->mm, beg, end); + dir = pgd_offset(mm, address); + flush_cache_range(mm, beg, end); if (address >= end) BUG(); + + spin_lock(&mm->page_table_lock); do { - pmd_t *pmd = pmd_alloc(dir, address); + 
pmd_t *pmd = pmd_alloc(mm, dir, address); error = -ENOMEM; if (!pmd) break; - error = zeromap_pmd_range(pmd, address, end - address, prot); + error = zeromap_pmd_range(mm, pmd, address, end - address, prot); if (error) break; address = (address + PGDIR_SIZE) & PGDIR_MASK; dir++; } while (address && (address < end)); - flush_tlb_range(current->mm, beg, end); + spin_unlock(&mm->page_table_lock); + flush_tlb_range(mm, beg, end); return error; } @@ -733,7 +734,7 @@ static inline void remap_pte_range(pte_t * pte, unsigned long address, unsigned } while (address && (address < end)); } -static inline int remap_pmd_range(pmd_t * pmd, unsigned long address, unsigned long size, +static inline int remap_pmd_range(struct mm_struct *mm, pmd_t * pmd, unsigned long address, unsigned long size, unsigned long phys_addr, pgprot_t prot) { unsigned long end; @@ -744,7 +745,7 @@ static inline int remap_pmd_range(pmd_t * pmd, unsigned long address, unsigned l end = PGDIR_SIZE; phys_addr -= address; do { - pte_t * pte = pte_alloc(pmd, address); + pte_t * pte = pte_alloc(mm, pmd, address); if (!pte) return -ENOMEM; remap_pte_range(pte, address, end - address, address + phys_addr, prot); @@ -761,24 +762,28 @@ int remap_page_range(unsigned long from, unsigned long phys_addr, unsigned long pgd_t * dir; unsigned long beg = from; unsigned long end = from + size; + struct mm_struct *mm = current->mm; phys_addr -= from; - dir = pgd_offset(current->mm, from); - flush_cache_range(current->mm, beg, end); + dir = pgd_offset(mm, from); + flush_cache_range(mm, beg, end); if (from >= end) BUG(); + + spin_lock(&mm->page_table_lock); do { - pmd_t *pmd = pmd_alloc(dir, from); + pmd_t *pmd = pmd_alloc(mm, dir, from); error = -ENOMEM; if (!pmd) break; - error = remap_pmd_range(pmd, from, end - from, phys_addr + from, prot); + error = remap_pmd_range(mm, pmd, from, end - from, phys_addr + from, prot); if (error) break; from = (from + PGDIR_SIZE) & PGDIR_MASK; dir++; } while (from && (from < end)); - flush_tlb_range(current->mm, beg, end); + spin_unlock(&mm->page_table_lock); + flush_tlb_range(mm, beg, end); return error; } @@ -787,6 +792,8 @@ int remap_page_range(unsigned long from, unsigned long phys_addr, unsigned long * - flush the old one * - update the page tables * - inform the TLB about the new one + * + * We hold the mm semaphore for reading and vma->vm_mm->page_table_lock */ static inline void establish_pte(struct vm_area_struct * vma, unsigned long address, pte_t *page_table, pte_t entry) { @@ -795,6 +802,9 @@ static inline void establish_pte(struct vm_area_struct * vma, unsigned long addr update_mmu_cache(vma, address, entry); } +/* + * We hold the mm semaphore for reading and vma->vm_mm->page_table_lock + */ static inline void break_cow(struct vm_area_struct * vma, struct page * old_page, struct page * new_page, unsigned long address, pte_t *page_table) { @@ -862,7 +872,6 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma, break; flush_cache_page(vma, address); establish_pte(vma, address, page_table, pte_mkyoung(pte_mkdirty(pte_mkwrite(pte)))); - spin_unlock(&mm->page_table_lock); return 1; /* Minor fault */ } @@ -870,10 +879,10 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma, * Ok, we need to copy. Oh, well.. 
*/ spin_unlock(&mm->page_table_lock); - new_page = page_cache_alloc(); + new_page = alloc_page(GFP_HIGHUSER); + spin_lock(&mm->page_table_lock); if (!new_page) return -1; - spin_lock(&mm->page_table_lock); /* * Re-check the pte - we dropped the lock @@ -886,12 +895,10 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma, /* Free the old page.. */ new_page = old_page; } - spin_unlock(&mm->page_table_lock); page_cache_release(new_page); return 1; /* Minor fault */ bad_wp_page: - spin_unlock(&mm->page_table_lock); printk("do_wp_page: bogus page at address %08lx (page 0x%lx)\n",address,(unsigned long)old_page); return -1; } @@ -1021,63 +1028,100 @@ void swapin_readahead(swp_entry_t entry) return; } +/* + * We hold the mm semaphore and the page_table_lock on entry and exit. + */ static int do_swap_page(struct mm_struct * mm, struct vm_area_struct * vma, unsigned long address, pte_t * page_table, swp_entry_t entry, int write_access) { - struct page *page = lookup_swap_cache(entry); + struct page *page; pte_t pte; + spin_unlock(&mm->page_table_lock); + page = lookup_swap_cache(entry); if (!page) { lock_kernel(); swapin_readahead(entry); page = read_swap_cache(entry); unlock_kernel(); - if (!page) + if (!page) { + spin_lock(&mm->page_table_lock); return -1; + } flush_page_to_ram(page); flush_icache_page(vma, page); } - mm->rss++; - - pte = mk_pte(page, vma->vm_page_prot); - /* * Freeze the "shared"ness of the page, ie page_count + swap_count. * Must lock page before transferring our swap count to already * obtained page count. */ lock_page(page); + + /* + * Back out if somebody else faulted in this pte while we + * released the page table lock. + */ + spin_lock(&mm->page_table_lock); + if (pte_present(*page_table)) { + UnlockPage(page); + page_cache_release(page); + return 1; + } + + /* The page isn't present yet, go ahead with the fault. */ + mm->rss++; + pte = mk_pte(page, vma->vm_page_prot); + swap_free(entry); if (write_access && !is_page_shared(page)) pte = pte_mkwrite(pte_mkdirty(pte)); UnlockPage(page); set_pte(page_table, pte); + /* No need to invalidate - it was non-present before */ update_mmu_cache(vma, address, pte); return 1; /* Minor fault */ } /* - * This only needs the MM semaphore + * We are called with the MM semaphore and page_table_lock + * spinlock held to protect against concurrent faults in + * multithreaded programs. */ static int do_anonymous_page(struct mm_struct * mm, struct vm_area_struct * vma, pte_t *page_table, int write_access, unsigned long addr) { - struct page *page = NULL; - pte_t entry = pte_wrprotect(mk_pte(ZERO_PAGE(addr), vma->vm_page_prot)); + pte_t entry; + + /* Read-only mapping of ZERO_PAGE. */ + entry = pte_wrprotect(mk_pte(ZERO_PAGE(addr), vma->vm_page_prot)); + + /* ..except if it's a write access */ if (write_access) { + struct page *page; + + /* Allocate our own private page. 
*/ + spin_unlock(&mm->page_table_lock); page = alloc_page(GFP_HIGHUSER); + spin_lock(&mm->page_table_lock); if (!page) return -1; - clear_user_highpage(page, addr); - entry = pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot))); + if (!pte_none(*page_table)) { + page_cache_release(page); + return 1; + } mm->rss++; + clear_user_highpage(page, addr); flush_page_to_ram(page); + entry = pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot))); } + set_pte(page_table, entry); + /* No need to invalidate - it was non-present before */ update_mmu_cache(vma, addr, entry); return 1; /* Minor fault */ @@ -1092,7 +1136,8 @@ static int do_anonymous_page(struct mm_struct * mm, struct vm_area_struct * vma, * As this is called only for pages that do not currently exist, we * do not need to flush old virtual caches or the TLB. * - * This is called with the MM semaphore held. + * This is called with the MM semaphore held and the page table + * spinlock held. */ static int do_no_page(struct mm_struct * mm, struct vm_area_struct * vma, unsigned long address, int write_access, pte_t *page_table) @@ -1102,6 +1147,7 @@ static int do_no_page(struct mm_struct * mm, struct vm_area_struct * vma, if (!vma->vm_ops || !vma->vm_ops->nopage) return do_anonymous_page(mm, vma, page_table, write_access, address); + spin_unlock(&mm->page_table_lock); /* * The third argument is "no_share", which tells the low-level code @@ -1109,11 +1155,12 @@ static int do_no_page(struct mm_struct * mm, struct vm_area_struct * vma, * essentially an early COW detection. */ new_page = vma->vm_ops->nopage(vma, address & PAGE_MASK, (vma->vm_flags & VM_SHARED)?0:write_access); + + spin_lock(&mm->page_table_lock); if (new_page == NULL) /* no page was available -- SIGBUS */ return 0; if (new_page == NOPAGE_OOM) return -1; - ++mm->rss; /* * This silly early PAGE_DIRTY setting removes a race * due to the bad i386 page protection. But it's valid @@ -1124,15 +1171,24 @@ static int do_no_page(struct mm_struct * mm, struct vm_area_struct * vma, * so we can make it writable and dirty to avoid having to * handle that later. */ - flush_page_to_ram(new_page); - flush_icache_page(vma, new_page); - entry = mk_pte(new_page, vma->vm_page_prot); - if (write_access) { - entry = pte_mkwrite(pte_mkdirty(entry)); - } else if (page_count(new_page) > 1 && - !(vma->vm_flags & VM_SHARED)) - entry = pte_wrprotect(entry); - set_pte(page_table, entry); + /* Only go through if we didn't race with anybody else... */ + if (pte_none(*page_table)) { + ++mm->rss; + flush_page_to_ram(new_page); + flush_icache_page(vma, new_page); + entry = mk_pte(new_page, vma->vm_page_prot); + if (write_access) { + entry = pte_mkwrite(pte_mkdirty(entry)); + } else if (page_count(new_page) > 1 && + !(vma->vm_flags & VM_SHARED)) + entry = pte_wrprotect(entry); + set_pte(page_table, entry); + } else { + /* One of our sibling threads was faster, back out. */ + page_cache_release(new_page); + return 1; + } + /* no need to invalidate: a not-present page shouldn't be cached */ update_mmu_cache(vma, address, entry); return 2; /* Major fault */ @@ -1162,11 +1218,6 @@ static inline int handle_pte_fault(struct mm_struct *mm, { pte_t entry; - /* - * We need the page table lock to synchronize with kswapd - * and the SMP-safe atomic PTE updates. - */ - spin_lock(&mm->page_table_lock); entry = *pte; if (!pte_present(entry)) { /* @@ -1174,7 +1225,6 @@ static inline int handle_pte_fault(struct mm_struct *mm, * and the PTE updates will not touch it later. So * drop the lock. 
*/ - spin_unlock(&mm->page_table_lock); if (pte_none(entry)) return do_no_page(mm, vma, address, write_access, pte); return do_swap_page(mm, vma, address, pte, pte_to_swp_entry(entry), write_access); @@ -1188,7 +1238,6 @@ static inline int handle_pte_fault(struct mm_struct *mm, } entry = pte_mkyoung(entry); establish_pte(vma, address, pte, entry); - spin_unlock(&mm->page_table_lock); return 1; } @@ -1204,17 +1253,95 @@ int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct * vma, current->state = TASK_RUNNING; pgd = pgd_offset(mm, address); - pmd = pmd_alloc(pgd, address); + + /* + * We need the page table lock to synchronize with kswapd + * and the SMP-safe atomic PTE updates. + */ + spin_lock(&mm->page_table_lock); + pmd = pmd_alloc(mm, pgd, address); if (pmd) { - pte_t * pte = pte_alloc(pmd, address); + pte_t * pte = pte_alloc(mm, pmd, address); if (pte) ret = handle_pte_fault(mm, vma, address, write_access, pte); } + spin_unlock(&mm->page_table_lock); return ret; } /* + * Allocate page middle directory. + * + * We've already handled the fast-path in-line, and we own the + * page table lock. + * + * On a two-level page table, this ends up actually being entirely + * optimized away. + */ +pmd_t *__pmd_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address) +{ + pmd_t *new; + + /* "fast" allocation can happen without dropping the lock.. */ + new = pmd_alloc_one_fast(mm, address); + if (!new) { + spin_unlock(&mm->page_table_lock); + new = pmd_alloc_one(mm, address); + spin_lock(&mm->page_table_lock); + if (!new) + return NULL; + + /* + * Because we dropped the lock, we should re-check the + * entry, as somebody else could have populated it.. + */ + if (pgd_present(*pgd)) { + pmd_free(new); + goto out; + } + } + pgd_populate(mm, pgd, new); +out: + return pmd_offset(pgd, address); +} + +/* + * Allocate the page table directory. + * + * We've already handled the fast-path in-line, and we own the + * page table lock. + */ +pte_t *pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address) +{ + if (!pmd_present(*pmd)) { + pte_t *new; + + /* "fast" allocation can happen without dropping the lock.. */ + new = pte_alloc_one_fast(mm, address); + if (!new) { + spin_unlock(&mm->page_table_lock); + new = pte_alloc_one(mm, address); + spin_lock(&mm->page_table_lock); + if (!new) + return NULL; + + /* + * Because we dropped the lock, we should re-check the + * entry, as somebody else could have populated it.. + */ + if (pmd_present(*pmd)) { + pte_free(new); + goto out; + } + } + pmd_populate(mm, pmd, new); + } +out: + return pte_offset(pmd, address); +} + +/* * Simplistic page force-in.. 
*/ int make_pages_present(unsigned long addr, unsigned long end) diff --git a/mm/mlock.c b/mm/mlock.c index 16e9f947b..75bca3639 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -198,7 +198,7 @@ asmlinkage long sys_mlock(unsigned long start, size_t len) unsigned long lock_limit; int error = -ENOMEM; - down(¤t->mm->mmap_sem); + down_write(¤t->mm->mmap_sem); len = PAGE_ALIGN(len + (start & ~PAGE_MASK)); start &= PAGE_MASK; @@ -219,7 +219,7 @@ asmlinkage long sys_mlock(unsigned long start, size_t len) error = do_mlock(start, len, 1); out: - up(¤t->mm->mmap_sem); + up_write(¤t->mm->mmap_sem); return error; } @@ -227,11 +227,11 @@ asmlinkage long sys_munlock(unsigned long start, size_t len) { int ret; - down(¤t->mm->mmap_sem); + down_write(¤t->mm->mmap_sem); len = PAGE_ALIGN(len + (start & ~PAGE_MASK)); start &= PAGE_MASK; ret = do_mlock(start, len, 0); - up(¤t->mm->mmap_sem); + up_write(¤t->mm->mmap_sem); return ret; } @@ -268,7 +268,7 @@ asmlinkage long sys_mlockall(int flags) unsigned long lock_limit; int ret = -EINVAL; - down(¤t->mm->mmap_sem); + down_write(¤t->mm->mmap_sem); if (!flags || (flags & ~(MCL_CURRENT | MCL_FUTURE))) goto out; @@ -286,7 +286,7 @@ asmlinkage long sys_mlockall(int flags) ret = do_mlockall(flags); out: - up(¤t->mm->mmap_sem); + up_write(¤t->mm->mmap_sem); return ret; } @@ -294,8 +294,8 @@ asmlinkage long sys_munlockall(void) { int ret; - down(¤t->mm->mmap_sem); + down_write(¤t->mm->mmap_sem); ret = do_mlockall(0); - up(¤t->mm->mmap_sem); + up_write(¤t->mm->mmap_sem); return ret; } @@ -133,7 +133,7 @@ asmlinkage unsigned long sys_brk(unsigned long brk) unsigned long newbrk, oldbrk; struct mm_struct *mm = current->mm; - down(&mm->mmap_sem); + down_write(&mm->mmap_sem); if (brk < mm->end_code) goto out; @@ -169,7 +169,7 @@ set_brk: mm->brk = brk; out: retval = mm->brk; - up(&mm->mmap_sem); + up_write(&mm->mmap_sem); return retval; } @@ -177,7 +177,7 @@ out: * internally. Essentially, translate the "PROT_xxx" and "MAP_xxx" bits * into "VM_xxx". */ -static inline unsigned long vm_flags(unsigned long prot, unsigned long flags) +static inline unsigned long calc_vm_flags(unsigned long prot, unsigned long flags) { #define _trans(x,bit1,bit2) \ ((bit1==bit2)?(x&bit1):(x&bit1)?bit2:0) @@ -200,6 +200,7 @@ unsigned long do_mmap_pgoff(struct file * file, unsigned long addr, unsigned lon { struct mm_struct * mm = current->mm; struct vm_area_struct * vma; + unsigned int vm_flags; int correct_wcount = 0; int error; @@ -220,19 +221,33 @@ unsigned long do_mmap_pgoff(struct file * file, unsigned long addr, unsigned lon if (mm->map_count > MAX_MAP_COUNT) return -ENOMEM; + /* Obtain the address to map to. we verify (or select) it and ensure + * that it represents a valid section of the address space. + */ + if (flags & MAP_FIXED) { + if (addr & ~PAGE_MASK) + return -EINVAL; + } else { + addr = get_unmapped_area(addr, len); + if (!addr) + return -ENOMEM; + } + + /* Do simple checking here so the lower-level routines won't have + * to. we assume access permissions have been handled by the open + * of the memory object, so we don't do any here. + */ + vm_flags = calc_vm_flags(prot,flags) | mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC; + /* mlock MCL_FUTURE? */ - if (mm->def_flags & VM_LOCKED) { + if (vm_flags & VM_LOCKED) { unsigned long locked = mm->locked_vm << PAGE_SHIFT; locked += len; if (locked > current->rlim[RLIMIT_MEMLOCK].rlim_cur) return -EAGAIN; } - /* Do simple checking here so the lower-level routines won't have - * to. 
we assume access permissions have been handled by the open - * of the memory object, so we don't do any here. - */ - if (file != NULL) { + if (file) { switch (flags & MAP_TYPE) { case MAP_SHARED: if ((prot & PROT_WRITE) && !(file->f_mode & FMODE_WRITE)) @@ -246,6 +261,10 @@ unsigned long do_mmap_pgoff(struct file * file, unsigned long addr, unsigned lon if (locks_verify_locked(file->f_dentry->d_inode)) return -EAGAIN; + vm_flags |= VM_SHARED | VM_MAYSHARE; + if (!(file->f_mode & FMODE_WRITE)) + vm_flags &= ~(VM_MAYWRITE | VM_SHARED); + /* fall through */ case MAP_PRIVATE: if (!(file->f_mode & FMODE_READ)) @@ -255,18 +274,43 @@ unsigned long do_mmap_pgoff(struct file * file, unsigned long addr, unsigned lon default: return -EINVAL; } + } else { + vm_flags |= VM_SHARED | VM_MAYSHARE; + switch (flags & MAP_TYPE) { + default: + return -EINVAL; + case MAP_PRIVATE: + vm_flags &= ~(VM_SHARED | VM_MAYSHARE); + /* fall through */ + case MAP_SHARED: + break; + } } - /* Obtain the address to map to. we verify (or select) it and ensure - * that it represents a valid section of the address space. - */ - if (flags & MAP_FIXED) { - if (addr & ~PAGE_MASK) - return -EINVAL; - } else { - addr = get_unmapped_area(addr, len); - if (!addr) - return -ENOMEM; + /* Clear old maps */ + error = -ENOMEM; + if (do_munmap(mm, addr, len)) + return -ENOMEM; + + /* Check against address space limit. */ + if ((mm->total_vm << PAGE_SHIFT) + len + > current->rlim[RLIMIT_AS].rlim_cur) + return -ENOMEM; + + /* Private writable mapping? Check memory availability.. */ + if ((vm_flags & (VM_SHARED | VM_WRITE)) == VM_WRITE && + !(flags & MAP_NORESERVE) && + !vm_enough_memory(len >> PAGE_SHIFT)) + return -ENOMEM; + + /* Can we just expand an old anonymous mapping? */ + if (addr && !file && !(vm_flags & VM_SHARED)) { + struct vm_area_struct * vma = find_vma(mm, addr-1); + if (vma && vma->vm_end == addr && !vma->vm_file && + vma->vm_flags == vm_flags) { + vma->vm_end = addr + len; + goto out; + } } /* Determine the object being mapped and call the appropriate @@ -280,58 +324,16 @@ unsigned long do_mmap_pgoff(struct file * file, unsigned long addr, unsigned lon vma->vm_mm = mm; vma->vm_start = addr; vma->vm_end = addr + len; - vma->vm_flags = vm_flags(prot,flags) | mm->def_flags; - - if (file) { - VM_ClearReadHint(vma); - vma->vm_raend = 0; - - if (file->f_mode & FMODE_READ) - vma->vm_flags |= VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC; - if (flags & MAP_SHARED) { - vma->vm_flags |= VM_SHARED | VM_MAYSHARE; - - /* This looks strange, but when we don't have the file open - * for writing, we can demote the shared mapping to a simpler - * private mapping. That also takes care of a security hole - * with ptrace() writing to a shared mapping without write - * permissions. - * - * We leave the VM_MAYSHARE bit on, just to get correct output - * from /proc/xxx/maps.. - */ - if (!(file->f_mode & FMODE_WRITE)) - vma->vm_flags &= ~(VM_MAYWRITE | VM_SHARED); - } - } else { - vma->vm_flags |= VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC; - if (flags & MAP_SHARED) - vma->vm_flags |= VM_SHARED | VM_MAYSHARE; - } - vma->vm_page_prot = protection_map[vma->vm_flags & 0x0f]; + vma->vm_flags = vm_flags; + vma->vm_page_prot = protection_map[vm_flags & 0x0f]; vma->vm_ops = NULL; vma->vm_pgoff = pgoff; vma->vm_file = NULL; vma->vm_private_data = NULL; - - /* Clear old maps */ - error = -ENOMEM; - if (do_munmap(mm, addr, len)) - goto free_vma; - - /* Check against address space limit. 
*/ - if ((mm->total_vm << PAGE_SHIFT) + len - > current->rlim[RLIMIT_AS].rlim_cur) - goto free_vma; - - /* Private writable mapping? Check memory availability.. */ - if ((vma->vm_flags & (VM_SHARED | VM_WRITE)) == VM_WRITE && - !(flags & MAP_NORESERVE) && - !vm_enough_memory(len >> PAGE_SHIFT)) - goto free_vma; + vma->vm_raend = 0; if (file) { - if (vma->vm_flags & VM_DENYWRITE) { + if (vm_flags & VM_DENYWRITE) { error = deny_write_access(file); if (error) goto free_vma; @@ -353,15 +355,15 @@ unsigned long do_mmap_pgoff(struct file * file, unsigned long addr, unsigned lon * Answer: Yes, several device drivers can do it in their * f_op->mmap method. -DaveM */ - flags = vma->vm_flags; addr = vma->vm_start; insert_vm_struct(mm, vma); if (correct_wcount) atomic_inc(&file->f_dentry->d_inode->i_writecount); - + +out: mm->total_vm += len >> PAGE_SHIFT; - if (flags & VM_LOCKED) { + if (vm_flags & VM_LOCKED) { mm->locked_vm += len >> PAGE_SHIFT; make_pages_present(addr, addr + len); } @@ -776,9 +778,9 @@ asmlinkage long sys_munmap(unsigned long addr, size_t len) int ret; struct mm_struct *mm = current->mm; - down(&mm->mmap_sem); + down_write(&mm->mmap_sem); ret = do_munmap(mm, addr, len); - up(&mm->mmap_sem); + up_write(&mm->mmap_sem); return ret; } @@ -825,12 +827,11 @@ unsigned long do_brk(unsigned long addr, unsigned long len) if (!vm_enough_memory(len >> PAGE_SHIFT)) return -ENOMEM; - flags = vm_flags(PROT_READ|PROT_WRITE|PROT_EXEC, + flags = calc_vm_flags(PROT_READ|PROT_WRITE|PROT_EXEC, MAP_FIXED|MAP_PRIVATE) | mm->def_flags; flags |= VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC; - /* Can we just expand an old anonymous mapping? */ if (addr) { struct vm_area_struct * vma = find_vma(mm, addr-1); @@ -841,7 +842,6 @@ unsigned long do_brk(unsigned long addr, unsigned long len) } } - /* * create a vma struct for an anonymous mapping */ @@ -889,8 +889,8 @@ void exit_mmap(struct mm_struct * mm) spin_lock(&mm->page_table_lock); mpnt = mm->mmap; mm->mmap = mm->mmap_avl = mm->mmap_cache = NULL; - spin_unlock(&mm->page_table_lock); mm->rss = 0; + spin_unlock(&mm->page_table_lock); mm->total_vm = 0; mm->locked_vm = 0; diff --git a/mm/mprotect.c b/mm/mprotect.c index 91905c8b1..10c500100 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -242,7 +242,8 @@ asmlinkage long sys_mprotect(unsigned long start, size_t len, unsigned long prot if (end == start) return 0; - down(¤t->mm->mmap_sem); + /* XXX: maybe this could be down_read ??? 
- Rik */ + down_write(¤t->mm->mmap_sem); vma = find_vma(current->mm, start); error = -EFAULT; @@ -278,6 +279,6 @@ asmlinkage long sys_mprotect(unsigned long start, size_t len, unsigned long prot } } out: - up(¤t->mm->mmap_sem); + up_write(¤t->mm->mmap_sem); return error; } diff --git a/mm/mremap.c b/mm/mremap.c index e237c9442..3a3b40ee4 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -51,9 +51,9 @@ static inline pte_t *alloc_one_pte(struct mm_struct *mm, unsigned long addr) pmd_t * pmd; pte_t * pte = NULL; - pmd = pmd_alloc(pgd_offset(mm, addr), addr); + pmd = pmd_alloc(mm, pgd_offset(mm, addr), addr); if (pmd) - pte = pte_alloc(pmd, addr); + pte = pte_alloc(mm, pmd, addr); return pte; } @@ -62,7 +62,6 @@ static inline int copy_one_pte(struct mm_struct *mm, pte_t * src, pte_t * dst) int error = 0; pte_t pte; - spin_lock(&mm->page_table_lock); if (!pte_none(*src)) { pte = ptep_get_and_clear(src); if (!dst) { @@ -72,7 +71,6 @@ static inline int copy_one_pte(struct mm_struct *mm, pte_t * src, pte_t * dst) } set_pte(dst, pte); } - spin_unlock(&mm->page_table_lock); return error; } @@ -81,9 +79,11 @@ static int move_one_page(struct mm_struct *mm, unsigned long old_addr, unsigned int error = 0; pte_t * src; + spin_lock(&mm->page_table_lock); src = get_one_pte(mm, old_addr); if (src) error = copy_one_pte(mm, src, alloc_one_pte(mm, new_addr)); + spin_unlock(&mm->page_table_lock); return error; } @@ -292,8 +292,8 @@ asmlinkage unsigned long sys_mremap(unsigned long addr, { unsigned long ret; - down(¤t->mm->mmap_sem); + down_write(¤t->mm->mmap_sem); ret = do_mremap(addr, old_len, new_len, flags, new_addr); - up(¤t->mm->mmap_sem); + up_write(¤t->mm->mmap_sem); return ret; } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 09ac27284..6ad49c723 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -455,8 +455,7 @@ try_again: memory_pressure++; try_to_free_pages(gfp_mask); wakeup_bdflush(0); - if (!order) - goto try_again; + goto try_again; } } diff --git a/mm/shmem.c b/mm/shmem.c index 00426ca27..951dcb7a2 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -321,7 +321,7 @@ struct page * shmem_nopage(struct vm_area_struct * vma, unsigned long address, i inode->i_sb->u.shmem_sb.free_blocks--; spin_unlock (&inode->i_sb->u.shmem_sb.stat_lock); /* Ok, get a new page */ - page = page_cache_alloc(); + page = page_cache_alloc(mapping); if (!page) goto oom; clear_user_highpage(page, address); @@ -338,7 +338,7 @@ cached_page: up(&inode->i_sem); if (no_share) { - struct page *new_page = page_cache_alloc(); + struct page *new_page = page_cache_alloc(inode->i_mapping); if (new_page) { copy_user_highpage(new_page, page, address); @@ -814,28 +814,6 @@ opps: return cachep; } -/* - * This check if the kmem_cache_t pointer is chained in the cache_cache - * list. -arca - */ -static int is_chained_kmem_cache(kmem_cache_t * cachep) -{ - struct list_head *p; - int ret = 0; - - /* Find the cache in the chain of caches. */ - down(&cache_chain_sem); - list_for_each(p, &cache_chain) { - if (p == &cachep->next) { - ret = 1; - break; - } - } - up(&cache_chain_sem); - - return ret; -} - #ifdef CONFIG_SMP /* * Waits for all CPUs to execute func(). @@ -938,7 +916,7 @@ static int __kmem_cache_shrink(kmem_cache_t *cachep) */ int kmem_cache_shrink(kmem_cache_t *cachep) { - if (!cachep || in_interrupt() || !is_chained_kmem_cache(cachep)) + if (!cachep || in_interrupt()) BUG(); return __kmem_cache_shrink(cachep); @@ -1539,7 +1517,6 @@ void * kmalloc (size_t size, int flags) return __kmem_cache_alloc(flags & GFP_DMA ? 
csizep->cs_dmacachep : csizep->cs_cachep, flags); } - BUG(); // too big size return NULL; } diff --git a/mm/swapfile.c b/mm/swapfile.c index f02800bc6..1ccbeca82 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -209,6 +209,7 @@ bad_count: * share this swap entry, so be cautious and let do_wp_page work out * what to do if a write is requested later. */ +/* tasklist_lock and vma->vm_mm->page_table_lock are held */ static inline void unuse_pte(struct vm_area_struct * vma, unsigned long address, pte_t *dir, swp_entry_t entry, struct page* page) { @@ -234,6 +235,7 @@ static inline void unuse_pte(struct vm_area_struct * vma, unsigned long address, ++vma->vm_mm->rss; } +/* tasklist_lock and vma->vm_mm->page_table_lock are held */ static inline void unuse_pmd(struct vm_area_struct * vma, pmd_t *dir, unsigned long address, unsigned long size, unsigned long offset, swp_entry_t entry, struct page* page) @@ -261,6 +263,7 @@ static inline void unuse_pmd(struct vm_area_struct * vma, pmd_t *dir, } while (address && (address < end)); } +/* tasklist_lock and vma->vm_mm->page_table_lock are held */ static inline void unuse_pgd(struct vm_area_struct * vma, pgd_t *dir, unsigned long address, unsigned long size, swp_entry_t entry, struct page* page) @@ -291,6 +294,7 @@ static inline void unuse_pgd(struct vm_area_struct * vma, pgd_t *dir, } while (address && (address < end)); } +/* tasklist_lock and vma->vm_mm->page_table_lock are held */ static void unuse_vma(struct vm_area_struct * vma, pgd_t *pgdir, swp_entry_t entry, struct page* page) { diff --git a/mm/vmalloc.c b/mm/vmalloc.c index ab74d114b..4ca1d19c7 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -102,9 +102,11 @@ static inline int alloc_area_pte (pte_t * pte, unsigned long address, end = PMD_SIZE; do { struct page * page; + spin_unlock(&init_mm.page_table_lock); + page = alloc_page(gfp_mask); + spin_lock(&init_mm.page_table_lock); if (!pte_none(*pte)) printk(KERN_ERR "alloc_area_pte: page already exists\n"); - page = alloc_page(gfp_mask); if (!page) return -ENOMEM; set_pte(pte, mk_pte(page, prot)); @@ -123,7 +125,7 @@ static inline int alloc_area_pmd(pmd_t * pmd, unsigned long address, unsigned lo if (end > PGDIR_SIZE) end = PGDIR_SIZE; do { - pte_t * pte = pte_alloc_kernel(pmd, address); + pte_t * pte = pte_alloc(&init_mm, pmd, address); if (!pte) return -ENOMEM; if (alloc_area_pte(pte, address, end - address, gfp_mask, prot)) @@ -143,11 +145,11 @@ inline int vmalloc_area_pages (unsigned long address, unsigned long size, dir = pgd_offset_k(address); flush_cache_all(); - lock_kernel(); + spin_lock(&init_mm.page_table_lock); do { pmd_t *pmd; - pmd = pmd_alloc_kernel(dir, address); + pmd = pmd_alloc(&init_mm, dir, address); ret = -ENOMEM; if (!pmd) break; @@ -161,7 +163,7 @@ inline int vmalloc_area_pages (unsigned long address, unsigned long size, ret = 0; } while (address && (address < end)); - unlock_kernel(); + spin_unlock(&init_mm.page_table_lock); flush_tlb_all(); return ret; } diff --git a/mm/vmscan.c b/mm/vmscan.c index f41c53328..be1090882 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -25,16 +25,15 @@ #include <asm/pgalloc.h> /* - * The swap-out functions return 1 if they successfully - * threw something out, and we got a free page. It returns - * zero if it couldn't do anything, and any other value - * indicates it decreased rss, but the page was shared. + * The swap-out function returns 1 if it successfully + * scanned all the pages it was asked to (`count'). + * It returns zero if it couldn't do anything, * - * NOTE! 
If it sleeps, it *must* return 1 to make sure we - * don't continue with the swap-out. Otherwise we may be - * using a process that no longer actually exists (it might - * have died while we slept). + * rss may decrease because pages are shared, but this + * doesn't count as having freed a page. */ + +/* mm->page_table_lock is held. mmap_sem is not held */ static void try_to_swap_out(struct mm_struct * mm, struct vm_area_struct* vma, unsigned long address, pte_t * page_table, struct page *page) { pte_t pte; @@ -129,6 +128,7 @@ out_unlock_restore: return; } +/* mm->page_table_lock is held. mmap_sem is not held */ static int swap_out_pmd(struct mm_struct * mm, struct vm_area_struct * vma, pmd_t *dir, unsigned long address, unsigned long end, int count) { pte_t * pte; @@ -165,6 +165,7 @@ static int swap_out_pmd(struct mm_struct * mm, struct vm_area_struct * vma, pmd_ return count; } +/* mm->page_table_lock is held. mmap_sem is not held */ static inline int swap_out_pgd(struct mm_struct * mm, struct vm_area_struct * vma, pgd_t *dir, unsigned long address, unsigned long end, int count) { pmd_t * pmd; @@ -194,6 +195,7 @@ static inline int swap_out_pgd(struct mm_struct * mm, struct vm_area_struct * vm return count; } +/* mm->page_table_lock is held. mmap_sem is not held */ static int swap_out_vma(struct mm_struct * mm, struct vm_area_struct * vma, unsigned long address, int count) { pgd_t *pgdir; @@ -218,6 +220,9 @@ static int swap_out_vma(struct mm_struct * mm, struct vm_area_struct * vma, unsi return count; } +/* + * Returns non-zero if we scanned all `count' pages + */ static int swap_out_mm(struct mm_struct * mm, int count) { unsigned long address; |
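
A change that recurs through the whole diff is the conversion of mm->mmap_sem from a plain semaphore into a read-write semaphore: sys_msync and sys_mincore only walk the VMA list and now take it shared (down_read/up_read), while callers that change the address-space layout — sys_brk, sys_mmap, sys_munmap, sys_mlock, sys_mremap, sys_madvise, sys_mprotect — take it exclusive (down_write/up_write). A minimal userspace sketch of the same discipline, using a POSIX rwlock in place of the kernel's rw_semaphore (function names here are illustrative, not from the kernel):

#include <pthread.h>

static pthread_rwlock_t mmap_lock = PTHREAD_RWLOCK_INITIALIZER;

/* msync/mincore-style reader: examines VMAs, changes nothing structural */
static void walk_vmas(void)
{
	pthread_rwlock_rdlock(&mmap_lock);	/* down_read(&mm->mmap_sem) */
	/* ... walk the mapping list ... */
	pthread_rwlock_unlock(&mmap_lock);	/* up_read(&mm->mmap_sem) */
}

/* mmap/munmap/mremap-style writer: inserts or removes VMAs */
static void modify_vmas(void)
{
	pthread_rwlock_wrlock(&mmap_lock);	/* down_write(&mm->mmap_sem) */
	/* ... change the mapping list ... */
	pthread_rwlock_unlock(&mmap_lock);	/* up_write(&mm->mmap_sem) */
}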
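
filemap.c and shmem.c now pass the address_space down to page_cache_alloc(), so page-cache pages can be allocated under a per-mapping policy; the copy-on-write paths in filemap_nopage and do_wp_page, whose private copies belong to no mapping, switch to a bare alloc_page(GFP_HIGHUSER) instead. A sketch of what the new helper presumably amounts to — the gfp_mask field is an assumption modeled on the 2.4 page-cache headers, not something visible in this diff:

/* Assumed shape of the per-mapping allocator (cf. 2.4 <linux/pagemap.h>). */
static inline struct page *page_cache_alloc(struct address_space *x)
{
	return alloc_pages(x->gfp_mask, 0);	/* gfp policy travels with the mapping */
}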
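
The heart of the memory.c changes is a new locking discipline for the fault handlers: handle_mm_fault now takes mm->page_table_lock once around pmd_alloc/pte_alloc/handle_pte_fault, and anything inside a handler that can sleep drops the lock, retakes it, and re-checks the pte before committing, backing out if a sibling thread resolved the fault in the meantime. do_wp_page, do_no_page and do_anonymous_page all follow this pattern; condensed here from the do_anonymous_page hunk above, with flushes and error paths trimmed:

	spin_unlock(&mm->page_table_lock);
	page = alloc_page(GFP_HIGHUSER);		/* may sleep */
	spin_lock(&mm->page_table_lock);
	if (!page)
		return -1;
	if (!pte_none(*page_table)) {			/* raced: fault already handled */
		page_cache_release(page);
		return 1;				/* report a minor fault */
	}
	mm->rss++;
	clear_user_highpage(page, addr);
	entry = pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot)));
	set_pte(page_table, entry);
	update_mmu_cache(vma, addr, entry);		/* was non-present: no TLB invalidate needed */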
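
do_swap_page applies the same discipline around swap I/O: the lock is dropped across the swap-cache lookup and read, the page is locked to freeze page_count + swap_count, and only then is the lock retaken and the pte re-checked. Condensed from the hunk above:

	spin_unlock(&mm->page_table_lock);
	page = lookup_swap_cache(entry);
	if (!page) {
		lock_kernel();
		swapin_readahead(entry);
		page = read_swap_cache(entry);		/* may block on I/O */
		unlock_kernel();
		if (!page) {
			spin_lock(&mm->page_table_lock);
			return -1;
		}
	}
	lock_page(page);
	spin_lock(&mm->page_table_lock);
	if (pte_present(*page_table)) {			/* sibling faulted it in first */
		UnlockPage(page);
		page_cache_release(page);
		return 1;
	}
	/* still non-present: proceed with mm->rss++, mk_pte(), swap_free() ... */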
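
pmd_alloc() and pte_alloc() now take the mm_struct and are entered with page_table_lock held. Each first tries an atomic allocation; the slow path releases the lock, sleeps in the allocator, retakes the lock, and re-checks whether another thread populated the entry while the lock was down. Condensed from the new __pmd_alloc() above (pte_alloc() is symmetric):

	new = pmd_alloc_one_fast(mm, address);		/* atomic attempt, lock held */
	if (!new) {
		spin_unlock(&mm->page_table_lock);
		new = pmd_alloc_one(mm, address);	/* may sleep */
		spin_lock(&mm->page_table_lock);
		if (!new)
			return NULL;
		if (pgd_present(*pgd)) {		/* raced: already populated */
			pmd_free(new);
			return pmd_offset(pgd, address);
		}
	}
	pgd_populate(mm, pgd, new);
	return pmd_offset(pgd, address);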
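
In mmap.c, do_mmap_pgoff() now computes the final vm_flags up front (via the renamed calc_vm_flags()) and performs the munmap, RLIMIT_AS, and vm_enough_memory checks before allocating a vm_area_struct; that ordering is what lets it merge a new private anonymous mapping into an adjacent one without allocating a vma at all. The merge test, re-lined from the hunk above:

	/* Can we just expand an old anonymous mapping? */
	if (addr && !file && !(vm_flags & VM_SHARED)) {
		struct vm_area_struct * vma = find_vma(mm, addr-1);
		if (vma && vma->vm_end == addr && !vma->vm_file &&
		    vma->vm_flags == vm_flags) {
			vma->vm_end = addr + len;	/* grow in place, no new vma */
			goto out;			/* shared accounting at out: */
		}
	}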
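
vmalloc.c drops the big kernel lock in favour of init_mm.page_table_lock and applies the same drop-around-allocation rule inside alloc_area_pte(); re-lined from the hunk above:

	spin_unlock(&init_mm.page_table_lock);
	page = alloc_page(gfp_mask);			/* may sleep */
	spin_lock(&init_mm.page_table_lock);
	if (!pte_none(*pte))
		printk(KERN_ERR "alloc_area_pte: page already exists\n");
	if (!page)
		return -ENOMEM;
	set_pte(pte, mk_pte(page, prot));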
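
Finally, two behavioural notes from slab.c: kmem_cache_shrink() no longer verifies that the cache is chained in cache_cache (is_chained_kmem_cache() is deleted outright), and kmalloc() now returns NULL instead of calling BUG() when the request exceeds the largest general cache. Callers therefore see an oversized request as an ordinary allocation failure:

	buf = kmalloc(bytes, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;		/* now also covers the too-large case */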