author     Ralf Baechle <ralf@linux-mips.org>    2001-04-05 04:55:58 +0000
committer  Ralf Baechle <ralf@linux-mips.org>    2001-04-05 04:55:58 +0000
commit     74a9f2e1b4d3ab45a9f72cb5b556c9f521524ab3 (patch)
tree       7c4cdb103ab1b388c9852a88bd6fb1e73eba0b5c /mm
parent     ee6374c8b0d333c08061c6a97bc77090d7461225 (diff)
Merge with Linux 2.4.3.
Note that mingetty no longer works with a serial console; you have to
switch to another getty such as getty_ps. This commit also includes a
fix for a setitimer bug that prevented getty_ps from working on
older kernels.
Diffstat (limited to 'mm')
-rw-r--r--  mm/filemap.c     |  27
-rw-r--r--  mm/memory.c      | 269
-rw-r--r--  mm/mlock.c       |  16
-rw-r--r--  mm/mmap.c        | 148
-rw-r--r--  mm/mprotect.c    |   5
-rw-r--r--  mm/mremap.c      |  12
-rw-r--r--  mm/page_alloc.c  |   3
-rw-r--r--  mm/shmem.c       |   4
-rw-r--r--  mm/slab.c        |  25
-rw-r--r--  mm/swapfile.c    |   4
-rw-r--r--  mm/vmalloc.c     |  12
-rw-r--r--  mm/vmscan.c      |  21
12 files changed, 331 insertions, 215 deletions
diff --git a/mm/filemap.c b/mm/filemap.c index 156ef6010..733d8667b 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -559,7 +559,7 @@ static inline int page_cache_read(struct file * file, unsigned long offset) if (page) return 0; - page = page_cache_alloc(); + page = page_cache_alloc(mapping); if (!page) return -ENOMEM; @@ -659,7 +659,7 @@ void lock_page(struct page *page) /* * a rather lightweight function, finding and getting a reference to a - * hashed page atomically, waiting for it if it's locked. + * hashed page atomically. */ struct page * __find_get_page(struct address_space *mapping, unsigned long offset, struct page **hash) @@ -679,7 +679,8 @@ struct page * __find_get_page(struct address_space *mapping, } /* - * Get the lock to a page atomically. + * Same as the above, but lock the page too, verifying that + * it's still valid once we own it. */ struct page * __find_lock_page (struct address_space *mapping, unsigned long offset, struct page **hash) @@ -1174,7 +1175,7 @@ no_cached_page: */ if (!cached_page) { spin_unlock(&pagecache_lock); - cached_page = page_cache_alloc(); + cached_page = page_cache_alloc(mapping); if (!cached_page) { desc->error = -ENOMEM; break; @@ -1474,7 +1475,7 @@ success: */ old_page = page; if (no_share) { - struct page *new_page = page_cache_alloc(); + struct page *new_page = alloc_page(GFP_HIGHUSER); if (new_page) { copy_user_highpage(new_page, old_page, address); @@ -1752,7 +1753,7 @@ asmlinkage long sys_msync(unsigned long start, size_t len, int flags) struct vm_area_struct * vma; int unmapped_error, error = -EINVAL; - down(¤t->mm->mmap_sem); + down_read(¤t->mm->mmap_sem); if (start & ~PAGE_MASK) goto out; len = (len + ~PAGE_MASK) & PAGE_MASK; @@ -1798,7 +1799,7 @@ asmlinkage long sys_msync(unsigned long start, size_t len, int flags) vma = vma->vm_next; } out: - up(¤t->mm->mmap_sem); + up_read(¤t->mm->mmap_sem); return error; } @@ -2097,7 +2098,7 @@ asmlinkage long sys_madvise(unsigned long start, size_t len, int behavior) int unmapped_error = 0; int error = -EINVAL; - down(¤t->mm->mmap_sem); + down_write(¤t->mm->mmap_sem); if (start & ~PAGE_MASK) goto out; @@ -2148,7 +2149,7 @@ asmlinkage long sys_madvise(unsigned long start, size_t len, int behavior) } out: - up(¤t->mm->mmap_sem); + up_write(¤t->mm->mmap_sem); return error; } @@ -2250,7 +2251,7 @@ asmlinkage long sys_mincore(unsigned long start, size_t len, int unmapped_error = 0; long error = -EINVAL; - down(¤t->mm->mmap_sem); + down_read(¤t->mm->mmap_sem); if (start & ~PAGE_CACHE_MASK) goto out; @@ -2302,7 +2303,7 @@ asmlinkage long sys_mincore(unsigned long start, size_t len, } out: - up(¤t->mm->mmap_sem); + up_read(¤t->mm->mmap_sem); return error; } @@ -2319,7 +2320,7 @@ repeat: page = __find_get_page(mapping, index, hash); if (!page) { if (!cached_page) { - cached_page = page_cache_alloc(); + cached_page = page_cache_alloc(mapping); if (!cached_page) return ERR_PTR(-ENOMEM); } @@ -2382,7 +2383,7 @@ repeat: page = __find_lock_page(mapping, index, hash); if (!page) { if (!*cached_page) { - *cached_page = page_cache_alloc(); + *cached_page = page_cache_alloc(mapping); if (!*cached_page) return NULL; } diff --git a/mm/memory.c b/mm/memory.c index 242981f72..c6287eae8 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -42,11 +42,11 @@ #include <linux/smp_lock.h> #include <linux/swapctl.h> #include <linux/iobuf.h> -#include <asm/uaccess.h> -#include <asm/pgalloc.h> #include <linux/highmem.h> #include <linux/pagemap.h> +#include <asm/pgalloc.h> +#include <asm/uaccess.h> unsigned long max_mapnr; unsigned long 
num_physpages; @@ -160,6 +160,7 @@ int copy_page_range(struct mm_struct *dst, struct mm_struct *src, src_pgd = pgd_offset(src, address)-1; dst_pgd = pgd_offset(dst, address)-1; + spin_lock(&dst->page_table_lock); for (;;) { pmd_t * src_pmd, * dst_pmd; @@ -177,13 +178,11 @@ skip_copy_pmd_range: address = (address + PGDIR_SIZE) & PGDIR_MASK; goto out; continue; } - if (pgd_none(*dst_pgd)) { - if (!pmd_alloc(dst_pgd, 0)) - goto nomem; - } - + src_pmd = pmd_offset(src_pgd, address); - dst_pmd = pmd_offset(dst_pgd, address); + dst_pmd = pmd_alloc(dst, dst_pgd, address); + if (!dst_pmd) + goto nomem; do { pte_t * src_pte, * dst_pte; @@ -200,13 +199,11 @@ skip_copy_pte_range: address = (address + PMD_SIZE) & PMD_MASK; goto out; goto cont_copy_pmd_range; } - if (pmd_none(*dst_pmd)) { - if (!pte_alloc(dst_pmd, 0)) - goto nomem; - } - + src_pte = pte_offset(src_pmd, address); - dst_pte = pte_offset(dst_pmd, address); + dst_pte = pte_alloc(dst, dst_pmd, address); + if (!dst_pte) + goto nomem; spin_lock(&src->page_table_lock); do { @@ -251,14 +248,14 @@ cont_copy_pmd_range: src_pmd++; dst_pmd++; } while ((unsigned long)src_pmd & PMD_TABLE_MASK); } -out: - return 0; - out_unlock: spin_unlock(&src->page_table_lock); +out: + spin_unlock(&dst->page_table_lock); return 0; nomem: + spin_unlock(&dst->page_table_lock); return -ENOMEM; } @@ -377,7 +374,6 @@ void zap_page_range(struct mm_struct *mm, unsigned long address, unsigned long s address = (address + PGDIR_SIZE) & PGDIR_MASK; dir++; } while (address && (address < end)); - spin_unlock(&mm->page_table_lock); /* * Update rss for the mm_struct (not necessarily current->mm) * Notice that rss is an unsigned long. @@ -386,6 +382,7 @@ void zap_page_range(struct mm_struct *mm, unsigned long address, unsigned long s mm->rss -= freed; else mm->rss = 0; + spin_unlock(&mm->page_table_lock); } @@ -450,7 +447,7 @@ int map_user_kiobuf(int rw, struct kiobuf *iobuf, unsigned long va, size_t len) if (err) return err; - down(&mm->mmap_sem); + down_write(&mm->mmap_sem); err = -EFAULT; iobuf->locked = 0; @@ -501,12 +498,12 @@ int map_user_kiobuf(int rw, struct kiobuf *iobuf, unsigned long va, size_t len) ptr += PAGE_SIZE; } - up(&mm->mmap_sem); + up_write(&mm->mmap_sem); dprintk ("map_user_kiobuf: end OK\n"); return 0; out_unlock: - up(&mm->mmap_sem); + up_write(&mm->mmap_sem); unmap_kiobuf(iobuf); dprintk ("map_user_kiobuf: end %d\n", err); return err; @@ -658,7 +655,7 @@ static inline void zeromap_pte_range(pte_t * pte, unsigned long address, } while (address && (address < end)); } -static inline int zeromap_pmd_range(pmd_t * pmd, unsigned long address, +static inline int zeromap_pmd_range(struct mm_struct *mm, pmd_t * pmd, unsigned long address, unsigned long size, pgprot_t prot) { unsigned long end; @@ -668,7 +665,7 @@ static inline int zeromap_pmd_range(pmd_t * pmd, unsigned long address, if (end > PGDIR_SIZE) end = PGDIR_SIZE; do { - pte_t * pte = pte_alloc(pmd, address); + pte_t * pte = pte_alloc(mm, pmd, address); if (!pte) return -ENOMEM; zeromap_pte_range(pte, address, end - address, prot); @@ -684,23 +681,27 @@ int zeromap_page_range(unsigned long address, unsigned long size, pgprot_t prot) pgd_t * dir; unsigned long beg = address; unsigned long end = address + size; + struct mm_struct *mm = current->mm; - dir = pgd_offset(current->mm, address); - flush_cache_range(current->mm, beg, end); + dir = pgd_offset(mm, address); + flush_cache_range(mm, beg, end); if (address >= end) BUG(); + + spin_lock(&mm->page_table_lock); do { - pmd_t *pmd = pmd_alloc(dir, address); + 
pmd_t *pmd = pmd_alloc(mm, dir, address); error = -ENOMEM; if (!pmd) break; - error = zeromap_pmd_range(pmd, address, end - address, prot); + error = zeromap_pmd_range(mm, pmd, address, end - address, prot); if (error) break; address = (address + PGDIR_SIZE) & PGDIR_MASK; dir++; } while (address && (address < end)); - flush_tlb_range(current->mm, beg, end); + spin_unlock(&mm->page_table_lock); + flush_tlb_range(mm, beg, end); return error; } @@ -733,7 +734,7 @@ static inline void remap_pte_range(pte_t * pte, unsigned long address, unsigned } while (address && (address < end)); } -static inline int remap_pmd_range(pmd_t * pmd, unsigned long address, unsigned long size, +static inline int remap_pmd_range(struct mm_struct *mm, pmd_t * pmd, unsigned long address, unsigned long size, unsigned long phys_addr, pgprot_t prot) { unsigned long end; @@ -744,7 +745,7 @@ static inline int remap_pmd_range(pmd_t * pmd, unsigned long address, unsigned l end = PGDIR_SIZE; phys_addr -= address; do { - pte_t * pte = pte_alloc(pmd, address); + pte_t * pte = pte_alloc(mm, pmd, address); if (!pte) return -ENOMEM; remap_pte_range(pte, address, end - address, address + phys_addr, prot); @@ -761,24 +762,28 @@ int remap_page_range(unsigned long from, unsigned long phys_addr, unsigned long pgd_t * dir; unsigned long beg = from; unsigned long end = from + size; + struct mm_struct *mm = current->mm; phys_addr -= from; - dir = pgd_offset(current->mm, from); - flush_cache_range(current->mm, beg, end); + dir = pgd_offset(mm, from); + flush_cache_range(mm, beg, end); if (from >= end) BUG(); + + spin_lock(&mm->page_table_lock); do { - pmd_t *pmd = pmd_alloc(dir, from); + pmd_t *pmd = pmd_alloc(mm, dir, from); error = -ENOMEM; if (!pmd) break; - error = remap_pmd_range(pmd, from, end - from, phys_addr + from, prot); + error = remap_pmd_range(mm, pmd, from, end - from, phys_addr + from, prot); if (error) break; from = (from + PGDIR_SIZE) & PGDIR_MASK; dir++; } while (from && (from < end)); - flush_tlb_range(current->mm, beg, end); + spin_unlock(&mm->page_table_lock); + flush_tlb_range(mm, beg, end); return error; } @@ -787,6 +792,8 @@ int remap_page_range(unsigned long from, unsigned long phys_addr, unsigned long * - flush the old one * - update the page tables * - inform the TLB about the new one + * + * We hold the mm semaphore for reading and vma->vm_mm->page_table_lock */ static inline void establish_pte(struct vm_area_struct * vma, unsigned long address, pte_t *page_table, pte_t entry) { @@ -795,6 +802,9 @@ static inline void establish_pte(struct vm_area_struct * vma, unsigned long addr update_mmu_cache(vma, address, entry); } +/* + * We hold the mm semaphore for reading and vma->vm_mm->page_table_lock + */ static inline void break_cow(struct vm_area_struct * vma, struct page * old_page, struct page * new_page, unsigned long address, pte_t *page_table) { @@ -862,7 +872,6 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma, break; flush_cache_page(vma, address); establish_pte(vma, address, page_table, pte_mkyoung(pte_mkdirty(pte_mkwrite(pte)))); - spin_unlock(&mm->page_table_lock); return 1; /* Minor fault */ } @@ -870,10 +879,10 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma, * Ok, we need to copy. Oh, well.. 
*/ spin_unlock(&mm->page_table_lock); - new_page = page_cache_alloc(); + new_page = alloc_page(GFP_HIGHUSER); + spin_lock(&mm->page_table_lock); if (!new_page) return -1; - spin_lock(&mm->page_table_lock); /* * Re-check the pte - we dropped the lock @@ -886,12 +895,10 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma, /* Free the old page.. */ new_page = old_page; } - spin_unlock(&mm->page_table_lock); page_cache_release(new_page); return 1; /* Minor fault */ bad_wp_page: - spin_unlock(&mm->page_table_lock); printk("do_wp_page: bogus page at address %08lx (page 0x%lx)\n",address,(unsigned long)old_page); return -1; } @@ -1021,63 +1028,100 @@ void swapin_readahead(swp_entry_t entry) return; } +/* + * We hold the mm semaphore and the page_table_lock on entry and exit. + */ static int do_swap_page(struct mm_struct * mm, struct vm_area_struct * vma, unsigned long address, pte_t * page_table, swp_entry_t entry, int write_access) { - struct page *page = lookup_swap_cache(entry); + struct page *page; pte_t pte; + spin_unlock(&mm->page_table_lock); + page = lookup_swap_cache(entry); if (!page) { lock_kernel(); swapin_readahead(entry); page = read_swap_cache(entry); unlock_kernel(); - if (!page) + if (!page) { + spin_lock(&mm->page_table_lock); return -1; + } flush_page_to_ram(page); flush_icache_page(vma, page); } - mm->rss++; - - pte = mk_pte(page, vma->vm_page_prot); - /* * Freeze the "shared"ness of the page, ie page_count + swap_count. * Must lock page before transferring our swap count to already * obtained page count. */ lock_page(page); + + /* + * Back out if somebody else faulted in this pte while we + * released the page table lock. + */ + spin_lock(&mm->page_table_lock); + if (pte_present(*page_table)) { + UnlockPage(page); + page_cache_release(page); + return 1; + } + + /* The page isn't present yet, go ahead with the fault. */ + mm->rss++; + pte = mk_pte(page, vma->vm_page_prot); + swap_free(entry); if (write_access && !is_page_shared(page)) pte = pte_mkwrite(pte_mkdirty(pte)); UnlockPage(page); set_pte(page_table, pte); + /* No need to invalidate - it was non-present before */ update_mmu_cache(vma, address, pte); return 1; /* Minor fault */ } /* - * This only needs the MM semaphore + * We are called with the MM semaphore and page_table_lock + * spinlock held to protect against concurrent faults in + * multithreaded programs. */ static int do_anonymous_page(struct mm_struct * mm, struct vm_area_struct * vma, pte_t *page_table, int write_access, unsigned long addr) { - struct page *page = NULL; - pte_t entry = pte_wrprotect(mk_pte(ZERO_PAGE(addr), vma->vm_page_prot)); + pte_t entry; + + /* Read-only mapping of ZERO_PAGE. */ + entry = pte_wrprotect(mk_pte(ZERO_PAGE(addr), vma->vm_page_prot)); + + /* ..except if it's a write access */ if (write_access) { + struct page *page; + + /* Allocate our own private page. 
*/ + spin_unlock(&mm->page_table_lock); page = alloc_page(GFP_HIGHUSER); + spin_lock(&mm->page_table_lock); if (!page) return -1; - clear_user_highpage(page, addr); - entry = pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot))); + if (!pte_none(*page_table)) { + page_cache_release(page); + return 1; + } mm->rss++; + clear_user_highpage(page, addr); flush_page_to_ram(page); + entry = pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot))); } + set_pte(page_table, entry); + /* No need to invalidate - it was non-present before */ update_mmu_cache(vma, addr, entry); return 1; /* Minor fault */ @@ -1092,7 +1136,8 @@ static int do_anonymous_page(struct mm_struct * mm, struct vm_area_struct * vma, * As this is called only for pages that do not currently exist, we * do not need to flush old virtual caches or the TLB. * - * This is called with the MM semaphore held. + * This is called with the MM semaphore held and the page table + * spinlock held. */ static int do_no_page(struct mm_struct * mm, struct vm_area_struct * vma, unsigned long address, int write_access, pte_t *page_table) @@ -1102,6 +1147,7 @@ static int do_no_page(struct mm_struct * mm, struct vm_area_struct * vma, if (!vma->vm_ops || !vma->vm_ops->nopage) return do_anonymous_page(mm, vma, page_table, write_access, address); + spin_unlock(&mm->page_table_lock); /* * The third argument is "no_share", which tells the low-level code @@ -1109,11 +1155,12 @@ static int do_no_page(struct mm_struct * mm, struct vm_area_struct * vma, * essentially an early COW detection. */ new_page = vma->vm_ops->nopage(vma, address & PAGE_MASK, (vma->vm_flags & VM_SHARED)?0:write_access); + + spin_lock(&mm->page_table_lock); if (new_page == NULL) /* no page was available -- SIGBUS */ return 0; if (new_page == NOPAGE_OOM) return -1; - ++mm->rss; /* * This silly early PAGE_DIRTY setting removes a race * due to the bad i386 page protection. But it's valid @@ -1124,15 +1171,24 @@ static int do_no_page(struct mm_struct * mm, struct vm_area_struct * vma, * so we can make it writable and dirty to avoid having to * handle that later. */ - flush_page_to_ram(new_page); - flush_icache_page(vma, new_page); - entry = mk_pte(new_page, vma->vm_page_prot); - if (write_access) { - entry = pte_mkwrite(pte_mkdirty(entry)); - } else if (page_count(new_page) > 1 && - !(vma->vm_flags & VM_SHARED)) - entry = pte_wrprotect(entry); - set_pte(page_table, entry); + /* Only go through if we didn't race with anybody else... */ + if (pte_none(*page_table)) { + ++mm->rss; + flush_page_to_ram(new_page); + flush_icache_page(vma, new_page); + entry = mk_pte(new_page, vma->vm_page_prot); + if (write_access) { + entry = pte_mkwrite(pte_mkdirty(entry)); + } else if (page_count(new_page) > 1 && + !(vma->vm_flags & VM_SHARED)) + entry = pte_wrprotect(entry); + set_pte(page_table, entry); + } else { + /* One of our sibling threads was faster, back out. */ + page_cache_release(new_page); + return 1; + } + /* no need to invalidate: a not-present page shouldn't be cached */ update_mmu_cache(vma, address, entry); return 2; /* Major fault */ @@ -1162,11 +1218,6 @@ static inline int handle_pte_fault(struct mm_struct *mm, { pte_t entry; - /* - * We need the page table lock to synchronize with kswapd - * and the SMP-safe atomic PTE updates. - */ - spin_lock(&mm->page_table_lock); entry = *pte; if (!pte_present(entry)) { /* @@ -1174,7 +1225,6 @@ static inline int handle_pte_fault(struct mm_struct *mm, * and the PTE updates will not touch it later. So * drop the lock. 
*/ - spin_unlock(&mm->page_table_lock); if (pte_none(entry)) return do_no_page(mm, vma, address, write_access, pte); return do_swap_page(mm, vma, address, pte, pte_to_swp_entry(entry), write_access); @@ -1188,7 +1238,6 @@ static inline int handle_pte_fault(struct mm_struct *mm, } entry = pte_mkyoung(entry); establish_pte(vma, address, pte, entry); - spin_unlock(&mm->page_table_lock); return 1; } @@ -1204,17 +1253,95 @@ int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct * vma, current->state = TASK_RUNNING; pgd = pgd_offset(mm, address); - pmd = pmd_alloc(pgd, address); + + /* + * We need the page table lock to synchronize with kswapd + * and the SMP-safe atomic PTE updates. + */ + spin_lock(&mm->page_table_lock); + pmd = pmd_alloc(mm, pgd, address); if (pmd) { - pte_t * pte = pte_alloc(pmd, address); + pte_t * pte = pte_alloc(mm, pmd, address); if (pte) ret = handle_pte_fault(mm, vma, address, write_access, pte); } + spin_unlock(&mm->page_table_lock); return ret; } /* + * Allocate page middle directory. + * + * We've already handled the fast-path in-line, and we own the + * page table lock. + * + * On a two-level page table, this ends up actually being entirely + * optimized away. + */ +pmd_t *__pmd_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address) +{ + pmd_t *new; + + /* "fast" allocation can happen without dropping the lock.. */ + new = pmd_alloc_one_fast(mm, address); + if (!new) { + spin_unlock(&mm->page_table_lock); + new = pmd_alloc_one(mm, address); + spin_lock(&mm->page_table_lock); + if (!new) + return NULL; + + /* + * Because we dropped the lock, we should re-check the + * entry, as somebody else could have populated it.. + */ + if (pgd_present(*pgd)) { + pmd_free(new); + goto out; + } + } + pgd_populate(mm, pgd, new); +out: + return pmd_offset(pgd, address); +} + +/* + * Allocate the page table directory. + * + * We've already handled the fast-path in-line, and we own the + * page table lock. + */ +pte_t *pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address) +{ + if (!pmd_present(*pmd)) { + pte_t *new; + + /* "fast" allocation can happen without dropping the lock.. */ + new = pte_alloc_one_fast(mm, address); + if (!new) { + spin_unlock(&mm->page_table_lock); + new = pte_alloc_one(mm, address); + spin_lock(&mm->page_table_lock); + if (!new) + return NULL; + + /* + * Because we dropped the lock, we should re-check the + * entry, as somebody else could have populated it.. + */ + if (pmd_present(*pmd)) { + pte_free(new); + goto out; + } + } + pmd_populate(mm, pmd, new); + } +out: + return pte_offset(pmd, address); +} + +/* * Simplistic page force-in.. 
*/ int make_pages_present(unsigned long addr, unsigned long end) diff --git a/mm/mlock.c b/mm/mlock.c index 16e9f947b..75bca3639 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -198,7 +198,7 @@ asmlinkage long sys_mlock(unsigned long start, size_t len) unsigned long lock_limit; int error = -ENOMEM; - down(¤t->mm->mmap_sem); + down_write(¤t->mm->mmap_sem); len = PAGE_ALIGN(len + (start & ~PAGE_MASK)); start &= PAGE_MASK; @@ -219,7 +219,7 @@ asmlinkage long sys_mlock(unsigned long start, size_t len) error = do_mlock(start, len, 1); out: - up(¤t->mm->mmap_sem); + up_write(¤t->mm->mmap_sem); return error; } @@ -227,11 +227,11 @@ asmlinkage long sys_munlock(unsigned long start, size_t len) { int ret; - down(¤t->mm->mmap_sem); + down_write(¤t->mm->mmap_sem); len = PAGE_ALIGN(len + (start & ~PAGE_MASK)); start &= PAGE_MASK; ret = do_mlock(start, len, 0); - up(¤t->mm->mmap_sem); + up_write(¤t->mm->mmap_sem); return ret; } @@ -268,7 +268,7 @@ asmlinkage long sys_mlockall(int flags) unsigned long lock_limit; int ret = -EINVAL; - down(¤t->mm->mmap_sem); + down_write(¤t->mm->mmap_sem); if (!flags || (flags & ~(MCL_CURRENT | MCL_FUTURE))) goto out; @@ -286,7 +286,7 @@ asmlinkage long sys_mlockall(int flags) ret = do_mlockall(flags); out: - up(¤t->mm->mmap_sem); + up_write(¤t->mm->mmap_sem); return ret; } @@ -294,8 +294,8 @@ asmlinkage long sys_munlockall(void) { int ret; - down(¤t->mm->mmap_sem); + down_write(¤t->mm->mmap_sem); ret = do_mlockall(0); - up(¤t->mm->mmap_sem); + up_write(¤t->mm->mmap_sem); return ret; } @@ -133,7 +133,7 @@ asmlinkage unsigned long sys_brk(unsigned long brk) unsigned long newbrk, oldbrk; struct mm_struct *mm = current->mm; - down(&mm->mmap_sem); + down_write(&mm->mmap_sem); if (brk < mm->end_code) goto out; @@ -169,7 +169,7 @@ set_brk: mm->brk = brk; out: retval = mm->brk; - up(&mm->mmap_sem); + up_write(&mm->mmap_sem); return retval; } @@ -177,7 +177,7 @@ out: * internally. Essentially, translate the "PROT_xxx" and "MAP_xxx" bits * into "VM_xxx". */ -static inline unsigned long vm_flags(unsigned long prot, unsigned long flags) +static inline unsigned long calc_vm_flags(unsigned long prot, unsigned long flags) { #define _trans(x,bit1,bit2) \ ((bit1==bit2)?(x&bit1):(x&bit1)?bit2:0) @@ -200,6 +200,7 @@ unsigned long do_mmap_pgoff(struct file * file, unsigned long addr, unsigned lon { struct mm_struct * mm = current->mm; struct vm_area_struct * vma; + unsigned int vm_flags; int correct_wcount = 0; int error; @@ -220,19 +221,33 @@ unsigned long do_mmap_pgoff(struct file * file, unsigned long addr, unsigned lon if (mm->map_count > MAX_MAP_COUNT) return -ENOMEM; + /* Obtain the address to map to. we verify (or select) it and ensure + * that it represents a valid section of the address space. + */ + if (flags & MAP_FIXED) { + if (addr & ~PAGE_MASK) + return -EINVAL; + } else { + addr = get_unmapped_area(addr, len); + if (!addr) + return -ENOMEM; + } + + /* Do simple checking here so the lower-level routines won't have + * to. we assume access permissions have been handled by the open + * of the memory object, so we don't do any here. + */ + vm_flags = calc_vm_flags(prot,flags) | mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC; + /* mlock MCL_FUTURE? */ - if (mm->def_flags & VM_LOCKED) { + if (vm_flags & VM_LOCKED) { unsigned long locked = mm->locked_vm << PAGE_SHIFT; locked += len; if (locked > current->rlim[RLIMIT_MEMLOCK].rlim_cur) return -EAGAIN; } - /* Do simple checking here so the lower-level routines won't have - * to. 
we assume access permissions have been handled by the open - * of the memory object, so we don't do any here. - */ - if (file != NULL) { + if (file) { switch (flags & MAP_TYPE) { case MAP_SHARED: if ((prot & PROT_WRITE) && !(file->f_mode & FMODE_WRITE)) @@ -246,6 +261,10 @@ unsigned long do_mmap_pgoff(struct file * file, unsigned long addr, unsigned lon if (locks_verify_locked(file->f_dentry->d_inode)) return -EAGAIN; + vm_flags |= VM_SHARED | VM_MAYSHARE; + if (!(file->f_mode & FMODE_WRITE)) + vm_flags &= ~(VM_MAYWRITE | VM_SHARED); + /* fall through */ case MAP_PRIVATE: if (!(file->f_mode & FMODE_READ)) @@ -255,18 +274,43 @@ unsigned long do_mmap_pgoff(struct file * file, unsigned long addr, unsigned lon default: return -EINVAL; } + } else { + vm_flags |= VM_SHARED | VM_MAYSHARE; + switch (flags & MAP_TYPE) { + default: + return -EINVAL; + case MAP_PRIVATE: + vm_flags &= ~(VM_SHARED | VM_MAYSHARE); + /* fall through */ + case MAP_SHARED: + break; + } } - /* Obtain the address to map to. we verify (or select) it and ensure - * that it represents a valid section of the address space. - */ - if (flags & MAP_FIXED) { - if (addr & ~PAGE_MASK) - return -EINVAL; - } else { - addr = get_unmapped_area(addr, len); - if (!addr) - return -ENOMEM; + /* Clear old maps */ + error = -ENOMEM; + if (do_munmap(mm, addr, len)) + return -ENOMEM; + + /* Check against address space limit. */ + if ((mm->total_vm << PAGE_SHIFT) + len + > current->rlim[RLIMIT_AS].rlim_cur) + return -ENOMEM; + + /* Private writable mapping? Check memory availability.. */ + if ((vm_flags & (VM_SHARED | VM_WRITE)) == VM_WRITE && + !(flags & MAP_NORESERVE) && + !vm_enough_memory(len >> PAGE_SHIFT)) + return -ENOMEM; + + /* Can we just expand an old anonymous mapping? */ + if (addr && !file && !(vm_flags & VM_SHARED)) { + struct vm_area_struct * vma = find_vma(mm, addr-1); + if (vma && vma->vm_end == addr && !vma->vm_file && + vma->vm_flags == vm_flags) { + vma->vm_end = addr + len; + goto out; + } } /* Determine the object being mapped and call the appropriate @@ -280,58 +324,16 @@ unsigned long do_mmap_pgoff(struct file * file, unsigned long addr, unsigned lon vma->vm_mm = mm; vma->vm_start = addr; vma->vm_end = addr + len; - vma->vm_flags = vm_flags(prot,flags) | mm->def_flags; - - if (file) { - VM_ClearReadHint(vma); - vma->vm_raend = 0; - - if (file->f_mode & FMODE_READ) - vma->vm_flags |= VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC; - if (flags & MAP_SHARED) { - vma->vm_flags |= VM_SHARED | VM_MAYSHARE; - - /* This looks strange, but when we don't have the file open - * for writing, we can demote the shared mapping to a simpler - * private mapping. That also takes care of a security hole - * with ptrace() writing to a shared mapping without write - * permissions. - * - * We leave the VM_MAYSHARE bit on, just to get correct output - * from /proc/xxx/maps.. - */ - if (!(file->f_mode & FMODE_WRITE)) - vma->vm_flags &= ~(VM_MAYWRITE | VM_SHARED); - } - } else { - vma->vm_flags |= VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC; - if (flags & MAP_SHARED) - vma->vm_flags |= VM_SHARED | VM_MAYSHARE; - } - vma->vm_page_prot = protection_map[vma->vm_flags & 0x0f]; + vma->vm_flags = vm_flags; + vma->vm_page_prot = protection_map[vm_flags & 0x0f]; vma->vm_ops = NULL; vma->vm_pgoff = pgoff; vma->vm_file = NULL; vma->vm_private_data = NULL; - - /* Clear old maps */ - error = -ENOMEM; - if (do_munmap(mm, addr, len)) - goto free_vma; - - /* Check against address space limit. 
*/ - if ((mm->total_vm << PAGE_SHIFT) + len - > current->rlim[RLIMIT_AS].rlim_cur) - goto free_vma; - - /* Private writable mapping? Check memory availability.. */ - if ((vma->vm_flags & (VM_SHARED | VM_WRITE)) == VM_WRITE && - !(flags & MAP_NORESERVE) && - !vm_enough_memory(len >> PAGE_SHIFT)) - goto free_vma; + vma->vm_raend = 0; if (file) { - if (vma->vm_flags & VM_DENYWRITE) { + if (vm_flags & VM_DENYWRITE) { error = deny_write_access(file); if (error) goto free_vma; @@ -353,15 +355,15 @@ unsigned long do_mmap_pgoff(struct file * file, unsigned long addr, unsigned lon * Answer: Yes, several device drivers can do it in their * f_op->mmap method. -DaveM */ - flags = vma->vm_flags; addr = vma->vm_start; insert_vm_struct(mm, vma); if (correct_wcount) atomic_inc(&file->f_dentry->d_inode->i_writecount); - + +out: mm->total_vm += len >> PAGE_SHIFT; - if (flags & VM_LOCKED) { + if (vm_flags & VM_LOCKED) { mm->locked_vm += len >> PAGE_SHIFT; make_pages_present(addr, addr + len); } @@ -776,9 +778,9 @@ asmlinkage long sys_munmap(unsigned long addr, size_t len) int ret; struct mm_struct *mm = current->mm; - down(&mm->mmap_sem); + down_write(&mm->mmap_sem); ret = do_munmap(mm, addr, len); - up(&mm->mmap_sem); + up_write(&mm->mmap_sem); return ret; } @@ -825,12 +827,11 @@ unsigned long do_brk(unsigned long addr, unsigned long len) if (!vm_enough_memory(len >> PAGE_SHIFT)) return -ENOMEM; - flags = vm_flags(PROT_READ|PROT_WRITE|PROT_EXEC, + flags = calc_vm_flags(PROT_READ|PROT_WRITE|PROT_EXEC, MAP_FIXED|MAP_PRIVATE) | mm->def_flags; flags |= VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC; - /* Can we just expand an old anonymous mapping? */ if (addr) { struct vm_area_struct * vma = find_vma(mm, addr-1); @@ -841,7 +842,6 @@ unsigned long do_brk(unsigned long addr, unsigned long len) } } - /* * create a vma struct for an anonymous mapping */ @@ -889,8 +889,8 @@ void exit_mmap(struct mm_struct * mm) spin_lock(&mm->page_table_lock); mpnt = mm->mmap; mm->mmap = mm->mmap_avl = mm->mmap_cache = NULL; - spin_unlock(&mm->page_table_lock); mm->rss = 0; + spin_unlock(&mm->page_table_lock); mm->total_vm = 0; mm->locked_vm = 0; diff --git a/mm/mprotect.c b/mm/mprotect.c index 91905c8b1..10c500100 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -242,7 +242,8 @@ asmlinkage long sys_mprotect(unsigned long start, size_t len, unsigned long prot if (end == start) return 0; - down(¤t->mm->mmap_sem); + /* XXX: maybe this could be down_read ??? 
- Rik */ + down_write(¤t->mm->mmap_sem); vma = find_vma(current->mm, start); error = -EFAULT; @@ -278,6 +279,6 @@ asmlinkage long sys_mprotect(unsigned long start, size_t len, unsigned long prot } } out: - up(¤t->mm->mmap_sem); + up_write(¤t->mm->mmap_sem); return error; } diff --git a/mm/mremap.c b/mm/mremap.c index e237c9442..3a3b40ee4 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -51,9 +51,9 @@ static inline pte_t *alloc_one_pte(struct mm_struct *mm, unsigned long addr) pmd_t * pmd; pte_t * pte = NULL; - pmd = pmd_alloc(pgd_offset(mm, addr), addr); + pmd = pmd_alloc(mm, pgd_offset(mm, addr), addr); if (pmd) - pte = pte_alloc(pmd, addr); + pte = pte_alloc(mm, pmd, addr); return pte; } @@ -62,7 +62,6 @@ static inline int copy_one_pte(struct mm_struct *mm, pte_t * src, pte_t * dst) int error = 0; pte_t pte; - spin_lock(&mm->page_table_lock); if (!pte_none(*src)) { pte = ptep_get_and_clear(src); if (!dst) { @@ -72,7 +71,6 @@ static inline int copy_one_pte(struct mm_struct *mm, pte_t * src, pte_t * dst) } set_pte(dst, pte); } - spin_unlock(&mm->page_table_lock); return error; } @@ -81,9 +79,11 @@ static int move_one_page(struct mm_struct *mm, unsigned long old_addr, unsigned int error = 0; pte_t * src; + spin_lock(&mm->page_table_lock); src = get_one_pte(mm, old_addr); if (src) error = copy_one_pte(mm, src, alloc_one_pte(mm, new_addr)); + spin_unlock(&mm->page_table_lock); return error; } @@ -292,8 +292,8 @@ asmlinkage unsigned long sys_mremap(unsigned long addr, { unsigned long ret; - down(¤t->mm->mmap_sem); + down_write(¤t->mm->mmap_sem); ret = do_mremap(addr, old_len, new_len, flags, new_addr); - up(¤t->mm->mmap_sem); + up_write(¤t->mm->mmap_sem); return ret; } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 09ac27284..6ad49c723 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -455,8 +455,7 @@ try_again: memory_pressure++; try_to_free_pages(gfp_mask); wakeup_bdflush(0); - if (!order) - goto try_again; + goto try_again; } } diff --git a/mm/shmem.c b/mm/shmem.c index 00426ca27..951dcb7a2 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -321,7 +321,7 @@ struct page * shmem_nopage(struct vm_area_struct * vma, unsigned long address, i inode->i_sb->u.shmem_sb.free_blocks--; spin_unlock (&inode->i_sb->u.shmem_sb.stat_lock); /* Ok, get a new page */ - page = page_cache_alloc(); + page = page_cache_alloc(mapping); if (!page) goto oom; clear_user_highpage(page, address); @@ -338,7 +338,7 @@ cached_page: up(&inode->i_sem); if (no_share) { - struct page *new_page = page_cache_alloc(); + struct page *new_page = page_cache_alloc(inode->i_mapping); if (new_page) { copy_user_highpage(new_page, page, address); @@ -814,28 +814,6 @@ opps: return cachep; } -/* - * This check if the kmem_cache_t pointer is chained in the cache_cache - * list. -arca - */ -static int is_chained_kmem_cache(kmem_cache_t * cachep) -{ - struct list_head *p; - int ret = 0; - - /* Find the cache in the chain of caches. */ - down(&cache_chain_sem); - list_for_each(p, &cache_chain) { - if (p == &cachep->next) { - ret = 1; - break; - } - } - up(&cache_chain_sem); - - return ret; -} - #ifdef CONFIG_SMP /* * Waits for all CPUs to execute func(). @@ -938,7 +916,7 @@ static int __kmem_cache_shrink(kmem_cache_t *cachep) */ int kmem_cache_shrink(kmem_cache_t *cachep) { - if (!cachep || in_interrupt() || !is_chained_kmem_cache(cachep)) + if (!cachep || in_interrupt()) BUG(); return __kmem_cache_shrink(cachep); @@ -1539,7 +1517,6 @@ void * kmalloc (size_t size, int flags) return __kmem_cache_alloc(flags & GFP_DMA ? 
csizep->cs_dmacachep : csizep->cs_cachep, flags); } - BUG(); // too big size return NULL; } diff --git a/mm/swapfile.c b/mm/swapfile.c index f02800bc6..1ccbeca82 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -209,6 +209,7 @@ bad_count: * share this swap entry, so be cautious and let do_wp_page work out * what to do if a write is requested later. */ +/* tasklist_lock and vma->vm_mm->page_table_lock are held */ static inline void unuse_pte(struct vm_area_struct * vma, unsigned long address, pte_t *dir, swp_entry_t entry, struct page* page) { @@ -234,6 +235,7 @@ static inline void unuse_pte(struct vm_area_struct * vma, unsigned long address, ++vma->vm_mm->rss; } +/* tasklist_lock and vma->vm_mm->page_table_lock are held */ static inline void unuse_pmd(struct vm_area_struct * vma, pmd_t *dir, unsigned long address, unsigned long size, unsigned long offset, swp_entry_t entry, struct page* page) @@ -261,6 +263,7 @@ static inline void unuse_pmd(struct vm_area_struct * vma, pmd_t *dir, } while (address && (address < end)); } +/* tasklist_lock and vma->vm_mm->page_table_lock are held */ static inline void unuse_pgd(struct vm_area_struct * vma, pgd_t *dir, unsigned long address, unsigned long size, swp_entry_t entry, struct page* page) @@ -291,6 +294,7 @@ static inline void unuse_pgd(struct vm_area_struct * vma, pgd_t *dir, } while (address && (address < end)); } +/* tasklist_lock and vma->vm_mm->page_table_lock are held */ static void unuse_vma(struct vm_area_struct * vma, pgd_t *pgdir, swp_entry_t entry, struct page* page) { diff --git a/mm/vmalloc.c b/mm/vmalloc.c index ab74d114b..4ca1d19c7 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -102,9 +102,11 @@ static inline int alloc_area_pte (pte_t * pte, unsigned long address, end = PMD_SIZE; do { struct page * page; + spin_unlock(&init_mm.page_table_lock); + page = alloc_page(gfp_mask); + spin_lock(&init_mm.page_table_lock); if (!pte_none(*pte)) printk(KERN_ERR "alloc_area_pte: page already exists\n"); - page = alloc_page(gfp_mask); if (!page) return -ENOMEM; set_pte(pte, mk_pte(page, prot)); @@ -123,7 +125,7 @@ static inline int alloc_area_pmd(pmd_t * pmd, unsigned long address, unsigned lo if (end > PGDIR_SIZE) end = PGDIR_SIZE; do { - pte_t * pte = pte_alloc_kernel(pmd, address); + pte_t * pte = pte_alloc(&init_mm, pmd, address); if (!pte) return -ENOMEM; if (alloc_area_pte(pte, address, end - address, gfp_mask, prot)) @@ -143,11 +145,11 @@ inline int vmalloc_area_pages (unsigned long address, unsigned long size, dir = pgd_offset_k(address); flush_cache_all(); - lock_kernel(); + spin_lock(&init_mm.page_table_lock); do { pmd_t *pmd; - pmd = pmd_alloc_kernel(dir, address); + pmd = pmd_alloc(&init_mm, dir, address); ret = -ENOMEM; if (!pmd) break; @@ -161,7 +163,7 @@ inline int vmalloc_area_pages (unsigned long address, unsigned long size, ret = 0; } while (address && (address < end)); - unlock_kernel(); + spin_unlock(&init_mm.page_table_lock); flush_tlb_all(); return ret; } diff --git a/mm/vmscan.c b/mm/vmscan.c index f41c53328..be1090882 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -25,16 +25,15 @@ #include <asm/pgalloc.h> /* - * The swap-out functions return 1 if they successfully - * threw something out, and we got a free page. It returns - * zero if it couldn't do anything, and any other value - * indicates it decreased rss, but the page was shared. + * The swap-out function returns 1 if it successfully + * scanned all the pages it was asked to (`count'). + * It returns zero if it couldn't do anything, * - * NOTE! 
If it sleeps, it *must* return 1 to make sure we - * don't continue with the swap-out. Otherwise we may be - * using a process that no longer actually exists (it might - * have died while we slept). + * rss may decrease because pages are shared, but this + * doesn't count as having freed a page. */ + +/* mm->page_table_lock is held. mmap_sem is not held */ static void try_to_swap_out(struct mm_struct * mm, struct vm_area_struct* vma, unsigned long address, pte_t * page_table, struct page *page) { pte_t pte; @@ -129,6 +128,7 @@ out_unlock_restore: return; } +/* mm->page_table_lock is held. mmap_sem is not held */ static int swap_out_pmd(struct mm_struct * mm, struct vm_area_struct * vma, pmd_t *dir, unsigned long address, unsigned long end, int count) { pte_t * pte; @@ -165,6 +165,7 @@ static int swap_out_pmd(struct mm_struct * mm, struct vm_area_struct * vma, pmd_ return count; } +/* mm->page_table_lock is held. mmap_sem is not held */ static inline int swap_out_pgd(struct mm_struct * mm, struct vm_area_struct * vma, pgd_t *dir, unsigned long address, unsigned long end, int count) { pmd_t * pmd; @@ -194,6 +195,7 @@ static inline int swap_out_pgd(struct mm_struct * mm, struct vm_area_struct * vm return count; } +/* mm->page_table_lock is held. mmap_sem is not held */ static int swap_out_vma(struct mm_struct * mm, struct vm_area_struct * vma, unsigned long address, int count) { pgd_t *pgdir; @@ -218,6 +220,9 @@ static int swap_out_vma(struct mm_struct * mm, struct vm_area_struct * vma, unsi return count; } +/* + * Returns non-zero if we scanned all `count' pages + */ static int swap_out_mm(struct mm_struct * mm, int count) { unsigned long address; |
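
A change that recurs through the whole diff is the conversion of mm->mmap_sem from a plain semaphore into a read-write semaphore: sys_msync and sys_mincore only walk the VMA list and now take it shared (down_read/up_read), while callers that change the address-space layout — sys_brk, sys_mmap, sys_munmap, sys_mlock, sys_mremap, sys_madvise, sys_mprotect — take it exclusive (down_write/up_write). A minimal userspace sketch of the same discipline, using a POSIX rwlock in place of the kernel's rw_semaphore (function names here are illustrative, not from the kernel):

#include <pthread.h>

static pthread_rwlock_t mmap_lock = PTHREAD_RWLOCK_INITIALIZER;

/* msync/mincore-style reader: examines VMAs, changes nothing structural */
static void walk_vmas(void)
{
	pthread_rwlock_rdlock(&mmap_lock);	/* down_read(&mm->mmap_sem) */
	/* ... walk the mapping list ... */
	pthread_rwlock_unlock(&mmap_lock);	/* up_read(&mm->mmap_sem) */
}

/* mmap/munmap/mremap-style writer: inserts or removes VMAs */
static void modify_vmas(void)
{
	pthread_rwlock_wrlock(&mmap_lock);	/* down_write(&mm->mmap_sem) */
	/* ... change the mapping list ... */
	pthread_rwlock_unlock(&mmap_lock);	/* up_write(&mm->mmap_sem) */
}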
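
filemap.c and shmem.c now pass the address_space down to page_cache_alloc(), so page-cache pages can be allocated under a per-mapping policy; the copy-on-write paths in filemap_nopage and do_wp_page, whose private copies belong to no mapping, switch to a bare alloc_page(GFP_HIGHUSER) instead. A sketch of what the new helper presumably amounts to — the gfp_mask field is an assumption modeled on the 2.4 page-cache headers, not something visible in this diff:

/* Assumed shape of the per-mapping allocator (cf. 2.4 <linux/pagemap.h>). */
static inline struct page *page_cache_alloc(struct address_space *x)
{
	return alloc_pages(x->gfp_mask, 0);	/* gfp policy travels with the mapping */
}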
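
The heart of the memory.c changes is a new locking discipline for the fault handlers: handle_mm_fault now takes mm->page_table_lock once around pmd_alloc/pte_alloc/handle_pte_fault, and anything inside a handler that can sleep drops the lock, retakes it, and re-checks the pte before committing, backing out if a sibling thread resolved the fault in the meantime. do_wp_page, do_no_page and do_anonymous_page all follow this pattern; condensed here from the do_anonymous_page hunk above, with flushes and error paths trimmed:

	spin_unlock(&mm->page_table_lock);
	page = alloc_page(GFP_HIGHUSER);		/* may sleep */
	spin_lock(&mm->page_table_lock);
	if (!page)
		return -1;
	if (!pte_none(*page_table)) {			/* raced: fault already handled */
		page_cache_release(page);
		return 1;				/* report a minor fault */
	}
	mm->rss++;
	clear_user_highpage(page, addr);
	entry = pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot)));
	set_pte(page_table, entry);
	update_mmu_cache(vma, addr, entry);		/* was non-present: no TLB invalidate needed */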
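
do_swap_page applies the same discipline around swap I/O: the lock is dropped across the swap-cache lookup and read, the page is locked to freeze page_count + swap_count, and only then is the lock retaken and the pte re-checked. Condensed from the hunk above:

	spin_unlock(&mm->page_table_lock);
	page = lookup_swap_cache(entry);
	if (!page) {
		lock_kernel();
		swapin_readahead(entry);
		page = read_swap_cache(entry);		/* may block on I/O */
		unlock_kernel();
		if (!page) {
			spin_lock(&mm->page_table_lock);
			return -1;
		}
	}
	lock_page(page);
	spin_lock(&mm->page_table_lock);
	if (pte_present(*page_table)) {			/* sibling faulted it in first */
		UnlockPage(page);
		page_cache_release(page);
		return 1;
	}
	/* still non-present: proceed with mm->rss++, mk_pte(), swap_free() ... */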
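
pmd_alloc() and pte_alloc() now take the mm_struct and are entered with page_table_lock held. Each first tries an atomic allocation; the slow path releases the lock, sleeps in the allocator, retakes the lock, and re-checks whether another thread populated the entry while the lock was down. Condensed from the new __pmd_alloc() above (pte_alloc() is symmetric):

	new = pmd_alloc_one_fast(mm, address);		/* atomic attempt, lock held */
	if (!new) {
		spin_unlock(&mm->page_table_lock);
		new = pmd_alloc_one(mm, address);	/* may sleep */
		spin_lock(&mm->page_table_lock);
		if (!new)
			return NULL;
		if (pgd_present(*pgd)) {		/* raced: already populated */
			pmd_free(new);
			return pmd_offset(pgd, address);
		}
	}
	pgd_populate(mm, pgd, new);
	return pmd_offset(pgd, address);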
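
In mmap.c, do_mmap_pgoff() now computes the final vm_flags up front (via the renamed calc_vm_flags()) and performs the munmap, RLIMIT_AS, and vm_enough_memory checks before allocating a vm_area_struct; that ordering is what lets it merge a new private anonymous mapping into an adjacent one without allocating a vma at all. The merge test, re-lined from the hunk above:

	/* Can we just expand an old anonymous mapping? */
	if (addr && !file && !(vm_flags & VM_SHARED)) {
		struct vm_area_struct * vma = find_vma(mm, addr-1);
		if (vma && vma->vm_end == addr && !vma->vm_file &&
		    vma->vm_flags == vm_flags) {
			vma->vm_end = addr + len;	/* grow in place, no new vma */
			goto out;			/* shared accounting at out: */
		}
	}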
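
vmalloc.c drops the big kernel lock in favour of init_mm.page_table_lock and applies the same drop-around-allocation rule inside alloc_area_pte(); re-lined from the hunk above:

	spin_unlock(&init_mm.page_table_lock);
	page = alloc_page(gfp_mask);			/* may sleep */
	spin_lock(&init_mm.page_table_lock);
	if (!pte_none(*pte))
		printk(KERN_ERR "alloc_area_pte: page already exists\n");
	if (!page)
		return -ENOMEM;
	set_pte(pte, mk_pte(page, prot));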
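
Finally, two behavioural notes from slab.c: kmem_cache_shrink() no longer verifies that the cache is chained in cache_cache (is_chained_kmem_cache() is deleted outright), and kmalloc() now returns NULL instead of calling BUG() when the request exceeds the largest general cache. Callers therefore see an oversized request as an ordinary allocation failure:

	buf = kmalloc(bytes, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;		/* now also covers the too-large case */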