author     Ralf Baechle <ralf@linux-mips.org>   1999-09-28 22:25:29 +0000
committer  Ralf Baechle <ralf@linux-mips.org>   1999-09-28 22:25:29 +0000
commit     0ae8dceaebe3659ee0c3352c08125f403e77ebca (patch)
tree       5085c389f09da78182b899d19fe1068b619a69dd /mm
parent     273767781288c35c9d679e908672b9996cda4c34 (diff)
Merge with 2.3.10.
Diffstat (limited to 'mm')
-rw-r--r--  mm/filemap.c     |  24
-rw-r--r--  mm/memory.c      | 256
-rw-r--r--  mm/mlock.c       |  12
-rw-r--r--  mm/mmap.c        |  24
-rw-r--r--  mm/mprotect.c    |   6
-rw-r--r--  mm/mremap.c      |  13
-rw-r--r--  mm/page_alloc.c  |  87
-rw-r--r--  mm/vmscan.c      |  17
8 files changed, 203 insertions, 236 deletions
diff --git a/mm/filemap.c b/mm/filemap.c index ed5b6d34c..668c6c99f 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1194,8 +1194,6 @@ asmlinkage ssize_t sys_sendfile(int out_fd, int in_fd, off_t *offset, size_t cou struct file * in_file, * out_file; struct inode * in_inode, * out_inode; - lock_kernel(); - /* * Get input file, and verify that it is ok.. */ @@ -1234,7 +1232,6 @@ asmlinkage ssize_t sys_sendfile(int out_fd, int in_fd, off_t *offset, size_t cou if (retval) goto fput_out; - unlock_kernel(); retval = 0; if (count) { read_descriptor_t desc; @@ -1244,7 +1241,7 @@ asmlinkage ssize_t sys_sendfile(int out_fd, int in_fd, off_t *offset, size_t cou ppos = &in_file->f_pos; if (offset) { if (get_user(pos, offset)) - goto fput_out_lock; + goto fput_out; ppos = &pos; } @@ -1261,14 +1258,11 @@ asmlinkage ssize_t sys_sendfile(int out_fd, int in_fd, off_t *offset, size_t cou put_user(pos, offset); } -fput_out_lock: - lock_kernel(); fput_out: fput(out_file); fput_in: fput(in_file); out: - unlock_kernel(); return retval; } @@ -1297,9 +1291,7 @@ static unsigned long filemap_nopage(struct vm_area_struct * area, unsigned long new_page = 0; offset = (address & PAGE_MASK) - area->vm_start + area->vm_offset; if (offset >= inode->i_size && (area->vm_flags & VM_SHARED) && area->vm_mm == current->mm) - goto no_page_nolock; - - unlock_kernel(); + goto no_page; /* * Do we have something in the page cache already? @@ -1344,7 +1336,6 @@ success: page_cache_free(new_page); flush_page_to_ram(old_page); - lock_kernel(); return old_page; } @@ -1354,7 +1345,6 @@ success: copy_page(new_page, old_page); flush_page_to_ram(new_page); page_cache_release(page); - lock_kernel(); return new_page; no_cached_page: @@ -1431,8 +1421,6 @@ failure: if (new_page) page_cache_free(new_page); no_page: - lock_kernel(); -no_page_nolock: return 0; } @@ -1487,7 +1475,7 @@ static int filemap_write_page(struct vm_area_struct * vma, * If a task terminates while we're swapping the page, the vma and * and file could be released ... increment the count to be safe. 
*/ - atomic_inc(&file->f_count); + get_file(file); result = do_write_page(inode, file, (const char *) page, offset); fput(file); return result; @@ -1648,8 +1636,7 @@ static struct vm_operations_struct file_shared_mmap = { NULL, /* advise */ filemap_nopage, /* nopage */ NULL, /* wppage */ - filemap_swapout, /* swapout */ - NULL, /* swapin */ + filemap_swapout /* swapout */ }; /* @@ -1667,8 +1654,7 @@ static struct vm_operations_struct file_private_mmap = { NULL, /* advise */ filemap_nopage, /* nopage */ NULL, /* wppage */ - NULL, /* swapout */ - NULL, /* swapin */ + NULL /* swapout */ }; /* This is used for a general mmap of a disk file */ diff --git a/mm/memory.c b/mm/memory.c index aac203bbb..a31e862b2 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -36,7 +36,9 @@ #include <linux/mm.h> #include <linux/mman.h> #include <linux/swap.h> +#include <linux/pagemap.h> #include <linux/smp_lock.h> +#include <linux/swapctl.h> #include <asm/uaccess.h> #include <asm/pgtable.h> @@ -320,7 +322,7 @@ static inline void forget_pte(pte_t page) } } -static inline int zap_pte_range(pmd_t * pmd, unsigned long address, unsigned long size) +static inline int zap_pte_range(struct mm_struct *mm, pmd_t * pmd, unsigned long address, unsigned long size) { pte_t * pte; int freed; @@ -345,15 +347,15 @@ static inline int zap_pte_range(pmd_t * pmd, unsigned long address, unsigned lon page = *pte; pte++; size--; + pte_clear(pte-1); if (pte_none(page)) continue; - pte_clear(pte-1); freed += free_pte(page); } return freed; } -static inline int zap_pmd_range(pgd_t * dir, unsigned long address, unsigned long size) +static inline int zap_pmd_range(struct mm_struct *mm, pgd_t * dir, unsigned long address, unsigned long size) { pmd_t * pmd; unsigned long end; @@ -373,7 +375,7 @@ static inline int zap_pmd_range(pgd_t * dir, unsigned long address, unsigned lon end = PGDIR_SIZE; freed = 0; do { - freed += zap_pte_range(pmd, address, end - address); + freed += zap_pte_range(mm, pmd, address, end - address); address = (address + PMD_SIZE) & PMD_MASK; pmd++; } while (address < end); @@ -390,11 +392,21 @@ void zap_page_range(struct mm_struct *mm, unsigned long address, unsigned long s int freed = 0; dir = pgd_offset(mm, address); + + /* + * This is a long-lived spinlock. That's fine. + * There's no contention, because the page table + * lock only protects against kswapd anyway, and + * even if kswapd happened to be looking at this + * process we _want_ it to get stuck. + */ + spin_lock(&mm->page_table_lock); while (address < end) { - freed += zap_pmd_range(dir, address, end - address); + freed += zap_pmd_range(mm, dir, address, end - address); address = (address + PGDIR_SIZE) & PGDIR_MASK; dir++; } + spin_unlock(&mm->page_table_lock); /* * Update rss for the mm_struct (not necessarily current->mm) */ @@ -599,17 +611,16 @@ unsigned long put_dirty_page(struct task_struct * tsk, unsigned long page, unsig * We also mark the page dirty at this point even though the page will * change only once the write actually happens. This avoids a few races, * and potentially makes it more efficient. + * + * We enter with the page table read-lock held, and need to exit without + * it. */ static int do_wp_page(struct task_struct * tsk, struct vm_area_struct * vma, unsigned long address, pte_t *page_table, pte_t pte) { unsigned long old_page, new_page; struct page * page; - - new_page = __get_free_page(GFP_USER); - /* Did swap_out() unmap the protected page while we slept? 
*/ - if (pte_val(*page_table) != pte_val(pte)) - goto end_wp_page; + old_page = pte_page(pte); if (MAP_NR(old_page) >= max_mapnr) goto bad_wp_page; @@ -634,44 +645,44 @@ static int do_wp_page(struct task_struct * tsk, struct vm_area_struct * vma, /* FallThrough */ case 1: flush_cache_page(vma, address); - set_pte(page_table, pte_mkdirty(pte_mkwrite(pte))); + set_pte(page_table, pte_mkyoung(pte_mkdirty(pte_mkwrite(pte)))); flush_tlb_page(vma, address); -end_wp_page: - /* - * We can release the kernel lock now.. Now swap_out will see - * a dirty page and so won't get confused and flush_tlb_page - * won't SMP race. -Andrea - */ - unlock_kernel(); - - if (new_page) - free_page(new_page); + spin_unlock(&tsk->mm->page_table_lock); return 1; } - + + /* + * Ok, we need to copy. Oh, well.. + */ + spin_unlock(&tsk->mm->page_table_lock); + new_page = __get_free_page(GFP_USER); if (!new_page) - goto no_new_page; + return -1; + spin_lock(&tsk->mm->page_table_lock); - if (PageReserved(page)) - ++vma->vm_mm->rss; - copy_cow_page(old_page,new_page); - flush_page_to_ram(old_page); - flush_page_to_ram(new_page); - flush_cache_page(vma, address); - set_pte(page_table, pte_mkwrite(pte_mkdirty(mk_pte(new_page, vma->vm_page_prot)))); - flush_tlb_page(vma, address); - unlock_kernel(); - __free_page(page); + /* + * Re-check the pte - we dropped the lock + */ + if (pte_val(*page_table) == pte_val(pte)) { + if (PageReserved(page)) + ++vma->vm_mm->rss; + copy_cow_page(old_page,new_page); + flush_page_to_ram(old_page); + flush_page_to_ram(new_page); + flush_cache_page(vma, address); + set_pte(page_table, pte_mkwrite(pte_mkdirty(mk_pte(new_page, vma->vm_page_prot)))); + flush_tlb_page(vma, address); + + /* Free the old page.. */ + new_page = old_page; + } + spin_unlock(&tsk->mm->page_table_lock); + free_page(new_page); return 1; bad_wp_page: printk("do_wp_page: bogus page at address %08lx (%08lx)\n",address,old_page); - send_sig(SIGKILL, tsk, 1); -no_new_page: - unlock_kernel(); - if (new_page) - free_page(new_page); - return 0; + return -1; } /* @@ -725,8 +736,9 @@ void vmtruncate(struct inode * inode, unsigned long offset) struct vm_area_struct * mpnt; truncate_inode_pages(inode, offset); + spin_lock(&inode->i_shared_lock); if (!inode->i_mmap) - return; + goto out_unlock; mpnt = inode->i_mmap; do { struct mm_struct *mm = mpnt->vm_mm; @@ -757,35 +769,81 @@ void vmtruncate(struct inode * inode, unsigned long offset) zap_page_range(mm, start, len); flush_tlb_range(mm, start, end); } while ((mpnt = mpnt->vm_next_share) != NULL); +out_unlock: + spin_unlock(&inode->i_shared_lock); } -/* - * This is called with the kernel lock held, we need - * to return without it. + +/* + * Primitive swap readahead code. We simply read an aligned block of + * (1 << page_cluster) entries in the swap area. This method is chosen + * because it doesn't cost us any seek time. We also make sure to queue + * the 'original' request together with the readahead ones... 
*/ -static int do_swap_page(struct task_struct * tsk, +static void swapin_readahead(unsigned long entry) +{ + int i; + struct page *new_page; + unsigned long offset = SWP_OFFSET(entry); + struct swap_info_struct *swapdev = SWP_TYPE(entry) + swap_info; + + offset = (offset >> page_cluster) << page_cluster; + + i = 1 << page_cluster; + do { + /* Don't read-ahead past the end of the swap area */ + if (offset >= swapdev->max) + break; + /* Don't block on I/O for read-ahead */ + if (atomic_read(&nr_async_pages) >= pager_daemon.swap_cluster) + break; + /* Don't read in bad or busy pages */ + if (!swapdev->swap_map[offset]) + break; + if (swapdev->swap_map[offset] == SWAP_MAP_BAD) + break; + + /* Ok, do the async read-ahead now */ + new_page = read_swap_cache_async(SWP_ENTRY(SWP_TYPE(entry), offset), 0); + if (new_page != NULL) + __free_page(new_page); + offset++; + } while (--i); + return; +} + +static int do_swap_page(struct task_struct * tsk, struct vm_area_struct * vma, unsigned long address, - pte_t * page_table, pte_t entry, int write_access) + pte_t * page_table, unsigned long entry, int write_access) { - if (!vma->vm_ops || !vma->vm_ops->swapin) { - swap_in(tsk, vma, page_table, pte_val(entry), write_access); - flush_page_to_ram(pte_page(*page_table)); - } else { - pte_t page = vma->vm_ops->swapin(vma, address - vma->vm_start + vma->vm_offset, pte_val(entry)); - if (pte_val(*page_table) != pte_val(entry)) { - free_page(pte_page(page)); - } else { - if (page_count(mem_map + MAP_NR(pte_page(page))) > 1 && - !(vma->vm_flags & VM_SHARED)) - page = pte_wrprotect(page); - ++vma->vm_mm->rss; - ++tsk->maj_flt; - flush_page_to_ram(pte_page(page)); - set_pte(page_table, page); - } + struct page *page = lookup_swap_cache(entry); + pte_t pte; + + if (!page) { + lock_kernel(); + swapin_readahead(entry); + page = read_swap_cache(entry); + unlock_kernel(); + if (!page) + return -1; + + flush_page_to_ram(page_address(page)); + } + + vma->vm_mm->rss++; + tsk->min_flt++; + swap_free(entry); + + pte = mk_pte(page_address(page), vma->vm_page_prot); + + if (write_access && !is_page_shared(page)) { + delete_from_swap_cache(page); + pte = pte_mkwrite(pte_mkdirty(pte)); } - unlock_kernel(); + set_pte(page_table, pte); + /* No need to invalidate - it was non-present before */ + update_mmu_cache(vma, address, pte); return 1; } @@ -798,7 +856,7 @@ static int do_anonymous_page(struct task_struct * tsk, struct vm_area_struct * v if (write_access) { unsigned long page = __get_free_page(GFP_USER); if (!page) - return 0; + return -1; clear_page(page); entry = pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot))); vma->vm_mm->rss++; @@ -806,6 +864,8 @@ static int do_anonymous_page(struct task_struct * tsk, struct vm_area_struct * v flush_page_to_ram(page); } set_pte(page_table, entry); + /* No need to invalidate - it was non-present before */ + update_mmu_cache(vma, addr, entry); return 1; } @@ -827,23 +887,17 @@ static int do_no_page(struct task_struct * tsk, struct vm_area_struct * vma, unsigned long page; pte_t entry; - if (!vma->vm_ops || !vma->vm_ops->nopage) { - unlock_kernel(); - return do_anonymous_page(tsk, vma, page_table, write_access, - address); - } + if (!vma->vm_ops || !vma->vm_ops->nopage) + return do_anonymous_page(tsk, vma, page_table, write_access, address); /* * The third argument is "no_share", which tells the low-level code * to copy, not share the page even if sharing is possible. It's * essentially an early COW detection. 
*/ - page = vma->vm_ops->nopage(vma, address & PAGE_MASK, - (vma->vm_flags & VM_SHARED)?0:write_access); - - unlock_kernel(); + page = vma->vm_ops->nopage(vma, address & PAGE_MASK, (vma->vm_flags & VM_SHARED)?0:write_access); if (!page) - return 0; + return 0; /* SIGBUS - but we _really_ should know whether it is OOM or SIGBUS */ ++tsk->maj_flt; ++vma->vm_mm->rss; @@ -866,6 +920,7 @@ static int do_no_page(struct task_struct * tsk, struct vm_area_struct * vma, entry = pte_wrprotect(entry); set_pte(page_table, entry); /* no need to invalidate: a not-present page shouldn't be cached */ + update_mmu_cache(vma, address, entry); return 1; } @@ -877,6 +932,15 @@ static int do_no_page(struct task_struct * tsk, struct vm_area_struct * vma, * There is also a hook called "update_mmu_cache()" that architectures * with external mmu caches can use to update those (ie the Sparc or * PowerPC hashed page tables that act as extended TLBs). + * + * Note the "page_table_lock". It is to protect against kswapd removing + * pages from under us. Note that kswapd only ever _removes_ pages, never + * adds them. As such, once we have noticed that the page is not present, + * we can drop the lock early. + * + * The adding of pages is protected by the MM semaphore (which we hold), + * so we don't need to worry about a page being suddenly been added into + * our VM. */ static inline int handle_pte_fault(struct task_struct *tsk, struct vm_area_struct * vma, unsigned long address, @@ -884,27 +948,32 @@ static inline int handle_pte_fault(struct task_struct *tsk, { pte_t entry; - lock_kernel(); entry = *pte; - if (!pte_present(entry)) { if (pte_none(entry)) return do_no_page(tsk, vma, address, write_access, pte); - return do_swap_page(tsk, vma, address, pte, entry, write_access); + return do_swap_page(tsk, vma, address, pte, pte_val(entry), write_access); } - entry = pte_mkyoung(entry); - set_pte(pte, entry); - flush_tlb_page(vma, address); - if (write_access) { - if (!pte_write(entry)) - return do_wp_page(tsk, vma, address, pte, entry); + /* + * Ok, the entry was present, we need to get the page table + * lock to synchronize with kswapd, and verify that the entry + * didn't change from under us.. + */ + spin_lock(&tsk->mm->page_table_lock); + if (pte_val(entry) == pte_val(*pte)) { + if (write_access) { + if (!pte_write(entry)) + return do_wp_page(tsk, vma, address, pte, entry); - entry = pte_mkdirty(entry); + entry = pte_mkdirty(entry); + } + entry = pte_mkyoung(entry); set_pte(pte, entry); flush_tlb_page(vma, address); + update_mmu_cache(vma, address, entry); } - unlock_kernel(); + spin_unlock(&tsk->mm->page_table_lock); return 1; } @@ -921,28 +990,27 @@ int handle_mm_fault(struct task_struct *tsk, struct vm_area_struct * vma, pmd = pmd_alloc(pgd, address); if (pmd) { pte_t * pte = pte_alloc(pmd, address); - if (pte) { - if (handle_pte_fault(tsk, vma, address, write_access, pte)) { - update_mmu_cache(vma, address, *pte); - return 1; - } - } + if (pte) + return handle_pte_fault(tsk, vma, address, write_access, pte); } - return 0; + return -1; } /* * Simplistic page force-in.. 
*/ -void make_pages_present(unsigned long addr, unsigned long end) +int make_pages_present(unsigned long addr, unsigned long end) { int write; + struct task_struct *tsk = current; struct vm_area_struct * vma; - vma = find_vma(current->mm, addr); + vma = find_vma(tsk->mm, addr); write = (vma->vm_flags & VM_WRITE) != 0; while (addr < end) { - handle_mm_fault(current, vma, addr, write); + if (handle_mm_fault(tsk, vma, addr, write) < 0) + return -1; addr += PAGE_SIZE; } + return 0; } diff --git a/mm/mlock.c b/mm/mlock.c index 7947031af..d6b19cfb1 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -31,7 +31,7 @@ static inline int mlock_fixup_start(struct vm_area_struct * vma, vma->vm_offset += vma->vm_start - n->vm_start; n->vm_flags = newflags; if (n->vm_file) - atomic_inc(&n->vm_file->f_count); + get_file(n->vm_file); if (n->vm_ops && n->vm_ops->open) n->vm_ops->open(n); insert_vm_struct(current->mm, n); @@ -52,7 +52,7 @@ static inline int mlock_fixup_end(struct vm_area_struct * vma, n->vm_offset += n->vm_start - vma->vm_start; n->vm_flags = newflags; if (n->vm_file) - atomic_inc(&n->vm_file->f_count); + get_file(n->vm_file); if (n->vm_ops && n->vm_ops->open) n->vm_ops->open(n); insert_vm_struct(current->mm, n); @@ -179,7 +179,6 @@ asmlinkage int sys_mlock(unsigned long start, size_t len) int error = -ENOMEM; down(¤t->mm->mmap_sem); - lock_kernel(); len = (len + (start & ~PAGE_MASK) + ~PAGE_MASK) & PAGE_MASK; start &= PAGE_MASK; @@ -200,7 +199,6 @@ asmlinkage int sys_mlock(unsigned long start, size_t len) error = do_mlock(start, len, 1); out: - unlock_kernel(); up(¤t->mm->mmap_sem); return error; } @@ -210,11 +208,9 @@ asmlinkage int sys_munlock(unsigned long start, size_t len) int ret; down(¤t->mm->mmap_sem); - lock_kernel(); len = (len + (start & ~PAGE_MASK) + ~PAGE_MASK) & PAGE_MASK; start &= PAGE_MASK; ret = do_mlock(start, len, 0); - unlock_kernel(); up(¤t->mm->mmap_sem); return ret; } @@ -254,7 +250,6 @@ asmlinkage int sys_mlockall(int flags) int ret = -EINVAL; down(¤t->mm->mmap_sem); - lock_kernel(); if (!flags || (flags & ~(MCL_CURRENT | MCL_FUTURE))) goto out; @@ -272,7 +267,6 @@ asmlinkage int sys_mlockall(int flags) ret = do_mlockall(flags); out: - unlock_kernel(); up(¤t->mm->mmap_sem); return ret; } @@ -282,9 +276,7 @@ asmlinkage int sys_munlockall(void) int ret; down(¤t->mm->mmap_sem); - lock_kernel(); ret = do_mlockall(0); - unlock_kernel(); up(¤t->mm->mmap_sem); return ret; } @@ -77,10 +77,12 @@ static inline void remove_shared_vm_struct(struct vm_area_struct *vma) if (file) { if (vma->vm_flags & VM_DENYWRITE) - file->f_dentry->d_inode->i_writecount++; + atomic_inc(&file->f_dentry->d_inode->i_writecount); + spin_lock(&file->f_dentry->d_inode->i_shared_lock); if(vma->vm_next_share) vma->vm_next_share->vm_pprev_share = vma->vm_pprev_share; *vma->vm_pprev_share = vma->vm_next_share; + spin_unlock(&file->f_dentry->d_inode->i_shared_lock); } } @@ -294,7 +296,7 @@ unsigned long do_mmap(struct file * file, unsigned long addr, unsigned long len, if (file) { int correct_wcount = 0; if (vma->vm_flags & VM_DENYWRITE) { - if (file->f_dentry->d_inode->i_writecount > 0) { + if (atomic_read(&file->f_dentry->d_inode->i_writecount) > 0) { error = -ETXTBSY; goto free_vma; } @@ -303,17 +305,17 @@ unsigned long do_mmap(struct file * file, unsigned long addr, unsigned long len, * might). In any case, this takes care of any * race that this might cause. 
*/ - file->f_dentry->d_inode->i_writecount--; + atomic_dec(&file->f_dentry->d_inode->i_writecount); correct_wcount = 1; } error = file->f_op->mmap(file, vma); /* Fix up the count if necessary, then check for an error */ if (correct_wcount) - file->f_dentry->d_inode->i_writecount++; + atomic_inc(&file->f_dentry->d_inode->i_writecount); if (error) goto unmap_and_free_vma; vma->vm_file = file; - atomic_inc(&file->f_count); + get_file(file); } /* @@ -547,7 +549,7 @@ static struct vm_area_struct * unmap_fixup(struct vm_area_struct *area, mpnt->vm_file = area->vm_file; mpnt->vm_pte = area->vm_pte; if (mpnt->vm_file) - atomic_inc(&mpnt->vm_file->f_count); + get_file(mpnt->vm_file); if (mpnt->vm_ops && mpnt->vm_ops->open) mpnt->vm_ops->open(mpnt); area->vm_end = addr; /* Truncate area */ @@ -678,9 +680,9 @@ int do_munmap(unsigned long addr, size_t len) size = end - st; lock_kernel(); - if (mpnt->vm_ops && mpnt->vm_ops->unmap) mpnt->vm_ops->unmap(mpnt, st, size); + unlock_kernel(); remove_shared_vm_struct(mpnt); mm->map_count--; @@ -693,8 +695,6 @@ int do_munmap(unsigned long addr, size_t len) * Fix the mapping, and free the old area if it wasn't reused. */ extra = unmap_fixup(mpnt, st, size, extra); - - unlock_kernel(); } /* Release the extra vma struct if it wasn't used */ @@ -787,10 +787,8 @@ unsigned long do_brk(unsigned long addr, unsigned long len) flags = vma->vm_flags; addr = vma->vm_start; - lock_kernel(); /* kswapd, ugh */ insert_vm_struct(mm, vma); merge_segments(mm, vma->vm_start, vma->vm_end); - unlock_kernel(); mm->total_vm += len >> PAGE_SHIFT; if (flags & VM_LOCKED) { @@ -878,13 +876,15 @@ void insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vmp) if (file) { struct inode * inode = file->f_dentry->d_inode; if (vmp->vm_flags & VM_DENYWRITE) - inode->i_writecount--; + atomic_dec(&inode->i_writecount); /* insert vmp into inode's share list */ + spin_lock(&inode->i_shared_lock); if((vmp->vm_next_share = inode->i_mmap) != NULL) inode->i_mmap->vm_pprev_share = &vmp->vm_next_share; inode->i_mmap = vmp; vmp->vm_pprev_share = &inode->i_mmap; + spin_unlock(&inode->i_shared_lock); } } diff --git a/mm/mprotect.c b/mm/mprotect.c index 14073c0fa..b1504af83 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -103,7 +103,7 @@ static inline int mprotect_fixup_start(struct vm_area_struct * vma, n->vm_flags = newflags; n->vm_page_prot = prot; if (n->vm_file) - atomic_inc(&n->vm_file->f_count); + get_file(n->vm_file); if (n->vm_ops && n->vm_ops->open) n->vm_ops->open(n); insert_vm_struct(current->mm, n); @@ -126,7 +126,7 @@ static inline int mprotect_fixup_end(struct vm_area_struct * vma, n->vm_flags = newflags; n->vm_page_prot = prot; if (n->vm_file) - atomic_inc(&n->vm_file->f_count); + get_file(n->vm_file); if (n->vm_ops && n->vm_ops->open) n->vm_ops->open(n); insert_vm_struct(current->mm, n); @@ -212,7 +212,6 @@ asmlinkage int sys_mprotect(unsigned long start, size_t len, unsigned long prot) return 0; down(¤t->mm->mmap_sem); - lock_kernel(); vma = find_vma(current->mm, start); error = -EFAULT; @@ -249,7 +248,6 @@ asmlinkage int sys_mprotect(unsigned long start, size_t len, unsigned long prot) } merge_segments(current->mm, start, end); out: - unlock_kernel(); up(¤t->mm->mmap_sem); return error; } diff --git a/mm/mremap.c b/mm/mremap.c index 48d3e9f94..2852f9b06 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -57,11 +57,13 @@ static inline pte_t *alloc_one_pte(struct mm_struct *mm, unsigned long addr) return pte; } -static inline int copy_one_pte(pte_t * src, pte_t * dst) +static inline int 
copy_one_pte(struct mm_struct *mm, pte_t * src, pte_t * dst) { int error = 0; - pte_t pte = *src; + pte_t pte; + spin_lock(&mm->page_table_lock); + pte = *src; if (!pte_none(pte)) { error++; if (dst) { @@ -70,6 +72,7 @@ static inline int copy_one_pte(pte_t * src, pte_t * dst) error--; } } + spin_unlock(&mm->page_table_lock); return error; } @@ -80,7 +83,7 @@ static int move_one_page(struct mm_struct *mm, unsigned long old_addr, unsigned src = get_one_pte(mm, old_addr); if (src) - error = copy_one_pte(src, alloc_one_pte(mm, new_addr)); + error = copy_one_pte(mm, src, alloc_one_pte(mm, new_addr)); return error; } @@ -134,14 +137,12 @@ static inline unsigned long move_vma(struct vm_area_struct * vma, new_vma->vm_start = new_addr; new_vma->vm_end = new_addr+new_len; new_vma->vm_offset = vma->vm_offset + (addr - vma->vm_start); - lock_kernel(); if (new_vma->vm_file) - atomic_inc(&new_vma->vm_file->f_count); + get_file(new_vma->vm_file); if (new_vma->vm_ops && new_vma->vm_ops->open) new_vma->vm_ops->open(new_vma); insert_vm_struct(current->mm, new_vma); merge_segments(current->mm, new_vma->vm_start, new_vma->vm_end); - unlock_kernel(); do_munmap(addr, old_len); current->mm->total_vm += new_len >> PAGE_SHIFT; if (new_vma->vm_flags & VM_LOCKED) { diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 3f30a049e..22ce7ac00 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -345,90 +345,3 @@ unsigned long __init free_area_init(unsigned long start_mem, unsigned long end_m } return start_mem; } - -/* - * Primitive swap readahead code. We simply read an aligned block of - * (1 << page_cluster) entries in the swap area. This method is chosen - * because it doesn't cost us any seek time. We also make sure to queue - * the 'original' request together with the readahead ones... - */ -void swapin_readahead(unsigned long entry) -{ - int i; - struct page *new_page; - unsigned long offset = SWP_OFFSET(entry); - struct swap_info_struct *swapdev = SWP_TYPE(entry) + swap_info; - - offset = (offset >> page_cluster) << page_cluster; - - i = 1 << page_cluster; - do { - /* Don't read-ahead past the end of the swap area */ - if (offset >= swapdev->max) - break; - /* Don't block on I/O for read-ahead */ - if (atomic_read(&nr_async_pages) >= pager_daemon.swap_cluster) - break; - /* Don't read in bad or busy pages */ - if (!swapdev->swap_map[offset]) - break; - if (swapdev->swap_map[offset] == SWAP_MAP_BAD) - break; - - /* Ok, do the async read-ahead now */ - new_page = read_swap_cache_async(SWP_ENTRY(SWP_TYPE(entry), offset), 0); - if (new_page != NULL) - __free_page(new_page); - offset++; - } while (--i); - return; -} - -/* - * The tests may look silly, but it essentially makes sure that - * no other process did a swap-in on us just as we were waiting. - * - * Also, don't bother to add to the swap cache if this page-in - * was due to a write access. 
- */ -void swap_in(struct task_struct * tsk, struct vm_area_struct * vma, - pte_t * page_table, unsigned long entry, int write_access) -{ - unsigned long page; - struct page *page_map = lookup_swap_cache(entry); - - if (!page_map) { - swapin_readahead(entry); - page_map = read_swap_cache(entry); - } - if (pte_val(*page_table) != entry) { - if (page_map) - free_page_and_swap_cache(page_address(page_map)); - return; - } - if (!page_map) { - set_pte(page_table, BAD_PAGE); - swap_free(entry); - oom(tsk); - return; - } - - page = page_address(page_map); - vma->vm_mm->rss++; - tsk->min_flt++; - swap_free(entry); - - if (!write_access || is_page_shared(page_map)) { - set_pte(page_table, mk_pte(page, vma->vm_page_prot)); - return; - } - - /* - * The page is unshared and we're going to dirty it - so tear - * down the swap cache and give exclusive access to the page to - * this process. - */ - delete_from_swap_cache(page_map); - set_pte(page_table, pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot)))); - return; -} diff --git a/mm/vmscan.c b/mm/vmscan.c index 4cccaf171..1ae052b94 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -45,7 +45,11 @@ static int try_to_swap_out(struct task_struct * tsk, struct vm_area_struct* vma, page_addr = pte_page(pte); if (MAP_NR(page_addr) >= max_mapnr) goto out_failed; + page = mem_map + MAP_NR(page_addr); + spin_lock(&tsk->mm->page_table_lock); + if (pte_val(pte) != pte_val(*page_table)) + goto out_failed_unlock; /* * Dont be too eager to get aging right if @@ -58,13 +62,13 @@ static int try_to_swap_out(struct task_struct * tsk, struct vm_area_struct* vma, */ set_pte(page_table, pte_mkold(pte)); set_bit(PG_referenced, &page->flags); - goto out_failed; + goto out_failed_unlock; } if (PageReserved(page) || PageLocked(page) || ((gfp_mask & __GFP_DMA) && !PageDMA(page))) - goto out_failed; + goto out_failed_unlock; /* * Is the page already in the swap cache? If so, then @@ -82,7 +86,7 @@ drop_pte: vma->vm_mm->rss--; flush_tlb_page(vma, address); __free_page(page); - goto out_failed; + goto out_failed_unlock; } /* @@ -109,7 +113,7 @@ drop_pte: * locks etc. */ if (!(gfp_mask & __GFP_IO)) - goto out_failed; + goto out_failed_unlock; /* * Ok, it's really dirty. That means that @@ -134,6 +138,7 @@ drop_pte: if (vma->vm_ops && vma->vm_ops->swapout) { pid_t pid = tsk->pid; pte_clear(page_table); + spin_unlock(&tsk->mm->page_table_lock); flush_tlb_page(vma, address); vma->vm_mm->rss--; @@ -155,6 +160,8 @@ drop_pte: vma->vm_mm->rss--; tsk->nswap++; set_pte(page_table, __pte(entry)); + spin_unlock(&tsk->mm->page_table_lock); + flush_tlb_page(vma, address); swap_duplicate(entry); /* One for the process, one for the swap cache */ @@ -167,6 +174,8 @@ drop_pte: out_free_success: __free_page(page); return 1; +out_failed_unlock: + spin_unlock(&tsk->mm->page_table_lock); out_failed: return 0; } |
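The recurring pattern in the mm/memory.c hunks above is the switch from lock_kernel()/unlock_kernel() to the per-mm page_table_lock: sample the pte under the spinlock, drop the lock across anything that can sleep, then re-take it and re-check the pte before installing a new mapping. Below is a minimal sketch of that re-check discipline, distilled from the do_wp_page() hunk; it assumes 2.3.x-era kernel interfaces, and install_copied_page() is a hypothetical stand-in for the copy_cow_page(), cache/TLB flush and set_pte() sequence in the real diff, not a kernel API.

/*
 * Sketch only: the drop-allocate-recheck discipline used by the new
 * do_wp_page().  install_copied_page() is a hypothetical helper standing
 * in for copy_cow_page(), the cache/TLB flushes and set_pte().
 */
static int cow_break_sketch(struct mm_struct *mm, pte_t *page_table, pte_t old_pte)
{
	unsigned long new_page;

	spin_unlock(&mm->page_table_lock);	/* __get_free_page() may sleep */
	new_page = __get_free_page(GFP_USER);
	if (!new_page)
		return -1;			/* callers treat -1 as out of memory */

	spin_lock(&mm->page_table_lock);
	if (pte_val(*page_table) == pte_val(old_pte)) {
		/* Nothing changed while we slept: safe to break the COW */
		install_copied_page(mm, page_table, new_page);
	} else {
		/* kswapd (or another fault) got there first: discard our copy */
		free_page(new_page);
	}
	spin_unlock(&mm->page_table_lock);
	return 1;
}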
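The swapin_readahead() code that this merge moves from mm/page_alloc.c into mm/memory.c reads an aligned cluster of swap entries rather than seeking around the swap area. As a quick stand-alone illustration of the window arithmetic only (page_cluster = 4 is an assumed example value, not something fixed by the patch), a fault at offset 37 falls in the cluster starting at 32, and at most 1 << 4 = 16 entries are considered before the in-loop limits on free, bad and in-flight pages apply:

#include <stdio.h>

/*
 * Stand-alone illustration of the cluster arithmetic in swapin_readahead():
 * round the faulting swap offset down to a (1 << page_cluster) boundary and
 * bound the read-ahead window by the same cluster size.  page_cluster = 4
 * is an assumed example value.
 */
int main(void)
{
	unsigned int page_cluster = 4;
	unsigned long offset = 37;	/* offset of the faulting swap entry */

	unsigned long start = (offset >> page_cluster) << page_cluster;
	unsigned long window = 1UL << page_cluster;

	printf("fault at offset %lu -> read-ahead cluster %lu..%lu (%lu entries)\n",
	       offset, start, start + window - 1, window);
	return 0;
}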