Diffstat (limited to 'mm/memory.c')
-rw-r--r-- | mm/memory.c | 224 |
1 file changed, 110 insertions, 114 deletions
diff --git a/mm/memory.c b/mm/memory.c
index 388d9ce03..932c35648 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -33,23 +33,13 @@
  * Idea by Alex Bligh (alex@cconcepts.co.uk)
  */
 
-#include <linux/signal.h>
-#include <linux/sched.h>
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/string.h>
-#include <linux/types.h>
-#include <linux/ptrace.h>
-#include <linux/mman.h>
 #include <linux/mm.h>
+#include <linux/mman.h>
 #include <linux/swap.h>
-#include <linux/smp.h>
 #include <linux/smp_lock.h>
 
-#include <asm/system.h>
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
-#include <asm/string.h>
 
 unsigned long max_mapnr = 0;
 unsigned long num_physpages = 0;
@@ -289,10 +279,6 @@ skip_copy_pte_range: address = (address + PMD_SIZE) & PMD_MASK;
 				}
 				if (cow)
 					pte = pte_wrprotect(pte);
-#if 0	/* No longer needed with the new swap cache code */
-				if (delete_from_swap_cache(&mem_map[page_nr]))
-					pte = pte_mkdirty(pte);
-#endif
 				set_pte(dst_pte, pte_mkold(pte));
 				set_pte(src_pte, pte);
 				atomic_inc(&mem_map[page_nr].count);
@@ -635,15 +621,15 @@ unsigned long put_dirty_page(struct task_struct * tsk, unsigned long page, unsig
  * change only once the write actually happens. This avoids a few races,
  * and potentially makes it more efficient.
  */
-static void do_wp_page(struct task_struct * tsk, struct vm_area_struct * vma,
-	unsigned long address, int write_access, pte_t *page_table)
+static int do_wp_page(struct task_struct * tsk, struct vm_area_struct * vma,
+	unsigned long address, pte_t *page_table)
 {
 	pte_t pte;
 	unsigned long old_page, new_page;
 	struct page * page_map;
 
 	pte = *page_table;
-	new_page = get_user_page(address);
+	new_page = __get_free_page(GFP_USER);
 	/* Did someone else copy this page for us while we slept? */
 	if (pte_val(*page_table) != pte_val(pte))
 		goto end_wp_page;
@@ -661,40 +647,42 @@ static void do_wp_page(struct task_struct * tsk, struct vm_area_struct * vma,
 	 * Do we need to copy?
 	 */
 	if (is_page_shared(page_map)) {
-		if (new_page) {
-			if (PageReserved(mem_map + MAP_NR(old_page)))
-				++vma->vm_mm->rss;
-			copy_cow_page(old_page,new_page);
-			flush_page_to_ram(old_page);
-			flush_page_to_ram(new_page);
-			flush_cache_page(vma, address);
-			set_pte(page_table, pte_mkwrite(pte_mkdirty(mk_pte(new_page, vma->vm_page_prot))));
-			free_page(old_page);
-			flush_tlb_page(vma, address);
-			return;
-		}
+		unlock_kernel();
+		if (!new_page)
+			return 0;
+
+		if (PageReserved(mem_map + MAP_NR(old_page)))
+			++vma->vm_mm->rss;
+		copy_cow_page(old_page,new_page);
+		flush_page_to_ram(old_page);
+		flush_page_to_ram(new_page);
 		flush_cache_page(vma, address);
-		set_pte(page_table, BAD_PAGE);
-		flush_tlb_page(vma, address);
+		set_pte(page_table, pte_mkwrite(pte_mkdirty(mk_pte(new_page, vma->vm_page_prot))));
 		free_page(old_page);
-		oom(tsk);
-		return;
+		flush_tlb_page(vma, address);
+		return 1;
 	}
+
 	if (PageSwapCache(page_map))
 		delete_from_swap_cache(page_map);
+
+	/* We can release the kernel lock now.. */
+	unlock_kernel();
+
 	flush_cache_page(vma, address);
 	set_pte(page_table, pte_mkdirty(pte_mkwrite(pte)));
 	flush_tlb_page(vma, address);
+end_wp_page:
 	if (new_page)
 		free_page(new_page);
-	return;
+	return 1;
+
 bad_wp_page:
 	printk("do_wp_page: bogus page at address %08lx (%08lx)\n",address,old_page);
 	send_sig(SIGKILL, tsk, 1);
-end_wp_page:
 	if (new_page)
 		free_page(new_page);
-	return;
+	return 0;
 }
 
 /*
@@ -783,30 +771,53 @@ void vmtruncate(struct inode * inode, unsigned long offset)
 }
 
-static inline void do_swap_page(struct task_struct * tsk,
+/*
+ * This is called with the kernel lock held, we need
+ * to return without it.
+ */
+static int do_swap_page(struct task_struct * tsk,
 	struct vm_area_struct * vma, unsigned long address,
 	pte_t * page_table, pte_t entry, int write_access)
 {
-	pte_t page;
-
 	if (!vma->vm_ops || !vma->vm_ops->swapin) {
-		swap_in(tsk, vma, address, page_table, pte_val(entry), write_access);
+		swap_in(tsk, vma, page_table, pte_val(entry), write_access);
 		flush_page_to_ram(pte_page(*page_table));
-		return;
+	} else {
+		pte_t page = vma->vm_ops->swapin(vma, address - vma->vm_start + vma->vm_offset, pte_val(entry));
+		if (pte_val(*page_table) != pte_val(entry)) {
+			free_page(pte_page(page));
+		} else {
+			if (atomic_read(&mem_map[MAP_NR(pte_page(page))].count) > 1 &&
+			    !(vma->vm_flags & VM_SHARED))
+				page = pte_wrprotect(page);
+			++vma->vm_mm->rss;
+			++tsk->maj_flt;
+			flush_page_to_ram(pte_page(page));
+			set_pte(page_table, page);
+		}
 	}
-	page = vma->vm_ops->swapin(vma, address - vma->vm_start + vma->vm_offset, pte_val(entry));
-	if (pte_val(*page_table) != pte_val(entry)) {
-		free_page(pte_page(page));
-		return;
+	unlock_kernel();
+	return 1;
+}
+
+/*
+ * This only needs the MM semaphore
+ */
+static int do_anonymous_page(struct task_struct * tsk, struct vm_area_struct * vma, pte_t *page_table, int write_access, unsigned long addr)
+{
+	pte_t entry = pte_wrprotect(mk_pte(ZERO_PAGE(addr), vma->vm_page_prot));
+	if (write_access) {
+		unsigned long page = __get_free_page(GFP_USER);
+		if (!page)
+			return 0;
+		clear_page(page);
+		entry = pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot)));
+		vma->vm_mm->rss++;
+		tsk->min_flt++;
+		flush_page_to_ram(page);
 	}
-	if (atomic_read(&mem_map[MAP_NR(pte_page(page))].count) > 1 &&
-	    !(vma->vm_flags & VM_SHARED))
-		page = pte_wrprotect(page);
-	++vma->vm_mm->rss;
-	++tsk->maj_flt;
-	flush_page_to_ram(pte_page(page));
-	set_pte(page_table, page);
-	return;
+	put_page(page_table, entry);
+	return 1;
 }
 
 /*
@@ -817,26 +828,34 @@ static inline void do_swap_page(struct task_struct * tsk,
  *
  * As this is called only for pages that do not currently exist, we
  * do not need to flush old virtual caches or the TLB.
+ *
+ * This is called with the MM semaphore and the kernel lock held.
+ * We need to release the kernel lock as soon as possible..
  */
-static void do_no_page(struct task_struct * tsk, struct vm_area_struct * vma,
-	unsigned long address, int write_access, pte_t *page_table, pte_t entry)
+static int do_no_page(struct task_struct * tsk, struct vm_area_struct * vma,
+	unsigned long address, int write_access, pte_t *page_table)
 {
 	unsigned long page;
+	pte_t entry;
+
+	if (!vma->vm_ops || !vma->vm_ops->nopage) {
+		unlock_kernel();
+		return do_anonymous_page(tsk, vma, page_table, write_access,
+			address);
+	}
 
-	if (!pte_none(entry))
-		goto swap_page;
-	address &= PAGE_MASK;
-	if (!vma->vm_ops || !vma->vm_ops->nopage)
-		goto anonymous_page;
 	/*
 	 * The third argument is "no_share", which tells the low-level code
 	 * to copy, not share the page even if sharing is possible. It's
-	 * essentially an early COW detection
+	 * essentially an early COW detection.
 	 */
-	page = vma->vm_ops->nopage(vma, address,
+	page = vma->vm_ops->nopage(vma, address & PAGE_MASK,
 		(vma->vm_flags & VM_SHARED)?0:write_access);
+
+	unlock_kernel();
 	if (!page)
-		goto sigbus;
+		return 0;
+
 	++tsk->maj_flt;
 	++vma->vm_mm->rss;
 	/*
@@ -849,7 +868,6 @@ static void do_no_page(struct task_struct * tsk, struct vm_area_struct * vma,
 	 * so we can make it writable and dirty to avoid having to
 	 * handle that later.
 	 */
-/* do_no_page might already have flushed the page ... */
 	flush_page_to_ram(page);
 	entry = mk_pte(page, vma->vm_page_prot);
 	if (write_access) {
@@ -859,32 +877,7 @@ static void do_no_page(struct task_struct * tsk, struct vm_area_struct * vma,
 		entry = pte_wrprotect(entry);
 	put_page(page_table, entry);
 	/* no need to invalidate: a not-present page shouldn't be cached */
-	return;
-
-anonymous_page:
-	entry = pte_wrprotect(mk_pte(ZERO_PAGE(address), vma->vm_page_prot));
-	if (write_access) {
-		unsigned long page = get_user_page(address);
-		if (!page)
-			goto sigbus;
-		clear_page(page);
-		entry = pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot)));
-		vma->vm_mm->rss++;
-		tsk->min_flt++;
-		flush_page_to_ram(page);
-	}
-	put_page(page_table, entry);
-	return;
-
-sigbus:
-	force_sig(SIGBUS, current);
-	put_page(page_table, BAD_PAGE);
-	/* no need to invalidate, wasn't present */
-	return;
-
-swap_page:
-	do_swap_page(tsk, vma, address, page_table, entry, write_access);
-	return;
+	return 1;
 }
 
 /*
@@ -896,54 +889,57 @@ swap_page:
  * with external mmu caches can use to update those (ie the Sparc or
 * PowerPC hashed page tables that act as extended TLBs).
 */
-static inline void handle_pte_fault(struct task_struct *tsk,
+static inline int handle_pte_fault(struct task_struct *tsk,
	struct vm_area_struct * vma, unsigned long address,
	int write_access, pte_t * pte)
 {
-	pte_t entry = *pte;
+	pte_t entry;
+
+	lock_kernel();
+	entry = *pte;
 
	if (!pte_present(entry)) {
-		do_no_page(tsk, vma, address, write_access, pte, entry);
-		return;
+		if (pte_none(entry))
+			return do_no_page(tsk, vma, address, write_access, pte);
+		return do_swap_page(tsk, vma, address, pte, entry, write_access);
	}
+
	entry = pte_mkyoung(entry);
	set_pte(pte, entry);
	flush_tlb_page(vma, address);
-	if (!write_access)
-		return;
-	if (pte_write(entry)) {
+	if (write_access) {
+		if (!pte_write(entry))
+			return do_wp_page(tsk, vma, address, pte);
+
		entry = pte_mkdirty(entry);
		set_pte(pte, entry);
		flush_tlb_page(vma, address);
-		return;
	}
-	do_wp_page(tsk, vma, address, write_access, pte);
+	unlock_kernel();
+	return 1;
 }
 
 /*
 * By the time we get here, we already hold the mm semaphore
 */
-void handle_mm_fault(struct task_struct *tsk, struct vm_area_struct * vma,
+int handle_mm_fault(struct task_struct *tsk, struct vm_area_struct * vma,
	unsigned long address, int write_access)
 {
	pgd_t *pgd;
	pmd_t *pmd;
-	pte_t *pte;
 
	pgd = pgd_offset(vma->vm_mm, address);
	pmd = pmd_alloc(pgd, address);
-	if (!pmd)
-		goto no_memory;
-	pte = pte_alloc(pmd, address);
-	if (!pte)
-		goto no_memory;
-	lock_kernel();
-	handle_pte_fault(tsk, vma, address, write_access, pte);
-	unlock_kernel();
-	update_mmu_cache(vma, address, *pte);
-	return;
-no_memory:
-	oom(tsk);
+	if (pmd) {
+		pte_t * pte = pte_alloc(pmd, address);
+		if (pte) {
+			if (handle_pte_fault(tsk, vma, address, write_access, pte)) {
+				update_mmu_cache(vma, address, *pte);
+				return 1;
+			}
+		}
+	}
+	return 0;
 }
 
 /*
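
To make the new control flow easier to follow, here is a small standalone model of the convention the patch introduces. It is a sketch, not kernel code: the names and the fake pte state are invented, and a plain flag stands in for the big kernel lock. What it mirrors is the pattern above: handle_pte_fault() takes the kernel lock, each do_*() helper drops it on its own exit path, and every handler returns 1 on success or 0 on failure instead of calling oom() or force_sig() itself.

	/*
	 * Toy userspace model of the locking/return convention in the patch
	 * above; NOT kernel code.  All names here are invented for the sketch.
	 */
	#include <assert.h>
	#include <stdio.h>

	static int kernel_locked;                 /* stands in for the big kernel lock */
	static void lock_kernel(void)   { assert(!kernel_locked); kernel_locked = 1; }
	static void unlock_kernel(void) { assert(kernel_locked);  kernel_locked = 0; }

	struct fault { int present, none, write, writable; };   /* fake pte state */

	/* missing page: may have to sleep, so the helper drops the lock itself */
	static int do_no_page(struct fault *f)
	{
		unlock_kernel();
		printf("no-page fault%s\n", f->write ? " (write)" : "");
		return 1;                         /* 0 would mean out of memory / SIGBUS */
	}

	static int do_swap_page(struct fault *f)
	{
		(void)f;
		unlock_kernel();
		printf("swap-in\n");
		return 1;
	}

	static int do_wp_page(struct fault *f)
	{
		(void)f;
		unlock_kernel();
		printf("copy-on-write\n");
		return 1;
	}

	static int handle_pte_fault(struct fault *f)
	{
		lock_kernel();                    /* taken here, dropped by whichever path returns */
		if (!f->present) {
			if (f->none)
				return do_no_page(f);
			return do_swap_page(f);
		}
		if (f->write && !f->writable)
			return do_wp_page(f);
		/* trivial case (mark young/dirty): handled inline, drop the lock ourselves */
		unlock_kernel();
		return 1;
	}

	int main(void)
	{
		struct fault f = { .present = 0, .none = 1, .write = 1 };
		return handle_pte_fault(&f) ? 0 : 1;   /* the caller, not the helper, reacts to failure */
	}

The point of the rearrangement is that the slow paths (swap-in, copy-on-write, ->nopage) can run without the kernel lock held, and failure handling moves out to whoever calls handle_mm_fault(), which now reports success or failure instead of returning void.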
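The new do_anonymous_page() also isolates the zero-page trick in one place: a read fault on anonymous memory is satisfied by mapping the shared zero page write-protected, and real memory is allocated only on an actual write (__get_free_page() plus clear_page() in the patch, with 0 returned on allocation failure). A toy userspace model of that idea, with invented names, purely to illustrate the behaviour:

	#include <stdio.h>
	#include <stdlib.h>

	#define PAGE_SIZE 4096

	/* one shared, all-zero page, playing the role of the kernel's ZERO_PAGE */
	static const unsigned char zero_page[PAGE_SIZE];

	struct anon_page {
		const unsigned char *mapped;    /* what reads go through */
		unsigned char *private;         /* NULL until the first write fault */
	};

	/* read fault: no allocation, just "map" the shared zero page read-only */
	static void fault_read(struct anon_page *p)
	{
		if (!p->mapped)
			p->mapped = zero_page;
	}

	/* write fault: allocate and zero a private page; returning 0 mirrors the
	 * patch's "return 0" on allocation failure so the caller handles it */
	static int fault_write(struct anon_page *p, size_t off, unsigned char val)
	{
		if (!p->private) {
			p->private = calloc(1, PAGE_SIZE);
			if (!p->private)
				return 0;
			p->mapped = p->private;
		}
		p->private[off] = val;
		return 1;
	}

	int main(void)
	{
		struct anon_page p = { 0 };

		fault_read(&p);
		printf("before write: %d\n", p.mapped[0]);   /* reads see zeroes, nothing allocated */

		if (!fault_write(&p, 0, 42))
			return 1;
		printf("after write:  %d\n", p.mapped[0]);   /* now backed by private memory */

		free(p.private);
		return 0;
	}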