author     Ralf Baechle <ralf@linux-mips.org>    2000-04-28 01:09:25 +0000
committer  Ralf Baechle <ralf@linux-mips.org>    2000-04-28 01:09:25 +0000
commit     b9ba7aeb165cffecdffb60aec8c3fa8d590d9ca9 (patch)
tree       42d07b0c7246ae2536a702e7c5de9e2732341116 /mm
parent     7406b0a326f2d70ade2671c37d1beef62249db97 (diff)
Merge with 2.3.99-pre6.
Diffstat (limited to 'mm')
-rw-r--r--  mm/filemap.c     113
-rw-r--r--  mm/memory.c       62
-rw-r--r--  mm/mmap.c         50
-rw-r--r--  mm/mremap.c        6
-rw-r--r--  mm/page_alloc.c   13
-rw-r--r--  mm/slab.c          6
-rw-r--r--  mm/swap_state.c    7
-rw-r--r--  mm/swapfile.c     49
-rw-r--r--  mm/vmscan.c       54
9 files changed, 233 insertions(+), 127 deletions(-)
diff --git a/mm/filemap.c b/mm/filemap.c
index 533747f96..d0df8bd2c 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -44,6 +44,7 @@
 atomic_t page_cache_size = ATOMIC_INIT(0);
 unsigned int page_hash_bits;
 struct page **page_hash_table;
+struct list_head lru_cache;

 spinlock_t pagecache_lock = SPIN_LOCK_UNLOCKED;

 /*
@@ -77,6 +78,15 @@ static void remove_page_from_hash_queue(struct page * page)
 	atomic_dec(&page_cache_size);
 }

+static inline int sync_page(struct page *page)
+{
+	struct address_space *mapping = page->mapping;
+
+	if (mapping && mapping->a_ops && mapping->a_ops->sync_page)
+		return mapping->a_ops->sync_page(page);
+	return 0;
+}
+
 /*
  * Remove a page from the page cache and free it. Caller has to make
  * sure the page is locked and that nobody else uses it - or that usage
@@ -87,6 +97,9 @@ void remove_inode_page(struct page *page)
 	if (!PageLocked(page))
 		PAGE_BUG(page);

+	/* Initiate completion of any async operations */
+	sync_page(page);
+
 	spin_lock(&pagecache_lock);
 	remove_page_from_inode_queue(page);
 	remove_page_from_hash_queue(page);
@@ -99,6 +112,7 @@ void invalidate_inode_pages(struct inode * inode)
 	struct list_head *head, *curr;
 	struct page * page;

+repeat:
 	head = &inode->i_mapping->pages;
 	spin_lock(&pagecache_lock);
 	curr = head->next;
@@ -110,14 +124,13 @@ void invalidate_inode_pages(struct inode * inode)
 		/* We cannot invalidate a locked page */
 		if (TryLockPage(page))
 			continue;
+		spin_unlock(&pagecache_lock);

 		lru_cache_del(page);
-		remove_page_from_inode_queue(page);
-		remove_page_from_hash_queue(page);
-		page->mapping = NULL;
+		remove_inode_page(page);
 		UnlockPage(page);
-		page_cache_release(page);
+		goto repeat;
 	}
 	spin_unlock(&pagecache_lock);
 }
@@ -149,11 +162,16 @@ repeat:
 		/* page wholly truncated - free it */
 		if (offset >= start) {
+			if (TryLockPage(page)) {
+				spin_unlock(&pagecache_lock);
+				get_page(page);
+				wait_on_page(page);
+				put_page(page);
+				goto repeat;
+			}
 			get_page(page);
 			spin_unlock(&pagecache_lock);

-			lock_page(page);
-
 			if (!page->buffers || block_flushpage(page, 0))
 				lru_cache_del(page);
@@ -191,11 +209,13 @@ repeat:
 			continue;

 		/* partial truncate, clear end of page */
+		if (TryLockPage(page)) {
+			spin_unlock(&pagecache_lock);
+			goto repeat;
+		}
 		get_page(page);
 		spin_unlock(&pagecache_lock);

-		lock_page(page);
-
 		memclear_highpage_flush(page, partial, PAGE_CACHE_SIZE-partial);
 		if (page->buffers)
 			block_flushpage(page, partial);
@@ -208,6 +228,9 @@ repeat:
 		 */
 		UnlockPage(page);
 		page_cache_release(page);
+		get_page(page);
+		wait_on_page(page);
+		put_page(page);
 		goto repeat;
 	}
 	spin_unlock(&pagecache_lock);
@@ -215,46 +238,55 @@ repeat:

 int shrink_mmap(int priority, int gfp_mask, zone_t *zone)
 {
-	int ret = 0, count;
+	int ret = 0, loop = 0, count;
 	LIST_HEAD(young);
 	LIST_HEAD(old);
 	LIST_HEAD(forget);
 	struct list_head * page_lru, * dispose;
-	struct page * page;
-
+	struct page * page = NULL;
+	struct zone_struct * p_zone;
+	int maxloop = 256 >> priority;
+
 	if (!zone)
 		BUG();

-	count = nr_lru_pages / (priority+1);
+	count = nr_lru_pages >> priority;
+	if (!count)
+		return ret;

 	spin_lock(&pagemap_lru_lock);
-
-	while (count > 0 && (page_lru = zone->lru_cache.prev) != &zone->lru_cache) {
+again:
+	/* we need pagemap_lru_lock for list_del() ... subtle code below */
+	while (count > 0 && (page_lru = lru_cache.prev) != &lru_cache) {
 		page = list_entry(page_lru, struct page, lru);
 		list_del(page_lru);
+		p_zone = page->zone;

-		dispose = &zone->lru_cache;
-		if (test_and_clear_bit(PG_referenced, &page->flags))
-			/* Roll the page at the top of the lru list,
-			 * we could also be more aggressive putting
-			 * the page in the young-dispose-list, so
-			 * avoiding to free young pages in each pass.
-			 */
-			goto dispose_continue;
-
+		/*
+		 * These two tests are there to make sure we don't free too
+		 * many pages from the "wrong" zone. We free some anyway,
+		 * they are the least recently used pages in the system.
+		 * When we don't free them, leave them in &old.
+		 */
 		dispose = &old;
-		/* don't account passes over not DMA pages */
-		if (zone && (!memclass(page->zone, zone)))
+		if (p_zone != zone && (loop > (maxloop / 4) ||
+				p_zone->free_pages > p_zone->pages_high))
 			goto dispose_continue;

-		count--;
-
+		/* The page is in use, or was used very recently, put it in
+		 * &young to make sure that we won't try to free it the next
+		 * time */
 		dispose = &young;
-		/* avoid unscalable SMP locking */
+		if (test_and_clear_bit(PG_referenced, &page->flags))
+			goto dispose_continue;
+
+		count--;
 		if (!page->buffers && page_count(page) > 1)
 			goto dispose_continue;

+		/* Page not used -> free it; if that fails -> &old */
+		dispose = &old;
 		if (TryLockPage(page))
 			goto dispose_continue;
@@ -327,6 +359,7 @@ unlock_continue:
 	list_add(page_lru, dispose);
 	continue;

+	/* we're holding pagemap_lru_lock, so we can just loop again */
dispose_continue:
 	list_add(page_lru, dispose);
 }
@@ -342,9 +375,14 @@ made_buffer_progress:
 	/* nr_lru_pages needs the spinlock */
 	nr_lru_pages--;

+	loop++;
+	/* wrong zone? not looped too often? roll again... */
+	if (page->zone != zone && loop < maxloop)
+		goto again;
+
out:
-	list_splice(&young, &zone->lru_cache);
-	list_splice(&old, zone->lru_cache.prev);
+	list_splice(&young, &lru_cache);
+	list_splice(&old, lru_cache.prev);

 	spin_unlock(&pagemap_lru_lock);
@@ -467,6 +505,9 @@ static inline void __add_to_page_cache(struct page * page,
 	struct page *alias;
 	unsigned long flags;

+	if (PageLocked(page))
+		BUG();
+
 	flags = page->flags & ~((1 << PG_uptodate) | (1 << PG_error) | (1 << PG_dirty));
 	page->flags = flags | (1 << PG_locked) | (1 << PG_referenced);
 	get_page(page);
@@ -574,7 +615,7 @@ void ___wait_on_page(struct page *page)
 	add_wait_queue(&page->wait, &wait);
 	do {
-		run_task_queue(&tq_disk);
+		sync_page(page);
 		set_task_state(tsk, TASK_UNINTERRUPTIBLE);
 		if (!PageLocked(page))
 			break;
@@ -619,7 +660,7 @@ repeat:
 		struct task_struct *tsk = current;
 		DECLARE_WAITQUEUE(wait, tsk);

-		run_task_queue(&tq_disk);
+		sync_page(page);
 		__set_task_state(tsk, TASK_UNINTERRUPTIBLE);
 		add_wait_queue(&page->wait, &wait);
@@ -669,7 +710,7 @@ repeat:
 		struct task_struct *tsk = current;
 		DECLARE_WAITQUEUE(wait, tsk);

-		run_task_queue(&tq_disk);
+		sync_page(page);
 		__set_task_state(tsk, TASK_UNINTERRUPTIBLE);
 		add_wait_queue(&page->wait, &wait);
@@ -1496,7 +1537,7 @@ static int filemap_write_page(struct file *file,
 	 * mmap_sem is held.
 	 */
 	lock_page(page);
-	result = inode->i_mapping->a_ops->writepage(dentry, page);
+	result = inode->i_mapping->a_ops->writepage(file, dentry, page);
 	UnlockPage(page);
 	return result;
 }
@@ -1707,8 +1748,8 @@ static int msync_interval(struct vm_area_struct * vma,
 	error = vma->vm_ops->sync(vma, start, end-start, flags);
 	if (!error && (flags & MS_SYNC)) {
 		struct file * file = vma->vm_file;
-		if (file)
-			error = file_fsync(file, file->f_dentry);
+		if (file && file->f_op && file->f_op->fsync)
+			error = file->f_op->fsync(file, file->f_dentry);
 	}
 	return error;
 }
diff --git a/mm/memory.c b/mm/memory.c
index 28791baa2..84ecb57b5 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -472,7 +472,7 @@ int map_user_kiobuf(int rw, struct kiobuf *iobuf, unsigned long va, size_t len)
 			goto out_unlock;
 		}
 	}
-	if (handle_mm_fault(current, vma, ptr, datain) <= 0)
+	if (handle_mm_fault(current->mm, vma, ptr, datain) <= 0)
 		goto out_unlock;
 	spin_lock(&mm->page_table_lock);
 	map = follow_page(ptr);
@@ -815,7 +815,7 @@ static inline void break_cow(struct vm_area_struct * vma, struct page * old_page
  * We enter with the page table read-lock held, and need to exit without
  * it.
  */
-static int do_wp_page(struct task_struct * tsk, struct vm_area_struct * vma,
+static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
 	unsigned long address, pte_t *page_table, pte_t pte)
 {
@@ -824,7 +824,7 @@ static int do_wp_page(struct task_struct * tsk, struct vm_area_struct * vma,
 	map_nr = pte_pagenr(pte);
 	if (map_nr >= max_mapnr)
 		goto bad_wp_page;
-	tsk->min_flt++;
+	mm->min_flt++;
 	old_page = mem_map + map_nr;

 	/*
@@ -854,36 +854,36 @@ static int do_wp_page(struct task_struct * tsk, struct vm_area_struct * vma,
 	case 1:
 		flush_cache_page(vma, address);
 		establish_pte(vma, address, page_table, pte_mkyoung(pte_mkdirty(pte_mkwrite(pte))));
-		spin_unlock(&tsk->mm->page_table_lock);
+		spin_unlock(&mm->page_table_lock);
 		return 1;
 	}

 	/*
 	 * Ok, we need to copy. Oh, well..
 	 */
-	spin_unlock(&tsk->mm->page_table_lock);
+	spin_unlock(&mm->page_table_lock);
 	new_page = alloc_page(GFP_HIGHUSER);
 	if (!new_page)
 		return -1;
-	spin_lock(&tsk->mm->page_table_lock);
+	spin_lock(&mm->page_table_lock);

 	/*
 	 * Re-check the pte - we dropped the lock
 	 */
 	if (pte_val(*page_table) == pte_val(pte)) {
 		if (PageReserved(old_page))
-			++vma->vm_mm->rss;
+			++mm->rss;
 		break_cow(vma, old_page, new_page, address, page_table);

 		/* Free the old page.. */
 		new_page = old_page;
 	}
-	spin_unlock(&tsk->mm->page_table_lock);
+	spin_unlock(&mm->page_table_lock);
 	__free_page(new_page);
 	return 1;

bad_wp_page:
-	spin_unlock(&tsk->mm->page_table_lock);
+	spin_unlock(&mm->page_table_lock);
 	printk("do_wp_page: bogus page at address %08lx (nr %ld)\n",address,map_nr);
 	return -1;
 }
@@ -1029,7 +1029,7 @@ void swapin_readahead(swp_entry_t entry)
 	return;
 }

-static int do_swap_page(struct task_struct * tsk,
+static int do_swap_page(struct mm_struct * mm,
 	struct vm_area_struct * vma, unsigned long address,
 	pte_t * page_table, swp_entry_t entry, int write_access)
 {
@@ -1048,8 +1048,8 @@ static int do_swap_page(struct task_struct * tsk,
 		flush_icache_page(vma, page);
 	}

-	vma->vm_mm->rss++;
-	tsk->min_flt++;
+	mm->rss++;
+	mm->min_flt++;

 	pte = mk_pte(page, vma->vm_page_prot);
@@ -1080,7 +1080,7 @@ static int do_swap_page(struct task_struct * tsk,
 /*
  * This only needs the MM semaphore
  */
-static int do_anonymous_page(struct task_struct * tsk, struct vm_area_struct * vma, pte_t *page_table, int write_access, unsigned long addr)
+static int do_anonymous_page(struct mm_struct * mm, struct vm_area_struct * vma, pte_t *page_table, int write_access, unsigned long addr)
 {
 	int high = 0;
 	struct page *page = NULL;
@@ -1093,8 +1093,8 @@ static int do_anonymous_page(struct task_struct * tsk, struct vm_area_struct * v
 			high = 1;
 		clear_user_highpage(page, addr);
 		entry = pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot)));
-		vma->vm_mm->rss++;
-		tsk->min_flt++;
+		mm->rss++;
+		mm->min_flt++;
 		flush_page_to_ram(page);
 	}
 	set_pte(page_table, entry);
@@ -1114,14 +1114,14 @@ static int do_anonymous_page(struct task_struct * tsk, struct vm_area_struct * v
  *
  * This is called with the MM semaphore held.
  */
-static int do_no_page(struct task_struct * tsk, struct vm_area_struct * vma,
+static int do_no_page(struct mm_struct * mm, struct vm_area_struct * vma,
 	unsigned long address, int write_access, pte_t *page_table)
 {
 	struct page * new_page;
 	pte_t entry;

 	if (!vma->vm_ops || !vma->vm_ops->nopage)
-		return do_anonymous_page(tsk, vma, page_table, write_access, address);
+		return do_anonymous_page(mm, vma, page_table, write_access, address);

 	/*
 	 * The third argument is "no_share", which tells the low-level code
@@ -1133,8 +1133,8 @@ static int do_no_page(struct task_struct * tsk, struct vm_area_struct * vma,
 		return 0;
 	if (new_page == NOPAGE_OOM)
 		return -1;
-	++tsk->maj_flt;
-	++vma->vm_mm->rss;
+	++mm->maj_flt;
+	++mm->rss;
 	/*
 	 * This silly early PAGE_DIRTY setting removes a race
 	 * due to the bad i386 page protection. But it's valid
@@ -1177,7 +1177,7 @@ static int do_no_page(struct task_struct * tsk, struct vm_area_struct * vma,
 * so we don't need to worry about a page being suddenly been added into
 * our VM.
 */
-static inline int handle_pte_fault(struct task_struct *tsk,
+static inline int handle_pte_fault(struct mm_struct *mm,
 	struct vm_area_struct * vma, unsigned long address,
 	int write_access, pte_t * pte)
 {
@@ -1186,8 +1186,8 @@ static inline int handle_pte_fault(struct task_struct *tsk,
 	entry = *pte;
 	if (!pte_present(entry)) {
 		if (pte_none(entry))
-			return do_no_page(tsk, vma, address, write_access, pte);
-		return do_swap_page(tsk, vma, address, pte, pte_to_swp_entry(entry), write_access);
+			return do_no_page(mm, vma, address, write_access, pte);
+		return do_swap_page(mm, vma, address, pte, pte_to_swp_entry(entry), write_access);
 	}

 	/*
@@ -1195,38 +1195,38 @@ static inline int handle_pte_fault(struct task_struct *tsk,
 	 * lock to synchronize with kswapd, and verify that the entry
 	 * didn't change from under us..
 	 */
-	spin_lock(&tsk->mm->page_table_lock);
+	spin_lock(&mm->page_table_lock);
 	if (pte_val(entry) == pte_val(*pte)) {
 		if (write_access) {
 			if (!pte_write(entry))
-				return do_wp_page(tsk, vma, address, pte, entry);
+				return do_wp_page(mm, vma, address, pte, entry);

 			entry = pte_mkdirty(entry);
 		}
 		entry = pte_mkyoung(entry);
 		establish_pte(vma, address, pte, entry);
 	}
-	spin_unlock(&tsk->mm->page_table_lock);
+	spin_unlock(&mm->page_table_lock);
 	return 1;
 }

 /*
  * By the time we get here, we already hold the mm semaphore
  */
-int handle_mm_fault(struct task_struct *tsk, struct vm_area_struct * vma,
+int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct * vma,
 	unsigned long address, int write_access)
 {
 	int ret = -1;
 	pgd_t *pgd;
 	pmd_t *pmd;

-	pgd = pgd_offset(vma->vm_mm, address);
+	pgd = pgd_offset(mm, address);
 	pmd = pmd_alloc(pgd, address);

 	if (pmd) {
 		pte_t * pte = pte_alloc(pmd, address);
 		if (pte)
-			ret = handle_pte_fault(tsk, vma, address, write_access, pte);
+			ret = handle_pte_fault(mm, vma, address, write_access, pte);
 	}
 	return ret;
 }
@@ -1237,15 +1237,15 @@ int handle_mm_fault(struct task_struct *tsk, struct vm_area_struct * vma,
 int make_pages_present(unsigned long addr, unsigned long end)
 {
 	int write;
-	struct task_struct *tsk = current;
+	struct mm_struct *mm = current->mm;
 	struct vm_area_struct * vma;

-	vma = find_vma(tsk->mm, addr);
+	vma = find_vma(mm, addr);
 	write = (vma->vm_flags & VM_WRITE) != 0;
 	if (addr >= end)
 		BUG();
 	do {
-		if (handle_mm_fault(tsk, vma, addr, write) < 0)
+		if (handle_mm_fault(mm, vma, addr, write) < 0)
 			return -1;
 		addr += PAGE_SIZE;
 	} while (addr < end);
diff --git a/mm/mmap.c b/mm/mmap.c
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -110,7 +110,7 @@ asmlinkage unsigned long sys_brk(unsigned long brk)
 	/* Always allow shrinking brk. */
 	if (brk <= mm->brk) {
-		if (!do_munmap(newbrk, oldbrk-newbrk))
+		if (!do_munmap(mm, newbrk, oldbrk-newbrk))
 			goto set_brk;
 		goto out;
 	}
@@ -220,8 +220,7 @@ unsigned long do_mmap_pgoff(struct file * file, unsigned long addr, unsigned lon
 		default:
 			return -EINVAL;
 		}
-	} else if ((flags & MAP_TYPE) != MAP_PRIVATE)
-		return -EINVAL;
+	}

 	/* Obtain the address to map to. we verify (or select) it and ensure
 	 * that it represents a valid section of the address space.
@@ -269,8 +268,11 @@ unsigned long do_mmap_pgoff(struct file * file, unsigned long addr, unsigned lon
 			if (!(file->f_mode & FMODE_WRITE))
 				vma->vm_flags &= ~(VM_MAYWRITE | VM_SHARED);
 		}
-	} else
+	} else {
 		vma->vm_flags |= VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
+		if (flags & MAP_SHARED)
+			vma->vm_flags |= VM_SHARED | VM_MAYSHARE;
+	}
 	vma->vm_page_prot = protection_map[vma->vm_flags & 0x0f];
 	vma->vm_ops = NULL;
 	vma->vm_pgoff = pgoff;
@@ -279,7 +281,7 @@ unsigned long do_mmap_pgoff(struct file * file, unsigned long addr, unsigned lon
 	/* Clear old maps */
 	error = -ENOMEM;
-	if (do_munmap(addr, len))
+	if (do_munmap(mm, addr, len))
 		goto free_vma;

 	/* Check against address space limit. */
@@ -316,6 +318,8 @@ unsigned long do_mmap_pgoff(struct file * file, unsigned long addr, unsigned lon
 		atomic_inc(&file->f_dentry->d_inode->i_writecount);
 		if (error)
 			goto unmap_and_free_vma;
+	} else if (flags & MAP_SHARED) {
+		error = map_zero_setup(vma);
 	}

 	/*
@@ -468,13 +472,13 @@ struct vm_area_struct * find_vma_prev(struct mm_struct * mm, unsigned long addr,
 	return NULL;
 }

-struct vm_area_struct * find_extend_vma(struct task_struct * tsk, unsigned long addr)
+struct vm_area_struct * find_extend_vma(struct mm_struct * mm, unsigned long addr)
 {
 	struct vm_area_struct * vma;
 	unsigned long start;

 	addr &= PAGE_MASK;
-	vma = find_vma(tsk->mm,addr);
+	vma = find_vma(mm,addr);
 	if (!vma)
 		return NULL;
 	if (vma->vm_start <= addr)
@@ -513,8 +517,9 @@ struct vm_area_struct * find_extend_vma(struct task_struct * tsk, unsigned long
 * allocate a new one, and the return indicates whether the old
 * area was reused.
 */
-static struct vm_area_struct * unmap_fixup(struct vm_area_struct *area,
-	unsigned long addr, size_t len, struct vm_area_struct *extra)
+static struct vm_area_struct * unmap_fixup(struct mm_struct *mm,
+	struct vm_area_struct *area, unsigned long addr, size_t len,
+	struct vm_area_struct *extra)
 {
 	struct vm_area_struct *mpnt;
 	unsigned long end = addr + len;
@@ -536,11 +541,11 @@ static struct vm_area_struct * unmap_fixup(struct vm_area_struct *area,
 	/* Work out to one of the ends. */
 	if (end == area->vm_end) {
 		area->vm_end = addr;
-		vmlist_modify_lock(current->mm);
+		vmlist_modify_lock(mm);
 	} else if (addr == area->vm_start) {
 		area->vm_pgoff += (end - area->vm_start) >> PAGE_SHIFT;
 		area->vm_start = end;
-		vmlist_modify_lock(current->mm);
+		vmlist_modify_lock(mm);
 	} else {
 		/* Unmapping a hole: area->vm_start < addr <= end < area->vm_end */
 		/* Add end mapping -- leave beginning for below */
@@ -562,12 +567,12 @@ static struct vm_area_struct * unmap_fixup(struct vm_area_struct *area,
 		if (mpnt->vm_ops && mpnt->vm_ops->open)
 			mpnt->vm_ops->open(mpnt);
 		area->vm_end = addr;	/* Truncate area */
-		vmlist_modify_lock(current->mm);
-		insert_vm_struct(current->mm, mpnt);
+		vmlist_modify_lock(mm);
+		insert_vm_struct(mm, mpnt);
 	}

-	insert_vm_struct(current->mm, area);
-	vmlist_modify_unlock(current->mm);
+	insert_vm_struct(mm, area);
+	vmlist_modify_unlock(mm);
 	return extra;
 }
@@ -634,9 +639,8 @@ no_mmaps:
 * work.  This now handles partial unmappings.
 * Jeremy Fitzhardine <jeremy@sw.oz.au>
 */
-int do_munmap(unsigned long addr, size_t len)
+int do_munmap(struct mm_struct *mm, unsigned long addr, size_t len)
 {
-	struct mm_struct * mm;
 	struct vm_area_struct *mpnt, *prev, **npp, *free, *extra;

 	if ((addr & ~PAGE_MASK) || addr > TASK_SIZE || len > TASK_SIZE-addr)
@@ -650,7 +654,6 @@ int do_munmap(unsigned long addr, size_t len)
 	 * every area affected in some way (by any overlap) is put
 	 * on the list.  If nothing is put on, nothing is affected.
 	 */
-	mm = current->mm;
 	mpnt = find_vma_prev(mm, addr, &prev);
 	if (!mpnt)
 		return 0;
@@ -713,7 +716,7 @@ int do_munmap(unsigned long addr, size_t len)
 		/*
 		 * Fix the mapping, and free the old area if it wasn't reused.
 		 */
-		extra = unmap_fixup(mpnt, st, size, extra);
+		extra = unmap_fixup(mm, mpnt, st, size, extra);
 	}

 	/* Release the extra vma struct if it wasn't used */
@@ -728,10 +731,11 @@ int do_munmap(unsigned long addr, size_t len)
 asmlinkage long sys_munmap(unsigned long addr, size_t len)
 {
 	int ret;
+	struct mm_struct *mm = current->mm;

-	down(&current->mm->mmap_sem);
-	ret = do_munmap(addr, len);
-	up(&current->mm->mmap_sem);
+	down(&mm->mmap_sem);
+	ret = do_munmap(mm, addr, len);
+	up(&mm->mmap_sem);
 	return ret;
 }
@@ -763,7 +767,7 @@ unsigned long do_brk(unsigned long addr, unsigned long len)
 	/*
 	 * Clear old maps.  this also does some error checking for us
 	 */
-	retval = do_munmap(addr, len);
+	retval = do_munmap(mm, addr, len);
 	if (retval != 0)
 		return retval;
diff --git a/mm/mremap.c b/mm/mremap.c
index d8d18cf62..0404dd795 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -145,7 +145,7 @@ static inline unsigned long move_vma(struct vm_area_struct * vma,
 		insert_vm_struct(current->mm, new_vma);
 		merge_segments(current->mm, new_vma->vm_start, new_vma->vm_end);
 		vmlist_modify_unlock(vma->vm_mm);
-		do_munmap(addr, old_len);
+		do_munmap(current->mm, addr, old_len);
 		current->mm->total_vm += new_len >> PAGE_SHIFT;
 		if (new_vma->vm_flags & VM_LOCKED) {
 			current->mm->locked_vm += new_len >> PAGE_SHIFT;
@@ -201,7 +201,7 @@ unsigned long do_mremap(unsigned long addr,
 		if ((addr <= new_addr) && (addr+old_len) > new_addr)
 			goto out;

-		do_munmap(new_addr, new_len);
+		do_munmap(current->mm, new_addr, new_len);
 	}

 	/*
@@ -210,7 +210,7 @@ unsigned long do_mremap(unsigned long addr,
 	 */
 	ret = addr;
 	if (old_len >= new_len) {
-		do_munmap(addr+new_len, old_len - new_len);
+		do_munmap(current->mm, addr+new_len, old_len - new_len);
 		if (!(flags & MREMAP_FIXED) || (new_addr == addr))
 			goto out;
 	}
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 07fdaa021..ba5ba3013 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -25,7 +25,7 @@
 #endif

 int nr_swap_pages = 0;
-int nr_lru_pages;
+int nr_lru_pages = 0;
 pg_data_t *pgdat_list = (pg_data_t *)0;

 static char *zone_names[MAX_NR_ZONES] = { "DMA", "Normal", "HighMem" };
@@ -273,6 +273,8 @@ static int zone_balance_memory(zonelist_t *zonelist)
 struct page * __alloc_pages(zonelist_t *zonelist, unsigned long order)
 {
 	zone_t **zone = zonelist->zones;
+	int gfp_mask = zonelist->gfp_mask;
+	static int low_on_memory;

 	/*
 	 * If this is a recursive call, we'd better
@@ -282,6 +284,11 @@ struct page * __alloc_pages(zonelist_t *zonelist, unsigned long order)
 	if (current->flags & PF_MEMALLOC)
 		goto allocate_ok;

+	/* If we're a memory hog, unmap some pages */
+	if (current->hog && low_on_memory &&
+			(gfp_mask & __GFP_WAIT))
+		swap_out(4, gfp_mask);
+
 	/*
 	 * (If anyone calls gfp from interrupts nonatomically then it
 	 * will sooner or later tripped up by a schedule().)
@@ -299,11 +306,13 @@ struct page * __alloc_pages(zonelist_t *zonelist, unsigned long order)
 		/* Are we supposed to free memory? Don't make it worse.. */
 		if (!z->zone_wake_kswapd && z->free_pages > z->pages_low) {
 			struct page *page = rmqueue(z, order);
+			low_on_memory = 0;
 			if (page)
 				return page;
 		}
 	}

+	low_on_memory = 1;
 	/*
 	 * Ok, no obvious zones were available, start
 	 * balancing things a bit..
@@ -530,6 +539,7 @@ void __init free_area_init_core(int nid, pg_data_t *pgdat, struct page **gmap,
 	freepages.min += i;
 	freepages.low += i * 2;
 	freepages.high += i * 3;
+	memlist_init(&lru_cache);

 	/*
 	 * Some architectures (with lots of mem and discontinous memory
@@ -609,7 +619,6 @@ void __init free_area_init_core(int nid, pg_data_t *pgdat, struct page **gmap,
 			unsigned long bitmap_size;

 			memlist_init(&zone->free_area[i].free_list);
-			memlist_init(&zone->lru_cache);
 			mask += mask;
 			size = (size + ~mask) & mask;
 			bitmap_size = size >> i;
diff --git a/mm/slab.c b/mm/slab.c
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -1534,7 +1534,7 @@ nul_ptr:
 * it should be in this state _before_ it is released.
 */
 static inline void
-__kmem_cache_free(kmem_cache_t *cachep, const void *objp)
+__kmem_cache_free(kmem_cache_t *cachep, void *objp)
 {
 	kmem_slab_t *slabp;
 	kmem_bufctl_t *bufp;
@@ -1739,7 +1739,7 @@ kfree(const void *objp)
 	 */
 	cachep = SLAB_GET_PAGE_CACHE(page);
 	if (cachep && (cachep->c_flags & SLAB_CFLGS_GENERAL)) {
-		__kmem_cache_free(cachep, objp);
+		__kmem_cache_free(cachep, (void *)objp);
 		return;
 	}
 }
@@ -1774,7 +1774,7 @@ kfree_s(const void *objp, size_t size)
 	cachep = SLAB_GET_PAGE_CACHE(page);
 	if (cachep && cachep->c_flags & SLAB_CFLGS_GENERAL) {
 		if (size <= cachep->c_org_size) {	/* XXX better check */
-			__kmem_cache_free(cachep, objp);
+			__kmem_cache_free(cachep, (void *)objp);
 			return;
 		}
 	}
diff --git a/mm/swap_state.c b/mm/swap_state.c
index defe9b463..29ba0d78b 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -17,12 +17,17 @@
 #include <asm/pgtable.h>

+static struct address_space_operations swap_aops = {
+	sync_page: block_sync_page
+};
+
 struct address_space swapper_space = {
 	{ /* pages */
 		&swapper_space.pages,	/* .next */
 		&swapper_space.pages	/* .prev */
 	},
-	0 /* nrpages */
+	0,	/* nrpages */
+	&swap_aops,
 };

 #ifdef SWAP_CACHE_INFO
diff --git a/mm/swapfile.c b/mm/swapfile.c
index abdb08e57..da2dd9147 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -444,6 +444,7 @@ asmlinkage long sys_swapoff(const char * specialfile)
 {
 	struct swap_info_struct * p = NULL;
 	struct dentry * dentry;
+	struct vfsmount *mnt;
 	int i, type, prev;
 	int err;
@@ -513,11 +514,14 @@ asmlinkage long sys_swapoff(const char * specialfile)

 	dentry = p->swap_file;
 	p->swap_file = NULL;
+	mnt = p->swap_vfsmnt;
+	p->swap_vfsmnt = NULL;
 	p->swap_device = 0;
 	vfree(p->swap_map);
 	p->swap_map = NULL;
 	p->flags = 0;
 	err = 0;
+	mntput(mnt);

out_dput:
 	dput(dentry);
@@ -538,7 +542,8 @@ int get_swaparea_info(char *buf)
 	len += sprintf(buf, "Filename\t\t\tType\t\tSize\tUsed\tPriority\n");
 	for (i = 0 ; i < nr_swapfiles ; i++, ptr++) {
 		if (ptr->flags & SWP_USED) {
-			char * path = d_path(ptr->swap_file, NULL, page, PAGE_SIZE);
+			char * path = d_path(ptr->swap_file, ptr->swap_vfsmnt,
+						page, PAGE_SIZE);

 			len += sprintf(buf + len, "%-31s ", path);
@@ -584,7 +589,8 @@ int is_swap_partition(kdev_t dev) {
 asmlinkage long sys_swapon(const char * specialfile, int swap_flags)
 {
 	struct swap_info_struct * p;
-	struct dentry * swap_dentry;
+	struct nameidata nd;
+	struct inode * swap_inode;
 	unsigned int type;
 	int i, j, prev;
 	int error;
@@ -595,6 +601,7 @@ asmlinkage long sys_swapon(const char * specialfile, int swap_flags)
 	unsigned long maxpages;
 	int swapfilesize;
 	struct block_device *bdev = NULL;
+	char *name;

 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
@@ -610,6 +617,7 @@ asmlinkage long sys_swapon(const char * specialfile, int swap_flags)
 	nr_swapfiles = type+1;
 	p->flags = SWP_USED;
 	p->swap_file = NULL;
+	p->swap_vfsmnt = NULL;
 	p->swap_device = 0;
 	p->swap_map = NULL;
 	p->lowest_bit = 0;
@@ -624,24 +632,31 @@ asmlinkage long sys_swapon(const char * specialfile, int swap_flags)
 	} else {
 		p->prio = --least_priority;
 	}
-	swap_dentry = namei(specialfile);
-	error = PTR_ERR(swap_dentry);
-	if (IS_ERR(swap_dentry))
+	name = getname(specialfile);
+	error = PTR_ERR(name);
+	if (IS_ERR(name))
+		goto bad_swap_2;
+	error = 0;
+	if (walk_init(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd))
+		error = walk_name(name, &nd);
+	putname(name);
+	if (error)
 		goto bad_swap_2;

-	p->swap_file = swap_dentry;
+	p->swap_file = nd.dentry;
+	p->swap_vfsmnt = nd.mnt;
+	swap_inode = nd.dentry->d_inode;
 	error = -EINVAL;

-	if (S_ISBLK(swap_dentry->d_inode->i_mode)) {
-		kdev_t dev = swap_dentry->d_inode->i_rdev;
+	if (S_ISBLK(swap_inode->i_mode)) {
+		kdev_t dev = swap_inode->i_rdev;
 		struct block_device_operations *bdops;

 		p->swap_device = dev;
 		set_blocksize(dev, PAGE_SIZE);

-		bdev = swap_dentry->d_inode->i_bdev;
-		bdops = devfs_get_ops ( devfs_get_handle_from_inode
-					(swap_dentry->d_inode) );
+		bdev = swap_inode->i_bdev;
+		bdops = devfs_get_ops(devfs_get_handle_from_inode(swap_inode));
 		if (bdops) bdev->bd_op = bdops;

 		error = blkdev_get(bdev, FMODE_READ|FMODE_WRITE, 0, BDEV_SWAP);
@@ -663,15 +678,15 @@ asmlinkage long sys_swapon(const char * specialfile, int swap_flags)
 		if (blk_size[MAJOR(dev)])
 			swapfilesize = blk_size[MAJOR(dev)][MINOR(dev)]
 				>> (PAGE_SHIFT - 10);
-	} else if (S_ISREG(swap_dentry->d_inode->i_mode)) {
+	} else if (S_ISREG(swap_inode->i_mode)) {
 		error = -EBUSY;
 		for (i = 0 ; i < nr_swapfiles ; i++) {
 			if (i == type || !swap_info[i].swap_file)
 				continue;
-			if (swap_dentry->d_inode == swap_info[i].swap_file->d_inode)
+			if (swap_inode == swap_info[i].swap_file->d_inode)
 				goto bad_swap;
 		}
-		swapfilesize = swap_dentry->d_inode->i_size >> PAGE_SHIFT;
+		swapfilesize = swap_inode->i_size >> PAGE_SHIFT;
 	} else
 		goto bad_swap;
@@ -811,13 +826,17 @@ bad_swap:
bad_swap_2:
 	if (p->swap_map)
 		vfree(p->swap_map);
-	dput(p->swap_file);
+	nd.mnt = p->swap_vfsmnt;
+	nd.dentry = p->swap_file;
 	p->swap_device = 0;
 	p->swap_file = NULL;
+	p->swap_vfsmnt = NULL;
 	p->swap_map = NULL;
 	p->flags = 0;
 	if (!(swap_flags & SWAP_FLAG_PREFER))
 		++least_priority;
+	dput(nd.dentry);
+	mntput(nd.mnt);
out:
 	if (swap_header)
 		free_page((long) swap_header);
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 1057dbb60..691d47f18 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -34,7 +34,7 @@
 * using a process that no longer actually exists (it might
 * have died while we slept).
 */
-static int try_to_swap_out(struct vm_area_struct* vma, unsigned long address, pte_t * page_table, int gfp_mask)
+static int try_to_swap_out(struct mm_struct * mm, struct vm_area_struct* vma, unsigned long address, pte_t * page_table, int gfp_mask)
 {
 	pte_t pte;
 	swp_entry_t entry;
@@ -48,6 +48,7 @@ static int try_to_swap_out(struct vm_area_struct* vma, unsigned long address, pt
 	if ((page-mem_map >= max_mapnr) || PageReserved(page))
 		goto out_failed;

+	mm->swap_cnt--;
 	/* Don't look at this pte if it's been accessed recently. */
 	if (pte_young(pte)) {
 		/*
@@ -194,7 +195,7 @@ out_failed:
 * (C) 1993 Kai Petzke, wpp@marie.physik.tu-berlin.de
 */
-static inline int swap_out_pmd(struct vm_area_struct * vma, pmd_t *dir, unsigned long address, unsigned long end, int gfp_mask)
+static inline int swap_out_pmd(struct mm_struct * mm, struct vm_area_struct * vma, pmd_t *dir, unsigned long address, unsigned long end, int gfp_mask)
 {
 	pte_t * pte;
 	unsigned long pmd_end;
@@ -216,16 +217,18 @@ static inline int swap_out_pmd(struct vm_area_struct * vma, pmd_t *dir, unsigned
 	do {
 		int result;
 		vma->vm_mm->swap_address = address + PAGE_SIZE;
-		result = try_to_swap_out(vma, address, pte, gfp_mask);
+		result = try_to_swap_out(mm, vma, address, pte, gfp_mask);
 		if (result)
 			return result;
+		if (!mm->swap_cnt)
+			return 0;
 		address += PAGE_SIZE;
 		pte++;
 	} while (address && (address < end));
 	return 0;
 }

-static inline int swap_out_pgd(struct vm_area_struct * vma, pgd_t *dir, unsigned long address, unsigned long end, int gfp_mask)
+static inline int swap_out_pgd(struct mm_struct * mm, struct vm_area_struct * vma, pgd_t *dir, unsigned long address, unsigned long end, int gfp_mask)
 {
 	pmd_t * pmd;
 	unsigned long pgd_end;
@@ -245,16 +248,18 @@ static inline int swap_out_pgd(struct vm_area_struct * vma, pgd_t *dir, unsigned
 		end = pgd_end;

 	do {
-		int result = swap_out_pmd(vma, pmd, address, end, gfp_mask);
+		int result = swap_out_pmd(mm, vma, pmd, address, end, gfp_mask);
 		if (result)
 			return result;
+		if (!mm->swap_cnt)
+			return 0;
 		address = (address + PMD_SIZE) & PMD_MASK;
 		pmd++;
 	} while (address && (address < end));
 	return 0;
 }

-static int swap_out_vma(struct vm_area_struct * vma, unsigned long address, int gfp_mask)
+static int swap_out_vma(struct mm_struct * mm, struct vm_area_struct * vma, unsigned long address, int gfp_mask)
 {
 	pgd_t *pgdir;
 	unsigned long end;
@@ -269,9 +274,11 @@ static int swap_out_vma(struct vm_area_struct * vma, unsigned long address, int
 	if (address >= end)
 		BUG();
 	do {
-		int result = swap_out_pgd(vma, pgdir, address, end, gfp_mask);
+		int result = swap_out_pgd(mm, vma, pgdir, address, end, gfp_mask);
 		if (result)
 			return result;
+		if (!mm->swap_cnt)
+			return 0;
 		address = (address + PGDIR_SIZE) & PGDIR_MASK;
 		pgdir++;
 	} while (address && (address < end));
@@ -299,7 +306,7 @@ static int swap_out_mm(struct mm_struct * mm, int gfp_mask)
 		address = vma->vm_start;

 		for (;;) {
-			int result = swap_out_vma(vma, address, gfp_mask);
+			int result = swap_out_vma(mm, vma, address, gfp_mask);
 			if (result)
 				return result;
 			vma = vma->vm_next;
@@ -321,7 +328,7 @@ static int swap_out_mm(struct mm_struct * mm, int gfp_mask)
 * N.B. This function returns only 0 or 1.  Return values != 1 from
 * the lower level routines result in continued processing.
 */
-static int swap_out(unsigned int priority, int gfp_mask)
+int swap_out(unsigned int priority, int gfp_mask)
 {
 	struct task_struct * p;
 	int counter;
@@ -356,6 +363,7 @@ static int swap_out(unsigned int priority, int gfp_mask)
 		p = init_task.next_task;
 		for (; p != &init_task; p = p->next_task) {
 			struct mm_struct *mm = p->mm;
+			p->hog = 0;
 			if (!p->swappable || !mm)
 				continue;
 			if (mm->rss <= 0)
@@ -369,9 +377,26 @@ static int swap_out(unsigned int priority, int gfp_mask)
 				pid = p->pid;
 			}
 		}
-		read_unlock(&tasklist_lock);
-		if (assign == 1)
+		if (assign == 1) {
+			/* we just assigned swap_cnt, normalise values */
 			assign = 2;
+			p = init_task.next_task;
+			for (; p != &init_task; p = p->next_task) {
+				int i = 0;
+				struct mm_struct *mm = p->mm;
+				if (!p->swappable || !mm || mm->rss <= 0)
+					continue;
+				/* small processes are swapped out less */
+				while ((mm->swap_cnt << 2 * (i + 1) < max_cnt))
+					i++;
+				mm->swap_cnt >>= i;
+				mm->swap_cnt += i;	/* if swap_cnt reaches 0 */
+				/* we're big -> hog treatment */
+				if (!i)
+					p->hog = 1;
+			}
+		}
+		read_unlock(&tasklist_lock);
 		if (!best) {
 			if (!assign) {
 				assign = 1;
@@ -412,13 +437,14 @@ static int do_try_to_free_pages(unsigned int gfp_mask, zone_t *zone)
 {
 	int priority;
 	int count = SWAP_CLUSTER_MAX;
+	int ret;

 	/* Always trim SLAB caches when memory gets low. */
 	kmem_cache_reap(gfp_mask);

 	priority = 6;
 	do {
-		while (shrink_mmap(priority, gfp_mask, zone)) {
+		while ((ret = shrink_mmap(priority, gfp_mask, zone))) {
 			if (!--count)
 				goto done;
 		}
@@ -441,7 +467,9 @@ static int do_try_to_free_pages(unsigned int gfp_mask, zone_t *zone)
 		}
 	}

-	/* Then, try to page stuff out.. */
+	/* Then, try to page stuff out..
+	 * We use swapcount here because this doesn't actually
+	 * free pages */
 	while (swap_out(priority, gfp_mask)) {
 		if (!--count)
 			goto done;
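Editor's note on the sync_page hook: the page-wait paths in mm/filemap.c above stop kicking the global disk task queue (run_task_queue(&tq_disk)) and instead dispatch through an optional per-mapping sync_page operation, which mm/swap_state.c wires to block_sync_page for swap pages. The following is a minimal standalone C sketch of that dispatch pattern only; the struct layouts are simplified stand-ins for illustration, not the real kernel headers.

	#include <stdio.h>

	struct page;

	struct address_space_operations {
		int (*sync_page)(struct page *);	/* optional hook */
	};

	struct address_space {
		struct address_space_operations *a_ops;
	};

	struct page {
		struct address_space *mapping;		/* NULL for anonymous pages */
	};

	/* Mirrors the new mm/filemap.c helper: every link in the chain may be
	 * absent, in which case syncing is a safe no-op. */
	static int sync_page(struct page *page)
	{
		struct address_space *mapping = page->mapping;

		if (mapping && mapping->a_ops && mapping->a_ops->sync_page)
			return mapping->a_ops->sync_page(page);
		return 0;
	}

	/* Stand-in for block_sync_page: start whatever I/O this mapping needs. */
	static int block_sync_page_stub(struct page *page)
	{
		(void)page;
		puts("kicking I/O for this mapping only");
		return 0;
	}

	int main(void)
	{
		struct address_space_operations aops = { block_sync_page_stub };
		struct address_space mapping = { &aops };
		struct page filebacked = { &mapping };
		struct page anonymous = { NULL };

		sync_page(&filebacked);	/* dispatches to the per-mapping hook */
		sync_page(&anonymous);	/* safe no-op */
		return 0;
	}

The point of the indirection is that a waiter only has to prod the backing store of the one mapping it is waiting on, rather than flushing the whole disk queue for everyone.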
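Editor's note on the mm_struct threading: the other recurring change in this merge is mechanical; do_wp_page(), handle_pte_fault(), handle_mm_fault(), do_munmap() and friends now take the mm_struct they operate on instead of a task_struct, and callers pass current->mm (or whichever mm they hold). A simplified standalone sketch of the new call-site shape, with types reduced to the bare minimum (these are not the real kernel declarations):

	struct mm_struct { int rss; };
	struct vm_area_struct { struct mm_struct *vm_mm; };
	struct task_struct { struct mm_struct *mm; };

	static struct task_struct current_task;
	#define current (&current_task)	/* stand-in for the kernel's current */

	/* New-style prototype, as in the patched mm/memory.c: the fault path
	 * never needs the task, only the address space. */
	int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
			    unsigned long address, int write_access)
	{
		mm->rss++;		/* accounting now hangs off the mm */
		(void)vma; (void)address; (void)write_access;
		return 1;
	}

	int make_present(struct vm_area_struct *vma, unsigned long addr)
	{
		/* callers pass the mm, not the task */
		return handle_mm_fault(current->mm, vma, addr, 1);
	}

	int main(void)
	{
		static struct mm_struct mm;
		struct vm_area_struct vma = { &mm };
		current_task.mm = &mm;
		return make_present(&vma, 0x1000) == 1 ? 0 : 1;
	}

This also lets kernel threads and sys_munmap() operate on an mm they looked up once, instead of repeatedly dereferencing current->mm.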
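Editor's note on the swap_cnt normalisation: the loop added to swap_out() in mm/vmscan.c is easiest to see with concrete numbers. The standalone sketch below replays the same shift arithmetic on made-up rss values (the sample values are hypothetical; max_cnt stands in for the largest swap_cnt just assigned). Processes much smaller than the biggest one get their scan quota cut, while anything within a factor of four of the maximum keeps its full quota and is flagged as a hog.

	#include <stdio.h>

	int main(void)
	{
		unsigned long max_cnt = 20000;	/* largest rss seen this pass */
		unsigned long samples[] = { 20000, 6000, 1200, 100 };

		for (int n = 0; n < 4; n++) {
			unsigned long swap_cnt = samples[n];
			int i = 0;

			/* same condition as the patch: halve (two bits per
			 * step) while the process is much smaller than max_cnt */
			while ((swap_cnt << 2 * (i + 1)) < max_cnt)
				i++;
			swap_cnt >>= i;
			swap_cnt += i;	/* keep the quota non-zero */

			/* i == 0 means "within 4x of the biggest": hog */
			printf("rss %5lu -> quota %5lu%s\n", samples[n],
			       swap_cnt, i == 0 ? "  (hog)" : "");
		}
		return 0;
	}

Note that both 20000 and 6000 come out as hogs here (6000 << 2 is already 24000, above max_cnt), which is exactly the p->hog flag that __alloc_pages() in mm/page_alloc.c uses to make big processes pay for their own allocations when memory is low.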