Diffstat (limited to 'mm')
-rw-r--r--  mm/filemap.c    |  18
-rw-r--r--  mm/memory.c     |  14
-rw-r--r--  mm/mmap.c       |   4
-rw-r--r--  mm/page_alloc.c |  49
-rw-r--r--  mm/shmem.c      | 111
-rw-r--r--  mm/slab.c       |   2
-rw-r--r--  mm/swap.c       |   3
-rw-r--r--  mm/vmalloc.c    |   1
-rw-r--r--  mm/vmscan.c     | 383
9 files changed, 246 insertions, 339 deletions
diff --git a/mm/filemap.c b/mm/filemap.c
index dedd7911e..4c89ad3e9 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -143,7 +143,8 @@ void __set_page_dirty(struct page *page)
 	list_add(&page->list, &mapping->dirty_pages);
 	spin_unlock(&pagecache_lock);
 
-	mark_inode_dirty_pages(mapping->host);
+	if (mapping->host)
+		mark_inode_dirty_pages(mapping->host);
 }
 
 /**
@@ -306,7 +307,7 @@ inside:
 	 */
 	age_page_up(page);
 	if (inactive_shortage() > inactive_target / 2 && free_shortage())
-		wakeup_kswapd(0);
+		wakeup_kswapd();
 not_found:
 	return page;
 }
@@ -974,10 +975,6 @@ static void generic_file_readahead(int reada_ok,
 	 * accessed sequentially.
 	 */
 	if (ahead) {
-		if (reada_ok == 2) {
-			run_task_queue(&tq_disk);
-		}
-
 		filp->f_ralen += ahead;
 		filp->f_rawin += filp->f_ralen;
 		filp->f_raend = raend + ahead + 1;
@@ -1835,7 +1832,8 @@ static long madvise_fixup_start(struct vm_area_struct * vma,
 	n->vm_end = end;
 	setup_read_behavior(n, behavior);
 	n->vm_raend = 0;
-	get_file(n->vm_file);
+	if (n->vm_file)
+		get_file(n->vm_file);
 	if (n->vm_ops && n->vm_ops->open)
 		n->vm_ops->open(n);
 	lock_vma_mappings(vma);
@@ -1861,7 +1859,8 @@ static long madvise_fixup_end(struct vm_area_struct * vma,
 	n->vm_pgoff += (n->vm_start - vma->vm_start) >> PAGE_SHIFT;
 	setup_read_behavior(n, behavior);
 	n->vm_raend = 0;
-	get_file(n->vm_file);
+	if (n->vm_file)
+		get_file(n->vm_file);
 	if (n->vm_ops && n->vm_ops->open)
 		n->vm_ops->open(n);
 	lock_vma_mappings(vma);
@@ -1893,7 +1892,8 @@ static long madvise_fixup_middle(struct vm_area_struct * vma,
 	right->vm_pgoff += (right->vm_start - left->vm_start) >> PAGE_SHIFT;
 	left->vm_raend = 0;
 	right->vm_raend = 0;
-	atomic_add(2, &vma->vm_file->f_count);
+	if (vma->vm_file)
+		atomic_add(2, &vma->vm_file->f_count);
 	if (vma->vm_ops && vma->vm_ops->open) {
 		vma->vm_ops->open(left);
diff --git a/mm/memory.c b/mm/memory.c
index 6f1f318a3..7fc8de5eb 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -207,7 +207,8 @@ skip_copy_pte_range:	address = (address + PMD_SIZE) & PMD_MASK;
 
 		src_pte = pte_offset(src_pmd, address);
 		dst_pte = pte_offset(dst_pmd, address);
-		
+
+		spin_lock(&src->page_table_lock);
 		do {
 			pte_t pte = *src_pte;
 			struct page *ptepage;
@@ -240,10 +241,11 @@ skip_copy_pte_range:	address = (address + PMD_SIZE) & PMD_MASK;
 cont_copy_pte_range:		set_pte(dst_pte, pte);
 cont_copy_pte_range_noset:	address += PAGE_SIZE;
 			if (address >= end)
-				goto out;
+				goto out_unlock;
 			src_pte++;
 			dst_pte++;
 		} while ((unsigned long)src_pte & PTE_TABLE_MASK);
+		spin_unlock(&src->page_table_lock);
 
 cont_copy_pmd_range:	src_pmd++;
 			dst_pmd++;
@@ -252,6 +254,10 @@ cont_copy_pmd_range:	src_pmd++;
 out:
 	return 0;
 
+out_unlock:
+	spin_unlock(&src->page_table_lock);
+	return 0;
+
 nomem:
 	return -ENOMEM;
 }
@@ -939,7 +945,6 @@ void vmtruncate(struct inode * inode, loff_t offset)
 	if (inode->i_size < offset)
 		goto do_expand;
 	inode->i_size = offset;
-	truncate_inode_pages(mapping, offset);
 	spin_lock(&mapping->i_shared_lock);
 	if (!mapping->i_mmap && !mapping->i_mmap_shared)
 		goto out_unlock;
@@ -954,8 +959,7 @@ void vmtruncate(struct inode * inode, loff_t offset)
 
 out_unlock:
 	spin_unlock(&mapping->i_shared_lock);
-	/* this should go into ->truncate */
-	inode->i_size = offset;
+	truncate_inode_pages(mapping, offset);
 	if (inode->i_op && inode->i_op->truncate)
 		inode->i_op->truncate(inode);
 	return;
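The copy_page_range() hunks above take src->page_table_lock around the whole PTE copy loop and add an out_unlock exit so the early return still drops the lock. Below is a minimal userspace sketch of that lock-around-loop-with-early-exit pattern; the pthread mutex and the copy_range() helper are illustrative stand-ins, not kernel code.

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER;

/* Copy src[0..n) into dst, bailing out early at a sentinel value,
 * while holding the lock for the whole loop (the shape of the
 * patched copy_page_range() with src->page_table_lock). */
static int copy_range(const int *src, int *dst, int n)
{
	int i;

	pthread_mutex_lock(&table_lock);
	for (i = 0; i < n; i++) {
		if (src[i] < 0)		/* early exit, like "goto out_unlock" */
			goto out_unlock;
		dst[i] = src[i];
	}
	pthread_mutex_unlock(&table_lock);
	return 0;

out_unlock:
	pthread_mutex_unlock(&table_lock);
	return 0;
}

int main(void)
{
	int src[4] = { 1, 2, -1, 4 };
	int dst[4] = { 0 };

	copy_range(src, dst, 4);
	printf("%d %d %d %d\n", dst[0], dst[1], dst[2], dst[3]);
	return 0;
}

Build with "cc -pthread"; the point is only that every exit path, early or not, releases the lock exactly once.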
diff --git a/mm/mmap.c b/mm/mmap.c
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -883,6 +883,8 @@ void exit_mmap(struct mm_struct * mm)
 	mm->rss = 0;
 	mm->total_vm = 0;
 	mm->locked_vm = 0;
+
+	flush_cache_mm(mm);
 	while (mpnt) {
 		struct vm_area_struct * next = mpnt->vm_next;
 		unsigned long start = mpnt->vm_start;
@@ -895,13 +897,13 @@ void exit_mmap(struct mm_struct * mm)
 		}
 		mm->map_count--;
 		remove_shared_vm_struct(mpnt);
-		flush_cache_range(mm, start, end);
 		zap_page_range(mm, start, size);
 		if (mpnt->vm_file)
 			fput(mpnt->vm_file);
 		kmem_cache_free(vm_area_cachep, mpnt);
 		mpnt = next;
 	}
+	flush_tlb_mm(mm);
 
 	/* This is just debugging */
 	if (mm->map_count)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index b67aa4913..09ac27284 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -16,6 +16,7 @@
 #include <linux/interrupt.h>
 #include <linux/pagemap.h>
 #include <linux/bootmem.h>
+#include <linux/slab.h>
 
 int nr_swap_pages;
 int nr_active_pages;
@@ -303,7 +304,7 @@ struct page * __alloc_pages(zonelist_t *zonelist, unsigned long order)
 	 * an inactive page shortage, wake up kswapd.
 	 */
 	if (inactive_shortage() > inactive_target / 2 && free_shortage())
-		wakeup_kswapd(0);
+		wakeup_kswapd();
 	/*
 	 * If we are about to get low on free pages and cleaning
 	 * the inactive_dirty pages would fix the situation,
@@ -379,7 +380,7 @@ try_again:
 	 * - if we don't have __GFP_IO set, kswapd may be
 	 *   able to free some memory we can't free ourselves
 	 */
-	wakeup_kswapd(0);
+	wakeup_kswapd();
 	if (gfp_mask & __GFP_WAIT) {
 		__set_current_state(TASK_RUNNING);
 		current->policy |= SCHED_YIELD;
@@ -404,7 +405,7 @@ try_again:
 	 * - we're doing a higher-order allocation
 	 *   --> move pages to the free list until we succeed
 	 * - we're /really/ tight on memory
-	 *   --> wait on the kswapd waitqueue until memory is freed
+	 *   --> try to free pages ourselves with page_launder
 	 */
 	if (!(current->flags & PF_MEMALLOC)) {
 		/*
@@ -443,36 +444,20 @@ try_again:
 		/*
 		 * When we arrive here, we are really tight on memory.
 		 *
-		 * We wake up kswapd and sleep until kswapd wakes us
-		 * up again. After that we loop back to the start.
-		 *
-		 * We have to do this because something else might eat
-		 * the memory kswapd frees for us and we need to be
-		 * reliable. Note that we don't loop back for higher
-		 * order allocations since it is possible that kswapd
-		 * simply cannot free a large enough contiguous area
-		 * of memory *ever*.
+		 * We try to free pages ourselves by:
+		 * 	- shrinking the i/d caches.
+		 * 	- reclaiming unused memory from the slab caches.
+		 * 	- swapping/syncing pages to disk (done by page_launder)
+		 * 	- moving clean pages from the inactive dirty list to
+		 * 	  the inactive clean list. (done by page_launder)
 		 */
-		if ((gfp_mask & (__GFP_WAIT|__GFP_IO)) == (__GFP_WAIT|__GFP_IO)) {
-			wakeup_kswapd(1);
+		if (gfp_mask & __GFP_WAIT) {
 			memory_pressure++;
-			if (!order)
-				goto try_again;
-			/*
-			 * If __GFP_IO isn't set, we can't wait on kswapd because
-			 * kswapd just might need some IO locks /we/ are holding ...
-			 *
-			 * SUBTLE: The scheduling point above makes sure that
-			 * kswapd does get the chance to free memory we can't
-			 * free ourselves...
-			 */
-		} else if (gfp_mask & __GFP_WAIT) {
 			try_to_free_pages(gfp_mask);
-			memory_pressure++;
+			wakeup_bdflush(0);
 			if (!order)
 				goto try_again;
 		}
-	}
 
 	/*
@@ -554,14 +539,8 @@ void __free_pages(struct page *page, unsigned long order)
 
 void free_pages(unsigned long addr, unsigned long order)
 {
-	struct page *fpage;
-
-#ifdef CONFIG_DISCONTIGMEM
-	if (addr == 0) return;
-#endif
-	fpage = virt_to_page(addr);
-	if (VALID_PAGE(fpage))
-		__free_pages(fpage, order);
+	if (addr != 0)
+		__free_pages(virt_to_page(addr), order);
 }
 
 /*
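The last __alloc_pages() hunk replaces the old sleep-on-kswapd protocol with direct reclaim: a __GFP_WAIT allocator bumps memory_pressure, runs try_to_free_pages() itself, pokes bdflush, and retries only order-0 requests. A compact userspace model of that decision, with a made-up flag value standing in for the real gfp bit and empty stubs in place of the kernel helpers, might look like this:

#include <stdio.h>

#define GFP_WAIT 0x01	/* stand-in for __GFP_WAIT; the value is illustrative */

static int memory_pressure;

static void try_to_free_pages(unsigned int gfp_mask) { (void)gfp_mask; }
static void wakeup_bdflush(int wait) { (void)wait; }

/* Returns 1 if the caller should jump back to try_again, 0 otherwise. */
static int tight_on_memory(unsigned int gfp_mask, unsigned long order)
{
	if (!(gfp_mask & GFP_WAIT))
		return 0;	/* atomic allocation: nothing more to do here */

	memory_pressure++;
	try_to_free_pages(gfp_mask);	/* free pages ourselves */
	wakeup_bdflush(0);		/* get dirty buffers moving */
	return order == 0;		/* only order-0 loops back */
}

int main(void)
{
	printf("retry: %d\n", tight_on_memory(GFP_WAIT, 0));
	printf("retry: %d\n", tight_on_memory(GFP_WAIT, 2));
	printf("retry: %d\n", tight_on_memory(0, 0));
	return 0;
}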
diff --git a/mm/shmem.c b/mm/shmem.c
index a81a74659..00426ca27 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -117,11 +117,43 @@ shmem_truncate_part (swp_entry_t * dir, unsigned long size,
 	return 0;
 }
 
+/*
+ * shmem_recalc_inode - recalculate the size of an inode
+ *
+ * @inode: inode to recalc
+ *
+ * We have to calculate the free blocks since the mm can drop pages
+ * behind our back
+ *
+ * But we know that normally
+ * inodes->i_blocks == inode->i_mapping->nrpages + info->swapped
+ *
+ * So the mm freed
+ * inodes->i_blocks - (inode->i_mapping->nrpages + info->swapped)
+ *
+ * It has to be called with the spinlock held.
+ */
+
+static void shmem_recalc_inode(struct inode * inode)
+{
+	unsigned long freed;
+
+	freed = inode->i_blocks -
+		(inode->i_mapping->nrpages + inode->u.shmem_i.swapped);
+	if (freed){
+		struct shmem_sb_info * info = &inode->i_sb->u.shmem_sb;
+		inode->i_blocks -= freed;
+		spin_lock (&info->stat_lock);
+		info->free_blocks += freed;
+		spin_unlock (&info->stat_lock);
+	}
+}
+
 static void shmem_truncate (struct inode * inode)
 {
 	int clear_base;
 	unsigned long start;
-	unsigned long mmfreed, freed = 0;
+	unsigned long freed = 0;
 	swp_entry_t **base, **ptr;
 	struct shmem_inode_info * info = &inode->u.shmem_i;
@@ -154,26 +186,9 @@ static void shmem_truncate (struct inode * inode)
 	info->i_indirect = 0;
 
 out:
-
-	/*
-	 * We have to calculate the free blocks since we do not know
-	 * how many pages the mm discarded
-	 *
-	 * But we know that normally
-	 * inodes->i_blocks == inode->i_mapping->nrpages + info->swapped
-	 *
-	 * So the mm freed
-	 * inodes->i_blocks - (inode->i_mapping->nrpages + info->swapped)
-	 */
-
-	mmfreed = inode->i_blocks - (inode->i_mapping->nrpages + info->swapped);
 	info->swapped -= freed;
-	inode->i_blocks -= freed + mmfreed;
+	shmem_recalc_inode(inode);
 	spin_unlock (&info->lock);
-
-	spin_lock (&inode->i_sb->u.shmem_sb.stat_lock);
-	inode->i_sb->u.shmem_sb.free_blocks += freed + mmfreed;
-	spin_unlock (&inode->i_sb->u.shmem_sb.stat_lock);
 }
 
 static void shmem_delete_inode(struct inode * inode)
@@ -201,13 +216,15 @@ static int shmem_writepage(struct page * page)
 	swp_entry_t *entry, swap;
 
 	info = &page->mapping->host->u.shmem_i;
-	if (info->locked)
-		return 1;
 	swap = __get_swap_page(2);
-	if (!swap.val)
-		return 1;
+	if (!swap.val) {
+		set_page_dirty(page);
+		UnlockPage(page);
+		return -ENOMEM;
+	}
 
 	spin_lock(&info->lock);
+	shmem_recalc_inode(page->mapping->host);
 	entry = shmem_swp_entry (info, page->index);
 	if (!entry)	/* this had been allocted on page allocation */
 		BUG();
@@ -269,6 +286,9 @@ struct page * shmem_nopage(struct vm_area_struct * vma, unsigned long address, i
 	entry = shmem_swp_entry (info, idx);
 	if (!entry)
 		goto oom;
+	spin_lock (&info->lock);
+	shmem_recalc_inode(inode);
+	spin_unlock (&info->lock);
 	if (entry->val) {
 		unsigned long flags;
@@ -310,6 +330,8 @@ struct page * shmem_nopage(struct vm_area_struct * vma, unsigned long address, i
 	}
 	/* We have the page */
 	SetPageUptodate (page);
+	if (info->locked)
+		page_cache_get(page);
 
 cached_page:
 	UnlockPage (page);
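The shmem_recalc_inode() helper added above relies on the invariant i_blocks == nrpages + swapped; anything the VM dropped behind tmpfs's back shows up as the difference and is handed back to the superblock's free-block count. A tiny standalone model of that bookkeeping, using plain struct fields instead of the real inode and superblock:

#include <stdio.h>

struct shmem_model {
	unsigned long i_blocks;		/* blocks the inode thinks it owns */
	unsigned long nrpages;		/* pages still in the page cache */
	unsigned long swapped;		/* pages pushed out to swap */
	unsigned long free_blocks;	/* superblock free-block counter */
};

/* Same arithmetic as shmem_recalc_inode(): whatever no longer shows up
 * as a cached or swapped page was freed by the VM and is returned to
 * the filesystem's free-block count. */
static void recalc(struct shmem_model *m)
{
	unsigned long freed = m->i_blocks - (m->nrpages + m->swapped);

	if (freed) {
		m->i_blocks -= freed;
		m->free_blocks += freed;
	}
}

int main(void)
{
	/* 10 blocks accounted, but the VM dropped 3 clean pages. */
	struct shmem_model m = { 10, 5, 2, 100 };

	recalc(&m);
	printf("i_blocks=%lu free_blocks=%lu\n", m.i_blocks, m.free_blocks);
	/* prints i_blocks=7 free_blocks=103 */
	return 0;
}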
@@ -374,8 +396,7 @@ struct inode *shmem_get_inode(struct super_block *sb, int mode, int dev)
 			inode->i_fop = &shmem_dir_operations;
 			break;
 		case S_IFLNK:
-			inode->i_op = &page_symlink_inode_operations;
-			break;
+			BUG();
 		}
 		spin_lock (&shmem_ilock);
 		list_add (&inode->u.shmem_i.list, &shmem_inodes);
@@ -401,6 +422,32 @@ static int shmem_statfs(struct super_block *sb, struct statfs *buf)
 	return 0;
 }
 
+void shmem_lock(struct file * file, int lock)
+{
+	struct inode * inode = file->f_dentry->d_inode;
+	struct shmem_inode_info * info = &inode->u.shmem_i;
+	struct page * page;
+	unsigned long idx, size;
+
+	if (info->locked == lock)
+		return;
+	down(&inode->i_sem);
+	info->locked = lock;
+	size = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+	for (idx = 0; idx < size; idx++) {
+		page = find_lock_page(inode->i_mapping, idx);
+		if (!page)
+			continue;
+		if (!lock) {
+			/* release the extra count and our reference */
+			page_cache_release(page);
+			page_cache_release(page);
+		}
+		UnlockPage(page);
+	}
+	up(&inode->i_sem);
+}
+
 /*
  * Lookup the data. This is trivial - if the dentry didn't already
  * exist, we know it is negative.
@@ -528,19 +575,6 @@ static int shmem_rename(struct inode * old_dir, struct dentry *old_dentry, struc
 	return error;
 }
 
-static int shmem_symlink(struct inode * dir, struct dentry *dentry, const char * symname)
-{
-	int error;
-
-	error = shmem_mknod(dir, dentry, S_IFLNK | S_IRWXUGO, 0);
-	if (!error) {
-		int l = strlen(symname)+1;
-		struct inode *inode = dentry->d_inode;
-		error = block_symlink(inode, symname, l);
-	}
-	return error;
-}
-
 static int shmem_mmap(struct file * file, struct vm_area_struct * vma)
 {
 	struct vm_operations_struct * ops;
@@ -677,7 +711,6 @@ static struct inode_operations shmem_dir_inode_operations = {
 	lookup:		shmem_lookup,
 	link:		shmem_link,
 	unlink:		shmem_unlink,
-	symlink:	shmem_symlink,
 	mkdir:		shmem_mkdir,
 	rmdir:		shmem_rmdir,
 	mknod:		shmem_mknod,
diff --git a/mm/slab.c b/mm/slab.c
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -1702,7 +1702,7 @@ static void enable_all_cpucaches (void)
  * kmem_cache_reap - Reclaim memory from caches.
  * @gfp_mask: the type of memory required.
  *
- * Called from try_to_free_page().
+ * Called from do_try_to_free_pages() and __alloc_pages()
  */
 void kmem_cache_reap (int gfp_mask)
 {
diff --git a/mm/swap.c b/mm/swap.c
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -30,8 +30,7 @@
  * start background swapping if we fall below freepages.high free
  * pages, and we begin intensive swapping below freepages.low.
  *
- * Actual initialization is done in mm/page_alloc.c or
- * arch/sparc(64)/mm/init.c.
+ * Actual initialization is done in mm/page_alloc.c
  */
 freepages_t freepages = {
 	0,	/* freepages.min */
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 62ce5f1ff..93edab662 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -9,6 +9,7 @@
 #include <linux/malloc.h>
 #include <linux/vmalloc.h>
 #include <linux/spinlock.h>
+#include <linux/highmem.h>
 #include <linux/smp_lock.h>
 
 #include <asm/uaccess.h>
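With shmem_writepage() no longer checking info->locked, SHM_LOCK is now enforced purely through page references: shmem_nopage() takes an extra page_cache_get() while info->locked is set, and shmem_lock() drops that extra count, plus the reference find_lock_page() handed it, when the segment is unlocked. A minimal refcount model of that scheme, with a plain counter standing in for the real page cache:

#include <stdio.h>

struct page_model {
	int count;	/* stand-in for page->count */
};

static void page_get(struct page_model *p) { p->count++; }
static void page_put(struct page_model *p) { p->count--; }

int main(void)
{
	struct page_model page = { 1 };	/* one count held by the page cache */

	/* fault while the segment is locked: extra pin, as in shmem_nopage() */
	page_get(&page);
	printf("locked: count=%d\n", page.count);	/* 2: cannot be reclaimed */

	/* SHM_UNLOCK path in shmem_lock(): find_lock_page() reference ... */
	page_get(&page);
	/* ... then release the extra pin and our own reference */
	page_put(&page);
	page_put(&page);
	printf("unlocked: count=%d\n", page.count);	/* back to 1 */
	return 0;
}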
diff --git a/mm/vmscan.c b/mm/vmscan.c
index afa5261c1..f41c53328 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -35,45 +35,21 @@
  * using a process that no longer actually exists (it might
  * have died while we slept).
  */
-static int try_to_swap_out(struct mm_struct * mm, struct vm_area_struct* vma, unsigned long address, pte_t * page_table, int gfp_mask)
+static void try_to_swap_out(struct mm_struct * mm, struct vm_area_struct* vma, unsigned long address, pte_t * page_table, struct page *page)
 {
 	pte_t pte;
 	swp_entry_t entry;
-	struct page * page;
-	int onlist;
-
-	pte = *page_table;
-	if (!pte_present(pte))
-		goto out_failed;
-	page = pte_page(pte);
-	if ((!VALID_PAGE(page)) || PageReserved(page))
-		goto out_failed;
-
-	if (!mm->swap_cnt)
-		return 1;
-
-	mm->swap_cnt--;
-	onlist = PageActive(page);
 
 	/* Don't look at this pte if it's been accessed recently. */
 	if (ptep_test_and_clear_young(page_table)) {
-		age_page_up(page);
-		goto out_failed;
+		page->age += PAGE_AGE_ADV;
+		if (page->age > PAGE_AGE_MAX)
+			page->age = PAGE_AGE_MAX;
+		return;
 	}
 
-	if (!onlist)
-		/* The page is still mapped, so it can't be freeable... */
-		age_page_down_ageonly(page);
-
-	/*
-	 * If the page is in active use by us, or if the page
-	 * is in active use by others, don't unmap it or
-	 * (worse) start unneeded IO.
-	 */
-	if (page->age > 0)
-		goto out_failed;
-
 	if (TryLockPage(page))
-		goto out_failed;
+		return;
 
 	/* From this point on, the odds are that we're going to
 	 * nuke this pte, so read and clear the pte.  This hook
@@ -87,9 +63,6 @@ static int try_to_swap_out(struct mm_struct * mm, struct vm_area_struct* vma, un
 	 * Is the page already in the swap cache? If so, then
 	 * we can just drop our reference to it without doing
 	 * any IO - it's already up-to-date on disk.
-	 *
-	 * Return 0, as we didn't actually free any real
-	 * memory, and we should just continue our scan.
 	 */
 	if (PageSwapCache(page)) {
 		entry.val = page->index;
@@ -99,12 +72,12 @@ set_swap_pte:
 		swap_duplicate(entry);
 		set_pte(page_table, swp_entry_to_pte(entry));
 drop_pte:
-		UnlockPage(page);
 		mm->rss--;
-		deactivate_page(page);
+		if (!page->age)
+			deactivate_page(page);
+		UnlockPage(page);
 		page_cache_release(page);
-out_failed:
-		return 0;
+		return;
 	}
 
 	/*
@@ -153,34 +126,20 @@ out_unlock_restore:
 	set_pte(page_table, pte);
 	UnlockPage(page);
-	return 0;
+	return;
 }
 
-/*
- * A new implementation of swap_out(). We do not swap complete processes,
- * but only a small number of blocks, before we continue with the next
- * process. The number of blocks actually swapped is determined on the
- * number of page faults, that this process actually had in the last time,
- * so we won't swap heavily used processes all the time ...
- *
- * Note: the priority argument is a hint on much CPU to waste with the
- * swap block search, not a hint, of how much blocks to swap with
- * each process.
- *
- * (C) 1993 Kai Petzke, wpp@marie.physik.tu-berlin.de
- */
-
-static inline int swap_out_pmd(struct mm_struct * mm, struct vm_area_struct * vma, pmd_t *dir, unsigned long address, unsigned long end, int gfp_mask)
+static int swap_out_pmd(struct mm_struct * mm, struct vm_area_struct * vma, pmd_t *dir, unsigned long address, unsigned long end, int count)
 {
 	pte_t * pte;
 	unsigned long pmd_end;
 
 	if (pmd_none(*dir))
-		return 0;
+		return count;
 	if (pmd_bad(*dir)) {
 		pmd_ERROR(*dir);
 		pmd_clear(dir);
-		return 0;
+		return count;
 	}
 
 	pte = pte_offset(dir, address);
@@ -190,28 +149,33 @@ static inline int swap_out_pmd(struct mm_struct * mm, struct vm_area_struct * vm
 		end = pmd_end;
 
 	do {
-		int result;
-		mm->swap_address = address + PAGE_SIZE;
-		result = try_to_swap_out(mm, vma, address, pte, gfp_mask);
-		if (result)
-			return result;
+		if (pte_present(*pte)) {
+			struct page *page = pte_page(*pte);
+
+			if (VALID_PAGE(page) && !PageReserved(page)) {
+				try_to_swap_out(mm, vma, address, pte, page);
+				if (!--count)
+					break;
+			}
+		}
 		address += PAGE_SIZE;
 		pte++;
 	} while (address && (address < end));
-	return 0;
+	mm->swap_address = address + PAGE_SIZE;
+	return count;
 }
 
-static inline int swap_out_pgd(struct mm_struct * mm, struct vm_area_struct * vma, pgd_t *dir, unsigned long address, unsigned long end, int gfp_mask)
+static inline int swap_out_pgd(struct mm_struct * mm, struct vm_area_struct * vma, pgd_t *dir, unsigned long address, unsigned long end, int count)
 {
 	pmd_t * pmd;
 	unsigned long pgd_end;
 
 	if (pgd_none(*dir))
-		return 0;
+		return count;
 	if (pgd_bad(*dir)) {
 		pgd_ERROR(*dir);
 		pgd_clear(dir);
-		return 0;
+		return count;
 	}
 
 	pmd = pmd_offset(dir, address);
@@ -221,23 +185,23 @@ static inline int swap_out_pgd(struct mm_struct * mm, struct vm_area_struct * vm
 		end = pgd_end;
 
 	do {
-		int result = swap_out_pmd(mm, vma, pmd, address, end, gfp_mask);
-		if (result)
-			return result;
+		count = swap_out_pmd(mm, vma, pmd, address, end, count);
+		if (!count)
+			break;
 		address = (address + PMD_SIZE) & PMD_MASK;
 		pmd++;
 	} while (address && (address < end));
-	return 0;
+	return count;
 }
 
-static int swap_out_vma(struct mm_struct * mm, struct vm_area_struct * vma, unsigned long address, int gfp_mask)
+static int swap_out_vma(struct mm_struct * mm, struct vm_area_struct * vma, unsigned long address, int count)
 {
 	pgd_t *pgdir;
 	unsigned long end;
 
 	/* Don't swap out areas which are locked down */
 	if (vma->vm_flags & (VM_LOCKED|VM_RESERVED))
-		return 0;
+		return count;
 
 	pgdir = pgd_offset(mm, address);
@@ -245,18 +209,17 @@ static int swap_out_vma(struct mm_struct * mm, struct vm_area_struct * vma, unsi
 	if (address >= end)
 		BUG();
 	do {
-		int result = swap_out_pgd(mm, vma, pgdir, address, end, gfp_mask);
-		if (result)
-			return result;
+		count = swap_out_pgd(mm, vma, pgdir, address, end, count);
+		if (!count)
+			break;
 		address = (address + PGDIR_SIZE) & PGDIR_MASK;
 		pgdir++;
 	} while (address && (address < end));
-	return 0;
+	return count;
 }
 
-static int swap_out_mm(struct mm_struct * mm, int gfp_mask)
+static int swap_out_mm(struct mm_struct * mm, int count)
 {
-	int result = 0;
 	unsigned long address;
 	struct vm_area_struct* vma;
@@ -276,8 +239,8 @@ static int swap_out_mm(struct mm_struct * mm, int gfp_mask)
 		address = vma->vm_start;
 
 		for (;;) {
-			result = swap_out_vma(mm, vma, address, gfp_mask);
-			if (result)
+			count = swap_out_vma(mm, vma, address, count);
+			if (!count)
 				goto out_unlock;
 			vma = vma->vm_next;
 			if (!vma)
@@ -287,94 +250,63 @@ static int swap_out_mm(struct mm_struct * mm, int gfp_mask)
 	}
 	/* Reset to 0 when we reach the end of address space */
 	mm->swap_address = 0;
-	mm->swap_cnt = 0;
 
 out_unlock:
 	spin_unlock(&mm->page_table_lock);
-	return result;
+	return !count;
 }
 
 /*
- * Select the task with maximal swap_cnt and try to swap out a page.
  * N.B. This function returns only 0 or 1.  Return values != 1 from
  * the lower level routines result in continued processing.
  */
 #define SWAP_SHIFT 5
 #define SWAP_MIN 8
 
+static inline int swap_amount(struct mm_struct *mm)
+{
+	int nr = mm->rss >> SWAP_SHIFT;
+	return nr < SWAP_MIN ? SWAP_MIN : nr;
+}
+
 static int swap_out(unsigned int priority, int gfp_mask)
 {
 	int counter;
-	int __ret = 0;
-
-	/*
-	 * We make one or two passes through the task list, indexed by
-	 * assign = {0, 1}:
-	 *   Pass 1: select the swappable task with maximal RSS that has
-	 *           not yet been swapped out.
-	 *   Pass 2: re-assign rss swap_cnt values, then select as above.
-	 *
-	 * With this approach, there's no need to remember the last task
-	 * swapped out.  If the swap-out fails, we clear swap_cnt so the
-	 * task won't be selected again until all others have been tried.
-	 *
-	 * Think of swap_cnt as a "shadow rss" - it tells us which process
-	 * we want to page out (always try largest first).
-	 */
-	counter = (nr_threads << SWAP_SHIFT) >> priority;
-	if (counter < 1)
-		counter = 1;
+	int retval = 0;
+	struct mm_struct *mm = current->mm;
 
-	for (; counter >= 0; counter--) {
+	/* Always start by trying to penalize the process that is allocating memory */
+	if (mm)
+		retval = swap_out_mm(mm, swap_amount(mm));
+
+	/* Then, look at the other mm's */
+	counter = mmlist_nr >> priority;
+	do {
 		struct list_head *p;
-		unsigned long max_cnt = 0;
-		struct mm_struct *best = NULL;
-		int assign = 0;
-		int found_task = 0;
-	select:
+
 		spin_lock(&mmlist_lock);
 		p = init_mm.mmlist.next;
-		for (; p != &init_mm.mmlist; p = p->next) {
-			struct mm_struct *mm = list_entry(p, struct mm_struct, mmlist);
-			if (mm->rss <= 0)
-				continue;
-			found_task++;
-			/* Refresh swap_cnt? */
-			if (assign == 1) {
-				mm->swap_cnt = (mm->rss >> SWAP_SHIFT);
-				if (mm->swap_cnt < SWAP_MIN)
-					mm->swap_cnt = SWAP_MIN;
-			}
-			if (mm->swap_cnt > max_cnt) {
-				max_cnt = mm->swap_cnt;
-				best = mm;
-			}
-		}
+		if (p == &init_mm.mmlist)
+			goto empty;
+
+		/* Move it to the back of the queue.. */
+		list_del(p);
+		list_add_tail(p, &init_mm.mmlist);
+		mm = list_entry(p, struct mm_struct, mmlist);
 
-		/* Make sure it doesn't disappear */
-		if (best)
-			atomic_inc(&best->mm_users);
+		/* Make sure the mm doesn't disappear when we drop the lock.. */
+		atomic_inc(&mm->mm_users);
 		spin_unlock(&mmlist_lock);
 
-		/*
-		 * We have dropped the tasklist_lock, but we
-		 * know that "mm" still exists: we are running
-		 * with the big kernel lock, and exit_mm()
-		 * cannot race with us.
-		 */
-		if (!best) {
-			if (!assign && found_task > 0) {
-				assign = 1;
-				goto select;
-			}
-			break;
-		} else {
-			__ret = swap_out_mm(best, gfp_mask);
-			mmput(best);
-			break;
-		}
-	}
-	return __ret;
+		/* Walk about 6% of the address space each time */
+		retval |= swap_out_mm(mm, swap_amount(mm));
+		mmput(mm);
+	} while (--counter >= 0);
+	return retval;
+
+empty:
+	spin_unlock(&mmlist_lock);
+	return 0;
 }
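The rewritten swap_out() above no longer hunts for the process with the largest swap_cnt; it penalizes the allocating process first, then walks the mm list round-robin, scanning swap_amount(mm) ptes per pass, i.e. max(rss >> SWAP_SHIFT, SWAP_MIN). A small standalone model of that selection, with a fixed array standing in for init_mm.mmlist and no locking:

#include <stdio.h>

#define SWAP_SHIFT 5
#define SWAP_MIN   8

struct mm_model {
	const char *name;
	unsigned long rss;
};

/* max(rss >> SWAP_SHIFT, SWAP_MIN), exactly like swap_amount() above */
static unsigned long swap_amount(const struct mm_model *mm)
{
	unsigned long nr = mm->rss >> SWAP_SHIFT;
	return nr < SWAP_MIN ? SWAP_MIN : nr;
}

int main(void)
{
	struct mm_model mms[] = {
		{ "small",  100 },	/* 100 >> 5 = 3, clamped to 8 */
		{ "medium", 640 },	/* 640 >> 5 = 20 */
		{ "large", 8192 },	/* 8192 >> 5 = 256 */
	};
	int i, rounds = 2, next = 0;

	/* Round-robin over the "mm list", like the new do/while loop */
	for (i = 0; i < rounds * 3; i++) {
		struct mm_model *mm = &mms[next];
		next = (next + 1) % 3;
		printf("scan %s: up to %lu ptes\n", mm->name, swap_amount(mm));
	}
	return 0;
}

Because the scanned mm is moved to the back of the queue each time, no single address space is walked twice before the others have had a turn.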
@@ -540,7 +472,6 @@ dirty_page_rescan:
 		 */
 		if (PageDirty(page)) {
 			int (*writepage)(struct page *) = page->mapping->a_ops->writepage;
-			int result;
 
 			if (!writepage)
 				goto page_active;
@@ -558,16 +489,12 @@ dirty_page_rescan:
 			page_cache_get(page);
 			spin_unlock(&pagemap_lru_lock);
 
-			result = writepage(page);
+			writepage(page);
 			page_cache_release(page);
 
 			/* And re-start the thing.. */
 			spin_lock(&pagemap_lru_lock);
-			if (result != 1)
-				continue;
-			/* writepage refused to do anything */
-			set_page_dirty(page);
-			goto page_active;
+			continue;
 		}
 
 		/*
@@ -808,6 +735,9 @@ int free_shortage(void)
 int inactive_shortage(void)
 {
 	int shortage = 0;
+	pg_data_t *pgdat = pgdat_list;
+
+	/* Is the inactive dirty list too small? */
 
 	shortage += freepages.high;
 	shortage += inactive_target;
@@ -818,7 +748,27 @@ int inactive_shortage(void)
 	if (shortage > 0)
 		return shortage;
 
-	return 0;
+	/* If not, do we have enough per-zone pages on the inactive list? */
+
+	shortage = 0;
+
+	do {
+		int i;
+		for(i = 0; i < MAX_NR_ZONES; i++) {
+			int zone_shortage;
+			zone_t *zone = pgdat->node_zones+ i;
+
+			zone_shortage = zone->pages_high;
+			zone_shortage -= zone->inactive_dirty_pages;
+			zone_shortage -= zone->inactive_clean_pages;
+			zone_shortage -= zone->free_pages;
+			if (zone_shortage > 0)
+				shortage += zone_shortage;
+		}
+		pgdat = pgdat->node_next;
+	} while (pgdat);
+
+	return shortage;
 }
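The new tail of inactive_shortage() adds a per-zone check: for every zone, the shortage is pages_high minus the zone's inactive dirty, inactive clean and free pages, clamped at zero and summed across all zones of all nodes. With illustrative numbers, not taken from any real configuration:

#include <stdio.h>

struct zone_model {
	const char *name;
	long pages_high;
	long inactive_dirty;
	long inactive_clean;
	long free;
};

/* Same formula as the per-zone loop in inactive_shortage() */
static long zone_shortage(const struct zone_model *z)
{
	long s = z->pages_high - z->inactive_dirty - z->inactive_clean - z->free;
	return s > 0 ? s : 0;
}

int main(void)
{
	struct zone_model zones[] = {
		{ "DMA",     128,  20,  10,  30 },	/* 128 - 60 = 68 short */
		{ "Normal", 1024, 500, 300, 400 },	/* surplus, counts as 0 */
	};
	long total = 0;
	int i;

	for (i = 0; i < 2; i++)
		total += zone_shortage(&zones[i]);
	printf("total shortage: %ld pages\n", total);	/* 68 */
	return 0;
}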
@@ -833,72 +783,35 @@ int inactive_shortage(void)
  * really care about latency. In that case we don't try
  * to free too many pages.
  */
+#define DEF_PRIORITY (6)
 static int refill_inactive(unsigned int gfp_mask, int user)
 {
-	int priority, count, start_count, made_progress;
+	int count, start_count, maxtry;
 
 	count = inactive_shortage() + free_shortage();
 	if (user)
 		count = (1 << page_cluster);
 	start_count = count;
 
-	/* Always trim SLAB caches when memory gets low. */
-	kmem_cache_reap(gfp_mask);
-
-	priority = 6;
+	maxtry = 6;
 	do {
-		made_progress = 0;
-
 		if (current->need_resched) {
 			__set_current_state(TASK_RUNNING);
 			schedule();
 		}
 
-		while (refill_inactive_scan(priority, 1)) {
-			made_progress = 1;
-			if (--count <= 0)
-				goto done;
-		}
-
-		/*
-		 * don't be too light against the d/i cache since
-		 * refill_inactive() almost never fail when there's
-		 * really plenty of memory free.
-		 */
-		shrink_dcache_memory(priority, gfp_mask);
-		shrink_icache_memory(priority, gfp_mask);
-
-		/*
-		 * Then, try to page stuff out..
-		 */
-		while (swap_out(priority, gfp_mask)) {
-			made_progress = 1;
+		while (refill_inactive_scan(DEF_PRIORITY, 1)) {
 			if (--count <= 0)
 				goto done;
 		}
 
-		/*
-		 * If we either have enough free memory, or if
-		 * page_launder() will be able to make enough
-		 * free memory, then stop.
-		 */
-		if (!inactive_shortage() || !free_shortage())
-			goto done;
+		/* If refill_inactive_scan failed, try to page stuff out.. */
+		swap_out(DEF_PRIORITY, gfp_mask);
 
-		/*
-		 * Only switch to a lower "priority" if we
-		 * didn't make any useful progress in the
-		 * last loop.
-		 */
-		if (!made_progress)
-			priority--;
-	} while (priority >= 0);
-
-	/* Always end on a refill_inactive.., may sleep... */
-	while (refill_inactive_scan(0, 1)) {
-		if (--count <= 0)
-			goto done;
-	}
+		if (--maxtry <= 0)
+			return 0;
+
+	} while (inactive_shortage());
 
 done:
 	return (count < start_count);
@@ -922,20 +835,29 @@ static int do_try_to_free_pages(unsigned int gfp_mask, int user)
 
 	/*
 	 * If needed, we move pages from the active list
-	 * to the inactive list. We also "eat" pages from
-	 * the inode and dentry cache whenever we do this.
+	 * to the inactive list.
 	 */
-	if (free_shortage() || inactive_shortage()) {
-		shrink_dcache_memory(6, gfp_mask);
-		shrink_icache_memory(6, gfp_mask);
+	if (inactive_shortage())
 		ret += refill_inactive(gfp_mask, user);
+
+	/*
+	 * Delete pages from the inode and dentry caches and
+	 * reclaim unused slab cache if memory is low.
+	 */
+	if (free_shortage()) {
+		shrink_dcache_memory(DEF_PRIORITY, gfp_mask);
+		shrink_icache_memory(DEF_PRIORITY, gfp_mask);
 	} else {
 		/*
-		 * Reclaim unused slab cache memory.
+		 * Illogical, but true. At least for now.
+		 *
+		 * If we're _not_ under shortage any more, we
+		 * reap the caches. Why? Because a noticeable
+		 * part of the caches are the buffer-heads,
+		 * which we'll want to keep if under shortage.
 		 */
 		kmem_cache_reap(gfp_mask);
-		ret = 1;
-	}
+	}
 
 	return ret;
 }
@@ -988,13 +910,8 @@ int kswapd(void *unused)
 		static int recalc = 0;
 
 		/* If needed, try to free some memory. */
-		if (inactive_shortage() || free_shortage()) {
-			int wait = 0;
-			/* Do we need to do some synchronous flushing? */
-			if (waitqueue_active(&kswapd_done))
-				wait = 1;
-			do_try_to_free_pages(GFP_KSWAPD, wait);
-		}
+		if (inactive_shortage() || free_shortage())
+			do_try_to_free_pages(GFP_KSWAPD, 0);
 
 		/*
 		 * Do some (very minimal) background scanning. This
@@ -1002,7 +919,7 @@ int kswapd(void *unused)
 		 * every minute. This clears old referenced bits
 		 * and moves unused pages to the inactive list.
 		 */
-		refill_inactive_scan(6, 0);
+		refill_inactive_scan(DEF_PRIORITY, 0);
 
 		/* Once a second, recalculate some VM stats. */
 		if (time_after(jiffies, recalc + HZ)) {
@@ -1010,11 +927,6 @@ int kswapd(void *unused)
 			recalculate_vm_stats();
 		}
 
-		/*
-		 * Wake up everybody waiting for free memory
-		 * and unplug the disk queue.
-		 */
-		wake_up_all(&kswapd_done);
 		run_task_queue(&tq_disk);
 
 		/*
@@ -1045,33 +957,10 @@ int kswapd(void *unused)
 	}
 }
 
-void wakeup_kswapd(int block)
+void wakeup_kswapd(void)
 {
-	DECLARE_WAITQUEUE(wait, current);
-
-	if (current == kswapd_task)
-		return;
-
-	if (!block) {
-		if (waitqueue_active(&kswapd_wait))
-			wake_up(&kswapd_wait);
-		return;
-	}
-
-	/*
-	 * Kswapd could wake us up before we get a chance
-	 * to sleep, so we have to be very careful here to
-	 * prevent SMP races...
-	 */
-	__set_current_state(TASK_UNINTERRUPTIBLE);
-	add_wait_queue(&kswapd_done, &wait);
-
-	if (waitqueue_active(&kswapd_wait))
-		wake_up(&kswapd_wait);
-	schedule();
-
-	remove_wait_queue(&kswapd_done, &wait);
-	__set_current_state(TASK_RUNNING);
+	if (current != kswapd_task)
+		wake_up_process(kswapd_task);
 }
 
 /*
@@ -1096,7 +985,7 @@ DECLARE_WAIT_QUEUE_HEAD(kreclaimd_wait);
 /*
  * Kreclaimd will move pages from the inactive_clean list to the
  * free list, in order to keep atomic allocations possible under
- * all circumstances. Even when kswapd is blocked on IO.
+ * all circumstances.
  */
 int kreclaimd(void *unused)
 {
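After this patch the shrinking order in do_try_to_free_pages() is: refill the inactive list only on an inactive shortage, shrink the dentry and inode caches only on a free shortage, and reap the slab caches only when there is no shortage left, so buffer-heads are kept while memory is tight. The control flow reduces to roughly the following, shown here with stub predicates and actions in place of the real shortage checks and shrinkers:

#include <stdio.h>

/* Stub predicates and actions; in the kernel these are the real
 * free_shortage()/inactive_shortage() tests and cache shrinkers. */
static int inactive_short, free_short;

static int refill_inactive(void)      { puts("refill_inactive");      return 1; }
static void shrink_dentry_inode(void) { puts("shrink dcache/icache"); }
static void reap_slab(void)           { puts("kmem_cache_reap");      }

static int do_try_to_free_pages_model(void)
{
	int ret = 0;

	if (inactive_short)
		ret += refill_inactive();

	if (free_short)
		shrink_dentry_inode();
	else
		reap_slab();	/* only reap caches when not under shortage */

	return ret;
}

int main(void)
{
	inactive_short = 1; free_short = 1;
	do_try_to_free_pages_model();

	inactive_short = 0; free_short = 0;
	do_try_to_free_pages_model();
	return 0;
}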