From 6d403070f28cd44860fdb3a53be5da0275c65cf4 Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Mon, 19 Jun 2000 22:45:37 +0000 Subject: Merge with 2.4.0-test1-ac21 + pile of MIPS cleanups to make merging possible. Chainsawed RM200 kernel to compile again. Jazz machine status unknown. --- mm/filemap.c | 194 ++++++++++++++++++++++++++++++++++++++------------------ mm/memory.c | 4 +- mm/mmap.c | 29 +++++---- mm/mremap.c | 2 +- mm/page_alloc.c | 65 ++++++++++++++++--- mm/slab.c | 19 +----- mm/swap_state.c | 6 +- mm/swapfile.c | 14 +--- mm/vmscan.c | 37 ++++++++--- 9 files changed, 244 insertions(+), 126 deletions(-) (limited to 'mm') diff --git a/mm/filemap.c b/mm/filemap.c index b1e2b8547..ba0048cb8 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -56,6 +56,8 @@ spinlock_t pagemap_lru_lock = SPIN_LOCK_UNLOCKED; #define CLUSTER_PAGES (1 << page_cluster) #define CLUSTER_OFFSET(x) (((x) >> page_cluster) << page_cluster) +#define min(a,b) ((a < b) ? a : b) + void __add_page_to_hash_queue(struct page * page, struct page **p) { atomic_inc(&page_cache_size); @@ -90,10 +92,16 @@ static inline int sync_page(struct page *page) /* * Remove a page from the page cache and free it. Caller has to make * sure the page is locked and that nobody else uses it - or that usage - * is safe. + * is safe. We need that the page don't have any buffers. */ static inline void __remove_inode_page(struct page *page) { + if (!PageLocked(page)) + PAGE_BUG(page); + + if (page->buffers) + BUG(); + remove_page_from_inode_queue(page); remove_page_from_hash_queue(page); page->mapping = NULL; @@ -101,9 +109,6 @@ static inline void __remove_inode_page(struct page *page) void remove_inode_page(struct page *page) { - if (!PageLocked(page)) - PAGE_BUG(page); - spin_lock(&pagecache_lock); __remove_inode_page(page); spin_unlock(&pagecache_lock); @@ -114,16 +119,16 @@ void remove_inode_page(struct page *page) * @inode: the inode which pages we want to invalidate * * This function only removes the unlocked pages, if you want to - * remove all the pages of one inode, you must call truncate_inode_pages. + * remove all the pages of one inode, you must call + * truncate_inode_pages. This function is not supposed to be called + * by block based filesystems. */ - void invalidate_inode_pages(struct inode * inode) { struct list_head *head, *curr; struct page * page; head = &inode->i_mapping->pages; - spin_lock(&pagecache_lock); spin_lock(&pagemap_lru_lock); curr = head->next; @@ -135,20 +140,53 @@ void invalidate_inode_pages(struct inode * inode) /* We cannot invalidate a locked page */ if (TryLockPage(page)) continue; + /* We _should not be called_ by block based filesystems */ + if (page->buffers) + BUG(); - __lru_cache_del(page); __remove_inode_page(page); + __lru_cache_del(page); UnlockPage(page); page_cache_release(page); } - spin_unlock(&pagemap_lru_lock); spin_unlock(&pagecache_lock); } -/* +static inline void truncate_partial_page(struct page *page, unsigned partial) +{ + memclear_highpage_flush(page, partial, PAGE_CACHE_SIZE-partial); + + if (page->buffers) + block_flushpage(page, partial); + +} + +static inline void truncate_complete_page(struct page *page) +{ + if (page->buffers) + block_destroy_buffers(page); + lru_cache_del(page); + + /* + * We remove the page from the page cache _after_ we have + * destroyed all buffer-cache references to it. Otherwise some + * other process might think this inode page is not in the + * page cache and creates a buffer-cache alias to it causing + * all sorts of fun problems ... + */ + remove_inode_page(page); + page_cache_release(page); +} + +/** + * truncate_inode_pages - truncate *all* the pages from an offset + * @mapping: mapping to truncate + * @lstart: offset from with to truncate + * * Truncate the page cache at a set offset, removing the pages * that are beyond that offset (and zeroing out partial pages). + * If any page is locked we wait for it to become unlocked. */ void truncate_inode_pages(struct address_space * mapping, loff_t lstart) { @@ -168,11 +206,10 @@ repeat: page = list_entry(curr, struct page, list); curr = curr->next; - offset = page->index; - /* page wholly truncated - free it */ - if (offset >= start) { + /* Is one of the pages to truncate? */ + if ((offset >= start) || (partial && (offset + 1) == start)) { if (TryLockPage(page)) { page_cache_get(page); spin_unlock(&pagecache_lock); @@ -183,22 +220,14 @@ repeat: page_cache_get(page); spin_unlock(&pagecache_lock); - if (!page->buffers || block_flushpage(page, 0)) - lru_cache_del(page); - - /* - * We remove the page from the page cache - * _after_ we have destroyed all buffer-cache - * references to it. Otherwise some other process - * might think this inode page is not in the - * page cache and creates a buffer-cache alias - * to it causing all sorts of fun problems ... - */ - remove_inode_page(page); + if (partial && (offset + 1) == start) { + truncate_partial_page(page, partial); + partial = 0; + } else + truncate_complete_page(page); UnlockPage(page); page_cache_release(page); - page_cache_release(page); /* * We have done things without the pagecache lock, @@ -209,38 +238,59 @@ repeat: */ goto repeat; } - /* - * there is only one partial page possible. - */ - if (!partial) - continue; + } + spin_unlock(&pagecache_lock); +} - /* and it's the one preceeding the first wholly truncated page */ - if ((offset + 1) != start) - continue; +/** + * truncate_all_inode_pages - truncate *all* the pages + * @mapping: mapping to truncate + * + * Truncate all the inode pages. If any page is locked we wait for it + * to become unlocked. This function can block. + */ +void truncate_all_inode_pages(struct address_space * mapping) +{ + struct list_head *head, *curr; + struct page * page; + + head = &mapping->pages; +repeat: + spin_lock(&pagecache_lock); + spin_lock(&pagemap_lru_lock); + curr = head->next; + + while (curr != head) { + page = list_entry(curr, struct page, list); + curr = curr->next; - /* partial truncate, clear end of page */ if (TryLockPage(page)) { + page_cache_get(page); + spin_unlock(&pagemap_lru_lock); spin_unlock(&pagecache_lock); + wait_on_page(page); + page_cache_release(page); goto repeat; } - page_cache_get(page); - spin_unlock(&pagecache_lock); - - memclear_highpage_flush(page, partial, PAGE_CACHE_SIZE-partial); - if (page->buffers) - block_flushpage(page, partial); - - partial = 0; - - /* - * we have dropped the spinlock so we have to - * restart. - */ + if (page->buffers) { + page_cache_get(page); + spin_unlock(&pagemap_lru_lock); + spin_unlock(&pagecache_lock); + block_destroy_buffers(page); + remove_inode_page(page); + lru_cache_del(page); + page_cache_release(page); + UnlockPage(page); + page_cache_release(page); + goto repeat; + } + __lru_cache_del(page); + __remove_inode_page(page); UnlockPage(page); page_cache_release(page); - goto repeat; } + + spin_unlock(&pagemap_lru_lock); spin_unlock(&pagecache_lock); } @@ -264,7 +314,15 @@ int shrink_mmap(int priority, int gfp_mask) page = list_entry(page_lru, struct page, lru); list_del(page_lru); - if (PageTestandClearReferenced(page)) + if (PageTestandClearReferenced(page)) { + page->age += PG_AGE_ADV; + if (page->age > PG_AGE_MAX) + page->age = PG_AGE_MAX; + goto dispose_continue; + } + page->age -= min(PG_AGE_DECL, page->age); + + if (page->age) goto dispose_continue; count--; @@ -322,17 +380,23 @@ int shrink_mmap(int priority, int gfp_mask) * were to be marked referenced.. */ if (PageSwapCache(page)) { - spin_unlock(&pagecache_lock); - __delete_from_swap_cache(page); - goto made_inode_progress; - } - - /* - * Page is from a zone we don't care about. - * Don't drop page cache entries in vain. - */ - if (page->zone->free_pages > page->zone->pages_high) + if (!PageDirty(page)) { + spin_unlock(&pagecache_lock); + __delete_from_swap_cache(page); + goto made_inode_progress; + } + /* PageDeferswap -> we swap out the page now. */ + if (gfp_mask & __GFP_IO) { + spin_unlock(&pagecache_lock); + /* Do NOT unlock the page ... brw_page does. */ + ClearPageDirty(page); + rw_swap_page(WRITE, page, 0); + spin_lock(&pagemap_lru_lock); + page_cache_release(page); + goto dispose_continue; + } goto cache_unlock_continue; + } /* is it a page-cache page? */ if (page->mapping) { @@ -1744,7 +1808,7 @@ static int msync_interval(struct vm_area_struct * vma, if (!error && (flags & MS_SYNC)) { struct file * file = vma->vm_file; if (file && file->f_op && file->f_op->fsync) - error = file->f_op->fsync(file, file->f_dentry); + error = file->f_op->fsync(file, file->f_dentry, 1); } return error; } @@ -2483,7 +2547,7 @@ generic_file_write(struct file *file,const char *buf,size_t count,loff_t *ppos) if (count) { remove_suid(inode); inode->i_ctime = inode->i_mtime = CURRENT_TIME; - mark_inode_dirty(inode); + mark_inode_dirty_sync(inode); } while (count) { @@ -2540,7 +2604,13 @@ unlock: if (cached_page) page_cache_free(cached_page); + /* For now, when the user asks for O_SYNC, we'll actually + * provide O_DSYNC. */ + if ((status >= 0) && (file->f_flags & O_SYNC)) + status = generic_osync_inode(inode, 1); /* 1 means datasync */ + err = written ? written : status; + out: up(&inode->i_sem); return err; diff --git a/mm/memory.c b/mm/memory.c index de7dc07f8..e2609758e 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -847,7 +847,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma, UnlockPage(old_page); break; } - delete_from_swap_cache_nolock(old_page); + SetPageDirty(old_page); UnlockPage(old_page); /* FallThrough */ case 1: @@ -1058,7 +1058,7 @@ static int do_swap_page(struct mm_struct * mm, */ lock_page(page); swap_free(entry); - if (write_access && !is_page_shared(page)) { + if (write_access && !is_page_shared(page) && nr_free_highpages()) { delete_from_swap_cache_nolock(page); UnlockPage(page); page = replace_with_highmem(page); diff --git a/mm/mmap.c b/mm/mmap.c index 8a81bfb20..9edabc02e 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -166,6 +166,7 @@ unsigned long do_mmap_pgoff(struct file * file, unsigned long addr, unsigned lon { struct mm_struct * mm = current->mm; struct vm_area_struct * vma; + int correct_wcount = 0; int error; if (file && (!file->f_op || !file->f_op->mmap)) @@ -296,26 +297,15 @@ unsigned long do_mmap_pgoff(struct file * file, unsigned long addr, unsigned lon goto free_vma; if (file) { - int correct_wcount = 0; if (vma->vm_flags & VM_DENYWRITE) { - if (atomic_read(&file->f_dentry->d_inode->i_writecount) > 0) { - error = -ETXTBSY; + error = deny_write_access(file); + if (error) goto free_vma; - } - /* f_op->mmap might possibly sleep - * (generic_file_mmap doesn't, but other code - * might). In any case, this takes care of any - * race that this might cause. - */ - atomic_dec(&file->f_dentry->d_inode->i_writecount); correct_wcount = 1; } vma->vm_file = file; get_file(file); error = file->f_op->mmap(file, vma); - /* Fix up the count if necessary, then check for an error */ - if (correct_wcount) - atomic_inc(&file->f_dentry->d_inode->i_writecount); if (error) goto unmap_and_free_vma; } else if (flags & MAP_SHARED) { @@ -330,6 +320,8 @@ unsigned long do_mmap_pgoff(struct file * file, unsigned long addr, unsigned lon addr = vma->vm_start; /* can addr have changed?? */ vmlist_modify_lock(mm); insert_vm_struct(mm, vma); + if (correct_wcount) + atomic_inc(&file->f_dentry->d_inode->i_writecount); merge_segments(mm, vma->vm_start, vma->vm_end); vmlist_modify_unlock(mm); @@ -341,6 +333,8 @@ unsigned long do_mmap_pgoff(struct file * file, unsigned long addr, unsigned lon return addr; unmap_and_free_vma: + if (correct_wcount) + atomic_inc(&file->f_dentry->d_inode->i_writecount); vma->vm_file = NULL; fput(file); /* Undo any partial mapping done by a device driver. */ @@ -692,9 +686,11 @@ int do_munmap(struct mm_struct *mm, unsigned long addr, size_t len) * so release them, and unmap the page range.. * If the one of the segments is only being partially unmapped, * it will put new vm_area_struct(s) into the address space. + * In that case we have to be careful with VM_DENYWRITE. */ while ((mpnt = free) != NULL) { unsigned long st, end, size; + struct file *file = NULL; free = free->vm_next; @@ -706,6 +702,11 @@ int do_munmap(struct mm_struct *mm, unsigned long addr, size_t len) if (mpnt->vm_ops && mpnt->vm_ops->unmap) mpnt->vm_ops->unmap(mpnt, st, size); + if (mpnt->vm_flags & VM_DENYWRITE && + (st != mpnt->vm_start || end != mpnt->vm_end) && + (file = mpnt->vm_file) != NULL) { + atomic_dec(&file->f_dentry->d_inode->i_writecount); + } remove_shared_vm_struct(mpnt); mm->map_count--; @@ -717,6 +718,8 @@ int do_munmap(struct mm_struct *mm, unsigned long addr, size_t len) * Fix the mapping, and free the old area if it wasn't reused. */ extra = unmap_fixup(mm, mpnt, st, size, extra); + if (file) + atomic_inc(&file->f_dentry->d_inode->i_writecount); } /* Release the extra vma struct if it wasn't used */ diff --git a/mm/mremap.c b/mm/mremap.c index 0404dd795..a48125178 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -144,7 +144,7 @@ static inline unsigned long move_vma(struct vm_area_struct * vma, vmlist_modify_lock(current->mm); insert_vm_struct(current->mm, new_vma); merge_segments(current->mm, new_vma->vm_start, new_vma->vm_end); - vmlist_modify_unlock(vma->vm_mm); + vmlist_modify_unlock(current->mm); do_munmap(current->mm, addr, old_len); current->mm->total_vm += new_len >> PAGE_SHIFT; if (new_vma->vm_flags & VM_LOCKED) { diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 926364499..4766127b2 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -29,7 +29,7 @@ int nr_lru_pages; pg_data_t *pgdat_list; static char *zone_names[MAX_NR_ZONES] = { "DMA", "Normal", "HighMem" }; -static int zone_balance_ratio[MAX_NR_ZONES] = { 128, 128, 128, }; +static int zone_balance_ratio[MAX_NR_ZONES] = { 128, 128, 512, }; static int zone_balance_min[MAX_NR_ZONES] = { 10 , 10, 10, }; static int zone_balance_max[MAX_NR_ZONES] = { 255 , 255, 255, }; @@ -93,6 +93,8 @@ void __free_pages_ok (struct page *page, unsigned long order) BUG(); if (PageDecrAfter(page)) BUG(); + if (PageDirty(page)) + BUG(); zone = page->zone; @@ -139,10 +141,13 @@ void __free_pages_ok (struct page *page, unsigned long order) spin_unlock_irqrestore(&zone->lock, flags); - if (zone->free_pages > zone->pages_high) { - zone->zone_wake_kswapd = 0; + if (zone->free_pages >= zone->pages_low) { zone->low_on_memory = 0; } + + if (zone->free_pages >= zone->pages_high) { + zone->zone_wake_kswapd = 0; + } } #define MARK_USED(index, order, area) \ @@ -217,6 +222,9 @@ struct page * __alloc_pages(zonelist_t *zonelist, unsigned long order) { zone_t **zone = zonelist->zones; extern wait_queue_head_t kswapd_wait; + static int last_woke_kswapd; + static int kswapd_pause = HZ; + int gfp_mask = zonelist->gfp_mask; /* * (If anyone calls gfp from interrupts nonatomically then it @@ -237,18 +245,34 @@ struct page * __alloc_pages(zonelist_t *zonelist, unsigned long order) struct page *page = rmqueue(z, order); if (z->free_pages < z->pages_low) { z->zone_wake_kswapd = 1; - if (waitqueue_active(&kswapd_wait)) - wake_up_interruptible(&kswapd_wait); } if (page) return page; } } + /* + * Kswapd should be freeing enough memory to satisfy all allocations + * immediately. Calling try_to_free_pages from processes will slow + * down the system a lot. On the other hand, waking up kswapd too + * often means wasted memory and cpu time. + * + * We tune the kswapd pause interval in such a way that kswapd is + * always just agressive enough to free the amount of memory we + * want freed. + */ + if (waitqueue_active(&kswapd_wait) && + time_after(jiffies, last_woke_kswapd + kswapd_pause)) { + kswapd_pause++; + last_woke_kswapd = jiffies; + wake_up_interruptible(&kswapd_wait); + } + /* * Ok, we don't have any zones that don't need some * balancing.. See if we have any that aren't critical.. */ +again: zone = zonelist->zones; for (;;) { zone_t *z = *(zone++); @@ -256,20 +280,33 @@ struct page * __alloc_pages(zonelist_t *zonelist, unsigned long order) break; if (!z->low_on_memory) { struct page *page = rmqueue(z, order); - if (z->free_pages < z->pages_min) + if (z->free_pages < (z->pages_min + z->pages_low) / 2) z->low_on_memory = 1; if (page) return page; + } else { + if (kswapd_pause > 0) + kswapd_pause--; } } + /* We didn't kick kswapd often enough... */ + kswapd_pause /= 2; + if (waitqueue_active(&kswapd_wait)) + wake_up_interruptible(&kswapd_wait); + /* If we're low priority, we just wait a bit and try again later. */ + if ((gfp_mask & __GFP_WAIT) && current->need_resched && + current->state == TASK_RUNNING) { + schedule(); + goto again; + } + /* * Uhhuh. All the zones have been critical, which means that * we'd better do some synchronous swap-out. kswapd has not * been able to cope.. */ if (!(current->flags & PF_MEMALLOC)) { - int gfp_mask = zonelist->gfp_mask; if (!try_to_free_pages(gfp_mask)) { if (!(gfp_mask & __GFP_HIGH)) goto fail; @@ -277,7 +314,7 @@ struct page * __alloc_pages(zonelist_t *zonelist, unsigned long order) } /* - * Final phase: allocate anything we can! + * We freed something, so we're allowed to allocate anything we can! */ zone = zonelist->zones; for (;;) { @@ -292,6 +329,18 @@ struct page * __alloc_pages(zonelist_t *zonelist, unsigned long order) } fail: + /* Last try, zone->low_on_memory isn't reset until we hit pages_low */ + zone = zonelist->zones; + for (;;) { + zone_t *z = *(zone++); + if (!z) + break; + if (z->free_pages > z->pages_min) { + struct page *page = rmqueue(z, order); + if (page) + return page; + } + } /* No luck.. */ return NULL; } diff --git a/mm/slab.c b/mm/slab.c index 64f33cb33..f3d04da8e 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -106,11 +106,6 @@ #include #include #include -#ifdef __mips__ -#include -#include -#include -#endif /* If there is a different PAGE_SIZE around, and it works with this allocator, * then change the following. @@ -1691,19 +1686,11 @@ void * kmalloc(size_t size, int flags) { cache_sizes_t *csizep = cache_sizes; - unsigned long addr; for (; csizep->cs_size; csizep++) { if (size > csizep->cs_size) continue; - addr = __kmem_cache_alloc(csizep->cs_cachep, flags); -#ifdef __mips__ - if (addr && (flags & GFP_UNCACHED)) { - flush_cache_all(); /* Ouch ... */ - addr = KSEG1ADDR(addr); - } -#endif /* __mips__ */ - return addr; + return __kmem_cache_alloc(csizep->cs_cachep, flags); } printk(KERN_ERR "kmalloc: Size (%lu) too large\n", (unsigned long) size); return NULL; @@ -1717,10 +1704,6 @@ kfree(const void *objp) if (!objp) goto null_ptr; -#ifdef __mips__ - if (KSEGX(objp) == KSEG1) - objp = KSEG0ADDR(objp); -#endif __mips__ nr = MAP_NR(objp); if (nr >= max_mapnr) goto bad_ptr; diff --git a/mm/swap_state.c b/mm/swap_state.c index 2405aba2f..87ecc0c10 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -73,6 +73,7 @@ static inline void remove_from_swap_cache(struct page *page) PAGE_BUG(page); PageClearSwapCache(page); + ClearPageDirty(page); remove_inode_page(page); } @@ -102,9 +103,10 @@ void delete_from_swap_cache_nolock(struct page *page) if (!PageLocked(page)) BUG(); - if (block_flushpage(page, 0)) - lru_cache_del(page); + if (page->buffers) + block_destroy_buffers(page); + lru_cache_del(page); __delete_from_swap_cache(page); page_cache_release(page); } diff --git a/mm/swapfile.c b/mm/swapfile.c index 55ef476a3..5d3a7f23e 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -407,11 +407,11 @@ asmlinkage long sys_swapoff(const char * specialfile) if (!capable(CAP_SYS_ADMIN)) return -EPERM; - lock_kernel(); err = user_path_walk(specialfile, &nd); if (err) goto out; + lock_kernel(); prev = -1; swap_list_lock(); for (type = swap_list.head; type >= 0; type = swap_info[type].next) { @@ -478,9 +478,9 @@ asmlinkage long sys_swapoff(const char * specialfile) err = 0; out_dput: + unlock_kernel(); path_release(&nd); out: - unlock_kernel(); return err; } @@ -555,7 +555,6 @@ asmlinkage long sys_swapon(const char * specialfile, int swap_flags) unsigned long maxpages; int swapfilesize; struct block_device *bdev = NULL; - char *name; if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -586,14 +585,7 @@ asmlinkage long sys_swapon(const char * specialfile, int swap_flags) } else { p->prio = --least_priority; } - name = getname(specialfile); - error = PTR_ERR(name); - if (IS_ERR(name)) - goto bad_swap_2; - error = 0; - if (path_init(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd)) - error = path_walk(name, &nd); - putname(name); + error = user_path_walk(specialfile, &nd); if (error) goto bad_swap_2; diff --git a/mm/vmscan.c b/mm/vmscan.c index 1919c0961..597a1b093 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -62,6 +62,10 @@ static int try_to_swap_out(struct mm_struct * mm, struct vm_area_struct* vma, un goto out_failed; } + /* Can only do this if we age all active pages. */ + if (PageActive(page) && page->age > 1) + goto out_failed; + if (TryLockPage(page)) goto out_failed; @@ -74,6 +78,8 @@ static int try_to_swap_out(struct mm_struct * mm, struct vm_area_struct* vma, un * memory, and we should just continue our scan. */ if (PageSwapCache(page)) { + if (pte_dirty(pte)) + SetPageDirty(page); entry.val = page->index; swap_duplicate(entry); set_pte(page_table, swp_entry_to_pte(entry)); @@ -181,7 +187,10 @@ drop_pte: vmlist_access_unlock(vma->vm_mm); /* OK, do a physical asynchronous write to swap. */ - rw_swap_page(WRITE, page, 0); + // rw_swap_page(WRITE, page, 0); + /* Let shrink_mmap handle this swapout. */ + SetPageDirty(page); + UnlockPage(page); out_free_success: page_cache_release(page); @@ -430,12 +439,12 @@ out: * latency. */ #define FREE_COUNT 8 -#define SWAP_COUNT 16 static int do_try_to_free_pages(unsigned int gfp_mask) { int priority; int count = FREE_COUNT; - int swap_count; + int swap_count = 0; + int ret = 0; /* Always trim SLAB caches when memory gets low. */ kmem_cache_reap(gfp_mask); @@ -443,6 +452,7 @@ static int do_try_to_free_pages(unsigned int gfp_mask) priority = 64; do { while (shrink_mmap(priority, gfp_mask)) { + ret = 1; if (!--count) goto done; } @@ -457,9 +467,12 @@ static int do_try_to_free_pages(unsigned int gfp_mask) */ count -= shrink_dcache_memory(priority, gfp_mask); count -= shrink_icache_memory(priority, gfp_mask); - if (count <= 0) + if (count <= 0) { + ret = 1; goto done; + } while (shm_swap(priority, gfp_mask)) { + ret = 1; if (!--count) goto done; } @@ -471,24 +484,30 @@ static int do_try_to_free_pages(unsigned int gfp_mask) * This will not actually free any pages (they get * put in the swap cache), so we must not count this * as a "count" success. + * + * The amount we page out is the amount of pages we're + * short freeing, amplified by the number of times we + * failed above. This generates a negative feedback loop: + * the more difficult it was to free pages, the easier we + * will make it. */ - swap_count = SWAP_COUNT; - while (swap_out(priority, gfp_mask)) + swap_count += count; + while (swap_out(priority, gfp_mask)) { if (--swap_count < 0) break; + } } while (--priority >= 0); /* Always end on a shrink_mmap.. */ while (shrink_mmap(0, gfp_mask)) { + ret = 1; if (!--count) goto done; } - /* We return 1 if we are freed some page */ - return (count != FREE_COUNT); done: - return 1; + return ret; } DECLARE_WAIT_QUEUE_HEAD(kswapd_wait); -- cgit v1.2.3