author     Ralf Baechle <ralf@linux-mips.org>  2000-05-12 21:05:59 +0000
committer  Ralf Baechle <ralf@linux-mips.org>  2000-05-12 21:05:59 +0000
commit     ba2dacab305c598cd4c34a604f8e276bf5bab5ff (patch)
tree       78670a0139bf4d5ace617b29b7eba82bbc74d602 /mm
parent     b77bf69998121e689c5e86cc5630d39a0a9ee6ca (diff)
Merge with Linux 2.3.99-pre7 and various other bits.
Diffstat (limited to 'mm')
-rw-r--r--  mm/filemap.c    | 208
-rw-r--r--  mm/highmem.c    |  17
-rw-r--r--  mm/memory.c     |  16
-rw-r--r--  mm/page_alloc.c | 144
-rw-r--r--  mm/page_io.c    |   7
-rw-r--r--  mm/slab.c       |   3
-rw-r--r--  mm/swap_state.c |  12
-rw-r--r--  mm/swapfile.c   |  75
-rw-r--r--  mm/vmscan.c     | 151
9 files changed, 298 insertions, 335 deletions
diff --git a/mm/filemap.c b/mm/filemap.c
index d0df8bd2c..acafb3353 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -46,7 +46,7 @@ unsigned int page_hash_bits;
 struct page **page_hash_table;
 
 struct list_head lru_cache;
 
-spinlock_t pagecache_lock = SPIN_LOCK_UNLOCKED;
+static spinlock_t pagecache_lock = SPIN_LOCK_UNLOCKED;
 /*
  * NOTE: to avoid deadlocking you must never acquire the pagecache_lock with
  *	 the pagemap_lru_lock held.
@@ -67,7 +67,7 @@ void __add_page_to_hash_queue(struct page * page, struct page **p)
 		PAGE_BUG(page);
 }
 
-static void remove_page_from_hash_queue(struct page * page)
+static inline void remove_page_from_hash_queue(struct page * page)
 {
 	if(page->pprev_hash) {
 		if(page->next_hash)
@@ -92,47 +92,71 @@ static inline int sync_page(struct page *page)
  * sure the page is locked and that nobody else uses it - or that usage
  * is safe.
  */
+static inline void __remove_inode_page(struct page *page)
+{
+	remove_page_from_inode_queue(page);
+	remove_page_from_hash_queue(page);
+	page->mapping = NULL;
+}
+
 void remove_inode_page(struct page *page)
 {
 	if (!PageLocked(page))
 		PAGE_BUG(page);
 
-	/* Initiate completion of any async operations */
-	sync_page(page);
-
 	spin_lock(&pagecache_lock);
-	remove_page_from_inode_queue(page);
-	remove_page_from_hash_queue(page);
-	page->mapping = NULL;
+	__remove_inode_page(page);
 	spin_unlock(&pagecache_lock);
 }
 
+#define ITERATIONS 100
+
 void invalidate_inode_pages(struct inode * inode)
 {
 	struct list_head *head, *curr;
 	struct page * page;
+	int count;
 
-repeat:
 	head = &inode->i_mapping->pages;
-	spin_lock(&pagecache_lock);
-	curr = head->next;
-	while (curr != head) {
-		page = list_entry(curr, struct page, list);
-		curr = curr->next;
+	while (head != head->next) {
+		spin_lock(&pagecache_lock);
+		spin_lock(&pagemap_lru_lock);
+		head = &inode->i_mapping->pages;
+		curr = head->next;
+		count = 0;
 
-		/* We cannot invalidate a locked page */
-		if (TryLockPage(page))
-			continue;
-		spin_unlock(&pagecache_lock);
+		while ((curr != head) && (count++ < ITERATIONS)) {
+			page = list_entry(curr, struct page, list);
+			curr = curr->next;
 
-		lru_cache_del(page);
-		remove_inode_page(page);
-		UnlockPage(page);
-		page_cache_release(page);
-		goto repeat;
+			/* We cannot invalidate a locked page */
+			if (TryLockPage(page))
+				continue;
+
+			__lru_cache_del(page);
+			__remove_inode_page(page);
+			UnlockPage(page);
+			page_cache_release(page);
+		}
+
+		/* At this stage we have passed through the list
+		 * once, and there may still be locked pages. */
+
+		if (head->next!=head) {
+			page = list_entry(head->next, struct page, list);
+			get_page(page);
+			spin_unlock(&pagemap_lru_lock);
+			spin_unlock(&pagecache_lock);
+			/* We need to block */
+			lock_page(page);
+			UnlockPage(page);
+			page_cache_release(page);
+		} else {
+			spin_unlock(&pagemap_lru_lock);
+			spin_unlock(&pagecache_lock);
+		}
 	}
-	spin_unlock(&pagecache_lock);
 }
 
 /*
@@ -163,10 +187,10 @@ repeat:
 		/* page wholly truncated - free it */
 		if (offset >= start) {
 			if (TryLockPage(page)) {
-				spin_unlock(&pagecache_lock);
 				get_page(page);
+				spin_unlock(&pagecache_lock);
 				wait_on_page(page);
-				put_page(page);
+				page_cache_release(page);
 				goto repeat;
 			}
 			get_page(page);
@@ -236,57 +260,47 @@ repeat:
 	spin_unlock(&pagecache_lock);
 }
 
-int shrink_mmap(int priority, int gfp_mask, zone_t *zone)
+int shrink_mmap(int priority, int gfp_mask)
 {
-	int ret = 0, loop = 0, count;
-	LIST_HEAD(young);
+	int ret = 0, count;
 	LIST_HEAD(old);
-	LIST_HEAD(forget);
 	struct list_head * page_lru, * dispose;
 	struct page * page = NULL;
-	struct zone_struct * p_zone;
-	int maxloop = 256 >> priority;
 
-	if (!zone)
-		BUG();
-
-	count = nr_lru_pages >> priority;
-	if (!count)
-		return ret;
+	count = nr_lru_pages / (priority + 1);
 
-	spin_lock(&pagemap_lru_lock);
-again:
 	/* we need pagemap_lru_lock for list_del() ... subtle code below */
+	spin_lock(&pagemap_lru_lock);
 	while (count > 0 && (page_lru = lru_cache.prev) != &lru_cache) {
 		page = list_entry(page_lru, struct page, lru);
 		list_del(page_lru);
-		p_zone = page->zone;
 
-		/*
-		 * These two tests are there to make sure we don't free too
-		 * many pages from the "wrong" zone. We free some anyway,
-		 * they are the least recently used pages in the system.
-		 * When we don't free them, leave them in &old.
-		 */
-		dispose = &old;
-		if (p_zone != zone && (loop > (maxloop / 4) ||
-				p_zone->free_pages > p_zone->pages_high))
+		dispose = &lru_cache;
+		if (PageTestandClearReferenced(page))
 			goto dispose_continue;
 
-		/* The page is in use, or was used very recently, put it in
-		 * &young to make sure that we won't try to free it the next
-		 * time */
-		dispose = &young;
+		count--;
 
-		if (test_and_clear_bit(PG_referenced, &page->flags))
-			goto dispose_continue;
+		/*
+		 * I'm ambivalent on this one.. Should we try to
+		 * maintain LRU on the LRU list, and put pages that
+		 * are old at the end of the queue, even if that
		 * means that we'll re-scan then again soon and
+		 * often waste CPU time? Or should be just let any
+		 * pages we do not want to touch now for one reason
+		 * or another percolate to be "young"?
+		 *
+		dispose = &old;
+		 *
+		 */
 
-		count--;
+		/*
+		 * Avoid unscalable SMP locking for pages we can
+		 * immediate tell are untouchable..
+		 */
 		if (!page->buffers && page_count(page) > 1)
 			goto dispose_continue;
 
-		/* Page not used -> free it; if that fails -> &old */
-		dispose = &old;
 		if (TryLockPage(page))
 			goto dispose_continue;
 
@@ -300,7 +314,10 @@ again:
 		/* avoid freeing the page while it's locked */
 		get_page(page);
 
-		/* Is it a buffer page? */
+		/*
+		 * Is it a buffer page? Try to clean it up regardless
+		 * of zone - it's old.
+		 */
 		if (page->buffers) {
 			if (!try_to_free_buffers(page))
 				goto unlock_continue;
@@ -335,19 +352,23 @@ again:
 			goto made_inode_progress;
 		}
 
+		/*
+		 * Page is from a zone we don't care about.
+		 * Don't drop page cache entries in vain.
+		 */
+		if (page->zone->free_pages > page->zone->pages_high)
+			goto cache_unlock_continue;
+
 		/* is it a page-cache page? */
 		if (page->mapping) {
 			if (!PageDirty(page) && !pgcache_under_min()) {
-				remove_page_from_inode_queue(page);
-				remove_page_from_hash_queue(page);
-				page->mapping = NULL;
+				__remove_inode_page(page);
 				spin_unlock(&pagecache_lock);
 				goto made_inode_progress;
 			}
 			goto cache_unlock_continue;
 		}
 
-		dispose = &forget;
 		printk(KERN_ERR "shrink_mmap: unknown LRU page!\n");
 
 cache_unlock_continue:
@@ -356,10 +377,6 @@ unlock_continue:
 		spin_lock(&pagemap_lru_lock);
 		UnlockPage(page);
 		put_page(page);
-		list_add(page_lru, dispose);
-		continue;
-
-		/* we're holding pagemap_lru_lock, so we can just loop again */
 dispose_continue:
 		list_add(page_lru, dispose);
 	}
@@ -375,13 +392,7 @@ made_buffer_progress:
 	/* nr_lru_pages needs the spinlock */
 	nr_lru_pages--;
 
-	loop++;
-	/* wrong zone? not looped too often? roll again... */
-	if (page->zone != zone && loop < maxloop)
-		goto again;
-
 out:
-	list_splice(&young, &lru_cache);
 	list_splice(&old, lru_cache.prev);
 
 	spin_unlock(&pagemap_lru_lock);
@@ -403,7 +414,7 @@ inside:
 		if (page->index == offset)
 			break;
 	}
-	set_bit(PG_referenced, &page->flags);
+	SetPageReferenced(page);
 
 not_found:
 	return page;
 }
@@ -495,6 +506,26 @@ int generic_buffer_fdatasync(struct inode *inode, unsigned long start_idx, unsig
 }
 
 /*
+ * Add a page to the inode page cache.
+ *
+ * The caller must have locked the page and
+ * set all the page flags correctly..
+ */
+void add_to_page_cache_locked(struct page * page, struct address_space *mapping, unsigned long index)
+{
+	if (!PageLocked(page))
+		BUG();
+
+	get_page(page);
+	spin_lock(&pagecache_lock);
+	page->index = index;
+	add_page_to_inode_queue(mapping, page);
+	__add_page_to_hash_queue(page, page_hash(mapping, index));
+	lru_cache_add(page);
+	spin_unlock(&pagecache_lock);
+}
+
+/*
  * This adds a page to the page cache, starting out as locked,
 * owned by us, referenced, but not uptodate and with no errors.
 */
@@ -569,7 +600,7 @@ static inline int page_cache_read(struct file * file, unsigned long offset)
 		return -ENOMEM;
 
 	if (!add_to_page_cache_unique(page, mapping, offset, hash)) {
-		int error = mapping->a_ops->readpage(file->f_dentry, page);
+		int error = mapping->a_ops->readpage(file, page);
 		page_cache_release(page);
 		return error;
 	}
@@ -1104,7 +1135,7 @@ page_not_up_to_date:
 readpage:
 		/* ... and start the actual read. The read will unlock the page. */
-		error = mapping->a_ops->readpage(filp->f_dentry, page);
+		error = mapping->a_ops->readpage(filp, page);
 
 		if (!error) {
 			if (Page_Uptodate(page))
@@ -1486,7 +1517,7 @@ page_not_uptodate:
 		goto success;
 	}
 
-	if (!mapping->a_ops->readpage(file->f_dentry, page)) {
+	if (!mapping->a_ops->readpage(file, page)) {
 		wait_on_page(page);
 		if (Page_Uptodate(page))
 			goto success;
@@ -1504,7 +1535,7 @@ page_not_uptodate:
 		goto success;
 	}
 	ClearPageError(page);
-	if (!mapping->a_ops->readpage(file->f_dentry, page)) {
+	if (!mapping->a_ops->readpage(file, page)) {
 		wait_on_page(page);
 		if (Page_Uptodate(page))
 			goto success;
@@ -1519,27 +1550,16 @@ page_not_uptodate:
 }
 
 static int filemap_write_page(struct file *file,
-			      unsigned long index,
 			      struct page * page,
 			      int wait)
 {
-	int result;
-	struct dentry * dentry;
-	struct inode * inode;
-
-	dentry = file->f_dentry;
-	inode = dentry->d_inode;
-
 	/*
 	 * If a task terminates while we're swapping the page, the vma and
 	 * and file could be released: try_to_swap_out has done a get_file.
 	 * vma/file is guaranteed to exist in the unmap/sync cases because
 	 * mmap_sem is held.
 	 */
-	lock_page(page);
-	result = inode->i_mapping->a_ops->writepage(file, dentry, page);
-	UnlockPage(page);
-	return result;
+	return page->mapping->a_ops->writepage(file, page);
 }
 
@@ -1551,7 +1571,7 @@ static int filemap_write_page(struct file *file,
 extern void wakeup_bdflush(int);
 int filemap_swapout(struct page * page, struct file * file)
 {
-	int retval = filemap_write_page(file, page->index, page, 0);
+	int retval = filemap_write_page(file, page, 0);
 	wakeup_bdflush(0);
 	return retval;
 }
@@ -1597,7 +1617,9 @@ static inline int filemap_sync_pte(pte_t * ptep, struct vm_area_struct *vma,
 		printk("weirdness: pgoff=%lu index=%lu address=%lu vm_start=%lu vm_pgoff=%lu\n",
 			pgoff, page->index, address, vma->vm_start, vma->vm_pgoff);
 	}
-	error = filemap_write_page(vma->vm_file, pgoff, page, 1);
+	lock_page(page);
+	error = filemap_write_page(vma->vm_file, page, 1);
+	UnlockPage(page);
 	page_cache_free(page);
 	return error;
 }
diff --git a/mm/highmem.c b/mm/highmem.c
index 691e3df1f..3e028dced 100644
--- a/mm/highmem.c
+++ b/mm/highmem.c
@@ -24,8 +24,12 @@
 unsigned long highmem_mapnr;
 
+/*
+ * Take one locked page, return another low-memory locked page.
+ */
 struct page * prepare_highmem_swapout(struct page * page)
 {
+	struct page *new_page;
 	unsigned long regular_page;
 	unsigned long vaddr;
 	/*
@@ -36,6 +40,14 @@ struct page * prepare_highmem_swapout(struct page * page)
 	if (!PageHighMem(page))
 		return page;
 
+	/*
+	 * Here we break the page lock, and we split the
+	 * dirty page into two. We can unlock the old page,
+	 * and we'll now have two of them. Too bad, it would
+	 * have been nice to continue to potentially share
+	 * across a fork().
+	 */
+	UnlockPage(page);
 	regular_page = __get_free_page(GFP_ATOMIC);
 	if (!regular_page)
 		return NULL;
@@ -49,8 +61,9 @@ struct page * prepare_highmem_swapout(struct page * page)
 	 * we stored its data into the new regular_page.
 	 */
 	__free_page(page);
-
-	return mem_map + MAP_NR(regular_page);
+	new_page = mem_map + MAP_NR(regular_page);
+	LockPage(new_page);
+	return new_page;
 }
 
 struct page * replace_with_highmem(struct page * page)
diff --git a/mm/memory.c b/mm/memory.c
index 84ecb57b5..f0baed69f 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -824,7 +824,6 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
 	map_nr = pte_pagenr(pte);
 	if (map_nr >= max_mapnr)
 		goto bad_wp_page;
-	mm->min_flt++;
 	old_page = mem_map + map_nr;
 
 	/*
@@ -855,7 +854,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
 			flush_cache_page(vma, address);
 			establish_pte(vma, address, page_table, pte_mkyoung(pte_mkdirty(pte_mkwrite(pte))));
 			spin_unlock(&mm->page_table_lock);
-			return 1;
+			return 1;	/* Minor fault */
 	}
 
 	/*
@@ -880,7 +879,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
 	}
 	spin_unlock(&mm->page_table_lock);
 	__free_page(new_page);
-	return 1;
+	return 1;	/* Minor fault */
 
 bad_wp_page:
 	spin_unlock(&mm->page_table_lock);
@@ -1049,12 +1048,9 @@ static int do_swap_page(struct mm_struct * mm,
 	}
 
 	mm->rss++;
-	mm->min_flt++;
 	pte = mk_pte(page, vma->vm_page_prot);
 
-	SetPageSwapEntry(page);
-
 	/*
 	 * Freeze the "shared"ness of the page, ie page_count + swap_count.
 	 * Must lock page before transferring our swap count to already
@@ -1074,7 +1070,7 @@ static int do_swap_page(struct mm_struct * mm,
 	set_pte(page_table, pte);
 
 	/* No need to invalidate - it was non-present before */
 	update_mmu_cache(vma, address, pte);
-	return 1;
+	return 1;	/* Minor fault */
 }
 
 /*
@@ -1094,13 +1090,12 @@ static int do_anonymous_page(struct mm_struct * mm, struct vm_area_struct * vma,
 		clear_user_highpage(page, addr);
 		entry = pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot)));
 		mm->rss++;
-		mm->min_flt++;
 		flush_page_to_ram(page);
 	}
 	set_pte(page_table, entry);
 
 	/* No need to invalidate - it was non-present before */
 	update_mmu_cache(vma, addr, entry);
-	return 1;
+	return 1;	/* Minor fault */
 }
 
 /*
@@ -1133,7 +1128,6 @@ static int do_no_page(struct mm_struct * mm, struct vm_area_struct * vma,
 		return 0;
 	if (new_page == NOPAGE_OOM)
 		return -1;
-	++mm->maj_flt;
 	++mm->rss;
 	/*
 	 * This silly early PAGE_DIRTY setting removes a race
@@ -1156,7 +1150,7 @@ static int do_no_page(struct mm_struct * mm, struct vm_area_struct * vma,
 	set_pte(page_table, entry);
 
 	/* no need to invalidate: a not-present page shouldn't be cached */
 	update_mmu_cache(vma, address, entry);
-	return 1;
+	return 2;	/* Major fault */
 }
 
 /*
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index ba5ba3013..c3ea96efc 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -58,23 +58,6 @@ static int zone_balance_max[MAX_NR_ZONES] = { 255 , 255, 255, };
 */
 #define BAD_RANGE(zone,x) (((zone) != (x)->zone) || (((x)-mem_map) < (zone)->offset) || (((x)-mem_map) >= (zone)->offset+(zone)->size))
 
-#if 0
-
-static inline unsigned long classfree(zone_t *zone)
-{
-	unsigned long free = 0;
-	zone_t *z = zone->zone_pgdat->node_zones;
-
-	while (z != zone) {
-		free += z->free_pages;
-		z++;
-	}
-	free += zone->free_pages;
-	return(free);
-}
-
-#endif
-
 /*
 * Buddy system. Hairy. You really aren't expected to understand this
 *
@@ -227,67 +210,13 @@ static struct page * rmqueue(zone_t *zone, unsigned long order)
 	return NULL;
 }
 
-static int zone_balance_memory(zonelist_t *zonelist)
-{
-	int tried = 0, freed = 0;
-	zone_t **zone;
-	int gfp_mask = zonelist->gfp_mask;
-	extern wait_queue_head_t kswapd_wait;
-
-	zone = zonelist->zones;
-	for (;;) {
-		zone_t *z = *(zone++);
-		if (!z)
-			break;
-		if (z->free_pages > z->pages_low)
-			continue;
-
-		z->zone_wake_kswapd = 1;
-		wake_up_interruptible(&kswapd_wait);
-
-		/* Are we reaching the critical stage? */
-		if (!z->low_on_memory) {
-			/* Not yet critical, so let kswapd handle it.. */
-			if (z->free_pages > z->pages_min)
-				continue;
-			z->low_on_memory = 1;
-		}
-		/*
-		 * In the atomic allocation case we only 'kick' the
-		 * state machine, but do not try to free pages
-		 * ourselves.
-		 */
-		tried = 1;
-		freed |= try_to_free_pages(gfp_mask, z);
-	}
-	if (tried && !freed) {
-		if (!(gfp_mask & __GFP_HIGH))
-			return 0;
-	}
-	return 1;
-}
-
 /*
 * This is the 'heart' of the zoned buddy allocator:
 */
 struct page * __alloc_pages(zonelist_t *zonelist, unsigned long order)
 {
 	zone_t **zone = zonelist->zones;
-	int gfp_mask = zonelist->gfp_mask;
-	static int low_on_memory;
-
-	/*
-	 * If this is a recursive call, we'd better
-	 * do our best to just allocate things without
-	 * further thought.
-	 */
-	if (current->flags & PF_MEMALLOC)
-		goto allocate_ok;
-
-	/* If we're a memory hog, unmap some pages */
-	if (current->hog && low_on_memory &&
-	    (gfp_mask & __GFP_WAIT))
-		swap_out(4, gfp_mask);
+	extern wait_queue_head_t kswapd_wait;
 
 	/*
 	 * (If anyone calls gfp from interrupts nonatomically then it
@@ -304,38 +233,67 @@ struct page * __alloc_pages(zonelist_t *zonelist, unsigned long order)
 			BUG();
 
 		/* Are we supposed to free memory? Don't make it worse.. */
-		if (!z->zone_wake_kswapd && z->free_pages > z->pages_low) {
+		if (!z->zone_wake_kswapd) {
 			struct page *page = rmqueue(z, order);
-			low_on_memory = 0;
+			if (z->free_pages < z->pages_low) {
+				z->zone_wake_kswapd = 1;
+				if (waitqueue_active(&kswapd_wait))
+					wake_up_interruptible(&kswapd_wait);
+			}
 			if (page)
 				return page;
 		}
 	}
 
-	low_on_memory = 1;
 	/*
-	 * Ok, no obvious zones were available, start
-	 * balancing things a bit..
+	 * Ok, we don't have any zones that don't need some
+	 * balancing.. See if we have any that aren't critical..
 	 */
-	if (zone_balance_memory(zonelist)) {
-		zone = zonelist->zones;
-allocate_ok:
-		for (;;) {
-			zone_t *z = *(zone++);
-			if (!z)
-				break;
-			if (z->free_pages) {
-				struct page *page = rmqueue(z, order);
-				if (page)
-					return page;
-			}
+	zone = zonelist->zones;
+	for (;;) {
+		zone_t *z = *(zone++);
+		if (!z)
+			break;
+		if (!z->low_on_memory) {
+			struct page *page = rmqueue(z, order);
+			if (z->free_pages < z->pages_min)
+				z->low_on_memory = 1;
+			if (page)
+				return page;
 		}
 	}
-	return NULL;
 
-/*
- * The main chunk of the balancing code is in this offline branch:
- */
+	/*
+	 * Uhhuh. All the zones have been critical, which means that
+	 * we'd better do some synchronous swap-out. kswapd has not
+	 * been able to cope..
+	 */
+	if (!(current->flags & PF_MEMALLOC)) {
+		int gfp_mask = zonelist->gfp_mask;
+		if (!try_to_free_pages(gfp_mask)) {
+			if (!(gfp_mask & __GFP_HIGH))
+				goto fail;
+		}
+	}
+
+	/*
+	 * Final phase: allocate anything we can!
+	 */
+	zone = zonelist->zones;
+	for (;;) {
+		struct page *page;
+
+		zone_t *z = *(zone++);
+		if (!z)
+			break;
+		page = rmqueue(z, order);
+		if (page)
+			return page;
+	}
+
+fail:
+	/* No luck.. */
+	return NULL;
 }
 
 /*
diff --git a/mm/page_io.c b/mm/page_io.c
index 23acf5af4..b2b6359d0 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -74,7 +74,7 @@ static int rw_swap_page_base(int rw, swp_entry_t entry, struct page *page, int w
 		return 0;
 	}
 	if (!wait) {
-		set_bit(PG_decr_after, &page->flags);
+		SetPageDecrAfter(page);
 		atomic_inc(&nr_async_pages);
 	}
@@ -132,6 +132,11 @@ void rw_swap_page_nolock(int rw, swp_entry_t entry, char *buf, int wait)
 		PAGE_BUG(page);
 	if (PageSwapCache(page))
 		PAGE_BUG(page);
+	if (page->mapping)
+		PAGE_BUG(page);
+	/* needs sync_page to wait I/O completation */
+	page->mapping = &swapper_space;
 	if (!rw_swap_page_base(rw, entry, page, wait))
 		UnlockPage(page);
+	page->mapping = NULL;
 }
diff --git a/mm/slab.c b/mm/slab.c
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -687,6 +687,9 @@ kmem_cache_create(const char *name, size_t size, size_t offset,
 	size_t left_over;
 	size_t align;
 
+#if SLAB_DEBUG_SUPPORT
+	flags |= SLAB_POISON;
+#endif
 	/* Sanity checks... */
 #if SLAB_MGMT_CHECKS
 	if (!name) {
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 29ba0d78b..ad686e4c3 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -47,14 +47,20 @@ void show_swap_cache_info(void)
 
 void add_to_swap_cache(struct page *page, swp_entry_t entry)
 {
+	unsigned long flags;
+
 #ifdef SWAP_CACHE_INFO
 	swap_cache_add_total++;
 #endif
+	if (!PageLocked(page))
+		BUG();
 	if (PageTestandSetSwapCache(page))
 		BUG();
 	if (page->mapping)
 		BUG();
-	add_to_page_cache(page, &swapper_space, entry.val);
+	flags = page->flags & ~((1 << PG_error) | (1 << PG_dirty));
+	page->flags = flags | (1 << PG_referenced) | (1 << PG_uptodate);
+	add_to_page_cache_locked(page, &swapper_space, entry.val);
 }
 
 static inline void remove_from_swap_cache(struct page *page)
@@ -130,9 +136,6 @@ void free_page_and_swap_cache(struct page *page)
 		}
 		UnlockPage(page);
 	}
-
-	ClearPageSwapEntry(page);
-
 	__free_page(page);
 }
@@ -228,6 +231,7 @@ struct page * read_swap_cache_async(swp_entry_t entry, int wait)
 	/*
 	 * Add it to the swap cache and read its contents.
 	 */
+	lock_page(new_page);
 	add_to_swap_cache(new_page, entry);
 	rw_swap_page(READ, new_page, wait);
 	return new_page;
diff --git a/mm/swapfile.c b/mm/swapfile.c
index da2dd9147..c5f8db242 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -200,49 +200,6 @@ bad_count:
 	goto out;
 }
 
-/* needs the big kernel lock */
-swp_entry_t acquire_swap_entry(struct page *page)
-{
-	struct swap_info_struct * p;
-	unsigned long offset, type;
-	swp_entry_t entry;
-
-	if (!PageSwapEntry(page))
-		goto new_swap_entry;
-
-	/* We have the old entry in the page offset still */
-	if (!page->index)
-		goto new_swap_entry;
-	entry.val = page->index;
-	type = SWP_TYPE(entry);
-	if (type >= nr_swapfiles)
-		goto new_swap_entry;
-	p = type + swap_info;
-	if ((p->flags & SWP_WRITEOK) != SWP_WRITEOK)
-		goto new_swap_entry;
-	offset = SWP_OFFSET(entry);
-	if (offset >= p->max)
-		goto new_swap_entry;
-	/* Has it been re-used for something else? */
-	swap_list_lock();
-	swap_device_lock(p);
-	if (p->swap_map[offset])
-		goto unlock_new_swap_entry;
-
-	/* We're cool, we can just use the old one */
-	p->swap_map[offset] = 1;
-	swap_device_unlock(p);
-	nr_swap_pages--;
-	swap_list_unlock();
-	return entry;
-
-unlock_new_swap_entry:
-	swap_device_unlock(p);
-	swap_list_unlock();
-new_swap_entry:
-	return get_swap_page();
-}
-
 /*
 * The swap entry has been read in advance, and we return 1 to indicate
 * that the page has been used or is no longer needed.
@@ -443,8 +400,7 @@ static int try_to_unuse(unsigned int type)
 asmlinkage long sys_swapoff(const char * specialfile)
 {
 	struct swap_info_struct * p = NULL;
-	struct dentry * dentry;
-	struct vfsmount *mnt;
+	struct nameidata nd;
 	int i, type, prev;
 	int err;
@@ -452,9 +408,8 @@ asmlinkage long sys_swapoff(const char * specialfile)
 		return -EPERM;
 
 	lock_kernel();
-	dentry = namei(specialfile);
-	err = PTR_ERR(dentry);
-	if (IS_ERR(dentry))
+	err = user_path_walk(specialfile, &nd);
+	if (err)
 		goto out;
 
 	prev = -1;
@@ -463,11 +418,11 @@ asmlinkage long sys_swapoff(const char * specialfile)
 		p = swap_info + type;
 		if ((p->flags & SWP_WRITEOK) == SWP_WRITEOK) {
 			if (p->swap_file) {
-				if (p->swap_file == dentry)
+				if (p->swap_file == nd.dentry)
 					break;
 			} else {
-				if (S_ISBLK(dentry->d_inode->i_mode)
-				    && (p->swap_device == dentry->d_inode->i_rdev))
+				if (S_ISBLK(nd.dentry->d_inode->i_mode)
+				    && (p->swap_device == nd.dentry->d_inode->i_rdev))
 					break;
 			}
 		}
@@ -509,22 +464,21 @@ asmlinkage long sys_swapoff(const char * specialfile)
 		goto out_dput;
 	}
 	if (p->swap_device)
-		blkdev_put(dentry->d_inode->i_bdev, BDEV_SWAP);
-	dput(dentry);
+		blkdev_put(nd.dentry->d_inode->i_bdev, BDEV_SWAP);
+	path_release(&nd);
 
-	dentry = p->swap_file;
+	nd.dentry = p->swap_file;
 	p->swap_file = NULL;
-	mnt = p->swap_vfsmnt;
+	nd.mnt = p->swap_vfsmnt;
 	p->swap_vfsmnt = NULL;
 	p->swap_device = 0;
 	vfree(p->swap_map);
 	p->swap_map = NULL;
 	p->flags = 0;
 	err = 0;
-	mntput(mnt);
 
 out_dput:
-	dput(dentry);
+	path_release(&nd);
 out:
 	unlock_kernel();
 	return err;
@@ -637,8 +591,8 @@ asmlinkage long sys_swapon(const char * specialfile, int swap_flags)
 	if (IS_ERR(name))
 		goto bad_swap_2;
 	error = 0;
-	if (walk_init(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd))
-		error = walk_name(name, &nd);
+	if (path_init(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd))
+		error = path_walk(name, &nd);
 	putname(name);
 	if (error)
 		goto bad_swap_2;
@@ -835,8 +789,7 @@ bad_swap_2:
 	p->flags = 0;
 	if (!(swap_flags & SWAP_FLAG_PREFER))
 		++least_priority;
-	dput(nd.dentry);
-	mntput(nd.mnt);
+	path_release(&nd);
 out:
 	if (swap_header)
 		free_page((long) swap_header);
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 691d47f18..2c07830d0 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -48,7 +48,6 @@ static int try_to_swap_out(struct mm_struct * mm, struct vm_area_struct* vma, un
 	if ((page-mem_map >= max_mapnr) || PageReserved(page))
 		goto out_failed;
 
-	mm->swap_cnt--;
 	/* Don't look at this pte if it's been accessed recently. */
 	if (pte_young(pte)) {
 		/*
@@ -56,11 +55,11 @@ static int try_to_swap_out(struct mm_struct * mm, struct vm_area_struct* vma, un
 		 * tables to the global page map.
 		 */
 		set_pte(page_table, pte_mkold(pte));
-		set_bit(PG_referenced, &page->flags);
+		SetPageReferenced(page);
 		goto out_failed;
 	}
 
-	if (PageLocked(page))
+	if (TryLockPage(page))
 		goto out_failed;
 
 	/*
@@ -76,6 +75,8 @@ static int try_to_swap_out(struct mm_struct * mm, struct vm_area_struct* vma, un
 			swap_duplicate(entry);
 			set_pte(page_table, swp_entry_to_pte(entry));
 drop_pte:
+			UnlockPage(page);
+			mm->swap_cnt--;
 			vma->vm_mm->rss--;
 			flush_tlb_page(vma, address);
 			__free_page(page);
@@ -107,7 +108,14 @@ drop_pte:
 	 * locks etc.
 	 */
 	if (!(gfp_mask & __GFP_IO))
-		goto out_failed;
+		goto out_unlock;
+
+	/*
+	 * Don't do any of the expensive stuff if
+	 * we're not really interested in this zone.
+	 */
+	if (page->zone->free_pages > page->zone->pages_high)
+		goto out_unlock;
 
 	/*
	 * Ok, it's really dirty. That means that
@@ -134,10 +142,12 @@ drop_pte:
 		struct file *file = vma->vm_file;
 		if (file) get_file(file);
 		pte_clear(page_table);
+		mm->swap_cnt--;
 		vma->vm_mm->rss--;
 		flush_tlb_page(vma, address);
 		vmlist_access_unlock(vma->vm_mm);
 		error = swapout(page, file);
+		UnlockPage(page);
 		if (file) fput(file);
 		if (!error)
 			goto out_free_success;
@@ -151,18 +161,20 @@ drop_pte:
 	 * we have the swap cache set up to associate the
 	 * page with that swap entry.
 	 */
-	entry = acquire_swap_entry(page);
+	entry = get_swap_page();
 	if (!entry.val)
-		goto out_failed;	/* No swap space left */
-
+		goto out_unlock;	/* No swap space left */
+
 	if (!(page = prepare_highmem_swapout(page)))
 		goto out_swap_free;
 
 	swap_duplicate(entry);	/* One for the process, one for the swap cache */
 
-	/* This will also lock the page */
+	/* Add it to the swap cache */
 	add_to_swap_cache(page, entry);
 
+	/* Put the swap entry into the pte after the page is in swapcache */
+	mm->swap_cnt--;
 	vma->vm_mm->rss--;
 	set_pte(page_table, swp_entry_to_pte(entry));
 	flush_tlb_page(vma, address);
@@ -178,7 +190,9 @@ out_swap_free:
 	swap_free(entry);
 out_failed:
 	return 0;
-
+out_unlock:
+	UnlockPage(page);
+	return 0;
 }
 
 /*
@@ -328,12 +342,11 @@ static int swap_out_mm(struct mm_struct * mm, int gfp_mask)
 * N.B. This function returns only 0 or 1.  Return values != 1 from
 * the lower level routines result in continued processing.
 */
-int swap_out(unsigned int priority, int gfp_mask)
+static int swap_out(unsigned int priority, int gfp_mask)
 {
 	struct task_struct * p;
 	int counter;
 	int __ret = 0;
-	int assign = 0;
 
 	lock_kernel();
 	/*
@@ -350,7 +363,7 @@ int swap_out(unsigned int priority, int gfp_mask)
 	 * Think of swap_cnt as a "shadow rss" - it tells us which process
 	 * we want to page out (always try largest first).
 	 */
-	counter = nr_threads / (priority+1);
+	counter = (nr_threads << 1) >> (priority >> 1);
 	if (counter < 1)
 		counter = 1;
@@ -358,12 +371,12 @@ int swap_out(unsigned int priority, int gfp_mask)
 		unsigned long max_cnt = 0;
 		struct mm_struct *best = NULL;
 		int pid = 0;
+		int assign = 0;
 	select:
 		read_lock(&tasklist_lock);
 		p = init_task.next_task;
 		for (; p != &init_task; p = p->next_task) {
 			struct mm_struct *mm = p->mm;
-			p->hog = 0;
 			if (!p->swappable || !mm)
 				continue;
 			if (mm->rss <= 0)
@@ -377,25 +390,6 @@ int swap_out(unsigned int priority, int gfp_mask)
 				pid = p->pid;
 			}
 		}
-		if (assign == 1) {
-			/* we just assigned swap_cnt, normalise values */
-			assign = 2;
-			p = init_task.next_task;
-			for (; p != &init_task; p = p->next_task) {
-				int i = 0;
-				struct mm_struct *mm = p->mm;
-				if (!p->swappable || !mm || mm->rss <= 0)
-					continue;
-				/* small processes are swapped out less */
-				while ((mm->swap_cnt << 2 * (i + 1) < max_cnt))
-					i++;
-				mm->swap_cnt >>= i;
-				mm->swap_cnt += i;	/* if swap_cnt reaches 0 */
-				/* we're big -> hog treatment */
-				if (!i)
-					p->hog = 1;
-			}
-		}
 		read_unlock(&tasklist_lock);
 		if (!best) {
 			if (!assign) {
@@ -429,22 +423,25 @@ out:
 * now we need this so that we can do page allocations
 * without holding the kernel lock etc.
 *
-* We want to try to free "count" pages, and we need to
-* cluster them so that we get good swap-out behaviour. See
-* the "free_memory()" macro for details.
+* We want to try to free "count" pages, and we want to
+* cluster them so that we get good swap-out behaviour.
+*
+* Don't try _too_ hard, though. We don't want to have bad
+* latency.
 */
-static int do_try_to_free_pages(unsigned int gfp_mask, zone_t *zone)
+#define FREE_COUNT	8
+#define SWAP_COUNT	8
+static int do_try_to_free_pages(unsigned int gfp_mask)
 {
 	int priority;
-	int count = SWAP_CLUSTER_MAX;
-	int ret;
+	int count = FREE_COUNT;
 
 	/* Always trim SLAB caches when memory gets low. */
 	kmem_cache_reap(gfp_mask);
 
 	priority = 6;
 	do {
-		while ((ret = shrink_mmap(priority, gfp_mask, zone))) {
+		while (shrink_mmap(priority, gfp_mask)) {
 			if (!--count)
 				goto done;
 		}
@@ -457,27 +454,41 @@
 		 * shrink_mmap() almost never fail when there's
 		 * really plenty of memory free.
 		 */
-		count -= shrink_dcache_memory(priority, gfp_mask, zone);
-		count -= shrink_icache_memory(priority, gfp_mask, zone);
+		count -= shrink_dcache_memory(priority, gfp_mask);
+		count -= shrink_icache_memory(priority, gfp_mask);
 		if (count <= 0)
 			goto done;
 
-		while (shm_swap(priority, gfp_mask, zone)) {
+		while (shm_swap(priority, gfp_mask)) {
 			if (!--count)
 				goto done;
 		}
 	}
 
-	/* Then, try to page stuff out..
-	 * We use swapcount here because this doesn't actually
-	 * free pages */
-	while (swap_out(priority, gfp_mask)) {
-		if (!--count)
-			goto done;
+	/*
+	 * Then, try to page stuff out..
+	 *
+	 * This will not actually free any pages (they get
+	 * put in the swap cache), so we must not count this
+	 * as a "count" success.
+	 */
+	{
+		int swap_count = SWAP_COUNT;
+		while (swap_out(priority, gfp_mask))
+			if (--swap_count < 0)
+				break;
 	}
 	} while (--priority >= 0);
 
-done:
-	return priority >= 0;
+	/* Always end on a shrink_mmap.. */
+	while (shrink_mmap(0, gfp_mask)) {
+		if (!--count)
+			goto done;
+	}
+
+	return 0;
+
+done:
+	return 1;
 }
 
 DECLARE_WAIT_QUEUE_HEAD(kswapd_wait);
@@ -497,10 +508,7 @@ DECLARE_WAIT_QUEUE_HEAD(kswapd_wait);
 */
 int kswapd(void *unused)
 {
-	int i;
 	struct task_struct *tsk = current;
-	pg_data_t *pgdat;
-	zone_t *zone;
 
 	tsk->session = 1;
 	tsk->pgrp = 1;
@@ -521,27 +529,30 @@ int kswapd(void *unused)
 	 */
 	tsk->flags |= PF_MEMALLOC;
 
-	while (1) {
-		/*
-		 * If we actually get into a low-memory situation,
-		 * the processes needing more memory will wake us
-		 * up on a more timely basis.
-		 */
+	for (;;) {
+		pg_data_t *pgdat;
+		int something_to_do = 0;
+
 		pgdat = pgdat_list;
-		while (pgdat) {
-			for (i = 0; i < MAX_NR_ZONES; i++) {
-				zone = pgdat->node_zones + i;
+		do {
+			int i;
+			for(i = 0; i < MAX_NR_ZONES; i++) {
+				zone_t *zone = pgdat->node_zones+ i;
+				if (!zone->size || !zone->zone_wake_kswapd)
+					continue;
+				something_to_do = 1;
+				do_try_to_free_pages(GFP_KSWAPD);
 				if (tsk->need_resched)
 					schedule();
-				if ((!zone->size) || (!zone->zone_wake_kswapd))
-					continue;
-				do_try_to_free_pages(GFP_KSWAPD, zone);
 			}
+			run_task_queue(&tq_disk);
 			pgdat = pgdat->node_next;
+		} while (pgdat);
+
+		if (!something_to_do) {
+			tsk->state = TASK_INTERRUPTIBLE;
+			interruptible_sleep_on(&kswapd_wait);
 		}
-		run_task_queue(&tq_disk);
-		tsk->state = TASK_INTERRUPTIBLE;
-		interruptible_sleep_on(&kswapd_wait);
 	}
 }
@@ -560,13 +571,13 @@ int kswapd(void *unused)
 * can be done by just dropping cached pages without having
 * any deadlock issues.
 */
-int try_to_free_pages(unsigned int gfp_mask, zone_t *zone)
+int try_to_free_pages(unsigned int gfp_mask)
 {
 	int retval = 1;
 
 	if (gfp_mask & __GFP_WAIT) {
 		current->flags |= PF_MEMALLOC;
-		retval = do_try_to_free_pages(gfp_mask, zone);
+		retval = do_try_to_free_pages(gfp_mask);
 		current->flags &= ~PF_MEMALLOC;
 	}
 	return retval;