author     Ralf Baechle <ralf@linux-mips.org>    2000-05-12 21:05:59 +0000
committer  Ralf Baechle <ralf@linux-mips.org>    2000-05-12 21:05:59 +0000
commit     ba2dacab305c598cd4c34a604f8e276bf5bab5ff (patch)
tree       78670a0139bf4d5ace617b29b7eba82bbc74d602 /mm
parent     b77bf69998121e689c5e86cc5630d39a0a9ee6ca (diff)
Merge with Linux 2.3.99-pre7 and various other bits.
Diffstat (limited to 'mm')
-rw-r--r--   mm/filemap.c      208
-rw-r--r--   mm/highmem.c       17
-rw-r--r--   mm/memory.c        16
-rw-r--r--   mm/page_alloc.c   144
-rw-r--r--   mm/page_io.c        7
-rw-r--r--   mm/slab.c           3
-rw-r--r--   mm/swap_state.c    12
-rw-r--r--   mm/swapfile.c      75
-rw-r--r--   mm/vmscan.c       151
9 files changed, 298 insertions, 335 deletions
diff --git a/mm/filemap.c b/mm/filemap.c
index d0df8bd2c..acafb3353 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -46,7 +46,7 @@ unsigned int page_hash_bits;
struct page **page_hash_table;
struct list_head lru_cache;
-spinlock_t pagecache_lock = SPIN_LOCK_UNLOCKED;
+static spinlock_t pagecache_lock = SPIN_LOCK_UNLOCKED;
/*
* NOTE: to avoid deadlocking you must never acquire the pagecache_lock with
* the pagemap_lru_lock held.
@@ -67,7 +67,7 @@ void __add_page_to_hash_queue(struct page * page, struct page **p)
PAGE_BUG(page);
}
-static void remove_page_from_hash_queue(struct page * page)
+static inline void remove_page_from_hash_queue(struct page * page)
{
if(page->pprev_hash) {
if(page->next_hash)
@@ -92,47 +92,71 @@ static inline int sync_page(struct page *page)
* sure the page is locked and that nobody else uses it - or that usage
* is safe.
*/
+static inline void __remove_inode_page(struct page *page)
+{
+ remove_page_from_inode_queue(page);
+ remove_page_from_hash_queue(page);
+ page->mapping = NULL;
+}
+
void remove_inode_page(struct page *page)
{
if (!PageLocked(page))
PAGE_BUG(page);
- /* Initiate completion of any async operations */
- sync_page(page);
-
spin_lock(&pagecache_lock);
- remove_page_from_inode_queue(page);
- remove_page_from_hash_queue(page);
- page->mapping = NULL;
+ __remove_inode_page(page);
spin_unlock(&pagecache_lock);
}
+#define ITERATIONS 100
+
void invalidate_inode_pages(struct inode * inode)
{
struct list_head *head, *curr;
struct page * page;
+ int count;
- repeat:
head = &inode->i_mapping->pages;
- spin_lock(&pagecache_lock);
- curr = head->next;
- while (curr != head) {
- page = list_entry(curr, struct page, list);
- curr = curr->next;
+ while (head != head->next) {
+ spin_lock(&pagecache_lock);
+ spin_lock(&pagemap_lru_lock);
+ head = &inode->i_mapping->pages;
+ curr = head->next;
+ count = 0;
- /* We cannot invalidate a locked page */
- if (TryLockPage(page))
- continue;
- spin_unlock(&pagecache_lock);
+ while ((curr != head) && (count++ < ITERATIONS)) {
+ page = list_entry(curr, struct page, list);
+ curr = curr->next;
- lru_cache_del(page);
- remove_inode_page(page);
- UnlockPage(page);
- page_cache_release(page);
- goto repeat;
+ /* We cannot invalidate a locked page */
+ if (TryLockPage(page))
+ continue;
+
+ __lru_cache_del(page);
+ __remove_inode_page(page);
+ UnlockPage(page);
+ page_cache_release(page);
+ }
+
+ /* At this stage we have passed through the list
+ * once, and there may still be locked pages. */
+
+ if (head->next!=head) {
+ page = list_entry(head->next, struct page, list);
+ get_page(page);
+ spin_unlock(&pagemap_lru_lock);
+ spin_unlock(&pagecache_lock);
+ /* We need to block */
+ lock_page(page);
+ UnlockPage(page);
+ page_cache_release(page);
+ } else {
+ spin_unlock(&pagemap_lru_lock);
+ spin_unlock(&pagecache_lock);
+ }
}
- spin_unlock(&pagecache_lock);
}
/*
@@ -163,10 +187,10 @@ repeat:
/* page wholly truncated - free it */
if (offset >= start) {
if (TryLockPage(page)) {
- spin_unlock(&pagecache_lock);
get_page(page);
+ spin_unlock(&pagecache_lock);
wait_on_page(page);
- put_page(page);
+ page_cache_release(page);
goto repeat;
}
get_page(page);
@@ -236,57 +260,47 @@ repeat:
spin_unlock(&pagecache_lock);
}
-int shrink_mmap(int priority, int gfp_mask, zone_t *zone)
+int shrink_mmap(int priority, int gfp_mask)
{
- int ret = 0, loop = 0, count;
- LIST_HEAD(young);
+ int ret = 0, count;
LIST_HEAD(old);
- LIST_HEAD(forget);
struct list_head * page_lru, * dispose;
struct page * page = NULL;
- struct zone_struct * p_zone;
- int maxloop = 256 >> priority;
- if (!zone)
- BUG();
-
- count = nr_lru_pages >> priority;
- if (!count)
- return ret;
+ count = nr_lru_pages / (priority + 1);
- spin_lock(&pagemap_lru_lock);
-again:
/* we need pagemap_lru_lock for list_del() ... subtle code below */
+ spin_lock(&pagemap_lru_lock);
while (count > 0 && (page_lru = lru_cache.prev) != &lru_cache) {
page = list_entry(page_lru, struct page, lru);
list_del(page_lru);
- p_zone = page->zone;
- /*
- * These two tests are there to make sure we don't free too
- * many pages from the "wrong" zone. We free some anyway,
- * they are the least recently used pages in the system.
- * When we don't free them, leave them in &old.
- */
- dispose = &old;
- if (p_zone != zone && (loop > (maxloop / 4) ||
- p_zone->free_pages > p_zone->pages_high))
+ dispose = &lru_cache;
+ if (PageTestandClearReferenced(page))
goto dispose_continue;
- /* The page is in use, or was used very recently, put it in
- * &young to make sure that we won't try to free it the next
- * time */
- dispose = &young;
+ count--;
- if (test_and_clear_bit(PG_referenced, &page->flags))
- goto dispose_continue;
+ /*
+ * I'm ambivalent on this one.. Should we try to
+ * maintain LRU on the LRU list, and put pages that
+ * are old at the end of the queue, even if that
+ * means that we'll re-scan them again soon and
+ * often waste CPU time? Or should we just let any
+ * pages we do not want to touch now for one reason
+ * or another percolate to be "young"?
+ *
+ dispose = &old;
+ *
+ */
- count--;
+ /*
+ * Avoid unscalable SMP locking for pages we can
+ * immediately tell are untouchable..
+ */
if (!page->buffers && page_count(page) > 1)
goto dispose_continue;
- /* Page not used -> free it; if that fails -> &old */
- dispose = &old;
if (TryLockPage(page))
goto dispose_continue;
@@ -300,7 +314,10 @@ again:
/* avoid freeing the page while it's locked */
get_page(page);
- /* Is it a buffer page? */
+ /*
+ * Is it a buffer page? Try to clean it up regardless
+ * of zone - it's old.
+ */
if (page->buffers) {
if (!try_to_free_buffers(page))
goto unlock_continue;
@@ -335,19 +352,23 @@ again:
goto made_inode_progress;
}
+ /*
+ * Page is from a zone we don't care about.
+ * Don't drop page cache entries in vain.
+ */
+ if (page->zone->free_pages > page->zone->pages_high)
+ goto cache_unlock_continue;
+
/* is it a page-cache page? */
if (page->mapping) {
if (!PageDirty(page) && !pgcache_under_min()) {
- remove_page_from_inode_queue(page);
- remove_page_from_hash_queue(page);
- page->mapping = NULL;
+ __remove_inode_page(page);
spin_unlock(&pagecache_lock);
goto made_inode_progress;
}
goto cache_unlock_continue;
}
- dispose = &forget;
printk(KERN_ERR "shrink_mmap: unknown LRU page!\n");
cache_unlock_continue:
@@ -356,10 +377,6 @@ unlock_continue:
spin_lock(&pagemap_lru_lock);
UnlockPage(page);
put_page(page);
- list_add(page_lru, dispose);
- continue;
-
- /* we're holding pagemap_lru_lock, so we can just loop again */
dispose_continue:
list_add(page_lru, dispose);
}
@@ -375,13 +392,7 @@ made_buffer_progress:
/* nr_lru_pages needs the spinlock */
nr_lru_pages--;
- loop++;
- /* wrong zone? not looped too often? roll again... */
- if (page->zone != zone && loop < maxloop)
- goto again;
-
out:
- list_splice(&young, &lru_cache);
list_splice(&old, lru_cache.prev);
spin_unlock(&pagemap_lru_lock);
@@ -403,7 +414,7 @@ inside:
if (page->index == offset)
break;
}
- set_bit(PG_referenced, &page->flags);
+ SetPageReferenced(page);
not_found:
return page;
}
@@ -495,6 +506,26 @@ int generic_buffer_fdatasync(struct inode *inode, unsigned long start_idx, unsig
}
/*
+ * Add a page to the inode page cache.
+ *
+ * The caller must have locked the page and
+ * set all the page flags correctly..
+ */
+void add_to_page_cache_locked(struct page * page, struct address_space *mapping, unsigned long index)
+{
+ if (!PageLocked(page))
+ BUG();
+
+ get_page(page);
+ spin_lock(&pagecache_lock);
+ page->index = index;
+ add_page_to_inode_queue(mapping, page);
+ __add_page_to_hash_queue(page, page_hash(mapping, index));
+ lru_cache_add(page);
+ spin_unlock(&pagecache_lock);
+}
+
+/*
* This adds a page to the page cache, starting out as locked,
* owned by us, referenced, but not uptodate and with no errors.
*/
@@ -569,7 +600,7 @@ static inline int page_cache_read(struct file * file, unsigned long offset)
return -ENOMEM;
if (!add_to_page_cache_unique(page, mapping, offset, hash)) {
- int error = mapping->a_ops->readpage(file->f_dentry, page);
+ int error = mapping->a_ops->readpage(file, page);
page_cache_release(page);
return error;
}
@@ -1104,7 +1135,7 @@ page_not_up_to_date:
readpage:
/* ... and start the actual read. The read will unlock the page. */
- error = mapping->a_ops->readpage(filp->f_dentry, page);
+ error = mapping->a_ops->readpage(filp, page);
if (!error) {
if (Page_Uptodate(page))
@@ -1486,7 +1517,7 @@ page_not_uptodate:
goto success;
}
- if (!mapping->a_ops->readpage(file->f_dentry, page)) {
+ if (!mapping->a_ops->readpage(file, page)) {
wait_on_page(page);
if (Page_Uptodate(page))
goto success;
@@ -1504,7 +1535,7 @@ page_not_uptodate:
goto success;
}
ClearPageError(page);
- if (!mapping->a_ops->readpage(file->f_dentry, page)) {
+ if (!mapping->a_ops->readpage(file, page)) {
wait_on_page(page);
if (Page_Uptodate(page))
goto success;
@@ -1519,27 +1550,16 @@ page_not_uptodate:
}
static int filemap_write_page(struct file *file,
- unsigned long index,
struct page * page,
int wait)
{
- int result;
- struct dentry * dentry;
- struct inode * inode;
-
- dentry = file->f_dentry;
- inode = dentry->d_inode;
-
/*
* If a task terminates while we're swapping the page, the vma
* and file could be released: try_to_swap_out has done a get_file.
* vma/file is guaranteed to exist in the unmap/sync cases because
* mmap_sem is held.
*/
- lock_page(page);
- result = inode->i_mapping->a_ops->writepage(file, dentry, page);
- UnlockPage(page);
- return result;
+ return page->mapping->a_ops->writepage(file, page);
}
@@ -1551,7 +1571,7 @@ static int filemap_write_page(struct file *file,
extern void wakeup_bdflush(int);
int filemap_swapout(struct page * page, struct file * file)
{
- int retval = filemap_write_page(file, page->index, page, 0);
+ int retval = filemap_write_page(file, page, 0);
wakeup_bdflush(0);
return retval;
}
@@ -1597,7 +1617,9 @@ static inline int filemap_sync_pte(pte_t * ptep, struct vm_area_struct *vma,
printk("weirdness: pgoff=%lu index=%lu address=%lu vm_start=%lu vm_pgoff=%lu\n",
pgoff, page->index, address, vma->vm_start, vma->vm_pgoff);
}
- error = filemap_write_page(vma->vm_file, pgoff, page, 1);
+ lock_page(page);
+ error = filemap_write_page(vma->vm_file, page, 1);
+ UnlockPage(page);
page_cache_free(page);
return error;
}
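The filemap.c changes above introduce add_to_page_cache_locked() and __remove_inode_page(), both of which rely on the caller already holding the page lock (add_to_page_cache_locked() BUG()s otherwise, and __remove_inode_page() additionally expects the pagecache_lock). A minimal sketch of the intended calling pattern, using only helpers that appear in this hunk or in the code it removes; the wrapper function itself is hypothetical, and real callers also set up page->flags first (compare add_to_swap_cache() later in this merge):

/* Hypothetical wrapper: insert an already-referenced page at `index',
 * then take it out again.  add_to_page_cache_locked() takes its own
 * reference and adds the page to the LRU, so removal mirrors the old
 * invalidate path: drop it from the LRU, unhash it, then release the
 * cache's reference. */
static void cache_then_drop(struct page *page,
			    struct address_space *mapping,
			    unsigned long index)
{
	lock_page(page);
	add_to_page_cache_locked(page, mapping, index);
	/* ... the page is now visible to page-cache lookups ... */
	lru_cache_del(page);
	remove_inode_page(page);	/* still locked, as required */
	UnlockPage(page);
	page_cache_release(page);	/* drop the cache's reference */
}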
diff --git a/mm/highmem.c b/mm/highmem.c
index 691e3df1f..3e028dced 100644
--- a/mm/highmem.c
+++ b/mm/highmem.c
@@ -24,8 +24,12 @@
unsigned long highmem_mapnr;
+/*
+ * Take one locked page, return another low-memory locked page.
+ */
struct page * prepare_highmem_swapout(struct page * page)
{
+ struct page *new_page;
unsigned long regular_page;
unsigned long vaddr;
/*
@@ -36,6 +40,14 @@ struct page * prepare_highmem_swapout(struct page * page)
if (!PageHighMem(page))
return page;
+ /*
+ * Here we break the page lock, and we split the
+ * dirty page into two. We can unlock the old page,
+ * and we'll now have two of them. Too bad, it would
+ * have been nice to continue to potentially share
+ * across a fork().
+ */
+ UnlockPage(page);
regular_page = __get_free_page(GFP_ATOMIC);
if (!regular_page)
return NULL;
@@ -49,8 +61,9 @@ struct page * prepare_highmem_swapout(struct page * page)
* we stored its data into the new regular_page.
*/
__free_page(page);
-
- return mem_map + MAP_NR(regular_page);
+ new_page = mem_map + MAP_NR(regular_page);
+ LockPage(new_page);
+ return new_page;
}
struct page * replace_with_highmem(struct page * page)
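prepare_highmem_swapout() now preserves the page-lock invariant across the highmem bounce: it takes one locked page and hands back a locked low-memory page (or the original page, if it was not highmem). This is a sketch of how the swap-out path in this same merge uses it, condensed from the try_to_swap_out() hunk in mm/vmscan.c further down; the error labels belong to that function:

	entry = get_swap_page();
	if (!entry.val)
		goto out_unlock;		/* no swap space left */
	if (!(page = prepare_highmem_swapout(page)))
		goto out_swap_free;
	swap_duplicate(entry);		/* one for the process, one for the swap cache */
	add_to_swap_cache(page, entry);	/* expects a locked page */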
diff --git a/mm/memory.c b/mm/memory.c
index 84ecb57b5..f0baed69f 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -824,7 +824,6 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
map_nr = pte_pagenr(pte);
if (map_nr >= max_mapnr)
goto bad_wp_page;
- mm->min_flt++;
old_page = mem_map + map_nr;
/*
@@ -855,7 +854,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
flush_cache_page(vma, address);
establish_pte(vma, address, page_table, pte_mkyoung(pte_mkdirty(pte_mkwrite(pte))));
spin_unlock(&mm->page_table_lock);
- return 1;
+ return 1; /* Minor fault */
}
/*
@@ -880,7 +879,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
}
spin_unlock(&mm->page_table_lock);
__free_page(new_page);
- return 1;
+ return 1; /* Minor fault */
bad_wp_page:
spin_unlock(&mm->page_table_lock);
@@ -1049,12 +1048,9 @@ static int do_swap_page(struct mm_struct * mm,
}
mm->rss++;
- mm->min_flt++;
pte = mk_pte(page, vma->vm_page_prot);
- SetPageSwapEntry(page);
-
/*
* Freeze the "shared"ness of the page, ie page_count + swap_count.
* Must lock page before transferring our swap count to already
@@ -1074,7 +1070,7 @@ static int do_swap_page(struct mm_struct * mm,
set_pte(page_table, pte);
/* No need to invalidate - it was non-present before */
update_mmu_cache(vma, address, pte);
- return 1;
+ return 1; /* Minor fault */
}
/*
@@ -1094,13 +1090,12 @@ static int do_anonymous_page(struct mm_struct * mm, struct vm_area_struct * vma,
clear_user_highpage(page, addr);
entry = pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot)));
mm->rss++;
- mm->min_flt++;
flush_page_to_ram(page);
}
set_pte(page_table, entry);
/* No need to invalidate - it was non-present before */
update_mmu_cache(vma, addr, entry);
- return 1;
+ return 1; /* Minor fault */
}
/*
@@ -1133,7 +1128,6 @@ static int do_no_page(struct mm_struct * mm, struct vm_area_struct * vma,
return 0;
if (new_page == NOPAGE_OOM)
return -1;
- ++mm->maj_flt;
++mm->rss;
/*
* This silly early PAGE_DIRTY setting removes a race
@@ -1156,7 +1150,7 @@ static int do_no_page(struct mm_struct * mm, struct vm_area_struct * vma,
set_pte(page_table, entry);
/* no need to invalidate: a not-present page shouldn't be cached */
update_mmu_cache(vma, address, entry);
- return 1;
+ return 2; /* Major fault */
}
/*
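The memory.c hunks stop bumping mm->min_flt/mm->maj_flt inside the handlers and instead encode the fault type in the return value: 1 for a minor fault, 2 for a major fault (0 and negative keep their old meanings). A hedged sketch of how an architecture fault handler could account for this; the caller shown here, its exact signature, and the tsk->min_flt/maj_flt counters are assumptions and not part of this diff:

	/* sketch only: per-task fault accounting moves to the arch caller */
	switch (handle_mm_fault(mm, vma, address, write)) {
	case 1:
		tsk->min_flt++;		/* assumed per-task counter */
		break;
	case 2:
		tsk->maj_flt++;		/* assumed per-task counter */
		break;
	case 0:
		goto do_sigbus;		/* assumed error labels */
	default:
		goto out_of_memory;
	}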
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index ba5ba3013..c3ea96efc 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -58,23 +58,6 @@ static int zone_balance_max[MAX_NR_ZONES] = { 255 , 255, 255, };
*/
#define BAD_RANGE(zone,x) (((zone) != (x)->zone) || (((x)-mem_map) < (zone)->offset) || (((x)-mem_map) >= (zone)->offset+(zone)->size))
-#if 0
-
-static inline unsigned long classfree(zone_t *zone)
-{
- unsigned long free = 0;
- zone_t *z = zone->zone_pgdat->node_zones;
-
- while (z != zone) {
- free += z->free_pages;
- z++;
- }
- free += zone->free_pages;
- return(free);
-}
-
-#endif
-
/*
* Buddy system. Hairy. You really aren't expected to understand this
*
@@ -227,67 +210,13 @@ static struct page * rmqueue(zone_t *zone, unsigned long order)
return NULL;
}
-static int zone_balance_memory(zonelist_t *zonelist)
-{
- int tried = 0, freed = 0;
- zone_t **zone;
- int gfp_mask = zonelist->gfp_mask;
- extern wait_queue_head_t kswapd_wait;
-
- zone = zonelist->zones;
- for (;;) {
- zone_t *z = *(zone++);
- if (!z)
- break;
- if (z->free_pages > z->pages_low)
- continue;
-
- z->zone_wake_kswapd = 1;
- wake_up_interruptible(&kswapd_wait);
-
- /* Are we reaching the critical stage? */
- if (!z->low_on_memory) {
- /* Not yet critical, so let kswapd handle it.. */
- if (z->free_pages > z->pages_min)
- continue;
- z->low_on_memory = 1;
- }
- /*
- * In the atomic allocation case we only 'kick' the
- * state machine, but do not try to free pages
- * ourselves.
- */
- tried = 1;
- freed |= try_to_free_pages(gfp_mask, z);
- }
- if (tried && !freed) {
- if (!(gfp_mask & __GFP_HIGH))
- return 0;
- }
- return 1;
-}
-
/*
* This is the 'heart' of the zoned buddy allocator:
*/
struct page * __alloc_pages(zonelist_t *zonelist, unsigned long order)
{
zone_t **zone = zonelist->zones;
- int gfp_mask = zonelist->gfp_mask;
- static int low_on_memory;
-
- /*
- * If this is a recursive call, we'd better
- * do our best to just allocate things without
- * further thought.
- */
- if (current->flags & PF_MEMALLOC)
- goto allocate_ok;
-
- /* If we're a memory hog, unmap some pages */
- if (current->hog && low_on_memory &&
- (gfp_mask & __GFP_WAIT))
- swap_out(4, gfp_mask);
+ extern wait_queue_head_t kswapd_wait;
/*
* (If anyone calls gfp from interrupts nonatomically then it
@@ -304,38 +233,67 @@ struct page * __alloc_pages(zonelist_t *zonelist, unsigned long order)
BUG();
/* Are we supposed to free memory? Don't make it worse.. */
- if (!z->zone_wake_kswapd && z->free_pages > z->pages_low) {
+ if (!z->zone_wake_kswapd) {
struct page *page = rmqueue(z, order);
- low_on_memory = 0;
+ if (z->free_pages < z->pages_low) {
+ z->zone_wake_kswapd = 1;
+ if (waitqueue_active(&kswapd_wait))
+ wake_up_interruptible(&kswapd_wait);
+ }
if (page)
return page;
}
}
- low_on_memory = 1;
/*
- * Ok, no obvious zones were available, start
- * balancing things a bit..
+ * Ok, we don't have any zones that don't need some
+ * balancing.. See if we have any that aren't critical..
*/
- if (zone_balance_memory(zonelist)) {
- zone = zonelist->zones;
-allocate_ok:
- for (;;) {
- zone_t *z = *(zone++);
- if (!z)
- break;
- if (z->free_pages) {
- struct page *page = rmqueue(z, order);
- if (page)
- return page;
- }
+ zone = zonelist->zones;
+ for (;;) {
+ zone_t *z = *(zone++);
+ if (!z)
+ break;
+ if (!z->low_on_memory) {
+ struct page *page = rmqueue(z, order);
+ if (z->free_pages < z->pages_min)
+ z->low_on_memory = 1;
+ if (page)
+ return page;
}
}
- return NULL;
-/*
- * The main chunk of the balancing code is in this offline branch:
- */
+ /*
+ * Uhhuh. All the zones have been critical, which means that
+ * we'd better do some synchronous swap-out. kswapd has not
+ * been able to cope..
+ */
+ if (!(current->flags & PF_MEMALLOC)) {
+ int gfp_mask = zonelist->gfp_mask;
+ if (!try_to_free_pages(gfp_mask)) {
+ if (!(gfp_mask & __GFP_HIGH))
+ goto fail;
+ }
+ }
+
+ /*
+ * Final phase: allocate anything we can!
+ */
+ zone = zonelist->zones;
+ for (;;) {
+ struct page *page;
+
+ zone_t *z = *(zone++);
+ if (!z)
+ break;
+ page = rmqueue(z, order);
+ if (page)
+ return page;
+ }
+
+fail:
+ /* No luck.. */
+ return NULL;
}
/*
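The rewritten __alloc_pages() above drops zone_balance_memory() in favour of a staged, inline fallback. A condensed sketch of the decision order follows; the helper names are hypothetical stand-ins for the per-zone loops in the real function, not actual kernel symbols:

/* Condensed sketch of __alloc_pages(); helper names are placeholders. */
static struct page *alloc_pages_sketch(zonelist_t *zonelist, unsigned long order)
{
	struct page *page;
	int gfp_mask = zonelist->gfp_mask;

	/* 1: zones that have not yet asked kswapd for help, waking
	 *    kswapd once free_pages drops below pages_low            */
	if ((page = rmqueue_from_easy_zones(zonelist, order)))
		return page;
	/* 2: zones not yet marked low_on_memory (set below pages_min) */
	if ((page = rmqueue_from_noncritical_zones(zonelist, order)))
		return page;
	/* 3: everything is critical - reclaim synchronously, unless we
	 *    are the reclaimer (PF_MEMALLOC) or may dip into reserves  */
	if (!(current->flags & PF_MEMALLOC) &&
	    !try_to_free_pages(gfp_mask) && !(gfp_mask & __GFP_HIGH))
		return NULL;
	/* 4: last resort - take whatever rmqueue() will still give us  */
	return rmqueue_from_any_zone(zonelist, order);
}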
diff --git a/mm/page_io.c b/mm/page_io.c
index 23acf5af4..b2b6359d0 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -74,7 +74,7 @@ static int rw_swap_page_base(int rw, swp_entry_t entry, struct page *page, int w
return 0;
}
if (!wait) {
- set_bit(PG_decr_after, &page->flags);
+ SetPageDecrAfter(page);
atomic_inc(&nr_async_pages);
}
@@ -132,6 +132,11 @@ void rw_swap_page_nolock(int rw, swp_entry_t entry, char *buf, int wait)
PAGE_BUG(page);
if (PageSwapCache(page))
PAGE_BUG(page);
+ if (page->mapping)
+ PAGE_BUG(page);
+ /* needs sync_page to wait for I/O completion */
+ page->mapping = &swapper_space;
if (!rw_swap_page_base(rw, entry, page, wait))
UnlockPage(page);
+ page->mapping = NULL;
}
diff --git a/mm/slab.c b/mm/slab.c
index 68bbb7d17..055282872 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -687,6 +687,9 @@ kmem_cache_create(const char *name, size_t size, size_t offset,
size_t left_over;
size_t align;
+#if SLAB_DEBUG_SUPPORT
+ flags |= SLAB_POISON;
+#endif
/* Sanity checks... */
#if SLAB_MGMT_CHECKS
if (!name) {
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 29ba0d78b..ad686e4c3 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -47,14 +47,20 @@ void show_swap_cache_info(void)
void add_to_swap_cache(struct page *page, swp_entry_t entry)
{
+ unsigned long flags;
+
#ifdef SWAP_CACHE_INFO
swap_cache_add_total++;
#endif
+ if (!PageLocked(page))
+ BUG();
if (PageTestandSetSwapCache(page))
BUG();
if (page->mapping)
BUG();
- add_to_page_cache(page, &swapper_space, entry.val);
+ flags = page->flags & ~((1 << PG_error) | (1 << PG_dirty));
+ page->flags = flags | (1 << PG_referenced) | (1 << PG_uptodate);
+ add_to_page_cache_locked(page, &swapper_space, entry.val);
}
static inline void remove_from_swap_cache(struct page *page)
@@ -130,9 +136,6 @@ void free_page_and_swap_cache(struct page *page)
}
UnlockPage(page);
}
-
- ClearPageSwapEntry(page);
-
__free_page(page);
}
@@ -228,6 +231,7 @@ struct page * read_swap_cache_async(swp_entry_t entry, int wait)
/*
* Add it to the swap cache and read its contents.
*/
+ lock_page(new_page);
add_to_swap_cache(new_page, entry);
rw_swap_page(READ, new_page, wait);
return new_page;
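add_to_swap_cache() now demands a locked page (it BUG()s otherwise) and funnels into add_to_page_cache_locked(), so every caller has to take the page lock first, exactly as read_swap_cache_async() does above. A minimal sketch of the new convention:

	/* new_page must be locked before it enters the swap cache */
	lock_page(new_page);
	add_to_swap_cache(new_page, entry);
	rw_swap_page(READ, new_page, wait);	/* I/O completion unlocks the page */
	return new_page;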
diff --git a/mm/swapfile.c b/mm/swapfile.c
index da2dd9147..c5f8db242 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -200,49 +200,6 @@ bad_count:
goto out;
}
-/* needs the big kernel lock */
-swp_entry_t acquire_swap_entry(struct page *page)
-{
- struct swap_info_struct * p;
- unsigned long offset, type;
- swp_entry_t entry;
-
- if (!PageSwapEntry(page))
- goto new_swap_entry;
-
- /* We have the old entry in the page offset still */
- if (!page->index)
- goto new_swap_entry;
- entry.val = page->index;
- type = SWP_TYPE(entry);
- if (type >= nr_swapfiles)
- goto new_swap_entry;
- p = type + swap_info;
- if ((p->flags & SWP_WRITEOK) != SWP_WRITEOK)
- goto new_swap_entry;
- offset = SWP_OFFSET(entry);
- if (offset >= p->max)
- goto new_swap_entry;
- /* Has it been re-used for something else? */
- swap_list_lock();
- swap_device_lock(p);
- if (p->swap_map[offset])
- goto unlock_new_swap_entry;
-
- /* We're cool, we can just use the old one */
- p->swap_map[offset] = 1;
- swap_device_unlock(p);
- nr_swap_pages--;
- swap_list_unlock();
- return entry;
-
-unlock_new_swap_entry:
- swap_device_unlock(p);
- swap_list_unlock();
-new_swap_entry:
- return get_swap_page();
-}
-
/*
* The swap entry has been read in advance, and we return 1 to indicate
* that the page has been used or is no longer needed.
@@ -443,8 +400,7 @@ static int try_to_unuse(unsigned int type)
asmlinkage long sys_swapoff(const char * specialfile)
{
struct swap_info_struct * p = NULL;
- struct dentry * dentry;
- struct vfsmount *mnt;
+ struct nameidata nd;
int i, type, prev;
int err;
@@ -452,9 +408,8 @@ asmlinkage long sys_swapoff(const char * specialfile)
return -EPERM;
lock_kernel();
- dentry = namei(specialfile);
- err = PTR_ERR(dentry);
- if (IS_ERR(dentry))
+ err = user_path_walk(specialfile, &nd);
+ if (err)
goto out;
prev = -1;
@@ -463,11 +418,11 @@ asmlinkage long sys_swapoff(const char * specialfile)
p = swap_info + type;
if ((p->flags & SWP_WRITEOK) == SWP_WRITEOK) {
if (p->swap_file) {
- if (p->swap_file == dentry)
+ if (p->swap_file == nd.dentry)
break;
} else {
- if (S_ISBLK(dentry->d_inode->i_mode)
- && (p->swap_device == dentry->d_inode->i_rdev))
+ if (S_ISBLK(nd.dentry->d_inode->i_mode)
+ && (p->swap_device == nd.dentry->d_inode->i_rdev))
break;
}
}
@@ -509,22 +464,21 @@ asmlinkage long sys_swapoff(const char * specialfile)
goto out_dput;
}
if (p->swap_device)
- blkdev_put(dentry->d_inode->i_bdev, BDEV_SWAP);
- dput(dentry);
+ blkdev_put(nd.dentry->d_inode->i_bdev, BDEV_SWAP);
+ path_release(&nd);
- dentry = p->swap_file;
+ nd.dentry = p->swap_file;
p->swap_file = NULL;
- mnt = p->swap_vfsmnt;
+ nd.mnt = p->swap_vfsmnt;
p->swap_vfsmnt = NULL;
p->swap_device = 0;
vfree(p->swap_map);
p->swap_map = NULL;
p->flags = 0;
err = 0;
- mntput(mnt);
out_dput:
- dput(dentry);
+ path_release(&nd);
out:
unlock_kernel();
return err;
@@ -637,8 +591,8 @@ asmlinkage long sys_swapon(const char * specialfile, int swap_flags)
if (IS_ERR(name))
goto bad_swap_2;
error = 0;
- if (walk_init(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd))
- error = walk_name(name, &nd);
+ if (path_init(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd))
+ error = path_walk(name, &nd);
putname(name);
if (error)
goto bad_swap_2;
@@ -835,8 +789,7 @@ bad_swap_2:
p->flags = 0;
if (!(swap_flags & SWAP_FLAG_PREFER))
++least_priority;
- dput(nd.dentry);
- mntput(nd.mnt);
+ path_release(&nd);
out:
if (swap_header)
free_page((long) swap_header);
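sys_swapoff() and sys_swapon() are converted from the old namei()/dput() calls to the nameidata-based lookup API. In outline, both paths now follow this pattern (user_path_walk() for a user-space name, path_init()+path_walk() for one already copied in):

	struct nameidata nd;
	int err;

	err = user_path_walk(specialfile, &nd);	/* resolves to nd.dentry + nd.mnt */
	if (err)
		goto out;
	/* ... use nd.dentry->d_inode, nd.mnt ... */
	path_release(&nd);			/* replaces the separate dput()/mntput() */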
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 691d47f18..2c07830d0 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -48,7 +48,6 @@ static int try_to_swap_out(struct mm_struct * mm, struct vm_area_struct* vma, un
if ((page-mem_map >= max_mapnr) || PageReserved(page))
goto out_failed;
- mm->swap_cnt--;
/* Don't look at this pte if it's been accessed recently. */
if (pte_young(pte)) {
/*
@@ -56,11 +55,11 @@ static int try_to_swap_out(struct mm_struct * mm, struct vm_area_struct* vma, un
* tables to the global page map.
*/
set_pte(page_table, pte_mkold(pte));
- set_bit(PG_referenced, &page->flags);
+ SetPageReferenced(page);
goto out_failed;
}
- if (PageLocked(page))
+ if (TryLockPage(page))
goto out_failed;
/*
@@ -76,6 +75,8 @@ static int try_to_swap_out(struct mm_struct * mm, struct vm_area_struct* vma, un
swap_duplicate(entry);
set_pte(page_table, swp_entry_to_pte(entry));
drop_pte:
+ UnlockPage(page);
+ mm->swap_cnt--;
vma->vm_mm->rss--;
flush_tlb_page(vma, address);
__free_page(page);
@@ -107,7 +108,14 @@ drop_pte:
* locks etc.
*/
if (!(gfp_mask & __GFP_IO))
- goto out_failed;
+ goto out_unlock;
+
+ /*
+ * Don't do any of the expensive stuff if
+ * we're not really interested in this zone.
+ */
+ if (page->zone->free_pages > page->zone->pages_high)
+ goto out_unlock;
/*
* Ok, it's really dirty. That means that
@@ -134,10 +142,12 @@ drop_pte:
struct file *file = vma->vm_file;
if (file) get_file(file);
pte_clear(page_table);
+ mm->swap_cnt--;
vma->vm_mm->rss--;
flush_tlb_page(vma, address);
vmlist_access_unlock(vma->vm_mm);
error = swapout(page, file);
+ UnlockPage(page);
if (file) fput(file);
if (!error)
goto out_free_success;
@@ -151,18 +161,20 @@ drop_pte:
* we have the swap cache set up to associate the
* page with that swap entry.
*/
- entry = acquire_swap_entry(page);
+ entry = get_swap_page();
if (!entry.val)
- goto out_failed; /* No swap space left */
-
+ goto out_unlock; /* No swap space left */
+
if (!(page = prepare_highmem_swapout(page)))
goto out_swap_free;
swap_duplicate(entry); /* One for the process, one for the swap cache */
- /* This will also lock the page */
+ /* Add it to the swap cache */
add_to_swap_cache(page, entry);
+
/* Put the swap entry into the pte after the page is in swapcache */
+ mm->swap_cnt--;
vma->vm_mm->rss--;
set_pte(page_table, swp_entry_to_pte(entry));
flush_tlb_page(vma, address);
@@ -178,7 +190,9 @@ out_swap_free:
swap_free(entry);
out_failed:
return 0;
-
+out_unlock:
+ UnlockPage(page);
+ return 0;
}
/*
@@ -328,12 +342,11 @@ static int swap_out_mm(struct mm_struct * mm, int gfp_mask)
* N.B. This function returns only 0 or 1. Return values != 1 from
* the lower level routines result in continued processing.
*/
-int swap_out(unsigned int priority, int gfp_mask)
+static int swap_out(unsigned int priority, int gfp_mask)
{
struct task_struct * p;
int counter;
int __ret = 0;
- int assign = 0;
lock_kernel();
/*
@@ -350,7 +363,7 @@ int swap_out(unsigned int priority, int gfp_mask)
* Think of swap_cnt as a "shadow rss" - it tells us which process
* we want to page out (always try largest first).
*/
- counter = nr_threads / (priority+1);
+ counter = (nr_threads << 1) >> (priority >> 1);
if (counter < 1)
counter = 1;
@@ -358,12 +371,12 @@ int swap_out(unsigned int priority, int gfp_mask)
unsigned long max_cnt = 0;
struct mm_struct *best = NULL;
int pid = 0;
+ int assign = 0;
select:
read_lock(&tasklist_lock);
p = init_task.next_task;
for (; p != &init_task; p = p->next_task) {
struct mm_struct *mm = p->mm;
- p->hog = 0;
if (!p->swappable || !mm)
continue;
if (mm->rss <= 0)
@@ -377,25 +390,6 @@ int swap_out(unsigned int priority, int gfp_mask)
pid = p->pid;
}
}
- if (assign == 1) {
- /* we just assigned swap_cnt, normalise values */
- assign = 2;
- p = init_task.next_task;
- for (; p != &init_task; p = p->next_task) {
- int i = 0;
- struct mm_struct *mm = p->mm;
- if (!p->swappable || !mm || mm->rss <= 0)
- continue;
- /* small processes are swapped out less */
- while ((mm->swap_cnt << 2 * (i + 1) < max_cnt))
- i++;
- mm->swap_cnt >>= i;
- mm->swap_cnt += i; /* if swap_cnt reaches 0 */
- /* we're big -> hog treatment */
- if (!i)
- p->hog = 1;
- }
- }
read_unlock(&tasklist_lock);
if (!best) {
if (!assign) {
@@ -429,22 +423,25 @@ out:
* now we need this so that we can do page allocations
* without holding the kernel lock etc.
*
- * We want to try to free "count" pages, and we need to
- * cluster them so that we get good swap-out behaviour. See
- * the "free_memory()" macro for details.
+ * We want to try to free "count" pages, and we want to
+ * cluster them so that we get good swap-out behaviour.
+ *
+ * Don't try _too_ hard, though. We don't want to have bad
+ * latency.
*/
-static int do_try_to_free_pages(unsigned int gfp_mask, zone_t *zone)
+#define FREE_COUNT 8
+#define SWAP_COUNT 8
+static int do_try_to_free_pages(unsigned int gfp_mask)
{
int priority;
- int count = SWAP_CLUSTER_MAX;
- int ret;
+ int count = FREE_COUNT;
/* Always trim SLAB caches when memory gets low. */
kmem_cache_reap(gfp_mask);
priority = 6;
do {
- while ((ret = shrink_mmap(priority, gfp_mask, zone))) {
+ while (shrink_mmap(priority, gfp_mask)) {
if (!--count)
goto done;
}
@@ -457,27 +454,41 @@ static int do_try_to_free_pages(unsigned int gfp_mask, zone_t *zone)
* shrink_mmap() almost never fails when there's
* really plenty of memory free.
*/
- count -= shrink_dcache_memory(priority, gfp_mask, zone);
- count -= shrink_icache_memory(priority, gfp_mask, zone);
+ count -= shrink_dcache_memory(priority, gfp_mask);
+ count -= shrink_icache_memory(priority, gfp_mask);
if (count <= 0)
goto done;
- while (shm_swap(priority, gfp_mask, zone)) {
+ while (shm_swap(priority, gfp_mask)) {
if (!--count)
goto done;
}
}
- /* Then, try to page stuff out..
- * We use swapcount here because this doesn't actually
- * free pages */
- while (swap_out(priority, gfp_mask)) {
- if (!--count)
- goto done;
+ /*
+ * Then, try to page stuff out..
+ *
+ * This will not actually free any pages (they get
+ * put in the swap cache), so we must not count this
+ * as a "count" success.
+ */
+ {
+ int swap_count = SWAP_COUNT;
+ while (swap_out(priority, gfp_mask))
+ if (--swap_count < 0)
+ break;
}
} while (--priority >= 0);
-done:
- return priority >= 0;
+ /* Always end on a shrink_mmap.. */
+ while (shrink_mmap(0, gfp_mask)) {
+ if (!--count)
+ goto done;
+ }
+
+ return 0;
+
+done:
+ return 1;
}
DECLARE_WAIT_QUEUE_HEAD(kswapd_wait);
@@ -497,10 +508,7 @@ DECLARE_WAIT_QUEUE_HEAD(kswapd_wait);
*/
int kswapd(void *unused)
{
- int i;
struct task_struct *tsk = current;
- pg_data_t *pgdat;
- zone_t *zone;
tsk->session = 1;
tsk->pgrp = 1;
@@ -521,27 +529,30 @@ int kswapd(void *unused)
*/
tsk->flags |= PF_MEMALLOC;
- while (1) {
- /*
- * If we actually get into a low-memory situation,
- * the processes needing more memory will wake us
- * up on a more timely basis.
- */
+ for (;;) {
+ pg_data_t *pgdat;
+ int something_to_do = 0;
+
pgdat = pgdat_list;
- while (pgdat) {
- for (i = 0; i < MAX_NR_ZONES; i++) {
- zone = pgdat->node_zones + i;
+ do {
+ int i;
+ for(i = 0; i < MAX_NR_ZONES; i++) {
+ zone_t *zone = pgdat->node_zones+ i;
+ if (!zone->size || !zone->zone_wake_kswapd)
+ continue;
+ something_to_do = 1;
+ do_try_to_free_pages(GFP_KSWAPD);
if (tsk->need_resched)
schedule();
- if ((!zone->size) || (!zone->zone_wake_kswapd))
- continue;
- do_try_to_free_pages(GFP_KSWAPD, zone);
}
+ run_task_queue(&tq_disk);
pgdat = pgdat->node_next;
+ } while (pgdat);
+
+ if (!something_to_do) {
+ tsk->state = TASK_INTERRUPTIBLE;
+ interruptible_sleep_on(&kswapd_wait);
}
- run_task_queue(&tq_disk);
- tsk->state = TASK_INTERRUPTIBLE;
- interruptible_sleep_on(&kswapd_wait);
}
}
@@ -560,13 +571,13 @@ int kswapd(void *unused)
* can be done by just dropping cached pages without having
* any deadlock issues.
*/
-int try_to_free_pages(unsigned int gfp_mask, zone_t *zone)
+int try_to_free_pages(unsigned int gfp_mask)
{
int retval = 1;
if (gfp_mask & __GFP_WAIT) {
current->flags |= PF_MEMALLOC;
- retval = do_try_to_free_pages(gfp_mask, zone);
+ retval = do_try_to_free_pages(gfp_mask);
current->flags &= ~PF_MEMALLOC;
}
return retval;
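do_try_to_free_pages() above trades the old per-zone argument and SWAP_CLUSTER_MAX target for two small budgets: FREE_COUNT real frees and, per priority level, at most SWAP_COUNT swap-outs, always finishing with a priority-0 shrink_mmap() pass. A condensed sketch of that loop, with the dcache/icache/shm steps elided:

/* Sketch of the reworked reclaim loop; not the literal function. */
static int reclaim_sketch(unsigned int gfp_mask)
{
	int priority = 6;
	int count = FREE_COUNT;

	do {
		/* page-cache frees are real progress and count down */
		while (shrink_mmap(priority, gfp_mask))
			if (!--count)
				return 1;
		/* ... shrink_dcache_memory() / shrink_icache_memory() / shm_swap() ... */

		/* swap_out() only moves pages into the swap cache, so it
		 * gets its own small budget and never counts as progress */
		{
			int swap_count = SWAP_COUNT;
			while (swap_out(priority, gfp_mask))
				if (--swap_count < 0)
					break;
		}
	} while (--priority >= 0);

	/* always end on a shrink_mmap pass */
	while (shrink_mmap(0, gfp_mask))
		if (!--count)
			return 1;
	return 0;
}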