author     Ralf Baechle <ralf@linux-mips.org>  2000-06-19 22:45:37 +0000
committer  Ralf Baechle <ralf@linux-mips.org>  2000-06-19 22:45:37 +0000
commit     6d403070f28cd44860fdb3a53be5da0275c65cf4 (patch)
tree       0d0e7fe7b5fb7568d19e11d7d862b77a866ce081 /mm
parent     ecf1bf5f6c2e668d03b0a9fb026db7aa41e292e1 (diff)
Merge with 2.4.0-test1-ac21 + pile of MIPS cleanups to make merging
possible. Chainsawed RM200 kernel to compile again. Jazz machine status unknown.
Diffstat (limited to 'mm')
-rw-r--r--  mm/filemap.c     | 194
-rw-r--r--  mm/memory.c      |   4
-rw-r--r--  mm/mmap.c        |  29
-rw-r--r--  mm/mremap.c      |   2
-rw-r--r--  mm/page_alloc.c  |  65
-rw-r--r--  mm/slab.c        |  19
-rw-r--r--  mm/swap_state.c  |   6
-rw-r--r--  mm/swapfile.c    |  14
-rw-r--r--  mm/vmscan.c      |  37
9 files changed, 244 insertions, 126 deletions
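
The recurring theme across the vmscan.c, filemap.c and memory.c hunks below is that dirty swap-cache pages are no longer written out synchronously by the page stealer; they are only marked dirty, and shrink_mmap() performs the actual swap write later, when the caller permits I/O (__GFP_IO). The following is a minimal user-space sketch of that two-stage flow; every type and helper name in it is an illustrative stand-in, not the kernel API.

/*
 * Minimal user-space sketch of the "deferred swapout" flow in this merge:
 * try_to_swap_out() only marks a swap-cache page dirty, and shrink_mmap()
 * performs the actual write when __GFP_IO allows it.  All names below are
 * illustrative stand-ins, not kernel code.
 */
#include <stdbool.h>
#include <stdio.h>

struct fake_page {
	bool dirty;         /* models PageDirty()     */
	bool in_swap_cache; /* models PageSwapCache() */
};

/* Stage 1: the page stealer just records that the page needs I/O. */
static void try_to_swap_out_sketch(struct fake_page *p)
{
	p->in_swap_cache = true;
	p->dirty = true;    /* SetPageDirty() instead of rw_swap_page() */
}

/* Stage 2: the cache shrinker decides when the write actually happens. */
static void shrink_mmap_sketch(struct fake_page *p, bool gfp_io_allowed)
{
	if (!p->in_swap_cache)
		return;
	if (!p->dirty) {
		printf("clean swap-cache page: just drop it\n");
		p->in_swap_cache = false;
	} else if (gfp_io_allowed) {
		printf("dirty swap-cache page: write it out now\n");
		p->dirty = false; /* ClearPageDirty() + write to swap */
	} else {
		printf("dirty page but no __GFP_IO: skip it\n");
	}
}

int main(void)
{
	struct fake_page p = { 0 };
	try_to_swap_out_sketch(&p);
	shrink_mmap_sketch(&p, true);
	shrink_mmap_sketch(&p, true);
	return 0;
}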
diff --git a/mm/filemap.c b/mm/filemap.c
index b1e2b8547..ba0048cb8 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -56,6 +56,8 @@ spinlock_t pagemap_lru_lock = SPIN_LOCK_UNLOCKED;
#define CLUSTER_PAGES (1 << page_cluster)
#define CLUSTER_OFFSET(x) (((x) >> page_cluster) << page_cluster)
+#define min(a,b) ((a < b) ? a : b)
+
void __add_page_to_hash_queue(struct page * page, struct page **p)
{
atomic_inc(&page_cache_size);
@@ -90,10 +92,16 @@ static inline int sync_page(struct page *page)
/*
* Remove a page from the page cache and free it. Caller has to make
* sure the page is locked and that nobody else uses it - or that usage
- * is safe.
+ * is safe. The page must not have any buffers.
*/
static inline void __remove_inode_page(struct page *page)
{
+ if (!PageLocked(page))
+ PAGE_BUG(page);
+
+ if (page->buffers)
+ BUG();
+
remove_page_from_inode_queue(page);
remove_page_from_hash_queue(page);
page->mapping = NULL;
@@ -101,9 +109,6 @@ static inline void __remove_inode_page(struct page *page)
void remove_inode_page(struct page *page)
{
- if (!PageLocked(page))
- PAGE_BUG(page);
-
spin_lock(&pagecache_lock);
__remove_inode_page(page);
spin_unlock(&pagecache_lock);
@@ -114,16 +119,16 @@ void remove_inode_page(struct page *page)
* @inode: the inode which pages we want to invalidate
*
* This function only removes the unlocked pages, if you want to
- * remove all the pages of one inode, you must call truncate_inode_pages.
+ * remove all the pages of one inode, you must call
+ * truncate_inode_pages. This function is not supposed to be called
+ * by block-based filesystems.
*/
-
void invalidate_inode_pages(struct inode * inode)
{
struct list_head *head, *curr;
struct page * page;
head = &inode->i_mapping->pages;
-
spin_lock(&pagecache_lock);
spin_lock(&pagemap_lru_lock);
curr = head->next;
@@ -135,20 +140,53 @@ void invalidate_inode_pages(struct inode * inode)
/* We cannot invalidate a locked page */
if (TryLockPage(page))
continue;
+ /* We _should not be called_ by block based filesystems */
+ if (page->buffers)
+ BUG();
- __lru_cache_del(page);
__remove_inode_page(page);
+ __lru_cache_del(page);
UnlockPage(page);
page_cache_release(page);
}
-
spin_unlock(&pagemap_lru_lock);
spin_unlock(&pagecache_lock);
}
-/*
+static inline void truncate_partial_page(struct page *page, unsigned partial)
+{
+ memclear_highpage_flush(page, partial, PAGE_CACHE_SIZE-partial);
+
+ if (page->buffers)
+ block_flushpage(page, partial);
+
+}
+
+static inline void truncate_complete_page(struct page *page)
+{
+ if (page->buffers)
+ block_destroy_buffers(page);
+ lru_cache_del(page);
+
+ /*
+ * We remove the page from the page cache _after_ we have
+ * destroyed all buffer-cache references to it. Otherwise some
+ * other process might think this inode page is not in the
+ * page cache and creates a buffer-cache alias to it causing
+ * all sorts of fun problems ...
+ */
+ remove_inode_page(page);
+ page_cache_release(page);
+}
+
+/**
+ * truncate_inode_pages - truncate *all* the pages from an offset
+ * @mapping: mapping to truncate
+ * @lstart: offset from which to truncate
+ *
* Truncate the page cache at a set offset, removing the pages
* that are beyond that offset (and zeroing out partial pages).
+ * If any page is locked we wait for it to become unlocked.
*/
void truncate_inode_pages(struct address_space * mapping, loff_t lstart)
{
@@ -168,11 +206,10 @@ repeat:
page = list_entry(curr, struct page, list);
curr = curr->next;
-
offset = page->index;
- /* page wholly truncated - free it */
- if (offset >= start) {
+ /* Is this one of the pages to truncate? */
+ if ((offset >= start) || (partial && (offset + 1) == start)) {
if (TryLockPage(page)) {
page_cache_get(page);
spin_unlock(&pagecache_lock);
@@ -183,22 +220,14 @@ repeat:
page_cache_get(page);
spin_unlock(&pagecache_lock);
- if (!page->buffers || block_flushpage(page, 0))
- lru_cache_del(page);
-
- /*
- * We remove the page from the page cache
- * _after_ we have destroyed all buffer-cache
- * references to it. Otherwise some other process
- * might think this inode page is not in the
- * page cache and creates a buffer-cache alias
- * to it causing all sorts of fun problems ...
- */
- remove_inode_page(page);
+ if (partial && (offset + 1) == start) {
+ truncate_partial_page(page, partial);
+ partial = 0;
+ } else
+ truncate_complete_page(page);
UnlockPage(page);
page_cache_release(page);
- page_cache_release(page);
/*
* We have done things without the pagecache lock,
@@ -209,38 +238,59 @@ repeat:
*/
goto repeat;
}
- /*
- * there is only one partial page possible.
- */
- if (!partial)
- continue;
+ }
+ spin_unlock(&pagecache_lock);
+}
- /* and it's the one preceeding the first wholly truncated page */
- if ((offset + 1) != start)
- continue;
+/**
+ * truncate_all_inode_pages - truncate *all* the pages
+ * @mapping: mapping to truncate
+ *
+ * Truncate all the inode pages. If any page is locked we wait for it
+ * to become unlocked. This function can block.
+ */
+void truncate_all_inode_pages(struct address_space * mapping)
+{
+ struct list_head *head, *curr;
+ struct page * page;
+
+ head = &mapping->pages;
+repeat:
+ spin_lock(&pagecache_lock);
+ spin_lock(&pagemap_lru_lock);
+ curr = head->next;
+
+ while (curr != head) {
+ page = list_entry(curr, struct page, list);
+ curr = curr->next;
- /* partial truncate, clear end of page */
if (TryLockPage(page)) {
+ page_cache_get(page);
+ spin_unlock(&pagemap_lru_lock);
spin_unlock(&pagecache_lock);
+ wait_on_page(page);
+ page_cache_release(page);
goto repeat;
}
- page_cache_get(page);
- spin_unlock(&pagecache_lock);
-
- memclear_highpage_flush(page, partial, PAGE_CACHE_SIZE-partial);
- if (page->buffers)
- block_flushpage(page, partial);
-
- partial = 0;
-
- /*
- * we have dropped the spinlock so we have to
- * restart.
- */
+ if (page->buffers) {
+ page_cache_get(page);
+ spin_unlock(&pagemap_lru_lock);
+ spin_unlock(&pagecache_lock);
+ block_destroy_buffers(page);
+ remove_inode_page(page);
+ lru_cache_del(page);
+ page_cache_release(page);
+ UnlockPage(page);
+ page_cache_release(page);
+ goto repeat;
+ }
+ __lru_cache_del(page);
+ __remove_inode_page(page);
UnlockPage(page);
page_cache_release(page);
- goto repeat;
}
+
+ spin_unlock(&pagemap_lru_lock);
spin_unlock(&pagecache_lock);
}
@@ -264,7 +314,15 @@ int shrink_mmap(int priority, int gfp_mask)
page = list_entry(page_lru, struct page, lru);
list_del(page_lru);
- if (PageTestandClearReferenced(page))
+ if (PageTestandClearReferenced(page)) {
+ page->age += PG_AGE_ADV;
+ if (page->age > PG_AGE_MAX)
+ page->age = PG_AGE_MAX;
+ goto dispose_continue;
+ }
+ page->age -= min(PG_AGE_DECL, page->age);
+
+ if (page->age)
goto dispose_continue;
count--;
@@ -322,17 +380,23 @@ int shrink_mmap(int priority, int gfp_mask)
* were to be marked referenced..
*/
if (PageSwapCache(page)) {
- spin_unlock(&pagecache_lock);
- __delete_from_swap_cache(page);
- goto made_inode_progress;
- }
-
- /*
- * Page is from a zone we don't care about.
- * Don't drop page cache entries in vain.
- */
- if (page->zone->free_pages > page->zone->pages_high)
+ if (!PageDirty(page)) {
+ spin_unlock(&pagecache_lock);
+ __delete_from_swap_cache(page);
+ goto made_inode_progress;
+ }
+ /* PageDeferswap -> we swap out the page now. */
+ if (gfp_mask & __GFP_IO) {
+ spin_unlock(&pagecache_lock);
+ /* Do NOT unlock the page ... brw_page does. */
+ ClearPageDirty(page);
+ rw_swap_page(WRITE, page, 0);
+ spin_lock(&pagemap_lru_lock);
+ page_cache_release(page);
+ goto dispose_continue;
+ }
goto cache_unlock_continue;
+ }
/* is it a page-cache page? */
if (page->mapping) {
@@ -1744,7 +1808,7 @@ static int msync_interval(struct vm_area_struct * vma,
if (!error && (flags & MS_SYNC)) {
struct file * file = vma->vm_file;
if (file && file->f_op && file->f_op->fsync)
- error = file->f_op->fsync(file, file->f_dentry);
+ error = file->f_op->fsync(file, file->f_dentry, 1);
}
return error;
}
@@ -2483,7 +2547,7 @@ generic_file_write(struct file *file,const char *buf,size_t count,loff_t *ppos)
if (count) {
remove_suid(inode);
inode->i_ctime = inode->i_mtime = CURRENT_TIME;
- mark_inode_dirty(inode);
+ mark_inode_dirty_sync(inode);
}
while (count) {
@@ -2540,7 +2604,13 @@ unlock:
if (cached_page)
page_cache_free(cached_page);
+ /* For now, when the user asks for O_SYNC, we'll actually
+ * provide O_DSYNC. */
+ if ((status >= 0) && (file->f_flags & O_SYNC))
+ status = generic_osync_inode(inode, 1); /* 1 means datasync */
+
err = written ? written : status;
+
out:
up(&inode->i_sem);
return err;
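
The filemap.c hunks above split truncation into truncate_partial_page() and truncate_complete_page(), and fold the partial-page case into the main loop through the (partial && (offset + 1) == start) test. The sketch below illustrates the page-index arithmetic involved, assuming start and partial are derived from lstart the way 2.4-era truncate_inode_pages() computes them (that computation sits outside the hunk shown); it is a user-space illustration, not the kernel code.

/*
 * Which pages does truncate_inode_pages(mapping, lstart) touch?
 * Assumed derivation of start/partial from lstart; illustrative only.
 */
#include <stdio.h>

#define PAGE_CACHE_SHIFT 12
#define PAGE_CACHE_SIZE  (1UL << PAGE_CACHE_SHIFT)

int main(void)
{
	unsigned long lstart  = 10000; /* new end-of-file offset in bytes */
	unsigned long start   = (lstart + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
	unsigned long partial = lstart & (PAGE_CACHE_SIZE - 1);

	for (unsigned long offset = 0; offset < 5; offset++) {
		if (offset >= start)
			printf("page %lu: wholly beyond lstart -> truncate_complete_page\n",
			       offset);
		else if (partial && offset + 1 == start)
			printf("page %lu: straddles lstart -> truncate_partial_page, clear bytes %lu..%lu\n",
			       offset, partial, PAGE_CACHE_SIZE - 1);
		else
			printf("page %lu: untouched\n", offset);
	}
	return 0;
}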
diff --git a/mm/memory.c b/mm/memory.c
index de7dc07f8..e2609758e 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -847,7 +847,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
UnlockPage(old_page);
break;
}
- delete_from_swap_cache_nolock(old_page);
+ SetPageDirty(old_page);
UnlockPage(old_page);
/* FallThrough */
case 1:
@@ -1058,7 +1058,7 @@ static int do_swap_page(struct mm_struct * mm,
*/
lock_page(page);
swap_free(entry);
- if (write_access && !is_page_shared(page)) {
+ if (write_access && !is_page_shared(page) && nr_free_highpages()) {
delete_from_swap_cache_nolock(page);
UnlockPage(page);
page = replace_with_highmem(page);
diff --git a/mm/mmap.c b/mm/mmap.c
index 8a81bfb20..9edabc02e 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -166,6 +166,7 @@ unsigned long do_mmap_pgoff(struct file * file, unsigned long addr, unsigned lon
{
struct mm_struct * mm = current->mm;
struct vm_area_struct * vma;
+ int correct_wcount = 0;
int error;
if (file && (!file->f_op || !file->f_op->mmap))
@@ -296,26 +297,15 @@ unsigned long do_mmap_pgoff(struct file * file, unsigned long addr, unsigned lon
goto free_vma;
if (file) {
- int correct_wcount = 0;
if (vma->vm_flags & VM_DENYWRITE) {
- if (atomic_read(&file->f_dentry->d_inode->i_writecount) > 0) {
- error = -ETXTBSY;
+ error = deny_write_access(file);
+ if (error)
goto free_vma;
- }
- /* f_op->mmap might possibly sleep
- * (generic_file_mmap doesn't, but other code
- * might). In any case, this takes care of any
- * race that this might cause.
- */
- atomic_dec(&file->f_dentry->d_inode->i_writecount);
correct_wcount = 1;
}
vma->vm_file = file;
get_file(file);
error = file->f_op->mmap(file, vma);
- /* Fix up the count if necessary, then check for an error */
- if (correct_wcount)
- atomic_inc(&file->f_dentry->d_inode->i_writecount);
if (error)
goto unmap_and_free_vma;
} else if (flags & MAP_SHARED) {
@@ -330,6 +320,8 @@ unsigned long do_mmap_pgoff(struct file * file, unsigned long addr, unsigned lon
addr = vma->vm_start; /* can addr have changed?? */
vmlist_modify_lock(mm);
insert_vm_struct(mm, vma);
+ if (correct_wcount)
+ atomic_inc(&file->f_dentry->d_inode->i_writecount);
merge_segments(mm, vma->vm_start, vma->vm_end);
vmlist_modify_unlock(mm);
@@ -341,6 +333,8 @@ unsigned long do_mmap_pgoff(struct file * file, unsigned long addr, unsigned lon
return addr;
unmap_and_free_vma:
+ if (correct_wcount)
+ atomic_inc(&file->f_dentry->d_inode->i_writecount);
vma->vm_file = NULL;
fput(file);
/* Undo any partial mapping done by a device driver. */
@@ -692,9 +686,11 @@ int do_munmap(struct mm_struct *mm, unsigned long addr, size_t len)
* so release them, and unmap the page range..
* If the one of the segments is only being partially unmapped,
* it will put new vm_area_struct(s) into the address space.
+ * In that case we have to be careful with VM_DENYWRITE.
*/
while ((mpnt = free) != NULL) {
unsigned long st, end, size;
+ struct file *file = NULL;
free = free->vm_next;
@@ -706,6 +702,11 @@ int do_munmap(struct mm_struct *mm, unsigned long addr, size_t len)
if (mpnt->vm_ops && mpnt->vm_ops->unmap)
mpnt->vm_ops->unmap(mpnt, st, size);
+ if (mpnt->vm_flags & VM_DENYWRITE &&
+ (st != mpnt->vm_start || end != mpnt->vm_end) &&
+ (file = mpnt->vm_file) != NULL) {
+ atomic_dec(&file->f_dentry->d_inode->i_writecount);
+ }
remove_shared_vm_struct(mpnt);
mm->map_count--;
@@ -717,6 +718,8 @@ int do_munmap(struct mm_struct *mm, unsigned long addr, size_t len)
* Fix the mapping, and free the old area if it wasn't reused.
*/
extra = unmap_fixup(mm, mpnt, st, size, extra);
+ if (file)
+ atomic_inc(&file->f_dentry->d_inode->i_writecount);
}
/* Release the extra vma struct if it wasn't used */
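
The mmap.c hunks above replace the open-coded i_writecount check with deny_write_access() and keep the count consistent across unmap_and_free_vma and partial munmaps of VM_DENYWRITE mappings. The user-space model below shows the sign convention those helpers appear to rely on (positive count: open writers, negative count: deny-write mappings); the _sketch helper names are mine, not the kernel's, and the real code uses atomic_t.

/* Model of the i_writecount convention; illustrative only. */
#include <stdio.h>

static int i_writecount; /* models inode->i_writecount */

/* mmap(MAP_DENYWRITE) side, as used by do_mmap_pgoff() above. */
static int deny_write_access_sketch(void)
{
	if (i_writecount > 0)
		return -1; /* -ETXTBSY: a writer already has the file open */
	i_writecount--;
	return 0;
}

static void allow_write_access_sketch(void)
{
	i_writecount++;
}

/* open(O_WRONLY) side. */
static int get_write_access_sketch(void)
{
	if (i_writecount < 0)
		return -1; /* -ETXTBSY: file is mapped deny-write */
	i_writecount++;
	return 0;
}

int main(void)
{
	if (deny_write_access_sketch() == 0)
		printf("deny-write mapping set up, i_writecount=%d\n", i_writecount);
	if (get_write_access_sketch() < 0)
		printf("open for write refused with ETXTBSY\n");
	allow_write_access_sketch(); /* mapping goes away */
	if (get_write_access_sketch() == 0)
		printf("write access granted, i_writecount=%d\n", i_writecount);
	return 0;
}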
diff --git a/mm/mremap.c b/mm/mremap.c
index 0404dd795..a48125178 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -144,7 +144,7 @@ static inline unsigned long move_vma(struct vm_area_struct * vma,
vmlist_modify_lock(current->mm);
insert_vm_struct(current->mm, new_vma);
merge_segments(current->mm, new_vma->vm_start, new_vma->vm_end);
- vmlist_modify_unlock(vma->vm_mm);
+ vmlist_modify_unlock(current->mm);
do_munmap(current->mm, addr, old_len);
current->mm->total_vm += new_len >> PAGE_SHIFT;
if (new_vma->vm_flags & VM_LOCKED) {
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 926364499..4766127b2 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -29,7 +29,7 @@ int nr_lru_pages;
pg_data_t *pgdat_list;
static char *zone_names[MAX_NR_ZONES] = { "DMA", "Normal", "HighMem" };
-static int zone_balance_ratio[MAX_NR_ZONES] = { 128, 128, 128, };
+static int zone_balance_ratio[MAX_NR_ZONES] = { 128, 128, 512, };
static int zone_balance_min[MAX_NR_ZONES] = { 10 , 10, 10, };
static int zone_balance_max[MAX_NR_ZONES] = { 255 , 255, 255, };
@@ -93,6 +93,8 @@ void __free_pages_ok (struct page *page, unsigned long order)
BUG();
if (PageDecrAfter(page))
BUG();
+ if (PageDirty(page))
+ BUG();
zone = page->zone;
@@ -139,10 +141,13 @@ void __free_pages_ok (struct page *page, unsigned long order)
spin_unlock_irqrestore(&zone->lock, flags);
- if (zone->free_pages > zone->pages_high) {
- zone->zone_wake_kswapd = 0;
+ if (zone->free_pages >= zone->pages_low) {
zone->low_on_memory = 0;
}
+
+ if (zone->free_pages >= zone->pages_high) {
+ zone->zone_wake_kswapd = 0;
+ }
}
#define MARK_USED(index, order, area) \
@@ -217,6 +222,9 @@ struct page * __alloc_pages(zonelist_t *zonelist, unsigned long order)
{
zone_t **zone = zonelist->zones;
extern wait_queue_head_t kswapd_wait;
+ static int last_woke_kswapd;
+ static int kswapd_pause = HZ;
+ int gfp_mask = zonelist->gfp_mask;
/*
* (If anyone calls gfp from interrupts nonatomically then it
@@ -237,8 +245,6 @@ struct page * __alloc_pages(zonelist_t *zonelist, unsigned long order)
struct page *page = rmqueue(z, order);
if (z->free_pages < z->pages_low) {
z->zone_wake_kswapd = 1;
- if (waitqueue_active(&kswapd_wait))
- wake_up_interruptible(&kswapd_wait);
}
if (page)
return page;
@@ -246,9 +252,27 @@ struct page * __alloc_pages(zonelist_t *zonelist, unsigned long order)
}
/*
+ * Kswapd should be freeing enough memory to satisfy all allocations
+ * immediately. Calling try_to_free_pages from processes will slow
+ * down the system a lot. On the other hand, waking up kswapd too
+ * often means wasted memory and cpu time.
+ *
+ * We tune the kswapd pause interval in such a way that kswapd is
+ * always just aggressive enough to free the amount of memory we
+ * want freed.
+ */
+ if (waitqueue_active(&kswapd_wait) &&
+ time_after(jiffies, last_woke_kswapd + kswapd_pause)) {
+ kswapd_pause++;
+ last_woke_kswapd = jiffies;
+ wake_up_interruptible(&kswapd_wait);
+ }
+
+ /*
* Ok, we don't have any zones that don't need some
* balancing.. See if we have any that aren't critical..
*/
+again:
zone = zonelist->zones;
for (;;) {
zone_t *z = *(zone++);
@@ -256,20 +280,33 @@ struct page * __alloc_pages(zonelist_t *zonelist, unsigned long order)
break;
if (!z->low_on_memory) {
struct page *page = rmqueue(z, order);
- if (z->free_pages < z->pages_min)
+ if (z->free_pages < (z->pages_min + z->pages_low) / 2)
z->low_on_memory = 1;
if (page)
return page;
+ } else {
+ if (kswapd_pause > 0)
+ kswapd_pause--;
}
}
+ /* We didn't kick kswapd often enough... */
+ kswapd_pause /= 2;
+ if (waitqueue_active(&kswapd_wait))
+ wake_up_interruptible(&kswapd_wait);
+ /* If we're low priority, we just wait a bit and try again later. */
+ if ((gfp_mask & __GFP_WAIT) && current->need_resched &&
+ current->state == TASK_RUNNING) {
+ schedule();
+ goto again;
+ }
+
/*
* Uhhuh. All the zones have been critical, which means that
* we'd better do some synchronous swap-out. kswapd has not
* been able to cope..
*/
if (!(current->flags & PF_MEMALLOC)) {
- int gfp_mask = zonelist->gfp_mask;
if (!try_to_free_pages(gfp_mask)) {
if (!(gfp_mask & __GFP_HIGH))
goto fail;
@@ -277,7 +314,7 @@ struct page * __alloc_pages(zonelist_t *zonelist, unsigned long order)
}
/*
- * Final phase: allocate anything we can!
+ * We freed something, so we're allowed to allocate anything we can!
*/
zone = zonelist->zones;
for (;;) {
@@ -292,6 +329,18 @@ struct page * __alloc_pages(zonelist_t *zonelist, unsigned long order)
}
fail:
+ /* Last try, zone->low_on_memory isn't reset until we hit pages_low */
+ zone = zonelist->zones;
+ for (;;) {
+ zone_t *z = *(zone++);
+ if (!z)
+ break;
+ if (z->free_pages > z->pages_min) {
+ struct page *page = rmqueue(z, order);
+ if (page)
+ return page;
+ }
+ }
/* No luck.. */
return NULL;
}
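
The __alloc_pages() hunks above add an adaptive wakeup interval for kswapd: kswapd_pause grows by one jiffy per scheduled wakeup while kswapd keeps up, shrinks when allocations start landing in low_on_memory zones, and is halved when every zone comes up empty. Below is a toy simulation of that feedback rule; the workload and thresholds are invented purely for illustration.

/* Toy simulation of the adaptive kswapd_pause feedback; not kernel code. */
#include <stdio.h>

int main(void)
{
	int kswapd_pause = 100; /* stands in for HZ */

	for (int step = 0; step < 6; step++) {
		int memory_pressure = step; /* pretend pressure keeps rising */

		if (memory_pressure < 2)
			kswapd_pause++;      /* kswapd keeps up: back off          */
		else if (memory_pressure < 4) {
			if (kswapd_pause > 0)
				kswapd_pause--; /* hitting low_on_memory zones     */
		} else
			kswapd_pause /= 2;   /* fell through all zones: wake sooner */

		printf("step %d: kswapd_pause=%d\n", step, kswapd_pause);
	}
	return 0;
}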
diff --git a/mm/slab.c b/mm/slab.c
index 64f33cb33..f3d04da8e 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -106,11 +106,6 @@
#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/init.h>
-#ifdef __mips__
-#include <asm/pgtable.h>
-#include <asm/addrspace.h>
-#include <asm/pgalloc.h>
-#endif
/* If there is a different PAGE_SIZE around, and it works with this allocator,
* then change the following.
@@ -1691,19 +1686,11 @@ void *
kmalloc(size_t size, int flags)
{
cache_sizes_t *csizep = cache_sizes;
- unsigned long addr;
for (; csizep->cs_size; csizep++) {
if (size > csizep->cs_size)
continue;
- addr = __kmem_cache_alloc(csizep->cs_cachep, flags);
-#ifdef __mips__
- if (addr && (flags & GFP_UNCACHED)) {
- flush_cache_all(); /* Ouch ... */
- addr = KSEG1ADDR(addr);
- }
-#endif /* __mips__ */
- return addr;
+ return __kmem_cache_alloc(csizep->cs_cachep, flags);
}
printk(KERN_ERR "kmalloc: Size (%lu) too large\n", (unsigned long) size);
return NULL;
@@ -1717,10 +1704,6 @@ kfree(const void *objp)
if (!objp)
goto null_ptr;
-#ifdef __mips__
- if (KSEGX(objp) == KSEG1)
- objp = KSEG0ADDR(objp);
-#endif __mips__
nr = MAP_NR(objp);
if (nr >= max_mapnr)
goto bad_ptr;
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 2405aba2f..87ecc0c10 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -73,6 +73,7 @@ static inline void remove_from_swap_cache(struct page *page)
PAGE_BUG(page);
PageClearSwapCache(page);
+ ClearPageDirty(page);
remove_inode_page(page);
}
@@ -102,9 +103,10 @@ void delete_from_swap_cache_nolock(struct page *page)
if (!PageLocked(page))
BUG();
- if (block_flushpage(page, 0))
- lru_cache_del(page);
+ if (page->buffers)
+ block_destroy_buffers(page);
+ lru_cache_del(page);
__delete_from_swap_cache(page);
page_cache_release(page);
}
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 55ef476a3..5d3a7f23e 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -407,11 +407,11 @@ asmlinkage long sys_swapoff(const char * specialfile)
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- lock_kernel();
err = user_path_walk(specialfile, &nd);
if (err)
goto out;
+ lock_kernel();
prev = -1;
swap_list_lock();
for (type = swap_list.head; type >= 0; type = swap_info[type].next) {
@@ -478,9 +478,9 @@ asmlinkage long sys_swapoff(const char * specialfile)
err = 0;
out_dput:
+ unlock_kernel();
path_release(&nd);
out:
- unlock_kernel();
return err;
}
@@ -555,7 +555,6 @@ asmlinkage long sys_swapon(const char * specialfile, int swap_flags)
unsigned long maxpages;
int swapfilesize;
struct block_device *bdev = NULL;
- char *name;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
@@ -586,14 +585,7 @@ asmlinkage long sys_swapon(const char * specialfile, int swap_flags)
} else {
p->prio = --least_priority;
}
- name = getname(specialfile);
- error = PTR_ERR(name);
- if (IS_ERR(name))
- goto bad_swap_2;
- error = 0;
- if (path_init(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd))
- error = path_walk(name, &nd);
- putname(name);
+ error = user_path_walk(specialfile, &nd);
if (error)
goto bad_swap_2;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 1919c0961..597a1b093 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -62,6 +62,10 @@ static int try_to_swap_out(struct mm_struct * mm, struct vm_area_struct* vma, un
goto out_failed;
}
+ /* Can only do this if we age all active pages. */
+ if (PageActive(page) && page->age > 1)
+ goto out_failed;
+
if (TryLockPage(page))
goto out_failed;
@@ -74,6 +78,8 @@ static int try_to_swap_out(struct mm_struct * mm, struct vm_area_struct* vma, un
* memory, and we should just continue our scan.
*/
if (PageSwapCache(page)) {
+ if (pte_dirty(pte))
+ SetPageDirty(page);
entry.val = page->index;
swap_duplicate(entry);
set_pte(page_table, swp_entry_to_pte(entry));
@@ -181,7 +187,10 @@ drop_pte:
vmlist_access_unlock(vma->vm_mm);
/* OK, do a physical asynchronous write to swap. */
- rw_swap_page(WRITE, page, 0);
+ // rw_swap_page(WRITE, page, 0);
+ /* Let shrink_mmap handle this swapout. */
+ SetPageDirty(page);
+ UnlockPage(page);
out_free_success:
page_cache_release(page);
@@ -430,12 +439,12 @@ out:
* latency.
*/
#define FREE_COUNT 8
-#define SWAP_COUNT 16
static int do_try_to_free_pages(unsigned int gfp_mask)
{
int priority;
int count = FREE_COUNT;
- int swap_count;
+ int swap_count = 0;
+ int ret = 0;
/* Always trim SLAB caches when memory gets low. */
kmem_cache_reap(gfp_mask);
@@ -443,6 +452,7 @@ static int do_try_to_free_pages(unsigned int gfp_mask)
priority = 64;
do {
while (shrink_mmap(priority, gfp_mask)) {
+ ret = 1;
if (!--count)
goto done;
}
@@ -457,9 +467,12 @@ static int do_try_to_free_pages(unsigned int gfp_mask)
*/
count -= shrink_dcache_memory(priority, gfp_mask);
count -= shrink_icache_memory(priority, gfp_mask);
- if (count <= 0)
+ if (count <= 0) {
+ ret = 1;
goto done;
+ }
while (shm_swap(priority, gfp_mask)) {
+ ret = 1;
if (!--count)
goto done;
}
@@ -471,24 +484,30 @@ static int do_try_to_free_pages(unsigned int gfp_mask)
* This will not actually free any pages (they get
* put in the swap cache), so we must not count this
* as a "count" success.
+ *
+ * The amount we page out is the number of pages we fell
+ * short of freeing, amplified by the number of times we
+ * failed above. This generates a negative feedback loop:
+ * the more difficult it was to free pages, the easier we
+ * will make it.
*/
- swap_count = SWAP_COUNT;
- while (swap_out(priority, gfp_mask))
+ swap_count += count;
+ while (swap_out(priority, gfp_mask)) {
if (--swap_count < 0)
break;
+ }
} while (--priority >= 0);
/* Always end on a shrink_mmap.. */
while (shrink_mmap(0, gfp_mask)) {
+ ret = 1;
if (!--count)
goto done;
}
- /* We return 1 if we are freed some page */
- return (count != FREE_COUNT);
done:
- return 1;
+ return ret;
}
DECLARE_WAIT_QUEUE_HEAD(kswapd_wait);
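
The do_try_to_free_pages() rework above drops the fixed SWAP_COUNT budget in favour of one driven by the reclaim shortfall (swap_count += count). The skeleton below mirrors that control flow in user space, with fabricated reclaim results and shrunken loop bounds so it terminates quickly; it is a sketch of the feedback idea, not the kernel function.

/* Control-flow sketch of the shortfall-driven swapout budget. */
#include <stdio.h>

#define FREE_COUNT 8

/* Pretend only the most aggressive priorities reclaim anything. */
static int fake_shrink_mmap(int priority)
{
	return priority <= 2;
}

int main(void)
{
	int count = FREE_COUNT;
	int swap_count = 0;

	for (int priority = 6; priority >= 0; priority--) {
		while (fake_shrink_mmap(priority)) {
			if (!--count)
				goto done;
		}
		/* Shortfall feeds the swapout budget: negative feedback. */
		swap_count += count;
		printf("priority %d: still short %d pages, swap budget now %d\n",
		       priority, count, swap_count);
	}
done:
	printf("freed target%s reached, swap_count=%d\n",
	       count ? " not" : "", swap_count);
	return 0;
}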