author	Ralf Baechle <ralf@linux-mips.org>	2000-04-28 01:09:25 +0000
committer	Ralf Baechle <ralf@linux-mips.org>	2000-04-28 01:09:25 +0000
commit	b9ba7aeb165cffecdffb60aec8c3fa8d590d9ca9
tree	42d07b0c7246ae2536a702e7c5de9e2732341116 /mm
parent	7406b0a326f2d70ade2671c37d1beef62249db97
Merge with 2.3.99-pre6.
Diffstat (limited to 'mm')
-rw-r--r--	mm/filemap.c	113
-rw-r--r--	mm/memory.c	62
-rw-r--r--	mm/mmap.c	50
-rw-r--r--	mm/mremap.c	6
-rw-r--r--	mm/page_alloc.c	13
-rw-r--r--	mm/slab.c	6
-rw-r--r--	mm/swap_state.c	7
-rw-r--r--	mm/swapfile.c	49
-rw-r--r--	mm/vmscan.c	54
9 files changed, 233 insertions, 127 deletions
diff --git a/mm/filemap.c b/mm/filemap.c
index 533747f96..d0df8bd2c 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -44,6 +44,7 @@
atomic_t page_cache_size = ATOMIC_INIT(0);
unsigned int page_hash_bits;
struct page **page_hash_table;
+struct list_head lru_cache;
spinlock_t pagecache_lock = SPIN_LOCK_UNLOCKED;
/*
@@ -77,6 +78,15 @@ static void remove_page_from_hash_queue(struct page * page)
atomic_dec(&page_cache_size);
}
+static inline int sync_page(struct page *page)
+{
+ struct address_space *mapping = page->mapping;
+
+ if (mapping && mapping->a_ops && mapping->a_ops->sync_page)
+ return mapping->a_ops->sync_page(page);
+ return 0;
+}
+
/*
* Remove a page from the page cache and free it. Caller has to make
* sure the page is locked and that nobody else uses it - or that usage
@@ -87,6 +97,9 @@ void remove_inode_page(struct page *page)
if (!PageLocked(page))
PAGE_BUG(page);
+ /* Initiate completion of any async operations */
+ sync_page(page);
+
spin_lock(&pagecache_lock);
remove_page_from_inode_queue(page);
remove_page_from_hash_queue(page);
@@ -99,6 +112,7 @@ void invalidate_inode_pages(struct inode * inode)
struct list_head *head, *curr;
struct page * page;
+ repeat:
head = &inode->i_mapping->pages;
spin_lock(&pagecache_lock);
curr = head->next;
@@ -110,14 +124,13 @@ void invalidate_inode_pages(struct inode * inode)
/* We cannot invalidate a locked page */
if (TryLockPage(page))
continue;
+ spin_unlock(&pagecache_lock);
lru_cache_del(page);
- remove_page_from_inode_queue(page);
- remove_page_from_hash_queue(page);
- page->mapping = NULL;
+ remove_inode_page(page);
UnlockPage(page);
-
page_cache_release(page);
+ goto repeat;
}
spin_unlock(&pagecache_lock);
}
@@ -149,11 +162,16 @@ repeat:
/* page wholly truncated - free it */
if (offset >= start) {
+ if (TryLockPage(page)) {
+ spin_unlock(&pagecache_lock);
+ get_page(page);
+ wait_on_page(page);
+ put_page(page);
+ goto repeat;
+ }
get_page(page);
spin_unlock(&pagecache_lock);
- lock_page(page);
-
if (!page->buffers || block_flushpage(page, 0))
lru_cache_del(page);
@@ -191,11 +209,13 @@ repeat:
continue;
/* partial truncate, clear end of page */
+ if (TryLockPage(page)) {
+ spin_unlock(&pagecache_lock);
+ goto repeat;
+ }
get_page(page);
spin_unlock(&pagecache_lock);
- lock_page(page);
-
memclear_highpage_flush(page, partial, PAGE_CACHE_SIZE-partial);
if (page->buffers)
block_flushpage(page, partial);
@@ -208,6 +228,9 @@ repeat:
*/
UnlockPage(page);
page_cache_release(page);
+ get_page(page);
+ wait_on_page(page);
+ put_page(page);
goto repeat;
}
spin_unlock(&pagecache_lock);
@@ -215,46 +238,55 @@ repeat:
int shrink_mmap(int priority, int gfp_mask, zone_t *zone)
{
- int ret = 0, count;
+ int ret = 0, loop = 0, count;
LIST_HEAD(young);
LIST_HEAD(old);
LIST_HEAD(forget);
struct list_head * page_lru, * dispose;
- struct page * page;
-
+ struct page * page = NULL;
+ struct zone_struct * p_zone;
+ int maxloop = 256 >> priority;
+
if (!zone)
BUG();
- count = nr_lru_pages / (priority+1);
+ count = nr_lru_pages >> priority;
+ if (!count)
+ return ret;
spin_lock(&pagemap_lru_lock);
-
- while (count > 0 && (page_lru = zone->lru_cache.prev) != &zone->lru_cache) {
+again:
+ /* we need pagemap_lru_lock for list_del() ... subtle code below */
+ while (count > 0 && (page_lru = lru_cache.prev) != &lru_cache) {
page = list_entry(page_lru, struct page, lru);
list_del(page_lru);
+ p_zone = page->zone;
- dispose = &zone->lru_cache;
- if (test_and_clear_bit(PG_referenced, &page->flags))
- /* Roll the page at the top of the lru list,
- * we could also be more aggressive putting
- * the page in the young-dispose-list, so
- * avoiding to free young pages in each pass.
- */
- goto dispose_continue;
-
+ /*
+ * These two tests are there to make sure we don't free too
+ * many pages from the "wrong" zone. We free some anyway,
+ * they are the least recently used pages in the system.
+ * When we don't free them, leave them in &old.
+ */
dispose = &old;
- /* don't account passes over not DMA pages */
- if (zone && (!memclass(page->zone, zone)))
+ if (p_zone != zone && (loop > (maxloop / 4) ||
+ p_zone->free_pages > p_zone->pages_high))
goto dispose_continue;
- count--;
-
+ /* The page is in use, or was used very recently, put it in
+ * &young to make sure that we won't try to free it the next
+ * time */
dispose = &young;
- /* avoid unscalable SMP locking */
+ if (test_and_clear_bit(PG_referenced, &page->flags))
+ goto dispose_continue;
+
+ count--;
if (!page->buffers && page_count(page) > 1)
goto dispose_continue;
+ /* Page not used -> free it; if that fails -> &old */
+ dispose = &old;
if (TryLockPage(page))
goto dispose_continue;
@@ -327,6 +359,7 @@ unlock_continue:
list_add(page_lru, dispose);
continue;
+ /* we're holding pagemap_lru_lock, so we can just loop again */
dispose_continue:
list_add(page_lru, dispose);
}
@@ -342,9 +375,14 @@ made_buffer_progress:
/* nr_lru_pages needs the spinlock */
nr_lru_pages--;
+ loop++;
+ /* wrong zone? not looped too often? roll again... */
+ if (page->zone != zone && loop < maxloop)
+ goto again;
+
out:
- list_splice(&young, &zone->lru_cache);
- list_splice(&old, zone->lru_cache.prev);
+ list_splice(&young, &lru_cache);
+ list_splice(&old, lru_cache.prev);
spin_unlock(&pagemap_lru_lock);
@@ -467,6 +505,9 @@ static inline void __add_to_page_cache(struct page * page,
struct page *alias;
unsigned long flags;
+ if (PageLocked(page))
+ BUG();
+
flags = page->flags & ~((1 << PG_uptodate) | (1 << PG_error) | (1 << PG_dirty));
page->flags = flags | (1 << PG_locked) | (1 << PG_referenced);
get_page(page);
@@ -574,7 +615,7 @@ void ___wait_on_page(struct page *page)
add_wait_queue(&page->wait, &wait);
do {
- run_task_queue(&tq_disk);
+ sync_page(page);
set_task_state(tsk, TASK_UNINTERRUPTIBLE);
if (!PageLocked(page))
break;
@@ -619,7 +660,7 @@ repeat:
struct task_struct *tsk = current;
DECLARE_WAITQUEUE(wait, tsk);
- run_task_queue(&tq_disk);
+ sync_page(page);
__set_task_state(tsk, TASK_UNINTERRUPTIBLE);
add_wait_queue(&page->wait, &wait);
@@ -669,7 +710,7 @@ repeat:
struct task_struct *tsk = current;
DECLARE_WAITQUEUE(wait, tsk);
- run_task_queue(&tq_disk);
+ sync_page(page);
__set_task_state(tsk, TASK_UNINTERRUPTIBLE);
add_wait_queue(&page->wait, &wait);
@@ -1496,7 +1537,7 @@ static int filemap_write_page(struct file *file,
* mmap_sem is held.
*/
lock_page(page);
- result = inode->i_mapping->a_ops->writepage(dentry, page);
+ result = inode->i_mapping->a_ops->writepage(file, dentry, page);
UnlockPage(page);
return result;
}
@@ -1707,8 +1748,8 @@ static int msync_interval(struct vm_area_struct * vma,
error = vma->vm_ops->sync(vma, start, end-start, flags);
if (!error && (flags & MS_SYNC)) {
struct file * file = vma->vm_file;
- if (file)
- error = file_fsync(file, file->f_dentry);
+ if (file && file->f_op && file->f_op->fsync)
+ error = file->f_op->fsync(file, file->f_dentry);
}
return error;
}
diff --git a/mm/memory.c b/mm/memory.c
index 28791baa2..84ecb57b5 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -472,7 +472,7 @@ int map_user_kiobuf(int rw, struct kiobuf *iobuf, unsigned long va, size_t len)
goto out_unlock;
}
}
- if (handle_mm_fault(current, vma, ptr, datain) <= 0)
+ if (handle_mm_fault(current->mm, vma, ptr, datain) <= 0)
goto out_unlock;
spin_lock(&mm->page_table_lock);
map = follow_page(ptr);
@@ -815,7 +815,7 @@ static inline void break_cow(struct vm_area_struct * vma, struct page * old_page
* We enter with the page table read-lock held, and need to exit without
* it.
*/
-static int do_wp_page(struct task_struct * tsk, struct vm_area_struct * vma,
+static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
unsigned long address, pte_t *page_table, pte_t pte)
{
unsigned long map_nr;
@@ -824,7 +824,7 @@ static int do_wp_page(struct task_struct * tsk, struct vm_area_struct * vma,
map_nr = pte_pagenr(pte);
if (map_nr >= max_mapnr)
goto bad_wp_page;
- tsk->min_flt++;
+ mm->min_flt++;
old_page = mem_map + map_nr;
/*
@@ -854,36 +854,36 @@ static int do_wp_page(struct task_struct * tsk, struct vm_area_struct * vma,
case 1:
flush_cache_page(vma, address);
establish_pte(vma, address, page_table, pte_mkyoung(pte_mkdirty(pte_mkwrite(pte))));
- spin_unlock(&tsk->mm->page_table_lock);
+ spin_unlock(&mm->page_table_lock);
return 1;
}
/*
* Ok, we need to copy. Oh, well..
*/
- spin_unlock(&tsk->mm->page_table_lock);
+ spin_unlock(&mm->page_table_lock);
new_page = alloc_page(GFP_HIGHUSER);
if (!new_page)
return -1;
- spin_lock(&tsk->mm->page_table_lock);
+ spin_lock(&mm->page_table_lock);
/*
* Re-check the pte - we dropped the lock
*/
if (pte_val(*page_table) == pte_val(pte)) {
if (PageReserved(old_page))
- ++vma->vm_mm->rss;
+ ++mm->rss;
break_cow(vma, old_page, new_page, address, page_table);
/* Free the old page.. */
new_page = old_page;
}
- spin_unlock(&tsk->mm->page_table_lock);
+ spin_unlock(&mm->page_table_lock);
__free_page(new_page);
return 1;
bad_wp_page:
- spin_unlock(&tsk->mm->page_table_lock);
+ spin_unlock(&mm->page_table_lock);
printk("do_wp_page: bogus page at address %08lx (nr %ld)\n",address,map_nr);
return -1;
}
@@ -1029,7 +1029,7 @@ void swapin_readahead(swp_entry_t entry)
return;
}
-static int do_swap_page(struct task_struct * tsk,
+static int do_swap_page(struct mm_struct * mm,
struct vm_area_struct * vma, unsigned long address,
pte_t * page_table, swp_entry_t entry, int write_access)
{
@@ -1048,8 +1048,8 @@ static int do_swap_page(struct task_struct * tsk,
flush_icache_page(vma, page);
}
- vma->vm_mm->rss++;
- tsk->min_flt++;
+ mm->rss++;
+ mm->min_flt++;
pte = mk_pte(page, vma->vm_page_prot);
@@ -1080,7 +1080,7 @@ static int do_swap_page(struct task_struct * tsk,
/*
* This only needs the MM semaphore
*/
-static int do_anonymous_page(struct task_struct * tsk, struct vm_area_struct * vma, pte_t *page_table, int write_access, unsigned long addr)
+static int do_anonymous_page(struct mm_struct * mm, struct vm_area_struct * vma, pte_t *page_table, int write_access, unsigned long addr)
{
int high = 0;
struct page *page = NULL;
@@ -1093,8 +1093,8 @@ static int do_anonymous_page(struct task_struct * tsk, struct vm_area_struct * v
high = 1;
clear_user_highpage(page, addr);
entry = pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot)));
- vma->vm_mm->rss++;
- tsk->min_flt++;
+ mm->rss++;
+ mm->min_flt++;
flush_page_to_ram(page);
}
set_pte(page_table, entry);
@@ -1114,14 +1114,14 @@ static int do_anonymous_page(struct task_struct * tsk, struct vm_area_struct * v
*
* This is called with the MM semaphore held.
*/
-static int do_no_page(struct task_struct * tsk, struct vm_area_struct * vma,
+static int do_no_page(struct mm_struct * mm, struct vm_area_struct * vma,
unsigned long address, int write_access, pte_t *page_table)
{
struct page * new_page;
pte_t entry;
if (!vma->vm_ops || !vma->vm_ops->nopage)
- return do_anonymous_page(tsk, vma, page_table, write_access, address);
+ return do_anonymous_page(mm, vma, page_table, write_access, address);
/*
* The third argument is "no_share", which tells the low-level code
@@ -1133,8 +1133,8 @@ static int do_no_page(struct task_struct * tsk, struct vm_area_struct * vma,
return 0;
if (new_page == NOPAGE_OOM)
return -1;
- ++tsk->maj_flt;
- ++vma->vm_mm->rss;
+ ++mm->maj_flt;
+ ++mm->rss;
/*
* This silly early PAGE_DIRTY setting removes a race
* due to the bad i386 page protection. But it's valid
@@ -1177,7 +1177,7 @@ static int do_no_page(struct task_struct * tsk, struct vm_area_struct * vma,
* so we don't need to worry about a page being suddenly been added into
* our VM.
*/
-static inline int handle_pte_fault(struct task_struct *tsk,
+static inline int handle_pte_fault(struct mm_struct *mm,
struct vm_area_struct * vma, unsigned long address,
int write_access, pte_t * pte)
{
@@ -1186,8 +1186,8 @@ static inline int handle_pte_fault(struct task_struct *tsk,
entry = *pte;
if (!pte_present(entry)) {
if (pte_none(entry))
- return do_no_page(tsk, vma, address, write_access, pte);
- return do_swap_page(tsk, vma, address, pte, pte_to_swp_entry(entry), write_access);
+ return do_no_page(mm, vma, address, write_access, pte);
+ return do_swap_page(mm, vma, address, pte, pte_to_swp_entry(entry), write_access);
}
/*
@@ -1195,38 +1195,38 @@ static inline int handle_pte_fault(struct task_struct *tsk,
* lock to synchronize with kswapd, and verify that the entry
* didn't change from under us..
*/
- spin_lock(&tsk->mm->page_table_lock);
+ spin_lock(&mm->page_table_lock);
if (pte_val(entry) == pte_val(*pte)) {
if (write_access) {
if (!pte_write(entry))
- return do_wp_page(tsk, vma, address, pte, entry);
+ return do_wp_page(mm, vma, address, pte, entry);
entry = pte_mkdirty(entry);
}
entry = pte_mkyoung(entry);
establish_pte(vma, address, pte, entry);
}
- spin_unlock(&tsk->mm->page_table_lock);
+ spin_unlock(&mm->page_table_lock);
return 1;
}
/*
* By the time we get here, we already hold the mm semaphore
*/
-int handle_mm_fault(struct task_struct *tsk, struct vm_area_struct * vma,
+int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct * vma,
unsigned long address, int write_access)
{
int ret = -1;
pgd_t *pgd;
pmd_t *pmd;
- pgd = pgd_offset(vma->vm_mm, address);
+ pgd = pgd_offset(mm, address);
pmd = pmd_alloc(pgd, address);
if (pmd) {
pte_t * pte = pte_alloc(pmd, address);
if (pte)
- ret = handle_pte_fault(tsk, vma, address, write_access, pte);
+ ret = handle_pte_fault(mm, vma, address, write_access, pte);
}
return ret;
}
@@ -1237,15 +1237,15 @@ int handle_mm_fault(struct task_struct *tsk, struct vm_area_struct * vma,
int make_pages_present(unsigned long addr, unsigned long end)
{
int write;
- struct task_struct *tsk = current;
+ struct mm_struct *mm = current->mm;
struct vm_area_struct * vma;
- vma = find_vma(tsk->mm, addr);
+ vma = find_vma(mm, addr);
write = (vma->vm_flags & VM_WRITE) != 0;
if (addr >= end)
BUG();
do {
- if (handle_mm_fault(tsk, vma, addr, write) < 0)
+ if (handle_mm_fault(mm, vma, addr, write) < 0)
return -1;
addr += PAGE_SIZE;
} while (addr < end);
diff --git a/mm/mmap.c b/mm/mmap.c
index 604624168..8a81bfb20 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -110,7 +110,7 @@ asmlinkage unsigned long sys_brk(unsigned long brk)
/* Always allow shrinking brk. */
if (brk <= mm->brk) {
- if (!do_munmap(newbrk, oldbrk-newbrk))
+ if (!do_munmap(mm, newbrk, oldbrk-newbrk))
goto set_brk;
goto out;
}
@@ -220,8 +220,7 @@ unsigned long do_mmap_pgoff(struct file * file, unsigned long addr, unsigned lon
default:
return -EINVAL;
}
- } else if ((flags & MAP_TYPE) != MAP_PRIVATE)
- return -EINVAL;
+ }
/* Obtain the address to map to. we verify (or select) it and ensure
* that it represents a valid section of the address space.
@@ -269,8 +268,11 @@ unsigned long do_mmap_pgoff(struct file * file, unsigned long addr, unsigned lon
if (!(file->f_mode & FMODE_WRITE))
vma->vm_flags &= ~(VM_MAYWRITE | VM_SHARED);
}
- } else
+ } else {
vma->vm_flags |= VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
+ if (flags & MAP_SHARED)
+ vma->vm_flags |= VM_SHARED | VM_MAYSHARE;
+ }
vma->vm_page_prot = protection_map[vma->vm_flags & 0x0f];
vma->vm_ops = NULL;
vma->vm_pgoff = pgoff;
@@ -279,7 +281,7 @@ unsigned long do_mmap_pgoff(struct file * file, unsigned long addr, unsigned lon
/* Clear old maps */
error = -ENOMEM;
- if (do_munmap(addr, len))
+ if (do_munmap(mm, addr, len))
goto free_vma;
/* Check against address space limit. */
@@ -316,6 +318,8 @@ unsigned long do_mmap_pgoff(struct file * file, unsigned long addr, unsigned lon
atomic_inc(&file->f_dentry->d_inode->i_writecount);
if (error)
goto unmap_and_free_vma;
+ } else if (flags & MAP_SHARED) {
+ error = map_zero_setup(vma);
}
/*
@@ -468,13 +472,13 @@ struct vm_area_struct * find_vma_prev(struct mm_struct * mm, unsigned long addr,
return NULL;
}
-struct vm_area_struct * find_extend_vma(struct task_struct * tsk, unsigned long addr)
+struct vm_area_struct * find_extend_vma(struct mm_struct * mm, unsigned long addr)
{
struct vm_area_struct * vma;
unsigned long start;
addr &= PAGE_MASK;
- vma = find_vma(tsk->mm,addr);
+ vma = find_vma(mm,addr);
if (!vma)
return NULL;
if (vma->vm_start <= addr)
@@ -513,8 +517,9 @@ struct vm_area_struct * find_extend_vma(struct task_struct * tsk, unsigned long
* allocate a new one, and the return indicates whether the old
* area was reused.
*/
-static struct vm_area_struct * unmap_fixup(struct vm_area_struct *area,
- unsigned long addr, size_t len, struct vm_area_struct *extra)
+static struct vm_area_struct * unmap_fixup(struct mm_struct *mm,
+ struct vm_area_struct *area, unsigned long addr, size_t len,
+ struct vm_area_struct *extra)
{
struct vm_area_struct *mpnt;
unsigned long end = addr + len;
@@ -536,11 +541,11 @@ static struct vm_area_struct * unmap_fixup(struct vm_area_struct *area,
/* Work out to one of the ends. */
if (end == area->vm_end) {
area->vm_end = addr;
- vmlist_modify_lock(current->mm);
+ vmlist_modify_lock(mm);
} else if (addr == area->vm_start) {
area->vm_pgoff += (end - area->vm_start) >> PAGE_SHIFT;
area->vm_start = end;
- vmlist_modify_lock(current->mm);
+ vmlist_modify_lock(mm);
} else {
/* Unmapping a hole: area->vm_start < addr <= end < area->vm_end */
/* Add end mapping -- leave beginning for below */
@@ -562,12 +567,12 @@ static struct vm_area_struct * unmap_fixup(struct vm_area_struct *area,
if (mpnt->vm_ops && mpnt->vm_ops->open)
mpnt->vm_ops->open(mpnt);
area->vm_end = addr; /* Truncate area */
- vmlist_modify_lock(current->mm);
- insert_vm_struct(current->mm, mpnt);
+ vmlist_modify_lock(mm);
+ insert_vm_struct(mm, mpnt);
}
- insert_vm_struct(current->mm, area);
- vmlist_modify_unlock(current->mm);
+ insert_vm_struct(mm, area);
+ vmlist_modify_unlock(mm);
return extra;
}
@@ -634,9 +639,8 @@ no_mmaps:
* work. This now handles partial unmappings.
* Jeremy Fitzhardine <jeremy@sw.oz.au>
*/
-int do_munmap(unsigned long addr, size_t len)
+int do_munmap(struct mm_struct *mm, unsigned long addr, size_t len)
{
- struct mm_struct * mm;
struct vm_area_struct *mpnt, *prev, **npp, *free, *extra;
if ((addr & ~PAGE_MASK) || addr > TASK_SIZE || len > TASK_SIZE-addr)
@@ -650,7 +654,6 @@ int do_munmap(unsigned long addr, size_t len)
* every area affected in some way (by any overlap) is put
* on the list. If nothing is put on, nothing is affected.
*/
- mm = current->mm;
mpnt = find_vma_prev(mm, addr, &prev);
if (!mpnt)
return 0;
@@ -713,7 +716,7 @@ int do_munmap(unsigned long addr, size_t len)
/*
* Fix the mapping, and free the old area if it wasn't reused.
*/
- extra = unmap_fixup(mpnt, st, size, extra);
+ extra = unmap_fixup(mm, mpnt, st, size, extra);
}
/* Release the extra vma struct if it wasn't used */
@@ -728,10 +731,11 @@ int do_munmap(unsigned long addr, size_t len)
asmlinkage long sys_munmap(unsigned long addr, size_t len)
{
int ret;
+ struct mm_struct *mm = current->mm;
- down(&current->mm->mmap_sem);
- ret = do_munmap(addr, len);
- up(&current->mm->mmap_sem);
+ down(&mm->mmap_sem);
+ ret = do_munmap(mm, addr, len);
+ up(&mm->mmap_sem);
return ret;
}
@@ -763,7 +767,7 @@ unsigned long do_brk(unsigned long addr, unsigned long len)
/*
* Clear old maps. this also does some error checking for us
*/
- retval = do_munmap(addr, len);
+ retval = do_munmap(mm, addr, len);
if (retval != 0)
return retval;
diff --git a/mm/mremap.c b/mm/mremap.c
index d8d18cf62..0404dd795 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -145,7 +145,7 @@ static inline unsigned long move_vma(struct vm_area_struct * vma,
insert_vm_struct(current->mm, new_vma);
merge_segments(current->mm, new_vma->vm_start, new_vma->vm_end);
vmlist_modify_unlock(vma->vm_mm);
- do_munmap(addr, old_len);
+ do_munmap(current->mm, addr, old_len);
current->mm->total_vm += new_len >> PAGE_SHIFT;
if (new_vma->vm_flags & VM_LOCKED) {
current->mm->locked_vm += new_len >> PAGE_SHIFT;
@@ -201,7 +201,7 @@ unsigned long do_mremap(unsigned long addr,
if ((addr <= new_addr) && (addr+old_len) > new_addr)
goto out;
- do_munmap(new_addr, new_len);
+ do_munmap(current->mm, new_addr, new_len);
}
/*
@@ -210,7 +210,7 @@ unsigned long do_mremap(unsigned long addr,
*/
ret = addr;
if (old_len >= new_len) {
- do_munmap(addr+new_len, old_len - new_len);
+ do_munmap(current->mm, addr+new_len, old_len - new_len);
if (!(flags & MREMAP_FIXED) || (new_addr == addr))
goto out;
}
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 07fdaa021..ba5ba3013 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -25,7 +25,7 @@
#endif
int nr_swap_pages = 0;
-int nr_lru_pages;
+int nr_lru_pages = 0;
pg_data_t *pgdat_list = (pg_data_t *)0;
static char *zone_names[MAX_NR_ZONES] = { "DMA", "Normal", "HighMem" };
@@ -273,6 +273,8 @@ static int zone_balance_memory(zonelist_t *zonelist)
struct page * __alloc_pages(zonelist_t *zonelist, unsigned long order)
{
zone_t **zone = zonelist->zones;
+ int gfp_mask = zonelist->gfp_mask;
+ static int low_on_memory;
/*
* If this is a recursive call, we'd better
@@ -282,6 +284,11 @@ struct page * __alloc_pages(zonelist_t *zonelist, unsigned long order)
if (current->flags & PF_MEMALLOC)
goto allocate_ok;
+ /* If we're a memory hog, unmap some pages */
+ if (current->hog && low_on_memory &&
+ (gfp_mask & __GFP_WAIT))
+ swap_out(4, gfp_mask);
+
/*
* (If anyone calls gfp from interrupts nonatomically then it
* will sooner or later tripped up by a schedule().)
@@ -299,11 +306,13 @@ struct page * __alloc_pages(zonelist_t *zonelist, unsigned long order)
/* Are we supposed to free memory? Don't make it worse.. */
if (!z->zone_wake_kswapd && z->free_pages > z->pages_low) {
struct page *page = rmqueue(z, order);
+ low_on_memory = 0;
if (page)
return page;
}
}
+ low_on_memory = 1;
/*
* Ok, no obvious zones were available, start
* balancing things a bit..
@@ -530,6 +539,7 @@ void __init free_area_init_core(int nid, pg_data_t *pgdat, struct page **gmap,
freepages.min += i;
freepages.low += i * 2;
freepages.high += i * 3;
+ memlist_init(&lru_cache);
/*
* Some architectures (with lots of mem and discontinous memory
@@ -609,7 +619,6 @@ void __init free_area_init_core(int nid, pg_data_t *pgdat, struct page **gmap,
unsigned long bitmap_size;
memlist_init(&zone->free_area[i].free_list);
- memlist_init(&zone->lru_cache);
mask += mask;
size = (size + ~mask) & mask;
bitmap_size = size >> i;
diff --git a/mm/slab.c b/mm/slab.c
index 976f78c1a..68bbb7d17 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -1534,7 +1534,7 @@ nul_ptr:
* it should be in this state _before_ it is released.
*/
static inline void
-__kmem_cache_free(kmem_cache_t *cachep, const void *objp)
+__kmem_cache_free(kmem_cache_t *cachep, void *objp)
{
kmem_slab_t *slabp;
kmem_bufctl_t *bufp;
@@ -1739,7 +1739,7 @@ kfree(const void *objp)
*/
cachep = SLAB_GET_PAGE_CACHE(page);
if (cachep && (cachep->c_flags & SLAB_CFLGS_GENERAL)) {
- __kmem_cache_free(cachep, objp);
+ __kmem_cache_free(cachep, (void *)objp);
return;
}
}
@@ -1774,7 +1774,7 @@ kfree_s(const void *objp, size_t size)
cachep = SLAB_GET_PAGE_CACHE(page);
if (cachep && cachep->c_flags & SLAB_CFLGS_GENERAL) {
if (size <= cachep->c_org_size) { /* XXX better check */
- __kmem_cache_free(cachep, objp);
+ __kmem_cache_free(cachep, (void *)objp);
return;
}
}
diff --git a/mm/swap_state.c b/mm/swap_state.c
index defe9b463..29ba0d78b 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -17,12 +17,17 @@
#include <asm/pgtable.h>
+static struct address_space_operations swap_aops = {
+ sync_page: block_sync_page
+};
+
struct address_space swapper_space = {
{ /* pages */
&swapper_space.pages, /* .next */
&swapper_space.pages /* .prev */
},
- 0 /* nrpages */
+ 0, /* nrpages */
+ &swap_aops,
};
#ifdef SWAP_CACHE_INFO
diff --git a/mm/swapfile.c b/mm/swapfile.c
index abdb08e57..da2dd9147 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -444,6 +444,7 @@ asmlinkage long sys_swapoff(const char * specialfile)
{
struct swap_info_struct * p = NULL;
struct dentry * dentry;
+ struct vfsmount *mnt;
int i, type, prev;
int err;
@@ -513,11 +514,14 @@ asmlinkage long sys_swapoff(const char * specialfile)
dentry = p->swap_file;
p->swap_file = NULL;
+ mnt = p->swap_vfsmnt;
+ p->swap_vfsmnt = NULL;
p->swap_device = 0;
vfree(p->swap_map);
p->swap_map = NULL;
p->flags = 0;
err = 0;
+ mntput(mnt);
out_dput:
dput(dentry);
@@ -538,7 +542,8 @@ int get_swaparea_info(char *buf)
len += sprintf(buf, "Filename\t\t\tType\t\tSize\tUsed\tPriority\n");
for (i = 0 ; i < nr_swapfiles ; i++, ptr++) {
if (ptr->flags & SWP_USED) {
- char * path = d_path(ptr->swap_file, NULL, page, PAGE_SIZE);
+ char * path = d_path(ptr->swap_file, ptr->swap_vfsmnt,
+ page, PAGE_SIZE);
len += sprintf(buf + len, "%-31s ", path);
@@ -584,7 +589,8 @@ int is_swap_partition(kdev_t dev) {
asmlinkage long sys_swapon(const char * specialfile, int swap_flags)
{
struct swap_info_struct * p;
- struct dentry * swap_dentry;
+ struct nameidata nd;
+ struct inode * swap_inode;
unsigned int type;
int i, j, prev;
int error;
@@ -595,6 +601,7 @@ asmlinkage long sys_swapon(const char * specialfile, int swap_flags)
unsigned long maxpages;
int swapfilesize;
struct block_device *bdev = NULL;
+ char *name;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
@@ -610,6 +617,7 @@ asmlinkage long sys_swapon(const char * specialfile, int swap_flags)
nr_swapfiles = type+1;
p->flags = SWP_USED;
p->swap_file = NULL;
+ p->swap_vfsmnt = NULL;
p->swap_device = 0;
p->swap_map = NULL;
p->lowest_bit = 0;
@@ -624,24 +632,31 @@ asmlinkage long sys_swapon(const char * specialfile, int swap_flags)
} else {
p->prio = --least_priority;
}
- swap_dentry = namei(specialfile);
- error = PTR_ERR(swap_dentry);
- if (IS_ERR(swap_dentry))
+ name = getname(specialfile);
+ error = PTR_ERR(name);
+ if (IS_ERR(name))
+ goto bad_swap_2;
+ error = 0;
+ if (walk_init(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd))
+ error = walk_name(name, &nd);
+ putname(name);
+ if (error)
goto bad_swap_2;
- p->swap_file = swap_dentry;
+ p->swap_file = nd.dentry;
+ p->swap_vfsmnt = nd.mnt;
+ swap_inode = nd.dentry->d_inode;
error = -EINVAL;
- if (S_ISBLK(swap_dentry->d_inode->i_mode)) {
- kdev_t dev = swap_dentry->d_inode->i_rdev;
+ if (S_ISBLK(swap_inode->i_mode)) {
+ kdev_t dev = swap_inode->i_rdev;
struct block_device_operations *bdops;
p->swap_device = dev;
set_blocksize(dev, PAGE_SIZE);
- bdev = swap_dentry->d_inode->i_bdev;
- bdops = devfs_get_ops ( devfs_get_handle_from_inode
- (swap_dentry->d_inode) );
+ bdev = swap_inode->i_bdev;
+ bdops = devfs_get_ops(devfs_get_handle_from_inode(swap_inode));
if (bdops) bdev->bd_op = bdops;
error = blkdev_get(bdev, FMODE_READ|FMODE_WRITE, 0, BDEV_SWAP);
@@ -663,15 +678,15 @@ asmlinkage long sys_swapon(const char * specialfile, int swap_flags)
if (blk_size[MAJOR(dev)])
swapfilesize = blk_size[MAJOR(dev)][MINOR(dev)]
>> (PAGE_SHIFT - 10);
- } else if (S_ISREG(swap_dentry->d_inode->i_mode)) {
+ } else if (S_ISREG(swap_inode->i_mode)) {
error = -EBUSY;
for (i = 0 ; i < nr_swapfiles ; i++) {
if (i == type || !swap_info[i].swap_file)
continue;
- if (swap_dentry->d_inode == swap_info[i].swap_file->d_inode)
+ if (swap_inode == swap_info[i].swap_file->d_inode)
goto bad_swap;
}
- swapfilesize = swap_dentry->d_inode->i_size >> PAGE_SHIFT;
+ swapfilesize = swap_inode->i_size >> PAGE_SHIFT;
} else
goto bad_swap;
@@ -811,13 +826,17 @@ bad_swap:
bad_swap_2:
if (p->swap_map)
vfree(p->swap_map);
- dput(p->swap_file);
+ nd.mnt = p->swap_vfsmnt;
+ nd.dentry = p->swap_file;
p->swap_device = 0;
p->swap_file = NULL;
+ p->swap_vfsmnt = NULL;
p->swap_map = NULL;
p->flags = 0;
if (!(swap_flags & SWAP_FLAG_PREFER))
++least_priority;
+ dput(nd.dentry);
+ mntput(nd.mnt);
out:
if (swap_header)
free_page((long) swap_header);
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 1057dbb60..691d47f18 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -34,7 +34,7 @@
* using a process that no longer actually exists (it might
* have died while we slept).
*/
-static int try_to_swap_out(struct vm_area_struct* vma, unsigned long address, pte_t * page_table, int gfp_mask)
+static int try_to_swap_out(struct mm_struct * mm, struct vm_area_struct* vma, unsigned long address, pte_t * page_table, int gfp_mask)
{
pte_t pte;
swp_entry_t entry;
@@ -48,6 +48,7 @@ static int try_to_swap_out(struct vm_area_struct* vma, unsigned long address, pt
if ((page-mem_map >= max_mapnr) || PageReserved(page))
goto out_failed;
+ mm->swap_cnt--;
/* Don't look at this pte if it's been accessed recently. */
if (pte_young(pte)) {
/*
@@ -194,7 +195,7 @@ out_failed:
* (C) 1993 Kai Petzke, wpp@marie.physik.tu-berlin.de
*/
-static inline int swap_out_pmd(struct vm_area_struct * vma, pmd_t *dir, unsigned long address, unsigned long end, int gfp_mask)
+static inline int swap_out_pmd(struct mm_struct * mm, struct vm_area_struct * vma, pmd_t *dir, unsigned long address, unsigned long end, int gfp_mask)
{
pte_t * pte;
unsigned long pmd_end;
@@ -216,16 +217,18 @@ static inline int swap_out_pmd(struct vm_area_struct * vma, pmd_t *dir, unsigned
do {
int result;
vma->vm_mm->swap_address = address + PAGE_SIZE;
- result = try_to_swap_out(vma, address, pte, gfp_mask);
+ result = try_to_swap_out(mm, vma, address, pte, gfp_mask);
if (result)
return result;
+ if (!mm->swap_cnt)
+ return 0;
address += PAGE_SIZE;
pte++;
} while (address && (address < end));
return 0;
}
-static inline int swap_out_pgd(struct vm_area_struct * vma, pgd_t *dir, unsigned long address, unsigned long end, int gfp_mask)
+static inline int swap_out_pgd(struct mm_struct * mm, struct vm_area_struct * vma, pgd_t *dir, unsigned long address, unsigned long end, int gfp_mask)
{
pmd_t * pmd;
unsigned long pgd_end;
@@ -245,16 +248,18 @@ static inline int swap_out_pgd(struct vm_area_struct * vma, pgd_t *dir, unsigned
end = pgd_end;
do {
- int result = swap_out_pmd(vma, pmd, address, end, gfp_mask);
+ int result = swap_out_pmd(mm, vma, pmd, address, end, gfp_mask);
if (result)
return result;
+ if (!mm->swap_cnt)
+ return 0;
address = (address + PMD_SIZE) & PMD_MASK;
pmd++;
} while (address && (address < end));
return 0;
}
-static int swap_out_vma(struct vm_area_struct * vma, unsigned long address, int gfp_mask)
+static int swap_out_vma(struct mm_struct * mm, struct vm_area_struct * vma, unsigned long address, int gfp_mask)
{
pgd_t *pgdir;
unsigned long end;
@@ -269,9 +274,11 @@ static int swap_out_vma(struct vm_area_struct * vma, unsigned long address, int
if (address >= end)
BUG();
do {
- int result = swap_out_pgd(vma, pgdir, address, end, gfp_mask);
+ int result = swap_out_pgd(mm, vma, pgdir, address, end, gfp_mask);
if (result)
return result;
+ if (!mm->swap_cnt)
+ return 0;
address = (address + PGDIR_SIZE) & PGDIR_MASK;
pgdir++;
} while (address && (address < end));
@@ -299,7 +306,7 @@ static int swap_out_mm(struct mm_struct * mm, int gfp_mask)
address = vma->vm_start;
for (;;) {
- int result = swap_out_vma(vma, address, gfp_mask);
+ int result = swap_out_vma(mm, vma, address, gfp_mask);
if (result)
return result;
vma = vma->vm_next;
@@ -321,7 +328,7 @@ static int swap_out_mm(struct mm_struct * mm, int gfp_mask)
* N.B. This function returns only 0 or 1. Return values != 1 from
* the lower level routines result in continued processing.
*/
-static int swap_out(unsigned int priority, int gfp_mask)
+int swap_out(unsigned int priority, int gfp_mask)
{
struct task_struct * p;
int counter;
@@ -356,6 +363,7 @@ static int swap_out(unsigned int priority, int gfp_mask)
p = init_task.next_task;
for (; p != &init_task; p = p->next_task) {
struct mm_struct *mm = p->mm;
+ p->hog = 0;
if (!p->swappable || !mm)
continue;
if (mm->rss <= 0)
@@ -369,9 +377,26 @@ static int swap_out(unsigned int priority, int gfp_mask)
pid = p->pid;
}
}
- read_unlock(&tasklist_lock);
- if (assign == 1)
+ if (assign == 1) {
+ /* we just assigned swap_cnt, normalise values */
assign = 2;
+ p = init_task.next_task;
+ for (; p != &init_task; p = p->next_task) {
+ int i = 0;
+ struct mm_struct *mm = p->mm;
+ if (!p->swappable || !mm || mm->rss <= 0)
+ continue;
+ /* small processes are swapped out less */
+ while ((mm->swap_cnt << 2 * (i + 1) < max_cnt))
+ i++;
+ mm->swap_cnt >>= i;
+ mm->swap_cnt += i; /* if swap_cnt reaches 0 */
+ /* we're big -> hog treatment */
+ if (!i)
+ p->hog = 1;
+ }
+ }
+ read_unlock(&tasklist_lock);
if (!best) {
if (!assign) {
assign = 1;
@@ -412,13 +437,14 @@ static int do_try_to_free_pages(unsigned int gfp_mask, zone_t *zone)
{
int priority;
int count = SWAP_CLUSTER_MAX;
+ int ret;
/* Always trim SLAB caches when memory gets low. */
kmem_cache_reap(gfp_mask);
priority = 6;
do {
- while (shrink_mmap(priority, gfp_mask, zone)) {
+ while ((ret = shrink_mmap(priority, gfp_mask, zone))) {
if (!--count)
goto done;
}
@@ -441,7 +467,9 @@ static int do_try_to_free_pages(unsigned int gfp_mask, zone_t *zone)
}
}
- /* Then, try to page stuff out.. */
+ /* Then, try to page stuff out..
+ * We use swapcount here because this doesn't actually
+ * free pages */
while (swap_out(priority, gfp_mask)) {
if (!--count)
goto done;
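
A note on the new sync_page() hook introduced above: mm/filemap.c now consults an optional per-mapping a_ops->sync_page operation (mm/swap_state.c registers block_sync_page for swapper_space), and the page-wait paths call sync_page(page) where they previously ran run_task_queue(&tq_disk). The standalone C sketch below models only the dispatch pattern of that helper, an optional operations table with a safe fallback; every name in it is a stand-in for illustration, not the kernel's API.

/*
 * Userspace model of the sync_page() dispatch added in mm/filemap.c:
 * consult an optional per-mapping operations table, fall back to a
 * no-op when no hook (or no mapping) is present.
 * All names here are illustrative stand-ins, not kernel interfaces.
 */
#include <stdio.h>

struct page;

struct address_space_operations {
	int (*sync_page)(struct page *page);	/* optional hook */
};

struct address_space {
	struct address_space_operations *a_ops;
};

struct page {
	struct address_space *mapping;		/* NULL for anonymous pages */
	const char *name;
};

/* Mirrors the new helper: dispatch if a hook exists, otherwise return 0. */
static int sync_page(struct page *page)
{
	struct address_space *mapping = page->mapping;

	if (mapping && mapping->a_ops && mapping->a_ops->sync_page)
		return mapping->a_ops->sync_page(page);
	return 0;
}

/* Stand-in for a block_sync_page()-style hook: kick queued I/O. */
static int demo_block_sync_page(struct page *page)
{
	printf("flushing queued I/O for %s\n", page->name);
	return 0;
}

static struct address_space_operations demo_aops = {
	.sync_page = demo_block_sync_page,
};

int main(void)
{
	struct address_space mapping = { .a_ops = &demo_aops };
	struct page cached = { .mapping = &mapping, .name = "page in page cache" };
	struct page anon   = { .mapping = NULL, .name = "anonymous page" };

	sync_page(&cached);	/* dispatches to the per-mapping hook */
	sync_page(&anon);	/* no mapping: silently returns 0 */
	return 0;
}

Built with any C compiler, the sketch prints one line for the cached page and nothing for the anonymous one, mirroring how sync_page() quietly does nothing when a page has no address_space or no registered hook.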