author    Ralf Baechle <ralf@linux-mips.org>  1997-09-12 01:29:55 +0000
committer Ralf Baechle <ralf@linux-mips.org>  1997-09-12 01:29:55 +0000
commit    545f435ebcfd94a1e7c20b46efe81b4d6ac4e698 (patch)
tree      e9ce4bc598d06374bda906f18365984bf22a526a /mm
parent    4291a610eef89d0d5c69d9a10ee6560e1aa36c74 (diff)
Merge with Linux 2.1.55. More bugfixes and goodies from my private
CVS archive.
Diffstat (limited to 'mm')
-rw-r--r--  mm/filemap.c     8
-rw-r--r--  mm/memory.c     63
-rw-r--r--  mm/mmap.c       66
-rw-r--r--  mm/slab.c       17
-rw-r--r--  mm/swapfile.c  121
-rw-r--r--  mm/vmscan.c    143
6 files changed, 251 insertions(+), 167 deletions(-)
diff --git a/mm/filemap.c b/mm/filemap.c
index 03e82469e..e6e4cd424 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -894,8 +894,9 @@ no_page:
static inline int do_write_page(struct inode * inode, struct file * file,
const char * page, unsigned long offset)
{
- int old_fs, retval;
+ int retval;
unsigned long size;
+ unsigned long old_fs;
size = offset + PAGE_SIZE;
/* refuse to extend file size.. */
@@ -954,11 +955,14 @@ static int filemap_write_page(struct vm_area_struct * vma,
file.f_reada = 0;
/*
- * WSH: could vm_area struct (and inode) be released while writing?
+ * If a task terminates while we're swapping the page, the vma
+ * and dentry could be released ... increment the count to be safe.
*/
+ dget(dentry);
down(&inode->i_sem);
result = do_write_page(inode, &file, (const char *) page, offset);
up(&inode->i_sem);
+ dput(dentry);
return result;
}
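The change above pins the dentry around the write: if the task owning the mapping exits while the page is being written back, the vma and its dentry could otherwise go away underneath do_write_page(). A kernel-style sketch of the same pattern, using only the calls visible in this hunk (dget/dput, down/up on i_sem); write_one_page() is a hypothetical stand-in for do_write_page(), not a kernel function:

/*
 * Sketch only -- illustrates the reference-counting pattern added above.
 * write_one_page() is a hypothetical helper.
 */
static int write_page_pinned(struct dentry *dentry, struct file *file,
                             const char *page, unsigned long offset)
{
	struct inode *inode = dentry->d_inode;
	int result;

	dget(dentry);			/* hold the dentry: the owning task may exit */
	down(&inode->i_sem);		/* serialise writers on this inode */
	result = write_one_page(inode, file, page, offset);
	up(&inode->i_sem);
	dput(dentry);			/* drop the temporary reference */
	return result;
}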
diff --git a/mm/memory.c b/mm/memory.c
index 3e94fd1f3..772fbbec2 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -143,8 +143,7 @@ void clear_page_tables(struct task_struct * tsk)
/*
* This function frees up all page tables of a process when it exits. It
- * is the same as "clear_page_tables()", except it also changes the process'
- * page table directory to the kernel page tables and then frees the old
+ * is the same as "clear_page_tables()", except it also frees the old
* page table directory.
*/
void free_page_tables(struct mm_struct * mm)
@@ -153,13 +152,15 @@ void free_page_tables(struct mm_struct * mm)
pgd_t * page_dir;
page_dir = mm->pgd;
- if (!page_dir || page_dir == swapper_pg_dir) {
- printk("Trying to free kernel page-directory: not good\n");
- return;
+ if (page_dir) {
+ if (page_dir == swapper_pg_dir) {
+ printk("free_page_tables: Trying to free kernel pgd\n");
+ return;
+ }
+ for (i = 0 ; i < USER_PTRS_PER_PGD ; i++)
+ free_one_pgd(page_dir + i);
+ pgd_free(page_dir);
}
- for (i = 0 ; i < USER_PTRS_PER_PGD ; i++)
- free_one_pgd(page_dir + i);
- pgd_free(page_dir);
}
int new_page_tables(struct task_struct * tsk)
@@ -291,19 +292,20 @@ int copy_page_range(struct mm_struct *dst, struct mm_struct *src,
return error;
}
-static inline void free_pte(pte_t page)
+/*
+ * Return indicates whether a page was freed so caller can adjust rss
+ */
+static inline int free_pte(pte_t page)
{
if (pte_present(page)) {
unsigned long addr = pte_page(page);
if (MAP_NR(addr) >= max_mapnr || PageReserved(mem_map+MAP_NR(addr)))
- return;
+ return 0;
free_page(addr);
- if (current->mm->rss <= 0)
- return;
- current->mm->rss--;
- return;
+ return 1;
}
swap_free(pte_val(page));
+ return 0;
}
static inline void forget_pte(pte_t page)
@@ -314,22 +316,24 @@ static inline void forget_pte(pte_t page)
}
}
-static inline void zap_pte_range(pmd_t * pmd, unsigned long address, unsigned long size)
+static inline int zap_pte_range(pmd_t * pmd, unsigned long address, unsigned long size)
{
pte_t * pte;
+ int freed;
if (pmd_none(*pmd))
- return;
+ return 0;
if (pmd_bad(*pmd)) {
printk("zap_pte_range: bad pmd (%08lx)\n", pmd_val(*pmd));
pmd_clear(pmd);
- return;
+ return 0;
}
pte = pte_offset(pmd, address);
address &= ~PMD_MASK;
if (address + size > PMD_SIZE)
size = PMD_SIZE - address;
size >>= PAGE_SHIFT;
+ freed = 0;
for (;;) {
pte_t page;
if (!size)
@@ -340,32 +344,36 @@ static inline void zap_pte_range(pmd_t * pmd, unsigned long address, unsigned lo
if (pte_none(page))
continue;
pte_clear(pte-1);
- free_pte(page);
+ freed += free_pte(page);
}
+ return freed;
}
-static inline void zap_pmd_range(pgd_t * dir, unsigned long address, unsigned long size)
+static inline int zap_pmd_range(pgd_t * dir, unsigned long address, unsigned long size)
{
pmd_t * pmd;
unsigned long end;
+ int freed;
if (pgd_none(*dir))
- return;
+ return 0;
if (pgd_bad(*dir)) {
printk("zap_pmd_range: bad pgd (%08lx)\n", pgd_val(*dir));
pgd_clear(dir);
- return;
+ return 0;
}
pmd = pmd_offset(dir, address);
address &= ~PGDIR_MASK;
end = address + size;
if (end > PGDIR_SIZE)
end = PGDIR_SIZE;
+ freed = 0;
do {
- zap_pte_range(pmd, address, end - address);
+ freed += zap_pte_range(pmd, address, end - address);
address = (address + PMD_SIZE) & PMD_MASK;
pmd++;
} while (address < end);
+ return freed;
}
/*
@@ -375,13 +383,22 @@ void zap_page_range(struct mm_struct *mm, unsigned long address, unsigned long s
{
pgd_t * dir;
unsigned long end = address + size;
+ int freed = 0;
dir = pgd_offset(mm, address);
while (address < end) {
- zap_pmd_range(dir, address, end - address);
+ freed += zap_pmd_range(dir, address, end - address);
address = (address + PGDIR_SIZE) & PGDIR_MASK;
dir++;
}
+ /*
+ * Update rss for the mm_struct (not necessarily current->mm)
+ */
+ if (mm->rss > 0) {
+ mm->rss -= freed;
+ if (mm->rss < 0)
+ mm->rss = 0;
+ }
}
static inline void zeromap_pte_range(pte_t * pte, unsigned long address, unsigned long size, pte_t zero_pte)
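The memory.c rework changes rss accounting from a side effect in free_pte() into a value that is passed back up: each zap_*_range() level returns how many pages it actually freed, and zap_page_range() applies the total once to the mm_struct being unmapped, which is not necessarily current->mm. A self-contained sketch of that "count upward, adjust once at the top" structure over a toy page table (toy types, not the kernel's):

/* Sketch with toy types: each level reports pages freed; only the top
 * level touches rss, and it clamps at zero exactly as the patch does.
 */
struct toy_mm { long rss; };

int zap_leaf(int *slot)
{
	if (*slot == 0)
		return 0;		/* nothing mapped in this slot */
	*slot = 0;			/* "free" the page */
	return 1;			/* report one freed page to the caller */
}

void zap_range(struct toy_mm *mm, int *table, int n)
{
	int i, freed = 0;

	for (i = 0; i < n; i++)
		freed += zap_leaf(&table[i]);

	if (mm->rss > 0) {		/* adjust the owner's rss, not current's */
		mm->rss -= freed;
		if (mm->rss < 0)
			mm->rss = 0;
	}
}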
diff --git a/mm/mmap.c b/mm/mmap.c
index be225e83b..212fc5150 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -373,10 +373,12 @@ unsigned long get_unmapped_area(unsigned long addr, unsigned long len)
* Unmapping between two intermediate points, making a hole.
*
* Case 4 involves the creation of 2 new areas, for each side of
- * the hole.
+ * the hole. If possible, we reuse the existing area rather than
+ * allocate a new one, and the return indicates whether the old
+ * area was reused.
*/
-static void unmap_fixup(struct vm_area_struct *area,
- unsigned long addr, size_t len)
+static int unmap_fixup(struct vm_area_struct *area, unsigned long addr,
+ size_t len, struct vm_area_struct **extra)
{
struct vm_area_struct *mpnt;
unsigned long end = addr + len;
@@ -391,7 +393,7 @@ static void unmap_fixup(struct vm_area_struct *area,
area->vm_ops->close(area);
if (area->vm_dentry)
dput(area->vm_dentry);
- return;
+ return 0;
}
/* Work out to one of the ends. */
@@ -403,17 +405,16 @@ static void unmap_fixup(struct vm_area_struct *area,
} else {
/* Unmapping a hole: area->vm_start < addr <= end < area->vm_end */
/* Add end mapping -- leave beginning for below */
- mpnt = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+ mpnt = *extra;
+ *extra = NULL;
- if (!mpnt)
- return;
mpnt->vm_mm = area->vm_mm;
mpnt->vm_start = end;
mpnt->vm_end = area->vm_end;
mpnt->vm_page_prot = area->vm_page_prot;
mpnt->vm_flags = area->vm_flags;
mpnt->vm_ops = area->vm_ops;
- mpnt->vm_offset += (end - area->vm_start);
+ mpnt->vm_offset = area->vm_offset + (end - area->vm_start);
mpnt->vm_dentry = dget(area->vm_dentry);
if (mpnt->vm_ops && mpnt->vm_ops->open)
mpnt->vm_ops->open(mpnt);
@@ -421,18 +422,18 @@ static void unmap_fixup(struct vm_area_struct *area,
insert_vm_struct(current->mm, mpnt);
}
- /* Construct whatever mapping is needed. */
- mpnt = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
- if (!mpnt)
- return;
- *mpnt = *area;
- if (mpnt->vm_ops && mpnt->vm_ops->open)
- mpnt->vm_ops->open(mpnt);
+ /* Close the current area ... */
if (area->vm_ops && area->vm_ops->close) {
+ end = area->vm_end; /* save new end */
area->vm_end = area->vm_start;
area->vm_ops->close(area);
+ area->vm_end = end;
}
- insert_vm_struct(current->mm, mpnt);
+ /* ... then reopen and reinsert. */
+ if (area->vm_ops && area->vm_ops->open)
+ area->vm_ops->open(area);
+ insert_vm_struct(current->mm, area);
+ return 1;
}
asmlinkage int sys_munmap(unsigned long addr, size_t len)
@@ -452,7 +453,8 @@ asmlinkage int sys_munmap(unsigned long addr, size_t len)
*/
int do_munmap(unsigned long addr, size_t len)
{
- struct vm_area_struct *mpnt, *next, *free;
+ struct vm_area_struct *mpnt, *next, *free, *extra;
+ int freed;
if ((addr & ~PAGE_MASK) || addr > TASK_SIZE || len > TASK_SIZE-addr)
return -EINVAL;
@@ -471,6 +473,14 @@ int do_munmap(unsigned long addr, size_t len)
if (!mpnt)
return 0;
+ /*
+ * We may need one additional vma to fix up the mappings ...
+ * and this is the last chance for an easy error exit.
+ */
+ extra = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+ if (!extra)
+ return -ENOMEM;
+
next = mpnt->vm_next;
/* we have mpnt->vm_next = next and addr < mpnt->vm_end */
@@ -486,19 +496,18 @@ int do_munmap(unsigned long addr, size_t len)
free = mpnt;
mpnt = next;
}
- if (free == NULL)
- return 0;
/* Ok - we have the memory areas we should free on the 'free' list,
* so release them, and unmap the page range..
* If one of the segments is only being partially unmapped,
* it will put new vm_area_struct(s) into the address space.
*/
- do {
+ freed = 0;
+ while ((mpnt = free) != NULL) {
unsigned long st, end, size;
- mpnt = free;
free = free->vm_next;
+ freed = 1;
remove_shared_vm_struct(mpnt);
@@ -514,12 +523,19 @@ int do_munmap(unsigned long addr, size_t len)
zap_page_range(current->mm, st, size);
flush_tlb_range(current->mm, st, end);
- unmap_fixup(mpnt, st, size);
+ /*
+ * Fix the mapping, and free the old area if it wasn't reused.
+ */
+ if (!unmap_fixup(mpnt, st, size, &extra))
+ kmem_cache_free(vm_area_cachep, mpnt);
+ }
- kmem_cache_free(vm_area_cachep, mpnt);
- } while (free);
+ /* Release the extra vma struct if it wasn't used */
+ if (extra)
+ kmem_cache_free(vm_area_cachep, extra);
- current->mm->mmap_cache = NULL; /* Kill the cache. */
+ if (freed)
+ current->mm->mmap_cache = NULL; /* Kill the cache. */
return 0;
}
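The mmap.c changes follow one rule: allocate the single extra vm_area_struct a hole-punch might need before anything is unmapped, so -ENOMEM can only be returned while the address space is still intact, and let unmap_fixup() report whether it reused the old area so the caller knows whether to free it. A user-space sketch of that "reserve first, commit later" shape over a simple region list (hypothetical structures, not the kernel API):

#include <errno.h>
#include <stdlib.h>

struct region {
	unsigned long start, end;
	struct region *next;
};

/* Remove [start, end) from a sorted, non-overlapping region list.
 * The one node a split might need is allocated up front; if no split
 * happens, it is released at the end.
 */
int remove_range(struct region *list, unsigned long start, unsigned long end)
{
	struct region *extra = malloc(sizeof(*extra));
	struct region *r;

	if (!extra)
		return -ENOMEM;			/* last chance for an easy error exit */

	for (r = list; r; r = r->next) {
		if (end <= r->start || r->end <= start)
			continue;		/* no overlap with this region */
		if (start <= r->start && end >= r->end) {
			r->start = r->end;	/* fully covered: mark empty (kernel would unlink) */
		} else if (start <= r->start) {
			r->start = end;		/* trim the front */
		} else if (end >= r->end) {
			r->end = start;		/* trim the tail */
		} else {
			/* punching a hole: the tail half consumes 'extra' */
			extra->start = end;
			extra->end   = r->end;
			extra->next  = r->next;
			r->end  = start;
			r->next = extra;
			extra = NULL;
		}
	}

	if (extra)
		free(extra);			/* the split never happened */
	return 0;
}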
diff --git a/mm/slab.c b/mm/slab.c
index fa89b9e20..19dd69a11 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -1582,6 +1582,12 @@ bad_slab:
} else
kmem_report_free_err("Bad obj addr", objp, cachep);
spin_unlock_irqrestore(&cachep->c_spinlock, save_flags);
+
+#if 1
+/* FORCE A KERNEL DUMP WHEN THIS HAPPENS. SPEAK IN ALL CAPS. GET THE CALL CHAIN. */
+*(int *) 0 = 0;
+#endif
+
return;
null_addr:
kmem_report_free_err("NULL ptr", objp, cachep);
@@ -1624,7 +1630,7 @@ kfree(const void *objp)
goto null_ptr;
nr = MAP_NR(objp);
if (nr >= max_mapnr)
- goto null_ptr;
+ goto bad_ptr;
/* Assume we own the page structure - hence no locking.
* If someone is misbehaving (eg. someone calling us with a bad
@@ -1647,8 +1653,15 @@ kfree(const void *objp)
return;
}
}
-null_ptr:
+bad_ptr:
printk(KERN_ERR "kfree: Bad obj %p\n", objp);
+
+#if 1
+/* FORCE A KERNEL DUMP WHEN THIS HAPPENS. SPEAK IN ALL CAPS. GET THE CALL CHAIN. */
+*(int *) 0 = 0;
+#endif
+
+null_ptr:
return;
}
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 400274268..b76b34237 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -21,6 +21,7 @@
#include <linux/malloc.h>
#include <linux/blkdev.h> /* for blk_size */
#include <linux/vmalloc.h>
+#include <linux/dcache.h>
#include <asm/dma.h>
#include <asm/system.h> /* for cli()/sti() */
@@ -169,7 +170,7 @@ void swap_free(unsigned long entry)
* from the beginning for this process..
*/
static inline int unuse_pte(struct vm_area_struct * vma, unsigned long address,
- pte_t *dir, unsigned int type, unsigned long page)
+ pte_t *dir, unsigned long entry, unsigned long page)
{
pte_t pte = *dir;
@@ -178,24 +179,24 @@ static inline int unuse_pte(struct vm_area_struct * vma, unsigned long address,
if (pte_present(pte)) {
struct page *pg;
unsigned long page_nr = MAP_NR(pte_page(pte));
+ unsigned long pg_swap_entry;
+
if (page_nr >= max_mapnr)
return 0;
pg = mem_map + page_nr;
- if (!in_swap_cache(pg))
+ if (!(pg_swap_entry = in_swap_cache(pg)))
return 0;
- if (SWP_TYPE(in_swap_cache(pg)) != type)
+ if (SWP_TYPE(pg_swap_entry) != SWP_TYPE(entry))
return 0;
delete_from_swap_cache(pg);
set_pte(dir, pte_mkdirty(pte));
- return 0;
- }
- if (SWP_TYPE(pte_val(pte)) != type)
- return 0;
- read_swap_page(pte_val(pte), (char *) page);
- if (pte_val(*dir) != pte_val(pte)) {
+ if (pg_swap_entry != entry)
+ return 0;
free_page(page);
return 1;
}
+ if (pte_val(pte) != entry)
+ return 0;
set_pte(dir, pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot))));
flush_tlb_page(vma, address);
++vma->vm_mm->rss;
@@ -205,7 +206,7 @@ static inline int unuse_pte(struct vm_area_struct * vma, unsigned long address,
static inline int unuse_pmd(struct vm_area_struct * vma, pmd_t *dir,
unsigned long address, unsigned long size, unsigned long offset,
- unsigned int type, unsigned long page)
+ unsigned long entry, unsigned long page)
{
pte_t * pte;
unsigned long end;
@@ -224,7 +225,8 @@ static inline int unuse_pmd(struct vm_area_struct * vma, pmd_t *dir,
if (end > PMD_SIZE)
end = PMD_SIZE;
do {
- if (unuse_pte(vma, offset+address-vma->vm_start, pte, type, page))
+ if (unuse_pte(vma, offset+address-vma->vm_start, pte, entry,
+ page))
return 1;
address += PAGE_SIZE;
pte++;
@@ -234,7 +236,7 @@ static inline int unuse_pmd(struct vm_area_struct * vma, pmd_t *dir,
static inline int unuse_pgd(struct vm_area_struct * vma, pgd_t *dir,
unsigned long address, unsigned long size,
- unsigned int type, unsigned long page)
+ unsigned long entry, unsigned long page)
{
pmd_t * pmd;
unsigned long offset, end;
@@ -253,7 +255,8 @@ static inline int unuse_pgd(struct vm_area_struct * vma, pgd_t *dir,
if (end > PGDIR_SIZE)
end = PGDIR_SIZE;
do {
- if (unuse_pmd(vma, pmd, address, end - address, offset, type, page))
+ if (unuse_pmd(vma, pmd, address, end - address, offset, entry,
+ page))
return 1;
address = (address + PMD_SIZE) & PMD_MASK;
pmd++;
@@ -262,11 +265,12 @@ static inline int unuse_pgd(struct vm_area_struct * vma, pgd_t *dir,
}
static int unuse_vma(struct vm_area_struct * vma, pgd_t *pgdir,
- unsigned long start, unsigned long end,
- unsigned int type, unsigned long page)
+ unsigned long entry, unsigned long page)
{
+ unsigned long start = vma->vm_start, end = vma->vm_end;
+
while (start < end) {
- if (unuse_pgd(vma, pgdir, start, end - start, type, page))
+ if (unuse_pgd(vma, pgdir, start, end - start, entry, page))
return 1;
start = (start + PGDIR_SIZE) & PGDIR_MASK;
pgdir++;
@@ -274,7 +278,8 @@ static int unuse_vma(struct vm_area_struct * vma, pgd_t *pgdir,
return 0;
}
-static int unuse_process(struct mm_struct * mm, unsigned int type, unsigned long page)
+static int unuse_process(struct mm_struct * mm, unsigned long entry,
+ unsigned long page)
{
struct vm_area_struct* vma;
@@ -283,43 +288,70 @@ static int unuse_process(struct mm_struct * mm, unsigned int type, unsigned long
*/
if (!mm || mm == &init_mm)
return 0;
- vma = mm->mmap;
- while (vma) {
+ for (vma = mm->mmap; vma; vma = vma->vm_next) {
pgd_t * pgd = pgd_offset(mm, vma->vm_start);
- if (unuse_vma(vma, pgd, vma->vm_start, vma->vm_end, type, page))
+ if (unuse_vma(vma, pgd, entry, page))
return 1;
- vma = vma->vm_next;
+ }
+ return 0;
+}
+
+static unsigned long find_swap_entry(int type)
+{
+ struct swap_info_struct * p = &swap_info[type];
+ int i;
+
+ for (i = 1 ; i < p->max ; i++) {
+ if (p->swap_map[i] > 0 && p->swap_map[i] != 0x80)
+ return SWP_ENTRY(type, i);
}
return 0;
}
/*
- * To avoid races, we repeat for each process after having
- * swapped something in. That gets rid of a few pesky races,
- * and "swapoff" isn't exactly timing critical.
+ * We completely avoid races by reading each swap page in advance,
+ * and then searching for the process using it. All the necessary
+ * page table adjustments can then be made atomically.
*/
static int try_to_unuse(unsigned int type)
{
- unsigned long page = get_free_page(GFP_KERNEL);
+ unsigned long page = 0;
struct task_struct *p;
+ unsigned long entry;
- if (!page)
- return -ENOMEM;
-again:
- read_lock(&tasklist_lock);
- for_each_task(p) {
- read_unlock(&tasklist_lock);
- if(unuse_process(p->mm, type, page)) {
- page = get_free_page(GFP_KERNEL);
- if(!page)
+ /*
+ * Find all swap entries in use ...
+ */
+ while ((entry = find_swap_entry(type)) != 0) {
+ if (!page) {
+ page = __get_free_page(GFP_KERNEL);
+ if (!page)
return -ENOMEM;
- goto again;
}
+
+ /*
+ * Read in the page, and then free the swap page.
+ */
+ read_swap_page(entry, (char *) page);
+
read_lock(&tasklist_lock);
+ for_each_task(p) {
+ if (unuse_process(p->mm, entry, page)) {
+ page = 0;
+ goto unlock;
+ }
+ }
+ unlock:
+ read_unlock(&tasklist_lock);
+ if (page) {
+ printk("try_to_unuse: didn't find entry %8lx\n",
+ entry);
+ swap_free(entry);
+ }
}
- read_unlock(&tasklist_lock);
- free_page(page);
+ if (page)
+ free_page(page);
return 0;
}
@@ -415,18 +447,25 @@ out:
int get_swaparea_info(char *buf)
{
+ char * page = (char *) __get_free_page(GFP_KERNEL);
struct swap_info_struct *ptr = swap_info;
int i, j, len = 0, usedswap;
+ if (!page)
+ return -ENOMEM;
+
len += sprintf(buf, "Filename\t\t\tType\t\tSize\tUsed\tPriority\n");
- for (i = 0 ; i < nr_swapfiles ; i++, ptr++)
+ for (i = 0 ; i < nr_swapfiles ; i++, ptr++) {
if (ptr->flags & SWP_USED) {
- len += sprintf(buf + len, "%-31s ", ptr->swap_file->d_name.name);
+ char * path = d_path(ptr->swap_file, page, PAGE_SIZE);
+
+ len += sprintf(buf + len, "%-31s ", path);
- if (ptr->swap_file)
+ if (!ptr->swap_device)
len += sprintf(buf + len, "file\t\t");
else
len += sprintf(buf + len, "partition\t");
+
usedswap = 0;
for (j = 0; j < ptr->max; ++j)
switch (ptr->swap_map[j]) {
@@ -439,6 +478,8 @@ int get_swaparea_info(char *buf)
len += sprintf(buf + len, "%d\t%d\t%d\n", ptr->pages << (PAGE_SHIFT - 10),
usedswap << (PAGE_SHIFT - 10), ptr->prio);
}
+ }
+ free_page((unsigned long) page);
return len;
}
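The swapfile.c rewrite turns try_to_unuse() inside out: find_swap_entry() picks a swap entry that is still referenced, the page is read in before any page tables are touched, and only then is every process scanned so each matching pte can be switched from swap entry to present page in one step. A standalone sketch of that "read ahead, then substitute" loop over a toy swap map (hypothetical names and types, not the kernel's):

#include <stdlib.h>
#include <string.h>

#define NSLOTS	8
#define PAGE_SZ	4096

static int  swap_map[NSLOTS];			/* >0 while some pte references the slot */
static char swap_area[NSLOTS][PAGE_SZ];		/* stand-in for the swap device */

struct toy_pte {
	char *page;		/* non-NULL when "present" */
	int   slot;		/* swap slot when not present */
};

static int find_used_slot(void)
{
	int i;

	for (i = 1; i < NSLOTS; i++)
		if (swap_map[i] > 0)
			return i;
	return 0;		/* nothing left in use */
}

int unuse_all(struct toy_pte *ptes, int n)
{
	int slot;

	while ((slot = find_used_slot()) != 0) {
		char *page = malloc(PAGE_SZ);
		int i;

		if (!page)
			return -1;
		/* read the swap page in *before* touching any pte */
		memcpy(page, swap_area[slot], PAGE_SZ);

		for (i = 0; i < n; i++) {
			if (!ptes[i].page && ptes[i].slot == slot) {
				ptes[i].page = page;	/* substitute in one step */
				ptes[i].slot = 0;
				swap_map[slot]--;
				page = NULL;		/* ownership moved to the pte */
				break;
			}
		}
		if (page) {		/* stale entry: no pte referenced it */
			swap_map[slot] = 0;
			free(page);
		}
	}
	return 0;
}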
diff --git a/mm/vmscan.c b/mm/vmscan.c
index a6871c192..dba73f4ed 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -269,74 +269,78 @@ static int swap_out_process(struct task_struct * p, int dma, int wait)
return 0;
}
+/*
+ * Select the task with maximal swap_cnt and try to swap out a page.
+ * N.B. This function returns only 0 or 1. Return values != 1 from
+ * the lower level routines result in continued processing.
+ */
static int swap_out(unsigned int priority, int dma, int wait)
{
- static int skip_factor = 0;
- int limit = nr_tasks - 1;
- int loop, counter, i;
- struct task_struct *p;
-
+ struct task_struct * p, * pbest;
+ int counter, assign, max_cnt;
+
+ /*
+ * We make one or two passes through the task list, indexed by
+ * assign = {0, 1}:
+ * Pass 1: select the swappable task with maximal swap_cnt.
+ * Pass 2: assign new swap_cnt values, then select as above.
+ * With this approach, there's no need to remember the last task
+ * swapped out. If the swap-out fails, we clear swap_cnt so the
+ * task won't be selected again until all others have been tried.
+ */
counter = ((PAGEOUT_WEIGHT * nr_tasks) >> 10) >> priority;
- if(skip_factor > nr_tasks)
- skip_factor = 0;
-
- read_lock(&tasklist_lock);
- p = init_task.next_task;
- i = skip_factor;
- while(i--)
- p = p->next_task;
- for(; counter >= 0; counter--) {
- /* Check if task is suitable for swapping. */
- loop = 0;
- while(1) {
- if(!--limit) {
- limit = nr_tasks - 1;
- /* See if all processes are unswappable or
- * already swapped out.
+ for (; counter >= 0; counter--) {
+ assign = 0;
+ max_cnt = 0;
+ pbest = NULL;
+ select:
+ read_lock(&tasklist_lock);
+ p = init_task.next_task;
+ for (; p != &init_task; p = p->next_task) {
+ if (!p->swappable)
+ continue;
+ if (p->mm->rss <= 0)
+ continue;
+ if (assign) {
+ /*
+ * If we didn't select a task on pass 1,
+ * assign each task a new swap_cnt.
+ * Normalise the number of pages swapped
+ * by multiplying by (RSS / 1MB)
*/
- if (loop)
- goto out;
- loop = 1;
+ p->swap_cnt = AGE_CLUSTER_SIZE(p->mm->rss);
+ }
+ if (p->swap_cnt > max_cnt) {
+ max_cnt = p->swap_cnt;
+ pbest = p;
}
- if (p->swappable && p->mm->rss)
- break;
- if((p = p->next_task) == &init_task)
- p = p->next_task;
- }
- skip_factor++;
-
- /* Determine the number of pages to swap from this process. */
- if (!p->swap_cnt) {
- /* Normalise the number of pages swapped by
- multiplying by (RSS / 1MB) */
- p->swap_cnt = AGE_CLUSTER_SIZE(p->mm->rss);
}
- if (!--p->swap_cnt)
- skip_factor++;
read_unlock(&tasklist_lock);
+ if (!pbest) {
+ if (!assign) {
+ assign = 1;
+ goto select;
+ }
+ goto out;
+ }
+ pbest->swap_cnt--;
- switch (swap_out_process(p, dma, wait)) {
+ switch (swap_out_process(pbest, dma, wait)) {
case 0:
- if (p->swap_cnt)
- skip_factor++;
+ /*
+ * Clear swap_cnt so we don't look at this task
+ * again until we've tried all of the others.
+ * (We didn't block, so the task is still here.)
+ */
+ pbest->swap_cnt = 0;
break;
case 1:
return 1;
default:
break;
};
-
- /* Whoever we swapped may not even exist now, in fact we cannot
- * assume anything about the list we were searching previously.
- */
- read_lock(&tasklist_lock);
- p = init_task.next_task;
- i = skip_factor;
- while(i--)
- p = p->next_task;
}
out:
- read_unlock(&tasklist_lock);
return 0;
}
@@ -362,9 +366,6 @@ static inline int do_try_to_free_page(int priority, int dma, int wait)
return 1;
state = 1;
case 1:
- shrink_dcache();
- state = 2;
- case 2:
/*
* We shouldn't have a priority here:
* If we're low on memory we should
@@ -373,11 +374,11 @@ static inline int do_try_to_free_page(int priority, int dma, int wait)
*/
if (kmem_cache_reap(0, dma, wait))
return 1;
- state = 3;
- case 3:
+ state = 2;
+ case 2:
if (shm_swap(i, dma))
return 1;
- state = 4;
+ state = 3;
default:
if (swap_out(i, dma, wait))
return 1;
@@ -477,31 +478,23 @@ int kswapd(void *unused)
void swap_tick(void)
{
- int want_wakeup = 0;
- static int last_wakeup_low = 0;
-
- if ((nr_free_pages + atomic_read(&nr_async_pages)) < free_pages_low) {
- if (last_wakeup_low)
- want_wakeup = jiffies >= next_swap_jiffies;
- else
- last_wakeup_low = want_wakeup = 1;
- }
- else if (((nr_free_pages + atomic_read(&nr_async_pages)) < free_pages_high) &&
- jiffies >= next_swap_jiffies) {
- last_wakeup_low = 0;
+ int want_wakeup = 0, memory_low = 0;
+ int pages = nr_free_pages + atomic_read(&nr_async_pages);
+
+ if (pages < free_pages_low)
+ memory_low = want_wakeup = 1;
+ else if (pages < free_pages_high && jiffies >= next_swap_jiffies)
want_wakeup = 1;
- }
if (want_wakeup) {
if (!kswapd_awake) {
wake_up(&kswapd_wait);
need_resched = 1;
}
- /* low on memory, we need to start swapping soon */
- if(last_wakeup_low)
- next_swap_jiffies = jiffies;
- else
- next_swap_jiffies = jiffies + swapout_interval;
+ /* Set the next wake-up time */
+ next_swap_jiffies = jiffies;
+ if (!memory_low)
+ next_swap_jiffies += swapout_interval;
}
timer_active |= (1<<SWAP_TIMER);
}
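The new swap_out() replaces the old skip_factor bookkeeping with a two-pass victim search: pass 1 scans for the swappable task with the largest remaining swap_cnt; if none qualifies, pass 2 refills every task's budget from its RSS and selects again, and a failed swap-out zeroes the victim's swap_cnt so the other tasks are tried before it comes up again. A standalone sketch of that selection loop over an array of toy tasks (toy fields and a stand-in AGE_CLUSTER macro, not the kernel structures):

#include <stddef.h>

struct toy_task {
	int  swappable;
	long rss;
	long swap_cnt;		/* remaining page-out budget */
};

/* stand-in for AGE_CLUSTER_SIZE(): budget scales with resident size */
#define AGE_CLUSTER(rss)	(((rss) >> 8) + 1)

/* Pick the task to page out from, or NULL when nothing is swappable. */
struct toy_task *select_victim(struct toy_task *tasks, int n)
{
	struct toy_task *best;
	long max_cnt;
	int assign = 0, i;

select:
	best = NULL;
	max_cnt = 0;
	for (i = 0; i < n; i++) {
		struct toy_task *p = &tasks[i];

		if (!p->swappable || p->rss <= 0)
			continue;
		if (assign)			/* pass 2: hand out fresh budgets */
			p->swap_cnt = AGE_CLUSTER(p->rss);
		if (p->swap_cnt > max_cnt) {
			max_cnt = p->swap_cnt;
			best = p;
		}
	}
	if (!best && !assign) {			/* pass 1 found nothing: refill, retry */
		assign = 1;
		goto select;
	}
	if (best)
		best->swap_cnt--;		/* charge one attempt to the winner */
	return best;
}

On a failed attempt the caller would set the returned task's swap_cnt to 0, mirroring the "case 0" branch above, so the same task is not reselected until every other candidate has had its budget exhausted.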