author     Ralf Baechle <ralf@linux-mips.org>    2000-03-19 01:28:40 +0000
committer  Ralf Baechle <ralf@linux-mips.org>    2000-03-19 01:28:40 +0000
commit     8abb719409c9060a7c0676f76e9182c1e0b8ca46 (patch)
tree       b88cc5a6cd513a04a512b7e6215c873c90a1c5dd /mm
parent     f01bd7aeafd95a08aafc9e3636bb26974df69d82 (diff)
Merge with 2.3.99-pre1.
Diffstat (limited to 'mm')
-rw-r--r--  mm/filemap.c    | 577
-rw-r--r--  mm/mlock.c      |   5
-rw-r--r--  mm/mmap.c       |   4
-rw-r--r--  mm/mprotect.c   |   5
-rw-r--r--  mm/mremap.c     |   2
-rw-r--r--  mm/page_alloc.c |  46
-rw-r--r--  mm/vmscan.c     |   3
7 files changed, 606 insertions, 36 deletions
diff --git a/mm/filemap.c b/mm/filemap.c index b5febc2e5..3fb7d011c 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -25,6 +25,7 @@ #include <asm/pgalloc.h> #include <asm/uaccess.h> +#include <asm/mman.h> #include <linux/highmem.h> @@ -220,15 +221,18 @@ int shrink_mmap(int priority, int gfp_mask, zone_t *zone) struct list_head * page_lru, * dispose; struct page * page; + if (!zone) + BUG(); + count = nr_lru_pages / (priority+1); spin_lock(&pagemap_lru_lock); - while (count > 0 && (page_lru = lru_cache.prev) != &lru_cache) { + while (count > 0 && (page_lru = zone->lru_cache.prev) != &zone->lru_cache) { page = list_entry(page_lru, struct page, lru); list_del(page_lru); - dispose = &lru_cache; + dispose = &zone->lru_cache; if (test_and_clear_bit(PG_referenced, &page->flags)) /* Roll the page at the top of the lru list, * we could also be more aggressive putting @@ -355,8 +359,8 @@ made_buffer_progress: nr_lru_pages--; out: - list_splice(&young, &lru_cache); - list_splice(&old, lru_cache.prev); + list_splice(&young, &zone->lru_cache); + list_splice(&old, zone->lru_cache.prev); spin_unlock(&pagemap_lru_lock); @@ -1294,6 +1298,61 @@ out: } /* + * Read-ahead and flush behind for MADV_SEQUENTIAL areas. Since we are + * sure this is sequential access, we don't need a flexible read-ahead + * window size -- we can always use a large fixed size window. + */ +static void nopage_sequential_readahead(struct vm_area_struct * vma, + unsigned long pgoff, unsigned long filesize) +{ + unsigned long ra_window; + + ra_window = get_max_readahead(vma->vm_file->f_dentry->d_inode); + ra_window = CLUSTER_OFFSET(ra_window + CLUSTER_PAGES - 1); + + /* vm_raend is zero if we haven't read ahead in this area yet. */ + if (vma->vm_raend == 0) + vma->vm_raend = vma->vm_pgoff + ra_window; + + /* + * If we've just faulted the page half-way through our window, + * then schedule reads for the next window, and release the + * pages in the previous window. + */ + if ((pgoff + (ra_window >> 1)) == vma->vm_raend) { + unsigned long start = vma->vm_pgoff + vma->vm_raend; + unsigned long end = start + ra_window; + + if (end > ((vma->vm_end >> PAGE_SHIFT) + vma->vm_pgoff)) + end = (vma->vm_end >> PAGE_SHIFT) + vma->vm_pgoff; + if (start > end) + return; + + while ((start < end) && (start < filesize)) { + if (read_cluster_nonblocking(vma->vm_file, + start, filesize) < 0) + break; + start += CLUSTER_PAGES; + } + run_task_queue(&tq_disk); + + /* if we're far enough past the beginning of this area, + recycle pages that are in the previous window. */ + if (vma->vm_raend > (vma->vm_pgoff + ra_window + ra_window)) { + unsigned long window = ra_window << PAGE_SHIFT; + + end = vma->vm_start + (vma->vm_raend << PAGE_SHIFT); + end -= window + window; + filemap_sync(vma, end - window, window, MS_INVALIDATE); + } + + vma->vm_raend += ra_window; + } + + return; +} + +/* * filemap_nopage() is invoked via the vma operations vector for a * mapped memory region to read in file data during a page fault. * @@ -1339,6 +1398,12 @@ retry_find: goto page_not_uptodate; success: + /* + * Try read-ahead for sequential areas. + */ + if (VM_SequentialReadHint(area)) + nopage_sequential_readahead(area, pgoff, size); + /* * Found the page and have a reference on it, need to check sharing * and possibly copy it over to another page.. 
@@ -1355,7 +1420,7 @@ success: page_cache_release(page); return new_page; } - + flush_page_to_ram(old_page); return old_page; @@ -1367,7 +1432,7 @@ no_cached_page: * Otherwise, we're off the end of a privately mapped file, * so we need to map a zero page. */ - if (pgoff < size) + if ((pgoff < size) && !VM_RandomReadHint(area)) error = read_cluster_nonblocking(file, pgoff, size); else error = page_cache_read(file, pgoff); @@ -1646,7 +1711,6 @@ int generic_file_mmap(struct file * file, struct vm_area_struct * vma) return 0; } - /* * The msync() system call. */ @@ -1727,6 +1791,505 @@ out: return error; } +static inline void setup_read_behavior(struct vm_area_struct * vma, + int behavior) +{ + VM_ClearReadHint(vma); + switch(behavior) { + case MADV_SEQUENTIAL: + vma->vm_flags |= VM_SEQ_READ; + break; + case MADV_RANDOM: + vma->vm_flags |= VM_RAND_READ; + break; + default: + break; + } + return; +} + +static long madvise_fixup_start(struct vm_area_struct * vma, + unsigned long end, int behavior) +{ + struct vm_area_struct * n; + + n = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); + if (!n) + return -EAGAIN; + *n = *vma; + n->vm_end = end; + setup_read_behavior(n, behavior); + n->vm_raend = 0; + get_file(n->vm_file); + if (n->vm_ops && n->vm_ops->open) + n->vm_ops->open(n); + vmlist_modify_lock(vma->vm_mm); + vma->vm_pgoff += (end - vma->vm_start) >> PAGE_SHIFT; + vma->vm_start = end; + insert_vm_struct(current->mm, n); + vmlist_modify_unlock(vma->vm_mm); + return 0; +} + +static long madvise_fixup_end(struct vm_area_struct * vma, + unsigned long start, int behavior) +{ + struct vm_area_struct * n; + + n = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); + if (!n) + return -EAGAIN; + *n = *vma; + n->vm_start = start; + n->vm_pgoff += (n->vm_start - vma->vm_start) >> PAGE_SHIFT; + setup_read_behavior(n, behavior); + n->vm_raend = 0; + get_file(n->vm_file); + if (n->vm_ops && n->vm_ops->open) + n->vm_ops->open(n); + vmlist_modify_lock(vma->vm_mm); + vma->vm_end = start; + insert_vm_struct(current->mm, n); + vmlist_modify_unlock(vma->vm_mm); + return 0; +} + +static long madvise_fixup_middle(struct vm_area_struct * vma, + unsigned long start, unsigned long end, int behavior) +{ + struct vm_area_struct * left, * right; + + left = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); + if (!left) + return -EAGAIN; + right = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); + if (!right) { + kmem_cache_free(vm_area_cachep, left); + return -EAGAIN; + } + *left = *vma; + *right = *vma; + left->vm_end = start; + right->vm_start = end; + right->vm_pgoff += (right->vm_start - left->vm_start) >> PAGE_SHIFT; + left->vm_raend = 0; + right->vm_raend = 0; + atomic_add(2, &vma->vm_file->f_count); + + if (vma->vm_ops && vma->vm_ops->open) { + vma->vm_ops->open(left); + vma->vm_ops->open(right); + } + vmlist_modify_lock(vma->vm_mm); + vma->vm_pgoff += (start - vma->vm_start) >> PAGE_SHIFT; + vma->vm_start = start; + vma->vm_end = end; + setup_read_behavior(vma, behavior); + vma->vm_raend = 0; + insert_vm_struct(current->mm, left); + insert_vm_struct(current->mm, right); + vmlist_modify_unlock(vma->vm_mm); + return 0; +} + +/* + * We can potentially split a vm area into separate + * areas, each area with its own behavior. 
+ */ +static long madvise_behavior(struct vm_area_struct * vma, + unsigned long start, unsigned long end, int behavior) +{ + int error = 0; + + /* This caps the number of vma's this process can own */ + if (vma->vm_mm->map_count > MAX_MAP_COUNT) + return -ENOMEM; + + if (start == vma->vm_start) { + if (end == vma->vm_end) { + setup_read_behavior(vma, behavior); + vma->vm_raend = 0; + } else + error = madvise_fixup_start(vma, end, behavior); + } else { + if (end == vma->vm_end) + error = madvise_fixup_end(vma, start, behavior); + else + error = madvise_fixup_middle(vma, start, end, behavior); + } + + return error; +} + +/* + * Schedule all required I/O operations, then run the disk queue + * to make sure they are started. Do not wait for completion. + */ +static long madvise_willneed(struct vm_area_struct * vma, + unsigned long start, unsigned long end) +{ + long error = -EBADF; + struct file * file; + unsigned long size, rlim_rss; + + /* Doesn't work if there's no mapped file. */ + if (!vma->vm_file) + return error; + file = vma->vm_file; + size = (file->f_dentry->d_inode->i_size + PAGE_CACHE_SIZE - 1) >> + PAGE_CACHE_SHIFT; + + start = ((start - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; + if (end > vma->vm_end) + end = vma->vm_end; + end = ((end - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; + + /* Make sure this doesn't exceed the process's max rss. */ + error = -EIO; + rlim_rss = current->rlim ? current->rlim[RLIMIT_RSS].rlim_cur : + LONG_MAX; /* default: see resource.h */ + if ((vma->vm_mm->rss + (end - start)) > rlim_rss) + return error; + + /* round to cluster boundaries if this isn't a "random" area. */ + if (!VM_RandomReadHint(vma)) { + start = CLUSTER_OFFSET(start); + end = CLUSTER_OFFSET(end + CLUSTER_PAGES - 1); + + while ((start < end) && (start < size)) { + error = read_cluster_nonblocking(file, start, size); + start += CLUSTER_PAGES; + if (error < 0) + break; + } + } else { + while ((start < end) && (start < size)) { + error = page_cache_read(file, start); + start++; + if (error < 0) + break; + } + } + + /* Don't wait for someone else to push these requests. */ + run_task_queue(&tq_disk); + + return error; +} + +/* + * Application no longer needs these pages. If the pages are dirty, + * it's OK to just throw them away. The app will be more careful about + * data it wants to keep. Be sure to free swap resources too. The + * zap_page_range call sets things up for shrink_mmap to actually free + * these pages later if no one else has touched them in the meantime, + * although we could add these pages to a global reuse list for + * shrink_mmap to pick up before reclaiming other pages. + * + * NB: This interface discards data rather than pushes it out to swap, + * as some implementations do. This has performance implications for + * applications like large transactional databases which want to discard + * pages in anonymous maps after committing to backing store the data + * that was kept in them. There is no reason to write this data out to + * the swap area if the application is discarding it. + * + * An interface that causes the system to free clean pages and flush + * dirty pages is already available as msync(MS_INVALIDATE). + */ +static long madvise_dontneed(struct vm_area_struct * vma, + unsigned long start, unsigned long end) +{ + if (vma->vm_flags & VM_LOCKED) + return -EINVAL; + + lock_kernel(); /* is this really necessary? 
*/ + + flush_cache_range(vma->vm_mm, start, end); + zap_page_range(vma->vm_mm, start, end - start); + flush_tlb_range(vma->vm_mm, start, end); + + unlock_kernel(); + return 0; +} + +static long madvise_vma(struct vm_area_struct * vma, unsigned long start, + unsigned long end, int behavior) +{ + long error = -EBADF; + + switch (behavior) { + case MADV_NORMAL: + case MADV_SEQUENTIAL: + case MADV_RANDOM: + error = madvise_behavior(vma, start, end, behavior); + break; + + case MADV_WILLNEED: + error = madvise_willneed(vma, start, end); + break; + + case MADV_DONTNEED: + error = madvise_dontneed(vma, start, end); + break; + + default: + error = -EINVAL; + break; + } + + return error; +} + +/* + * The madvise(2) system call. + * + * Applications can use madvise() to advise the kernel how it should + * handle paging I/O in this VM area. The idea is to help the kernel + * use appropriate read-ahead and caching techniques. The information + * provided is advisory only, and can be safely disregarded by the + * kernel without affecting the correct operation of the application. + * + * behavior values: + * MADV_NORMAL - the default behavior is to read clusters. This + * results in some read-ahead and read-behind. + * MADV_RANDOM - the system should read the minimum amount of data + * on any access, since it is unlikely that the appli- + * cation will need more than what it asks for. + * MADV_SEQUENTIAL - pages in the given range will probably be accessed + * once, so they can be aggressively read ahead, and + * can be freed soon after they are accessed. + * MADV_WILLNEED - the application is notifying the system to read + * some pages ahead. + * MADV_DONTNEED - the application is finished with the given range, + * so the kernel can free resources associated with it. + * + * return values: + * zero - success + * -EINVAL - start + len < 0, start is not page-aligned, + * "behavior" is not a valid value, or application + * is attempting to release locked or shared pages. + * -ENOMEM - addresses in the specified range are not currently + * mapped, or are outside the AS of the process. + * -EIO - an I/O error occurred while paging in data. + * -EBADF - map exists, but area maps something that isn't a file. + * -EAGAIN - a kernel resource was temporarily unavailable. + */ +asmlinkage long sys_madvise(unsigned long start, size_t len, int behavior) +{ + unsigned long end; + struct vm_area_struct * vma; + int unmapped_error = 0; + int error = -EINVAL; + + down(¤t->mm->mmap_sem); + + if (start & ~PAGE_MASK) + goto out; + len = (len + ~PAGE_MASK) & PAGE_MASK; + end = start + len; + if (end < start) + goto out; + + error = 0; + if (end == start) + goto out; + + /* + * If the interval [start,end) covers some unmapped address + * ranges, just ignore them, but return -ENOMEM at the end. + */ + vma = find_vma(current->mm, start); + for (;;) { + /* Still start < end. */ + error = -ENOMEM; + if (!vma) + goto out; + + /* Here start < vma->vm_end. */ + if (start < vma->vm_start) { + unmapped_error = -ENOMEM; + start = vma->vm_start; + } + + /* Here vma->vm_start <= start < vma->vm_end. */ + if (end <= vma->vm_end) { + if (start < end) { + error = madvise_vma(vma, start, end, + behavior); + if (error) + goto out; + } + error = unmapped_error; + goto out; + } + + /* Here vma->vm_start <= start < vma->vm_end < end. 
*/ + error = madvise_vma(vma, start, vma->vm_end, behavior); + if (error) + goto out; + start = vma->vm_end; + vma = vma->vm_next; + } + +out: + up(¤t->mm->mmap_sem); + return error; +} + +/* + * Later we can get more picky about what "in core" means precisely. + * For now, simply check to see if the page is in the page cache, + * and is up to date; i.e. that no page-in operation would be required + * at this time if an application were to map and access this page. + */ +static unsigned char mincore_page(struct vm_area_struct * vma, + unsigned long pgoff) +{ + unsigned char present = 0; + struct address_space * as = &vma->vm_file->f_dentry->d_inode->i_data; + struct page * page, ** hash = page_hash(as, pgoff); + + spin_lock(&pagecache_lock); + page = __find_page_nolock(as, pgoff, *hash); + if ((page) && (Page_Uptodate(page))) + present = 1; + spin_unlock(&pagecache_lock); + + return present; +} + +static long mincore_vma(struct vm_area_struct * vma, + unsigned long start, unsigned long end, unsigned char * vec) +{ + long error, i, remaining; + unsigned char * tmp; + + error = -ENOMEM; + if (!vma->vm_file) + return error; + + start = ((start - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; + if (end > vma->vm_end) + end = vma->vm_end; + end = ((end - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; + + error = -EAGAIN; + tmp = (unsigned char *) __get_free_page(GFP_KERNEL); + if (!tmp) + return error; + + /* (end - start) is # of pages, and also # of bytes in "vec */ + remaining = (end - start), + + error = 0; + for (i = 0; remaining > 0; remaining -= PAGE_SIZE, i++) { + int j = 0; + long thispiece = (remaining < PAGE_SIZE) ? + remaining : PAGE_SIZE; + + while (j < thispiece) + tmp[j++] = mincore_page(vma, start++); + + if (copy_to_user(vec + PAGE_SIZE * i, tmp, thispiece)) { + error = -EFAULT; + break; + } + } + + free_page((unsigned long) tmp); + return error; +} + +/* + * The mincore(2) system call. + * + * mincore() returns the memory residency status of the pages in the + * current process's address space specified by [addr, addr + len). + * The status is returned in a vector of bytes. The least significant + * bit of each byte is 1 if the referenced page is in memory, otherwise + * it is zero. + * + * Because the status of a page can change after mincore() checks it + * but before it returns to the application, the returned vector may + * contain stale information. Only locked pages are guaranteed to + * remain in memory. + * + * return values: + * zero - success + * -EFAULT - vec points to an illegal address + * -EINVAL - addr is not a multiple of PAGE_CACHE_SIZE, + * or len has a nonpositive value + * -ENOMEM - Addresses in the range [addr, addr + len] are + * invalid for the address space of this process, or + * specify one or more pages which are not currently + * mapped + * -EAGAIN - A kernel resource was temporarily unavailable. + */ +asmlinkage long sys_mincore(unsigned long start, size_t len, + unsigned char * vec) +{ + int index = 0; + unsigned long end; + struct vm_area_struct * vma; + int unmapped_error = 0; + long error = -EINVAL; + + down(¤t->mm->mmap_sem); + + if (start & ~PAGE_MASK) + goto out; + len = (len + ~PAGE_MASK) & PAGE_MASK; + end = start + len; + if (end < start) + goto out; + + error = 0; + if (end == start) + goto out; + + /* + * If the interval [start,end) covers some unmapped address + * ranges, just ignore them, but return -ENOMEM at the end. + */ + vma = find_vma(current->mm, start); + for (;;) { + /* Still start < end. 
*/ + error = -ENOMEM; + if (!vma) + goto out; + + /* Here start < vma->vm_end. */ + if (start < vma->vm_start) { + unmapped_error = -ENOMEM; + start = vma->vm_start; + } + + /* Here vma->vm_start <= start < vma->vm_end. */ + if (end <= vma->vm_end) { + if (start < end) { + error = mincore_vma(vma, start, end, + &vec[index]); + if (error) + goto out; + } + error = unmapped_error; + goto out; + } + + /* Here vma->vm_start <= start < vma->vm_end < end. */ + error = mincore_vma(vma, start, vma->vm_end, &vec[index]); + if (error) + goto out; + index += (vma->vm_end - start) >> PAGE_CACHE_SHIFT; + start = vma->vm_end; + vma = vma->vm_next; + } + +out: + up(¤t->mm->mmap_sem); + return error; +} + struct page *read_cache_page(struct address_space *mapping, unsigned long index, int (*filler)(void *,struct page*), diff --git a/mm/mlock.c b/mm/mlock.c index c3e40db54..a3d10ff99 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -31,6 +31,7 @@ static inline int mlock_fixup_start(struct vm_area_struct * vma, *n = *vma; n->vm_end = end; n->vm_flags = newflags; + n->vm_raend = 0; if (n->vm_file) get_file(n->vm_file); if (n->vm_ops && n->vm_ops->open) @@ -55,6 +56,7 @@ static inline int mlock_fixup_end(struct vm_area_struct * vma, n->vm_start = start; n->vm_pgoff += (n->vm_start - vma->vm_start) >> PAGE_SHIFT; n->vm_flags = newflags; + n->vm_raend = 0; if (n->vm_file) get_file(n->vm_file); if (n->vm_ops && n->vm_ops->open) @@ -85,6 +87,8 @@ static inline int mlock_fixup_middle(struct vm_area_struct * vma, right->vm_start = end; right->vm_pgoff += (right->vm_start - left->vm_start) >> PAGE_SHIFT; vma->vm_flags = newflags; + left->vm_raend = 0; + right->vm_raend = 0; if (vma->vm_file) atomic_add(2, &vma->vm_file->f_count); @@ -97,6 +101,7 @@ static inline int mlock_fixup_middle(struct vm_area_struct * vma, vma->vm_start = start; vma->vm_end = end; vma->vm_flags = newflags; + vma->vm_raend = 0; insert_vm_struct(current->mm, left); insert_vm_struct(current->mm, right); vmlist_modify_unlock(vma->vm_mm); @@ -249,6 +249,9 @@ unsigned long do_mmap_pgoff(struct file * file, unsigned long addr, unsigned lon vma->vm_flags = vm_flags(prot,flags) | mm->def_flags; if (file) { + VM_ClearReadHint(vma); + vma->vm_raend = 0; + if (file->f_mode & 1) vma->vm_flags |= VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC; if (flags & MAP_SHARED) { @@ -549,6 +552,7 @@ static struct vm_area_struct * unmap_fixup(struct vm_area_struct *area, mpnt->vm_end = area->vm_end; mpnt->vm_page_prot = area->vm_page_prot; mpnt->vm_flags = area->vm_flags; + mpnt->vm_raend = 0; mpnt->vm_ops = area->vm_ops; mpnt->vm_pgoff = area->vm_pgoff + ((end - area->vm_start) >> PAGE_SHIFT); mpnt->vm_file = area->vm_file; diff --git a/mm/mprotect.c b/mm/mprotect.c index 70f1d8e2c..53fc53acb 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -105,6 +105,7 @@ static inline int mprotect_fixup_start(struct vm_area_struct * vma, *n = *vma; n->vm_end = end; n->vm_flags = newflags; + n->vm_raend = 0; n->vm_page_prot = prot; if (n->vm_file) get_file(n->vm_file); @@ -131,6 +132,7 @@ static inline int mprotect_fixup_end(struct vm_area_struct * vma, n->vm_start = start; n->vm_pgoff += (n->vm_start - vma->vm_start) >> PAGE_SHIFT; n->vm_flags = newflags; + n->vm_raend = 0; n->vm_page_prot = prot; if (n->vm_file) get_file(n->vm_file); @@ -162,6 +164,8 @@ static inline int mprotect_fixup_middle(struct vm_area_struct * vma, left->vm_end = start; right->vm_start = end; right->vm_pgoff += (right->vm_start - left->vm_start) >> PAGE_SHIFT; + left->vm_raend = 0; + right->vm_raend = 0; if 
(vma->vm_file) atomic_add(2,&vma->vm_file->f_count); if (vma->vm_ops && vma->vm_ops->open) { @@ -173,6 +177,7 @@ static inline int mprotect_fixup_middle(struct vm_area_struct * vma, vma->vm_start = start; vma->vm_end = end; vma->vm_flags = newflags; + vma->vm_raend = 0; vma->vm_page_prot = prot; insert_vm_struct(current->mm, left); insert_vm_struct(current->mm, right); diff --git a/mm/mremap.c b/mm/mremap.c index 5721fc5d5..d8d18cf62 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -135,8 +135,8 @@ static inline unsigned long move_vma(struct vm_area_struct * vma, *new_vma = *vma; new_vma->vm_start = new_addr; new_vma->vm_end = new_addr+new_len; - new_vma->vm_pgoff = vma->vm_pgoff; new_vma->vm_pgoff += (addr - vma->vm_start) >> PAGE_SHIFT; + new_vma->vm_raend = 0; if (new_vma->vm_file) get_file(new_vma->vm_file); if (new_vma->vm_ops && new_vma->vm_ops->open) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 02ba33bd5..0204cf141 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -26,7 +26,6 @@ int nr_swap_pages = 0; int nr_lru_pages; -LIST_HEAD(lru_cache); pg_data_t *pgdat_list = (pg_data_t *)0; static char *zone_names[MAX_NR_ZONES] = { "DMA", "Normal", "HighMem" }; @@ -59,6 +58,19 @@ static int zone_balance_max[MAX_NR_ZONES] = { 255 , 255, 255, }; */ #define BAD_RANGE(zone,x) (((zone) != (x)->zone) || (((x)-mem_map) < (zone)->offset) || (((x)-mem_map) >= (zone)->offset+(zone)->size)) +static inline unsigned long classfree(zone_t *zone) +{ + unsigned long free = 0; + zone_t *z = zone->zone_pgdat->node_zones; + + while (z != zone) { + free += z->free_pages; + z++; + } + free += zone->free_pages; + return(free); +} + /* * Buddy system. Hairy. You really aren't expected to understand this * @@ -135,6 +147,9 @@ void __free_pages_ok (struct page *page, unsigned long order) memlist_add_head(&(base + page_idx)->list, &area->free_list); spin_unlock_irqrestore(&zone->lock, flags); + + if (classfree(zone) > zone->pages_high) + zone->zone_wake_kswapd = 0; } #define MARK_USED(index, order, area) \ @@ -201,19 +216,6 @@ static inline struct page * rmqueue (zone_t *zone, unsigned long order) return NULL; } -static inline unsigned long classfree(zone_t *zone) -{ - unsigned long free = 0; - zone_t *z = zone->zone_pgdat->node_zones; - - while (z != zone) { - free += z->free_pages; - z++; - } - free += zone->free_pages; - return(free); -} - static inline int zone_balance_memory (zone_t *zone, int gfp_mask) { int freed; @@ -263,21 +265,12 @@ struct page * __alloc_pages (zonelist_t *zonelist, unsigned long order) { unsigned long free = classfree(z); - if (free > z->pages_high) - { - if (z->low_on_memory) - z->low_on_memory = 0; - z->zone_wake_kswapd = 0; - } - else + if (free <= z->pages_high) { extern wait_queue_head_t kswapd_wait; - if (free <= z->pages_low) { - z->zone_wake_kswapd = 1; - wake_up_interruptible(&kswapd_wait); - } else - z->zone_wake_kswapd = 0; + z->zone_wake_kswapd = 1; + wake_up_interruptible(&kswapd_wait); if (free <= z->pages_min) z->low_on_memory = 1; @@ -585,6 +578,7 @@ void __init free_area_init_core(int nid, pg_data_t *pgdat, struct page **gmap, unsigned long bitmap_size; memlist_init(&zone->free_area[i].free_list); + memlist_init(&zone->lru_cache); mask += mask; size = (size + ~mask) & mask; bitmap_size = size >> i; diff --git a/mm/vmscan.c b/mm/vmscan.c index 603b9a2e0..d3dfb8db6 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -504,8 +504,7 @@ int kswapd(void *unused) while (pgdat) { for (i = 0; i < MAX_NR_ZONES; i++) { zone = pgdat->node_zones + i; - if ((!zone->size) || - 
(!zone->zone_wake_kswapd))
+			if ((!zone->size) || (!zone->zone_wake_kswapd))
 				continue;
 			do_try_to_free_pages(GFP_KSWAPD, zone);
 		}
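The comment blocks in the filemap.c hunks above spell out the userspace contract of the two system calls this merge brings in: madvise(2) hints (MADV_NORMAL/RANDOM/SEQUENTIAL/WILLNEED/DONTNEED) steer read-ahead and page reclaim for a mapped range, and mincore(2) reports per-page residency, one status byte per page with bit 0 set when the page is in core. The following is a minimal userspace sketch of that contract, not part of the merge itself: the file name "data.bin" is purely illustrative, the page size comes from sysconf(), and it assumes a C library that exposes madvise() and mincore() through <sys/mman.h>.

/*
 * Illustrative sketch only: walk a file mapping sequentially, then drop
 * it and ask how many pages are still resident.
 */
#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/stat.h>

int main(void)
{
	long pg = sysconf(_SC_PAGESIZE);
	struct stat st;
	int fd = open("data.bin", O_RDONLY);	/* hypothetical input file */

	if (fd < 0 || fstat(fd, &st) < 0) {
		perror("open/fstat");
		return 1;
	}

	size_t len = st.st_size;
	unsigned char *map = mmap(NULL, len, PROT_READ, MAP_PRIVATE, fd, 0);
	if (map == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	/* Declare a front-to-back scan so the kernel can use a large,
	 * fixed read-ahead window and recycle pages behind the fault. */
	if (madvise(map, len, MADV_SEQUENTIAL) < 0)
		perror("madvise(MADV_SEQUENTIAL)");

	volatile unsigned char sum = 0;
	for (size_t off = 0; off < len; off += pg)	/* touch one byte per page */
		sum += map[off];

	/* Done with the data: tell the kernel it may free these pages. */
	if (madvise(map, len, MADV_DONTNEED) < 0)
		perror("madvise(MADV_DONTNEED)");

	/* One status byte per page; bit 0 set means "resident". */
	size_t pages = (len + pg - 1) / pg;
	unsigned char *vec = malloc(pages);
	if (vec && mincore(map, len, vec) == 0) {
		size_t resident = 0;
		for (size_t i = 0; i < pages; i++)
			resident += vec[i] & 1;
		printf("%zu of %zu pages still resident\n", resident, pages);
	}

	free(vec);
	munmap(map, len);
	close(fd);
	return 0;
}

Note that, as the comment above madvise_dontneed() explains, this implementation of MADV_DONTNEED discards the pages rather than pushing dirty data to swap, which is why the sketch only applies it to a read-only mapping; a writer that wants dirty pages flushed or invalidated should use msync(MS_INVALIDATE) instead.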