author     Ralf Baechle <ralf@linux-mips.org>    1999-09-28 22:25:29 +0000
committer  Ralf Baechle <ralf@linux-mips.org>    1999-09-28 22:25:29 +0000
commit     0ae8dceaebe3659ee0c3352c08125f403e77ebca
tree       5085c389f09da78182b899d19fe1068b619a69dd /mm
parent     273767781288c35c9d679e908672b9996cda4c34
Merge with 2.3.10.
Diffstat (limited to 'mm')
-rw-r--r--   mm/filemap.c      24
-rw-r--r--   mm/memory.c      256
-rw-r--r--   mm/mlock.c        12
-rw-r--r--   mm/mmap.c         24
-rw-r--r--   mm/mprotect.c      6
-rw-r--r--   mm/mremap.c       13
-rw-r--r--   mm/page_alloc.c   87
-rw-r--r--   mm/vmscan.c       17
8 files changed, 203 insertions, 236 deletions
diff --git a/mm/filemap.c b/mm/filemap.c
index ed5b6d34c..668c6c99f 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1194,8 +1194,6 @@ asmlinkage ssize_t sys_sendfile(int out_fd, int in_fd, off_t *offset, size_t cou
struct file * in_file, * out_file;
struct inode * in_inode, * out_inode;
- lock_kernel();
-
/*
* Get input file, and verify that it is ok..
*/
@@ -1234,7 +1232,6 @@ asmlinkage ssize_t sys_sendfile(int out_fd, int in_fd, off_t *offset, size_t cou
if (retval)
goto fput_out;
- unlock_kernel();
retval = 0;
if (count) {
read_descriptor_t desc;
@@ -1244,7 +1241,7 @@ asmlinkage ssize_t sys_sendfile(int out_fd, int in_fd, off_t *offset, size_t cou
ppos = &in_file->f_pos;
if (offset) {
if (get_user(pos, offset))
- goto fput_out_lock;
+ goto fput_out;
ppos = &pos;
}
@@ -1261,14 +1258,11 @@ asmlinkage ssize_t sys_sendfile(int out_fd, int in_fd, off_t *offset, size_t cou
put_user(pos, offset);
}
-fput_out_lock:
- lock_kernel();
fput_out:
fput(out_file);
fput_in:
fput(in_file);
out:
- unlock_kernel();
return retval;
}
@@ -1297,9 +1291,7 @@ static unsigned long filemap_nopage(struct vm_area_struct * area, unsigned long
new_page = 0;
offset = (address & PAGE_MASK) - area->vm_start + area->vm_offset;
if (offset >= inode->i_size && (area->vm_flags & VM_SHARED) && area->vm_mm == current->mm)
- goto no_page_nolock;
-
- unlock_kernel();
+ goto no_page;
/*
* Do we have something in the page cache already?
@@ -1344,7 +1336,6 @@ success:
page_cache_free(new_page);
flush_page_to_ram(old_page);
- lock_kernel();
return old_page;
}
@@ -1354,7 +1345,6 @@ success:
copy_page(new_page, old_page);
flush_page_to_ram(new_page);
page_cache_release(page);
- lock_kernel();
return new_page;
no_cached_page:
@@ -1431,8 +1421,6 @@ failure:
if (new_page)
page_cache_free(new_page);
no_page:
- lock_kernel();
-no_page_nolock:
return 0;
}
@@ -1487,7 +1475,7 @@ static int filemap_write_page(struct vm_area_struct * vma,
* If a task terminates while we're swapping the page, the vma and
* and file could be released ... increment the count to be safe.
*/
- atomic_inc(&file->f_count);
+ get_file(file);
result = do_write_page(inode, file, (const char *) page, offset);
fput(file);
return result;
@@ -1648,8 +1636,7 @@ static struct vm_operations_struct file_shared_mmap = {
NULL, /* advise */
filemap_nopage, /* nopage */
NULL, /* wppage */
- filemap_swapout, /* swapout */
- NULL, /* swapin */
+ filemap_swapout /* swapout */
};
/*
@@ -1667,8 +1654,7 @@ static struct vm_operations_struct file_private_mmap = {
NULL, /* advise */
filemap_nopage, /* nopage */
NULL, /* wppage */
- NULL, /* swapout */
- NULL, /* swapin */
+ NULL /* swapout */
};
/* This is used for a general mmap of a disk file */
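Note on the filemap.c hunks above: they drop the big kernel lock around sys_sendfile() and filemap_nopage(), retire the unused swapin vm operation, and replace the bare atomic_inc(&file->f_count) with the get_file() helper. As an assumption about this kernel series (not quoted from the patch), get_file() amounts to the same reference bump, paired with a later fput(); a minimal sketch with an illustrative name:

    /* Assumed shape of the helper the patch switches to; take_file_ref() is a
     * stand-in for get_file(), which in this era is essentially this. */
    static inline void take_file_ref(struct file *file)
    {
            atomic_inc(&file->f_count);   /* keep the struct file alive across the I/O */
    }

    /* Usage pattern from filemap_write_page() above:
     *      take_file_ref(file);                       -- was atomic_inc(&file->f_count)
     *      result = do_write_page(inode, file, page, offset);
     *      fput(file);                                -- drop the reference again
     */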
diff --git a/mm/memory.c b/mm/memory.c
index aac203bbb..a31e862b2 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -36,7 +36,9 @@
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/swap.h>
+#include <linux/pagemap.h>
#include <linux/smp_lock.h>
+#include <linux/swapctl.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
@@ -320,7 +322,7 @@ static inline void forget_pte(pte_t page)
}
}
-static inline int zap_pte_range(pmd_t * pmd, unsigned long address, unsigned long size)
+static inline int zap_pte_range(struct mm_struct *mm, pmd_t * pmd, unsigned long address, unsigned long size)
{
pte_t * pte;
int freed;
@@ -345,15 +347,15 @@ static inline int zap_pte_range(pmd_t * pmd, unsigned long address, unsigned lon
page = *pte;
pte++;
size--;
+ pte_clear(pte-1);
if (pte_none(page))
continue;
- pte_clear(pte-1);
freed += free_pte(page);
}
return freed;
}
-static inline int zap_pmd_range(pgd_t * dir, unsigned long address, unsigned long size)
+static inline int zap_pmd_range(struct mm_struct *mm, pgd_t * dir, unsigned long address, unsigned long size)
{
pmd_t * pmd;
unsigned long end;
@@ -373,7 +375,7 @@ static inline int zap_pmd_range(pgd_t * dir, unsigned long address, unsigned lon
end = PGDIR_SIZE;
freed = 0;
do {
- freed += zap_pte_range(pmd, address, end - address);
+ freed += zap_pte_range(mm, pmd, address, end - address);
address = (address + PMD_SIZE) & PMD_MASK;
pmd++;
} while (address < end);
@@ -390,11 +392,21 @@ void zap_page_range(struct mm_struct *mm, unsigned long address, unsigned long s
int freed = 0;
dir = pgd_offset(mm, address);
+
+ /*
+ * This is a long-lived spinlock. That's fine.
+ * There's no contention, because the page table
+ * lock only protects against kswapd anyway, and
+ * even if kswapd happened to be looking at this
+ * process we _want_ it to get stuck.
+ */
+ spin_lock(&mm->page_table_lock);
while (address < end) {
- freed += zap_pmd_range(dir, address, end - address);
+ freed += zap_pmd_range(mm, dir, address, end - address);
address = (address + PGDIR_SIZE) & PGDIR_MASK;
dir++;
}
+ spin_unlock(&mm->page_table_lock);
/*
* Update rss for the mm_struct (not necessarily current->mm)
*/
@@ -599,17 +611,16 @@ unsigned long put_dirty_page(struct task_struct * tsk, unsigned long page, unsig
* We also mark the page dirty at this point even though the page will
* change only once the write actually happens. This avoids a few races,
* and potentially makes it more efficient.
+ *
+ * We enter with the page table read-lock held, and need to exit without
+ * it.
*/
static int do_wp_page(struct task_struct * tsk, struct vm_area_struct * vma,
unsigned long address, pte_t *page_table, pte_t pte)
{
unsigned long old_page, new_page;
struct page * page;
-
- new_page = __get_free_page(GFP_USER);
- /* Did swap_out() unmap the protected page while we slept? */
- if (pte_val(*page_table) != pte_val(pte))
- goto end_wp_page;
+
old_page = pte_page(pte);
if (MAP_NR(old_page) >= max_mapnr)
goto bad_wp_page;
@@ -634,44 +645,44 @@ static int do_wp_page(struct task_struct * tsk, struct vm_area_struct * vma,
/* FallThrough */
case 1:
flush_cache_page(vma, address);
- set_pte(page_table, pte_mkdirty(pte_mkwrite(pte)));
+ set_pte(page_table, pte_mkyoung(pte_mkdirty(pte_mkwrite(pte))));
flush_tlb_page(vma, address);
-end_wp_page:
- /*
- * We can release the kernel lock now.. Now swap_out will see
- * a dirty page and so won't get confused and flush_tlb_page
- * won't SMP race. -Andrea
- */
- unlock_kernel();
-
- if (new_page)
- free_page(new_page);
+ spin_unlock(&tsk->mm->page_table_lock);
return 1;
}
-
+
+ /*
+ * Ok, we need to copy. Oh, well..
+ */
+ spin_unlock(&tsk->mm->page_table_lock);
+ new_page = __get_free_page(GFP_USER);
if (!new_page)
- goto no_new_page;
+ return -1;
+ spin_lock(&tsk->mm->page_table_lock);
- if (PageReserved(page))
- ++vma->vm_mm->rss;
- copy_cow_page(old_page,new_page);
- flush_page_to_ram(old_page);
- flush_page_to_ram(new_page);
- flush_cache_page(vma, address);
- set_pte(page_table, pte_mkwrite(pte_mkdirty(mk_pte(new_page, vma->vm_page_prot))));
- flush_tlb_page(vma, address);
- unlock_kernel();
- __free_page(page);
+ /*
+ * Re-check the pte - we dropped the lock
+ */
+ if (pte_val(*page_table) == pte_val(pte)) {
+ if (PageReserved(page))
+ ++vma->vm_mm->rss;
+ copy_cow_page(old_page,new_page);
+ flush_page_to_ram(old_page);
+ flush_page_to_ram(new_page);
+ flush_cache_page(vma, address);
+ set_pte(page_table, pte_mkwrite(pte_mkdirty(mk_pte(new_page, vma->vm_page_prot))));
+ flush_tlb_page(vma, address);
+
+ /* Free the old page.. */
+ new_page = old_page;
+ }
+ spin_unlock(&tsk->mm->page_table_lock);
+ free_page(new_page);
return 1;
bad_wp_page:
printk("do_wp_page: bogus page at address %08lx (%08lx)\n",address,old_page);
- send_sig(SIGKILL, tsk, 1);
-no_new_page:
- unlock_kernel();
- if (new_page)
- free_page(new_page);
- return 0;
+ return -1;
}
/*
@@ -725,8 +736,9 @@ void vmtruncate(struct inode * inode, unsigned long offset)
struct vm_area_struct * mpnt;
truncate_inode_pages(inode, offset);
+ spin_lock(&inode->i_shared_lock);
if (!inode->i_mmap)
- return;
+ goto out_unlock;
mpnt = inode->i_mmap;
do {
struct mm_struct *mm = mpnt->vm_mm;
@@ -757,35 +769,81 @@ void vmtruncate(struct inode * inode, unsigned long offset)
zap_page_range(mm, start, len);
flush_tlb_range(mm, start, end);
} while ((mpnt = mpnt->vm_next_share) != NULL);
+out_unlock:
+ spin_unlock(&inode->i_shared_lock);
}
-/*
- * This is called with the kernel lock held, we need
- * to return without it.
+
+/*
+ * Primitive swap readahead code. We simply read an aligned block of
+ * (1 << page_cluster) entries in the swap area. This method is chosen
+ * because it doesn't cost us any seek time. We also make sure to queue
+ * the 'original' request together with the readahead ones...
*/
-static int do_swap_page(struct task_struct * tsk,
+static void swapin_readahead(unsigned long entry)
+{
+ int i;
+ struct page *new_page;
+ unsigned long offset = SWP_OFFSET(entry);
+ struct swap_info_struct *swapdev = SWP_TYPE(entry) + swap_info;
+
+ offset = (offset >> page_cluster) << page_cluster;
+
+ i = 1 << page_cluster;
+ do {
+ /* Don't read-ahead past the end of the swap area */
+ if (offset >= swapdev->max)
+ break;
+ /* Don't block on I/O for read-ahead */
+ if (atomic_read(&nr_async_pages) >= pager_daemon.swap_cluster)
+ break;
+ /* Don't read in bad or busy pages */
+ if (!swapdev->swap_map[offset])
+ break;
+ if (swapdev->swap_map[offset] == SWAP_MAP_BAD)
+ break;
+
+ /* Ok, do the async read-ahead now */
+ new_page = read_swap_cache_async(SWP_ENTRY(SWP_TYPE(entry), offset), 0);
+ if (new_page != NULL)
+ __free_page(new_page);
+ offset++;
+ } while (--i);
+ return;
+}
+
+static int do_swap_page(struct task_struct * tsk,
struct vm_area_struct * vma, unsigned long address,
- pte_t * page_table, pte_t entry, int write_access)
+ pte_t * page_table, unsigned long entry, int write_access)
{
- if (!vma->vm_ops || !vma->vm_ops->swapin) {
- swap_in(tsk, vma, page_table, pte_val(entry), write_access);
- flush_page_to_ram(pte_page(*page_table));
- } else {
- pte_t page = vma->vm_ops->swapin(vma, address - vma->vm_start + vma->vm_offset, pte_val(entry));
- if (pte_val(*page_table) != pte_val(entry)) {
- free_page(pte_page(page));
- } else {
- if (page_count(mem_map + MAP_NR(pte_page(page))) > 1 &&
- !(vma->vm_flags & VM_SHARED))
- page = pte_wrprotect(page);
- ++vma->vm_mm->rss;
- ++tsk->maj_flt;
- flush_page_to_ram(pte_page(page));
- set_pte(page_table, page);
- }
+ struct page *page = lookup_swap_cache(entry);
+ pte_t pte;
+
+ if (!page) {
+ lock_kernel();
+ swapin_readahead(entry);
+ page = read_swap_cache(entry);
+ unlock_kernel();
+ if (!page)
+ return -1;
+
+ flush_page_to_ram(page_address(page));
+ }
+
+ vma->vm_mm->rss++;
+ tsk->min_flt++;
+ swap_free(entry);
+
+ pte = mk_pte(page_address(page), vma->vm_page_prot);
+
+ if (write_access && !is_page_shared(page)) {
+ delete_from_swap_cache(page);
+ pte = pte_mkwrite(pte_mkdirty(pte));
}
- unlock_kernel();
+ set_pte(page_table, pte);
+ /* No need to invalidate - it was non-present before */
+ update_mmu_cache(vma, address, pte);
return 1;
}
@@ -798,7 +856,7 @@ static int do_anonymous_page(struct task_struct * tsk, struct vm_area_struct * v
if (write_access) {
unsigned long page = __get_free_page(GFP_USER);
if (!page)
- return 0;
+ return -1;
clear_page(page);
entry = pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot)));
vma->vm_mm->rss++;
@@ -806,6 +864,8 @@ static int do_anonymous_page(struct task_struct * tsk, struct vm_area_struct * v
flush_page_to_ram(page);
}
set_pte(page_table, entry);
+ /* No need to invalidate - it was non-present before */
+ update_mmu_cache(vma, addr, entry);
return 1;
}
@@ -827,23 +887,17 @@ static int do_no_page(struct task_struct * tsk, struct vm_area_struct * vma,
unsigned long page;
pte_t entry;
- if (!vma->vm_ops || !vma->vm_ops->nopage) {
- unlock_kernel();
- return do_anonymous_page(tsk, vma, page_table, write_access,
- address);
- }
+ if (!vma->vm_ops || !vma->vm_ops->nopage)
+ return do_anonymous_page(tsk, vma, page_table, write_access, address);
/*
* The third argument is "no_share", which tells the low-level code
* to copy, not share the page even if sharing is possible. It's
* essentially an early COW detection.
*/
- page = vma->vm_ops->nopage(vma, address & PAGE_MASK,
- (vma->vm_flags & VM_SHARED)?0:write_access);
-
- unlock_kernel();
+ page = vma->vm_ops->nopage(vma, address & PAGE_MASK, (vma->vm_flags & VM_SHARED)?0:write_access);
if (!page)
- return 0;
+ return 0; /* SIGBUS - but we _really_ should know whether it is OOM or SIGBUS */
++tsk->maj_flt;
++vma->vm_mm->rss;
@@ -866,6 +920,7 @@ static int do_no_page(struct task_struct * tsk, struct vm_area_struct * vma,
entry = pte_wrprotect(entry);
set_pte(page_table, entry);
/* no need to invalidate: a not-present page shouldn't be cached */
+ update_mmu_cache(vma, address, entry);
return 1;
}
@@ -877,6 +932,15 @@ static int do_no_page(struct task_struct * tsk, struct vm_area_struct * vma,
* There is also a hook called "update_mmu_cache()" that architectures
* with external mmu caches can use to update those (ie the Sparc or
* PowerPC hashed page tables that act as extended TLBs).
+ *
+ * Note the "page_table_lock". It is to protect against kswapd removing
+ * pages from under us. Note that kswapd only ever _removes_ pages, never
+ * adds them. As such, once we have noticed that the page is not present,
+ * we can drop the lock early.
+ *
+ * The adding of pages is protected by the MM semaphore (which we hold),
+ * so we don't need to worry about a page being suddenly been added into
+ * our VM.
*/
static inline int handle_pte_fault(struct task_struct *tsk,
struct vm_area_struct * vma, unsigned long address,
@@ -884,27 +948,32 @@ static inline int handle_pte_fault(struct task_struct *tsk,
{
pte_t entry;
- lock_kernel();
entry = *pte;
-
if (!pte_present(entry)) {
if (pte_none(entry))
return do_no_page(tsk, vma, address, write_access, pte);
- return do_swap_page(tsk, vma, address, pte, entry, write_access);
+ return do_swap_page(tsk, vma, address, pte, pte_val(entry), write_access);
}
- entry = pte_mkyoung(entry);
- set_pte(pte, entry);
- flush_tlb_page(vma, address);
- if (write_access) {
- if (!pte_write(entry))
- return do_wp_page(tsk, vma, address, pte, entry);
+ /*
+ * Ok, the entry was present, we need to get the page table
+ * lock to synchronize with kswapd, and verify that the entry
+ * didn't change from under us..
+ */
+ spin_lock(&tsk->mm->page_table_lock);
+ if (pte_val(entry) == pte_val(*pte)) {
+ if (write_access) {
+ if (!pte_write(entry))
+ return do_wp_page(tsk, vma, address, pte, entry);
- entry = pte_mkdirty(entry);
+ entry = pte_mkdirty(entry);
+ }
+ entry = pte_mkyoung(entry);
set_pte(pte, entry);
flush_tlb_page(vma, address);
+ update_mmu_cache(vma, address, entry);
}
- unlock_kernel();
+ spin_unlock(&tsk->mm->page_table_lock);
return 1;
}
@@ -921,28 +990,27 @@ int handle_mm_fault(struct task_struct *tsk, struct vm_area_struct * vma,
pmd = pmd_alloc(pgd, address);
if (pmd) {
pte_t * pte = pte_alloc(pmd, address);
- if (pte) {
- if (handle_pte_fault(tsk, vma, address, write_access, pte)) {
- update_mmu_cache(vma, address, *pte);
- return 1;
- }
- }
+ if (pte)
+ return handle_pte_fault(tsk, vma, address, write_access, pte);
}
- return 0;
+ return -1;
}
/*
* Simplistic page force-in..
*/
-void make_pages_present(unsigned long addr, unsigned long end)
+int make_pages_present(unsigned long addr, unsigned long end)
{
int write;
+ struct task_struct *tsk = current;
struct vm_area_struct * vma;
- vma = find_vma(current->mm, addr);
+ vma = find_vma(tsk->mm, addr);
write = (vma->vm_flags & VM_WRITE) != 0;
while (addr < end) {
- handle_mm_fault(current, vma, addr, write);
+ if (handle_mm_fault(tsk, vma, addr, write) < 0)
+ return -1;
addr += PAGE_SIZE;
}
+ return 0;
}
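Note on the memory.c hunks above: the fault paths stop taking the big kernel lock and use mm->page_table_lock instead. The recurring pattern in do_wp_page() and handle_pte_fault() is to sample the pte, drop the spinlock around anything that may sleep (page allocation, swap I/O), then re-take the lock and re-check the pte before committing. A minimal sketch of that drop-and-recheck shape, with illustrative names rather than the kernel's own:

    static int cow_break_sketch(struct mm_struct *mm, pte_t *page_table,
                                pte_t old, unsigned long old_page)
    {
            unsigned long new_page;

            spin_unlock(&mm->page_table_lock);      /* __get_free_page() may sleep */
            new_page = __get_free_page(GFP_USER);
            if (!new_page)
                    return -1;                      /* out of memory */
            spin_lock(&mm->page_table_lock);

            if (pte_val(*page_table) == pte_val(old)) {
                    /* Nobody changed the pte while we slept: copy old_page into
                       new_page, install it with set_pte(), flush caches/TLB ... */
                    new_page = old_page;            /* ... and free the old page below */
            }
            /* If the pte changed, someone else resolved the fault first and the
               freshly allocated page is simply thrown away. */
            spin_unlock(&mm->page_table_lock);
            free_page(new_page);
            return 1;
    }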
diff --git a/mm/mlock.c b/mm/mlock.c
index 7947031af..d6b19cfb1 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -31,7 +31,7 @@ static inline int mlock_fixup_start(struct vm_area_struct * vma,
vma->vm_offset += vma->vm_start - n->vm_start;
n->vm_flags = newflags;
if (n->vm_file)
- atomic_inc(&n->vm_file->f_count);
+ get_file(n->vm_file);
if (n->vm_ops && n->vm_ops->open)
n->vm_ops->open(n);
insert_vm_struct(current->mm, n);
@@ -52,7 +52,7 @@ static inline int mlock_fixup_end(struct vm_area_struct * vma,
n->vm_offset += n->vm_start - vma->vm_start;
n->vm_flags = newflags;
if (n->vm_file)
- atomic_inc(&n->vm_file->f_count);
+ get_file(n->vm_file);
if (n->vm_ops && n->vm_ops->open)
n->vm_ops->open(n);
insert_vm_struct(current->mm, n);
@@ -179,7 +179,6 @@ asmlinkage int sys_mlock(unsigned long start, size_t len)
int error = -ENOMEM;
down(&current->mm->mmap_sem);
- lock_kernel();
len = (len + (start & ~PAGE_MASK) + ~PAGE_MASK) & PAGE_MASK;
start &= PAGE_MASK;
@@ -200,7 +199,6 @@ asmlinkage int sys_mlock(unsigned long start, size_t len)
error = do_mlock(start, len, 1);
out:
- unlock_kernel();
up(&current->mm->mmap_sem);
return error;
}
@@ -210,11 +208,9 @@ asmlinkage int sys_munlock(unsigned long start, size_t len)
int ret;
down(&current->mm->mmap_sem);
- lock_kernel();
len = (len + (start & ~PAGE_MASK) + ~PAGE_MASK) & PAGE_MASK;
start &= PAGE_MASK;
ret = do_mlock(start, len, 0);
- unlock_kernel();
up(&current->mm->mmap_sem);
return ret;
}
@@ -254,7 +250,6 @@ asmlinkage int sys_mlockall(int flags)
int ret = -EINVAL;
down(&current->mm->mmap_sem);
- lock_kernel();
if (!flags || (flags & ~(MCL_CURRENT | MCL_FUTURE)))
goto out;
@@ -272,7 +267,6 @@ asmlinkage int sys_mlockall(int flags)
ret = do_mlockall(flags);
out:
- unlock_kernel();
up(&current->mm->mmap_sem);
return ret;
}
@@ -282,9 +276,7 @@ asmlinkage int sys_munlockall(void)
int ret;
down(&current->mm->mmap_sem);
- lock_kernel();
ret = do_mlockall(0);
- unlock_kernel();
up(&current->mm->mmap_sem);
return ret;
}
diff --git a/mm/mmap.c b/mm/mmap.c
index c9d07a291..61826cfa2 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -77,10 +77,12 @@ static inline void remove_shared_vm_struct(struct vm_area_struct *vma)
if (file) {
if (vma->vm_flags & VM_DENYWRITE)
- file->f_dentry->d_inode->i_writecount++;
+ atomic_inc(&file->f_dentry->d_inode->i_writecount);
+ spin_lock(&file->f_dentry->d_inode->i_shared_lock);
if(vma->vm_next_share)
vma->vm_next_share->vm_pprev_share = vma->vm_pprev_share;
*vma->vm_pprev_share = vma->vm_next_share;
+ spin_unlock(&file->f_dentry->d_inode->i_shared_lock);
}
}
@@ -294,7 +296,7 @@ unsigned long do_mmap(struct file * file, unsigned long addr, unsigned long len,
if (file) {
int correct_wcount = 0;
if (vma->vm_flags & VM_DENYWRITE) {
- if (file->f_dentry->d_inode->i_writecount > 0) {
+ if (atomic_read(&file->f_dentry->d_inode->i_writecount) > 0) {
error = -ETXTBSY;
goto free_vma;
}
@@ -303,17 +305,17 @@ unsigned long do_mmap(struct file * file, unsigned long addr, unsigned long len,
* might). In any case, this takes care of any
* race that this might cause.
*/
- file->f_dentry->d_inode->i_writecount--;
+ atomic_dec(&file->f_dentry->d_inode->i_writecount);
correct_wcount = 1;
}
error = file->f_op->mmap(file, vma);
/* Fix up the count if necessary, then check for an error */
if (correct_wcount)
- file->f_dentry->d_inode->i_writecount++;
+ atomic_inc(&file->f_dentry->d_inode->i_writecount);
if (error)
goto unmap_and_free_vma;
vma->vm_file = file;
- atomic_inc(&file->f_count);
+ get_file(file);
}
/*
@@ -547,7 +549,7 @@ static struct vm_area_struct * unmap_fixup(struct vm_area_struct *area,
mpnt->vm_file = area->vm_file;
mpnt->vm_pte = area->vm_pte;
if (mpnt->vm_file)
- atomic_inc(&mpnt->vm_file->f_count);
+ get_file(mpnt->vm_file);
if (mpnt->vm_ops && mpnt->vm_ops->open)
mpnt->vm_ops->open(mpnt);
area->vm_end = addr; /* Truncate area */
@@ -678,9 +680,9 @@ int do_munmap(unsigned long addr, size_t len)
size = end - st;
lock_kernel();
-
if (mpnt->vm_ops && mpnt->vm_ops->unmap)
mpnt->vm_ops->unmap(mpnt, st, size);
+ unlock_kernel();
remove_shared_vm_struct(mpnt);
mm->map_count--;
@@ -693,8 +695,6 @@ int do_munmap(unsigned long addr, size_t len)
* Fix the mapping, and free the old area if it wasn't reused.
*/
extra = unmap_fixup(mpnt, st, size, extra);
-
- unlock_kernel();
}
/* Release the extra vma struct if it wasn't used */
@@ -787,10 +787,8 @@ unsigned long do_brk(unsigned long addr, unsigned long len)
flags = vma->vm_flags;
addr = vma->vm_start;
- lock_kernel(); /* kswapd, ugh */
insert_vm_struct(mm, vma);
merge_segments(mm, vma->vm_start, vma->vm_end);
- unlock_kernel();
mm->total_vm += len >> PAGE_SHIFT;
if (flags & VM_LOCKED) {
@@ -878,13 +876,15 @@ void insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vmp)
if (file) {
struct inode * inode = file->f_dentry->d_inode;
if (vmp->vm_flags & VM_DENYWRITE)
- inode->i_writecount--;
+ atomic_dec(&inode->i_writecount);
/* insert vmp into inode's share list */
+ spin_lock(&inode->i_shared_lock);
if((vmp->vm_next_share = inode->i_mmap) != NULL)
inode->i_mmap->vm_pprev_share = &vmp->vm_next_share;
inode->i_mmap = vmp;
vmp->vm_pprev_share = &inode->i_mmap;
+ spin_unlock(&inode->i_shared_lock);
}
}
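Note on the mmap.c hunks above: inode->i_writecount becomes an atomic_t and the per-inode i_mmap share list is now protected by i_shared_lock instead of the big kernel lock. The MAP_DENYWRITE bookkeeping itself is unchanged; restated as an illustrative fragment (names as in the hunks, the function is hypothetical):

    static long deny_write_check_sketch(struct inode *inode, struct vm_area_struct *vma)
    {
            if (vma->vm_flags & VM_DENYWRITE) {
                    if (atomic_read(&inode->i_writecount) > 0)
                            return -ETXTBSY;          /* somebody has it open for write */
                    atomic_dec(&inode->i_writecount); /* hold writers off while mapped;
                                                         undone in remove_shared_vm_struct() */
            }
            return 0;
    }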
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 14073c0fa..b1504af83 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -103,7 +103,7 @@ static inline int mprotect_fixup_start(struct vm_area_struct * vma,
n->vm_flags = newflags;
n->vm_page_prot = prot;
if (n->vm_file)
- atomic_inc(&n->vm_file->f_count);
+ get_file(n->vm_file);
if (n->vm_ops && n->vm_ops->open)
n->vm_ops->open(n);
insert_vm_struct(current->mm, n);
@@ -126,7 +126,7 @@ static inline int mprotect_fixup_end(struct vm_area_struct * vma,
n->vm_flags = newflags;
n->vm_page_prot = prot;
if (n->vm_file)
- atomic_inc(&n->vm_file->f_count);
+ get_file(n->vm_file);
if (n->vm_ops && n->vm_ops->open)
n->vm_ops->open(n);
insert_vm_struct(current->mm, n);
@@ -212,7 +212,6 @@ asmlinkage int sys_mprotect(unsigned long start, size_t len, unsigned long prot)
return 0;
down(&current->mm->mmap_sem);
- lock_kernel();
vma = find_vma(current->mm, start);
error = -EFAULT;
@@ -249,7 +248,6 @@ asmlinkage int sys_mprotect(unsigned long start, size_t len, unsigned long prot)
}
merge_segments(current->mm, start, end);
out:
- unlock_kernel();
up(&current->mm->mmap_sem);
return error;
}
diff --git a/mm/mremap.c b/mm/mremap.c
index 48d3e9f94..2852f9b06 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -57,11 +57,13 @@ static inline pte_t *alloc_one_pte(struct mm_struct *mm, unsigned long addr)
return pte;
}
-static inline int copy_one_pte(pte_t * src, pte_t * dst)
+static inline int copy_one_pte(struct mm_struct *mm, pte_t * src, pte_t * dst)
{
int error = 0;
- pte_t pte = *src;
+ pte_t pte;
+ spin_lock(&mm->page_table_lock);
+ pte = *src;
if (!pte_none(pte)) {
error++;
if (dst) {
@@ -70,6 +72,7 @@ static inline int copy_one_pte(pte_t * src, pte_t * dst)
error--;
}
}
+ spin_unlock(&mm->page_table_lock);
return error;
}
@@ -80,7 +83,7 @@ static int move_one_page(struct mm_struct *mm, unsigned long old_addr, unsigned
src = get_one_pte(mm, old_addr);
if (src)
- error = copy_one_pte(src, alloc_one_pte(mm, new_addr));
+ error = copy_one_pte(mm, src, alloc_one_pte(mm, new_addr));
return error;
}
@@ -134,14 +137,12 @@ static inline unsigned long move_vma(struct vm_area_struct * vma,
new_vma->vm_start = new_addr;
new_vma->vm_end = new_addr+new_len;
new_vma->vm_offset = vma->vm_offset + (addr - vma->vm_start);
- lock_kernel();
if (new_vma->vm_file)
- atomic_inc(&new_vma->vm_file->f_count);
+ get_file(new_vma->vm_file);
if (new_vma->vm_ops && new_vma->vm_ops->open)
new_vma->vm_ops->open(new_vma);
insert_vm_struct(current->mm, new_vma);
merge_segments(current->mm, new_vma->vm_start, new_vma->vm_end);
- unlock_kernel();
do_munmap(addr, old_len);
current->mm->total_vm += new_len >> PAGE_SHIFT;
if (new_vma->vm_flags & VM_LOCKED) {
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 3f30a049e..22ce7ac00 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -345,90 +345,3 @@ unsigned long __init free_area_init(unsigned long start_mem, unsigned long end_m
}
return start_mem;
}
-
-/*
- * Primitive swap readahead code. We simply read an aligned block of
- * (1 << page_cluster) entries in the swap area. This method is chosen
- * because it doesn't cost us any seek time. We also make sure to queue
- * the 'original' request together with the readahead ones...
- */
-void swapin_readahead(unsigned long entry)
-{
- int i;
- struct page *new_page;
- unsigned long offset = SWP_OFFSET(entry);
- struct swap_info_struct *swapdev = SWP_TYPE(entry) + swap_info;
-
- offset = (offset >> page_cluster) << page_cluster;
-
- i = 1 << page_cluster;
- do {
- /* Don't read-ahead past the end of the swap area */
- if (offset >= swapdev->max)
- break;
- /* Don't block on I/O for read-ahead */
- if (atomic_read(&nr_async_pages) >= pager_daemon.swap_cluster)
- break;
- /* Don't read in bad or busy pages */
- if (!swapdev->swap_map[offset])
- break;
- if (swapdev->swap_map[offset] == SWAP_MAP_BAD)
- break;
-
- /* Ok, do the async read-ahead now */
- new_page = read_swap_cache_async(SWP_ENTRY(SWP_TYPE(entry), offset), 0);
- if (new_page != NULL)
- __free_page(new_page);
- offset++;
- } while (--i);
- return;
-}
-
-/*
- * The tests may look silly, but it essentially makes sure that
- * no other process did a swap-in on us just as we were waiting.
- *
- * Also, don't bother to add to the swap cache if this page-in
- * was due to a write access.
- */
-void swap_in(struct task_struct * tsk, struct vm_area_struct * vma,
- pte_t * page_table, unsigned long entry, int write_access)
-{
- unsigned long page;
- struct page *page_map = lookup_swap_cache(entry);
-
- if (!page_map) {
- swapin_readahead(entry);
- page_map = read_swap_cache(entry);
- }
- if (pte_val(*page_table) != entry) {
- if (page_map)
- free_page_and_swap_cache(page_address(page_map));
- return;
- }
- if (!page_map) {
- set_pte(page_table, BAD_PAGE);
- swap_free(entry);
- oom(tsk);
- return;
- }
-
- page = page_address(page_map);
- vma->vm_mm->rss++;
- tsk->min_flt++;
- swap_free(entry);
-
- if (!write_access || is_page_shared(page_map)) {
- set_pte(page_table, mk_pte(page, vma->vm_page_prot));
- return;
- }
-
- /*
- * The page is unshared and we're going to dirty it - so tear
- * down the swap cache and give exclusive access to the page to
- * this process.
- */
- delete_from_swap_cache(page_map);
- set_pte(page_table, pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot))));
- return;
-}
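Note on the page_alloc.c hunk above: the removed code does not disappear. swapin_readahead() moves into mm/memory.c (see the memory.c hunks), and swap_in() is folded into the new do_swap_page(). The only arithmetic worth spelling out is the cluster alignment; a worked example, assuming page_cluster = 4:

    /* Faulting swap offset 37, page_cluster = 4:
     *      37 >> 4 = 2,  2 << 4 = 32      -> read-ahead window starts at offset 32
     *      1 << 4  = 16                   -> up to 16 entries, offsets 32..47
     * The faulting entry (37) lies inside the window, so the original request
     * is queued together with the read-ahead ones, as the comment says. */
    unsigned long offset = 37;
    offset = (offset >> page_cluster) << page_cluster;     /* now 32 */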
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 4cccaf171..1ae052b94 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -45,7 +45,11 @@ static int try_to_swap_out(struct task_struct * tsk, struct vm_area_struct* vma,
page_addr = pte_page(pte);
if (MAP_NR(page_addr) >= max_mapnr)
goto out_failed;
+
page = mem_map + MAP_NR(page_addr);
+ spin_lock(&tsk->mm->page_table_lock);
+ if (pte_val(pte) != pte_val(*page_table))
+ goto out_failed_unlock;
/*
* Dont be too eager to get aging right if
@@ -58,13 +62,13 @@ static int try_to_swap_out(struct task_struct * tsk, struct vm_area_struct* vma,
*/
set_pte(page_table, pte_mkold(pte));
set_bit(PG_referenced, &page->flags);
- goto out_failed;
+ goto out_failed_unlock;
}
if (PageReserved(page)
|| PageLocked(page)
|| ((gfp_mask & __GFP_DMA) && !PageDMA(page)))
- goto out_failed;
+ goto out_failed_unlock;
/*
* Is the page already in the swap cache? If so, then
@@ -82,7 +86,7 @@ drop_pte:
vma->vm_mm->rss--;
flush_tlb_page(vma, address);
__free_page(page);
- goto out_failed;
+ goto out_failed_unlock;
}
/*
@@ -109,7 +113,7 @@ drop_pte:
* locks etc.
*/
if (!(gfp_mask & __GFP_IO))
- goto out_failed;
+ goto out_failed_unlock;
/*
* Ok, it's really dirty. That means that
@@ -134,6 +138,7 @@ drop_pte:
if (vma->vm_ops && vma->vm_ops->swapout) {
pid_t pid = tsk->pid;
pte_clear(page_table);
+ spin_unlock(&tsk->mm->page_table_lock);
flush_tlb_page(vma, address);
vma->vm_mm->rss--;
@@ -155,6 +160,8 @@ drop_pte:
vma->vm_mm->rss--;
tsk->nswap++;
set_pte(page_table, __pte(entry));
+ spin_unlock(&tsk->mm->page_table_lock);
+
flush_tlb_page(vma, address);
swap_duplicate(entry); /* One for the process, one for the swap cache */
@@ -167,6 +174,8 @@ drop_pte:
out_free_success:
__free_page(page);
return 1;
+out_failed_unlock:
+ spin_unlock(&tsk->mm->page_table_lock);
out_failed:
return 0;
}
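Note on the vmscan.c hunks above: try_to_swap_out() now takes the target mm's page_table_lock, re-validates the pte, and funnels every failure after that point through the new out_failed_unlock label; the success paths drop the lock themselves before doing the slow swap-out work. A skeleton of that convention, with an illustrative function name:

    static int swap_out_pte_sketch(struct mm_struct *mm, pte_t *page_table, pte_t pte)
    {
            spin_lock(&mm->page_table_lock);
            if (pte_val(pte) != pte_val(*page_table))
                    goto out_failed_unlock;            /* pte changed under us */
            /* ... aging, dirty and swap-cache checks, each bailing out via
               out_failed_unlock on failure ... */
            pte_clear(page_table);                     /* claim the pte */
            spin_unlock(&mm->page_table_lock);         /* drop before the slow I/O */
            /* ... write the page out / add it to the swap cache ... */
            return 1;
    out_failed_unlock:
            spin_unlock(&mm->page_table_lock);
            return 0;
    }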