Diffstat (limited to 'mm')
-rw-r--r--  mm/Makefile      |  10
-rw-r--r--  mm/filemap.c     | 317
-rw-r--r--  mm/memory.c      |  16
-rw-r--r--  mm/mlock.c       |   6
-rw-r--r--  mm/mmap.c        | 140
-rw-r--r--  mm/mprotect.c    |   3
-rw-r--r--  mm/mremap.c      |   7
-rw-r--r--  mm/shmem.c       | 873
-rw-r--r--  mm/swap_state.c  |  10
-rw-r--r--  mm/swapfile.c    |   6
-rw-r--r--  mm/vmscan.c      |  81
11 files changed, 1138 insertions, 331 deletions
diff --git a/mm/Makefile b/mm/Makefile
index d74cdec48..63d2d4b4f 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -8,12 +8,12 @@
# Note 2! The CFLAGS definition is now in the main makefile...
O_TARGET := mm.o
-O_OBJS := memory.o mmap.o filemap.o mprotect.o mlock.o mremap.o \
+
+obj-y := memory.o mmap.o filemap.o mprotect.o mlock.o mremap.o \
vmalloc.o slab.o bootmem.o swap.o vmscan.o page_io.o \
- page_alloc.o swap_state.o swapfile.o numa.o oom_kill.o
+ page_alloc.o swap_state.o swapfile.o numa.o oom_kill.o \
+ shmem.o
-ifeq ($(CONFIG_HIGHMEM),y)
-O_OBJS += highmem.o
-endif
+obj-$(CONFIG_HIGHMEM) += highmem.o
include $(TOPDIR)/Rules.make
diff --git a/mm/filemap.c b/mm/filemap.c
index ec8ff8ac7..69fe40466 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -55,35 +55,48 @@ spinlock_t pagemap_lru_lock = SPIN_LOCK_UNLOCKED;
#define CLUSTER_PAGES (1 << page_cluster)
#define CLUSTER_OFFSET(x) (((x) >> page_cluster) << page_cluster)
-void __add_page_to_hash_queue(struct page * page, struct page **p)
+static void add_page_to_hash_queue(struct page * page, struct page **p)
{
- atomic_inc(&page_cache_size);
- if((page->next_hash = *p) != NULL)
- (*p)->pprev_hash = &page->next_hash;
+ struct page *next = *p;
+
*p = page;
+ page->next_hash = next;
page->pprev_hash = p;
+ if (next)
+ next->pprev_hash = &page->next_hash;
if (page->buffers)
PAGE_BUG(page);
+ atomic_inc(&page_cache_size);
}
-static inline void remove_page_from_hash_queue(struct page * page)
+static inline void add_page_to_inode_queue(struct address_space *mapping, struct page * page)
{
- if(page->pprev_hash) {
- if(page->next_hash)
- page->next_hash->pprev_hash = page->pprev_hash;
- *page->pprev_hash = page->next_hash;
- page->pprev_hash = NULL;
- }
- atomic_dec(&page_cache_size);
+ struct list_head *head = &mapping->clean_pages;
+
+ mapping->nrpages++;
+ list_add(&page->list, head);
+ page->mapping = mapping;
}
-static inline int sync_page(struct page *page)
+static inline void remove_page_from_inode_queue(struct page * page)
{
- struct address_space *mapping = page->mapping;
+ struct address_space * mapping = page->mapping;
- if (mapping && mapping->a_ops && mapping->a_ops->sync_page)
- return mapping->a_ops->sync_page(page);
- return 0;
+ mapping->nrpages--;
+ list_del(&page->list);
+ page->mapping = NULL;
+}
+
+static inline void remove_page_from_hash_queue(struct page * page)
+{
+ struct page *next = page->next_hash;
+ struct page **pprev = page->pprev_hash;
+
+ if (next)
+ next->pprev_hash = pprev;
+ *pprev = next;
+ page->pprev_hash = NULL;
+ atomic_dec(&page_cache_size);
}
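
The rewritten hash helpers above use the classic "pprev" linkage: every page stores the address of the pointer that points at it, so unhashing needs neither the bucket head nor a list walk. A minimal user-space sketch of the same technique, with a hypothetical struct node standing in for struct page:

#include <assert.h>
#include <stddef.h>

struct node {
	struct node *next;
	struct node **pprev;	/* address of the pointer that points at us */
};

static void hash_add(struct node *n, struct node **head)
{
	struct node *next = *head;

	*head = n;
	n->next = next;
	n->pprev = head;
	if (next)
		next->pprev = &n->next;
}

static void hash_del(struct node *n)
{
	struct node *next = n->next;
	struct node **pprev = n->pprev;

	if (next)
		next->pprev = pprev;
	*pprev = next;		/* works for head and middle alike */
	n->pprev = NULL;
}

int main(void)
{
	struct node *head = NULL;
	struct node a = { 0 }, b = { 0 };

	hash_add(&a, &head);
	hash_add(&b, &head);	/* chain is now b -> a */
	hash_del(&b);
	assert(head == &a && a.pprev == &head);
	return 0;
}
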
/*
@@ -93,6 +106,7 @@ static inline int sync_page(struct page *page)
*/
void __remove_inode_page(struct page *page)
{
+ if (PageDirty(page)) BUG();
remove_page_from_inode_queue(page);
remove_page_from_hash_queue(page);
page->mapping = NULL;
@@ -108,6 +122,30 @@ void remove_inode_page(struct page *page)
spin_unlock(&pagecache_lock);
}
+static inline int sync_page(struct page *page)
+{
+ struct address_space *mapping = page->mapping;
+
+ if (mapping && mapping->a_ops && mapping->a_ops->sync_page)
+ return mapping->a_ops->sync_page(page);
+ return 0;
+}
+
+/*
+ * Add a page to the dirty page list.
+ */
+void __set_page_dirty(struct page *page)
+{
+ struct address_space *mapping = page->mapping;
+
+ spin_lock(&pagecache_lock);
+ list_del(&page->list);
+ list_add(&page->list, &mapping->dirty_pages);
+ spin_unlock(&pagecache_lock);
+
+ mark_inode_dirty_pages(mapping->host);
+}
+
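
__set_page_dirty() only relinks the page onto the mapping's dirty list under pagecache_lock and marks the host inode as having dirty pages; setting the PG_dirty bit itself is presumably left to a set_page_dirty() wrapper that is not part of this diff. A rough sketch of the "move between per-state lists under one lock" idea, with simplified list and mapping types (not the kernel's):

#include <assert.h>
#include <pthread.h>

/* Minimal circular list in the style of the kernel's list_head (simplified). */
struct list_head { struct list_head *next, *prev; };

#define LIST_INIT(name) { &(name), &(name) }

static void list_add(struct list_head *new, struct list_head *head)
{
	new->next = head->next;
	new->prev = head;
	head->next->prev = new;
	head->next = new;
}

static void list_del(struct list_head *entry)
{
	entry->prev->next = entry->next;
	entry->next->prev = entry->prev;
}

/* Hypothetical mapping with per-state lists, all protected by one lock. */
struct mapping {
	pthread_mutex_t lock;
	struct list_head clean_pages, dirty_pages;
};

static void set_dirty(struct mapping *m, struct list_head *page)
{
	pthread_mutex_lock(&m->lock);
	list_del(page);				/* off whatever list it was on */
	list_add(page, &m->dirty_pages);	/* onto the dirty list         */
	pthread_mutex_unlock(&m->lock);
}

int main(void)
{
	struct mapping m = { PTHREAD_MUTEX_INITIALIZER,
			     LIST_INIT(m.clean_pages), LIST_INIT(m.dirty_pages) };
	struct list_head page;

	pthread_mutex_lock(&m.lock);
	list_add(&page, &m.clean_pages);
	pthread_mutex_unlock(&m.lock);

	set_dirty(&m, &page);
	assert(m.dirty_pages.next == &page && m.clean_pages.next == &m.clean_pages);
	return 0;
}
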
/**
* invalidate_inode_pages - Invalidate all the unlocked pages of one inode
* @inode: the inode which pages we want to invalidate
@@ -121,7 +159,7 @@ void invalidate_inode_pages(struct inode * inode)
struct list_head *head, *curr;
struct page * page;
- head = &inode->i_mapping->pages;
+ head = &inode->i_mapping->clean_pages;
spin_lock(&pagecache_lock);
spin_lock(&pagemap_lru_lock);
@@ -131,15 +169,17 @@ void invalidate_inode_pages(struct inode * inode)
page = list_entry(curr, struct page, list);
curr = curr->next;
- /* We cannot invalidate a locked page */
- if (TryLockPage(page))
+ /* We cannot invalidate something in use.. */
+ if (page_count(page) != 1)
continue;
- /* Neither can we invalidate something in use.. */
- if (page_count(page) != 1) {
- UnlockPage(page);
+ /* ..or dirty.. */
+ if (PageDirty(page))
+ continue;
+
+ /* ..or locked */
+ if (TryLockPage(page))
continue;
- }
__lru_cache_del(page);
__remove_inode_page(page);
@@ -179,26 +219,12 @@ static inline void truncate_complete_page(struct page *page)
page_cache_release(page);
}
-/**
- * truncate_inode_pages - truncate *all* the pages from an offset
- * @mapping: mapping to truncate
- * @lstart: offset from with to truncate
- *
- * Truncate the page cache at a set offset, removing the pages
- * that are beyond that offset (and zeroing out partial pages).
- * If any page is locked we wait for it to become unlocked.
- */
-void truncate_inode_pages(struct address_space * mapping, loff_t lstart)
+void truncate_list_pages(struct list_head *head, unsigned long start, unsigned partial)
{
- struct list_head *head, *curr;
+ struct list_head *curr;
struct page * page;
- unsigned partial = lstart & (PAGE_CACHE_SIZE - 1);
- unsigned long start;
-
- start = (lstart + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
repeat:
- head = &mapping->pages;
spin_lock(&pagecache_lock);
curr = head->next;
while (curr != head) {
@@ -242,6 +268,26 @@ repeat:
spin_unlock(&pagecache_lock);
}
+
+/**
+ * truncate_inode_pages - truncate *all* the pages from an offset
+ * @mapping: mapping to truncate
+ * @lstart: offset from which to truncate
+ *
+ * Truncate the page cache at a set offset, removing the pages
+ * that are beyond that offset (and zeroing out partial pages).
+ * If any page is locked we wait for it to become unlocked.
+ */
+void truncate_inode_pages(struct address_space * mapping, loff_t lstart)
+{
+ unsigned long start = (lstart + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+ unsigned partial = lstart & (PAGE_CACHE_SIZE - 1);
+
+ truncate_list_pages(&mapping->clean_pages, start, partial);
+ truncate_list_pages(&mapping->dirty_pages, start, partial);
+ truncate_list_pages(&mapping->locked_pages, start, partial);
+}
+
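
truncate_inode_pages() now just splits the byte offset into the first page index to drop completely (start, rounded up) and the number of bytes to keep in the page before it (partial), then sweeps the clean, dirty and locked lists. A tiny worked example of that index arithmetic, assuming 4096-byte pages:

#include <assert.h>

#define PAGE_CACHE_SIZE  4096UL
#define PAGE_CACHE_SHIFT 12

int main(void)
{
	unsigned long lstart = 10000;	/* new file size in bytes */

	/* first page index that is truncated away completely */
	unsigned long start = (lstart + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
	/* bytes to keep in the last remaining page (0 if page aligned) */
	unsigned long partial = lstart & (PAGE_CACHE_SIZE - 1);

	assert(start == 3);		/* pages 3, 4, ... are dropped entirely      */
	assert(partial == 1808);	/* page 2 keeps bytes 0..1807, rest is zeroed */
	return 0;
}
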
static inline struct page * __find_page_nolock(struct address_space *mapping, unsigned long offset, struct page *page)
{
goto inside;
@@ -303,14 +349,12 @@ static int waitfor_one_page(struct page *page)
return error;
}
-static int do_buffer_fdatasync(struct inode *inode, unsigned long start, unsigned long end, int (*fn)(struct page *))
+static int do_buffer_fdatasync(struct list_head *head, unsigned long start, unsigned long end, int (*fn)(struct page *))
{
- struct list_head *head, *curr;
+ struct list_head *curr;
struct page *page;
int retval = 0;
- head = &inode->i_mapping->pages;
-
spin_lock(&pagecache_lock);
curr = head->next;
while (curr != head) {
@@ -349,11 +393,89 @@ int generic_buffer_fdatasync(struct inode *inode, unsigned long start_idx, unsig
{
int retval;
- retval = do_buffer_fdatasync(inode, start_idx, end_idx, writeout_one_page);
- retval |= do_buffer_fdatasync(inode, start_idx, end_idx, waitfor_one_page);
+ /* writeout dirty buffers on pages from both clean and dirty lists */
+ retval = do_buffer_fdatasync(&inode->i_mapping->dirty_pages, start_idx, end_idx, writeout_one_page);
+ retval |= do_buffer_fdatasync(&inode->i_mapping->clean_pages, start_idx, end_idx, writeout_one_page);
+ retval |= do_buffer_fdatasync(&inode->i_mapping->locked_pages, start_idx, end_idx, writeout_one_page);
+
+ /* now wait for locked buffers on pages from both clean and dirty lists */
+ retval |= do_buffer_fdatasync(&inode->i_mapping->dirty_pages, start_idx, end_idx, writeout_one_page);
+ retval |= do_buffer_fdatasync(&inode->i_mapping->clean_pages, start_idx, end_idx, waitfor_one_page);
+ retval |= do_buffer_fdatasync(&inode->i_mapping->locked_pages, start_idx, end_idx, waitfor_one_page);
+
return retval;
}
+/**
+ * filemap_fdatasync - walk the list of dirty pages of the given address space
+ * and writepage() all of them.
+ *
+ * @mapping: address space structure to write
+ *
+ */
+void filemap_fdatasync(struct address_space * mapping)
+{
+ int (*writepage)(struct page *) = mapping->a_ops->writepage;
+
+ spin_lock(&pagecache_lock);
+
+ while (!list_empty(&mapping->dirty_pages)) {
+ struct page *page = list_entry(mapping->dirty_pages.next, struct page, list);
+
+ list_del(&page->list);
+ list_add(&page->list, &mapping->locked_pages);
+
+ if (!PageDirty(page))
+ continue;
+
+ page_cache_get(page);
+ spin_unlock(&pagecache_lock);
+
+ lock_page(page);
+
+ if (PageDirty(page)) {
+ ClearPageDirty(page);
+ writepage(page);
+ } else
+ UnlockPage(page);
+
+ page_cache_release(page);
+ spin_lock(&pagecache_lock);
+ }
+ spin_unlock(&pagecache_lock);
+}
+
+/**
+ * filemap_fdatawait - walk the list of locked pages of the given address space
+ * and wait for all of them.
+ *
+ * @mapping: address space structure to wait for
+ *
+ */
+void filemap_fdatawait(struct address_space * mapping)
+{
+ spin_lock(&pagecache_lock);
+
+ while (!list_empty(&mapping->locked_pages)) {
+ struct page *page = list_entry(mapping->locked_pages.next, struct page, list);
+
+ list_del(&page->list);
+ list_add(&page->list, &mapping->clean_pages);
+
+ if (!PageLocked(page))
+ continue;
+
+ page_cache_get(page);
+ spin_unlock(&pagecache_lock);
+
+ ___wait_on_page(page);
+
+ page_cache_release(page);
+ spin_lock(&pagecache_lock);
+ }
+ spin_unlock(&pagecache_lock);
+}
+
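
filemap_fdatasync() and filemap_fdatawait() share one iteration pattern: unlink the list head while holding pagecache_lock, move it to its destination list, pin it with page_cache_get(), drop the lock for the blocking writepage()/___wait_on_page() call, then retake the lock and re-read the head. A rough user-space sketch of that control flow (the reference counting and the re-check of page state are omitted for brevity):

#include <pthread.h>
#include <stdio.h>

struct item { struct item *next; int done; };

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static struct item *pending;	/* singly linked list, for brevity */

/* Stand-in for writepage()/___wait_on_page(): may block, so the lock
 * must not be held across it. */
static void slow_operation(struct item *it)
{
	it->done = 1;
}

static void process_all(void)
{
	pthread_mutex_lock(&lock);
	while (pending) {
		struct item *it = pending;

		pending = it->next;		/* unlink while locked  */
		pthread_mutex_unlock(&lock);	/* drop lock to block   */
		slow_operation(it);
		pthread_mutex_lock(&lock);	/* retake, re-read head */
	}
	pthread_mutex_unlock(&lock);
}

int main(void)
{
	struct item a = { 0 }, b = { &a };

	pending = &b;
	process_all();
	printf("%d %d\n", a.done, b.done);	/* prints: 1 1 */
	return 0;
}
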
/*
* Add a page to the inode page cache.
*
@@ -369,7 +491,7 @@ void add_to_page_cache_locked(struct page * page, struct address_space *mapping,
spin_lock(&pagecache_lock);
page->index = index;
add_page_to_inode_queue(mapping, page);
- __add_page_to_hash_queue(page, page_hash(mapping, index));
+ add_page_to_hash_queue(page, page_hash(mapping, index));
lru_cache_add(page);
spin_unlock(&pagecache_lock);
}
@@ -392,7 +514,7 @@ static inline void __add_to_page_cache(struct page * page,
page_cache_get(page);
page->index = offset;
add_page_to_inode_queue(mapping, page);
- __add_page_to_hash_queue(page, hash);
+ add_page_to_hash_queue(page, hash);
lru_cache_add(page);
}
@@ -542,8 +664,8 @@ void lock_page(struct page *page)
* a rather lightweight function, finding and getting a reference to a
* hashed page atomically, waiting for it if it's locked.
*/
-static struct page * __find_get_page(struct address_space *mapping,
- unsigned long offset, struct page **hash)
+struct page * __find_get_page(struct address_space *mapping,
+ unsigned long offset, struct page **hash)
{
struct page *page;
@@ -1460,72 +1582,19 @@ page_not_uptodate:
return NULL;
}
-/*
- * If a task terminates while we're swapping the page, the vma and
- * and file could be released: try_to_swap_out has done a get_file.
- * vma/file is guaranteed to exist in the unmap/sync cases because
- * mmap_sem is held.
- *
- * The "mapping" test takes care of somebody having truncated the
- * page and thus made this write-page a no-op..
- */
-static int filemap_write_page(struct page * page, int wait)
-{
- struct address_space * mapping = page->mapping;
- int error = 0;
-
- if (mapping && mapping->a_ops->writepage) {
- ClearPageDirty(page);
- error = mapping->a_ops->writepage(page);
- }
- return error;
-}
-
-
-/*
- * The page cache takes care of races between somebody
- * trying to swap something out and swap something in
- * at the same time..
- */
-extern void wakeup_bdflush(int);
-int filemap_swapout(struct page * page, struct file *file)
-{
- SetPageDirty(page);
- return 0;
-}
-
/* Called with mm->page_table_lock held to protect against other
* threads/the swapper from ripping pte's out from under us.
*/
static inline int filemap_sync_pte(pte_t * ptep, struct vm_area_struct *vma,
unsigned long address, unsigned int flags)
{
- pte_t pte;
- struct page *page;
- int error;
-
- pte = *ptep;
+ pte_t pte = *ptep;
- if (!pte_present(pte))
- goto out;
- if (!ptep_test_and_clear_dirty(ptep))
- goto out;
-
- flush_page_to_ram(pte_page(pte));
- flush_cache_page(vma, address);
- flush_tlb_page(vma, address);
- page = pte_page(pte);
- page_cache_get(page);
- spin_unlock(&vma->vm_mm->page_table_lock);
-
- lock_page(page);
- error = filemap_write_page(page, 1);
- page_cache_free(page);
-
- spin_lock(&vma->vm_mm->page_table_lock);
- return error;
-
-out:
+ if (pte_present(pte) && ptep_test_and_clear_dirty(ptep)) {
+ struct page *page = pte_page(pte);
+ flush_tlb_page(vma, address);
+ set_page_dirty(page);
+ }
return 0;
}
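
filemap_sync_pte() no longer performs the write itself; it only transfers the hardware dirty bit from the pte to the struct page, leaving the actual I/O to the dirty-page lists and filemap_fdatasync() above. A deliberately simplified sketch of that hand-off, with hypothetical flag fields rather than real pte/page types:

#include <assert.h>

#define PTE_DIRTY  0x1
#define PAGE_DIRTY 0x1

struct fake_pte  { unsigned long flags; };
struct fake_page { unsigned long flags; };

/* Move dirtiness from the page table entry to the page itself. */
static void sync_pte(struct fake_pte *pte, struct fake_page *page)
{
	if (pte->flags & PTE_DIRTY) {
		pte->flags &= ~PTE_DIRTY;	/* like ptep_test_and_clear_dirty() */
		page->flags |= PAGE_DIRTY;	/* like set_page_dirty()            */
	}
}

int main(void)
{
	struct fake_pte pte = { PTE_DIRTY };
	struct fake_page page = { 0 };

	sync_pte(&pte, &page);
	assert(!(pte.flags & PTE_DIRTY) && (page.flags & PAGE_DIRTY));
	return 0;
}
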
@@ -1623,9 +1692,7 @@ int filemap_sync(struct vm_area_struct * vma, unsigned long address,
* backing-store for swapping..
*/
static struct vm_operations_struct file_shared_mmap = {
- sync: filemap_sync,
nopage: filemap_nopage,
- swapout: filemap_swapout,
};
/*
@@ -1667,16 +1734,19 @@ int generic_file_mmap(struct file * file, struct vm_area_struct * vma)
static int msync_interval(struct vm_area_struct * vma,
unsigned long start, unsigned long end, int flags)
{
- if (vma->vm_file && vma->vm_ops && vma->vm_ops->sync) {
+ struct file * file = vma->vm_file;
+ if (file && (vma->vm_flags & VM_SHARED)) {
int error;
- error = vma->vm_ops->sync(vma, start, end-start, flags);
+ error = filemap_sync(vma, start, end-start, flags);
+
if (!error && (flags & MS_SYNC)) {
- struct file * file = vma->vm_file;
- if (file && file->f_op && file->f_op->fsync) {
- down(&file->f_dentry->d_inode->i_sem);
+ struct inode * inode = file->f_dentry->d_inode;
+ down(&inode->i_sem);
+ filemap_fdatasync(inode->i_mapping);
+ if (file->f_op && file->f_op->fsync)
error = file->f_op->fsync(file, file->f_dentry, 1);
- up(&file->f_dentry->d_inode->i_sem);
- }
+ filemap_fdatawait(inode->i_mapping);
+ up(&inode->i_sem);
}
return error;
}
@@ -2439,6 +2509,17 @@ generic_file_write(struct file *file,const char *buf,size_t count,loff_t *ppos)
if (bytes > count)
bytes = count;
+ /*
+ * Bring in the user page that we will copy from _first_.
+ * Otherwise there's a nasty deadlock on copying from the
+ * same page as we're writing to, without it being marked
+ * up-to-date.
+ */
+ { volatile unsigned char dummy;
+ __get_user(dummy, buf);
+ __get_user(dummy, buf+bytes-1);
+ }
+
status = -ENOMEM; /* we'll assign it later anyway */
page = __grab_cache_page(mapping, index, &cached_page);
if (!page)
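
The __get_user() pair touches the first and last byte of the user buffer before __grab_cache_page() locks the not-yet-uptodate destination page, so the later copy from user space cannot fault on the very page that is being written. The same "touch both ends first" idea in a user-space sketch (names here are illustrative, not kernel API):

#include <stddef.h>
#include <stdio.h>

/* Touch the first and last byte of buf so both ends are faulted in
 * before any lock is taken that the fault path might also need. */
static void prefault(const char *buf, size_t len)
{
	volatile char dummy;

	if (!len)
		return;
	dummy = buf[0];
	dummy = buf[len - 1];
	(void)dummy;
}

int main(void)
{
	char src[8192] = "hello";

	prefault(src, sizeof(src));
	puts(src);
	return 0;
}
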
diff --git a/mm/memory.c b/mm/memory.c
index 13dad21a0..f4bb0141f 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -259,22 +259,22 @@ nomem:
/*
* Return indicates whether a page was freed so caller can adjust rss
*/
-static inline int free_pte(pte_t page)
+static inline int free_pte(pte_t pte)
{
- if (pte_present(page)) {
- struct page *ptpage = pte_page(page);
- if ((!VALID_PAGE(ptpage)) || PageReserved(ptpage))
+ if (pte_present(pte)) {
+ struct page *page = pte_page(pte);
+ if ((!VALID_PAGE(page)) || PageReserved(page))
return 0;
/*
* free_page() used to be able to clear swap cache
* entries. We may now have to do it manually.
*/
- if (pte_dirty(page))
- SetPageDirty(ptpage);
- free_page_and_swap_cache(ptpage);
+ if (pte_dirty(pte) && page->mapping)
+ set_page_dirty(page);
+ free_page_and_swap_cache(page);
return 1;
}
- swap_free(pte_to_swp_entry(page));
+ swap_free(pte_to_swp_entry(pte));
return 0;
}
diff --git a/mm/mlock.c b/mm/mlock.c
index 551d61d39..16e9f947b 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -189,9 +189,6 @@ static int do_mlock(unsigned long start, size_t len, int on)
break;
}
}
- spin_lock(&current->mm->page_table_lock);
- merge_segments(current->mm, start, end);
- spin_unlock(&current->mm->page_table_lock);
return error;
}
@@ -263,9 +260,6 @@ static int do_mlockall(int flags)
if (error)
break;
}
- spin_lock(&current->mm->page_table_lock);
- merge_segments(current->mm, 0, TASK_SIZE);
- spin_unlock(&current->mm->page_table_lock);
return error;
}
diff --git a/mm/mmap.c b/mm/mmap.c
index 648cc5208..e5b3a989e 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -333,32 +333,22 @@ unsigned long do_mmap_pgoff(struct file * file, unsigned long addr, unsigned lon
if (error)
goto unmap_and_free_vma;
} else if (flags & MAP_SHARED) {
- error = map_zero_setup(vma);
+ error = shmem_zero_setup(vma);
if (error)
goto free_vma;
}
- /*
- * merge_segments may merge our vma, so we can't refer to it
- * after the call. Save the values we need now ...
- */
- flags = vma->vm_flags;
-
/* Can addr have changed??
*
* Answer: Yes, several device drivers can do it in their
* f_op->mmap method. -DaveM
*/
+ flags = vma->vm_flags;
addr = vma->vm_start;
- lock_vma_mappings(vma);
- spin_lock(&mm->page_table_lock);
- __insert_vm_struct(mm, vma);
- unlock_vma_mappings(vma);
+ insert_vm_struct(mm, vma);
if (correct_wcount)
atomic_inc(&file->f_dentry->d_inode->i_writecount);
- merge_segments(mm, vma->vm_start, vma->vm_end);
- spin_unlock(&mm->page_table_lock);
mm->total_vm += len >> PAGE_SHIFT;
if (flags & VM_LOCKED) {
@@ -742,9 +732,6 @@ int do_munmap(struct mm_struct *mm, unsigned long addr, size_t len)
end = end > mpnt->vm_end ? mpnt->vm_end : end;
size = end - st;
- if (mpnt->vm_ops && mpnt->vm_ops->unmap)
- mpnt->vm_ops->unmap(mpnt, st, size);
-
if (mpnt->vm_flags & VM_DENYWRITE &&
(st != mpnt->vm_start || end != mpnt->vm_end) &&
(file = mpnt->vm_file) != NULL) {
@@ -828,6 +815,23 @@ unsigned long do_brk(unsigned long addr, unsigned long len)
if (!vm_enough_memory(len >> PAGE_SHIFT))
return -ENOMEM;
+ flags = vm_flags(PROT_READ|PROT_WRITE|PROT_EXEC,
+ MAP_FIXED|MAP_PRIVATE) | mm->def_flags;
+
+ flags |= VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
+
+
+ /* Can we just expand an old anonymous mapping? */
+ if (addr) {
+ struct vm_area_struct * vma = find_vma(mm, addr-1);
+ if (vma && vma->vm_end == addr && !vma->vm_file &&
+ vma->vm_flags == flags) {
+ vma->vm_end = addr + len;
+ goto out;
+ }
+ }
+
+
/*
* create a vma struct for an anonymous mapping
*/
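
The new fast path in do_brk() skips allocating a vm_area_struct entirely when the requested range simply continues an existing anonymous mapping with identical flags. A sketch of that adjacency check over a stripped-down, hypothetical vma type:

#include <assert.h>
#include <stddef.h>

struct mini_vma {
	unsigned long vm_start, vm_end;
	unsigned long vm_flags;
	void *vm_file;		/* NULL for anonymous mappings */
};

/* Try to grow 'prev' to cover [addr, addr+len) instead of creating
 * a new vma.  Returns 1 on success. */
static int try_expand(struct mini_vma *prev, unsigned long addr,
		      unsigned long len, unsigned long flags)
{
	if (!prev || prev->vm_end != addr)
		return 0;		/* not adjacent                  */
	if (prev->vm_file || prev->vm_flags != flags)
		return 0;		/* file-backed or different flags */
	prev->vm_end = addr + len;
	return 1;
}

int main(void)
{
	/* [0x8000,0x9000), arbitrary flag bits, anonymous */
	struct mini_vma heap = { 0x8000, 0x9000, 0x73, NULL };

	assert(try_expand(&heap, 0x9000, 0x2000, 0x73));
	assert(heap.vm_end == 0xb000);
	return 0;
}
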
@@ -838,30 +842,16 @@ unsigned long do_brk(unsigned long addr, unsigned long len)
vma->vm_mm = mm;
vma->vm_start = addr;
vma->vm_end = addr + len;
- vma->vm_flags = vm_flags(PROT_READ|PROT_WRITE|PROT_EXEC,
- MAP_FIXED|MAP_PRIVATE) | mm->def_flags;
-
- vma->vm_flags |= VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
- vma->vm_page_prot = protection_map[vma->vm_flags & 0x0f];
+ vma->vm_flags = flags;
+ vma->vm_page_prot = protection_map[flags & 0x0f];
vma->vm_ops = NULL;
vma->vm_pgoff = 0;
vma->vm_file = NULL;
vma->vm_private_data = NULL;
- /*
- * merge_segments may merge our vma, so we can't refer to it
- * after the call. Save the values we need now ...
- */
- flags = vma->vm_flags;
- addr = vma->vm_start;
+ insert_vm_struct(mm, vma);
- lock_vma_mappings(vma);
- spin_lock(&mm->page_table_lock);
- __insert_vm_struct(mm, vma);
- unlock_vma_mappings(vma);
- merge_segments(mm, vma->vm_start, vma->vm_end);
- spin_unlock(&mm->page_table_lock);
-
+out:
mm->total_vm += len >> PAGE_SHIFT;
if (flags & VM_LOCKED) {
mm->locked_vm += len >> PAGE_SHIFT;
@@ -900,8 +890,6 @@ void exit_mmap(struct mm_struct * mm)
unsigned long size = end - start;
if (mpnt->vm_ops) {
- if (mpnt->vm_ops->unmap)
- mpnt->vm_ops->unmap(mpnt, start, size);
if (mpnt->vm_ops->close)
mpnt->vm_ops->close(mpnt);
}
@@ -973,84 +961,8 @@ void __insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vmp)
void insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vmp)
{
lock_vma_mappings(vmp);
+ spin_lock(&current->mm->page_table_lock);
__insert_vm_struct(mm, vmp);
+ spin_unlock(&current->mm->page_table_lock);
unlock_vma_mappings(vmp);
}
-
-/* Merge the list of memory segments if possible.
- * Redundant vm_area_structs are freed.
- * This assumes that the list is ordered by address.
- * We don't need to traverse the entire list, only those segments
- * which intersect or are adjacent to a given interval.
- *
- * We must already hold the mm semaphore when we get here..
- */
-void merge_segments (struct mm_struct * mm, unsigned long start_addr, unsigned long end_addr)
-{
- struct vm_area_struct *prev, *mpnt, *next, *prev1;
-
- mpnt = find_vma_prev(mm, start_addr, &prev1);
- if (!mpnt)
- return;
-
- if (prev1) {
- prev = prev1;
- } else {
- prev = mpnt;
- mpnt = mpnt->vm_next;
- }
- mm->mmap_cache = NULL; /* Kill the cache. */
-
- /* prev and mpnt cycle through the list, as long as
- * start_addr < mpnt->vm_end && prev->vm_start < end_addr
- */
- for ( ; mpnt && prev->vm_start < end_addr ; prev = mpnt, mpnt = next) {
- next = mpnt->vm_next;
-
- /* To share, we must have the same file, operations.. */
- if ((mpnt->vm_file != prev->vm_file)||
- (mpnt->vm_private_data != prev->vm_private_data) ||
- (mpnt->vm_ops != prev->vm_ops) ||
- (mpnt->vm_flags != prev->vm_flags) ||
- (prev->vm_end != mpnt->vm_start))
- continue;
-
- /*
- * If we have a file or it's a shared memory area
- * the offsets must be contiguous..
- */
- if ((mpnt->vm_file != NULL) || (mpnt->vm_flags & VM_SHM)) {
- unsigned long off = prev->vm_pgoff;
- off += (prev->vm_end - prev->vm_start) >> PAGE_SHIFT;
- if (off != mpnt->vm_pgoff)
- continue;
- }
-
- /* merge prev with mpnt and set up pointers so the new
- * big segment can possibly merge with the next one.
- * The old unused mpnt is freed.
- */
- if (mm->mmap_avl)
- avl_remove(mpnt, &mm->mmap_avl);
- prev->vm_end = mpnt->vm_end;
- prev->vm_next = mpnt->vm_next;
- mm->map_count--;
- if (mpnt->vm_ops && mpnt->vm_ops->close) {
- mpnt->vm_pgoff += (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT;
- mpnt->vm_start = mpnt->vm_end;
- spin_unlock(&mm->page_table_lock);
- mpnt->vm_ops->close(mpnt);
- } else
- spin_unlock(&mm->page_table_lock);
-
- lock_vma_mappings(mpnt);
- __remove_shared_vm_struct(mpnt);
- unlock_vma_mappings(mpnt);
- if (mpnt->vm_file)
- fput(mpnt->vm_file);
- kmem_cache_free(vm_area_cachep, mpnt);
- mpnt = prev;
-
- spin_lock(&mm->page_table_lock);
- }
-}
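
merge_segments() is gone, but the conditions it checked remain a useful description of when two neighbouring vmas describe one mapping: same file, private data, ops and flags, touching address ranges, and, for file or SysV shm mappings, contiguous page offsets. A hedged sketch of that predicate over a simplified vma (the constants here are illustrative, not the kernel's):

#include <assert.h>
#include <stddef.h>

#define PAGE_SHIFT 12
#define VM_SHM     0x100	/* illustrative flag value */

struct mini_vma {
	unsigned long vm_start, vm_end, vm_pgoff, vm_flags;
	void *vm_file, *vm_private_data, *vm_ops;
};

static int can_merge(const struct mini_vma *prev, const struct mini_vma *next)
{
	if (prev->vm_file != next->vm_file ||
	    prev->vm_private_data != next->vm_private_data ||
	    prev->vm_ops != next->vm_ops ||
	    prev->vm_flags != next->vm_flags ||
	    prev->vm_end != next->vm_start)
		return 0;

	/* file or SysV shm mappings must also be offset-contiguous */
	if (prev->vm_file || (prev->vm_flags & VM_SHM)) {
		unsigned long off = prev->vm_pgoff +
			((prev->vm_end - prev->vm_start) >> PAGE_SHIFT);
		if (off != next->vm_pgoff)
			return 0;
	}
	return 1;
}

int main(void)
{
	struct mini_vma a = { 0x1000, 0x3000, 0, 0, NULL, NULL, NULL };
	struct mini_vma b = { 0x3000, 0x4000, 2, 0, NULL, NULL, NULL };

	assert(can_merge(&a, &b));	/* anonymous: offsets not checked */
	return 0;
}
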
diff --git a/mm/mprotect.c b/mm/mprotect.c
index e47987f1e..91905c8b1 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -277,9 +277,6 @@ asmlinkage long sys_mprotect(unsigned long start, size_t len, unsigned long prot
break;
}
}
- spin_lock(&current->mm->page_table_lock);
- merge_segments(current->mm, start, end);
- spin_unlock(&current->mm->page_table_lock);
out:
up(&current->mm->mmap_sem);
return error;
diff --git a/mm/mremap.c b/mm/mremap.c
index bdbcf4841..e237c9442 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -141,12 +141,7 @@ static inline unsigned long move_vma(struct vm_area_struct * vma,
get_file(new_vma->vm_file);
if (new_vma->vm_ops && new_vma->vm_ops->open)
new_vma->vm_ops->open(new_vma);
- lock_vma_mappings(vma);
- spin_lock(&current->mm->page_table_lock);
- __insert_vm_struct(current->mm, new_vma);
- unlock_vma_mappings(vma);
- merge_segments(current->mm, new_vma->vm_start, new_vma->vm_end);
- spin_unlock(&current->mm->page_table_lock);
+ insert_vm_struct(current->mm, new_vma);
do_munmap(current->mm, addr, old_len);
current->mm->total_vm += new_len >> PAGE_SHIFT;
if (new_vma->vm_flags & VM_LOCKED) {
diff --git a/mm/shmem.c b/mm/shmem.c
new file mode 100644
index 000000000..a81a74659
--- /dev/null
+++ b/mm/shmem.c
@@ -0,0 +1,873 @@
+/*
+ * Resizable simple shmem filesystem for Linux.
+ *
+ * Copyright (C) 2000 Linus Torvalds.
+ * 2000 Transmeta Corp.
+ * 2000 Christoph Rohland
+ *
+ * This file is released under the GPL.
+ */
+
+/*
+ * This shared memory handling is heavily based on the ramfs. It
+ * extends the ramfs by the ability to use swap, which makes it a
+ * completely usable filesystem.
+ *
+ * But read and write are not supported (yet)
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/devfs_fs_kernel.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/file.h>
+#include <linux/swap.h>
+#include <linux/pagemap.h>
+#include <linux/string.h>
+#include <linux/locks.h>
+#include <asm/smplock.h>
+
+#include <asm/uaccess.h>
+
+#define SHMEM_MAGIC 0x01021994
+
+#define ENTRIES_PER_PAGE (PAGE_SIZE/sizeof(unsigned long))
+#define NR_SINGLE (ENTRIES_PER_PAGE + SHMEM_NR_DIRECT)
+
+static struct super_operations shmem_ops;
+static struct address_space_operations shmem_aops;
+static struct file_operations shmem_file_operations;
+static struct inode_operations shmem_inode_operations;
+static struct file_operations shmem_dir_operations;
+static struct inode_operations shmem_dir_inode_operations;
+static struct vm_operations_struct shmem_shared_vm_ops;
+static struct vm_operations_struct shmem_private_vm_ops;
+
+LIST_HEAD (shmem_inodes);
+static spinlock_t shmem_ilock = SPIN_LOCK_UNLOCKED;
+
+static swp_entry_t * shmem_swp_entry (struct shmem_inode_info *info, unsigned long index)
+{
+ if (index < SHMEM_NR_DIRECT)
+ return info->i_direct+index;
+
+ index -= SHMEM_NR_DIRECT;
+ if (index >= ENTRIES_PER_PAGE*ENTRIES_PER_PAGE)
+ return NULL;
+
+ if (!info->i_indirect) {
+ info->i_indirect = (swp_entry_t **) get_zeroed_page(GFP_USER);
+ if (!info->i_indirect)
+ return NULL;
+ }
+ if(!(info->i_indirect[index/ENTRIES_PER_PAGE])) {
+ info->i_indirect[index/ENTRIES_PER_PAGE] = (swp_entry_t *) get_zeroed_page(GFP_USER);
+ if (!info->i_indirect[index/ENTRIES_PER_PAGE])
+ return NULL;
+ }
+
+ return info->i_indirect[index/ENTRIES_PER_PAGE]+index%ENTRIES_PER_PAGE;
+}
+
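
shmem_swp_entry() maps a page index either onto the inode's small array of direct slots or onto one of up to ENTRIES_PER_PAGE indirect pages, each holding ENTRIES_PER_PAGE entries, much like a single-indirect block map. A sketch of just the index arithmetic, assuming 4096-byte pages, 4-byte swap entries (so 1024 entries per indirect page) and a SHMEM_NR_DIRECT of 16; the real values come from the architecture headers:

#include <assert.h>
#include <stdio.h>

#define SHMEM_NR_DIRECT  16	/* assumed; the real value comes from headers */
#define ENTRIES_PER_PAGE 1024	/* 4096-byte page / 4-byte swap entry         */

struct slot { unsigned long indirect_page, offset; };

/* Which slot does page 'index' of the file live in?  Mirrors the
 * direct/indirect split in shmem_swp_entry() above. */
static int locate(unsigned long index, struct slot *out)
{
	if (index < SHMEM_NR_DIRECT) {
		out->indirect_page = 0;	/* 0 = use the direct array */
		out->offset = index;
		return 0;
	}
	index -= SHMEM_NR_DIRECT;
	if (index >= (unsigned long)ENTRIES_PER_PAGE * ENTRIES_PER_PAGE)
		return -1;		/* beyond the single-indirect limit */
	out->indirect_page = index / ENTRIES_PER_PAGE + 1;
	out->offset = index % ENTRIES_PER_PAGE;
	return 0;
}

int main(void)
{
	struct slot s;

	assert(locate(5, &s) == 0 && s.indirect_page == 0 && s.offset == 5);
	assert(locate(16 + 1024 + 7, &s) == 0 &&
	       s.indirect_page == 2 && s.offset == 7);
	printf("max pages: %lu\n",
	       SHMEM_NR_DIRECT + (unsigned long)ENTRIES_PER_PAGE * ENTRIES_PER_PAGE);
	return 0;
}
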
+static int shmem_free_swp(swp_entry_t *dir, unsigned int count)
+{
+ swp_entry_t *ptr, entry;
+ struct page * page;
+ int freed = 0;
+
+ for (ptr = dir; ptr < dir + count; ptr++) {
+ if (!ptr->val)
+ continue;
+ entry = *ptr;
+ swap_free (entry);
+ *ptr = (swp_entry_t){0};
+ freed++;
+ if (!(page = lookup_swap_cache(entry)))
+ continue;
+ delete_from_swap_cache(page);
+ page_cache_release(page);
+ }
+ return freed;
+}
+
+/*
+ * shmem_truncate_part - free a bunch of swap entries
+ *
+ * @dir: pointer to swp_entries
+ * @size: number of entries in dir
+ * @start: offset to start from
+ * @inode: inode for statistics
+ * @freed: counter for freed pages
+ *
+ * It frees the swap entries from dir+start to dir+size
+ *
+ * returns 0 if it truncated something, else (start - size)
+ */
+
+static unsigned long
+shmem_truncate_part (swp_entry_t * dir, unsigned long size,
+ unsigned long start, struct inode * inode, unsigned long *freed) {
+ if (start > size)
+ return start - size;
+ if (dir)
+ *freed += shmem_free_swp (dir+start, size-start);
+
+ return 0;
+}
+
+static void shmem_truncate (struct inode * inode)
+{
+ int clear_base;
+ unsigned long start;
+ unsigned long mmfreed, freed = 0;
+ swp_entry_t **base, **ptr;
+ struct shmem_inode_info * info = &inode->u.shmem_i;
+
+ spin_lock (&info->lock);
+ start = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+
+ start = shmem_truncate_part (info->i_direct, SHMEM_NR_DIRECT, start, inode, &freed);
+
+ if (!(base = info->i_indirect))
+		goto out;
+
+ clear_base = 1;
+ for (ptr = base; ptr < base + ENTRIES_PER_PAGE; ptr++) {
+ if (!start) {
+ if (!*ptr)
+ continue;
+ freed += shmem_free_swp (*ptr, ENTRIES_PER_PAGE);
+ free_page ((unsigned long) *ptr);
+ *ptr = 0;
+ continue;
+ }
+ clear_base = 0;
+ start = shmem_truncate_part (*ptr, ENTRIES_PER_PAGE, start, inode, &freed);
+ }
+
+ if (!clear_base)
+ goto out;
+
+ free_page ((unsigned long)base);
+ info->i_indirect = 0;
+
+out:
+
+ /*
+ * We have to calculate the free blocks since we do not know
+ * how many pages the mm discarded
+ *
+ * But we know that normally
+	 * inode->i_blocks == inode->i_mapping->nrpages + info->swapped
+	 *
+	 * So the mm freed
+	 * inode->i_blocks - (inode->i_mapping->nrpages + info->swapped)
+ */
+
+ mmfreed = inode->i_blocks - (inode->i_mapping->nrpages + info->swapped);
+ info->swapped -= freed;
+ inode->i_blocks -= freed + mmfreed;
+ spin_unlock (&info->lock);
+
+ spin_lock (&inode->i_sb->u.shmem_sb.stat_lock);
+ inode->i_sb->u.shmem_sb.free_blocks += freed + mmfreed;
+ spin_unlock (&inode->i_sb->u.shmem_sb.stat_lock);
+}
+
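
The accounting at the end of shmem_truncate() relies on the stated invariant i_blocks == nrpages + swapped to recover how many pages the VM already discarded on its own (mmfreed), so that both the explicitly freed swap entries and those pages go back to the superblock. A worked example of that arithmetic with made-up numbers:

#include <assert.h>

int main(void)
{
	/* Hypothetical state as truncation finishes: */
	unsigned long i_blocks = 100;	/* blocks charged to the inode       */
	unsigned long nrpages  = 60;	/* pages still in the page cache     */
	unsigned long swapped  = 30;	/* entries still recorded as swapped */
	unsigned long freed    = 10;	/* swap entries just released        */

	/* Pages the VM already discarded behind our back: */
	unsigned long mmfreed = i_blocks - (nrpages + swapped);

	swapped  -= freed;
	i_blocks -= freed + mmfreed;	/* freed + mmfreed returns to the sb */

	assert(mmfreed == 10);
	assert(i_blocks == 80 && i_blocks == nrpages + swapped);
	return 0;
}
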
+static void shmem_delete_inode(struct inode * inode)
+{
+ struct shmem_sb_info *info = &inode->i_sb->u.shmem_sb;
+
+ spin_lock (&shmem_ilock);
+ list_del (&inode->u.shmem_i.list);
+ spin_unlock (&shmem_ilock);
+ inode->i_size = 0;
+ shmem_truncate (inode);
+ spin_lock (&info->stat_lock);
+ info->free_inodes++;
+ spin_unlock (&info->stat_lock);
+ clear_inode(inode);
+}
+
+/*
+ * Move the page from the page cache to the swap cache
+ */
+static int shmem_writepage(struct page * page)
+{
+ int error;
+ struct shmem_inode_info *info;
+ swp_entry_t *entry, swap;
+
+ info = &page->mapping->host->u.shmem_i;
+ if (info->locked)
+ return 1;
+ swap = __get_swap_page(2);
+ if (!swap.val)
+ return 1;
+
+ spin_lock(&info->lock);
+ entry = shmem_swp_entry (info, page->index);
+	if (!entry) /* this had been allocated on page allocation */
+ BUG();
+ error = -EAGAIN;
+ if (entry->val) {
+ __swap_free(swap, 2);
+ goto out;
+ }
+
+ *entry = swap;
+ error = 0;
+	/* Remove the page from the page cache */
+ lru_cache_del(page);
+ remove_inode_page(page);
+
+ /* Add it to the swap cache */
+ add_to_swap_cache(page, swap);
+ page_cache_release(page);
+ set_page_dirty(page);
+ info->swapped++;
+out:
+ spin_unlock(&info->lock);
+ UnlockPage(page);
+ return error;
+}
+
+/*
+ * shmem_nopage - either get the page from swap or allocate a new one
+ *
+ * If we allocate a new one we do not mark it dirty. That's up to the
+ * vm. If we swap it in we mark it dirty, since we also free the swap
+ * entry: a page cannot live in both the swap cache and the page cache
+ */
+struct page * shmem_nopage(struct vm_area_struct * vma, unsigned long address, int no_share)
+{
+ unsigned long size;
+ struct page * page;
+ unsigned int idx;
+ swp_entry_t *entry;
+ struct inode * inode = vma->vm_file->f_dentry->d_inode;
+ struct address_space * mapping = inode->i_mapping;
+ struct shmem_inode_info *info;
+
+ idx = (address - vma->vm_start) >> PAGE_SHIFT;
+ idx += vma->vm_pgoff;
+
+ down (&inode->i_sem);
+ size = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+ page = NOPAGE_SIGBUS;
+ if ((idx >= size) && (vma->vm_mm == current->mm))
+ goto out;
+
+ /* retry, we may have slept */
+ page = __find_lock_page(mapping, idx, page_hash (mapping, idx));
+ if (page)
+ goto cached_page;
+
+ info = &inode->u.shmem_i;
+ entry = shmem_swp_entry (info, idx);
+ if (!entry)
+ goto oom;
+ if (entry->val) {
+ unsigned long flags;
+
+ /* Look it up and read it in.. */
+ page = lookup_swap_cache(*entry);
+ if (!page) {
+ lock_kernel();
+ swapin_readahead(*entry);
+ page = read_swap_cache(*entry);
+ unlock_kernel();
+ if (!page)
+ goto oom;
+ }
+
+	/* We have to do this with the page locked to prevent races */
+ spin_lock (&info->lock);
+ swap_free(*entry);
+ lock_page(page);
+ delete_from_swap_cache_nolock(page);
+ *entry = (swp_entry_t) {0};
+ flags = page->flags & ~((1 << PG_uptodate) | (1 << PG_error) | (1 << PG_referenced) | (1 << PG_arch_1));
+ page->flags = flags | (1 << PG_dirty);
+ add_to_page_cache_locked(page, mapping, idx);
+ info->swapped--;
+ spin_unlock (&info->lock);
+ } else {
+ spin_lock (&inode->i_sb->u.shmem_sb.stat_lock);
+ if (inode->i_sb->u.shmem_sb.free_blocks == 0)
+ goto no_space;
+ inode->i_sb->u.shmem_sb.free_blocks--;
+ spin_unlock (&inode->i_sb->u.shmem_sb.stat_lock);
+ /* Ok, get a new page */
+ page = page_cache_alloc();
+ if (!page)
+ goto oom;
+ clear_user_highpage(page, address);
+ inode->i_blocks++;
+ add_to_page_cache (page, mapping, idx);
+ }
+ /* We have the page */
+ SetPageUptodate (page);
+
+cached_page:
+ UnlockPage (page);
+ up(&inode->i_sem);
+
+ if (no_share) {
+ struct page *new_page = page_cache_alloc();
+
+ if (new_page) {
+ copy_user_highpage(new_page, page, address);
+ flush_page_to_ram(new_page);
+ } else
+ new_page = NOPAGE_OOM;
+ page_cache_release(page);
+ return new_page;
+ }
+
+ flush_page_to_ram (page);
+ return(page);
+no_space:
+ spin_unlock (&inode->i_sb->u.shmem_sb.stat_lock);
+oom:
+ page = NOPAGE_OOM;
+out:
+ up(&inode->i_sem);
+ return page;
+}
+
+struct inode *shmem_get_inode(struct super_block *sb, int mode, int dev)
+{
+ struct inode * inode;
+
+ spin_lock (&sb->u.shmem_sb.stat_lock);
+ if (!sb->u.shmem_sb.free_inodes) {
+ spin_unlock (&sb->u.shmem_sb.stat_lock);
+ return NULL;
+ }
+ sb->u.shmem_sb.free_inodes--;
+ spin_unlock (&sb->u.shmem_sb.stat_lock);
+
+ inode = new_inode(sb);
+ if (inode) {
+ inode->i_mode = mode;
+ inode->i_uid = current->fsuid;
+ inode->i_gid = current->fsgid;
+ inode->i_blksize = PAGE_CACHE_SIZE;
+ inode->i_blocks = 0;
+ inode->i_rdev = to_kdev_t(dev);
+ inode->i_mapping->a_ops = &shmem_aops;
+ inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+ spin_lock_init (&inode->u.shmem_i.lock);
+ switch (mode & S_IFMT) {
+ default:
+ init_special_inode(inode, mode, dev);
+ break;
+ case S_IFREG:
+ inode->i_op = &shmem_inode_operations;
+ inode->i_fop = &shmem_file_operations;
+ break;
+ case S_IFDIR:
+ inode->i_op = &shmem_dir_inode_operations;
+ inode->i_fop = &shmem_dir_operations;
+ break;
+ case S_IFLNK:
+ inode->i_op = &page_symlink_inode_operations;
+ break;
+ }
+ spin_lock (&shmem_ilock);
+ list_add (&inode->u.shmem_i.list, &shmem_inodes);
+ spin_unlock (&shmem_ilock);
+ }
+ return inode;
+}
+
+static int shmem_statfs(struct super_block *sb, struct statfs *buf)
+{
+ buf->f_type = SHMEM_MAGIC;
+ buf->f_bsize = PAGE_CACHE_SIZE;
+ spin_lock (&sb->u.shmem_sb.stat_lock);
+ if (sb->u.shmem_sb.max_blocks != ULONG_MAX ||
+ sb->u.shmem_sb.max_inodes != ULONG_MAX) {
+ buf->f_blocks = sb->u.shmem_sb.max_blocks;
+ buf->f_bavail = buf->f_bfree = sb->u.shmem_sb.free_blocks;
+ buf->f_files = sb->u.shmem_sb.max_inodes;
+ buf->f_ffree = sb->u.shmem_sb.free_inodes;
+ }
+ spin_unlock (&sb->u.shmem_sb.stat_lock);
+ buf->f_namelen = 255;
+ return 0;
+}
+
+/*
+ * Lookup the data. This is trivial - if the dentry didn't already
+ * exist, we know it is negative.
+ */
+static struct dentry * shmem_lookup(struct inode *dir, struct dentry *dentry)
+{
+ d_add(dentry, NULL);
+ return NULL;
+}
+
+/*
+ * File creation. Allocate an inode, and we're done..
+ */
+static int shmem_mknod(struct inode *dir, struct dentry *dentry, int mode, int dev)
+{
+ struct inode * inode = shmem_get_inode(dir->i_sb, mode, dev);
+ int error = -ENOSPC;
+
+ if (inode) {
+ d_instantiate(dentry, inode);
+ dget(dentry); /* Extra count - pin the dentry in core */
+ error = 0;
+ }
+ return error;
+}
+
+static int shmem_mkdir(struct inode * dir, struct dentry * dentry, int mode)
+{
+ return shmem_mknod(dir, dentry, mode | S_IFDIR, 0);
+}
+
+static int shmem_create(struct inode *dir, struct dentry *dentry, int mode)
+{
+ return shmem_mknod(dir, dentry, mode | S_IFREG, 0);
+}
+
+/*
+ * Link a file..
+ */
+static int shmem_link(struct dentry *old_dentry, struct inode * dir, struct dentry * dentry)
+{
+ struct inode *inode = old_dentry->d_inode;
+
+ if (S_ISDIR(inode->i_mode))
+ return -EPERM;
+
+ inode->i_nlink++;
+ atomic_inc(&inode->i_count); /* New dentry reference */
+ dget(dentry); /* Extra pinning count for the created dentry */
+ d_instantiate(dentry, inode);
+ return 0;
+}
+
+static inline int shmem_positive(struct dentry *dentry)
+{
+ return dentry->d_inode && !d_unhashed(dentry);
+}
+
+/*
+ * Check that a directory is empty (this works
+ * for regular files too, they'll just always be
+ * considered empty..).
+ *
+ * Note that an empty directory can still have
+ * children, they just all have to be negative..
+ */
+static int shmem_empty(struct dentry *dentry)
+{
+ struct list_head *list;
+
+ spin_lock(&dcache_lock);
+ list = dentry->d_subdirs.next;
+
+ while (list != &dentry->d_subdirs) {
+ struct dentry *de = list_entry(list, struct dentry, d_child);
+
+ if (shmem_positive(de)) {
+ spin_unlock(&dcache_lock);
+ return 0;
+ }
+ list = list->next;
+ }
+ spin_unlock(&dcache_lock);
+ return 1;
+}
+
+/*
+ * This works for both directories and regular files.
+ * (non-directories will always have empty subdirs)
+ */
+static int shmem_unlink(struct inode * dir, struct dentry *dentry)
+{
+ int retval = -ENOTEMPTY;
+
+ if (shmem_empty(dentry)) {
+ struct inode *inode = dentry->d_inode;
+
+ inode->i_nlink--;
+ dput(dentry); /* Undo the count from "create" - this does all the work */
+ retval = 0;
+ }
+ return retval;
+}
+
+#define shmem_rmdir shmem_unlink
+
+/*
+ * The VFS layer already does all the dentry stuff for rename,
+ * we just have to decrement the usage count for the target if
+ * it exists so that the VFS layer correctly frees it when it
+ * gets overwritten.
+ */
+static int shmem_rename(struct inode * old_dir, struct dentry *old_dentry, struct inode * new_dir,struct dentry *new_dentry)
+{
+ int error = -ENOTEMPTY;
+
+ if (shmem_empty(new_dentry)) {
+ struct inode *inode = new_dentry->d_inode;
+ if (inode) {
+ inode->i_nlink--;
+ dput(new_dentry);
+ }
+ error = 0;
+ }
+ return error;
+}
+
+static int shmem_symlink(struct inode * dir, struct dentry *dentry, const char * symname)
+{
+ int error;
+
+ error = shmem_mknod(dir, dentry, S_IFLNK | S_IRWXUGO, 0);
+ if (!error) {
+ int l = strlen(symname)+1;
+ struct inode *inode = dentry->d_inode;
+ error = block_symlink(inode, symname, l);
+ }
+ return error;
+}
+
+static int shmem_mmap(struct file * file, struct vm_area_struct * vma)
+{
+ struct vm_operations_struct * ops;
+ struct inode *inode = file->f_dentry->d_inode;
+
+ ops = &shmem_private_vm_ops;
+ if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE))
+ ops = &shmem_shared_vm_ops;
+ if (!inode->i_sb || !S_ISREG(inode->i_mode))
+ return -EACCES;
+ UPDATE_ATIME(inode);
+ vma->vm_ops = ops;
+ return 0;
+}
+
+static int shmem_parse_options(char *options, int *mode, unsigned long * blocks, unsigned long *inodes)
+{
+ char *this_char, *value;
+
+ this_char = NULL;
+ if ( options )
+ this_char = strtok(options,",");
+ for ( ; this_char; this_char = strtok(NULL,",")) {
+ if ((value = strchr(this_char,'=')) != NULL)
+ *value++ = 0;
+ if (!strcmp(this_char,"nr_blocks")) {
+ if (!value || !*value || !blocks)
+ return 1;
+ *blocks = simple_strtoul(value,&value,0);
+ if (*value)
+ return 1;
+ } else if (!strcmp(this_char,"nr_inodes")) {
+ if (!value || !*value || !inodes)
+ return 1;
+ *inodes = simple_strtoul(value,&value,0);
+ if (*value)
+ return 1;
+ } else if (!strcmp(this_char,"mode")) {
+ if (!value || !*value || !mode)
+ return 1;
+ *mode = simple_strtoul(value,&value,8);
+ if (*value)
+ return 1;
+ }
+ else
+ return 1;
+ }
+
+ return 0;
+}
+
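
shmem_parse_options() accepts a comma-separated string such as "nr_blocks=1024,nr_inodes=512,mode=1777" (mode is octal) and rejects anything malformed or unknown. The same shape in a user-space sketch, using strtok_r()/strtoul() instead of the kernel helpers:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Returns 0 on success, 1 on a malformed or unknown option. */
static int parse(char *options, unsigned long *blocks,
		 unsigned long *inodes, unsigned long *mode)
{
	char *save = NULL, *opt;

	for (opt = strtok_r(options, ",", &save); opt;
	     opt = strtok_r(NULL, ",", &save)) {
		char *value = strchr(opt, '=');
		char *end;

		if (!value)
			return 1;
		*value++ = '\0';
		if (!strcmp(opt, "nr_blocks"))
			*blocks = strtoul(value, &end, 0);
		else if (!strcmp(opt, "nr_inodes"))
			*inodes = strtoul(value, &end, 0);
		else if (!strcmp(opt, "mode"))
			*mode = strtoul(value, &end, 8);	/* octal */
		else
			return 1;
		if (*value == '\0' || *end != '\0')
			return 1;
	}
	return 0;
}

int main(void)
{
	char opts[] = "nr_blocks=1024,nr_inodes=512,mode=1777";
	unsigned long blocks = 0, inodes = 0, mode = 0;

	if (parse(opts, &blocks, &inodes, &mode))
		return 1;
	printf("blocks=%lu inodes=%lu mode=%lo\n", blocks, inodes, mode);
	return 0;
}
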
+static struct super_block *shmem_read_super(struct super_block * sb, void * data, int silent)
+{
+ struct inode * inode;
+ struct dentry * root;
+ unsigned long blocks = ULONG_MAX; /* unlimited */
+ unsigned long inodes = ULONG_MAX; /* unlimited */
+ int mode = S_IRWXUGO | S_ISVTX;
+
+ if (shmem_parse_options (data, &mode, &blocks, &inodes)) {
+ printk(KERN_ERR "shmem fs invalid option\n");
+ return NULL;
+ }
+
+ spin_lock_init (&sb->u.shmem_sb.stat_lock);
+ sb->u.shmem_sb.max_blocks = blocks;
+ sb->u.shmem_sb.free_blocks = blocks;
+ sb->u.shmem_sb.max_inodes = inodes;
+ sb->u.shmem_sb.free_inodes = inodes;
+ sb->s_blocksize = PAGE_CACHE_SIZE;
+ sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
+ sb->s_magic = SHMEM_MAGIC;
+ sb->s_op = &shmem_ops;
+ inode = shmem_get_inode(sb, S_IFDIR | mode, 0);
+ if (!inode)
+ return NULL;
+
+ root = d_alloc_root(inode);
+ if (!root) {
+ iput(inode);
+ return NULL;
+ }
+ sb->s_root = root;
+ return sb;
+}
+
+static int shmem_remount_fs (struct super_block *sb, int *flags, char *data)
+{
+ int error;
+ unsigned long max_blocks, blocks;
+ unsigned long max_inodes, inodes;
+ struct shmem_sb_info *info = &sb->u.shmem_sb;
+
+ if (shmem_parse_options (data, NULL, &max_blocks, &max_inodes))
+ return -EINVAL;
+
+ spin_lock(&info->stat_lock);
+ blocks = info->max_blocks - info->free_blocks;
+ inodes = info->max_inodes - info->free_inodes;
+ error = -EINVAL;
+ if (max_blocks < blocks)
+ goto out;
+ if (max_inodes < inodes)
+ goto out;
+ error = 0;
+ info->max_blocks = max_blocks;
+ info->free_blocks = max_blocks - blocks;
+ info->max_inodes = max_inodes;
+ info->free_inodes = max_inodes - inodes;
+out:
+ spin_unlock(&info->stat_lock);
+ return error;
+}
+
+static struct address_space_operations shmem_aops = {
+ writepage: shmem_writepage
+};
+
+static struct file_operations shmem_file_operations = {
+ mmap: shmem_mmap
+};
+
+static struct inode_operations shmem_inode_operations = {
+ truncate: shmem_truncate,
+};
+
+static struct file_operations shmem_dir_operations = {
+ read: generic_read_dir,
+ readdir: dcache_readdir,
+};
+
+static struct inode_operations shmem_dir_inode_operations = {
+ create: shmem_create,
+ lookup: shmem_lookup,
+ link: shmem_link,
+ unlink: shmem_unlink,
+ symlink: shmem_symlink,
+ mkdir: shmem_mkdir,
+ rmdir: shmem_rmdir,
+ mknod: shmem_mknod,
+ rename: shmem_rename,
+};
+
+static struct super_operations shmem_ops = {
+ statfs: shmem_statfs,
+ remount_fs: shmem_remount_fs,
+ delete_inode: shmem_delete_inode,
+ put_inode: force_delete,
+};
+
+static struct vm_operations_struct shmem_private_vm_ops = {
+ nopage: shmem_nopage,
+};
+
+static struct vm_operations_struct shmem_shared_vm_ops = {
+ nopage: shmem_nopage,
+};
+
+static DECLARE_FSTYPE(shmem_fs_type, "shm", shmem_read_super, FS_LITTER);
+
+static int __init init_shmem_fs(void)
+{
+ int error;
+ struct vfsmount * res;
+
+ if ((error = register_filesystem(&shmem_fs_type))) {
+ printk (KERN_ERR "Could not register shmem fs\n");
+ return error;
+ }
+
+ res = kern_mount(&shmem_fs_type);
+ if (IS_ERR (res)) {
+ printk (KERN_ERR "could not kern_mount shmem fs\n");
+ unregister_filesystem(&shmem_fs_type);
+ return PTR_ERR(res);
+ }
+
+ devfs_mk_dir (NULL, "shm", NULL);
+ return 0;
+}
+
+static void __exit exit_shmem_fs(void)
+{
+ unregister_filesystem(&shmem_fs_type);
+}
+
+module_init(init_shmem_fs)
+module_exit(exit_shmem_fs)
+
+static int shmem_clear_swp (swp_entry_t entry, swp_entry_t *ptr, int size) {
+ swp_entry_t *test;
+
+ for (test = ptr; test < ptr + size; test++) {
+ if (test->val == entry.val) {
+ swap_free (entry);
+ *test = (swp_entry_t) {0};
+ return test - ptr;
+ }
+ }
+ return -1;
+}
+
+static int shmem_unuse_inode (struct inode *inode, swp_entry_t entry, struct page *page)
+{
+ swp_entry_t **base, **ptr;
+ unsigned long idx;
+ int offset;
+ struct shmem_inode_info *info = &inode->u.shmem_i;
+
+ idx = 0;
+ spin_lock (&info->lock);
+ if ((offset = shmem_clear_swp (entry,info->i_direct, SHMEM_NR_DIRECT)) >= 0)
+ goto found;
+
+ idx = SHMEM_NR_DIRECT;
+ if (!(base = info->i_indirect))
+ goto out;
+
+ for (ptr = base; ptr < base + ENTRIES_PER_PAGE; ptr++) {
+ if (*ptr &&
+ (offset = shmem_clear_swp (entry, *ptr, ENTRIES_PER_PAGE)) >= 0)
+ goto found;
+ idx += ENTRIES_PER_PAGE;
+ }
+out:
+ spin_unlock (&info->lock);
+ return 0;
+found:
+ add_to_page_cache(page, inode->i_mapping, offset + idx);
+ set_page_dirty(page);
+ SetPageUptodate(page);
+ UnlockPage(page);
+ info->swapped--;
+ spin_unlock(&info->lock);
+ return 1;
+}
+
+/*
+ * shmem_unuse() searches for a possibly swapped-out shmem page.
+ */
+void shmem_unuse(swp_entry_t entry, struct page *page)
+{
+ struct list_head *p;
+ struct inode * inode;
+
+ spin_lock (&shmem_ilock);
+ list_for_each(p, &shmem_inodes) {
+ inode = list_entry(p, struct inode, u.shmem_i.list);
+
+ if (shmem_unuse_inode(inode, entry, page))
+ break;
+ }
+ spin_unlock (&shmem_ilock);
+}
+
+
+/*
+ * shmem_file_setup - get an unlinked file living in shmem fs
+ *
+ * @name: name for dentry (to be seen in /proc/<pid>/maps)
+ * @size: size to be set for the file
+ *
+ */
+struct file *shmem_file_setup(char * name, loff_t size)
+{
+ int error;
+ struct file *file;
+ struct inode * inode;
+ struct dentry *dentry, *root;
+ struct qstr this;
+ int vm_enough_memory(long pages);
+
+ error = -ENOMEM;
+ if (!vm_enough_memory((size) >> PAGE_SHIFT))
+ goto out;
+
+ this.name = name;
+ this.len = strlen(name);
+ this.hash = 0; /* will go */
+ root = shmem_fs_type.kern_mnt->mnt_root;
+ dentry = d_alloc(root, &this);
+ if (!dentry)
+ goto out;
+
+ error = -ENFILE;
+ file = get_empty_filp();
+ if (!file)
+ goto put_dentry;
+
+ error = -ENOSPC;
+ inode = shmem_get_inode(root->d_sb, S_IFREG | S_IRWXUGO, 0);
+ if (!inode)
+ goto close_file;
+
+ d_instantiate(dentry, inode);
+ dentry->d_inode->i_size = size;
+ file->f_vfsmnt = mntget(shmem_fs_type.kern_mnt);
+ file->f_dentry = dentry;
+ file->f_op = &shmem_file_operations;
+ file->f_mode = FMODE_WRITE | FMODE_READ;
+ inode->i_nlink = 0; /* It is unlinked */
+ return(file);
+
+close_file:
+ put_filp(file);
+put_dentry:
+ dput (dentry);
+out:
+ return ERR_PTR(error);
+}
+/*
+ * shmem_zero_setup - setup a shared anonymous mapping
+ *
+ * @vma: the vma to be mmapped is prepared by do_mmap_pgoff
+ */
+int shmem_zero_setup(struct vm_area_struct *vma)
+{
+ struct file *file;
+ loff_t size = vma->vm_end - vma->vm_start;
+
+ file = shmem_file_setup("dev/zero", size);
+ if (IS_ERR(file))
+ return PTR_ERR(file);
+
+ if (vma->vm_file)
+ fput (vma->vm_file);
+ vma->vm_file = file;
+ vma->vm_ops = &shmem_shared_vm_ops;
+ return 0;
+}
diff --git a/mm/swap_state.c b/mm/swap_state.c
index df45b34af..2a5a55b7b 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -29,10 +29,9 @@ static struct address_space_operations swap_aops = {
};
struct address_space swapper_space = {
- { /* pages */
- &swapper_space.pages, /* .next */
- &swapper_space.pages /* .prev */
- },
+ LIST_HEAD_INIT(swapper_space.clean_pages),
+ LIST_HEAD_INIT(swapper_space.dirty_pages),
+ LIST_HEAD_INIT(swapper_space.locked_pages),
0, /* nrpages */
&swap_aops,
};
@@ -65,7 +64,7 @@ void add_to_swap_cache(struct page *page, swp_entry_t entry)
BUG();
if (page->mapping)
BUG();
- flags = page->flags & ~((1 << PG_error) | (1 << PG_dirty) | (1 << PG_referenced) | (1 << PG_arch_1));
+ flags = page->flags & ~((1 << PG_error) | (1 << PG_arch_1));
page->flags = flags | (1 << PG_uptodate);
add_to_page_cache_locked(page, &swapper_space, entry.val);
}
@@ -80,6 +79,7 @@ static inline void remove_from_swap_cache(struct page *page)
PAGE_BUG(page);
PageClearSwapCache(page);
+ ClearPageDirty(page);
__remove_inode_page(page);
}
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 688e2fcdd..57f815638 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -370,15 +370,15 @@ static int try_to_unuse(unsigned int type)
swap_free(entry);
return -ENOMEM;
}
+ if (PageSwapCache(page))
+ delete_from_swap_cache(page);
read_lock(&tasklist_lock);
for_each_task(p)
unuse_process(p->mm, entry, page);
read_unlock(&tasklist_lock);
- shm_unuse(entry, page);
+ shmem_unuse(entry, page);
/* Now get rid of the extra reference to the temporary
page we've been using. */
- if (PageSwapCache(page))
- delete_from_swap_cache(page);
page_cache_release(page);
/*
* Check for and clear any overflowed swap map counts.
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 46eb771af..d4a74f41f 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -40,7 +40,6 @@ static int try_to_swap_out(struct mm_struct * mm, struct vm_area_struct* vma, un
pte_t pte;
swp_entry_t entry;
struct page * page;
- int (*swapout)(struct page *, struct file *);
int onlist;
pte = *page_table;
@@ -92,7 +91,7 @@ static int try_to_swap_out(struct mm_struct * mm, struct vm_area_struct* vma, un
if (PageSwapCache(page)) {
entry.val = page->index;
if (pte_dirty(pte))
- SetPageDirty(page);
+ set_page_dirty(page);
set_swap_pte:
swap_duplicate(entry);
set_pte(page_table, swp_entry_to_pte(entry));
@@ -119,64 +118,19 @@ out_failed:
* Basically, this just makes it possible for us to do
* some real work in the future in "refill_inactive()".
*/
- if (!pte_dirty(pte)) {
- flush_cache_page(vma, address);
+ flush_cache_page(vma, address);
+ if (!pte_dirty(pte))
goto drop_pte;
- }
-
- /*
- * Don't go down into the swap-out stuff if
- * we cannot do I/O! Avoid recursing on FS
- * locks etc.
- */
- if (!(gfp_mask & __GFP_IO))
- goto out_unlock_restore;
-
- /*
- * Don't do any of the expensive stuff if
- * we're not really interested in this zone.
- */
- if (page->zone->free_pages + page->zone->inactive_clean_pages
- + page->zone->inactive_dirty_pages
- > page->zone->pages_high + inactive_target)
- goto out_unlock_restore;
/*
* Ok, it's really dirty. That means that
* we should either create a new swap cache
* entry for it, or we should write it back
* to its own backing store.
- *
- * Note that in neither case do we actually
- * know that we make a page available, but
- * as we potentially sleep we can no longer
- * continue scanning, so we migth as well
- * assume we free'd something.
- *
- * NOTE NOTE NOTE! This should just set a
- * dirty bit in 'page', and just drop the
- * pte. All the hard work would be done by
- * refill_inactive().
- *
- * That would get rid of a lot of problems.
*/
- flush_cache_page(vma, address);
- if (vma->vm_ops && (swapout = vma->vm_ops->swapout)) {
- int error;
- struct file *file = vma->vm_file;
- if (file) get_file(file);
-
- mm->rss--;
- flush_tlb_page(vma, address);
- spin_unlock(&mm->page_table_lock);
- error = swapout(page, file);
- if (file) fput(file);
- if (error < 0)
- goto out_unlock_restore;
- UnlockPage(page);
- deactivate_page(page);
- page_cache_release(page);
- return 1; /* We released page_table_lock */
+ if (page->mapping) {
+ set_page_dirty(page);
+ goto drop_pte;
}
/*
@@ -191,7 +145,7 @@ out_failed:
/* Add it to the swap cache and mark it dirty */
add_to_swap_cache(page, entry);
- SetPageDirty(page);
+ set_page_dirty(page);
goto set_swap_pte;
out_unlock_restore:
@@ -426,11 +380,6 @@ static int swap_out(unsigned int priority, int gfp_mask, unsigned long idle_time
ret = swap_out_mm(best, gfp_mask);
mmdrop(best);
- if (!ret)
- continue;
-
- if (ret < 0)
- kill_proc(pid, SIGBUS, 1);
__ret = 1;
goto out;
}
@@ -484,7 +433,7 @@ struct page * reclaim_page(zone_t * zone)
}
/* The page is dirty, or locked, move to inactive_dirty list. */
- if (page->buffers || TryLockPage(page)) {
+ if (page->buffers || PageDirty(page) || TryLockPage(page)) {
del_page_from_inactive_clean_list(page);
add_page_to_inactive_dirty_list(page);
continue;
@@ -603,11 +552,13 @@ dirty_page_rescan:
*/
if (PageDirty(page)) {
int (*writepage)(struct page *) = page->mapping->a_ops->writepage;
+ int result;
+
if (!writepage)
goto page_active;
- /* Can't start IO? Move it to the back of the list */
- if (!can_get_io_locks) {
+ /* First time through? Move it to the back of the list */
+ if (!launder_loop) {
list_del(page_lru);
list_add(page_lru, &inactive_dirty_list);
UnlockPage(page);
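
The dirty-list rescan above is now two-pass: while launder_loop is clear a dirty page is merely rotated to the back of inactive_dirty_list, and writepage() I/O is attempted only on a later pass. A very loose sketch of that rotate-first, write-later control flow, with a plain array standing in for the list:

#include <stdio.h>

#define NPAGES 4

/* 1 = dirty, 0 = clean; a stand-in for the inactive_dirty list. */
static int dirty[NPAGES] = { 1, 0, 1, 1 };

static int launder(int launder_loop)
{
	int cleaned = 0, i;

	for (i = 0; i < NPAGES; i++) {
		if (!dirty[i]) {
			cleaned++;		/* reclaimable as-is         */
			continue;
		}
		if (!launder_loop)
			continue;		/* pass 1: just rotate, no I/O */
		dirty[i] = 0;			/* pass 2: "writepage()"       */
		cleaned++;
	}
	return cleaned;
}

int main(void)
{
	int got = launder(0);			/* cheap pass first            */

	if (!got)				/* still nothing reclaimable?  */
		got = launder(1);		/* then do the expensive I/O   */
	printf("reclaimable pages: %d\n", got);
	return 0;
}
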
@@ -619,12 +570,16 @@ dirty_page_rescan:
page_cache_get(page);
spin_unlock(&pagemap_lru_lock);
- writepage(page);
+ result = writepage(page);
page_cache_release(page);
/* And re-start the thing.. */
spin_lock(&pagemap_lru_lock);
- continue;
+ if (result != 1)
+ continue;
+ /* writepage refused to do anything */
+ set_page_dirty(page);
+ goto page_active;
}
/*