| author    | Ralf Baechle <ralf@linux-mips.org>               | 2001-01-10 17:17:53 +0000 |
| committer | Ralf Baechle <ralf@linux-mips.org>               | 2001-01-10 17:17:53 +0000 |
| commit    | b2ad5f821b1381492d792ca10b1eb7a107b48f14 (patch) |                           |
| tree      | 954a648692e7da983db1d2470953705f6a729264 /mm     |                           |
| parent    | c9c06167e7933d93a6e396174c68abf242294abb (diff)  |                           |
Merge with Linux 2.4.0-prerelease. Big Makefile rewrite, test your
Makefiles.
Diffstat (limited to 'mm')
| -rw-r--r-- | mm/Makefile     |  10 |
| -rw-r--r-- | mm/filemap.c    | 317 |
| -rw-r--r-- | mm/memory.c     |  16 |
| -rw-r--r-- | mm/mlock.c      |   6 |
| -rw-r--r-- | mm/mmap.c       | 140 |
| -rw-r--r-- | mm/mprotect.c   |   3 |
| -rw-r--r-- | mm/mremap.c     |   7 |
| -rw-r--r-- | mm/shmem.c      | 873 |
| -rw-r--r-- | mm/swap_state.c |  10 |
| -rw-r--r-- | mm/swapfile.c   |   6 |
| -rw-r--r-- | mm/vmscan.c     |  81 |
11 files changed, 1138 insertions, 331 deletions
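The bulk of the mm changes below replace the single per-mapping page list with three lists — `clean_pages`, `dirty_pages` and `locked_pages` — so that `set_page_dirty()` can file a page on its mapping's dirty list and the new `filemap_fdatasync()`/`filemap_fdatawait()` only walk pages that actually need writeout or waiting (see the filemap.c, swap_state.c and vmscan.c hunks). The following is a minimal userspace C sketch of that bookkeeping, not the kernel code: it drops the pagecache spinlock, page reference counting and real I/O, and the list helpers, constants and printed output are purely illustrative.

```c
/*
 * Minimal userspace model of the clean/dirty/locked page-list split this
 * merge introduces in struct address_space.  Illustrative only: no
 * spinlocks, no page refcounts, no real I/O; names mirror the kernel
 * code without being the kernel API.
 */
#include <stdio.h>

struct list_head { struct list_head *next, *prev; };

static void list_init(struct list_head *h) { h->next = h->prev = h; }
static int list_empty(struct list_head *h) { return h->next == h; }
static void list_del(struct list_head *e)
{
	e->prev->next = e->next;
	e->next->prev = e->prev;
}
static void list_add(struct list_head *e, struct list_head *h)
{
	e->next = h->next;
	e->prev = h;
	h->next->prev = e;
	h->next = e;
}

struct page {
	struct list_head list;	/* must stay first: list nodes cast back to pages */
	unsigned long index;
	int dirty;
};

struct address_space {
	struct list_head clean_pages;	/* written back, nothing pending */
	struct list_head dirty_pages;	/* need ->writepage() */
	struct list_head locked_pages;	/* writeback submitted, not finished */
};

/* set_page_dirty(): mark the page and move it clean -> dirty */
static void set_page_dirty(struct address_space *m, struct page *p)
{
	if (!p->dirty) {
		p->dirty = 1;
		list_del(&p->list);
		list_add(&p->list, &m->dirty_pages);
	}
}

/* filemap_fdatasync(): start writeback on every dirty page, dirty -> locked */
static void filemap_fdatasync(struct address_space *m)
{
	while (!list_empty(&m->dirty_pages)) {
		struct page *p = (struct page *) m->dirty_pages.next;
		list_del(&p->list);
		list_add(&p->list, &m->locked_pages);
		p->dirty = 0;
		printf("writepage(index=%lu)\n", p->index);	/* stands in for ->writepage() */
	}
}

/* filemap_fdatawait(): wait for writeback to finish, locked -> clean */
static void filemap_fdatawait(struct address_space *m)
{
	while (!list_empty(&m->locked_pages)) {
		struct page *p = (struct page *) m->locked_pages.next;
		list_del(&p->list);
		list_add(&p->list, &m->clean_pages);	/* stands in for ___wait_on_page() */
	}
}

int main(void)
{
	struct address_space m;
	struct page pg = { .index = 7 };

	list_init(&m.clean_pages);
	list_init(&m.dirty_pages);
	list_init(&m.locked_pages);
	list_add(&pg.list, &m.clean_pages);	/* add_to_page_cache() puts pages here */

	set_page_dirty(&m, &pg);
	filemap_fdatasync(&m);	/* the reworked msync(MS_SYNC) path syncs... */
	filemap_fdatawait(&m);	/* ...then waits */
	return 0;
}
```

Splitting the list is what lets the new msync()/fsync() path find dirty pages without scanning the whole mapping, and it is also why try_to_swap_out() in the vmscan.c hunk can now simply mark a mapped page dirty and drop the pte instead of calling a per-vma swapout() method.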
diff --git a/mm/Makefile b/mm/Makefile index d74cdec48..63d2d4b4f 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -8,12 +8,12 @@ # Note 2! The CFLAGS definition is now in the main makefile... O_TARGET := mm.o -O_OBJS := memory.o mmap.o filemap.o mprotect.o mlock.o mremap.o \ + +obj-y := memory.o mmap.o filemap.o mprotect.o mlock.o mremap.o \ vmalloc.o slab.o bootmem.o swap.o vmscan.o page_io.o \ - page_alloc.o swap_state.o swapfile.o numa.o oom_kill.o + page_alloc.o swap_state.o swapfile.o numa.o oom_kill.o \ + shmem.o -ifeq ($(CONFIG_HIGHMEM),y) -O_OBJS += highmem.o -endif +obj-$(CONFIG_HIGHMEM) += highmem.o include $(TOPDIR)/Rules.make diff --git a/mm/filemap.c b/mm/filemap.c index ec8ff8ac7..69fe40466 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -55,35 +55,48 @@ spinlock_t pagemap_lru_lock = SPIN_LOCK_UNLOCKED; #define CLUSTER_PAGES (1 << page_cluster) #define CLUSTER_OFFSET(x) (((x) >> page_cluster) << page_cluster) -void __add_page_to_hash_queue(struct page * page, struct page **p) +static void add_page_to_hash_queue(struct page * page, struct page **p) { - atomic_inc(&page_cache_size); - if((page->next_hash = *p) != NULL) - (*p)->pprev_hash = &page->next_hash; + struct page *next = *p; + *p = page; + page->next_hash = next; page->pprev_hash = p; + if (next) + next->pprev_hash = &page->next_hash; if (page->buffers) PAGE_BUG(page); + atomic_inc(&page_cache_size); } -static inline void remove_page_from_hash_queue(struct page * page) +static inline void add_page_to_inode_queue(struct address_space *mapping, struct page * page) { - if(page->pprev_hash) { - if(page->next_hash) - page->next_hash->pprev_hash = page->pprev_hash; - *page->pprev_hash = page->next_hash; - page->pprev_hash = NULL; - } - atomic_dec(&page_cache_size); + struct list_head *head = &mapping->clean_pages; + + mapping->nrpages++; + list_add(&page->list, head); + page->mapping = mapping; } -static inline int sync_page(struct page *page) +static inline void remove_page_from_inode_queue(struct page * page) { - struct address_space *mapping = page->mapping; + struct address_space * mapping = page->mapping; - if (mapping && mapping->a_ops && mapping->a_ops->sync_page) - return mapping->a_ops->sync_page(page); - return 0; + mapping->nrpages--; + list_del(&page->list); + page->mapping = NULL; +} + +static inline void remove_page_from_hash_queue(struct page * page) +{ + struct page *next = page->next_hash; + struct page **pprev = page->pprev_hash; + + if (next) + next->pprev_hash = pprev; + *pprev = next; + page->pprev_hash = NULL; + atomic_dec(&page_cache_size); } /* @@ -93,6 +106,7 @@ static inline int sync_page(struct page *page) */ void __remove_inode_page(struct page *page) { + if (PageDirty(page)) BUG(); remove_page_from_inode_queue(page); remove_page_from_hash_queue(page); page->mapping = NULL; @@ -108,6 +122,30 @@ void remove_inode_page(struct page *page) spin_unlock(&pagecache_lock); } +static inline int sync_page(struct page *page) +{ + struct address_space *mapping = page->mapping; + + if (mapping && mapping->a_ops && mapping->a_ops->sync_page) + return mapping->a_ops->sync_page(page); + return 0; +} + +/* + * Add a page to the dirty page list. 
+ */ +void __set_page_dirty(struct page *page) +{ + struct address_space *mapping = page->mapping; + + spin_lock(&pagecache_lock); + list_del(&page->list); + list_add(&page->list, &mapping->dirty_pages); + spin_unlock(&pagecache_lock); + + mark_inode_dirty_pages(mapping->host); +} + /** * invalidate_inode_pages - Invalidate all the unlocked pages of one inode * @inode: the inode which pages we want to invalidate @@ -121,7 +159,7 @@ void invalidate_inode_pages(struct inode * inode) struct list_head *head, *curr; struct page * page; - head = &inode->i_mapping->pages; + head = &inode->i_mapping->clean_pages; spin_lock(&pagecache_lock); spin_lock(&pagemap_lru_lock); @@ -131,15 +169,17 @@ void invalidate_inode_pages(struct inode * inode) page = list_entry(curr, struct page, list); curr = curr->next; - /* We cannot invalidate a locked page */ - if (TryLockPage(page)) + /* We cannot invalidate something in use.. */ + if (page_count(page) != 1) continue; - /* Neither can we invalidate something in use.. */ - if (page_count(page) != 1) { - UnlockPage(page); + /* ..or dirty.. */ + if (PageDirty(page)) + continue; + + /* ..or locked */ + if (TryLockPage(page)) continue; - } __lru_cache_del(page); __remove_inode_page(page); @@ -179,26 +219,12 @@ static inline void truncate_complete_page(struct page *page) page_cache_release(page); } -/** - * truncate_inode_pages - truncate *all* the pages from an offset - * @mapping: mapping to truncate - * @lstart: offset from with to truncate - * - * Truncate the page cache at a set offset, removing the pages - * that are beyond that offset (and zeroing out partial pages). - * If any page is locked we wait for it to become unlocked. - */ -void truncate_inode_pages(struct address_space * mapping, loff_t lstart) +void truncate_list_pages(struct list_head *head, unsigned long start, unsigned partial) { - struct list_head *head, *curr; + struct list_head *curr; struct page * page; - unsigned partial = lstart & (PAGE_CACHE_SIZE - 1); - unsigned long start; - - start = (lstart + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; repeat: - head = &mapping->pages; spin_lock(&pagecache_lock); curr = head->next; while (curr != head) { @@ -242,6 +268,26 @@ repeat: spin_unlock(&pagecache_lock); } + +/** + * truncate_inode_pages - truncate *all* the pages from an offset + * @mapping: mapping to truncate + * @lstart: offset from with to truncate + * + * Truncate the page cache at a set offset, removing the pages + * that are beyond that offset (and zeroing out partial pages). + * If any page is locked we wait for it to become unlocked. 
+ */ +void truncate_inode_pages(struct address_space * mapping, loff_t lstart) +{ + unsigned long start = (lstart + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + unsigned partial = lstart & (PAGE_CACHE_SIZE - 1); + + truncate_list_pages(&mapping->clean_pages, start, partial); + truncate_list_pages(&mapping->dirty_pages, start, partial); + truncate_list_pages(&mapping->locked_pages, start, partial); +} + static inline struct page * __find_page_nolock(struct address_space *mapping, unsigned long offset, struct page *page) { goto inside; @@ -303,14 +349,12 @@ static int waitfor_one_page(struct page *page) return error; } -static int do_buffer_fdatasync(struct inode *inode, unsigned long start, unsigned long end, int (*fn)(struct page *)) +static int do_buffer_fdatasync(struct list_head *head, unsigned long start, unsigned long end, int (*fn)(struct page *)) { - struct list_head *head, *curr; + struct list_head *curr; struct page *page; int retval = 0; - head = &inode->i_mapping->pages; - spin_lock(&pagecache_lock); curr = head->next; while (curr != head) { @@ -349,11 +393,89 @@ int generic_buffer_fdatasync(struct inode *inode, unsigned long start_idx, unsig { int retval; - retval = do_buffer_fdatasync(inode, start_idx, end_idx, writeout_one_page); - retval |= do_buffer_fdatasync(inode, start_idx, end_idx, waitfor_one_page); + /* writeout dirty buffers on pages from both clean and dirty lists */ + retval = do_buffer_fdatasync(&inode->i_mapping->dirty_pages, start_idx, end_idx, writeout_one_page); + retval |= do_buffer_fdatasync(&inode->i_mapping->clean_pages, start_idx, end_idx, writeout_one_page); + retval |= do_buffer_fdatasync(&inode->i_mapping->locked_pages, start_idx, end_idx, writeout_one_page); + + /* now wait for locked buffers on pages from both clean and dirty lists */ + retval |= do_buffer_fdatasync(&inode->i_mapping->dirty_pages, start_idx, end_idx, writeout_one_page); + retval |= do_buffer_fdatasync(&inode->i_mapping->clean_pages, start_idx, end_idx, waitfor_one_page); + retval |= do_buffer_fdatasync(&inode->i_mapping->locked_pages, start_idx, end_idx, waitfor_one_page); + return retval; } +/** + * filemap_fdatasync - walk the list of dirty pages of the given address space + * and writepage() all of them. + * + * @mapping: address space structure to write + * + */ +void filemap_fdatasync(struct address_space * mapping) +{ + int (*writepage)(struct page *) = mapping->a_ops->writepage; + + spin_lock(&pagecache_lock); + + while (!list_empty(&mapping->dirty_pages)) { + struct page *page = list_entry(mapping->dirty_pages.next, struct page, list); + + list_del(&page->list); + list_add(&page->list, &mapping->locked_pages); + + if (!PageDirty(page)) + continue; + + page_cache_get(page); + spin_unlock(&pagecache_lock); + + lock_page(page); + + if (PageDirty(page)) { + ClearPageDirty(page); + writepage(page); + } else + UnlockPage(page); + + page_cache_release(page); + spin_lock(&pagecache_lock); + } + spin_unlock(&pagecache_lock); +} + +/** + * filemap_fdatawait - walk the list of locked pages of the given address space + * and wait for all of them. 
+ * + * @mapping: address space structure to wait for + * + */ +void filemap_fdatawait(struct address_space * mapping) +{ + spin_lock(&pagecache_lock); + + while (!list_empty(&mapping->locked_pages)) { + struct page *page = list_entry(mapping->locked_pages.next, struct page, list); + + list_del(&page->list); + list_add(&page->list, &mapping->clean_pages); + + if (!PageLocked(page)) + continue; + + page_cache_get(page); + spin_unlock(&pagecache_lock); + + ___wait_on_page(page); + + page_cache_release(page); + spin_lock(&pagecache_lock); + } + spin_unlock(&pagecache_lock); +} + /* * Add a page to the inode page cache. * @@ -369,7 +491,7 @@ void add_to_page_cache_locked(struct page * page, struct address_space *mapping, spin_lock(&pagecache_lock); page->index = index; add_page_to_inode_queue(mapping, page); - __add_page_to_hash_queue(page, page_hash(mapping, index)); + add_page_to_hash_queue(page, page_hash(mapping, index)); lru_cache_add(page); spin_unlock(&pagecache_lock); } @@ -392,7 +514,7 @@ static inline void __add_to_page_cache(struct page * page, page_cache_get(page); page->index = offset; add_page_to_inode_queue(mapping, page); - __add_page_to_hash_queue(page, hash); + add_page_to_hash_queue(page, hash); lru_cache_add(page); } @@ -542,8 +664,8 @@ void lock_page(struct page *page) * a rather lightweight function, finding and getting a reference to a * hashed page atomically, waiting for it if it's locked. */ -static struct page * __find_get_page(struct address_space *mapping, - unsigned long offset, struct page **hash) +struct page * __find_get_page(struct address_space *mapping, + unsigned long offset, struct page **hash) { struct page *page; @@ -1460,72 +1582,19 @@ page_not_uptodate: return NULL; } -/* - * If a task terminates while we're swapping the page, the vma and - * and file could be released: try_to_swap_out has done a get_file. - * vma/file is guaranteed to exist in the unmap/sync cases because - * mmap_sem is held. - * - * The "mapping" test takes care of somebody having truncated the - * page and thus made this write-page a no-op.. - */ -static int filemap_write_page(struct page * page, int wait) -{ - struct address_space * mapping = page->mapping; - int error = 0; - - if (mapping && mapping->a_ops->writepage) { - ClearPageDirty(page); - error = mapping->a_ops->writepage(page); - } - return error; -} - - -/* - * The page cache takes care of races between somebody - * trying to swap something out and swap something in - * at the same time.. - */ -extern void wakeup_bdflush(int); -int filemap_swapout(struct page * page, struct file *file) -{ - SetPageDirty(page); - return 0; -} - /* Called with mm->page_table_lock held to protect against other * threads/the swapper from ripping pte's out from under us. 
*/ static inline int filemap_sync_pte(pte_t * ptep, struct vm_area_struct *vma, unsigned long address, unsigned int flags) { - pte_t pte; - struct page *page; - int error; - - pte = *ptep; + pte_t pte = *ptep; - if (!pte_present(pte)) - goto out; - if (!ptep_test_and_clear_dirty(ptep)) - goto out; - - flush_page_to_ram(pte_page(pte)); - flush_cache_page(vma, address); - flush_tlb_page(vma, address); - page = pte_page(pte); - page_cache_get(page); - spin_unlock(&vma->vm_mm->page_table_lock); - - lock_page(page); - error = filemap_write_page(page, 1); - page_cache_free(page); - - spin_lock(&vma->vm_mm->page_table_lock); - return error; - -out: + if (pte_present(pte) && ptep_test_and_clear_dirty(ptep)) { + struct page *page = pte_page(pte); + flush_tlb_page(vma, address); + set_page_dirty(page); + } return 0; } @@ -1623,9 +1692,7 @@ int filemap_sync(struct vm_area_struct * vma, unsigned long address, * backing-store for swapping.. */ static struct vm_operations_struct file_shared_mmap = { - sync: filemap_sync, nopage: filemap_nopage, - swapout: filemap_swapout, }; /* @@ -1667,16 +1734,19 @@ int generic_file_mmap(struct file * file, struct vm_area_struct * vma) static int msync_interval(struct vm_area_struct * vma, unsigned long start, unsigned long end, int flags) { - if (vma->vm_file && vma->vm_ops && vma->vm_ops->sync) { + struct file * file = vma->vm_file; + if (file && (vma->vm_flags & VM_SHARED)) { int error; - error = vma->vm_ops->sync(vma, start, end-start, flags); + error = filemap_sync(vma, start, end-start, flags); + if (!error && (flags & MS_SYNC)) { - struct file * file = vma->vm_file; - if (file && file->f_op && file->f_op->fsync) { - down(&file->f_dentry->d_inode->i_sem); + struct inode * inode = file->f_dentry->d_inode; + down(&inode->i_sem); + filemap_fdatasync(inode->i_mapping); + if (file->f_op && file->f_op->fsync) error = file->f_op->fsync(file, file->f_dentry, 1); - up(&file->f_dentry->d_inode->i_sem); - } + filemap_fdatawait(inode->i_mapping); + up(&inode->i_sem); } return error; } @@ -2439,6 +2509,17 @@ generic_file_write(struct file *file,const char *buf,size_t count,loff_t *ppos) if (bytes > count) bytes = count; + /* + * Bring in the user page that we will copy from _first_. + * Otherwise there's a nasty deadlock on copying from the + * same page as we're writing to, without it being marked + * up-to-date. + */ + { volatile unsigned char dummy; + __get_user(dummy, buf); + __get_user(dummy, buf+bytes-1); + } + status = -ENOMEM; /* we'll assign it later anyway */ page = __grab_cache_page(mapping, index, &cached_page); if (!page) diff --git a/mm/memory.c b/mm/memory.c index 13dad21a0..f4bb0141f 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -259,22 +259,22 @@ nomem: /* * Return indicates whether a page was freed so caller can adjust rss */ -static inline int free_pte(pte_t page) +static inline int free_pte(pte_t pte) { - if (pte_present(page)) { - struct page *ptpage = pte_page(page); - if ((!VALID_PAGE(ptpage)) || PageReserved(ptpage)) + if (pte_present(pte)) { + struct page *page = pte_page(pte); + if ((!VALID_PAGE(page)) || PageReserved(page)) return 0; /* * free_page() used to be able to clear swap cache * entries. We may now have to do it manually. 
*/ - if (pte_dirty(page)) - SetPageDirty(ptpage); - free_page_and_swap_cache(ptpage); + if (pte_dirty(pte) && page->mapping) + set_page_dirty(page); + free_page_and_swap_cache(page); return 1; } - swap_free(pte_to_swp_entry(page)); + swap_free(pte_to_swp_entry(pte)); return 0; } diff --git a/mm/mlock.c b/mm/mlock.c index 551d61d39..16e9f947b 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -189,9 +189,6 @@ static int do_mlock(unsigned long start, size_t len, int on) break; } } - spin_lock(¤t->mm->page_table_lock); - merge_segments(current->mm, start, end); - spin_unlock(¤t->mm->page_table_lock); return error; } @@ -263,9 +260,6 @@ static int do_mlockall(int flags) if (error) break; } - spin_lock(¤t->mm->page_table_lock); - merge_segments(current->mm, 0, TASK_SIZE); - spin_unlock(¤t->mm->page_table_lock); return error; } @@ -333,32 +333,22 @@ unsigned long do_mmap_pgoff(struct file * file, unsigned long addr, unsigned lon if (error) goto unmap_and_free_vma; } else if (flags & MAP_SHARED) { - error = map_zero_setup(vma); + error = shmem_zero_setup(vma); if (error) goto free_vma; } - /* - * merge_segments may merge our vma, so we can't refer to it - * after the call. Save the values we need now ... - */ - flags = vma->vm_flags; - /* Can addr have changed?? * * Answer: Yes, several device drivers can do it in their * f_op->mmap method. -DaveM */ + flags = vma->vm_flags; addr = vma->vm_start; - lock_vma_mappings(vma); - spin_lock(&mm->page_table_lock); - __insert_vm_struct(mm, vma); - unlock_vma_mappings(vma); + insert_vm_struct(mm, vma); if (correct_wcount) atomic_inc(&file->f_dentry->d_inode->i_writecount); - merge_segments(mm, vma->vm_start, vma->vm_end); - spin_unlock(&mm->page_table_lock); mm->total_vm += len >> PAGE_SHIFT; if (flags & VM_LOCKED) { @@ -742,9 +732,6 @@ int do_munmap(struct mm_struct *mm, unsigned long addr, size_t len) end = end > mpnt->vm_end ? mpnt->vm_end : end; size = end - st; - if (mpnt->vm_ops && mpnt->vm_ops->unmap) - mpnt->vm_ops->unmap(mpnt, st, size); - if (mpnt->vm_flags & VM_DENYWRITE && (st != mpnt->vm_start || end != mpnt->vm_end) && (file = mpnt->vm_file) != NULL) { @@ -828,6 +815,23 @@ unsigned long do_brk(unsigned long addr, unsigned long len) if (!vm_enough_memory(len >> PAGE_SHIFT)) return -ENOMEM; + flags = vm_flags(PROT_READ|PROT_WRITE|PROT_EXEC, + MAP_FIXED|MAP_PRIVATE) | mm->def_flags; + + flags |= VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC; + + + /* Can we just expand an old anonymous mapping? */ + if (addr) { + struct vm_area_struct * vma = find_vma(mm, addr-1); + if (vma && vma->vm_end == addr && !vma->vm_file && + vma->vm_flags == flags) { + vma->vm_end = addr + len; + goto out; + } + } + + /* * create a vma struct for an anonymous mapping */ @@ -838,30 +842,16 @@ unsigned long do_brk(unsigned long addr, unsigned long len) vma->vm_mm = mm; vma->vm_start = addr; vma->vm_end = addr + len; - vma->vm_flags = vm_flags(PROT_READ|PROT_WRITE|PROT_EXEC, - MAP_FIXED|MAP_PRIVATE) | mm->def_flags; - - vma->vm_flags |= VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC; - vma->vm_page_prot = protection_map[vma->vm_flags & 0x0f]; + vma->vm_flags = flags; + vma->vm_page_prot = protection_map[flags & 0x0f]; vma->vm_ops = NULL; vma->vm_pgoff = 0; vma->vm_file = NULL; vma->vm_private_data = NULL; - /* - * merge_segments may merge our vma, so we can't refer to it - * after the call. Save the values we need now ... 
- */ - flags = vma->vm_flags; - addr = vma->vm_start; + insert_vm_struct(mm, vma); - lock_vma_mappings(vma); - spin_lock(&mm->page_table_lock); - __insert_vm_struct(mm, vma); - unlock_vma_mappings(vma); - merge_segments(mm, vma->vm_start, vma->vm_end); - spin_unlock(&mm->page_table_lock); - +out: mm->total_vm += len >> PAGE_SHIFT; if (flags & VM_LOCKED) { mm->locked_vm += len >> PAGE_SHIFT; @@ -900,8 +890,6 @@ void exit_mmap(struct mm_struct * mm) unsigned long size = end - start; if (mpnt->vm_ops) { - if (mpnt->vm_ops->unmap) - mpnt->vm_ops->unmap(mpnt, start, size); if (mpnt->vm_ops->close) mpnt->vm_ops->close(mpnt); } @@ -973,84 +961,8 @@ void __insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vmp) void insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vmp) { lock_vma_mappings(vmp); + spin_lock(¤t->mm->page_table_lock); __insert_vm_struct(mm, vmp); + spin_unlock(¤t->mm->page_table_lock); unlock_vma_mappings(vmp); } - -/* Merge the list of memory segments if possible. - * Redundant vm_area_structs are freed. - * This assumes that the list is ordered by address. - * We don't need to traverse the entire list, only those segments - * which intersect or are adjacent to a given interval. - * - * We must already hold the mm semaphore when we get here.. - */ -void merge_segments (struct mm_struct * mm, unsigned long start_addr, unsigned long end_addr) -{ - struct vm_area_struct *prev, *mpnt, *next, *prev1; - - mpnt = find_vma_prev(mm, start_addr, &prev1); - if (!mpnt) - return; - - if (prev1) { - prev = prev1; - } else { - prev = mpnt; - mpnt = mpnt->vm_next; - } - mm->mmap_cache = NULL; /* Kill the cache. */ - - /* prev and mpnt cycle through the list, as long as - * start_addr < mpnt->vm_end && prev->vm_start < end_addr - */ - for ( ; mpnt && prev->vm_start < end_addr ; prev = mpnt, mpnt = next) { - next = mpnt->vm_next; - - /* To share, we must have the same file, operations.. */ - if ((mpnt->vm_file != prev->vm_file)|| - (mpnt->vm_private_data != prev->vm_private_data) || - (mpnt->vm_ops != prev->vm_ops) || - (mpnt->vm_flags != prev->vm_flags) || - (prev->vm_end != mpnt->vm_start)) - continue; - - /* - * If we have a file or it's a shared memory area - * the offsets must be contiguous.. - */ - if ((mpnt->vm_file != NULL) || (mpnt->vm_flags & VM_SHM)) { - unsigned long off = prev->vm_pgoff; - off += (prev->vm_end - prev->vm_start) >> PAGE_SHIFT; - if (off != mpnt->vm_pgoff) - continue; - } - - /* merge prev with mpnt and set up pointers so the new - * big segment can possibly merge with the next one. - * The old unused mpnt is freed. 
- */ - if (mm->mmap_avl) - avl_remove(mpnt, &mm->mmap_avl); - prev->vm_end = mpnt->vm_end; - prev->vm_next = mpnt->vm_next; - mm->map_count--; - if (mpnt->vm_ops && mpnt->vm_ops->close) { - mpnt->vm_pgoff += (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT; - mpnt->vm_start = mpnt->vm_end; - spin_unlock(&mm->page_table_lock); - mpnt->vm_ops->close(mpnt); - } else - spin_unlock(&mm->page_table_lock); - - lock_vma_mappings(mpnt); - __remove_shared_vm_struct(mpnt); - unlock_vma_mappings(mpnt); - if (mpnt->vm_file) - fput(mpnt->vm_file); - kmem_cache_free(vm_area_cachep, mpnt); - mpnt = prev; - - spin_lock(&mm->page_table_lock); - } -} diff --git a/mm/mprotect.c b/mm/mprotect.c index e47987f1e..91905c8b1 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -277,9 +277,6 @@ asmlinkage long sys_mprotect(unsigned long start, size_t len, unsigned long prot break; } } - spin_lock(¤t->mm->page_table_lock); - merge_segments(current->mm, start, end); - spin_unlock(¤t->mm->page_table_lock); out: up(¤t->mm->mmap_sem); return error; diff --git a/mm/mremap.c b/mm/mremap.c index bdbcf4841..e237c9442 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -141,12 +141,7 @@ static inline unsigned long move_vma(struct vm_area_struct * vma, get_file(new_vma->vm_file); if (new_vma->vm_ops && new_vma->vm_ops->open) new_vma->vm_ops->open(new_vma); - lock_vma_mappings(vma); - spin_lock(¤t->mm->page_table_lock); - __insert_vm_struct(current->mm, new_vma); - unlock_vma_mappings(vma); - merge_segments(current->mm, new_vma->vm_start, new_vma->vm_end); - spin_unlock(¤t->mm->page_table_lock); + insert_vm_struct(current->mm, new_vma); do_munmap(current->mm, addr, old_len); current->mm->total_vm += new_len >> PAGE_SHIFT; if (new_vma->vm_flags & VM_LOCKED) { diff --git a/mm/shmem.c b/mm/shmem.c new file mode 100644 index 000000000..a81a74659 --- /dev/null +++ b/mm/shmem.c @@ -0,0 +1,873 @@ +/* + * Resizable simple shmem filesystem for Linux. + * + * Copyright (C) 2000 Linus Torvalds. + * 2000 Transmeta Corp. + * 2000 Christoph Rohland + * + * This file is released under the GPL. + */ + +/* + * This shared memory handling is heavily based on the ramfs. It + * extends the ramfs by the ability to use swap which would makes it a + * completely usable filesystem. 
+ * + * But read and write are not supported (yet) + * + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/devfs_fs_kernel.h> +#include <linux/fs.h> +#include <linux/mm.h> +#include <linux/file.h> +#include <linux/swap.h> +#include <linux/pagemap.h> +#include <linux/string.h> +#include <linux/locks.h> +#include <asm/smplock.h> + +#include <asm/uaccess.h> + +#define SHMEM_MAGIC 0x01021994 + +#define ENTRIES_PER_PAGE (PAGE_SIZE/sizeof(unsigned long)) +#define NR_SINGLE (ENTRIES_PER_PAGE + SHMEM_NR_DIRECT) + +static struct super_operations shmem_ops; +static struct address_space_operations shmem_aops; +static struct file_operations shmem_file_operations; +static struct inode_operations shmem_inode_operations; +static struct file_operations shmem_dir_operations; +static struct inode_operations shmem_dir_inode_operations; +static struct vm_operations_struct shmem_shared_vm_ops; +static struct vm_operations_struct shmem_private_vm_ops; + +LIST_HEAD (shmem_inodes); +static spinlock_t shmem_ilock = SPIN_LOCK_UNLOCKED; + +static swp_entry_t * shmem_swp_entry (struct shmem_inode_info *info, unsigned long index) +{ + if (index < SHMEM_NR_DIRECT) + return info->i_direct+index; + + index -= SHMEM_NR_DIRECT; + if (index >= ENTRIES_PER_PAGE*ENTRIES_PER_PAGE) + return NULL; + + if (!info->i_indirect) { + info->i_indirect = (swp_entry_t **) get_zeroed_page(GFP_USER); + if (!info->i_indirect) + return NULL; + } + if(!(info->i_indirect[index/ENTRIES_PER_PAGE])) { + info->i_indirect[index/ENTRIES_PER_PAGE] = (swp_entry_t *) get_zeroed_page(GFP_USER); + if (!info->i_indirect[index/ENTRIES_PER_PAGE]) + return NULL; + } + + return info->i_indirect[index/ENTRIES_PER_PAGE]+index%ENTRIES_PER_PAGE; +} + +static int shmem_free_swp(swp_entry_t *dir, unsigned int count) +{ + swp_entry_t *ptr, entry; + struct page * page; + int freed = 0; + + for (ptr = dir; ptr < dir + count; ptr++) { + if (!ptr->val) + continue; + entry = *ptr; + swap_free (entry); + *ptr = (swp_entry_t){0}; + freed++; + if (!(page = lookup_swap_cache(entry))) + continue; + delete_from_swap_cache(page); + page_cache_release(page); + } + return freed; +} + +/* + * shmem_truncate_part - free a bunch of swap entries + * + * @dir: pointer to swp_entries + * @size: number of entries in dir + * @start: offset to start from + * @inode: inode for statistics + * @freed: counter for freed pages + * + * It frees the swap entries from dir+start til dir+size + * + * returns 0 if it truncated something, else (offset-size) + */ + +static unsigned long +shmem_truncate_part (swp_entry_t * dir, unsigned long size, + unsigned long start, struct inode * inode, unsigned long *freed) { + if (start > size) + return start - size; + if (dir) + *freed += shmem_free_swp (dir+start, size-start); + + return 0; +} + +static void shmem_truncate (struct inode * inode) +{ + int clear_base; + unsigned long start; + unsigned long mmfreed, freed = 0; + swp_entry_t **base, **ptr; + struct shmem_inode_info * info = &inode->u.shmem_i; + + spin_lock (&info->lock); + start = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + + start = shmem_truncate_part (info->i_direct, SHMEM_NR_DIRECT, start, inode, &freed); + + if (!(base = info->i_indirect)) + goto out;; + + clear_base = 1; + for (ptr = base; ptr < base + ENTRIES_PER_PAGE; ptr++) { + if (!start) { + if (!*ptr) + continue; + freed += shmem_free_swp (*ptr, ENTRIES_PER_PAGE); + free_page ((unsigned long) *ptr); + *ptr = 0; + continue; + } + clear_base = 0; + start = shmem_truncate_part (*ptr, 
ENTRIES_PER_PAGE, start, inode, &freed); + } + + if (!clear_base) + goto out; + + free_page ((unsigned long)base); + info->i_indirect = 0; + +out: + + /* + * We have to calculate the free blocks since we do not know + * how many pages the mm discarded + * + * But we know that normally + * inodes->i_blocks == inode->i_mapping->nrpages + info->swapped + * + * So the mm freed + * inodes->i_blocks - (inode->i_mapping->nrpages + info->swapped) + */ + + mmfreed = inode->i_blocks - (inode->i_mapping->nrpages + info->swapped); + info->swapped -= freed; + inode->i_blocks -= freed + mmfreed; + spin_unlock (&info->lock); + + spin_lock (&inode->i_sb->u.shmem_sb.stat_lock); + inode->i_sb->u.shmem_sb.free_blocks += freed + mmfreed; + spin_unlock (&inode->i_sb->u.shmem_sb.stat_lock); +} + +static void shmem_delete_inode(struct inode * inode) +{ + struct shmem_sb_info *info = &inode->i_sb->u.shmem_sb; + + spin_lock (&shmem_ilock); + list_del (&inode->u.shmem_i.list); + spin_unlock (&shmem_ilock); + inode->i_size = 0; + shmem_truncate (inode); + spin_lock (&info->stat_lock); + info->free_inodes++; + spin_unlock (&info->stat_lock); + clear_inode(inode); +} + +/* + * Move the page from the page cache to the swap cache + */ +static int shmem_writepage(struct page * page) +{ + int error; + struct shmem_inode_info *info; + swp_entry_t *entry, swap; + + info = &page->mapping->host->u.shmem_i; + if (info->locked) + return 1; + swap = __get_swap_page(2); + if (!swap.val) + return 1; + + spin_lock(&info->lock); + entry = shmem_swp_entry (info, page->index); + if (!entry) /* this had been allocted on page allocation */ + BUG(); + error = -EAGAIN; + if (entry->val) { + __swap_free(swap, 2); + goto out; + } + + *entry = swap; + error = 0; + /* Remove the from the page cache */ + lru_cache_del(page); + remove_inode_page(page); + + /* Add it to the swap cache */ + add_to_swap_cache(page, swap); + page_cache_release(page); + set_page_dirty(page); + info->swapped++; +out: + spin_unlock(&info->lock); + UnlockPage(page); + return error; +} + +/* + * shmem_nopage - either get the page from swap or allocate a new one + * + * If we allocate a new one we do not mark it dirty. That's up to the + * vm. If we swap it in we mark it dirty since we also free the swap + * entry since a page cannot live in both the swap and page cache + */ +struct page * shmem_nopage(struct vm_area_struct * vma, unsigned long address, int no_share) +{ + unsigned long size; + struct page * page; + unsigned int idx; + swp_entry_t *entry; + struct inode * inode = vma->vm_file->f_dentry->d_inode; + struct address_space * mapping = inode->i_mapping; + struct shmem_inode_info *info; + + idx = (address - vma->vm_start) >> PAGE_SHIFT; + idx += vma->vm_pgoff; + + down (&inode->i_sem); + size = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + page = NOPAGE_SIGBUS; + if ((idx >= size) && (vma->vm_mm == current->mm)) + goto out; + + /* retry, we may have slept */ + page = __find_lock_page(mapping, idx, page_hash (mapping, idx)); + if (page) + goto cached_page; + + info = &inode->u.shmem_i; + entry = shmem_swp_entry (info, idx); + if (!entry) + goto oom; + if (entry->val) { + unsigned long flags; + + /* Look it up and read it in.. 
*/ + page = lookup_swap_cache(*entry); + if (!page) { + lock_kernel(); + swapin_readahead(*entry); + page = read_swap_cache(*entry); + unlock_kernel(); + if (!page) + goto oom; + } + + /* We have to this with page locked to prevent races */ + spin_lock (&info->lock); + swap_free(*entry); + lock_page(page); + delete_from_swap_cache_nolock(page); + *entry = (swp_entry_t) {0}; + flags = page->flags & ~((1 << PG_uptodate) | (1 << PG_error) | (1 << PG_referenced) | (1 << PG_arch_1)); + page->flags = flags | (1 << PG_dirty); + add_to_page_cache_locked(page, mapping, idx); + info->swapped--; + spin_unlock (&info->lock); + } else { + spin_lock (&inode->i_sb->u.shmem_sb.stat_lock); + if (inode->i_sb->u.shmem_sb.free_blocks == 0) + goto no_space; + inode->i_sb->u.shmem_sb.free_blocks--; + spin_unlock (&inode->i_sb->u.shmem_sb.stat_lock); + /* Ok, get a new page */ + page = page_cache_alloc(); + if (!page) + goto oom; + clear_user_highpage(page, address); + inode->i_blocks++; + add_to_page_cache (page, mapping, idx); + } + /* We have the page */ + SetPageUptodate (page); + +cached_page: + UnlockPage (page); + up(&inode->i_sem); + + if (no_share) { + struct page *new_page = page_cache_alloc(); + + if (new_page) { + copy_user_highpage(new_page, page, address); + flush_page_to_ram(new_page); + } else + new_page = NOPAGE_OOM; + page_cache_release(page); + return new_page; + } + + flush_page_to_ram (page); + return(page); +no_space: + spin_unlock (&inode->i_sb->u.shmem_sb.stat_lock); +oom: + page = NOPAGE_OOM; +out: + up(&inode->i_sem); + return page; +} + +struct inode *shmem_get_inode(struct super_block *sb, int mode, int dev) +{ + struct inode * inode; + + spin_lock (&sb->u.shmem_sb.stat_lock); + if (!sb->u.shmem_sb.free_inodes) { + spin_unlock (&sb->u.shmem_sb.stat_lock); + return NULL; + } + sb->u.shmem_sb.free_inodes--; + spin_unlock (&sb->u.shmem_sb.stat_lock); + + inode = new_inode(sb); + if (inode) { + inode->i_mode = mode; + inode->i_uid = current->fsuid; + inode->i_gid = current->fsgid; + inode->i_blksize = PAGE_CACHE_SIZE; + inode->i_blocks = 0; + inode->i_rdev = to_kdev_t(dev); + inode->i_mapping->a_ops = &shmem_aops; + inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; + spin_lock_init (&inode->u.shmem_i.lock); + switch (mode & S_IFMT) { + default: + init_special_inode(inode, mode, dev); + break; + case S_IFREG: + inode->i_op = &shmem_inode_operations; + inode->i_fop = &shmem_file_operations; + break; + case S_IFDIR: + inode->i_op = &shmem_dir_inode_operations; + inode->i_fop = &shmem_dir_operations; + break; + case S_IFLNK: + inode->i_op = &page_symlink_inode_operations; + break; + } + spin_lock (&shmem_ilock); + list_add (&inode->u.shmem_i.list, &shmem_inodes); + spin_unlock (&shmem_ilock); + } + return inode; +} + +static int shmem_statfs(struct super_block *sb, struct statfs *buf) +{ + buf->f_type = SHMEM_MAGIC; + buf->f_bsize = PAGE_CACHE_SIZE; + spin_lock (&sb->u.shmem_sb.stat_lock); + if (sb->u.shmem_sb.max_blocks != ULONG_MAX || + sb->u.shmem_sb.max_inodes != ULONG_MAX) { + buf->f_blocks = sb->u.shmem_sb.max_blocks; + buf->f_bavail = buf->f_bfree = sb->u.shmem_sb.free_blocks; + buf->f_files = sb->u.shmem_sb.max_inodes; + buf->f_ffree = sb->u.shmem_sb.free_inodes; + } + spin_unlock (&sb->u.shmem_sb.stat_lock); + buf->f_namelen = 255; + return 0; +} + +/* + * Lookup the data. This is trivial - if the dentry didn't already + * exist, we know it is negative. 
+ */ +static struct dentry * shmem_lookup(struct inode *dir, struct dentry *dentry) +{ + d_add(dentry, NULL); + return NULL; +} + +/* + * File creation. Allocate an inode, and we're done.. + */ +static int shmem_mknod(struct inode *dir, struct dentry *dentry, int mode, int dev) +{ + struct inode * inode = shmem_get_inode(dir->i_sb, mode, dev); + int error = -ENOSPC; + + if (inode) { + d_instantiate(dentry, inode); + dget(dentry); /* Extra count - pin the dentry in core */ + error = 0; + } + return error; +} + +static int shmem_mkdir(struct inode * dir, struct dentry * dentry, int mode) +{ + return shmem_mknod(dir, dentry, mode | S_IFDIR, 0); +} + +static int shmem_create(struct inode *dir, struct dentry *dentry, int mode) +{ + return shmem_mknod(dir, dentry, mode | S_IFREG, 0); +} + +/* + * Link a file.. + */ +static int shmem_link(struct dentry *old_dentry, struct inode * dir, struct dentry * dentry) +{ + struct inode *inode = old_dentry->d_inode; + + if (S_ISDIR(inode->i_mode)) + return -EPERM; + + inode->i_nlink++; + atomic_inc(&inode->i_count); /* New dentry reference */ + dget(dentry); /* Extra pinning count for the created dentry */ + d_instantiate(dentry, inode); + return 0; +} + +static inline int shmem_positive(struct dentry *dentry) +{ + return dentry->d_inode && !d_unhashed(dentry); +} + +/* + * Check that a directory is empty (this works + * for regular files too, they'll just always be + * considered empty..). + * + * Note that an empty directory can still have + * children, they just all have to be negative.. + */ +static int shmem_empty(struct dentry *dentry) +{ + struct list_head *list; + + spin_lock(&dcache_lock); + list = dentry->d_subdirs.next; + + while (list != &dentry->d_subdirs) { + struct dentry *de = list_entry(list, struct dentry, d_child); + + if (shmem_positive(de)) { + spin_unlock(&dcache_lock); + return 0; + } + list = list->next; + } + spin_unlock(&dcache_lock); + return 1; +} + +/* + * This works for both directories and regular files. + * (non-directories will always have empty subdirs) + */ +static int shmem_unlink(struct inode * dir, struct dentry *dentry) +{ + int retval = -ENOTEMPTY; + + if (shmem_empty(dentry)) { + struct inode *inode = dentry->d_inode; + + inode->i_nlink--; + dput(dentry); /* Undo the count from "create" - this does all the work */ + retval = 0; + } + return retval; +} + +#define shmem_rmdir shmem_unlink + +/* + * The VFS layer already does all the dentry stuff for rename, + * we just have to decrement the usage count for the target if + * it exists so that the VFS layer correctly free's it when it + * gets overwritten. 
+ */ +static int shmem_rename(struct inode * old_dir, struct dentry *old_dentry, struct inode * new_dir,struct dentry *new_dentry) +{ + int error = -ENOTEMPTY; + + if (shmem_empty(new_dentry)) { + struct inode *inode = new_dentry->d_inode; + if (inode) { + inode->i_nlink--; + dput(new_dentry); + } + error = 0; + } + return error; +} + +static int shmem_symlink(struct inode * dir, struct dentry *dentry, const char * symname) +{ + int error; + + error = shmem_mknod(dir, dentry, S_IFLNK | S_IRWXUGO, 0); + if (!error) { + int l = strlen(symname)+1; + struct inode *inode = dentry->d_inode; + error = block_symlink(inode, symname, l); + } + return error; +} + +static int shmem_mmap(struct file * file, struct vm_area_struct * vma) +{ + struct vm_operations_struct * ops; + struct inode *inode = file->f_dentry->d_inode; + + ops = &shmem_private_vm_ops; + if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE)) + ops = &shmem_shared_vm_ops; + if (!inode->i_sb || !S_ISREG(inode->i_mode)) + return -EACCES; + UPDATE_ATIME(inode); + vma->vm_ops = ops; + return 0; +} + +static int shmem_parse_options(char *options, int *mode, unsigned long * blocks, unsigned long *inodes) +{ + char *this_char, *value; + + this_char = NULL; + if ( options ) + this_char = strtok(options,","); + for ( ; this_char; this_char = strtok(NULL,",")) { + if ((value = strchr(this_char,'=')) != NULL) + *value++ = 0; + if (!strcmp(this_char,"nr_blocks")) { + if (!value || !*value || !blocks) + return 1; + *blocks = simple_strtoul(value,&value,0); + if (*value) + return 1; + } else if (!strcmp(this_char,"nr_inodes")) { + if (!value || !*value || !inodes) + return 1; + *inodes = simple_strtoul(value,&value,0); + if (*value) + return 1; + } else if (!strcmp(this_char,"mode")) { + if (!value || !*value || !mode) + return 1; + *mode = simple_strtoul(value,&value,8); + if (*value) + return 1; + } + else + return 1; + } + + return 0; +} + +static struct super_block *shmem_read_super(struct super_block * sb, void * data, int silent) +{ + struct inode * inode; + struct dentry * root; + unsigned long blocks = ULONG_MAX; /* unlimited */ + unsigned long inodes = ULONG_MAX; /* unlimited */ + int mode = S_IRWXUGO | S_ISVTX; + + if (shmem_parse_options (data, &mode, &blocks, &inodes)) { + printk(KERN_ERR "shmem fs invalid option\n"); + return NULL; + } + + spin_lock_init (&sb->u.shmem_sb.stat_lock); + sb->u.shmem_sb.max_blocks = blocks; + sb->u.shmem_sb.free_blocks = blocks; + sb->u.shmem_sb.max_inodes = inodes; + sb->u.shmem_sb.free_inodes = inodes; + sb->s_blocksize = PAGE_CACHE_SIZE; + sb->s_blocksize_bits = PAGE_CACHE_SHIFT; + sb->s_magic = SHMEM_MAGIC; + sb->s_op = &shmem_ops; + inode = shmem_get_inode(sb, S_IFDIR | mode, 0); + if (!inode) + return NULL; + + root = d_alloc_root(inode); + if (!root) { + iput(inode); + return NULL; + } + sb->s_root = root; + return sb; +} + +static int shmem_remount_fs (struct super_block *sb, int *flags, char *data) +{ + int error; + unsigned long max_blocks, blocks; + unsigned long max_inodes, inodes; + struct shmem_sb_info *info = &sb->u.shmem_sb; + + if (shmem_parse_options (data, NULL, &max_blocks, &max_inodes)) + return -EINVAL; + + spin_lock(&info->stat_lock); + blocks = info->max_blocks - info->free_blocks; + inodes = info->max_inodes - info->free_inodes; + error = -EINVAL; + if (max_blocks < blocks) + goto out; + if (max_inodes < inodes) + goto out; + error = 0; + info->max_blocks = max_blocks; + info->free_blocks = max_blocks - blocks; + info->max_inodes = max_inodes; + info->free_inodes = 
max_inodes - inodes; +out: + spin_unlock(&info->stat_lock); + return error; +} + +static struct address_space_operations shmem_aops = { + writepage: shmem_writepage +}; + +static struct file_operations shmem_file_operations = { + mmap: shmem_mmap +}; + +static struct inode_operations shmem_inode_operations = { + truncate: shmem_truncate, +}; + +static struct file_operations shmem_dir_operations = { + read: generic_read_dir, + readdir: dcache_readdir, +}; + +static struct inode_operations shmem_dir_inode_operations = { + create: shmem_create, + lookup: shmem_lookup, + link: shmem_link, + unlink: shmem_unlink, + symlink: shmem_symlink, + mkdir: shmem_mkdir, + rmdir: shmem_rmdir, + mknod: shmem_mknod, + rename: shmem_rename, +}; + +static struct super_operations shmem_ops = { + statfs: shmem_statfs, + remount_fs: shmem_remount_fs, + delete_inode: shmem_delete_inode, + put_inode: force_delete, +}; + +static struct vm_operations_struct shmem_private_vm_ops = { + nopage: shmem_nopage, +}; + +static struct vm_operations_struct shmem_shared_vm_ops = { + nopage: shmem_nopage, +}; + +static DECLARE_FSTYPE(shmem_fs_type, "shm", shmem_read_super, FS_LITTER); + +static int __init init_shmem_fs(void) +{ + int error; + struct vfsmount * res; + + if ((error = register_filesystem(&shmem_fs_type))) { + printk (KERN_ERR "Could not register shmem fs\n"); + return error; + } + + res = kern_mount(&shmem_fs_type); + if (IS_ERR (res)) { + printk (KERN_ERR "could not kern_mount shmem fs\n"); + unregister_filesystem(&shmem_fs_type); + return PTR_ERR(res); + } + + devfs_mk_dir (NULL, "shm", NULL); + return 0; +} + +static void __exit exit_shmem_fs(void) +{ + unregister_filesystem(&shmem_fs_type); +} + +module_init(init_shmem_fs) +module_exit(exit_shmem_fs) + +static int shmem_clear_swp (swp_entry_t entry, swp_entry_t *ptr, int size) { + swp_entry_t *test; + + for (test = ptr; test < ptr + size; test++) { + if (test->val == entry.val) { + swap_free (entry); + *test = (swp_entry_t) {0}; + return test - ptr; + } + } + return -1; +} + +static int shmem_unuse_inode (struct inode *inode, swp_entry_t entry, struct page *page) +{ + swp_entry_t **base, **ptr; + unsigned long idx; + int offset; + struct shmem_inode_info *info = &inode->u.shmem_i; + + idx = 0; + spin_lock (&info->lock); + if ((offset = shmem_clear_swp (entry,info->i_direct, SHMEM_NR_DIRECT)) >= 0) + goto found; + + idx = SHMEM_NR_DIRECT; + if (!(base = info->i_indirect)) + goto out; + + for (ptr = base; ptr < base + ENTRIES_PER_PAGE; ptr++) { + if (*ptr && + (offset = shmem_clear_swp (entry, *ptr, ENTRIES_PER_PAGE)) >= 0) + goto found; + idx += ENTRIES_PER_PAGE; + } +out: + spin_unlock (&info->lock); + return 0; +found: + add_to_page_cache(page, inode->i_mapping, offset + idx); + set_page_dirty(page); + SetPageUptodate(page); + UnlockPage(page); + info->swapped--; + spin_unlock(&info->lock); + return 1; +} + +/* + * unuse_shmem() search for an eventually swapped out shmem page. 
+ */ +void shmem_unuse(swp_entry_t entry, struct page *page) +{ + struct list_head *p; + struct inode * inode; + + spin_lock (&shmem_ilock); + list_for_each(p, &shmem_inodes) { + inode = list_entry(p, struct inode, u.shmem_i.list); + + if (shmem_unuse_inode(inode, entry, page)) + break; + } + spin_unlock (&shmem_ilock); +} + + +/* + * shmem_file_setup - get an unlinked file living in shmem fs + * + * @name: name for dentry (to be seen in /proc/<pid>/maps + * @size: size to be set for the file + * + */ +struct file *shmem_file_setup(char * name, loff_t size) +{ + int error; + struct file *file; + struct inode * inode; + struct dentry *dentry, *root; + struct qstr this; + int vm_enough_memory(long pages); + + error = -ENOMEM; + if (!vm_enough_memory((size) >> PAGE_SHIFT)) + goto out; + + this.name = name; + this.len = strlen(name); + this.hash = 0; /* will go */ + root = shmem_fs_type.kern_mnt->mnt_root; + dentry = d_alloc(root, &this); + if (!dentry) + goto out; + + error = -ENFILE; + file = get_empty_filp(); + if (!file) + goto put_dentry; + + error = -ENOSPC; + inode = shmem_get_inode(root->d_sb, S_IFREG | S_IRWXUGO, 0); + if (!inode) + goto close_file; + + d_instantiate(dentry, inode); + dentry->d_inode->i_size = size; + file->f_vfsmnt = mntget(shmem_fs_type.kern_mnt); + file->f_dentry = dentry; + file->f_op = &shmem_file_operations; + file->f_mode = FMODE_WRITE | FMODE_READ; + inode->i_nlink = 0; /* It is unlinked */ + return(file); + +close_file: + put_filp(file); +put_dentry: + dput (dentry); +out: + return ERR_PTR(error); +} +/* + * shmem_zero_setup - setup a shared anonymous mapping + * + * @vma: the vma to be mmapped is prepared by do_mmap_pgoff + */ +int shmem_zero_setup(struct vm_area_struct *vma) +{ + struct file *file; + loff_t size = vma->vm_end - vma->vm_start; + + file = shmem_file_setup("dev/zero", size); + if (IS_ERR(file)) + return PTR_ERR(file); + + if (vma->vm_file) + fput (vma->vm_file); + vma->vm_file = file; + vma->vm_ops = &shmem_shared_vm_ops; + return 0; +} diff --git a/mm/swap_state.c b/mm/swap_state.c index df45b34af..2a5a55b7b 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -29,10 +29,9 @@ static struct address_space_operations swap_aops = { }; struct address_space swapper_space = { - { /* pages */ - &swapper_space.pages, /* .next */ - &swapper_space.pages /* .prev */ - }, + LIST_HEAD_INIT(swapper_space.clean_pages), + LIST_HEAD_INIT(swapper_space.dirty_pages), + LIST_HEAD_INIT(swapper_space.locked_pages), 0, /* nrpages */ &swap_aops, }; @@ -65,7 +64,7 @@ void add_to_swap_cache(struct page *page, swp_entry_t entry) BUG(); if (page->mapping) BUG(); - flags = page->flags & ~((1 << PG_error) | (1 << PG_dirty) | (1 << PG_referenced) | (1 << PG_arch_1)); + flags = page->flags & ~((1 << PG_error) | (1 << PG_arch_1)); page->flags = flags | (1 << PG_uptodate); add_to_page_cache_locked(page, &swapper_space, entry.val); } @@ -80,6 +79,7 @@ static inline void remove_from_swap_cache(struct page *page) PAGE_BUG(page); PageClearSwapCache(page); + ClearPageDirty(page); __remove_inode_page(page); } diff --git a/mm/swapfile.c b/mm/swapfile.c index 688e2fcdd..57f815638 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -370,15 +370,15 @@ static int try_to_unuse(unsigned int type) swap_free(entry); return -ENOMEM; } + if (PageSwapCache(page)) + delete_from_swap_cache(page); read_lock(&tasklist_lock); for_each_task(p) unuse_process(p->mm, entry, page); read_unlock(&tasklist_lock); - shm_unuse(entry, page); + shmem_unuse(entry, page); /* Now get rid of the extra reference to 
the temporary page we've been using. */ - if (PageSwapCache(page)) - delete_from_swap_cache(page); page_cache_release(page); /* * Check for and clear any overflowed swap map counts. diff --git a/mm/vmscan.c b/mm/vmscan.c index 46eb771af..d4a74f41f 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -40,7 +40,6 @@ static int try_to_swap_out(struct mm_struct * mm, struct vm_area_struct* vma, un pte_t pte; swp_entry_t entry; struct page * page; - int (*swapout)(struct page *, struct file *); int onlist; pte = *page_table; @@ -92,7 +91,7 @@ static int try_to_swap_out(struct mm_struct * mm, struct vm_area_struct* vma, un if (PageSwapCache(page)) { entry.val = page->index; if (pte_dirty(pte)) - SetPageDirty(page); + set_page_dirty(page); set_swap_pte: swap_duplicate(entry); set_pte(page_table, swp_entry_to_pte(entry)); @@ -119,64 +118,19 @@ out_failed: * Basically, this just makes it possible for us to do * some real work in the future in "refill_inactive()". */ - if (!pte_dirty(pte)) { - flush_cache_page(vma, address); + flush_cache_page(vma, address); + if (!pte_dirty(pte)) goto drop_pte; - } - - /* - * Don't go down into the swap-out stuff if - * we cannot do I/O! Avoid recursing on FS - * locks etc. - */ - if (!(gfp_mask & __GFP_IO)) - goto out_unlock_restore; - - /* - * Don't do any of the expensive stuff if - * we're not really interested in this zone. - */ - if (page->zone->free_pages + page->zone->inactive_clean_pages - + page->zone->inactive_dirty_pages - > page->zone->pages_high + inactive_target) - goto out_unlock_restore; /* * Ok, it's really dirty. That means that * we should either create a new swap cache * entry for it, or we should write it back * to its own backing store. - * - * Note that in neither case do we actually - * know that we make a page available, but - * as we potentially sleep we can no longer - * continue scanning, so we migth as well - * assume we free'd something. - * - * NOTE NOTE NOTE! This should just set a - * dirty bit in 'page', and just drop the - * pte. All the hard work would be done by - * refill_inactive(). - * - * That would get rid of a lot of problems. */ - flush_cache_page(vma, address); - if (vma->vm_ops && (swapout = vma->vm_ops->swapout)) { - int error; - struct file *file = vma->vm_file; - if (file) get_file(file); - - mm->rss--; - flush_tlb_page(vma, address); - spin_unlock(&mm->page_table_lock); - error = swapout(page, file); - if (file) fput(file); - if (error < 0) - goto out_unlock_restore; - UnlockPage(page); - deactivate_page(page); - page_cache_release(page); - return 1; /* We released page_table_lock */ + if (page->mapping) { + set_page_dirty(page); + goto drop_pte; } /* @@ -191,7 +145,7 @@ out_failed: /* Add it to the swap cache and mark it dirty */ add_to_swap_cache(page, entry); - SetPageDirty(page); + set_page_dirty(page); goto set_swap_pte; out_unlock_restore: @@ -426,11 +380,6 @@ static int swap_out(unsigned int priority, int gfp_mask, unsigned long idle_time ret = swap_out_mm(best, gfp_mask); mmdrop(best); - if (!ret) - continue; - - if (ret < 0) - kill_proc(pid, SIGBUS, 1); __ret = 1; goto out; } @@ -484,7 +433,7 @@ struct page * reclaim_page(zone_t * zone) } /* The page is dirty, or locked, move to inactive_dirty list. 
*/ - if (page->buffers || TryLockPage(page)) { + if (page->buffers || PageDirty(page) || TryLockPage(page)) { del_page_from_inactive_clean_list(page); add_page_to_inactive_dirty_list(page); continue; @@ -603,11 +552,13 @@ dirty_page_rescan: */ if (PageDirty(page)) { int (*writepage)(struct page *) = page->mapping->a_ops->writepage; + int result; + if (!writepage) goto page_active; - /* Can't start IO? Move it to the back of the list */ - if (!can_get_io_locks) { + /* First time through? Move it to the back of the list */ + if (!launder_loop) { list_del(page_lru); list_add(page_lru, &inactive_dirty_list); UnlockPage(page); @@ -619,12 +570,16 @@ dirty_page_rescan: page_cache_get(page); spin_unlock(&pagemap_lru_lock); - writepage(page); + result = writepage(page); page_cache_release(page); /* And re-start the thing.. */ spin_lock(&pagemap_lru_lock); - continue; + if (result != 1) + continue; + /* writepage refused to do anything */ + set_page_dirty(page); + goto page_active; } /* |
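The new mm/shmem.c added earlier in this diff keeps, for every page index of a shmem file, a slot recording where that page went when it was swapped out: the first `SHMEM_NR_DIRECT` indices live in the inode's `i_direct` array, and higher indices go through a lazily allocated `i_indirect` page of pointers to further pages of `swp_entry_t` (see `shmem_swp_entry()` above). Below is a hedged userspace sketch of that two-level lookup; the constant values, the `swp_entry_t` stand-in and the `calloc()`-backed "pages" are assumptions made for the sketch, not the kernel's definitions.

```c
/*
 * Userspace sketch of shmem_swp_entry()'s two-level index from the new
 * mm/shmem.c.  The first SHMEM_NR_DIRECT page indices map into a direct
 * array; anything beyond that goes through one lazily allocated page of
 * pointers to pages of swap entries.  Constants, the swp_entry_t
 * stand-in and the calloc()ed "pages" are illustrative.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define SHMEM_NR_DIRECT  16					/* assumed value for the sketch */
#define ENTRIES_PER_PAGE (4096 / sizeof(unsigned long))		/* PAGE_SIZE stand-in */

typedef struct { unsigned long val; } swp_entry_t;

struct shmem_inode_info {
	swp_entry_t  i_direct[SHMEM_NR_DIRECT];	/* slots for the first pages */
	swp_entry_t **i_indirect;		/* one "page" of pointers, allocated on demand */
};

/*
 * Return the slot that holds (or will hold) the swap entry for page
 * 'index', allocating intermediate levels on demand; NULL on overflow
 * or allocation failure -- the same shape as the kernel routine.
 */
static swp_entry_t *shmem_swp_entry(struct shmem_inode_info *info, unsigned long index)
{
	if (index < SHMEM_NR_DIRECT)
		return info->i_direct + index;

	index -= SHMEM_NR_DIRECT;
	if (index >= ENTRIES_PER_PAGE * ENTRIES_PER_PAGE)
		return NULL;			/* more than one indirect page can address */

	if (!info->i_indirect) {
		info->i_indirect = calloc(ENTRIES_PER_PAGE, sizeof(swp_entry_t *));
		if (!info->i_indirect)
			return NULL;
	}
	if (!info->i_indirect[index / ENTRIES_PER_PAGE]) {
		info->i_indirect[index / ENTRIES_PER_PAGE] =
			calloc(ENTRIES_PER_PAGE, sizeof(swp_entry_t));
		if (!info->i_indirect[index / ENTRIES_PER_PAGE])
			return NULL;
	}
	return info->i_indirect[index / ENTRIES_PER_PAGE] + index % ENTRIES_PER_PAGE;
}

int main(void)
{
	struct shmem_inode_info info;
	swp_entry_t *slot;

	memset(&info, 0, sizeof(info));
	slot = shmem_swp_entry(&info, 1000);	/* well past the direct slots */
	if (slot) {
		slot->val = 0x2a;	/* shmem_writepage() would store a real swap entry here */
		printf("index 1000 -> slot %p, val %#lx\n", (void *) slot, slot->val);
	}
	return 0;
}
```

In the diff, shmem_writepage() stores the swap entry into the slot this lookup returns, while shmem_truncate() and shmem_unuse_inode() walk the same direct and indirect levels to free or reclaim entries.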