path: root/mm/filemap.c
author    Ralf Baechle <ralf@linux-mips.org>  2000-10-05 01:18:40 +0000
committer Ralf Baechle <ralf@linux-mips.org>  2000-10-05 01:18:40 +0000
commit    012bb3e61e5eced6c610f9e036372bf0c8def2d1 (patch)
tree      87efc733f9b164e8c85c0336f92c8fb7eff6d183 /mm/filemap.c
parent    625a1589d3d6464b5d90b8a0918789e3afffd220 (diff)
Merge with Linux 2.4.0-test9. Please check DECstation; I had a number
of rejects to fix up while integrating Linus's patches. I also found that this kernel will only boot SMP on Origin; the UP kernel freezes soon after bootup with SCSI timeout messages. I am committing this anyway, since the last CVS versions had the same problem.
Diffstat (limited to 'mm/filemap.c')
-rw-r--r--  mm/filemap.c | 303
1 file changed, 113 insertions(+), 190 deletions(-)
diff --git a/mm/filemap.c b/mm/filemap.c
index 977225432..6aca16409 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -44,9 +44,8 @@
atomic_t page_cache_size = ATOMIC_INIT(0);
unsigned int page_hash_bits;
struct page **page_hash_table;
-struct list_head lru_cache;
-static spinlock_t pagecache_lock = SPIN_LOCK_UNLOCKED;
+spinlock_t pagecache_lock = SPIN_LOCK_UNLOCKED;
/*
* NOTE: to avoid deadlocking you must never acquire the pagecache_lock with
* the pagemap_lru_lock held.
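The lock-ordering rule above implies a fixed nesting for code that needs both locks: take pagecache_lock first and pagemap_lru_lock second, never the reverse. A minimal sketch of the safe pattern (illustrative only, not part of this patch):

	spin_lock(&pagecache_lock);
	spin_lock(&pagemap_lru_lock);
	/* ... work on the hash/inode queues and the LRU lists ... */
	spin_unlock(&pagemap_lru_lock);
	spin_unlock(&pagecache_lock);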
@@ -92,7 +91,7 @@ static inline int sync_page(struct page *page)
* sure the page is locked and that nobody else uses it - or that usage
* is safe.
*/
-static inline void __remove_inode_page(struct page *page)
+void __remove_inode_page(struct page *page)
{
remove_page_from_inode_queue(page);
remove_page_from_hash_queue(page);
@@ -146,9 +145,40 @@ void invalidate_inode_pages(struct inode * inode)
spin_unlock(&pagecache_lock);
}
-/*
+static inline void truncate_partial_page(struct page *page, unsigned partial)
+{
+ memclear_highpage_flush(page, partial, PAGE_CACHE_SIZE-partial);
+
+ if (page->buffers)
+ block_flushpage(page, partial);
+
+}
+
+static inline void truncate_complete_page(struct page *page)
+{
+ if (!page->buffers || block_flushpage(page, 0))
+ lru_cache_del(page);
+
+ /*
+ * We remove the page from the page cache _after_ we have
+ * destroyed all buffer-cache references to it. Otherwise some
+ * other process might think this inode page is not in the
+ * page cache and create a buffer-cache alias to it, causing
+ * all sorts of fun problems ...
+ */
+ ClearPageDirty(page);
+ remove_inode_page(page);
+ page_cache_release(page);
+}
+
+/**
+ * truncate_inode_pages - truncate *all* the pages from an offset
+ * @mapping: mapping to truncate
+ * @lstart: offset from which to truncate
+ *
* Truncate the page cache at a set offset, removing the pages
* that are beyond that offset (and zeroing out partial pages).
+ * If any page is locked we wait for it to become unlocked.
*/
void truncate_inode_pages(struct address_space * mapping, loff_t lstart)
{
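For context, the typical caller shape (illustrative, not from this patch): a path shrinking an inode passes the new file size, so every page at or beyond it is dropped and the page straddling the boundary is zeroed from lstart to its end.

	/* Illustrative caller, in the style of a vmtruncate() path: */
	struct address_space *mapping = inode->i_mapping;

	/* inode->i_size already holds the new, smaller size */
	truncate_inode_pages(mapping, (loff_t) inode->i_size);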
@@ -168,11 +198,10 @@ repeat:
page = list_entry(curr, struct page, list);
curr = curr->next;
-
offset = page->index;
- /* page wholly truncated - free it */
- if (offset >= start) {
+ /* Is this one of the pages to truncate? */
+ if ((offset >= start) || (partial && (offset + 1) == start)) {
if (TryLockPage(page)) {
page_cache_get(page);
spin_unlock(&pagecache_lock);
@@ -183,23 +212,14 @@ repeat:
page_cache_get(page);
spin_unlock(&pagecache_lock);
- if (!page->buffers || block_flushpage(page, 0))
- lru_cache_del(page);
-
- /*
- * We remove the page from the page cache
- * _after_ we have destroyed all buffer-cache
- * references to it. Otherwise some other process
- * might think this inode page is not in the
- * page cache and creates a buffer-cache alias
- * to it causing all sorts of fun problems ...
- */
- remove_inode_page(page);
- ClearPageDirty(page);
+ if (partial && (offset + 1) == start) {
+ truncate_partial_page(page, partial);
+ partial = 0;
+ } else
+ truncate_complete_page(page);
UnlockPage(page);
page_cache_release(page);
- page_cache_release(page);
/*
* We have done things without the pagecache lock,
@@ -210,176 +230,10 @@ repeat:
*/
goto repeat;
}
- /*
- * there is only one partial page possible.
- */
- if (!partial)
- continue;
-
- /* and it's the one preceeding the first wholly truncated page */
- if ((offset + 1) != start)
- continue;
-
- /* partial truncate, clear end of page */
- if (TryLockPage(page)) {
- spin_unlock(&pagecache_lock);
- goto repeat;
- }
- page_cache_get(page);
- spin_unlock(&pagecache_lock);
-
- memclear_highpage_flush(page, partial, PAGE_CACHE_SIZE-partial);
- if (page->buffers)
- block_flushpage(page, partial);
-
- partial = 0;
-
- /*
- * we have dropped the spinlock so we have to
- * restart.
- */
- UnlockPage(page);
- page_cache_release(page);
- goto repeat;
}
spin_unlock(&pagecache_lock);
}
-/*
- * nr_dirty represents the number of dirty pages that we will write async
- * before doing sync writes. We can only do sync writes if we can
- * wait for IO (__GFP_IO set).
- */
-int shrink_mmap(int priority, int gfp_mask)
-{
- int ret = 0, count, nr_dirty;
- struct list_head * page_lru;
- struct page * page = NULL;
-
- count = nr_lru_pages / (priority + 1);
- nr_dirty = priority;
-
- /* we need pagemap_lru_lock for list_del() ... subtle code below */
- spin_lock(&pagemap_lru_lock);
- while (count > 0 && (page_lru = lru_cache.prev) != &lru_cache) {
- page = list_entry(page_lru, struct page, lru);
- list_del(page_lru);
-
- if (PageTestandClearReferenced(page))
- goto dispose_continue;
-
- count--;
- /*
- * Avoid unscalable SMP locking for pages we can
- * immediate tell are untouchable..
- */
- if (!page->buffers && page_count(page) > 1)
- goto dispose_continue;
-
- if (TryLockPage(page))
- goto dispose_continue;
-
- /* Release the pagemap_lru lock even if the page is not yet
- queued in any lru queue since we have just locked down
- the page so nobody else may SMP race with us running
- a lru_cache_del() (lru_cache_del() always run with the
- page locked down ;). */
- spin_unlock(&pagemap_lru_lock);
-
- /* avoid freeing the page while it's locked */
- page_cache_get(page);
-
- /*
- * Is it a buffer page? Try to clean it up regardless
- * of zone - it's old.
- */
- if (page->buffers) {
- int wait;
- /*
- * 0 - free it if can do so without IO
- * 1 - start write-out of dirty buffers
- * 2 - wait for locked buffers
- */
- wait = (gfp_mask & __GFP_IO) ? (nr_dirty-- < 0) ? 2 : 1 : 0;
- if (!try_to_free_buffers(page, wait))
- goto unlock_continue;
- /* page was locked, inode can't go away under us */
- if (!page->mapping) {
- atomic_dec(&buffermem_pages);
- goto made_buffer_progress;
- }
- }
-
- /* Take the pagecache_lock spinlock held to avoid
- other tasks to notice the page while we are looking at its
- page count. If it's a pagecache-page we'll free it
- in one atomic transaction after checking its page count. */
- spin_lock(&pagecache_lock);
-
- /*
- * We can't free pages unless there's just one user
- * (count == 2 because we added one ourselves above).
- */
- if (page_count(page) != 2)
- goto cache_unlock_continue;
-
- /*
- * Is it a page swap page? If so, we want to
- * drop it if it is no longer used, even if it
- * were to be marked referenced..
- */
- if (PageSwapCache(page)) {
- spin_unlock(&pagecache_lock);
- __delete_from_swap_cache(page);
- goto made_inode_progress;
- }
-
- /*
- * Page is from a zone we don't care about.
- * Don't drop page cache entries in vain.
- */
- if (page->zone->free_pages > page->zone->pages_high)
- goto cache_unlock_continue;
-
- /* is it a page-cache page? */
- if (page->mapping) {
- if (!PageDirty(page) && !pgcache_under_min()) {
- __remove_inode_page(page);
- spin_unlock(&pagecache_lock);
- goto made_inode_progress;
- }
- goto cache_unlock_continue;
- }
-
- printk(KERN_ERR "shrink_mmap: unknown LRU page!\n");
-
-cache_unlock_continue:
- spin_unlock(&pagecache_lock);
-unlock_continue:
- spin_lock(&pagemap_lru_lock);
- UnlockPage(page);
- page_cache_release(page);
-dispose_continue:
- list_add(page_lru, &lru_cache);
- }
- goto out;
-
-made_inode_progress:
- page_cache_release(page);
-made_buffer_progress:
- UnlockPage(page);
- page_cache_release(page);
- ret = 1;
- spin_lock(&pagemap_lru_lock);
- /* nr_lru_pages needs the spinlock */
- nr_lru_pages--;
-
-out:
- spin_unlock(&pagemap_lru_lock);
-
- return ret;
-}
-
static inline struct page * __find_page_nolock(struct address_space *mapping, unsigned long offset, struct page *page)
{
goto inside;
@@ -394,7 +248,14 @@ inside:
if (page->index == offset)
break;
}
- SetPageReferenced(page);
+ /*
+ * Touching the page may move it to the active list.
+ * If we end up with too few inactive pages, we wake
+ * up kswapd.
+ */
+ age_page_up(page);
+ if (inactive_shortage() > inactive_target / 2 && free_shortage())
+ wakeup_kswapd(0);
not_found:
return page;
}
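age_page_up() itself is defined elsewhere (mm/swap.c in this kernel generation); the following is a minimal sketch of its shape, assuming the 2.4 page-aging scheme. PAGE_AGE_ADV, PAGE_AGE_MAX and activate_page() belong to that scheme, not to this patch:

	/* Sketch, not from this patch: reward a referenced page. */
	void age_page_up(struct page *page)
	{
		if (!page->age)			/* fully aged out: reactivate */
			activate_page(page);
		page->age += PAGE_AGE_ADV;	/* bump the age ... */
		if (page->age > PAGE_AGE_MAX)	/* ... but clamp it */
			page->age = PAGE_AGE_MAX;
	}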
@@ -626,6 +487,7 @@ void ___wait_on_page(struct page *page)
set_task_state(tsk, TASK_UNINTERRUPTIBLE);
if (!PageLocked(page))
break;
+ run_task_queue(&tq_disk);
schedule();
} while (PageLocked(page));
tsk->state = TASK_RUNNING;
@@ -749,6 +611,53 @@ repeat:
#endif
/*
+ * We combine this with read-ahead to deactivate pages when we
+ * think there's sequential IO going on. Note that this is
+ * harmless since we don't actually evict the pages from memory
+ * but just move them to the inactive list.
+ *
+ * TODO:
+ * - make the readahead code smarter
+ * - move readahead to the VMA level so we can do the same
+ * trick with mmap()
+ *
+ * Rik van Riel, 2000
+ */
+static void drop_behind(struct file * file, unsigned long index)
+{
+ struct inode *inode = file->f_dentry->d_inode;
+ struct address_space *mapping = inode->i_mapping;
+ struct page **hash;
+ struct page *page;
+ unsigned long start;
+
+ /* Nothing to drop-behind if we're on the first page. */
+ if (!index)
+ return;
+
+ if (index > file->f_rawin)
+ start = index - file->f_rawin;
+ else
+ start = 0;
+
+ /*
+ * Go backwards from index-1 and drop all pages in the
+ * readahead window. Since the readahead window may have
+ * been increased since the last time we were called, we
+ * stop when the page isn't there.
+ */
+ spin_lock(&pagecache_lock);
+ while (--index >= start) {
+ hash = page_hash(mapping, index);
+ page = __find_page_nolock(mapping, index, *hash);
+ if (!page)
+ break;
+ deactivate_page(page);
+ }
+ spin_unlock(&pagecache_lock);
+}
+
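A worked example of the window arithmetic in drop_behind() (values hypothetical): with index = 100 and file->f_rawin = 32, start becomes 68, so the loop visits pages 99 down to 68, deactivating each page it finds and stopping early at the first index no longer present in the page cache.

	unsigned long index = 100, f_rawin = 32;	/* hypothetical values */
	unsigned long start = (index > f_rawin) ? index - f_rawin : 0;	/* 68 */
	/* while (--index >= start) then visits 99, 98, ..., 68 */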
+/*
* Read-ahead profiling information
* --------------------------------
* Every PROFILE_MAXREADCOUNT, the following information is written
@@ -971,6 +880,12 @@ static void generic_file_readahead(int reada_ok,
if (filp->f_ramax > max_readahead)
filp->f_ramax = max_readahead;
+ /*
+ * Move the pages that have already been passed
+ * to the inactive list.
+ */
+ drop_behind(filp, index);
+
#ifdef PROFILE_READAHEAD
profile_readahead((reada_ok == 2), filp);
#endif
@@ -1074,6 +989,13 @@ found_page:
goto page_not_up_to_date;
generic_file_readahead(reada_ok, filp, inode, page);
page_ok:
+ /* If users can be writing to this page using arbitrary
+ * virtual addresses, take care about potential aliasing
+ * before reading the page on the kernel side.
+ */
+ if (page->mapping->i_mmap_shared != NULL)
+ flush_dcache_page(page);
+
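flush_dcache_page() only does real work on architectures with virtually indexed data caches, where a shared user mapping and the kernel's view of the same page can alias; elsewhere it compiles away entirely. A sketch of the no-op case, assuming the usual definition on physically indexed cache machines such as i386:

	/* No aliasing is possible with a physically indexed cache. */
	#define flush_dcache_page(page)		do { } while (0)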
/*
* Ok, we have the page, and it's up-to-date, so
* now we can copy it to user space...
@@ -2002,10 +1924,10 @@ static long madvise_willneed(struct vm_area_struct * vma,
* Application no longer needs these pages. If the pages are dirty,
* it's OK to just throw them away. The app will be more careful about
* data it wants to keep. Be sure to free swap resources too. The
- * zap_page_range call sets things up for shrink_mmap to actually free
+ * zap_page_range call sets things up for refill_inactive to actually free
* these pages later if no one else has touched them in the meantime,
* although we could add these pages to a global reuse list for
- * shrink_mmap to pick up before reclaiming other pages.
+ * refill_inactive to pick up before reclaiming other pages.
*
* NB: This interface discards data rather than pushes it out to swap,
* as some implementations do. This has performance implications for
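For reference, the userspace side of this path (illustrative): the application declares it no longer needs a range and must be prepared to lose any unwritten data in it.

	/* Illustrative madvise(2) call that reaches this code: */
	if (madvise(addr, length, MADV_DONTNEED) < 0)
		perror("madvise");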
@@ -2530,6 +2452,7 @@ generic_file_write(struct file *file,const char *buf,size_t count,loff_t *ppos)
unlock:
/* Mark it unlocked again and drop the page.. */
UnlockPage(page);
+ deactivate_page(page);
page_cache_release(page);
if (status < 0)
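Deactivating just-written pages gives streaming writes the same drop-behind treatment as reads: the data stays cached, but it is first in line for reclaim rather than displacing the active set. A sketch of the shape such a helper takes, assuming the 2.4 split into locked and unlocked variants:

	/* Sketch, not from this patch: queue a page for early reclaim. */
	void deactivate_page(struct page *page)
	{
		spin_lock(&pagemap_lru_lock);
		deactivate_page_nolock(page);
		spin_unlock(&pagemap_lru_lock);
	}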