1 files changed, 304 insertions, 0 deletions
diff --git a/fs/nfs/flushd.c b/fs/nfs/flushd.c
new file mode 100644
index 000000000..d36c3a9ae
--- /dev/null
+++ b/fs/nfs/flushd.c
@@ -0,0 +1,304 @@
+/*
+ * linux/fs/nfs/flushd.c
+ *
+ * For each NFS mount, there is a separate cache object that contains
+ * a hash table of all clusters. With this cache, an async RPC task
+ * (`flushd') is associated, which wakes up occasionally to inspect
+ * its list of dirty buffers.
+ * (Note that RPC tasks aren't kernel threads. Take a look at the
+ * rpciod code to understand what they are).
+ *
+ * Inside the cache object, we also maintain a count of the current number
+ * of dirty pages, which may not exceed a certain threshold.
+ * (FIXME: This threshold should be configurable).
+ *
+ * The code is streamlined for what I think is the prevalent case for
+ * NFS traffic, which is sequential write access without concurrent
+ * access by different processes.
+ *
+ * Copyright (C) 1996, 1997, Olaf Kirch <okir@monad.swb.de>
+ *
+ * Rewritten 6/3/2000 by Trond Myklebust
+ * Copyright (C) 1999, 2000, Trond Myklebust <trond.myklebust@fys.uio.no>
+ */
+
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/malloc.h>
+#include <linux/pagemap.h>
+#include <linux/file.h>
+
+#include <linux/sched.h>
+
+#include <linux/sunrpc/auth.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/sched.h>
+
+#include <linux/spinlock.h>
+
+#include <linux/nfs.h>
+#include <linux/nfs_fs.h>
+#include <linux/nfs_fs_sb.h>
+#include <linux/nfs_flushd.h>
+#include <linux/nfs_mount.h>
+
+/*
+ * Various constants
+ */
+#define NFSDBG_FACILITY         NFSDBG_PAGECACHE
+
+/*
+ * Wait queue that nfs_flushd() sleeps on and nfs_wake_flushd() wakes up
+ */
+static struct rpc_wait_queue    flushd_queue = RPC_INIT_WAITQ("nfs_flushd");
+
+/*
+ * Spinlock held around updates of a cache's ->task and ->inodes flush list
+ */
+spinlock_t nfs_flushd_lock = SPIN_LOCK_UNLOCKED;
+
+/*
+ * Task action and exit callbacks installed by nfs_reqlist_init()
+ */
+static void	nfs_flushd(struct rpc_task *);
+static void	nfs_flushd_exit(struct rpc_task *);
+
+
+/*
+ * Create and start the flushd RPC task for @server unless it already has one.
+ * Returns 0 on success or if a task already exists, -ENOMEM otherwise.
+ */
+int nfs_reqlist_init(struct nfs_server *server)
+{
+	struct nfs_reqlist	*cache = server->rw_requests;
+	struct rpc_task		*task;
+
+	dprintk("NFS: writecache_init\n");
+	/*
+	 * Create the RPC task before taking the spinlock: the allocation
+	 * may block, which must not happen with nfs_flushd_lock held.
+	 */
+	task = rpc_new_task(server->client, NULL, RPC_TASK_ASYNC);
+	if (!task)
+		return -ENOMEM;
+
+	spin_lock(&nfs_flushd_lock);
+	if (cache->task) {
+		/* Somebody else started flushd first; drop our task. */
+		spin_unlock(&nfs_flushd_lock);
+		rpc_release_task(task);
+		return 0;
+	}
+
+	task->tk_calldata = server;
+	task->tk_action   = nfs_flushd;
+	task->tk_exit   = nfs_flushd_exit;
+	cache->task = task;
+	cache->runat = jiffies;		/* due to run immediately */
+	cache->auth = server->client->cl_auth;
+	spin_unlock(&nfs_flushd_lock);
+
+	rpc_execute(task);
+	return 0;
+}
+
+/* Shut down flushd for @server: prod the task (or start a new one), checking
+ * at least once a second, until the task and the queued inodes are gone. */
+void nfs_reqlist_exit(struct nfs_server *server)
+{
+	struct nfs_reqlist	*reqlist = server->rw_requests;
+
+	if (reqlist == NULL)
+		return;
+	dprintk("NFS: reqlist_exit (ptr %p rpc %p)\n", reqlist, reqlist->task);
+	while (reqlist->task != NULL || reqlist->inodes != NULL) {
+		spin_lock(&nfs_flushd_lock);
+		if (reqlist->task != NULL) {
+			reqlist->task->tk_status = -ENOMEM; /* asks for a flush */
+			rpc_wake_up_task(reqlist->task);
+			spin_unlock(&nfs_flushd_lock);
+		} else {
+			spin_unlock(&nfs_flushd_lock);
+			nfs_reqlist_init(server);
+		}
+		interruptible_sleep_on_timeout(&reqlist->request_wait, 1 * HZ);
+	}
+}
+
+/* Allocate and zero the request cache of @server if it has none yet.
+ * Returns 0 on success or if one already exists, -ENOMEM on failure. */
+int nfs_reqlist_alloc(struct nfs_server *server)
+{
+	struct nfs_reqlist	*reqlist;
+
+	if (server->rw_requests != NULL)
+		return 0;
+	reqlist = kmalloc(sizeof(*reqlist), GFP_KERNEL);
+	if (reqlist == NULL)
+		return -ENOMEM;
+	memset(reqlist, 0, sizeof(*reqlist));
+	init_waitqueue_head(&reqlist->request_wait);
+	server->rw_requests = reqlist;
+	return 0;
+}
+
+void nfs_reqlist_free(struct nfs_server *server)
+{
+	if (server->rw_requests == NULL)
+		return;				/* no request cache to free */
+	kfree(server->rw_requests);
+	server->rw_requests = NULL;		/* leave no stale pointer */
+}
+
+void nfs_wake_flushd(void)
+{
+	rpc_wake_up_status(&flushd_queue, -ENOMEM); /* -ENOMEM: flush request */
+}
+
+/*
+ * Queue @inode at the tail of the flush list unless already flagged as queued.
+ */
+static void inode_append_flushd(struct inode *inode)
+{
+	struct nfs_reqlist	*reqlist = NFS_REQUESTLIST(inode);
+	struct inode		**link;
+
+	spin_lock(&nfs_flushd_lock);
+	if (!(NFS_FLAGS(inode) & NFS_INO_FLUSH)) {
+		inode->u.nfs_i.hash_next = NULL;
+		/* Find the terminating NULL link and hook the inode in there */
+		for (link = &reqlist->inodes; *link != NULL;
+		     link = &(*link)->u.nfs_i.hash_next)
+			;
+		*link = inode;
+		/* Take a reference so that the inode cannot disappear
+		 * while it sits on the flush list. */
+		NFS_FLAGS(inode) |= NFS_INO_FLUSH;
+		inode->i_count++;
+	}
+	spin_unlock(&nfs_flushd_lock);
+}
+
+/* Unlink @inode from the flush list and drop the reference the list held. */
+void inode_remove_flushd(struct inode *inode)
+{
+	struct inode	**q = &(NFS_REQUESTLIST(inode))->inodes;
+
+	spin_lock(&nfs_flushd_lock);
+	if (!(NFS_FLAGS(inode) & NFS_INO_FLUSH))
+		goto out;
+	while (*q && *q != inode)
+		q = &(*q)->u.nfs_i.hash_next;
+	if (*q) {
+		*q = inode->u.nfs_i.hash_next;
+		NFS_FLAGS(inode) &= ~NFS_INO_FLUSH;
+		spin_unlock(&nfs_flushd_lock);
+		iput(inode);	/* may sleep: never call it under the spinlock */
+		return;
+	}
+ out:
+	spin_unlock(&nfs_flushd_lock);
+}
+
+/* Ask flushd to scan @inode by @time at the latest: bring the inode's scan
+ * time forward if needed, queue it, then start or wake flushd as required. */
+void inode_schedule_scan(struct inode *inode, unsigned long time)
+{
+	struct nfs_reqlist	*reqlist = NFS_REQUESTLIST(inode);
+	unsigned long		wakeup;
+
+	if (time_after(NFS_NEXTSCAN(inode), time))
+		NFS_NEXTSCAN(inode) = time;
+	wakeup = jiffies + 1 * HZ;
+	if (time_before(wakeup, NFS_NEXTSCAN(inode)))
+		wakeup = NFS_NEXTSCAN(inode);
+	inode_append_flushd(inode);
+	spin_lock(&nfs_flushd_lock);
+	if (reqlist->task != NULL) {
+		/* Only wake flushd if its next run would come too late */
+		if (time_after(reqlist->runat, wakeup))
+			rpc_wake_up_task(reqlist->task);
+		spin_unlock(&nfs_flushd_lock);
+		return;
+	}
+	spin_unlock(&nfs_flushd_lock);
+	nfs_reqlist_init(NFS_SERVER(inode));
+}
+
+/* flushd task action: scan the queued inodes, then sleep again or detach. */
+static void
+nfs_flushd(struct rpc_task *task)
+{
+	struct nfs_server	*server;
+	struct nfs_reqlist	*cache;
+	struct inode		*inode, *next;
+	unsigned long		delay = jiffies + NFS_WRITEBACK_LOCKDELAY;
+	int			flush = (task->tk_status == -ENOMEM);
+	/* flush: tk_status was -ENOMEM, so every queued inode gets synced */
+        dprintk("NFS: %4d flushd starting\n", task->tk_pid);
+	server = (struct nfs_server *) task->tk_calldata;
+        cache = server->rw_requests;
+	/* Detach the whole list so it can be walked without the lock held */
+	spin_lock(&nfs_flushd_lock);
+	next = cache->inodes;
+	cache->inodes = NULL;
+	spin_unlock(&nfs_flushd_lock);
+	/* Each inode on the list carries a reference from inode_append_flushd() */
+	while ((inode = next) != NULL) {
+		next = next->u.nfs_i.hash_next;
+		inode->u.nfs_i.hash_next = NULL;
+		NFS_FLAGS(inode) &= ~NFS_INO_FLUSH;
+		/* Forced flush, or else only act once the scan time has passed */
+		if (flush) {
+			nfs_sync_file(inode, NULL, 0, 0, FLUSH_AGING);
+		} else if (time_after(jiffies, NFS_NEXTSCAN(inode))) {
+			NFS_NEXTSCAN(inode) = jiffies + NFS_WRITEBACK_LOCKDELAY;
+			nfs_flush_timeout(inode, FLUSH_AGING);
+#ifdef CONFIG_NFS_V3
+			nfs_commit_timeout(inode, FLUSH_AGING);
+#endif
+		}
+		/* Still has writebacks: requeue and track the earliest next scan */
+		if (nfs_have_writebacks(inode)) {
+			inode_append_flushd(inode);
+			if (time_after(delay, NFS_NEXTSCAN(inode)))
+				delay = NFS_NEXTSCAN(inode);
+		}
+		iput(inode);	/* drop the reference of the old list entry */
+	}
+
+	dprintk("NFS: %4d flushd back to sleep\n", task->tk_pid);
+	if (time_after(jiffies + 1 * HZ, delay))
+		delay = 1 * HZ;		/* never less than one second */
+	else
+		delay = delay - jiffies;	/* absolute time -> timeout */
+	task->tk_status = 0;
+	task->tk_action = nfs_flushd;
+	task->tk_timeout = delay;
+	cache->runat = jiffies + task->tk_timeout;
+	/* Nothing left to do: detach the task from the cache; otherwise sleep */
+	spin_lock(&nfs_flushd_lock);
+	if (!cache->nr_requests && !cache->inodes) {
+		cache->task = NULL;
+		task->tk_action = NULL;
+	} else
+		rpc_sleep_on(&flushd_queue, task, NULL, NULL);
+	spin_unlock(&nfs_flushd_lock);
+}
+
+/* Exit callback of the flushd task: detach it from its cache, wake sleepers
+ * on request_wait (e.g. nfs_reqlist_exit()) and release the task. */
+static void
+nfs_flushd_exit(struct rpc_task *task)
+{
+	struct nfs_server	*server = task->tk_calldata;
+	struct nfs_reqlist	*reqlist = server->rw_requests;
+
+	spin_lock(&nfs_flushd_lock);
+	if (reqlist->task == task)
+		reqlist->task = NULL;
+	spin_unlock(&nfs_flushd_lock);
+	wake_up(&reqlist->request_wait);
+	rpc_release_task(task);
+}
+