Merge with Linux 2.1.67.

author: Ralf Baechle <ralf@linux-mips.org> 1997-12-06 23:51:34 +0000
committer: Ralf Baechle <ralf@linux-mips.org> 1997-12-06 23:51:34 +0000
commit: 230e5ab6a084ed50470f101934782dbf54b0d06b (patch)
tree: 5dd821c8d33f450470588e7a543f74bf74306e9e /fs/nfs
parent: c9b1c8a64c6444d189856f1e26bdcb8b4cd0113a (diff)
7 files changed, 1195 insertions, 521 deletions
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index acee50754..9e1d936dd 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -29,6 +29,13 @@
 
 #include <asm/segment.h>	/* for fs functions */
 
+#define NFS_MAX_AGE (10*HZ)	/* max age for dentry validation, in jiffies */
+
+/* needed by smbfs as well ... move to dcache? */
+extern void nfs_renew_times(struct dentry *);
+
+#define NFS_PARANOIA 1
+
 /*
  * Head for a dircache entry. Currently still very simple; when
  * the cache grows larger, we will need a LRU list.
@@ -36,18 +43,20 @@
 struct nfs_dirent {
 	dev_t			dev;		/* device number */
 	ino_t			ino;		/* inode number */
-	u32			cookie;		/* cooke of first entry */
+	u32			cookie;		/* cookie of first entry */
 	unsigned short		valid  : 1,	/* data is valid */
 				locked : 1;	/* entry locked */
 	unsigned int		size;		/* # of entries */
 	unsigned long		age;		/* last used */
 	unsigned long		mtime;		/* last attr stamp */
 	struct wait_queue *	wait;
-	struct nfs_entry *	entry;
+	__u32 *			entry;		/* three __u32's per entry */
 };
 
-static int nfs_dir_open(struct inode * inode, struct file * file);
-static long nfs_dir_read(struct inode *, struct file *, char *, unsigned long);
+static int nfs_safe_remove(struct dentry *);
+
+static int nfs_dir_open(struct inode *, struct file *);
+static ssize_t nfs_dir_read(struct file *, char *, size_t, loff_t *);
 static int nfs_readdir(struct file *, void *, filldir_t);
 static int nfs_lookup(struct inode *, struct dentry *);
 static int nfs_create(struct inode *, struct dentry *, int);
@@ -57,7 +66,8 @@ static int nfs_unlink(struct inode *, struct dentry *);
 static int nfs_symlink(struct inode *, struct dentry *, const char *);
 static int nfs_link(struct inode *, struct inode *, struct dentry *);
 static int nfs_mknod(struct inode *, struct dentry *, int, int);
-static int nfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *);
+static int nfs_rename(struct inode *, struct dentry *,
+		      struct inode *, struct dentry *);
 
 static struct file_operations nfs_dir_operations = {
 	NULL,			/* lseek - default */
@@ -102,8 +112,8 @@ nfs_dir_open(struct inode *dir, struct file *file)
 	return nfs_revalidate_inode(NFS_SERVER(dir), dir);
 }
 
-static long
-nfs_dir_read(struct inode *inode, struct file *filp, char *buf, unsigned long count)
+static ssize_t
+nfs_dir_read(struct file *filp, char *buf, size_t count, loff_t *ppos)
 {
 	return -EISDIR;
 }
@@ -123,15 +133,16 @@ static struct nfs_dirent	dircache[NFS_MAX_DIRCACHE];
 
 static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 {
+	struct inode 		*inode = filp->f_dentry->d_inode;
 	static struct wait_queue *readdir_wait = NULL;
 	struct wait_queue	**waitp = NULL;
 	struct nfs_dirent	*cache, *free;
-	struct nfs_entry	*entry;
 	unsigned long		age, dead;
 	u32			cookie;
 	int			ismydir, result;
 	int			i, j, index = 0;
-	struct inode 		*inode = filp->f_dentry->d_inode;
+	__u32			*entry;
+	char			*name, *start;
 
 	dfprintk(VFS, "NFS: nfs_readdir(%x/%ld)\n", inode->i_dev, inode->i_ino);
 	if (!inode || !S_ISDIR(inode->i_mode)) {
@@ -148,7 +159,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 again:
 	if (waitp) {
 		interruptible_sleep_on(waitp);
-		if (current->signal & ~current->blocked)
+		if (signal_pending(current))
 			return -ERESTARTSYS;
 		waitp = NULL;
 	}
@@ -174,7 +185,7 @@ again:
 			goto again;
 		}
 
-		if (ismydir && cache->mtime != NFS_OLDMTIME(inode))
+		if (ismydir && cache->mtime != inode->i_mtime)
 			cache->valid = 0;
 
 		if (!cache->valid || cache->age < dead) {
@@ -194,17 +205,15 @@ again:
 			break;
 		}
 		for (j = 0; j < cache->size; j++) {
-			/*
-			dprintk("NFS: examing entry %.*s @%d\n",
-				(int) cache->entry[j].length,
-				cache->entry[j].name,
-				cache->entry[j].cookie);
-			 */
-			if (cache->entry[j].cookie != cookie)
+			__u32 *this_ent = cache->entry + j*3;
+
+			if (*(this_ent+1) != cookie)
 				continue;
 			if (j < cache->size - 1) {
-				entry = cache->entry + (index = j + 1);
-			} else if (cache->entry[j].eof) {
+				index = j + 1;
+				entry = this_ent + 3;
+			} else if (*(this_ent+2) & (1 << 15)) {
+				/* eof */
 				return 0;
 			}
 			break;
@@ -235,12 +244,10 @@ again:
 		cache->dev    = inode->i_dev;
 		cache->ino    = inode->i_ino;
 		if (!cache->entry) {
-			cache->entry = (struct nfs_entry *)
-						get_free_page(GFP_KERNEL);
-			if (!cache->entry) {
-				result = -ENOMEM;
+			result = -ENOMEM;
+			cache->entry = (__u32 *) get_free_page(GFP_KERNEL);
+			if (!cache->entry)
 				goto done;
-			}
 		}
 
 		result = nfs_proc_readdir(NFS_SERVER(inode), NFS_FH(inode),
@@ -251,31 +258,35 @@ again:
 		cache->valid = 1;
 		entry = cache->entry + (index = 0);
 	}
-	cache->mtime = NFS_OLDMTIME(inode);
+	cache->mtime = inode->i_mtime;
 	cache->age = jiffies;
 
 	/*
 	 * Yowza! We have a cache entry...
 	 */
+	start = (char *) cache->entry;
 	while (index < cache->size) {
-		int	nextpos = entry->cookie;
+		__u32	fileid  = *entry++;
+		__u32	nextpos = *entry++; /* cookie */
+		__u32	length  = *entry++;
 
 		/*
+		 * Unpack the eof flag, offset, and length
+		 */
+		result = length & (1 << 15); /* eof flag */
+		name = start + ((length >> 16) & 0xFFFF);
+		length &= 0x7FFF;
+		/*
 		dprintk("NFS: filldir(%p, %.*s, %d, %d, %x, eof %x)\n", entry,
-				(int) entry->length, entry->name, entry->length,
+				(int) length, name, length,
 				(unsigned int) filp->f_pos,
-				entry->fileid, entry->eof);
+				fileid, result);
 		 */
 
-		if (filldir(dirent, entry->name, entry->length, cookie, entry->fileid) < 0)
+		if (filldir(dirent, name, length, cookie, fileid) < 0)
 			break;
 		cookie = nextpos;
-		if (nextpos != entry->cookie) {
-			printk("nfs_readdir: shouldn't happen!\n");
-			break;
-		}
 		index++;
-		entry++;
 	}
 	filp->f_pos = cookie;
 	result = 0;
@@ -293,47 +304,69 @@ done:
 }
 
 /*
- * Invalidate dircache entries for inode
+ * Invalidate dircache entries for an inode.
  */
 void
 nfs_invalidate_dircache(struct inode *inode)
 {
-	struct nfs_dirent *cache;
+	struct nfs_dirent *cache = dircache;
 	dev_t		dev = inode->i_dev;
 	ino_t		ino = inode->i_ino;
 	int		i;
 
 	dfprintk(DIRCACHE, "NFS: invalidate dircache for %x/%ld\n", dev, (long)ino);
-	for (i = 0, cache = dircache; i < NFS_MAX_DIRCACHE; i++, cache++) {
-		if (!cache->locked && cache->dev == dev && cache->ino == ino)
-			cache->valid = 0;	/* brute force */
+	for (i = NFS_MAX_DIRCACHE; i--; cache++) {
+		if (cache->ino != ino)
+			continue;
+		if (cache->dev != dev)
+			continue;
+		if (cache->locked) {
+			printk("NFS: cache locked for %s/%ld\n",
+				kdevname(dev), (long) ino);
+			continue;
+		}
+		cache->valid = 0;	/* brute force */
 	}
 }
 
 /*
- * Free directory cache memory
- * Called from cleanup_module
+ * Invalidate the dircache for a super block (or for every super block
+ * when sb is NULL), and release the cache memory.
  */
 void
-nfs_free_dircache(void)
+nfs_invalidate_dircache_sb(struct super_block *sb)
 {
-	struct nfs_dirent *cache;
+	struct nfs_dirent *cache = dircache;
 	int		i;
 
-	dfprintk(DIRCACHE, "NFS: freeing dircache\n");
-	for (i = 0, cache = dircache; i < NFS_MAX_DIRCACHE; i++, cache++) {
-		cache->valid = 0;
+	for (i = NFS_MAX_DIRCACHE; i--; cache++) {
+		if (sb && sb->s_dev != cache->dev)
+			continue;
 		if (cache->locked) {
-			printk("nfs_kfree_cache: locked entry in dircache!\n");
+			printk("NFS: cache locked at umount %s\n",
+				(cache->entry ? "(lost a page!)" : ""));
 			continue;
 		}
-		if (cache->entry)
+		cache->valid = 0;	/* brute force */
+		if (cache->entry) {
 			free_page((unsigned long) cache->entry);
-		cache->entry = NULL;
+			cache->entry = NULL;
+		}
 	}
 }
 
 /*
+ * Free directory cache memory
+ * Called from cleanup_module
+ */
+void
+nfs_free_dircache(void)
+{
+	dfprintk(DIRCACHE, "NFS: freeing dircache\n");
+	nfs_invalidate_dircache_sb(NULL);
+}
+
+/*
  * This is called every time the dcache has a lookup hit,
  * and we should check whether we can really trust that
  * lookup.
@@ -350,20 +383,92 @@ static int nfs_lookup_revalidate(struct dentry * dentry)
 	unsigned long time = jiffies - dentry->d_time;
 	unsigned long max = 5*HZ;
 
-	if (dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode))
-		max = 10*HZ;
-	return time < max;
+	if (dentry->d_inode) {
+		if (is_bad_inode(dentry->d_inode)) {
+#ifdef NFS_PARANOIA
+printk("nfs_lookup_validate: %s/%s has dud inode\n",
+dentry->d_parent->d_name.name, dentry->d_name.name);
+#endif
+			goto bad;
+		}
+		if (S_ISDIR(dentry->d_inode->i_mode))
+			max = NFS_MAX_AGE;
+	}
+
+	return (time < max) || IS_ROOT(dentry);
+bad:
+	return 0;
 }
 
-static void nfs_silly_delete(struct dentry *);
+/*
+ * This is called from dput() when d_count is going to 0.
+ * We use it to clean up silly-renamed files, and to check
+ * for dentries that have already expired.
+ */
+static void nfs_dentry_delete(struct dentry *dentry)
+{
+	if (dentry->d_flags & DCACHE_NFSFS_RENAMED) {
+		int error;
+		
+		dentry->d_flags &= ~DCACHE_NFSFS_RENAMED;
+#ifdef NFS_DEBUG
+printk("nfs_dentry_delete: unlinking %s/%s\n",
+dentry->d_parent->d_name.name, dentry->d_name.name);
+#endif
+		/* Unhash it first */
+		d_drop(dentry);
+		error = nfs_safe_remove(dentry);
+		if (error)
+			printk("NFS: can't silly-delete %s/%s, error=%d\n",
+				dentry->d_parent->d_name.name,
+				dentry->d_name.name, error);
+	}
+	/*
+	 * Check whether to expire the dentry ...
+	 */
+	else {
+		unsigned long age = jiffies - dentry->d_time;
+		if (age > NFS_MAX_AGE)
+			d_drop(dentry);
+	}
+
+#ifdef NFS_PARANOIA
+	/*
+	 * Sanity check: if the dentry has been unhashed and the
+	 * inode still has users, we could have problems ...
+	 */
+	if (list_empty(&dentry->d_hash) && dentry->d_inode) {
+		struct inode *inode = dentry->d_inode;
+		if (inode->i_count > 1) {
+printk("nfs_dentry_delete: %s/%s: ino=%ld, count=%d, nlink=%d\n",
+dentry->d_parent->d_name.name, dentry->d_name.name,
+inode->i_ino, inode->i_count, inode->i_nlink);
+		}
+	}
+#endif
+}
 
 static struct dentry_operations nfs_dentry_operations = {
-	nfs_lookup_revalidate,
+	nfs_lookup_revalidate,	/* d_validate(struct dentry *) */
 	0,			/* d_hash */
 	0,			/* d_compare */
-	nfs_silly_delete,
+	nfs_dentry_delete	/* d_delete(struct dentry *) */
 };
 
+/*
+ * Whenever a lookup succeeds, we know the parent directories
+ * are all valid, so we want to update the dentry timestamps.
+ */
+void nfs_renew_times(struct dentry * dentry)
+{
+	for (;;) {
+		dentry->d_time = jiffies;
+		if (dentry == dentry->d_parent)
+			break;
+		dentry = dentry->d_parent;
+	}
+}
+
 static int nfs_lookup(struct inode *dir, struct dentry * dentry)
 {
 	struct inode *inode;
@@ -373,38 +478,66 @@ static int nfs_lookup(struct inode *dir, struct dentry * dentry)
 	int error;
 
 	dfprintk(VFS, "NFS: lookup(%x/%ld, %.*s)\n",
-				dir->i_dev, dir->i_ino, len, dentry->d_name.name);
+			dir->i_dev, dir->i_ino, len, dentry->d_name.name);
 
 	if (!dir || !S_ISDIR(dir->i_mode)) {
 		printk("nfs_lookup: inode is NULL or not a directory\n");
 		return -ENOENT;
 	}
 
+	error = -ENAMETOOLONG;
 	if (len > NFS_MAXNAMLEN)
-		return -ENAMETOOLONG;
-
-	error = nfs_proc_lookup(NFS_SERVER(dir), NFS_FH(dir), dentry->d_name.name, &fhandle, &fattr);
+		goto out;
 
+	error = nfs_proc_lookup(NFS_SERVER(dir), NFS_FH(dir), 
+				dentry->d_name.name, &fhandle, &fattr);
 	inode = NULL;
+	if (error == -ENOENT)
+		goto no_entry;
 	if (!error) {
+		error = -EACCES;
 		inode = nfs_fhget(dir->i_sb, &fhandle, &fattr);
-		if (!inode)
-			return -EACCES;
-	} else if (error != -ENOENT)
-		return error;
-
-	dentry->d_time = jiffies;
-	dentry->d_op = &nfs_dentry_operations;
-	d_add(dentry, inode);
-	return 0;
+		if (inode) {
+	    no_entry:
+			dentry->d_op = &nfs_dentry_operations;
+			d_add(dentry, inode);
+			nfs_renew_times(dentry);
+			error = 0;
+		}
+	}
+out:
+	return error;
 }
 
+/*
+ * Code common to create, mkdir, and mknod.
+ */
+static int nfs_instantiate(struct inode *dir, struct dentry *dentry,
+			struct nfs_fattr *fattr, struct nfs_fh *fhandle)
+{
+	struct inode *inode;
+	int error = -EACCES;
+
+	inode = nfs_fhget(dir->i_sb, fhandle, fattr);
+	if (inode) {
+		d_instantiate(dentry, inode);
+		nfs_renew_times(dentry);
+		error = 0;
+	}
+	return error;
+}
+
+/*
+ * Following a failed create operation, we drop the dentry rather
+ * than retain a negative dentry. This avoids a problem in the event
+ * that the operation succeeded on the server, but an error in the
+ * reply path made it appear to have failed.
+ */
 static int nfs_create(struct inode *dir, struct dentry * dentry, int mode)
 {
 	struct nfs_sattr sattr;
 	struct nfs_fattr fattr;
 	struct nfs_fh fhandle;
-	struct inode *inode;
 	int error;
 
 	dfprintk(VFS, "NFS: create(%x/%ld, %s\n",
@@ -415,33 +548,41 @@ static int nfs_create(struct inode *dir, struct dentry * dentry, int mode)
 		return -ENOENT;
 	}
 
+	error = -ENAMETOOLONG;
 	if (dentry->d_name.len > NFS_MAXNAMLEN)
-		return -ENAMETOOLONG;
+		goto out;
 
 	sattr.mode = mode;
 	sattr.uid = sattr.gid = sattr.size = (unsigned) -1;
 	sattr.atime.seconds = sattr.mtime.seconds = (unsigned) -1;
-	error = nfs_proc_create(NFS_SERVER(dir), NFS_FH(dir),
-		dentry->d_name.name, &sattr, &fhandle, &fattr);
-
-	if (error)
-		return error;
-
-	inode = nfs_fhget(dir->i_sb, &fhandle, &fattr);
-	if (!inode)
-		return -EACCES;
 
+	/*
+	 * Invalidate the dir cache before the operation to avoid a race.
+	 */
 	nfs_invalidate_dircache(dir);
-	d_instantiate(dentry, inode);
-	return 0;
+	error = nfs_proc_create(NFS_SERVER(dir), NFS_FH(dir),
+			dentry->d_name.name, &sattr, &fhandle, &fattr);
+	if (!error)
+		error = nfs_instantiate(dir, dentry, &fattr, &fhandle);
+	else {
+#ifdef NFS_PARANOIA
+printk("nfs_create: %s/%s failed, error=%d\n",
+dentry->d_parent->d_name.name, dentry->d_name.name, error);
+#endif
+		d_drop(dentry);
+	}
+out:
+	return error;
 }
 
+/*
+ * See comments for nfs_create regarding failed operations.
+ */
 static int nfs_mknod(struct inode *dir, struct dentry *dentry, int mode, int rdev)
 {
 	struct nfs_sattr sattr;
 	struct nfs_fattr fattr;
 	struct nfs_fh fhandle;
-	struct inode *inode;
 	int error;
 
 	dfprintk(VFS, "NFS: mknod(%x/%ld, %s\n",
@@ -459,29 +600,31 @@ static int nfs_mknod(struct inode *dir, struct dentry *dentry, int mode, int rde
 	sattr.uid = sattr.gid = sattr.size = (unsigned) -1;
 	if (S_ISCHR(mode) || S_ISBLK(mode))
 		sattr.size = rdev; /* get out your barf bag */
-
 	sattr.atime.seconds = sattr.mtime.seconds = (unsigned) -1;
-	error = nfs_proc_create(NFS_SERVER(dir), NFS_FH(dir),
-		dentry->d_name.name, &sattr, &fhandle, &fattr);
-
-	if (error)
-		return error;
-
-	inode = nfs_fhget(dir->i_sb, &fhandle, &fattr);
-	if (!inode)
-		return -EACCES;
 
 	nfs_invalidate_dircache(dir);
-	d_instantiate(dentry, inode);
-	return 0;
+	error = nfs_proc_create(NFS_SERVER(dir), NFS_FH(dir),
+				dentry->d_name.name, &sattr, &fhandle, &fattr);
+	if (!error)
+		error = nfs_instantiate(dir, dentry, &fattr, &fhandle);
+	else {
+#ifdef NFS_PARANOIA
+printk("nfs_mknod: %s/%s failed, error=%d\n",
+dentry->d_parent->d_name.name, dentry->d_name.name, error);
+#endif
+		d_drop(dentry);
+	}
+	return error;
 }
 
+/*
+ * See comments for nfs_create regarding failed operations.
+ */
 static int nfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 {
 	struct nfs_sattr sattr;
 	struct nfs_fattr fattr;
 	struct nfs_fh fhandle;
-	struct inode * inode;
 	int error;
 
 	dfprintk(VFS, "NFS: mkdir(%x/%ld, %s\n",
@@ -499,21 +642,29 @@ static int nfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 	sattr.uid = sattr.gid = sattr.size = (unsigned) -1;
 	sattr.atime.seconds = sattr.mtime.seconds = (unsigned) -1;
 
-	error = nfs_proc_mkdir(NFS_SERVER(dir), NFS_FH(dir),
-		dentry->d_name.name, &sattr, &fhandle, &fattr);
-
-	if (error)
-		return error;
-
-	inode = nfs_fhget(dir->i_sb, &fhandle, &fattr);
-	if (!inode)
-		return -EACCES;
-
 	nfs_invalidate_dircache(dir);
-	d_instantiate(dentry, inode);
-	return 0;
+	error = nfs_proc_mkdir(NFS_SERVER(dir), NFS_FH(dir),
+				dentry->d_name.name, &sattr, &fhandle, &fattr);
+	if (!error)
+		error = nfs_instantiate(dir, dentry, &fattr, &fhandle);
+	else {
+#ifdef NFS_PARANOIA
+printk("nfs_mkdir: %s/%s failed, error=%d\n",
+dentry->d_parent->d_name.name, dentry->d_name.name, error);
+#endif
+		d_drop(dentry);
+	}
+	return error;
 }
 
+/*
+ * To avoid retaining a stale inode reference, we check the dentry
+ * use count prior to the operation, and return EBUSY if it has
+ * multiple users.
+ *
+ * Update inode->i_nlink immediately after a successful operation.
+ * (See comments for nfs_safe_remove.)
+ */
 static int nfs_rmdir(struct inode *dir, struct dentry *dentry)
 {
 	int error;
@@ -526,16 +677,28 @@ static int nfs_rmdir(struct inode *dir, struct dentry *dentry)
 		return -ENOENT;
 	}
 
+	error = -ENAMETOOLONG;
 	if (dentry->d_name.len > NFS_MAXNAMLEN)
-		return -ENAMETOOLONG;
-
+		goto out;
+
+	error = -EBUSY;
+	if (dentry->d_count > 1) {
+		/* Attempt to shrink child dentries ... */
+		shrink_dcache_parent(dentry);
+		if (dentry->d_count > 1)
+			goto out;
+	}
+	/* Drop the dentry to force a new lookup */
+	d_drop(dentry);
 	error = nfs_proc_rmdir(NFS_SERVER(dir), NFS_FH(dir), dentry->d_name.name);
-	if (error)
-		return error;
-
-	nfs_invalidate_dircache(dir);
-	d_delete(dentry);
-	return 0;
+	if (!error) {
+		if (dentry->d_inode->i_nlink)
+			dentry->d_inode->i_nlink --;
+		nfs_invalidate_dircache(dir);
+		nfs_renew_times(dentry);
+	}
+out:
+	return error;
 }
 
 
@@ -551,24 +714,22 @@ static int nfs_rmdir(struct inode *dir, struct dentry *dentry)
  *
  *  Concerning my choice of the temp name: it is just nice to have
  *  i_ino part of the temp name, as this offers another check whether
- *  somebody attempts to remove the "silly renamed" dentry
- *  itself. Which is something that I consider evil. Your opinion may
- *  vary.
+ *  somebody attempts to remove the "silly renamed" dentry itself.
+ *  Which is something that I consider evil. Your opinion may vary.
  *  BUT:
  *  Now that I compute the hash value right, it should be possible to simply
  *  check for the DCACHE_NFSFS_RENAMED flag in dentry->d_flag instead of
  *  doing the string compare.
  *  WHICH MEANS:
  *  This offers the opportunity to shorten the temp name. Currently, I use
- *  the hex representation of i_ino + the hex value of jiffies. This
- *  sums up to as much as 36 characters for a 64 bit machine, and needs
- *  20 chars on a 32 bit machine. Have a look at jiffiesize etc.
+ *  the hex representation of i_ino + an event counter. This sums up to
+ *  as much as 36 characters for a 64 bit machine, and needs 20 chars on 
+ *  a 32 bit machine.
  *  QUINTESSENCE
  *  The use of i_ino is simply cosmetic. All we need is a unique temp
- *  file name for the .nfs files. The hex representation of "jiffies"
- *  seemed to be adequate. And as we retry in case such a file already
- *  exists we are guaranteed to succed (after some jiffies have passed
- *  by :)
+ *  file name for the .nfs files. The event counter seemed to be adequate.
+ *  And as we retry in case such a file already exists, we are guaranteed
+ *  to succeed.
  */
 
 static
@@ -576,14 +737,11 @@ struct dentry *nfs_silly_lookup(struct dentry *parent, char *silly, int slen)
 {
 	struct qstr    sqstr;
 	struct dentry *sdentry;
-	int i, error;
+	int error;
 
 	sqstr.name = silly;
 	sqstr.len  = slen;
-	sqstr.hash = init_name_hash();
-	for (i= 0; i < slen; i++)
-		sqstr.hash = partial_name_hash(silly[i], sqstr.hash);
-	sqstr.hash = end_name_hash(sqstr.hash);
+	sqstr.hash = full_name_hash(silly, slen);
 	sdentry = d_lookup(parent, &sqstr);
 	if (!sdentry) {
 		sdentry = d_alloc(parent, &sqstr);
@@ -605,19 +763,29 @@ static int nfs_sillyrename(struct inode *dir, struct dentry *dentry)
 	const int      countersize = sizeof(sillycounter)*2;
 	const int      slen       = strlen(".nfs") + i_inosize + countersize;
 	char           silly[slen+1];
-	int            error;
 	struct dentry *sdentry;
+	int            error = -EIO;
 
+	/*
+	 * Note that a silly-renamed file can be deleted once it's
+	 * no longer in use -- it's just an ordinary file now.
+	 */
 	if (dentry->d_count == 1) {
-		return -EIO;  /* No need to silly rename. */
+		dentry->d_flags &= ~DCACHE_NFSFS_RENAMED;
+		goto out;  /* No need to silly rename. */
 	}
 
-	if (dentry->d_flags & DCACHE_NFSFS_RENAMED) {
-		return -EBUSY; /* don't allow to unlink silly inode -- nope,
-				* think a bit: silly DENTRY, NOT inode --
-				* itself
-				*/
-	}
+#ifdef NFS_PARANOIA
+if (!dentry->d_inode)
+printk("NFS: silly-renaming %s/%s, negative dentry??\n",
+dentry->d_parent->d_name.name, dentry->d_name.name);
+#endif
+	/*
+	 * We don't allow a dentry to be silly-renamed twice.
+	 */
+	error = -EBUSY;
+	if (dentry->d_flags & DCACHE_NFSFS_RENAMED)
+		goto out;
 
 	sprintf(silly, ".nfs%*.*lx",
 		i_inosize, i_inosize, dentry->d_inode->i_ino);
@@ -634,54 +802,109 @@ static int nfs_sillyrename(struct inode *dir, struct dentry *dentry)
 			 dentry->d_name.name, silly);
 		
 		sdentry = nfs_silly_lookup(dentry->d_parent, silly, slen);
-		if (IS_ERR(sdentry)) {
-			return -EIO; /* FIXME ? */
-		}		
+		/*
+		 * N.B. Better to return EBUSY here ... it could be
+		 * dangerous to delete the file while it's in use.
+		 */
+		if (IS_ERR(sdentry))
+			goto out;
 	} while(sdentry->d_inode != NULL); /* need negative lookup */
 
 	error = nfs_proc_rename(NFS_SERVER(dir),
 				NFS_FH(dir), dentry->d_name.name,
 				NFS_FH(dir), silly);
-	if (error) {
-		dput(sdentry);
-		return error;
+	if (!error) {
+		nfs_invalidate_dircache(dir);
+		nfs_renew_times(dentry);
+		d_move(dentry, sdentry);
+		dentry->d_flags |= DCACHE_NFSFS_RENAMED;
+ 		/* If we return 0 we don't unlink */
 	}
-	nfs_invalidate_dircache(dir);
-	d_move(dentry, sdentry);
 	dput(sdentry);
-	dentry->d_flags |= DCACHE_NFSFS_RENAMED;
-
-	return 0; /* don't unlink */
+out:
+	return error;
 }
 
-static void nfs_silly_delete(struct dentry *dentry)
+/*
+ * Remove a file after making sure there are no pending writes,
+ * and after checking that the file has only one user. 
+ *
+ * Updating inode->i_nlink here rather than waiting for the next
+ * nfs_refresh_inode() is not merely cosmetic; once an object has
+ * been deleted, we want to get rid of the inode locally.  The NFS
+ * server may reuse the fileid for a new inode, and we don't want
+ * that to be confused with this inode.
+ */
+static int nfs_safe_remove(struct dentry *dentry)
 {
-	if (dentry->d_flags & DCACHE_NFSFS_RENAMED) {
-		struct inode *dir = dentry->d_parent->d_inode;
-		int error;
+	struct inode *dir = dentry->d_parent->d_inode;
+	struct inode *inode = dentry->d_inode;
+	int error, rehash = 0;
 		
-		dentry->d_flags &= ~DCACHE_NFSFS_RENAMED;
+	error = -EBUSY;
+	if (inode) {
+		if (NFS_WRITEBACK(inode)) {
+			nfs_flush_dirty_pages(inode, 0, 0, 0);
+			if (NFS_WRITEBACK(inode)) {
+#ifdef NFS_PARANOIA
+printk("nfs_safe_remove: %s/%s writes pending, d_count=%d\n",
+dentry->d_parent->d_name.name, dentry->d_name.name, dentry->d_count);
+#endif
+				goto out;
+			}
+		}
+	} else {
+#ifdef NFS_PARANOIA
+printk("nfs_safe_remove: %s/%s already negative??\n",
+dentry->d_parent->d_name.name, dentry->d_name.name);
+#endif
+	}
 
-		/* Unhash it first */
+	if (dentry->d_count > 1) {
+#ifdef NFS_PARANOIA
+printk("nfs_safe_remove: %s/%s busy, d_count=%d\n",
+dentry->d_parent->d_name.name, dentry->d_name.name, dentry->d_count);
+#endif
+		goto out;
+	}
+	/*
+	 * Unhash the dentry while we remove the file ...
+	 */
+	if (!list_empty(&dentry->d_hash)) {
 		d_drop(dentry);
-		dfprintk(VFS, "trying to unlink %s\n", dentry->d_name.name);
-		error = nfs_proc_remove(NFS_SERVER(dir),
+		rehash = 1;
+	}
+	error = nfs_proc_remove(NFS_SERVER(dir),
 					NFS_FH(dir), dentry->d_name.name);
-		if (error < 0)
-			printk("NFS " __FUNCTION__ " failed (err = %d)\n",
-			       -error);
-		dentry->d_inode->i_nlink --;
+	/*
+	 * ... then restore the hashed state.  This ensures that the
+	 * dentry can't become busy after having its file deleted.
+	 */
+	if (rehash) {
+		d_add(dentry, inode);
+	}
+#ifdef NFS_PARANOIA
+if (dentry->d_count > 1)
+printk("nfs_safe_remove: %s/%s busy after delete?? d_count=%d\n",
+dentry->d_parent->d_name.name, dentry->d_name.name, dentry->d_count);
+if (inode && inode->i_count > 1)
+printk("nfs_safe_remove: %s/%s inode busy?? i_count=%d\n",
+dentry->d_parent->d_name.name, dentry->d_name.name, inode->i_count);
+#endif
+	if (!error) {
 		nfs_invalidate_dircache(dir);
+		if (inode && inode->i_nlink)
+			inode->i_nlink --;
+		d_delete(dentry);
 	}
+out:
+	return error;
 }
 
 /*  We do silly rename. In case sillyrename() returns -EBUSY, the inode
  *  belongs to an active ".nfs..." file and we return -EBUSY.
  *
  *  If sillyrename() returns 0, we do nothing, otherwise we unlink.
- * 
- *  inode->i_nlink is updated here rather than waiting for the next
- *  nfs_refresh_inode() for cosmetic reasons only.
  */
 static int nfs_unlink(struct inode *dir, struct dentry *dentry)
 {
@@ -695,70 +918,77 @@ static int nfs_unlink(struct inode *dir, struct dentry *dentry)
 		return -ENOENT;
 	}
 
+	error = -ENAMETOOLONG;
 	if (dentry->d_name.len > NFS_MAXNAMLEN)
-		return -ENAMETOOLONG;
+		goto out;
 
 	error = nfs_sillyrename(dir, dentry);
-
-	if (error == -EBUSY) {
-		return -EBUSY;
-	} else if (error < 0) {
-		error = nfs_proc_remove(NFS_SERVER(dir),
-					NFS_FH(dir), dentry->d_name.name);
-		if (error < 0)
-			return error;
-		
-		dentry->d_inode->i_nlink --;
-		nfs_invalidate_dircache(dir);
-		d_delete(dentry);
+	if (error && error != -EBUSY) {
+		error = nfs_safe_remove(dentry);
+		if (!error) {
+			nfs_renew_times(dentry);
+		}
 	}
-
-	return 0;
+out:
+	return error;
 }
 
-static int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
+static int
+nfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
 {
 	struct nfs_sattr sattr;
 	int error;
 
 	dfprintk(VFS, "NFS: symlink(%x/%ld, %s, %s)\n",
-				dir->i_dev, dir->i_ino, dentry->d_name.name, symname);
+			dir->i_dev, dir->i_ino, dentry->d_name.name, symname);
 
 	if (!dir || !S_ISDIR(dir->i_mode)) {
 		printk("nfs_symlink: inode is NULL or not a directory\n");
 		return -ENOENT;
 	}
 
+	error = -ENAMETOOLONG;
 	if (dentry->d_name.len > NFS_MAXNAMLEN)
-		return -ENAMETOOLONG;
+		goto out;
 
 	if (strlen(symname) > NFS_MAXPATHLEN)
-		return -ENAMETOOLONG;
+		goto out;
 
-	sattr.mode = S_IFLNK | S_IRWXUGO; /* SunOS 4.1.2 crashes without this! */
+#ifdef NFS_PARANOIA
+if (dentry->d_inode)
+printk("nfs_proc_symlink: %s/%s not negative!\n",
+dentry->d_parent->d_name.name, dentry->d_name.name);
+#endif
+	/*
+	 * Fill in the sattr for the call.
+ 	 * Note: SunOS 4.1.2 crashes if the mode isn't initialized!
+	 */
+	sattr.mode = S_IFLNK | S_IRWXUGO;
 	sattr.uid = sattr.gid = sattr.size = (unsigned) -1;
 	sattr.atime.seconds = sattr.mtime.seconds = (unsigned) -1;
 
-	error = nfs_proc_symlink(NFS_SERVER(dir), NFS_FH(dir),
-		dentry->d_name.name, symname, &sattr);
-
-	if (error)
-		return error;
-
-	nfs_invalidate_dircache(dir);
-	/*  this looks _funny_ doesn't it? But: nfs_proc_symlink()
-	 *  only fills in sattr, not fattr. Thus nfs_fhget() cannot be
-	 *  called, it would be pointless, without a valid fattr
-	 *  argument. Other possibility: call nfs_proc_lookup()
-	 *  HERE. But why? If somebody wants to reference this
-	 *  symlink, the cached_lookup() will fail, and
-	 *  nfs_proc_symlink() will be called anyway.
+	/*
+	 * Drop the dentry in advance to force a new lookup.
+	 * Since nfs_proc_symlink doesn't return a fattr, we
+	 * can't instantiate the new inode.
 	 */
 	d_drop(dentry);
-	return 0;
+	error = nfs_proc_symlink(NFS_SERVER(dir), NFS_FH(dir),
+				dentry->d_name.name, symname, &sattr);
+	if (!error) {
+		nfs_invalidate_dircache(dir);
+		nfs_renew_times(dentry->d_parent);
+	} else if (error == -EEXIST) {
+		printk("nfs_proc_symlink: %s/%s already exists??\n",
+			dentry->d_parent->d_name.name, dentry->d_name.name);
+	}
+
+out:
+	return error;
 }
 
-static int nfs_link(struct inode *inode, struct inode *dir, struct dentry *dentry)
+static int 
+nfs_link(struct inode *inode, struct inode *dir, struct dentry *dentry)
 {
 	int error;
 
@@ -771,20 +1001,20 @@ static int nfs_link(struct inode *inode, struct inode *dir, struct dentry *dentr
 		return -ENOENT;
 	}
 
+	error = -ENAMETOOLONG;
 	if (dentry->d_name.len > NFS_MAXNAMLEN)
-		return -ENAMETOOLONG;
-
-	error = nfs_proc_link(NFS_SERVER(inode), NFS_FH(inode),
-		NFS_FH(dir), dentry->d_name.name);
-
-	if (error)
-		return error;
+		goto out;
 
-	nfs_invalidate_dircache(dir);
-	inode->i_count ++;
-	inode->i_nlink ++; /* no need to wait for nfs_refresh_inode() */
-	d_instantiate(dentry, inode);
-	return 0;
+	error = nfs_proc_link(NFS_SERVER(inode), NFS_FH(inode), NFS_FH(dir),
+				dentry->d_name.name);
+	if (!error) {
+		nfs_invalidate_dircache(dir);
+		inode->i_count ++;
+		inode->i_nlink ++; /* no need to wait for nfs_refresh_inode() */
+		d_instantiate(dentry, inode);
+	}
+out:
+	return error;
 }
 
 /*
@@ -804,16 +1034,25 @@ static int nfs_link(struct inode *inode, struct inode *dir, struct dentry *dentr
  * implementation that only depends on the dcache stuff instead of
  * using the inode layer
  *
+ * Unfortunately, things are a little more complicated than indicated
+ * above. For a cross-directory move, we want to make sure we can get
+ * rid of the old inode after the operation.  This means there must be
+ * no pending writes (if it's a file), and the use count must be 1.
+ * If these conditions are met, we can drop the dentries before doing
+ * the rename.
  */
 static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 		      struct inode *new_dir, struct dentry *new_dentry)
 {
-	int error;
-
-	dfprintk(VFS, "NFS: rename(%x/%ld, %s -> %x/%ld, %s)\n",
-				old_dir->i_dev, old_dir->i_ino, old_dentry->d_name.name,
-				new_dir->i_dev, new_dir->i_ino, new_dentry->d_name.name);
-
+	struct inode *old_inode = old_dentry->d_inode;
+	struct inode *new_inode = new_dentry->d_inode;
+	int error, rehash = 0, update = 1;
+
+#ifdef NFS_DEBUG_VERBOSE
+printk("nfs_rename: old %s/%s, count=%d, new %s/%s, count=%d\n",
+old_dentry->d_parent->d_name.name,old_dentry->d_name.name,old_dentry->d_count,
+new_dentry->d_parent->d_name.name,new_dentry->d_name.name,new_dentry->d_count);
+#endif
 	if (!old_dir || !S_ISDIR(old_dir->i_mode)) {
 		printk("nfs_rename: old inode is NULL or not a directory\n");
 		return -ENOENT;
@@ -824,98 +1063,109 @@ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 		return -ENOENT;
 	}
 
-	if (old_dentry->d_name.len > NFS_MAXNAMLEN || new_dentry->d_name.len > NFS_MAXNAMLEN)
-		return -ENAMETOOLONG;
+	error = -ENAMETOOLONG;
+	if (old_dentry->d_name.len > NFS_MAXNAMLEN ||
+	    new_dentry->d_name.len > NFS_MAXNAMLEN)
+		goto out;
 
-	if (new_dir != old_dir) {
-		error = nfs_sillyrename(old_dir, old_dentry);
+	/*
+	 * First check whether the target is busy ... we can't
+	 * safely do _any_ rename if the target is in use.
+	 */
+	if (new_dentry->d_count > 1) {
+		if (new_inode && S_ISDIR(new_inode->i_mode))
+			shrink_dcache_parent(new_dentry);
+	}
+	error = -EBUSY;
+	if (new_dentry->d_count > 1) {
+#ifdef NFS_PARANOIA
+printk("nfs_rename: target %s/%s busy, d_count=%d\n",
+new_dentry->d_parent->d_name.name,new_dentry->d_name.name,new_dentry->d_count);
+#endif
+		goto out;
+	}
 
-		if (error == -EBUSY) {
-			return -EBUSY;
-		} else if (error == 0) { /* did silly rename stuff */
-			error = nfs_link(old_dentry->d_inode,
-					 new_dir, new_dentry);
-			
-			return error;
+	/*
+	 * Check for within-directory rename ... no complications.
+	 */
+	if (new_dir == old_dir)
+		goto do_rename;
+	/*
+	 * Cross-directory move ... check whether it's a file.
+	 */
+	if (S_ISREG(old_inode->i_mode)) {
+		if (NFS_WRITEBACK(old_inode)) {
+#ifdef NFS_PARANOIA
+printk("nfs_rename: %s/%s has pending writes\n",
+old_dentry->d_parent->d_name.name, old_dentry->d_name.name);
+#endif
+			nfs_flush_dirty_pages(old_inode, 0, 0, 0);
+			if (NFS_WRITEBACK(old_inode)) {
+#ifdef NFS_PARANOIA
+printk("nfs_rename: %s/%s has pending writes after flush\n",
+old_dentry->d_parent->d_name.name, old_dentry->d_name.name);
+#endif
+				goto out;
+			}
 		}
-		/* no need for silly rename, proceed as usual */
 	}
-	error = nfs_proc_rename(NFS_SERVER(old_dir),
-				NFS_FH(old_dir), old_dentry->d_name.name,
-				NFS_FH(new_dir), new_dentry->d_name.name);
-	if (error)
-		return error;
-
-	nfs_invalidate_dircache(old_dir);
-	nfs_invalidate_dircache(new_dir);
+	/*
+	 * Moving a directory ... prune child dentries if needed.
+	 */
+	else if (old_dentry->d_count > 1)
+		shrink_dcache_parent(old_dentry);
 
-	/* Update the dcache */
-	d_move(old_dentry, new_dentry);
-	return 0;
-}
+	/*
+	 * Now check the use counts ... we can't safely do the
+	 * rename unless we can drop the dentries first.
+	 */
+	if (old_dentry->d_count > 1) {
+#ifdef NFS_PARANOIA
+printk("nfs_rename: old dentry %s/%s busy, d_count=%d\n",
+old_dentry->d_parent->d_name.name,old_dentry->d_name.name,old_dentry->d_count);
+#endif
+		goto out;
+	}
+	if (new_dentry->d_count > 1) {
+#ifdef NFS_PARANOIA
+printk("nfs_rename: new dentry %s/%s busy, d_count=%d\n",
+new_dentry->d_parent->d_name.name,new_dentry->d_name.name,new_dentry->d_count);
+#endif
+		goto out;
+	}
 
-/*
- * Many nfs protocol calls return the new file attributes after
- * an operation.  Here we update the inode to reflect the state
- * of the server's inode.
- */
+	d_drop(old_dentry);
+	update = 0;
 
-void nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr)
-{
-	int was_empty;
-
-	dfprintk(VFS, "NFS: refresh_inode(%x/%ld ct=%d)\n",
-		 inode->i_dev, inode->i_ino, inode->i_count);
-
-	if (!inode || !fattr) {
-		printk("nfs_refresh_inode: inode or fattr is NULL\n");
-		return;
-	}
-	if (inode->i_ino != fattr->fileid) {
-		printk("nfs_refresh_inode: inode number mismatch\n");
-		return;
-	}
-	was_empty = (inode->i_mode == 0);
-	inode->i_mode = fattr->mode;
-	inode->i_nlink = fattr->nlink;
-	inode->i_uid = fattr->uid;
-	inode->i_gid = fattr->gid;
-
-	/* Size changed from outside: invalidate caches on next read */
-	if (inode->i_size != fattr->size) {
-		dfprintk(PAGECACHE, "NFS:      cacheinv(%x/%ld)\n",
-					inode->i_dev, inode->i_ino);
-		NFS_CACHEINV(inode);
-	}
-	if (NFS_OLDMTIME(inode) != fattr->mtime.seconds) {
-		dfprintk(PAGECACHE, "NFS:      mtime change on %x/%ld\n",
-					inode->i_dev, inode->i_ino);
-		NFS_ATTRTIMEO(inode) = NFS_MINATTRTIMEO(inode);
-	}
-	inode->i_size = fattr->size;
-	if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
-		inode->i_rdev = to_kdev_t(fattr->rdev);
-	else
-		inode->i_rdev = 0;
-	inode->i_blocks = fattr->blocks;
-	inode->i_atime = fattr->atime.seconds;
-	inode->i_mtime = fattr->mtime.seconds;
-	inode->i_ctime = fattr->ctime.seconds;
-	if (S_ISREG(inode->i_mode))
-		inode->i_op = &nfs_file_inode_operations;
-	else if (S_ISDIR(inode->i_mode))
-		inode->i_op = &nfs_dir_inode_operations;
-	else if (S_ISLNK(inode->i_mode))
-		inode->i_op = &nfs_symlink_inode_operations;
-	else if (S_ISCHR(inode->i_mode))
-		inode->i_op = &chrdev_inode_operations;
-	else if (S_ISBLK(inode->i_mode))
-		inode->i_op = &blkdev_inode_operations;
-	else if (S_ISFIFO(inode->i_mode)) {
-		if (was_empty)
-			init_fifo(inode);
-	} else
-		inode->i_op = NULL;
+do_rename:
+	/*
+	 * We must prevent any new references to the target while
+	 * the rename is in progress, so we unhash the dentry.
+	 */
+	if (!list_empty(&new_dentry->d_hash)) {
+		d_drop(new_dentry);
+		rehash = 1;
+	}
+	error = nfs_proc_rename(NFS_SERVER(old_dir),
+				NFS_FH(old_dir), old_dentry->d_name.name,
+				NFS_FH(new_dir), new_dentry->d_name.name);
+	if (rehash) {
+		d_add(new_dentry, new_inode);
+	}
+#ifdef NFS_PARANOIA
+if (new_dentry->d_count > 1)
+printk("nfs_rename: %s/%s busy after rename, d_count=%d\n",
+new_dentry->d_parent->d_name.name,new_dentry->d_name.name,new_dentry->d_count);
+#endif
+	if (!error) {
+		nfs_invalidate_dircache(new_dir);
+		nfs_invalidate_dircache(old_dir);
+		/* Update the dcache if needed */
+		if (update)
+			d_move(old_dentry, new_dentry);
+	}
+out:
+	return error;
 }
 
 /*
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 4587950ef..0f3bd5ed3 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -33,9 +33,8 @@
 #define NFSDBG_FACILITY		NFSDBG_FILE
 
 static int  nfs_file_mmap(struct file *, struct vm_area_struct *);
-static long nfs_file_read(struct inode *, struct file *, char *, unsigned long);
-static long nfs_file_write(struct inode *, struct file *,
-					const char *, unsigned long);
+static ssize_t nfs_file_read(struct file *, char *, size_t, loff_t *);
+static ssize_t nfs_file_write(struct file *, const char *, size_t, loff_t *);
 static int  nfs_file_close(struct inode *, struct file *);
 static int  nfs_fsync(struct file *, struct dentry *dentry);
 
@@ -84,67 +83,91 @@ struct inode_operations nfs_file_inode_operations = {
 # define IS_SWAPFILE(inode)	(0)
 #endif
 
-
+/*
+ * Flush all dirty pages, and check for write errors.
+ *
+ * Note that since the file close operation is called only by the
+ * _last_ process to close the file, we need to flush _all_ dirty 
+ * pages. This also means that there is little sense in checking
+ * for errors for this specific process -- we should probably just
+ * clear all errors.
+ */
 static int
 nfs_file_close(struct inode *inode, struct file *file)
 {
-	int	status;
+	int	status, error;
 
 	dfprintk(VFS, "nfs: close(%x/%ld)\n", inode->i_dev, inode->i_ino);
 
-	if ((status = nfs_flush_dirty_pages(inode, 0, 0)) < 0)
-		return status;
-	return nfs_write_error(inode);
+	status = nfs_flush_dirty_pages(inode, 0, 0, 0);
+	error = nfs_write_error(inode);
+	if (!status)
+		status = error;
+	return status;
 }
 
-static long
-nfs_file_read(struct inode * inode, struct file * file,
-				char * buf, unsigned long count)
+static ssize_t
+nfs_file_read(struct file * file, char * buf, size_t count, loff_t *ppos)
 {
-	int	status;
+	struct inode * inode = file->f_dentry->d_inode;
+	ssize_t result;
 
 	dfprintk(VFS, "nfs: read(%x/%ld, %lu@%lu)\n",
 			inode->i_dev, inode->i_ino, count,
-			(unsigned long) file->f_pos);
+			(unsigned long) *ppos);
 
-	if ((status = nfs_revalidate_inode(NFS_SERVER(inode), inode)) < 0)
-		return status;
-	return generic_file_read(inode, file, buf, count);
+	result = nfs_revalidate_inode(NFS_SERVER(inode), inode);
+	if (!result)
+		result = generic_file_read(file, buf, count, ppos);
+	return result;
 }
 
 static int
 nfs_file_mmap(struct file * file, struct vm_area_struct * vma)
 {
-	int	status;
 	struct inode *inode = file->f_dentry->d_inode;
+	int	status;
 
 	dfprintk(VFS, "nfs: mmap(%x/%ld)\n", inode->i_dev, inode->i_ino);
 
-	if ((status = nfs_revalidate_inode(NFS_SERVER(inode), inode)) < 0)
-		return status;
-	return generic_file_mmap(file, vma);
+	status = nfs_revalidate_inode(NFS_SERVER(inode), inode);
+	if (!status)
+		status = generic_file_mmap(file, vma);
+	return status;
 }
 
-static int nfs_fsync(struct file *file, struct dentry *dentry)
+/*
+ * Flush any dirty pages for this process, and check for write errors.
+ * The return status from this call provides a reliable indication of
+ * whether any write errors occurred for this process.
+ */
+static int
+nfs_fsync(struct file *file, struct dentry *dentry)
 {
 	struct inode *inode = dentry->d_inode;
+	int status, error;
+
 	dfprintk(VFS, "nfs: fsync(%x/%ld)\n", inode->i_dev, inode->i_ino);
 
-	return nfs_flush_dirty_pages(inode, 0, 0);
+	status = nfs_flush_dirty_pages(inode, current->pid, 0, 0);
+	error = nfs_write_error(inode);
+	if (!status)
+		status = error;
+	return status;
 }
 
 /* 
  * Write to a file (through the page cache).
  */
-static long
-nfs_file_write(struct inode *inode, struct file *file,
-			const char *buf, unsigned long count)
+static ssize_t
+nfs_file_write(struct file *file, const char *buf, size_t count, loff_t *ppos)
 {
-	int	result;
+	struct inode * inode = file->f_dentry->d_inode;
+	ssize_t result;
 
 	dfprintk(VFS, "nfs: write(%x/%ld (%d), %lu@%lu)\n",
 			inode->i_dev, inode->i_ino, inode->i_count,
-			count, (unsigned long) file->f_pos);
+			count, (unsigned long) *ppos);
 
 	if (!inode) {
 		printk("nfs_file_write: inode = NULL\n");
@@ -154,21 +177,26 @@ nfs_file_write(struct inode *inode, struct file *file,
 		printk("NFS: attempt to write to active swap file!\n");
 		return -EBUSY;
 	}
-	if ((result = nfs_revalidate_inode(NFS_SERVER(inode), inode)) < 0)
-		return result;
+	result = nfs_revalidate_inode(NFS_SERVER(inode), inode);
+	if (result)
+		goto out;
+
+	/* N.B. This should be impossible now -- inodes can't change type */
 	if (!S_ISREG(inode->i_mode)) {
 		printk("nfs_file_write: write to non-file, mode %07o\n",
 			inode->i_mode);
 		return -EINVAL;
 	}
-	if (count <= 0)
-		return 0;
-
-	/* Return error from previous async call */
-	if ((result = nfs_write_error(inode)) < 0)
-		return result;
-
-	return generic_file_write(inode, file, buf, count);
+	result = count;
+	if (!count)
+		goto out;
+
+	/* Check for an error from a previous async call */
+	result = nfs_write_error(inode);
+	if (!result)
+		result = generic_file_write(file, buf, count, ppos);
+out:
+	return result;
 }
 
 /*
@@ -177,15 +205,15 @@ nfs_file_write(struct inode *inode, struct file *file,
 int
 nfs_lock(struct file *filp, int cmd, struct file_lock *fl)
 {
+	struct inode * inode = filp->f_dentry->d_inode;
 	int	status;
-	struct inode * inode;
 
 	dprintk("NFS: nfs_lock(f=%4x/%ld, t=%x, fl=%x, r=%ld:%ld)\n",
-			filp->f_dentry->d_inode->i_dev, filp->f_dentry->d_inode->i_ino,
+			inode->i_dev, inode->i_ino,
 			fl->fl_type, fl->fl_flags,
 			fl->fl_start, fl->fl_end);
 
-	if (!(inode = filp->f_dentry->d_inode))
+	if (!inode)
 		return -EINVAL;
 
 	/* No mandatory locks over NFS */
@@ -209,8 +237,8 @@ nfs_lock(struct file *filp, int cmd, struct file_lock *fl)
 	/* If unlocking a file region, flush dirty pages (unless we've
 	 * been killed by a signal, that is). */
 	if (cmd == F_SETLK && fl->fl_type == F_UNLCK
-	 && !(current->signal & ~current->blocked)) {
-		status = nfs_flush_dirty_pages(inode,
+	    && !signal_pending(current)) {
+		status = nfs_flush_dirty_pages(inode, current->pid,
 			fl->fl_start, fl->fl_end == NLM_OFFSET_MAX? 0 :
 			fl->fl_end - fl->fl_start + 1);
 		if (status < 0)
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index e91b34a34..c070d130b 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -33,13 +33,17 @@
 #include <asm/uaccess.h>
 
 #define NFSDBG_FACILITY		NFSDBG_VFS
+#define NFS_PARANOIA 1
 
-static int nfs_notify_change(struct inode *, struct iattr *);
+extern void nfs_invalidate_dircache_sb(struct super_block *);
+extern int  check_failed_request(struct inode *);
+
+static void nfs_read_inode(struct inode *);
 static void nfs_put_inode(struct inode *);
 static void nfs_delete_inode(struct inode *);
+static int  nfs_notify_change(struct inode *, struct iattr *);
 static void nfs_put_super(struct super_block *);
-static void nfs_read_inode(struct inode *);
-static int nfs_statfs(struct super_block *, struct statfs *, int bufsiz);
+static int  nfs_statfs(struct super_block *, struct statfs *, int);
 
 static struct super_operations nfs_sops = { 
 	nfs_read_inode,		/* read inode */
@@ -67,20 +71,51 @@ nfs_read_inode(struct inode * inode)
 {
 	inode->i_blksize = inode->i_sb->s_blocksize;
 	inode->i_mode = 0;
+	inode->i_rdev = 0;
 	inode->i_op = NULL;
 	NFS_CACHEINV(inode);
+	NFS_ATTRTIMEO(inode) = NFS_MINATTRTIMEO(inode);
 }
 
 static void
 nfs_put_inode(struct inode * inode)
 {
 	dprintk("NFS: put_inode(%x/%ld)\n", inode->i_dev, inode->i_ino);
+	/*
+	 * We want to get rid of unused inodes ...
+	 */
+	if (inode->i_count == 1)
+		inode->i_nlink = 0;
 }
 
 static void
 nfs_delete_inode(struct inode * inode)
 {
+	int failed;
+
 	dprintk("NFS: delete_inode(%x/%ld)\n", inode->i_dev, inode->i_ino);
+	/*
+	 * Flush out any pending write requests ...
+	 */
+	if (NFS_WRITEBACK(inode) != NULL) {
+		unsigned long timeout = jiffies + 5*HZ;
+		printk("NFS: inode %ld, invalidating pending RPC requests\n",
+			inode->i_ino);
+		nfs_invalidate_pages(inode);
+		while (NFS_WRITEBACK(inode) != NULL && jiffies < timeout) {
+			current->state = TASK_INTERRUPTIBLE;
+			current->timeout = jiffies + HZ/10;
+			schedule();
+		}
+		current->state = TASK_RUNNING;
+		if (NFS_WRITEBACK(inode) != NULL)
+			printk("NFS: Arghhh, stuck RPC requests!\n");
+	}
+
+	failed = check_failed_request(inode);
+	if (failed)
+		printk("NFS: inode %ld had %d failed requests\n",
+			inode->i_ino, failed);
 	clear_inode(inode);
 }
 
@@ -90,13 +125,21 @@ nfs_put_super(struct super_block *sb)
 	struct nfs_server *server = &sb->u.nfs_sb.s_server;
 	struct rpc_clnt	*rpc;
 
+	/*
+	 * Lock the super block while we bring down the daemons.
+	 */
+	lock_super(sb);
 	if ((rpc = server->client) != NULL)
 		rpc_shutdown_client(rpc);
 
 	if (!(server->flags & NFS_MOUNT_NONLM))
 		lockd_down();	/* release rpc.lockd */
 	rpciod_down();		/* release rpciod */
-	lock_super(sb);
+	/*
+	 * Invalidate the dircache for this superblock.
+	 */
+	nfs_invalidate_dircache_sb(sb);
+
 	sb->s_dev = 0;
 	unlock_super(sb);
 	MOD_DEC_USE_COUNT;
@@ -147,14 +190,12 @@ nfs_read_super(struct super_block *sb, void *raw_data, int silent)
 	unsigned int		authflavor;
 	int			tcp;
 	kdev_t			dev = sb->s_dev;
+	struct inode		*root_inode;
 
 	MOD_INC_USE_COUNT;
-	if (!data) {
-		printk("nfs_read_super: missing data argument\n");
-		sb->s_dev = 0;
-		MOD_DEC_USE_COUNT;
-		return NULL;
-	}
+	if (!data)
+		goto out_miss_args;
+
 	if (data->version != NFS_MOUNT_VERSION) {
 		printk("nfs warning: mount version %s than kernel\n",
 			data->version < NFS_MOUNT_VERSION ? "older" : "newer");
@@ -164,13 +205,19 @@ nfs_read_super(struct super_block *sb, void *raw_data, int silent)
 			data->bsize  = 0;
 	}
 
+	/* We now require that the mount process passes the remote address */
+	memcpy(&srvaddr, &data->addr, sizeof(srvaddr));
+	if (srvaddr.sin_addr.s_addr == INADDR_ANY)
+		goto out_no_remote;
+
 	lock_super(sb);
 
-	server           = &sb->u.nfs_sb.s_server;
 	sb->s_magic      = NFS_SUPER_MAGIC;
 	sb->s_dev        = dev;
 	sb->s_op         = &nfs_sops;
 	sb->s_blocksize  = nfs_block_size(data->bsize, &sb->s_blocksize_bits);
+	sb->u.nfs_sb.s_root = data->root;
+	server           = &sb->u.nfs_sb.s_server;
 	server->rsize    = nfs_block_size(data->rsize, NULL);
 	server->wsize    = nfs_block_size(data->wsize, NULL);
 	server->flags    = data->flags;
@@ -179,15 +226,6 @@ nfs_read_super(struct super_block *sb, void *raw_data, int silent)
 	server->acdirmin = data->acdirmin*HZ;
 	server->acdirmax = data->acdirmax*HZ;
 	strcpy(server->hostname, data->hostname);
-	sb->u.nfs_sb.s_root = data->root;
-
-	/* We now require that the mount process passes the remote address */
-	memcpy(&srvaddr, &data->addr, sizeof(srvaddr));
-	if (srvaddr.sin_addr.s_addr == INADDR_ANY) {
-		printk("NFS: mount program didn't pass remote address!\n");
-		MOD_DEC_USE_COUNT;
-		return NULL;
-	}
 
 	/* Which protocol do we use? */
 	tcp   = (data->flags & NFS_MOUNT_TCP);
@@ -210,18 +248,13 @@ nfs_read_super(struct super_block *sb, void *raw_data, int silent)
 	/* Now create transport and client */
 	xprt = xprt_create_proto(tcp? IPPROTO_TCP : IPPROTO_UDP,
 						&srvaddr, &timeparms);
-	if (xprt == NULL) {
-		printk("NFS: cannot create RPC transport.\n");
-		goto failure;
-	}
+	if (xprt == NULL)
+		goto out_no_xprt;
 
 	clnt = rpc_create_client(xprt, server->hostname, &nfs_program,
 						NFS_VERSION, authflavor);
-	if (clnt == NULL) {
-		printk("NFS: cannot create RPC client.\n");
-		xprt_destroy(xprt);
-		goto failure;
-	}
+	if (clnt == NULL)
+		goto out_no_client;
 
 	clnt->cl_intr     = (data->flags & NFS_MOUNT_INTR)? 1 : 0;
 	clnt->cl_softrtry = (data->flags & NFS_MOUNT_SOFT)? 1 : 0;
@@ -229,29 +262,67 @@ nfs_read_super(struct super_block *sb, void *raw_data, int silent)
 	server->client    = clnt;
 
 	/* Fire up rpciod if not yet running */
+#ifdef RPCIOD_RESULT
+	if (rpciod_up())
+		goto out_no_iod;
+#else
 	rpciod_up();
+#endif
 
-	/* Unlock super block and try to get root fh attributes */
+	/*
+	 * Keep the super block locked while we try to get 
+	 * the root fh attributes.
+	 */
+	root_inode = nfs_fhget(sb, &data->root, NULL);
+	if (!root_inode)
+		goto out_no_root;
+	sb->s_root = d_alloc_root(root_inode, NULL);
+	if (!sb->s_root)
+		goto out_no_root;
+	/* We're airborne */
 	unlock_super(sb);
 
-	sb->s_root = d_alloc_root(nfs_fhget(sb, &data->root, NULL), NULL);
-	if (sb->s_root != NULL) {
-		/* We're airborne */
-		if (!(server->flags & NFS_MOUNT_NONLM))
-			lockd_up();
-		return sb;
-	}
+	/* Check whether to start the lockd process */
+	if (!(server->flags & NFS_MOUNT_NONLM))
+		lockd_up();
+	return sb;
 
 	/* Yargs. It didn't work out. */
+out_no_root:
 	printk("nfs_read_super: get root inode failed\n");
-	rpc_shutdown_client(server->client);
+	iput(root_inode);
 	rpciod_down();
+#ifdef RPCIOD_RESULT
+	goto out_shutdown;
 
-failure:
-	MOD_DEC_USE_COUNT;
-	if (sb->s_lock)
-		unlock_super(sb);
+out_no_iod:
+	printk("nfs_read_super: couldn't start rpciod!\n");
+out_shutdown:
+#endif
+	rpc_shutdown_client(server->client);
+	goto out_unlock;
+
+out_no_client:
+	printk("NFS: cannot create RPC client.\n");
+	xprt_destroy(xprt);
+	goto out_unlock;
+
+out_no_xprt:
+	printk("NFS: cannot create RPC transport.\n");
+out_unlock:
+	unlock_super(sb);
+	goto out_fail;
+
+out_no_remote:
+	printk("NFS: mount program didn't pass remote address!\n");
+	goto out_fail;
+
+out_miss_args:
+	printk("nfs_read_super: missing data argument\n");
+
+out_fail:
 	sb->s_dev = 0;
+	MOD_DEC_USE_COUNT;
 	return NULL;
 }
 
@@ -312,14 +383,48 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fhandle,
 		printk("nfs_fhget: iget failed\n");
 		return NULL;
 	}
-	if (inode->i_dev == sb->s_dev) {
-		if (inode->i_ino != fattr->fileid) {
-			printk("nfs_fhget: unexpected inode from iget\n");
-			return inode;
-		}
-		*NFS_FH(inode) = *fhandle;
-		nfs_refresh_inode(inode, fattr);
+#ifdef NFS_PARANOIA
+if (inode->i_dev != sb->s_dev)
+printk("nfs_fhget: impossible\n");
+#endif
+
+	if (inode->i_ino != fattr->fileid) {
+		printk("nfs_fhget: unexpected inode from iget\n");
+		return inode;
 	}
+
+	/*
+	 * Check whether the mode has been set, as we only want to
+	 * do this once. (We don't allow inodes to change types.)
+	 */
+	if (inode->i_mode == 0) {
+		inode->i_mode = fattr->mode;
+		if (S_ISREG(inode->i_mode))
+			inode->i_op = &nfs_file_inode_operations;
+		else if (S_ISDIR(inode->i_mode))
+			inode->i_op = &nfs_dir_inode_operations;
+		else if (S_ISLNK(inode->i_mode))
+			inode->i_op = &nfs_symlink_inode_operations;
+		else if (S_ISCHR(inode->i_mode)) {
+			inode->i_op = &chrdev_inode_operations;
+			inode->i_rdev = to_kdev_t(fattr->rdev);
+		} else if (S_ISBLK(inode->i_mode)) {
+			inode->i_op = &blkdev_inode_operations;
+			inode->i_rdev = to_kdev_t(fattr->rdev);
+		} else if (S_ISFIFO(inode->i_mode))
+			init_fifo(inode);
+		else
+			inode->i_op = NULL;
+		/*
+		 * Preset the size and mtime, as there's no need
+		 * to invalidate the caches.
+		 */ 
+		inode->i_size  = fattr->size;
+		inode->i_mtime = fattr->mtime.seconds;
+		NFS_OLDMTIME(inode) = fattr->mtime.seconds;
+	}
+	*NFS_FH(inode) = *fhandle;
+	nfs_refresh_inode(inode, fattr);
 	dprintk("NFS: fhget(%x/%ld ct=%d)\n",
 		inode->i_dev, inode->i_ino,
 		inode->i_count);
@@ -334,6 +439,17 @@ nfs_notify_change(struct inode *inode, struct iattr *attr)
 	struct nfs_fattr fattr;
 	int error;
 
+	/*
+	 * Make sure the inode is up-to-date.
+	 */
+	error = nfs_revalidate(inode);
+	if (error) {
+#ifdef NFS_PARANOIA
+printk("nfs_notify_change: revalidate failed, error=%d\n", error);
+#endif
+		goto out;
+	}
+
 	sattr.mode = (u32) -1;
 	if (attr->ia_valid & ATTR_MODE) 
 		sattr.mode = attr->ia_mode;
@@ -346,7 +462,6 @@ nfs_notify_change(struct inode *inode, struct iattr *attr)
 	if (attr->ia_valid & ATTR_GID)
 		sattr.gid = attr->ia_gid;
 
-
 	sattr.size = (u32) -1;
 	if ((attr->ia_valid & ATTR_SIZE) && S_ISREG(inode->i_mode))
 		sattr.size = attr->ia_size;
@@ -364,11 +479,25 @@ nfs_notify_change(struct inode *inode, struct iattr *attr)
 	}
 
 	error = nfs_proc_setattr(NFS_SERVER(inode), NFS_FH(inode),
-		&sattr, &fattr);
-	if (!error) {
+				&sattr, &fattr);
+	if (error)
+		goto out;
+	/*
+	 * If we changed the size or mtime, update the inode
+	 * now to avoid invalidating the page cache.
+	 */
+	if (sattr.size != (u32) -1) {
+		if (sattr.size != fattr.size)
+			printk("nfs_notify_change: sattr=%d, fattr=%d??\n",
+				sattr.size, fattr.size);
 		nfs_truncate_dirty_pages(inode, sattr.size);
-		nfs_refresh_inode(inode, &fattr);
+		inode->i_size  = sattr.size;
+		inode->i_mtime = fattr.mtime.seconds;
 	}
+	if (sattr.mtime.seconds != (u32) -1)
+		inode->i_mtime = fattr.mtime.seconds;
+	error = nfs_refresh_inode(inode, &fattr);
+out:
 	return error;
 }
 
@@ -384,57 +513,154 @@ nfs_revalidate(struct inode *inode)
 /*
  * This function is called whenever some part of NFS notices that
  * the cached attributes have to be refreshed.
- *
- * This is a bit tricky because we have to make sure all dirty pages
- * have been sent off to the server before calling invalidate_inode_pages.
- * To make sure no other process adds more write requests while we try
- * our best to flush them, we make them sleep during the attribute refresh.
- *
- * A very similar scenario holds for the dir cache.
  */
 int
 _nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
 {
 	struct nfs_fattr fattr;
-	int		 status;
+	int		 status = 0;
 
 	if (jiffies - NFS_READTIME(inode) < NFS_ATTRTIMEO(inode))
-		return 0;
+		goto out;
 
 	dfprintk(PAGECACHE, "NFS: revalidating %x/%ld inode\n",
 			inode->i_dev, inode->i_ino);
-	NFS_READTIME(inode) = jiffies;
-	if ((status = nfs_proc_getattr(server, NFS_FH(inode), &fattr)) < 0)
+	status = nfs_proc_getattr(server, NFS_FH(inode), &fattr);
+	if (status) {
+#ifdef NFS_PARANOIA
+printk("nfs_revalidate_inode: getattr failed, error=%d\n", status);
+#endif
 		goto done;
+	}
 
-	nfs_refresh_inode(inode, &fattr);
-	if (fattr.mtime.seconds != NFS_OLDMTIME(inode)) {
-		if (!S_ISDIR(inode->i_mode)) {
-			/* This sends off all dirty pages off to the server.
-			 * Note that this function must not sleep. */
-			nfs_invalidate_pages(inode);
-			invalidate_inode_pages(inode);
-		} else {
-			nfs_invalidate_dircache(inode);
-		}
-
-		NFS_OLDMTIME(inode)  = fattr.mtime.seconds;
-		NFS_ATTRTIMEO(inode) = NFS_MINATTRTIMEO(inode);
-	} else {
+	status = nfs_refresh_inode(inode, &fattr);
+	if (status)
+		goto done;
+	if (fattr.mtime.seconds == NFS_OLDMTIME(inode)) {
 		/* Update attrtimeo value */
 		if ((NFS_ATTRTIMEO(inode) <<= 1) > NFS_MAXATTRTIMEO(inode))
 			NFS_ATTRTIMEO(inode) = NFS_MAXATTRTIMEO(inode);
 	}
-	status = 0;
+	NFS_OLDMTIME(inode) = fattr.mtime.seconds;
 
 done:
 	dfprintk(PAGECACHE,
 		"NFS: inode %x/%ld revalidation complete (status %d).\n",
 				inode->i_dev, inode->i_ino, status);
+out:
 	return status;
 }
 
 /*
+ * Many nfs protocol calls return the new file attributes after
+ * an operation.  Here we update the inode to reflect the state
+ * of the server's inode.
+ *
+ * This is a bit tricky because we have to make sure all dirty pages
+ * have been sent off to the server before calling invalidate_inode_pages.
+ * To make sure no other process adds more write requests while we try
+ * our best to flush them, we make them sleep during the attribute refresh.
+ *
+ * A very similar scenario holds for the dir cache.
+ */
+int
+nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr)
+{
+	int invalid = 0;
+	int error = -EIO;
+
+	dfprintk(VFS, "NFS: refresh_inode(%x/%ld ct=%d)\n",
+		 inode->i_dev, inode->i_ino, inode->i_count);
+
+	if (!inode || !fattr) {
+		printk("nfs_refresh_inode: inode or fattr is NULL\n");
+		goto out;
+	}
+	if (inode->i_ino != fattr->fileid) {
+		printk("nfs_refresh_inode: inode number mismatch\n");
+		goto out;
+	}
+
+	/*
+	 * Make sure the inode's type hasn't changed.
+	 */
+	if ((inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT))
+		goto out_changed;
+
+	/*
+	 * If the size or mtime changed from outside, we want
+	 * to invalidate the local caches immediately.
+	 */
+	if (inode->i_size != fattr->size) {
+#ifdef NFS_DEBUG_VERBOSE
+printk("NFS: size change on %x/%ld\n", inode->i_dev, inode->i_ino);
+#endif
+		invalid = 1;
+	}
+	if (inode->i_mtime != fattr->mtime.seconds) {
+#ifdef NFS_DEBUG_VERBOSE
+printk("NFS: mtime change on %x/%ld\n", inode->i_dev, inode->i_ino);
+#endif
+		invalid = 1;
+	}
+
+	inode->i_mode = fattr->mode;
+	inode->i_nlink = fattr->nlink;
+	inode->i_uid = fattr->uid;
+	inode->i_gid = fattr->gid;
+
+	inode->i_size = fattr->size;
+	inode->i_blocks = fattr->blocks;
+	inode->i_atime = fattr->atime.seconds;
+	inode->i_mtime = fattr->mtime.seconds;
+	inode->i_ctime = fattr->ctime.seconds;
+	/*
+	 * Update the read time so we don't revalidate too often.
+	 */
+	NFS_READTIME(inode) = jiffies;
+	error = 0;
+	if (invalid)
+		goto out_invalid;
+out:
+	return error;
+
+out_changed:
+	/*
+	 * Big trouble! The inode has become a different object.
+	 */
+#ifdef NFS_PARANOIA
+printk("nfs_refresh_inode: inode %ld mode changed, %07o to %07o\n",
+inode->i_ino, inode->i_mode, fattr->mode);
+#endif
+	fattr->mode = inode->i_mode; /* save mode */
+	make_bad_inode(inode);
+	inode->i_mode = fattr->mode; /* restore mode */
+	/*
+	 * No need to worry about unhashing the dentry, as the
+	 * lookup validation will know that the inode is bad.
+	 * (But we fall through to invalidate the caches.)
+	 */
+
+out_invalid:
+	/*
+	 * Invalidate the local caches
+	 */
+#ifdef NFS_DEBUG_VERBOSE
+printk("nfs_refresh_inode: invalidating %ld pages\n", inode->i_nrpages);
+#endif
+	if (!S_ISDIR(inode->i_mode)) {
+		/* This sends all dirty pages off to the server.
+		 * Note that this function must not sleep. */
+		nfs_invalidate_pages(inode);
+		invalidate_inode_pages(inode);
+	} else
+		nfs_invalidate_dircache(inode);
+	NFS_CACHEINV(inode);
+	NFS_ATTRTIMEO(inode) = NFS_MINATTRTIMEO(inode);
+	goto out;
+}
+
+/*
  * File system information
  */
 static struct file_system_type nfs_fs_type = {
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index 5eec5eb65..0311b7d0b 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -23,6 +23,7 @@
 #include <linux/nfs_fs.h>
 
 #define NFSDBG_FACILITY		NFSDBG_XDR
+/* #define NFS_PARANOIA 1 */
 
 #define QUADLEN(len)		(((len) + 3) >> 2)
 static int			nfs_stat_to_errno(int stat);
@@ -371,17 +372,18 @@ nfs_xdr_readdirargs(struct rpc_rqst *req, u32 *p, struct nfs_readdirargs *args)
  * to avoid a malloc of NFS_MAXNAMLEN+1 for each file name.
  * After decoding, the layout in memory looks like this:
  *	entry1 entry2 ... entryN <space> stringN ... string2 string1
+ * Each entry consists of three __u32 values, the same space as NFS uses.
  * Note that the strings are not null-terminated so that the entire number
  * of entries returned by the server should fit into the buffer.
  */
 static int
 nfs_xdr_readdirres(struct rpc_rqst *req, u32 *p, struct nfs_readdirres *res)
 {
-	struct nfs_entry	*entry;
 	struct iovec		*iov = req->rq_rvec;
 	int			status, nr, len;
-	char			*string;
+	char			*string, *start;
 	u32			*end;
+	__u32			fileid, cookie, *entry;
 
 	if ((status = ntohl(*p++)))
 		return -nfs_stat_to_errno(status);
@@ -396,10 +398,11 @@ nfs_xdr_readdirres(struct rpc_rqst *req, u32 *p, struct nfs_readdirres *res)
 	end = (u32 *) ((u8 *) p + iov[1].iov_len);
 
 	/* Get start and end of dirent buffer */
-	entry  = (struct nfs_entry *) res->buffer;
+	entry  = (__u32 *) res->buffer;
+	start  = (char *) res->buffer;
 	string = (char *) res->buffer + res->bufsiz;
-	for (nr = 0; *p++; nr++, entry++) {
-		entry->fileid = ntohl(*p++);
+	for (nr = 0; *p++; nr++) {
+		fileid = ntohl(*p++);
 
 		len = ntohl(*p++);
 		if ((p + QUADLEN(len) + 3) > end) {
@@ -413,27 +416,36 @@ nfs_xdr_readdirres(struct rpc_rqst *req, u32 *p, struct nfs_readdirres *res)
 			return -errno_NFSERR_IO;
 		}
 		string -= len;
-		if ((void *) (entry+1) > (void *) string) {
-			/* This may actually happen because an nfs_entry
-			 * will take up more space than the XDR data. On
-			 * 32bit machines that's due to 8byte alignment,
-			 * on 64bit machines that's because the char * takes
-			 * up 2 longs.
-			 *
-			 * THIS IS BAD!
+		if ((void *) (entry+3) > (void *) string) {
+			/* 
+			 * This error is impossible as long as the temp
+			 * buffer is no larger than the user buffer. The 
+			 * current packing algorithm uses the same amount
+			 * of space in the user buffer as in the XDR data,
+			 * so it's guaranteed to fit.
 			 */
-			printk(KERN_NOTICE "NFS: should not happen in %s!\n",
+			printk("NFS: incorrect buffer size in %s!\n",
 				__FUNCTION__);
 			break;
 		}
 
-		entry->name = string;
-		entry->length = len;
 		memmove(string, p, len);
 		p += QUADLEN(len);
-		entry->cookie = ntohl(*p++);
-		entry->eof = !p[0] && p[1];
+		cookie = ntohl(*p++);
+		/*
+		 * To make everything fit, we encode the length, offset,
+		 * and eof flag into 32 bits. This works for filenames
+		 * up to 32K and PAGE_SIZE up to 64K.
+		 */
+		status = !p[0] && p[1] ? (1 << 15) : 0; /* eof flag */
+		*entry++ = fileid;
+		*entry++ = cookie;
+		*entry++ = ((string - start) << 16) | status | (len & 0x7FFF);
 	}
+#ifdef NFS_PARANOIA
+printk("nfs_xdr_readdirres: %d entries, ent sp=%d, str sp=%d\n",
+nr, ((char *) entry - start), (start + res->bufsiz - string));
+#endif
 	return nr;
 }
 
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index f48a6217c..94096d928 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -384,17 +384,18 @@ nfs_xdr_readdirargs(struct rpc_rqst *req, u32 *p, struct nfs_readdirargs *args)
  * to avoid a malloc of NFS_MAXNAMLEN+1 for each file name.
  * After decoding, the layout in memory looks like this:
  *	entry1 entry2 ... entryN <space> stringN ... string2 string1
+ * Each entry consists of three __u32 values, the same space as NFS uses.
  * Note that the strings are not null-terminated so that the entire number
  * of entries returned by the server should fit into the buffer.
  */
 static int
 nfs_xdr_readdirres(struct rpc_rqst *req, u32 *p, struct nfs_readdirres *res)
 {
-	struct nfs_entry	*entry;
 	struct iovec		*iov = req->rq_rvec;
 	int			status, nr, len;
-	char			*string;
+	char			*string, *start;
 	u32			*end;
+	__u32			fileid, cookie, *entry;
 
 	if ((status = ntohl(*p++)))
 		return -nfs_stat_to_errno(status);
@@ -413,10 +414,11 @@ nfs_xdr_readdirres(struct rpc_rqst *req, u32 *p, struct nfs_readdirres *res)
 		return -errno_NFSERR_IO;
 	}
 
-	string = (char *) res->buffer + res->bufsiz;
-	entry = (struct nfs_entry *) res->buffer;
-	for (nr = 0; *p++; nr++, entry++) {
-		entry->fileid = ntohl(*p++);
+	entry  = (__u32 *) res->buffer;
+	start  = (char *) res->buffer;
+	string = start + res->bufsiz;
+	for (nr = 0; *p++; nr++) {
+		fileid = ntohl(*p++);
 
 		len = ntohl(*p++);
 		if ((p + QUADLEN(len) + 3) > end) {
@@ -430,22 +432,40 @@ nfs_xdr_readdirres(struct rpc_rqst *req, u32 *p, struct nfs_readdirres *res)
 			return -errno_NFSERR_IO;
 		}
 		string -= len;
-		if ((void *) (entry+1) > (void *) string) {
-			dprintk("NFS: shouldnothappen in readdirres_decode!\n");
-			break;	/* should not happen */
+		if ((void *) (entry+3) > (void *) string) {
+			/* 
+			 * This error is impossible as long as the temp
+			 * buffer is no larger than the user buffer. The 
+			 * current packing algorithm uses the same amount
+			 * of space in the user buffer as in the XDR data,
+			 * so it's guaranteed to fit.
+			 */
+			printk("NFS: incorrect buffer size in %s!\n",
+				__FUNCTION__);
+			break;
 		}
 
-		entry->name = string;
-		entry->length = len;
 		memmove(string, p, len);
 		p += QUADLEN(len);
-		entry->cookie = ntohl(*p++);
-		entry->eof = !p[0] && p[1];
+		cookie = ntohl(*p++);
+		/*
+		 * To make everything fit, we encode the offset (bits 16-31),
+		 * eof flag (bit 15) and length (bits 0-14) into 32 bits. This
+		 * works for filenames up to 32K and PAGE_SIZE up to 64K.
+		 */
+		status = !p[0] && p[1] ? (1 << 15) : 0; /* eof flag */
+		*entry++ = fileid;
+		*entry++ = cookie;
+		*entry++ = ((string - start) << 16) | status | (len & 0x7FFF);
 		/*
 		dprintk("NFS: decoded dirent %.*s cookie %d eof %d\n",
-			len, string, entry->cookie, entry->eof);
+			len, string, cookie, status);
 		 */
 	}
+#ifdef NFS_PARANOIA
+printk("nfs_xdr_readdirres: %d entries, ent sp=%d, str sp=%d\n",
+nr, ((char *) entry - start), (start + res->bufsiz - string));
+#endif
 	return nr;
 }
 
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index 58dcd95d0..416ed294e 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -250,25 +250,43 @@ nfs_proc_rmdir(struct nfs_server *server, struct nfs_fh *dir, const char *name)
  */
 int
 nfs_proc_readdir(struct nfs_server *server, struct nfs_fh *fhandle,
-			u32 cookie, unsigned int size, struct nfs_entry *entry)
+			u32 cookie, unsigned int size, __u32 *entry)
 {
 	struct nfs_readdirargs	arg;
 	struct nfs_readdirres	res;
 	void *			buffer;
+	unsigned int		buf_size = PAGE_SIZE;
 	int			status;
 
 	/* First get a temp buffer for the readdir reply */
-	while (!(buffer = (void *) get_free_page(GFP_USER))) {
-		need_resched = 1;
-		schedule();
-		if (signalled())
-			return -ERESTARTSYS;
-	}
+	/* N.B. get_free_page() already zeroes the page -- does it need to be cleared? */
+	status = -ENOMEM;
+	buffer = (void *) get_free_page(GFP_KERNEL);
+	if (!buffer)
+		goto out;
+
+	/*
+	 * Calculate the effective size of the buffer.  To make sure
+	 * that the returned data will fit into the user's buffer,
+	 * we decrease the buffer size as necessary.
+	 *
+	 * Note: NFS returns three __u32 values for each entry,
+	 * and we assume that the data is packed into the user
+	 * buffer with the same efficiency. 
+	 */
+	if (size < buf_size)
+		buf_size = size;
+	if (server->rsize < buf_size)
+		buf_size = server->rsize;
+#if 0
+printk("nfs_proc_readdir: user size=%d, rsize=%d, buf_size=%d\n",
+size, server->rsize, buf_size);
+#endif
 
 	arg.fh = fhandle;
 	arg.cookie = cookie;
 	arg.buffer = buffer;
-	arg.bufsiz = server->rsize < PAGE_SIZE? server->rsize : PAGE_SIZE;
+	arg.bufsiz = buf_size;
 	res.buffer = entry;
 	res.bufsiz = size;
 
@@ -276,6 +294,7 @@ nfs_proc_readdir(struct nfs_server *server, struct nfs_fh *fhandle,
 	status = rpc_call(server->client, NFSPROC_READDIR, &arg, &res, 0);
 	dprintk("NFS reply readdir: %d\n", status);
 	free_page((unsigned long) buffer);
+out:
 	return status;
 }
 
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index ec5a1f7be..97663cc11 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -68,6 +68,8 @@
 
 #define NFSDBG_FACILITY		NFSDBG_PAGECACHE
 
+int check_failed_request(struct inode *);
+
 static void			nfs_wback_lock(struct rpc_task *task);
 static void			nfs_wback_result(struct rpc_task *task);
 
@@ -120,6 +122,7 @@ struct nfs_wreq {
  * Limit number of delayed writes
  */
 static int			nr_write_requests = 0;
+static int			nr_failed_requests = 0;
 static struct rpc_wait_queue	write_queue = RPC_INIT_WAITQ("write_chain");
 struct nfs_wreq *		nfs_failed_requests = NULL;
 
@@ -196,22 +199,44 @@ nfs_writepage_sync(struct inode *inode, struct page *page,
 			clear_bit(PG_uptodate, &page->flags);
 			goto io_error;
 		}
+		if (result != wsize)
+			printk("NFS: short write, wsize=%u, result=%d\n",
+			wsize, result);
 		refresh = 1;
 		buffer  += wsize;
 		offset  += wsize;
 		written += wsize;
 		count   -= wsize;
+		/*
+		 * If we've extended the file, update the inode
+		 * now so we don't invalidate the cache.
+		 */
+		if (offset > inode->i_size)
+			inode->i_size = offset;
 	} while (count);
 
 io_error:
+	/* N.B. do we want to refresh if there was an error?? (fattr valid?) */
 	if (refresh) {
 		/* See comments in nfs_wback_result */
+		/* N.B. I don't think this is right -- sync writes in order */
 		if (fattr.size < inode->i_size)
 			fattr.size = inode->i_size;
+		if (fattr.mtime.seconds < inode->i_mtime)
+			printk("nfs_writepage_sync: prior time??\n");
 		/* Solaris 2.5 server seems to send garbled
 		 * fattrs occasionally */
-		if (inode->i_ino == fattr.fileid)
+		if (inode->i_ino == fattr.fileid) {
+			/*
+			 * We expect the mtime value to change, and
+			 * don't want to invalidate the caches.
+			 */
+			inode->i_mtime = fattr.mtime.seconds;
 			nfs_refresh_inode(inode, &fattr);
+		} 
+		else
+			printk("nfs_writepage_sync: inode %ld, got %u?\n",
+				inode->i_ino, fattr.fileid);
 	}
 
 	nfs_unlock_page(page);
@@ -260,28 +285,73 @@ find_write_request(struct inode *inode, struct page *page)
 /*
  * Find a failed write request by pid
  */
-static inline struct nfs_wreq *
+static struct nfs_wreq *
 find_failed_request(struct inode *inode, pid_t pid)
 {
 	struct nfs_wreq	*head, *req;
 
-	if (!(req = head = nfs_failed_requests))
-		return NULL;
-	do {
-		if (req->wb_inode == inode && req->wb_pid == pid)
+	req = head = nfs_failed_requests;	/* start at the head of the failed list */
+	while (req != NULL) {	/* empty list: fall through to return NULL; pid 0 matches any pid */
+		if (req->wb_inode == inode && (pid == 0 || req->wb_pid == pid))
 			return req;
-	} while ((req = WB_NEXT(req)) != head);
+		if ((req = WB_NEXT(req)) == head)	/* back at the head: every entry was checked */
+			break;
+	}
 	return NULL;
 }
 
 /*
+ * Add a request to the failed list and count it in nr_failed_requests.
+ */
+static void
+append_failed_request(struct nfs_wreq * req)
+{
+	static int old_max = 16;	/* count at which the next message is printed */
+
+	append_write_request(&nfs_failed_requests, req);
+	nr_failed_requests++;
+	if (nr_failed_requests >= old_max) {
+		printk("NFS: %d failed requests\n", nr_failed_requests);
+		old_max = old_max << 1;	/* double the threshold before printing again */
+	}
+}
+
+/*
+ * Remove a request from the failed list, free it and decrement nr_failed_requests.
+ */
+static void
+remove_failed_request(struct nfs_wreq * req)
+{
+	remove_write_request(&nfs_failed_requests, req);	/* unlink before freeing */
+	kfree(req);	/* req is gone; callers must not touch it afterwards */
+	nr_failed_requests--;
+}
+
+/*
+ * Find and release all failed requests for this inode; returns how many were released.
+ */
+int
+check_failed_request(struct inode * inode)
+{
+	struct nfs_wreq * req;
+	int found = 0;	/* number of requests released so far */
+
+	while ((req = find_failed_request(inode, 0)) != NULL) {	/* pid 0: match any pid */
+		remove_failed_request(req);	/* unlinks and frees req */
+		found++;
+	}
+	return found;
+}
+
+/*
  * Try to merge adjacent write requests. This works only for requests
  * issued by the same user.
  */
 static inline int
-update_write_request(struct nfs_wreq *req, unsigned first, unsigned bytes)
+update_write_request(struct nfs_wreq *req, unsigned int first,
+			unsigned int bytes)
 {
-	unsigned	rqfirst = req->wb_offset,
+	unsigned int	rqfirst = req->wb_offset,
 			rqlast = rqfirst + req->wb_bytes,
 			last = first + bytes;
 
@@ -313,7 +383,7 @@ update_write_request(struct nfs_wreq *req, unsigned first, unsigned bytes)
  */
 static inline struct nfs_wreq *
 create_write_request(struct inode *inode, struct page *page,
-				unsigned offset, unsigned bytes)
+			unsigned int offset, unsigned int bytes)
 {
 	struct nfs_wreq *wreq;
 	struct rpc_clnt	*clnt = NFS_CLIENT(inode);
@@ -327,7 +397,7 @@ create_write_request(struct inode *inode, struct page *page,
 
 	wreq = (struct nfs_wreq *) kmalloc(sizeof(*wreq), GFP_USER);
 	if (!wreq)
-		return NULL;
+		goto out_fail;
 	memset(wreq, 0, sizeof(*wreq));
 
 	task = &wreq->wb_task;
@@ -336,11 +406,8 @@ create_write_request(struct inode *inode, struct page *page,
 	task->tk_action = nfs_wback_lock;
 
 	rpcauth_lookupcred(task);	/* Obtain user creds */
-	if (task->tk_status < 0) {
-		rpc_release_task(task);
-		kfree(wreq);
-		return NULL;
-	}
+	if (task->tk_status < 0)
+		goto out_req;
 
 	/* Put the task on inode's writeback request list. */
 	wreq->wb_inode  = inode;
@@ -357,6 +424,12 @@ create_write_request(struct inode *inode, struct page *page,
 		rpc_wake_up_next(&write_queue);
 
 	return wreq;
+
+out_req:
+	rpc_release_task(task);
+	kfree(wreq);
+out_fail:
+	return NULL;
 }
 
 /*
@@ -423,7 +496,9 @@ wait_on_write_request(struct nfs_wreq *req)
 	}
 	remove_wait_queue(&page->wait, &wait);
 	current->state = TASK_RUNNING;
-	atomic_dec(&page->count);
+	if (atomic_read(&page->count) == 1)
+		printk("NFS: page unused while waiting\n");
+	free_page(page_address(page));
 	return retval;
 }
 
@@ -487,12 +562,13 @@ nfs_updatepage(struct inode *inode, struct page *page, const char *buffer,
 	}
 
 	/* Create the write request. */
-	if (!(req = create_write_request(inode, page, offset, count))) {
-		status = -ENOBUFS;
+	status = -ENOBUFS;
+	req = create_write_request(inode, page, offset, count);
+	if (!req)
 		goto done;
-	}
 
 	/* Copy data to page buffer. */
+	/* N.B. should check for fault here ... */
 	copy_from_user(page_addr + offset, buffer, count);
 
 	/* Schedule request */
@@ -519,6 +595,7 @@ done:
 			transfer_page_lock(req);
 			/* rpc_execute(&req->wb_task); */
 			if (sync) {
+				/* N.B. if signalled, result not ready? */
 				wait_on_write_request(req);
 				if ((count = nfs_write_error(inode)) < 0)
 					status = count;
@@ -578,10 +655,20 @@ nfs_flush_pages(struct inode *inode, pid_t pid, off_t offset, off_t len,
 
 			if (rqoffset < end && offset < rqend
 			 && (pid == 0 || req->wb_pid == pid)) {
-				if (!WB_HAVELOCK(req))
+				if (!WB_HAVELOCK(req)) {
+#ifdef NFS_PARANOIA
+printk("nfs_flush: flushing inode=%ld, %d @ %lu\n",
+req->wb_inode->i_ino, req->wb_bytes, rqoffset);
+#endif
 					nfs_flush_request(req);
+				}
 				last = req;
 			}
+		} else {
+#ifdef NFS_PARANOIA
+printk("nfs_flush_pages: in progress inode=%ld, %d @ %lu\n",
+req->wb_inode->i_ino, req->wb_bytes, rqoffset);
+#endif
 		}
 		if (invalidate)
 			req->wb_flags |= NFS_WRITE_INVALIDATE;
@@ -593,7 +680,11 @@ nfs_flush_pages(struct inode *inode, pid_t pid, off_t offset, off_t len,
 }
 
 /*
- * Cancel all writeback requests, both pending and in process.
+ * Cancel all writeback requests, both pending and in progress.
+ *
+ * N.B. This doesn't seem to wake up the tasks -- are we sure
+ * they will eventually complete? Also, this could overwrite a
+ * failed status code from an already-completed task.
  */
 static void
 nfs_cancel_dirty(struct inode *inode, pid_t pid)
@@ -602,7 +693,8 @@ nfs_cancel_dirty(struct inode *inode, pid_t pid)
 
 	req = head = NFS_WRITEBACK(inode);
 	while (req != NULL) {
-		if (req->wb_pid == pid) {
+		/* N.B. check for task already finished? */
+		if (pid == 0 || req->wb_pid == pid) {
 			req->wb_flags |= NFS_WRITE_CANCELLED;
 			rpc_exit(&req->wb_task, 0);
 		}
@@ -620,36 +712,43 @@ nfs_cancel_dirty(struct inode *inode, pid_t pid)
  * this isn't used by the nlm module yet.
  */
 int
-nfs_flush_dirty_pages(struct inode *inode, off_t offset, off_t len)
+nfs_flush_dirty_pages(struct inode *inode, pid_t pid, off_t offset, off_t len)
 {
 	struct nfs_wreq *last = NULL;
+	int result = 0, cancel = 0;	/* cancel: set once nfs_cancel_dirty() has been called */
 
 	dprintk("NFS:      flush_dirty_pages(%x/%ld for pid %d %ld/%ld)\n",
 				inode->i_dev, inode->i_ino, current->pid,
 				offset, len);
 
-	if (IS_SOFT && signalled())
-		nfs_cancel_dirty(inode, current->pid);
+	if (IS_SOFT && signalled()) {
+		nfs_cancel_dirty(inode, pid);
+		cancel = 1;
+	}
 
 	for (;;) {
-		if (IS_SOFT && signalled())
-			return -ERESTARTSYS;
+		if (IS_SOFT && signalled()) {
+			if (!cancel)	/* not already cancelled by the check before the loop */
+				nfs_cancel_dirty(inode, pid);
+			result = -ERESTARTSYS;
+			break;
+		}
 
-		/* Flush all pending writes for this pid and file region */
-		last = nfs_flush_pages(inode, current->pid, offset, len, 0);
+		/* Flush all pending writes for the pid and file region */
+		last = nfs_flush_pages(inode, pid, offset, len, 0);
 		if (last == NULL)
 			break;
 		wait_on_write_request(last);
 	}
 
-	return 0;
+	return result;	/* 0, or -ERESTARTSYS when the loop was left on IS_SOFT && signalled() */
 }
 
 /*
  * Flush out any pending write requests and flag that they be discarded
  * after the write is complete.
  *
- * This function is called from nfs_revalidate_inode just before it calls
+ * This function is called from nfs_refresh_inode just before it calls
  * invalidate_inode_pages. After nfs_flush_pages returns, we can be sure
  * that all dirty pages are locked, so that invalidate_inode_pages does
  * not throw away any dirty pages.
@@ -705,15 +804,14 @@ nfs_check_error(struct inode *inode)
 	dprintk("nfs:      checking for write error inode %04x/%ld\n",
 			inode->i_dev, inode->i_ino);
 
-	if (!(req = find_failed_request(inode, current->pid)))
-		return 0;
-
-	dprintk("nfs: write error %d inode %04x/%ld\n",
+	req = find_failed_request(inode, current->pid);
+	if (req) {
+		dprintk("nfs: write error %d inode %04x/%ld\n",
 			req->wb_task.tk_status, inode->i_dev, inode->i_ino);
 
-	status = req->wb_task.tk_status;
-	remove_write_request(&nfs_failed_requests, req);
-	kfree(req);
+		status = req->wb_task.tk_status;
+		remove_failed_request(req);
+	}
 	return status;
 }
 
@@ -789,36 +887,45 @@ nfs_wback_result(struct rpc_task *task)
 	page   = req->wb_page;
 	status = task->tk_status;
 
-	/* Remove request from writeback list and wake up tasks
-	 * sleeping on it. */
-	remove_write_request(&NFS_WRITEBACK(inode), req);
-
 	if (status < 0) {
 		/*
 		 * An error occurred. Report the error back to the
-		 * application by adding the failed request to the
-		 * inode's error list.
+		 * application by adding the request to the failed
+		 * requests list.
 		 */
-		if (find_failed_request(inode, req->wb_pid)) {
+		if (find_failed_request(inode, req->wb_pid))
 			status = 0;
-		} else {
-			dprintk("NFS: %4d saving write failure code\n",
-						task->tk_pid);
-			append_write_request(&nfs_failed_requests, req);
-		}
 		clear_bit(PG_uptodate, &page->flags);
 	} else if (!WB_CANCELLED(req)) {
+		struct nfs_fattr *fattr = req->wb_fattr;
 		/* Update attributes as result of writeback. 
 		 * Beware: when UDP replies arrive out of order, we
 		 * may end up overwriting a previous, bigger file size.
 		 */
-		if (req->wb_fattr->size < inode->i_size)
-			req->wb_fattr->size = inode->i_size;
-		/* possible Solaris 2.5 server bug workaround */
-		if (inode->i_ino == req->wb_fattr->fileid)
-			nfs_refresh_inode(inode, req->wb_fattr);
+		if (fattr->mtime.seconds >= inode->i_mtime) {
+			if (fattr->size < inode->i_size)
+				fattr->size = inode->i_size;
+
+			/* possible Solaris 2.5 server bug workaround */
+			if (inode->i_ino == fattr->fileid) {
+				/*
+				 * We expect these values to change, and
+				 * don't want to invalidate the caches.
+				 */
+				inode->i_size  = fattr->size;
+				inode->i_mtime = fattr->mtime.seconds;
+				nfs_refresh_inode(inode, fattr);
+			}
+			else
+				printk("nfs_wback_result: inode %ld, got %u?\n",
+					inode->i_ino, fattr->fileid);
+		}
 	}
 
+	/*
+	 * This call might block, so we defer removing the request
+	 * from the inode's writeback list.
+	 */
 	rpc_release_task(task);
 
 	if (WB_INVALIDATE(req))
@@ -830,8 +937,20 @@ nfs_wback_result(struct rpc_task *task)
 		kfree(req->wb_args);
 		req->wb_args = 0;
 	}
+
+	/*
+	 * Now it's safe to remove the request from the inode's 
+	 * writeback list and wake up any tasks sleeping on it.
+	 * If the request failed, add it to the failed list.
+	 */
+	remove_write_request(&NFS_WRITEBACK(inode), req);
+
 	if (status >= 0)
 		kfree(req);
+	else {
+		dprintk("NFS: %4d saving write failure code\n", task->tk_pid);
+		append_failed_request(req);
+	}
 
 	free_page(page_address(page));
 	nr_write_requests--;
author	Ralf Baechle <ralf@linux-mips.org>	1997-12-06 23:51:34 +0000
committer	Ralf Baechle <ralf@linux-mips.org>	1997-12-06 23:51:34 +0000
commit	230e5ab6a084ed50470f101934782dbf54b0d06b (patch)
tree	5dd821c8d33f450470588e7a543f74bf74306e9e /fs/nfs
parent	c9b1c8a64c6444d189856f1e26bdcb8b4cd0113a (diff)