summary refs log tree commit diff stats
path: root/fs/nfs
diff options
context:
space:
mode:
author Ralf Baechle <ralf@linux-mips.org> 1997-12-06 23:51:34 +0000
committer Ralf Baechle <ralf@linux-mips.org> 1997-12-06 23:51:34 +0000
commit 230e5ab6a084ed50470f101934782dbf54b0d06b (patch)
tree 5dd821c8d33f450470588e7a543f74bf74306e9e /fs/nfs
parent c9b1c8a64c6444d189856f1e26bdcb8b4cd0113a (diff)
Merge with Linux 2.1.67.
Diffstat (limited to 'fs/nfs')
-rw-r--r-- fs/nfs/dir.c 858
-rw-r--r-- fs/nfs/file.c 112
-rw-r--r-- fs/nfs/inode.c 384
-rw-r--r-- fs/nfs/nfs2xdr.c 48
-rw-r--r-- fs/nfs/nfs3xdr.c 48
-rw-r--r-- fs/nfs/proc.c 35
-rw-r--r-- fs/nfs/write.c 231
7 files changed, 1195 insertions, 521 deletions
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index acee50754..9e1d936dd 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -29,6 +29,13 @@
#include <asm/segment.h> /* for fs functions */
+#define NFS_MAX_AGE 10*HZ /* max age for dentry validation */
+
+/* needed by smbfs as well ... move to dcache? */
+extern void nfs_renew_times(struct dentry *);
+
+#define NFS_PARANOIA 1
+
/*
* Head for a dircache entry. Currently still very simple; when
* the cache grows larger, we will need a LRU list.
@@ -36,18 +43,20 @@
struct nfs_dirent {
dev_t dev; /* device number */
ino_t ino; /* inode number */
- u32 cookie; /* cooke of first entry */
+ u32 cookie; /* cookie of first entry */
unsigned short valid : 1, /* data is valid */
locked : 1; /* entry locked */
unsigned int size; /* # of entries */
unsigned long age; /* last used */
unsigned long mtime; /* last attr stamp */
struct wait_queue * wait;
- struct nfs_entry * entry;
+ __u32 * entry; /* three __u32's per entry */
};
-static int nfs_dir_open(struct inode * inode, struct file * file);
-static long nfs_dir_read(struct inode *, struct file *, char *, unsigned long);
+static int nfs_safe_remove(struct dentry *);
+
+static int nfs_dir_open(struct inode *, struct file *);
+static ssize_t nfs_dir_read(struct file *, char *, size_t, loff_t *);
static int nfs_readdir(struct file *, void *, filldir_t);
static int nfs_lookup(struct inode *, struct dentry *);
static int nfs_create(struct inode *, struct dentry *, int);
@@ -57,7 +66,8 @@ static int nfs_unlink(struct inode *, struct dentry *);
static int nfs_symlink(struct inode *, struct dentry *, const char *);
static int nfs_link(struct inode *, struct inode *, struct dentry *);
static int nfs_mknod(struct inode *, struct dentry *, int, int);
-static int nfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *);
+static int nfs_rename(struct inode *, struct dentry *,
+ struct inode *, struct dentry *);
static struct file_operations nfs_dir_operations = {
NULL, /* lseek - default */
@@ -102,8 +112,8 @@ nfs_dir_open(struct inode *dir, struct file *file)
return nfs_revalidate_inode(NFS_SERVER(dir), dir);
}
-static long
-nfs_dir_read(struct inode *inode, struct file *filp, char *buf, unsigned long count)
+static ssize_t
+nfs_dir_read(struct file *filp, char *buf, size_t count, loff_t *ppos)
{
return -EISDIR;
}
@@ -123,15 +133,16 @@ static struct nfs_dirent dircache[NFS_MAX_DIRCACHE];
static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
{
+ struct inode *inode = filp->f_dentry->d_inode;
static struct wait_queue *readdir_wait = NULL;
struct wait_queue **waitp = NULL;
struct nfs_dirent *cache, *free;
- struct nfs_entry *entry;
unsigned long age, dead;
u32 cookie;
int ismydir, result;
int i, j, index = 0;
- struct inode *inode = filp->f_dentry->d_inode;
+ __u32 *entry;
+ char *name, *start;
dfprintk(VFS, "NFS: nfs_readdir(%x/%ld)\n", inode->i_dev, inode->i_ino);
if (!inode || !S_ISDIR(inode->i_mode)) {
@@ -148,7 +159,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
again:
if (waitp) {
interruptible_sleep_on(waitp);
- if (current->signal & ~current->blocked)
+ if (signal_pending(current))
return -ERESTARTSYS;
waitp = NULL;
}
@@ -174,7 +185,7 @@ again:
goto again;
}
- if (ismydir && cache->mtime != NFS_OLDMTIME(inode))
+ if (ismydir && cache->mtime != inode->i_mtime)
cache->valid = 0;
if (!cache->valid || cache->age < dead) {
@@ -194,17 +205,15 @@ again:
break;
}
for (j = 0; j < cache->size; j++) {
- /*
- dprintk("NFS: examing entry %.*s @%d\n",
- (int) cache->entry[j].length,
- cache->entry[j].name,
- cache->entry[j].cookie);
- */
- if (cache->entry[j].cookie != cookie)
+ __u32 *this_ent = cache->entry + j*3;
+
+ if (*(this_ent+1) != cookie)
continue;
if (j < cache->size - 1) {
- entry = cache->entry + (index = j + 1);
- } else if (cache->entry[j].eof) {
+ index = j + 1;
+ entry = this_ent + 3;
+ } else if (*(this_ent+2) & (1 << 15)) {
+ /* eof */
return 0;
}
break;
@@ -235,12 +244,10 @@ again:
cache->dev = inode->i_dev;
cache->ino = inode->i_ino;
if (!cache->entry) {
- cache->entry = (struct nfs_entry *)
- get_free_page(GFP_KERNEL);
- if (!cache->entry) {
- result = -ENOMEM;
+ result = -ENOMEM;
+ cache->entry = (__u32 *) get_free_page(GFP_KERNEL);
+ if (!cache->entry)
goto done;
- }
}
result = nfs_proc_readdir(NFS_SERVER(inode), NFS_FH(inode),
@@ -251,31 +258,35 @@ again:
cache->valid = 1;
entry = cache->entry + (index = 0);
}
- cache->mtime = NFS_OLDMTIME(inode);
+ cache->mtime = inode->i_mtime;
cache->age = jiffies;
/*
* Yowza! We have a cache entry...
*/
+ start = (char *) cache->entry;
while (index < cache->size) {
- int nextpos = entry->cookie;
+ __u32 fileid = *entry++;
+ __u32 nextpos = *entry++; /* cookie */
+ __u32 length = *entry++;
/*
+ * Unpack the eof flag, offset, and length
+ */
+ result = length & (1 << 15); /* eof flag */
+ name = start + ((length >> 16) & 0xFFFF);
+ length &= 0x7FFF;
+ /*
dprintk("NFS: filldir(%p, %.*s, %d, %d, %x, eof %x)\n", entry,
- (int) entry->length, entry->name, entry->length,
+ (int) length, name, length,
(unsigned int) filp->f_pos,
- entry->fileid, entry->eof);
+ fileid, result);
*/
- if (filldir(dirent, entry->name, entry->length, cookie, entry->fileid) < 0)
+ if (filldir(dirent, name, length, cookie, fileid) < 0)
break;
cookie = nextpos;
- if (nextpos != entry->cookie) {
- printk("nfs_readdir: shouldn't happen!\n");
- break;
- }
index++;
- entry++;
}
filp->f_pos = cookie;
result = 0;
@@ -293,47 +304,69 @@ done:
}
/*
- * Invalidate dircache entries for inode
+ * Invalidate dircache entries for an inode.
*/
void
nfs_invalidate_dircache(struct inode *inode)
{
- struct nfs_dirent *cache;
+ struct nfs_dirent *cache = dircache;
dev_t dev = inode->i_dev;
ino_t ino = inode->i_ino;
int i;
dfprintk(DIRCACHE, "NFS: invalidate dircache for %x/%ld\n", dev, (long)ino);
- for (i = 0, cache = dircache; i < NFS_MAX_DIRCACHE; i++, cache++) {
- if (!cache->locked && cache->dev == dev && cache->ino == ino)
- cache->valid = 0; /* brute force */
+ for (i = NFS_MAX_DIRCACHE; i--; cache++) {
+ if (cache->ino != ino)
+ continue;
+ if (cache->dev != dev)
+ continue;
+ if (cache->locked) {
+ printk("NFS: cache locked for %s/%ld\n",
+ kdevname(dev), (long) ino);
+ continue;
+ }
+ cache->valid = 0; /* brute force */
}
}
/*
- * Free directory cache memory
- * Called from cleanup_module
+ * Invalidate the dircache for a super block (or all caches),
+ * and release the cache memory.
*/
void
-nfs_free_dircache(void)
+nfs_invalidate_dircache_sb(struct super_block *sb)
{
- struct nfs_dirent *cache;
+ struct nfs_dirent *cache = dircache;
int i;
- dfprintk(DIRCACHE, "NFS: freeing dircache\n");
- for (i = 0, cache = dircache; i < NFS_MAX_DIRCACHE; i++, cache++) {
- cache->valid = 0;
+ for (i = NFS_MAX_DIRCACHE; i--; cache++) {
+ if (sb && sb->s_dev != cache->dev)
+ continue;
if (cache->locked) {
- printk("nfs_kfree_cache: locked entry in dircache!\n");
+ printk("NFS: cache locked at umount %s\n",
+ (cache->entry ? "(lost a page!)" : ""));
continue;
}
- if (cache->entry)
+ cache->valid = 0; /* brute force */
+ if (cache->entry) {
free_page((unsigned long) cache->entry);
- cache->entry = NULL;
+ cache->entry = NULL;
+ }
}
}
/*
+ * Free directory cache memory
+ * Called from cleanup_module
+ */
+void
+nfs_free_dircache(void)
+{
+ dfprintk(DIRCACHE, "NFS: freeing dircache\n");
+ nfs_invalidate_dircache_sb(NULL);
+}
+
+/*
* This is called every time the dcache has a lookup hit,
* and we should check whether we can really trust that
* lookup.
@@ -350,20 +383,92 @@ static int nfs_lookup_revalidate(struct dentry * dentry)
unsigned long time = jiffies - dentry->d_time;
unsigned long max = 5*HZ;
- if (dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode))
- max = 10*HZ;
- return time < max;
+ if (dentry->d_inode) {
+ if (is_bad_inode(dentry->d_inode)) {
+#ifdef NFS_PARANOIA
+printk("nfs_lookup_validate: %s/%s has dud inode\n",
+dentry->d_parent->d_name.name, dentry->d_name.name);
+#endif
+ goto bad;
+ }
+ if (S_ISDIR(dentry->d_inode->i_mode))
+ max = NFS_MAX_AGE;
+ }
+
+ return (time < max) || IS_ROOT(dentry);
+bad:
+ return 0;
}
-static void nfs_silly_delete(struct dentry *);
+/*
+ * This is called from dput() when d_count is going to 0.
+ * We use it to clean up silly-renamed files, and to check
+ * for dentries that have already expired.
+ */
+static void nfs_dentry_delete(struct dentry *dentry)
+{
+ if (dentry->d_flags & DCACHE_NFSFS_RENAMED) {
+ int error;
+
+ dentry->d_flags &= ~DCACHE_NFSFS_RENAMED;
+#ifdef NFS_DEBUG
+printk("nfs_dentry_delete: unlinking %s/%s\n",
+dentry->d_parent->d_name.name, dentry->d_name.name);
+#endif
+ /* Unhash it first */
+ d_drop(dentry);
+ error = nfs_safe_remove(dentry);
+ if (error)
+ printk("NFS: can't silly-delete %s/%s, error=%d\n",
+ dentry->d_parent->d_name.name,
+ dentry->d_name.name, error);
+ }
+ /*
+ * Check whether to expire the dentry ...
+ */
+ else {
+ unsigned long age = jiffies - dentry->d_time;
+ if (age > NFS_MAX_AGE)
+ d_drop(dentry);
+ }
+
+#ifdef NFS_PARANOIA
+ /*
+ * Sanity check: if the dentry has been unhashed and the
+ * inode still has users, we could have problems ...
+ */
+ if (list_empty(&dentry->d_hash) && dentry->d_inode) {
+ struct inode *inode = dentry->d_inode;
+ if (inode->i_count > 1) {
+printk("nfs_dentry_delete: %s/%s: ino=%ld, count=%d, nlink=%d\n",
+dentry->d_parent->d_name.name, dentry->d_name.name,
+inode->i_ino, inode->i_count, inode->i_nlink);
+ }
+ }
+#endif
+}
static struct dentry_operations nfs_dentry_operations = {
- nfs_lookup_revalidate,
+ nfs_lookup_revalidate, /* d_validate(struct dentry *) */
0, /* d_hash */
0, /* d_compare */
- nfs_silly_delete,
+ nfs_dentry_delete /* d_delete(struct dentry *) */
};
+/*
+ * Whenever a lookup succeeds, we know the parent directories
+ * are all valid, so we want to update the dentry timestamps.
+ */
+void nfs_renew_times(struct dentry * dentry)
+{
+ for (;;) {
+ dentry->d_time = jiffies;
+ if (dentry == dentry->d_parent)
+ break;
+ dentry = dentry->d_parent;
+ }
+}
+
static int nfs_lookup(struct inode *dir, struct dentry * dentry)
{
struct inode *inode;
@@ -373,38 +478,66 @@ static int nfs_lookup(struct inode *dir, struct dentry * dentry)
int error;
dfprintk(VFS, "NFS: lookup(%x/%ld, %.*s)\n",
- dir->i_dev, dir->i_ino, len, dentry->d_name.name);
+ dir->i_dev, dir->i_ino, len, dentry->d_name.name);
if (!dir || !S_ISDIR(dir->i_mode)) {
printk("nfs_lookup: inode is NULL or not a directory\n");
return -ENOENT;
}
+ error = -ENAMETOOLONG;
if (len > NFS_MAXNAMLEN)
- return -ENAMETOOLONG;
-
- error = nfs_proc_lookup(NFS_SERVER(dir), NFS_FH(dir), dentry->d_name.name, &fhandle, &fattr);
+ goto out;
+ error = nfs_proc_lookup(NFS_SERVER(dir), NFS_FH(dir),
+ dentry->d_name.name, &fhandle, &fattr);
inode = NULL;
+ if (error == -ENOENT)
+ goto no_entry;
if (!error) {
+ error = -EACCES;
inode = nfs_fhget(dir->i_sb, &fhandle, &fattr);
- if (!inode)
- return -EACCES;
- } else if (error != -ENOENT)
- return error;
-
- dentry->d_time = jiffies;
- dentry->d_op = &nfs_dentry_operations;
- d_add(dentry, inode);
- return 0;
+ if (inode) {
+ no_entry:
+ dentry->d_op = &nfs_dentry_operations;
+ d_add(dentry, inode);
+ nfs_renew_times(dentry);
+ error = 0;
+ }
+ }
+out:
+ return error;
}
+/*
+ * Code common to create, mkdir, and mknod.
+ */
+static int nfs_instantiate(struct inode *dir, struct dentry *dentry,
+ struct nfs_fattr *fattr, struct nfs_fh *fhandle)
+{
+ struct inode *inode;
+ int error = -EACCES;
+
+ inode = nfs_fhget(dir->i_sb, fhandle, fattr);
+ if (inode) {
+ d_instantiate(dentry, inode);
+ nfs_renew_times(dentry);
+ error = 0;
+ }
+ return error;
+}
+
+/*
+ * Following a failed create operation, we drop the dentry rather
+ * than retain a negative dentry. This avoids a problem in the event
+ * that the operation succeeded on the server, but an error in the
+ * reply path made it appear to have failed.
+ */
static int nfs_create(struct inode *dir, struct dentry * dentry, int mode)
{
struct nfs_sattr sattr;
struct nfs_fattr fattr;
struct nfs_fh fhandle;
- struct inode *inode;
int error;
dfprintk(VFS, "NFS: create(%x/%ld, %s\n",
@@ -415,33 +548,41 @@ static int nfs_create(struct inode *dir, struct dentry * dentry, int mode)
return -ENOENT;
}
+ error = -ENAMETOOLONG;
if (dentry->d_name.len > NFS_MAXNAMLEN)
- return -ENAMETOOLONG;
+ goto out;
sattr.mode = mode;
sattr.uid = sattr.gid = sattr.size = (unsigned) -1;
sattr.atime.seconds = sattr.mtime.seconds = (unsigned) -1;
- error = nfs_proc_create(NFS_SERVER(dir), NFS_FH(dir),
- dentry->d_name.name, &sattr, &fhandle, &fattr);
-
- if (error)
- return error;
-
- inode = nfs_fhget(dir->i_sb, &fhandle, &fattr);
- if (!inode)
- return -EACCES;
+ /*
+ * Invalidate the dir cache before the operation to avoid a race.
+ */
nfs_invalidate_dircache(dir);
- d_instantiate(dentry, inode);
- return 0;
+ error = nfs_proc_create(NFS_SERVER(dir), NFS_FH(dir),
+ dentry->d_name.name, &sattr, &fhandle, &fattr);
+ if (!error)
+ error = nfs_instantiate(dir, dentry, &fattr, &fhandle);
+ else {
+#ifdef NFS_PARANOIA
+printk("nfs_create: %s/%s failed, error=%d\n",
+dentry->d_parent->d_name.name, dentry->d_name.name, error);
+#endif
+ d_drop(dentry);
+ }
+out:
+ return error;
}
+/*
+ * See comments for nfs_create regarding failed operations.
+ */
static int nfs_mknod(struct inode *dir, struct dentry *dentry, int mode, int rdev)
{
struct nfs_sattr sattr;
struct nfs_fattr fattr;
struct nfs_fh fhandle;
- struct inode *inode;
int error;
dfprintk(VFS, "NFS: mknod(%x/%ld, %s\n",
@@ -459,29 +600,31 @@ static int nfs_mknod(struct inode *dir, struct dentry *dentry, int mode, int rde
sattr.uid = sattr.gid = sattr.size = (unsigned) -1;
if (S_ISCHR(mode) || S_ISBLK(mode))
sattr.size = rdev; /* get out your barf bag */
-
sattr.atime.seconds = sattr.mtime.seconds = (unsigned) -1;
- error = nfs_proc_create(NFS_SERVER(dir), NFS_FH(dir),
- dentry->d_name.name, &sattr, &fhandle, &fattr);
-
- if (error)
- return error;
-
- inode = nfs_fhget(dir->i_sb, &fhandle, &fattr);
- if (!inode)
- return -EACCES;
nfs_invalidate_dircache(dir);
- d_instantiate(dentry, inode);
- return 0;
+ error = nfs_proc_create(NFS_SERVER(dir), NFS_FH(dir),
+ dentry->d_name.name, &sattr, &fhandle, &fattr);
+ if (!error)
+ error = nfs_instantiate(dir, dentry, &fattr, &fhandle);
+ else {
+#ifdef NFS_PARANOIA
+printk("nfs_mknod: %s/%s failed, error=%d\n",
+dentry->d_parent->d_name.name, dentry->d_name.name, error);
+#endif
+ d_drop(dentry);
+ }
+ return error;
}
+/*
+ * See comments for nfs_create regarding failed operations.
+ */
static int nfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
{
struct nfs_sattr sattr;
struct nfs_fattr fattr;
struct nfs_fh fhandle;
- struct inode * inode;
int error;
dfprintk(VFS, "NFS: mkdir(%x/%ld, %s\n",
@@ -499,21 +642,29 @@ static int nfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
sattr.uid = sattr.gid = sattr.size = (unsigned) -1;
sattr.atime.seconds = sattr.mtime.seconds = (unsigned) -1;
- error = nfs_proc_mkdir(NFS_SERVER(dir), NFS_FH(dir),
- dentry->d_name.name, &sattr, &fhandle, &fattr);
-
- if (error)
- return error;
-
- inode = nfs_fhget(dir->i_sb, &fhandle, &fattr);
- if (!inode)
- return -EACCES;
-
nfs_invalidate_dircache(dir);
- d_instantiate(dentry, inode);
- return 0;
+ error = nfs_proc_mkdir(NFS_SERVER(dir), NFS_FH(dir),
+ dentry->d_name.name, &sattr, &fhandle, &fattr);
+ if (!error)
+ error = nfs_instantiate(dir, dentry, &fattr, &fhandle);
+ else {
+#ifdef NFS_PARANOIA
+printk("nfs_mkdir: %s/%s failed, error=%d\n",
+dentry->d_parent->d_name.name, dentry->d_name.name, error);
+#endif
+ d_drop(dentry);
+ }
+ return error;
}
+/*
+ * To avoid retaining a stale inode reference, we check the dentry
+ * use count prior to the operation, and return EBUSY if it has
+ * multiple users.
+ *
+ * Update inode->i_nlink immediately after a successful operation.
+ * (See comments for nfs_safe_remove.)
+ */
static int nfs_rmdir(struct inode *dir, struct dentry *dentry)
{
int error;
@@ -526,16 +677,28 @@ static int nfs_rmdir(struct inode *dir, struct dentry *dentry)
return -ENOENT;
}
+ error = -ENAMETOOLONG;
if (dentry->d_name.len > NFS_MAXNAMLEN)
- return -ENAMETOOLONG;
-
+ goto out;
+
+ error = -EBUSY;
+ if (dentry->d_count > 1) {
+ /* Attempt to shrink child dentries ... */
+ shrink_dcache_parent(dentry);
+ if (dentry->d_count > 1)
+ goto out;
+ }
+ /* Drop the dentry to force a new lookup */
+ d_drop(dentry);
error = nfs_proc_rmdir(NFS_SERVER(dir), NFS_FH(dir), dentry->d_name.name);
- if (error)
- return error;
-
- nfs_invalidate_dircache(dir);
- d_delete(dentry);
- return 0;
+ if (!error) {
+ if (dentry->d_inode->i_nlink)
+ dentry->d_inode->i_nlink --;
+ nfs_invalidate_dircache(dir);
+ nfs_renew_times(dentry);
+ }
+out:
+ return error;
}
@@ -551,24 +714,22 @@ static int nfs_rmdir(struct inode *dir, struct dentry *dentry)
*
* Concerning my choice of the temp name: it is just nice to have
* i_ino part of the temp name, as this offers another check whether
- * somebody attempts to remove the "silly renamed" dentry
- * itself. Which is something that I consider evil. Your opinion may
- * vary.
+ * somebody attempts to remove the "silly renamed" dentry itself.
+ * Which is something that I consider evil. Your opinion may vary.
* BUT:
* Now that I compute the hash value right, it should be possible to simply
* check for the DCACHE_NFSFS_RENAMED flag in dentry->d_flag instead of
* doing the string compare.
* WHICH MEANS:
* This offers the opportunity to shorten the temp name. Currently, I use
- * the hex representation of i_ino + the hex value of jiffies. This
- * sums up to as much as 36 characters for a 64 bit machine, and needs
- * 20 chars on a 32 bit machine. Have a look at jiffiesize etc.
+ * the hex representation of i_ino + an event counter. This sums up to
+ * as much as 36 characters for a 64 bit machine, and needs 20 chars on
+ * a 32 bit machine.
* QUINTESSENCE
* The use of i_ino is simply cosmetic. All we need is a unique temp
- * file name for the .nfs files. The hex representation of "jiffies"
- * seemed to be adequate. And as we retry in case such a file already
- * exists we are guaranteed to succed (after some jiffies have passed
- * by :)
+ * file name for the .nfs files. The event counter seemed to be adequate.
+ * And as we retry in case such a file already exists, we are guaranteed
+ * to succeed.
*/
static
@@ -576,14 +737,11 @@ struct dentry *nfs_silly_lookup(struct dentry *parent, char *silly, int slen)
{
struct qstr sqstr;
struct dentry *sdentry;
- int i, error;
+ int error;
sqstr.name = silly;
sqstr.len = slen;
- sqstr.hash = init_name_hash();
- for (i= 0; i < slen; i++)
- sqstr.hash = partial_name_hash(silly[i], sqstr.hash);
- sqstr.hash = end_name_hash(sqstr.hash);
+ sqstr.hash = full_name_hash(silly, slen);
sdentry = d_lookup(parent, &sqstr);
if (!sdentry) {
sdentry = d_alloc(parent, &sqstr);
@@ -605,19 +763,29 @@ static int nfs_sillyrename(struct inode *dir, struct dentry *dentry)
const int countersize = sizeof(sillycounter)*2;
const int slen = strlen(".nfs") + i_inosize + countersize;
char silly[slen+1];
- int error;
struct dentry *sdentry;
+ int error = -EIO;
+ /*
+ * Note that a silly-renamed file can be deleted once it's
+ * no longer in use -- it's just an ordinary file now.
+ */
if (dentry->d_count == 1) {
- return -EIO; /* No need to silly rename. */
+ dentry->d_flags &= ~DCACHE_NFSFS_RENAMED;
+ goto out; /* No need to silly rename. */
}
- if (dentry->d_flags & DCACHE_NFSFS_RENAMED) {
- return -EBUSY; /* don't allow to unlink silly inode -- nope,
- * think a bit: silly DENTRY, NOT inode --
- * itself
- */
- }
+#ifdef NFS_PARANOIA
+if (!dentry->d_inode)
+printk("NFS: silly-renaming %s/%s, negative dentry??\n",
+dentry->d_parent->d_name.name, dentry->d_name.name);
+#endif
+ /*
+ * We don't allow a dentry to be silly-renamed twice.
+ */
+ error = -EBUSY;
+ if (dentry->d_flags & DCACHE_NFSFS_RENAMED)
+ goto out;
sprintf(silly, ".nfs%*.*lx",
i_inosize, i_inosize, dentry->d_inode->i_ino);
@@ -634,54 +802,109 @@ static int nfs_sillyrename(struct inode *dir, struct dentry *dentry)
dentry->d_name.name, silly);
sdentry = nfs_silly_lookup(dentry->d_parent, silly, slen);
- if (IS_ERR(sdentry)) {
- return -EIO; /* FIXME ? */
- }
+ /*
+ * N.B. Better to return EBUSY here ... it could be
+ * dangerous to delete the file while it's in use.
+ */
+ if (IS_ERR(sdentry))
+ goto out;
} while(sdentry->d_inode != NULL); /* need negative lookup */
error = nfs_proc_rename(NFS_SERVER(dir),
NFS_FH(dir), dentry->d_name.name,
NFS_FH(dir), silly);
- if (error) {
- dput(sdentry);
- return error;
+ if (!error) {
+ nfs_invalidate_dircache(dir);
+ nfs_renew_times(dentry);
+ d_move(dentry, sdentry);
+ dentry->d_flags |= DCACHE_NFSFS_RENAMED;
+ /* If we return 0 we don't unlink */
}
- nfs_invalidate_dircache(dir);
- d_move(dentry, sdentry);
dput(sdentry);
- dentry->d_flags |= DCACHE_NFSFS_RENAMED;
-
- return 0; /* don't unlink */
+out:
+ return error;
}
-static void nfs_silly_delete(struct dentry *dentry)
+/*
+ * Remove a file after making sure there are no pending writes,
+ * and after checking that the file has only one user.
+ *
+ * Updating inode->i_nlink here rather than waiting for the next
+ * nfs_refresh_inode() is not merely cosmetic; once an object has
+ * been deleted, we want to get rid of the inode locally. The NFS
+ * server may reuse the fileid for a new inode, and we don't want
+ * that to be confused with this inode.
+ */
+static int nfs_safe_remove(struct dentry *dentry)
{
- if (dentry->d_flags & DCACHE_NFSFS_RENAMED) {
- struct inode *dir = dentry->d_parent->d_inode;
- int error;
+ struct inode *dir = dentry->d_parent->d_inode;
+ struct inode *inode = dentry->d_inode;
+ int error, rehash = 0;
- dentry->d_flags &= ~DCACHE_NFSFS_RENAMED;
+ error = -EBUSY;
+ if (inode) {
+ if (NFS_WRITEBACK(inode)) {
+ nfs_flush_dirty_pages(inode, 0, 0, 0);
+ if (NFS_WRITEBACK(inode)) {
+#ifdef NFS_PARANOIA
+printk("nfs_safe_remove: %s/%s writes pending, d_count=%d\n",
+dentry->d_parent->d_name.name, dentry->d_name.name, dentry->d_count);
+#endif
+ goto out;
+ }
+ }
+ } else {
+#ifdef NFS_PARANOIA
+printk("nfs_safe_remove: %s/%s already negative??\n",
+dentry->d_parent->d_name.name, dentry->d_name.name);
+#endif
+ }
- /* Unhash it first */
+ if (dentry->d_count > 1) {
+#ifdef NFS_PARANOIA
+printk("nfs_safe_remove: %s/%s busy, d_count=%d\n",
+dentry->d_parent->d_name.name, dentry->d_name.name, dentry->d_count);
+#endif
+ goto out;
+ }
+ /*
+ * Unhash the dentry while we remove the file ...
+ */
+ if (!list_empty(&dentry->d_hash)) {
d_drop(dentry);
- dfprintk(VFS, "trying to unlink %s\n", dentry->d_name.name);
- error = nfs_proc_remove(NFS_SERVER(dir),
+ rehash = 1;
+ }
+ error = nfs_proc_remove(NFS_SERVER(dir),
NFS_FH(dir), dentry->d_name.name);
- if (error < 0)
- printk("NFS " __FUNCTION__ " failed (err = %d)\n",
- -error);
- dentry->d_inode->i_nlink --;
+ /*
+ * ... then restore the hashed state. This ensures that the
+ * dentry can't become busy after having its file deleted.
+ */
+ if (rehash) {
+ d_add(dentry, inode);
+ }
+#ifdef NFS_PARANOIA
+if (dentry->d_count > 1)
+printk("nfs_safe_remove: %s/%s busy after delete?? d_count=%d\n",
+dentry->d_parent->d_name.name, dentry->d_name.name, dentry->d_count);
+if (inode && inode->i_count > 1)
+printk("nfs_safe_remove: %s/%s inode busy?? i_count=%d\n",
+dentry->d_parent->d_name.name, dentry->d_name.name, inode->i_count);
+#endif
+ if (!error) {
nfs_invalidate_dircache(dir);
+ if (inode && inode->i_nlink)
+ inode->i_nlink --;
+ d_delete(dentry);
}
+out:
+ return error;
}
/* We do silly rename. In case sillyrename() returns -EBUSY, the inode
* belongs to an active ".nfs..." file and we return -EBUSY.
*
* If sillyrename() returns 0, we do nothing, otherwise we unlink.
- *
- * inode->i_nlink is updated here rather than waiting for the next
- * nfs_refresh_inode() for cosmetic reasons only.
*/
static int nfs_unlink(struct inode *dir, struct dentry *dentry)
{
@@ -695,70 +918,77 @@ static int nfs_unlink(struct inode *dir, struct dentry *dentry)
return -ENOENT;
}
+ error = -ENAMETOOLONG;
if (dentry->d_name.len > NFS_MAXNAMLEN)
- return -ENAMETOOLONG;
+ goto out;
error = nfs_sillyrename(dir, dentry);
-
- if (error == -EBUSY) {
- return -EBUSY;
- } else if (error < 0) {
- error = nfs_proc_remove(NFS_SERVER(dir),
- NFS_FH(dir), dentry->d_name.name);
- if (error < 0)
- return error;
-
- dentry->d_inode->i_nlink --;
- nfs_invalidate_dircache(dir);
- d_delete(dentry);
+ if (error && error != -EBUSY) {
+ error = nfs_safe_remove(dentry);
+ if (!error) {
+ nfs_renew_times(dentry);
+ }
}
-
- return 0;
+out:
+ return error;
}
-static int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
+static int
+nfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
{
struct nfs_sattr sattr;
int error;
dfprintk(VFS, "NFS: symlink(%x/%ld, %s, %s)\n",
- dir->i_dev, dir->i_ino, dentry->d_name.name, symname);
+ dir->i_dev, dir->i_ino, dentry->d_name.name, symname);
if (!dir || !S_ISDIR(dir->i_mode)) {
printk("nfs_symlink: inode is NULL or not a directory\n");
return -ENOENT;
}
+ error = -ENAMETOOLONG;
if (dentry->d_name.len > NFS_MAXNAMLEN)
- return -ENAMETOOLONG;
+ goto out;
if (strlen(symname) > NFS_MAXPATHLEN)
- return -ENAMETOOLONG;
+ goto out;
- sattr.mode = S_IFLNK | S_IRWXUGO; /* SunOS 4.1.2 crashes without this! */
+#ifdef NFS_PARANOIA
+if (dentry->d_inode)
+printk("nfs_proc_symlink: %s/%s not negative!\n",
+dentry->d_parent->d_name.name, dentry->d_name.name);
+#endif
+ /*
+ * Fill in the sattr for the call.
+ * Note: SunOS 4.1.2 crashes if the mode isn't initialized!
+ */
+ sattr.mode = S_IFLNK | S_IRWXUGO;
sattr.uid = sattr.gid = sattr.size = (unsigned) -1;
sattr.atime.seconds = sattr.mtime.seconds = (unsigned) -1;
- error = nfs_proc_symlink(NFS_SERVER(dir), NFS_FH(dir),
- dentry->d_name.name, symname, &sattr);
-
- if (error)
- return error;
-
- nfs_invalidate_dircache(dir);
- /* this looks _funny_ doesn't it? But: nfs_proc_symlink()
- * only fills in sattr, not fattr. Thus nfs_fhget() cannot be
- * called, it would be pointless, without a valid fattr
- * argument. Other possibility: call nfs_proc_lookup()
- * HERE. But why? If somebody wants to reference this
- * symlink, the cached_lookup() will fail, and
- * nfs_proc_symlink() will be called anyway.
+ /*
+ * Drop the dentry in advance to force a new lookup.
+ * Since nfs_proc_symlink doesn't return a fattr, we
+ * can't instantiate the new inode.
*/
d_drop(dentry);
- return 0;
+ error = nfs_proc_symlink(NFS_SERVER(dir), NFS_FH(dir),
+ dentry->d_name.name, symname, &sattr);
+ if (!error) {
+ nfs_invalidate_dircache(dir);
+ nfs_renew_times(dentry->d_parent);
+ } else if (error == -EEXIST) {
+ printk("nfs_proc_symlink: %s/%s already exists??\n",
+ dentry->d_parent->d_name.name, dentry->d_name.name);
+ }
+
+out:
+ return error;
}
-static int nfs_link(struct inode *inode, struct inode *dir, struct dentry *dentry)
+static int
+nfs_link(struct inode *inode, struct inode *dir, struct dentry *dentry)
{
int error;
@@ -771,20 +1001,20 @@ static int nfs_link(struct inode *inode, struct inode *dir, struct dentry *dentr
return -ENOENT;
}
+ error = -ENAMETOOLONG;
if (dentry->d_name.len > NFS_MAXNAMLEN)
- return -ENAMETOOLONG;
-
- error = nfs_proc_link(NFS_SERVER(inode), NFS_FH(inode),
- NFS_FH(dir), dentry->d_name.name);
-
- if (error)
- return error;
+ goto out;
- nfs_invalidate_dircache(dir);
- inode->i_count ++;
- inode->i_nlink ++; /* no need to wait for nfs_refresh_inode() */
- d_instantiate(dentry, inode);
- return 0;
+ error = nfs_proc_link(NFS_SERVER(inode), NFS_FH(inode), NFS_FH(dir),
+ dentry->d_name.name);
+ if (!error) {
+ nfs_invalidate_dircache(dir);
+ inode->i_count ++;
+ inode->i_nlink ++; /* no need to wait for nfs_refresh_inode() */
+ d_instantiate(dentry, inode);
+ }
+out:
+ return error;
}
/*
@@ -804,16 +1034,25 @@ static int nfs_link(struct inode *inode, struct inode *dir, struct dentry *dentr
* implementation that only depends on the dcache stuff instead of
* using the inode layer
*
+ * Unfortunately, things are a little more complicated than indicated
+ * above. For a cross-directory move, we want to make sure we can get
+ * rid of the old inode after the operation. This means there must be
+ * no pending writes (if it's a file), and the use count must be 1.
+ * If these conditions are met, we can drop the dentries before doing
+ * the rename.
*/
static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
struct inode *new_dir, struct dentry *new_dentry)
{
- int error;
-
- dfprintk(VFS, "NFS: rename(%x/%ld, %s -> %x/%ld, %s)\n",
- old_dir->i_dev, old_dir->i_ino, old_dentry->d_name.name,
- new_dir->i_dev, new_dir->i_ino, new_dentry->d_name.name);
-
+ struct inode *old_inode = old_dentry->d_inode;
+ struct inode *new_inode = new_dentry->d_inode;
+ int error, rehash = 0, update = 1;
+
+#ifdef NFS_DEBUG_VERBOSE
+printk("nfs_rename: old %s/%s, count=%d, new %s/%s, count=%d\n",
+old_dentry->d_parent->d_name.name,old_dentry->d_name.name,old_dentry->d_count,
+new_dentry->d_parent->d_name.name,new_dentry->d_name.name,new_dentry->d_count);
+#endif
if (!old_dir || !S_ISDIR(old_dir->i_mode)) {
printk("nfs_rename: old inode is NULL or not a directory\n");
return -ENOENT;
@@ -824,98 +1063,109 @@ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
return -ENOENT;
}
- if (old_dentry->d_name.len > NFS_MAXNAMLEN || new_dentry->d_name.len > NFS_MAXNAMLEN)
- return -ENAMETOOLONG;
+ error = -ENAMETOOLONG;
+ if (old_dentry->d_name.len > NFS_MAXNAMLEN ||
+ new_dentry->d_name.len > NFS_MAXNAMLEN)
+ goto out;
- if (new_dir != old_dir) {
- error = nfs_sillyrename(old_dir, old_dentry);
+ /*
+ * First check whether the target is busy ... we can't
+ * safely do _any_ rename if the target is in use.
+ */
+ if (new_dentry->d_count > 1) {
+ if (new_inode && S_ISDIR(new_inode->i_mode))
+ shrink_dcache_parent(new_dentry);
+ }
+ error = -EBUSY;
+ if (new_dentry->d_count > 1) {
+#ifdef NFS_PARANOIA
+printk("nfs_rename: target %s/%s busy, d_count=%d\n",
+new_dentry->d_parent->d_name.name,new_dentry->d_name.name,new_dentry->d_count);
+#endif
+ goto out;
+ }
- if (error == -EBUSY) {
- return -EBUSY;
- } else if (error == 0) { /* did silly rename stuff */
- error = nfs_link(old_dentry->d_inode,
- new_dir, new_dentry);
-
- return error;
+ /*
+ * Check for within-directory rename ... no complications.
+ */
+ if (new_dir == old_dir)
+ goto do_rename;
+ /*
+ * Cross-directory move ... check whether it's a file.
+ */
+ if (S_ISREG(old_inode->i_mode)) {
+ if (NFS_WRITEBACK(old_inode)) {
+#ifdef NFS_PARANOIA
+printk("nfs_rename: %s/%s has pending writes\n",
+old_dentry->d_parent->d_name.name, old_dentry->d_name.name);
+#endif
+ nfs_flush_dirty_pages(old_inode, 0, 0, 0);
+ if (NFS_WRITEBACK(old_inode)) {
+#ifdef NFS_PARANOIA
+printk("nfs_rename: %s/%s has pending writes after flush\n",
+old_dentry->d_parent->d_name.name, old_dentry->d_name.name);
+#endif
+ goto out;
+ }
}
- /* no need for silly rename, proceed as usual */
}
- error = nfs_proc_rename(NFS_SERVER(old_dir),
- NFS_FH(old_dir), old_dentry->d_name.name,
- NFS_FH(new_dir), new_dentry->d_name.name);
- if (error)
- return error;
-
- nfs_invalidate_dircache(old_dir);
- nfs_invalidate_dircache(new_dir);
+ /*
+ * Moving a directory ... prune child dentries if needed.
+ */
+ else if (old_dentry->d_count > 1)
+ shrink_dcache_parent(old_dentry);
- /* Update the dcache */
- d_move(old_dentry, new_dentry);
- return 0;
-}
+ /*
+ * Now check the use counts ... we can't safely do the
+ * rename unless we can drop the dentries first.
+ */
+ if (old_dentry->d_count > 1) {
+#ifdef NFS_PARANOIA
+printk("nfs_rename: old dentry %s/%s busy, d_count=%d\n",
+old_dentry->d_parent->d_name.name,old_dentry->d_name.name,old_dentry->d_count);
+#endif
+ goto out;
+ }
+ if (new_dentry->d_count > 1) {
+#ifdef NFS_PARANOIA
+printk("nfs_rename: new dentry %s/%s busy, d_count=%d\n",
+new_dentry->d_parent->d_name.name,new_dentry->d_name.name,new_dentry->d_count);
+#endif
+ goto out;
+ }
-/*
- * Many nfs protocol calls return the new file attributes after
- * an operation. Here we update the inode to reflect the state
- * of the server's inode.
- */
+ d_drop(old_dentry);
+ update = 0;
-void nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr)
-{
- int was_empty;
-
- dfprintk(VFS, "NFS: refresh_inode(%x/%ld ct=%d)\n",
- inode->i_dev, inode->i_ino, inode->i_count);
-
- if (!inode || !fattr) {
- printk("nfs_refresh_inode: inode or fattr is NULL\n");
- return;
- }
- if (inode->i_ino != fattr->fileid) {
- printk("nfs_refresh_inode: inode number mismatch\n");
- return;
- }
- was_empty = (inode->i_mode == 0);
- inode->i_mode = fattr->mode;
- inode->i_nlink = fattr->nlink;
- inode->i_uid = fattr->uid;
- inode->i_gid = fattr->gid;
-
- /* Size changed from outside: invalidate caches on next read */
- if (inode->i_size != fattr->size) {
- dfprintk(PAGECACHE, "NFS: cacheinv(%x/%ld)\n",
- inode->i_dev, inode->i_ino);
- NFS_CACHEINV(inode);
- }
- if (NFS_OLDMTIME(inode) != fattr->mtime.seconds) {
- dfprintk(PAGECACHE, "NFS: mtime change on %x/%ld\n",
- inode->i_dev, inode->i_ino);
- NFS_ATTRTIMEO(inode) = NFS_MINATTRTIMEO(inode);
- }
- inode->i_size = fattr->size;
- if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
- inode->i_rdev = to_kdev_t(fattr->rdev);
- else
- inode->i_rdev = 0;
- inode->i_blocks = fattr->blocks;
- inode->i_atime = fattr->atime.seconds;
- inode->i_mtime = fattr->mtime.seconds;
- inode->i_ctime = fattr->ctime.seconds;
- if (S_ISREG(inode->i_mode))
- inode->i_op = &nfs_file_inode_operations;
- else if (S_ISDIR(inode->i_mode))
- inode->i_op = &nfs_dir_inode_operations;
- else if (S_ISLNK(inode->i_mode))
- inode->i_op = &nfs_symlink_inode_operations;
- else if (S_ISCHR(inode->i_mode))
- inode->i_op = &chrdev_inode_operations;
- else if (S_ISBLK(inode->i_mode))
- inode->i_op = &blkdev_inode_operations;
- else if (S_ISFIFO(inode->i_mode)) {
- if (was_empty)
- init_fifo(inode);
- } else
- inode->i_op = NULL;
+do_rename:
+ /*
+ * We must prevent any new references to the target while
+ * the rename is in progress, so we unhash the dentry.
+ */
+ if (!list_empty(&new_dentry->d_hash)) {
+ d_drop(new_dentry);
+ rehash = 1;
+ }
+ error = nfs_proc_rename(NFS_SERVER(old_dir),
+ NFS_FH(old_dir), old_dentry->d_name.name,
+ NFS_FH(new_dir), new_dentry->d_name.name);
+ if (rehash) {
+ d_add(new_dentry, new_inode);
+ }
+#ifdef NFS_PARANOIA
+if (new_dentry->d_count > 1)
+printk("nfs_rename: %s/%s busy after rename, d_count=%d\n",
+new_dentry->d_parent->d_name.name,new_dentry->d_name.name,new_dentry->d_count);
+#endif
+ if (!error) {
+ nfs_invalidate_dircache(new_dir);
+ nfs_invalidate_dircache(old_dir);
+ /* Update the dcache if needed */
+ if (update)
+ d_move(old_dentry, new_dentry);
+ }
+out:
+ return error;
}
/*
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 4587950ef..0f3bd5ed3 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -33,9 +33,8 @@
#define NFSDBG_FACILITY NFSDBG_FILE
static int nfs_file_mmap(struct file *, struct vm_area_struct *);
-static long nfs_file_read(struct inode *, struct file *, char *, unsigned long);
-static long nfs_file_write(struct inode *, struct file *,
- const char *, unsigned long);
+static ssize_t nfs_file_read(struct file *, char *, size_t, loff_t *);
+static ssize_t nfs_file_write(struct file *, const char *, size_t, loff_t *);
static int nfs_file_close(struct inode *, struct file *);
static int nfs_fsync(struct file *, struct dentry *dentry);
@@ -84,67 +83,91 @@ struct inode_operations nfs_file_inode_operations = {
# define IS_SWAPFILE(inode) (0)
#endif
-
+/*
+ * Flush all dirty pages, and check for write errors.
+ *
+ * Note that since the file close operation is called only by the
+ * _last_ process to close the file, we need to flush _all_ dirty
+ * pages. This also means that there is little sense in checking
+ * for errors for this specific process -- we should probably just
+ * clear all errors.
+ */
static int
nfs_file_close(struct inode *inode, struct file *file)
{
- int status;
+ int status, error;
dfprintk(VFS, "nfs: close(%x/%ld)\n", inode->i_dev, inode->i_ino);
- if ((status = nfs_flush_dirty_pages(inode, 0, 0)) < 0)
- return status;
- return nfs_write_error(inode);
+ status = nfs_flush_dirty_pages(inode, 0, 0, 0);
+ error = nfs_write_error(inode);
+ if (!status)
+ status = error;
+ return status;
}
-static long
-nfs_file_read(struct inode * inode, struct file * file,
- char * buf, unsigned long count)
+static ssize_t
+nfs_file_read(struct file * file, char * buf, size_t count, loff_t *ppos)
{
- int status;
+ struct inode * inode = file->f_dentry->d_inode;
+ ssize_t result;
dfprintk(VFS, "nfs: read(%x/%ld, %lu@%lu)\n",
inode->i_dev, inode->i_ino, count,
- (unsigned long) file->f_pos);
+ (unsigned long) *ppos);
- if ((status = nfs_revalidate_inode(NFS_SERVER(inode), inode)) < 0)
- return status;
- return generic_file_read(inode, file, buf, count);
+ result = nfs_revalidate_inode(NFS_SERVER(inode), inode);
+ if (!result)
+ result = generic_file_read(file, buf, count, ppos);
+ return result;
}
static int
nfs_file_mmap(struct file * file, struct vm_area_struct * vma)
{
- int status;
struct inode *inode = file->f_dentry->d_inode;
+ int status;
dfprintk(VFS, "nfs: mmap(%x/%ld)\n", inode->i_dev, inode->i_ino);
- if ((status = nfs_revalidate_inode(NFS_SERVER(inode), inode)) < 0)
- return status;
- return generic_file_mmap(file, vma);
+ status = nfs_revalidate_inode(NFS_SERVER(inode), inode);
+ if (!status)
+ status = generic_file_mmap(file, vma);
+ return status;
}
-static int nfs_fsync(struct file *file, struct dentry *dentry)
+/*
+ * Flush any dirty pages for this process, and check for write errors.
+ * The return status from this call provides a reliable indication of
+ * whether any write errors occurred for this process.
+ */
+static int
+nfs_fsync(struct file *file, struct dentry *dentry)
{
struct inode *inode = dentry->d_inode;
+ int status, error;
+
dfprintk(VFS, "nfs: fsync(%x/%ld)\n", inode->i_dev, inode->i_ino);
- return nfs_flush_dirty_pages(inode, 0, 0);
+ status = nfs_flush_dirty_pages(inode, current->pid, 0, 0);
+ error = nfs_write_error(inode);
+ if (!status)
+ status = error;
+ return status;
}
/*
* Write to a file (through the page cache).
*/
-static long
-nfs_file_write(struct inode *inode, struct file *file,
- const char *buf, unsigned long count)
+static ssize_t
+nfs_file_write(struct file *file, const char *buf, size_t count, loff_t *ppos)
{
- int result;
+ struct inode * inode = file->f_dentry->d_inode;
+ ssize_t result;
dfprintk(VFS, "nfs: write(%x/%ld (%d), %lu@%lu)\n",
inode->i_dev, inode->i_ino, inode->i_count,
- count, (unsigned long) file->f_pos);
+ count, (unsigned long) *ppos);
if (!inode) {
printk("nfs_file_write: inode = NULL\n");
@@ -154,21 +177,26 @@ nfs_file_write(struct inode *inode, struct file *file,
printk("NFS: attempt to write to active swap file!\n");
return -EBUSY;
}
- if ((result = nfs_revalidate_inode(NFS_SERVER(inode), inode)) < 0)
- return result;
+ result = nfs_revalidate_inode(NFS_SERVER(inode), inode);
+ if (result)
+ goto out;
+
+ /* N.B. This should be impossible now -- inodes can't change type */
if (!S_ISREG(inode->i_mode)) {
printk("nfs_file_write: write to non-file, mode %07o\n",
inode->i_mode);
return -EINVAL;
}
- if (count <= 0)
- return 0;
-
- /* Return error from previous async call */
- if ((result = nfs_write_error(inode)) < 0)
- return result;
-
- return generic_file_write(inode, file, buf, count);
+ result = count;
+ if (!count)
+ goto out;
+
+ /* Check for an error from a previous async call */
+ result = nfs_write_error(inode);
+ if (!result)
+ result = generic_file_write(file, buf, count, ppos);
+out:
+ return result;
}
/*
@@ -177,15 +205,15 @@ nfs_file_write(struct inode *inode, struct file *file,
int
nfs_lock(struct file *filp, int cmd, struct file_lock *fl)
{
+ struct inode * inode = filp->f_dentry->d_inode;
int status;
- struct inode * inode;
dprintk("NFS: nfs_lock(f=%4x/%ld, t=%x, fl=%x, r=%ld:%ld)\n",
- filp->f_dentry->d_inode->i_dev, filp->f_dentry->d_inode->i_ino,
+ inode->i_dev, inode->i_ino,
fl->fl_type, fl->fl_flags,
fl->fl_start, fl->fl_end);
- if (!(inode = filp->f_dentry->d_inode))
+ if (!inode)
return -EINVAL;
/* No mandatory locks over NFS */
@@ -209,8 +237,8 @@ nfs_lock(struct file *filp, int cmd, struct file_lock *fl)
/* If unlocking a file region, flush dirty pages (unless we've
* been killed by a signal, that is). */
if (cmd == F_SETLK && fl->fl_type == F_UNLCK
- && !(current->signal & ~current->blocked)) {
- status = nfs_flush_dirty_pages(inode,
+ && !signal_pending(current)) {
+ status = nfs_flush_dirty_pages(inode, current->pid,
fl->fl_start, fl->fl_end == NLM_OFFSET_MAX? 0 :
fl->fl_end - fl->fl_start + 1);
if (status < 0)
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index e91b34a34..c070d130b 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -33,13 +33,17 @@
#include <asm/uaccess.h>
#define NFSDBG_FACILITY NFSDBG_VFS
+#define NFS_PARANOIA 1
-static int nfs_notify_change(struct inode *, struct iattr *);
+extern void nfs_invalidate_dircache_sb(struct super_block *);
+extern int check_failed_request(struct inode *);
+
+static void nfs_read_inode(struct inode *);
static void nfs_put_inode(struct inode *);
static void nfs_delete_inode(struct inode *);
+static int nfs_notify_change(struct inode *, struct iattr *);
static void nfs_put_super(struct super_block *);
-static void nfs_read_inode(struct inode *);
-static int nfs_statfs(struct super_block *, struct statfs *, int bufsiz);
+static int nfs_statfs(struct super_block *, struct statfs *, int);
static struct super_operations nfs_sops = {
nfs_read_inode, /* read inode */
@@ -67,20 +71,51 @@ nfs_read_inode(struct inode * inode)
{
inode->i_blksize = inode->i_sb->s_blocksize;
inode->i_mode = 0;
+ inode->i_rdev = 0;
inode->i_op = NULL;
NFS_CACHEINV(inode);
+ NFS_ATTRTIMEO(inode) = NFS_MINATTRTIMEO(inode);
}
static void
nfs_put_inode(struct inode * inode)
{
dprintk("NFS: put_inode(%x/%ld)\n", inode->i_dev, inode->i_ino);
+ /*
+ * We want to get rid of unused inodes ...
+ */
+ if (inode->i_count == 1)
+ inode->i_nlink = 0;
}
static void
nfs_delete_inode(struct inode * inode)
{
+ int failed;
+
dprintk("NFS: delete_inode(%x/%ld)\n", inode->i_dev, inode->i_ino);
+ /*
+ * Flush out any pending write requests ...
+ */
+ if (NFS_WRITEBACK(inode) != NULL) {
+ unsigned long timeout = jiffies + 5*HZ;
+ printk("NFS: inode %ld, invalidating pending RPC requests\n",
+ inode->i_ino);
+ nfs_invalidate_pages(inode);
+ while (NFS_WRITEBACK(inode) != NULL && jiffies < timeout) {
+ current->state = TASK_INTERRUPTIBLE;
+ current->timeout = jiffies + HZ/10;
+ schedule();
+ }
+ current->state = TASK_RUNNING;
+ if (NFS_WRITEBACK(inode) != NULL)
+ printk("NFS: Arghhh, stuck RPC requests!\n");
+ }
+
+ failed = check_failed_request(inode);
+ if (failed)
+ printk("NFS: inode %ld had %d failed requests\n",
+ inode->i_ino, failed);
clear_inode(inode);
}
@@ -90,13 +125,21 @@ nfs_put_super(struct super_block *sb)
struct nfs_server *server = &sb->u.nfs_sb.s_server;
struct rpc_clnt *rpc;
+ /*
+ * Lock the super block while we bring down the daemons.
+ */
+ lock_super(sb);
if ((rpc = server->client) != NULL)
rpc_shutdown_client(rpc);
if (!(server->flags & NFS_MOUNT_NONLM))
lockd_down(); /* release rpc.lockd */
rpciod_down(); /* release rpciod */
- lock_super(sb);
+ /*
+ * Invalidate the dircache for this superblock.
+ */
+ nfs_invalidate_dircache_sb(sb);
+
sb->s_dev = 0;
unlock_super(sb);
MOD_DEC_USE_COUNT;
@@ -147,14 +190,12 @@ nfs_read_super(struct super_block *sb, void *raw_data, int silent)
unsigned int authflavor;
int tcp;
kdev_t dev = sb->s_dev;
+ struct inode *root_inode;
MOD_INC_USE_COUNT;
- if (!data) {
- printk("nfs_read_super: missing data argument\n");
- sb->s_dev = 0;
- MOD_DEC_USE_COUNT;
- return NULL;
- }
+ if (!data)
+ goto out_miss_args;
+
if (data->version != NFS_MOUNT_VERSION) {
printk("nfs warning: mount version %s than kernel\n",
data->version < NFS_MOUNT_VERSION ? "older" : "newer");
@@ -164,13 +205,19 @@ nfs_read_super(struct super_block *sb, void *raw_data, int silent)
data->bsize = 0;
}
+ /* We now require that the mount process passes the remote address */
+ memcpy(&srvaddr, &data->addr, sizeof(srvaddr));
+ if (srvaddr.sin_addr.s_addr == INADDR_ANY)
+ goto out_no_remote;
+
lock_super(sb);
- server = &sb->u.nfs_sb.s_server;
sb->s_magic = NFS_SUPER_MAGIC;
sb->s_dev = dev;
sb->s_op = &nfs_sops;
sb->s_blocksize = nfs_block_size(data->bsize, &sb->s_blocksize_bits);
+ sb->u.nfs_sb.s_root = data->root;
+ server = &sb->u.nfs_sb.s_server;
server->rsize = nfs_block_size(data->rsize, NULL);
server->wsize = nfs_block_size(data->wsize, NULL);
server->flags = data->flags;
@@ -179,15 +226,6 @@ nfs_read_super(struct super_block *sb, void *raw_data, int silent)
server->acdirmin = data->acdirmin*HZ;
server->acdirmax = data->acdirmax*HZ;
strcpy(server->hostname, data->hostname);
- sb->u.nfs_sb.s_root = data->root;
-
- /* We now require that the mount process passes the remote address */
- memcpy(&srvaddr, &data->addr, sizeof(srvaddr));
- if (srvaddr.sin_addr.s_addr == INADDR_ANY) {
- printk("NFS: mount program didn't pass remote address!\n");
- MOD_DEC_USE_COUNT;
- return NULL;
- }
/* Which protocol do we use? */
tcp = (data->flags & NFS_MOUNT_TCP);
@@ -210,18 +248,13 @@ nfs_read_super(struct super_block *sb, void *raw_data, int silent)
/* Now create transport and client */
xprt = xprt_create_proto(tcp? IPPROTO_TCP : IPPROTO_UDP,
&srvaddr, &timeparms);
- if (xprt == NULL) {
- printk("NFS: cannot create RPC transport.\n");
- goto failure;
- }
+ if (xprt == NULL)
+ goto out_no_xprt;
clnt = rpc_create_client(xprt, server->hostname, &nfs_program,
NFS_VERSION, authflavor);
- if (clnt == NULL) {
- printk("NFS: cannot create RPC client.\n");
- xprt_destroy(xprt);
- goto failure;
- }
+ if (clnt == NULL)
+ goto out_no_client;
clnt->cl_intr = (data->flags & NFS_MOUNT_INTR)? 1 : 0;
clnt->cl_softrtry = (data->flags & NFS_MOUNT_SOFT)? 1 : 0;
@@ -229,29 +262,67 @@ nfs_read_super(struct super_block *sb, void *raw_data, int silent)
server->client = clnt;
/* Fire up rpciod if not yet running */
+#ifdef RPCIOD_RESULT
+ if (rpciod_up())
+ goto out_no_iod;
+#else
rpciod_up();
+#endif
- /* Unlock super block and try to get root fh attributes */
+ /*
+ * Keep the super block locked while we try to get
+ * the root fh attributes.
+ */
+ root_inode = nfs_fhget(sb, &data->root, NULL);
+ if (!root_inode)
+ goto out_no_root;
+ sb->s_root = d_alloc_root(root_inode, NULL);
+ if (!sb->s_root)
+ goto out_no_root;
+ /* We're airborne */
unlock_super(sb);
- sb->s_root = d_alloc_root(nfs_fhget(sb, &data->root, NULL), NULL);
- if (sb->s_root != NULL) {
- /* We're airborne */
- if (!(server->flags & NFS_MOUNT_NONLM))
- lockd_up();
- return sb;
- }
+ /* Check whether to start the lockd process */
+ if (!(server->flags & NFS_MOUNT_NONLM))
+ lockd_up();
+ return sb;
/* Yargs. It didn't work out. */
+out_no_root:
printk("nfs_read_super: get root inode failed\n");
- rpc_shutdown_client(server->client);
+ iput(root_inode);
rpciod_down();
+#ifdef RPCIOD_RESULT
+ goto out_shutdown;
-failure:
- MOD_DEC_USE_COUNT;
- if (sb->s_lock)
- unlock_super(sb);
+out_no_iod:
+ printk("nfs_read_super: couldn't start rpciod!\n");
+out_shutdown:
+#endif
+ rpc_shutdown_client(server->client);
+ goto out_unlock;
+
+out_no_client:
+ printk("NFS: cannot create RPC client.\n");
+ xprt_destroy(xprt);
+ goto out_unlock;
+
+out_no_xprt:
+ printk("NFS: cannot create RPC transport.\n");
+out_unlock:
+ unlock_super(sb);
+ goto out_fail;
+
+out_no_remote:
+ printk("NFS: mount program didn't pass remote address!\n");
+ goto out_fail;
+
+out_miss_args:
+ printk("nfs_read_super: missing data argument\n");
+
+out_fail:
sb->s_dev = 0;
+ MOD_DEC_USE_COUNT;
return NULL;
}
@@ -312,14 +383,48 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fhandle,
printk("nfs_fhget: iget failed\n");
return NULL;
}
- if (inode->i_dev == sb->s_dev) {
- if (inode->i_ino != fattr->fileid) {
- printk("nfs_fhget: unexpected inode from iget\n");
- return inode;
- }
- *NFS_FH(inode) = *fhandle;
- nfs_refresh_inode(inode, fattr);
+#ifdef NFS_PARANOIA
+if (inode->i_dev != sb->s_dev)
+printk("nfs_fhget: impossible\n");
+#endif
+
+ if (inode->i_ino != fattr->fileid) {
+ printk("nfs_fhget: unexpected inode from iget\n");
+ return inode;
}
+
+ /*
+ * Check whether the mode has been set, as we only want to
+ * do this once. (We don't allow inodes to change types.)
+ */
+ if (inode->i_mode == 0) {
+ inode->i_mode = fattr->mode;
+ if (S_ISREG(inode->i_mode))
+ inode->i_op = &nfs_file_inode_operations;
+ else if (S_ISDIR(inode->i_mode))
+ inode->i_op = &nfs_dir_inode_operations;
+ else if (S_ISLNK(inode->i_mode))
+ inode->i_op = &nfs_symlink_inode_operations;
+ else if (S_ISCHR(inode->i_mode)) {
+ inode->i_op = &chrdev_inode_operations;
+ inode->i_rdev = to_kdev_t(fattr->rdev);
+ } else if (S_ISBLK(inode->i_mode)) {
+ inode->i_op = &blkdev_inode_operations;
+ inode->i_rdev = to_kdev_t(fattr->rdev);
+ } else if (S_ISFIFO(inode->i_mode))
+ init_fifo(inode);
+ else
+ inode->i_op = NULL;
+ /*
+ * Preset the size and mtime, as there's no need
+ * to invalidate the caches.
+ */
+ inode->i_size = fattr->size;
+ inode->i_mtime = fattr->mtime.seconds;
+ NFS_OLDMTIME(inode) = fattr->mtime.seconds;
+ }
+ *NFS_FH(inode) = *fhandle;
+ nfs_refresh_inode(inode, fattr);
dprintk("NFS: fhget(%x/%ld ct=%d)\n",
inode->i_dev, inode->i_ino,
inode->i_count);
@@ -334,6 +439,17 @@ nfs_notify_change(struct inode *inode, struct iattr *attr)
struct nfs_fattr fattr;
int error;
+ /*
+ * Make sure the inode is up-to-date.
+ */
+ error = nfs_revalidate(inode);
+ if (error) {
+#ifdef NFS_PARANOIA
+printk("nfs_notify_change: revalidate failed, error=%d\n", error);
+#endif
+ goto out;
+ }
+
sattr.mode = (u32) -1;
if (attr->ia_valid & ATTR_MODE)
sattr.mode = attr->ia_mode;
@@ -346,7 +462,6 @@ nfs_notify_change(struct inode *inode, struct iattr *attr)
if (attr->ia_valid & ATTR_GID)
sattr.gid = attr->ia_gid;
-
sattr.size = (u32) -1;
if ((attr->ia_valid & ATTR_SIZE) && S_ISREG(inode->i_mode))
sattr.size = attr->ia_size;
@@ -364,11 +479,25 @@ nfs_notify_change(struct inode *inode, struct iattr *attr)
}
error = nfs_proc_setattr(NFS_SERVER(inode), NFS_FH(inode),
- &sattr, &fattr);
- if (!error) {
+ &sattr, &fattr);
+ if (error)
+ goto out;
+ /*
+ * If we changed the size or mtime, update the inode
+ * now to avoid invalidating the page cache.
+ */
+ if (sattr.size != (u32) -1) {
+ if (sattr.size != fattr.size)
+ printk("nfs_notify_change: sattr=%d, fattr=%d??\n",
+ sattr.size, fattr.size);
nfs_truncate_dirty_pages(inode, sattr.size);
- nfs_refresh_inode(inode, &fattr);
+ inode->i_size = sattr.size;
+ inode->i_mtime = fattr.mtime.seconds;
}
+ if (sattr.mtime.seconds != (u32) -1)
+ inode->i_mtime = fattr.mtime.seconds;
+ error = nfs_refresh_inode(inode, &fattr);
+out:
return error;
}
@@ -384,57 +513,154 @@ nfs_revalidate(struct inode *inode)
/*
* This function is called whenever some part of NFS notices that
* the cached attributes have to be refreshed.
- *
- * This is a bit tricky because we have to make sure all dirty pages
- * have been sent off to the server before calling invalidate_inode_pages.
- * To make sure no other process adds more write requests while we try
- * our best to flush them, we make them sleep during the attribute refresh.
- *
- * A very similar scenario holds for the dir cache.
*/
int
_nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
{
struct nfs_fattr fattr;
- int status;
+ int status = 0;
if (jiffies - NFS_READTIME(inode) < NFS_ATTRTIMEO(inode))
- return 0;
+ goto out;
dfprintk(PAGECACHE, "NFS: revalidating %x/%ld inode\n",
inode->i_dev, inode->i_ino);
- NFS_READTIME(inode) = jiffies;
- if ((status = nfs_proc_getattr(server, NFS_FH(inode), &fattr)) < 0)
+ status = nfs_proc_getattr(server, NFS_FH(inode), &fattr);
+ if (status) {
+#ifdef NFS_PARANOIA
+printk("nfs_revalidate_inode: getattr failed, error=%d\n", status);
+#endif
goto done;
+ }
- nfs_refresh_inode(inode, &fattr);
- if (fattr.mtime.seconds != NFS_OLDMTIME(inode)) {
- if (!S_ISDIR(inode->i_mode)) {
- /* This sends off all dirty pages off to the server.
- * Note that this function must not sleep. */
- nfs_invalidate_pages(inode);
- invalidate_inode_pages(inode);
- } else {
- nfs_invalidate_dircache(inode);
- }
-
- NFS_OLDMTIME(inode) = fattr.mtime.seconds;
- NFS_ATTRTIMEO(inode) = NFS_MINATTRTIMEO(inode);
- } else {
+ status = nfs_refresh_inode(inode, &fattr);
+ if (status)
+ goto done;
+ if (fattr.mtime.seconds == NFS_OLDMTIME(inode)) {
/* Update attrtimeo value */
if ((NFS_ATTRTIMEO(inode) <<= 1) > NFS_MAXATTRTIMEO(inode))
NFS_ATTRTIMEO(inode) = NFS_MAXATTRTIMEO(inode);
}
- status = 0;
+ NFS_OLDMTIME(inode) = fattr.mtime.seconds;
done:
dfprintk(PAGECACHE,
"NFS: inode %x/%ld revalidation complete (status %d).\n",
inode->i_dev, inode->i_ino, status);
+out:
return status;
}
/*
+ * Many nfs protocol calls return the new file attributes after
+ * an operation. Here we update the inode to reflect the state
+ * of the server's inode.
+ *
+ * This is a bit tricky because we have to make sure all dirty pages
+ * have been sent off to the server before calling invalidate_inode_pages.
+ * To make sure no other process adds more write requests while we try
+ * our best to flush them, we make them sleep during the attribute refresh.
+ *
+ * A very similar scenario holds for the dir cache.
+ */
+int
+nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr)
+{
+ int invalid = 0;
+ int error = -EIO;
+
+ dfprintk(VFS, "NFS: refresh_inode(%x/%ld ct=%d)\n",
+ inode->i_dev, inode->i_ino, inode->i_count);
+
+ if (!inode || !fattr) {
+ printk("nfs_refresh_inode: inode or fattr is NULL\n");
+ goto out;
+ }
+ if (inode->i_ino != fattr->fileid) {
+ printk("nfs_refresh_inode: inode number mismatch\n");
+ goto out;
+ }
+
+ /*
+ * Make sure the inode's type hasn't changed.
+ */
+ if ((inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT))
+ goto out_changed;
+
+ /*
+ * If the size or mtime changed from outside, we want
+ * to invalidate the local caches immediately.
+ */
+ if (inode->i_size != fattr->size) {
+#ifdef NFS_DEBUG_VERBOSE
+printk("NFS: size change on %x/%ld\n", inode->i_dev, inode->i_ino);
+#endif
+ invalid = 1;
+ }
+ if (inode->i_mtime != fattr->mtime.seconds) {
+#ifdef NFS_DEBUG_VERBOSE
+printk("NFS: mtime change on %x/%ld\n", inode->i_dev, inode->i_ino);
+#endif
+ invalid = 1;
+ }
+
+ inode->i_mode = fattr->mode;
+ inode->i_nlink = fattr->nlink;
+ inode->i_uid = fattr->uid;
+ inode->i_gid = fattr->gid;
+
+ inode->i_size = fattr->size;
+ inode->i_blocks = fattr->blocks;
+ inode->i_atime = fattr->atime.seconds;
+ inode->i_mtime = fattr->mtime.seconds;
+ inode->i_ctime = fattr->ctime.seconds;
+ /*
+ * Update the read time so we don't revalidate too often.
+ */
+ NFS_READTIME(inode) = jiffies;
+ error = 0;
+ if (invalid)
+ goto out_invalid;
+out:
+ return error;
+
+out_changed:
+ /*
+ * Big trouble! The inode has become a different object.
+ */
+#ifdef NFS_PARANOIA
+printk("nfs_refresh_inode: inode %ld mode changed, %07o to %07o\n",
+inode->i_ino, inode->i_mode, fattr->mode);
+#endif
+ fattr->mode = inode->i_mode; /* save mode */
+ make_bad_inode(inode);
+ inode->i_mode = fattr->mode; /* restore mode */
+ /*
+ * No need to worry about unhashing the dentry, as the
+ * lookup validation will know that the inode is bad.
+ * (But we fall through to invalidate the caches.)
+ */
+
+out_invalid:
+ /*
+ * Invalidate the local caches
+ */
+#ifdef NFS_DEBUG_VERBOSE
+printk("nfs_refresh_inode: invalidating %ld pages\n", inode->i_nrpages);
+#endif
+ if (!S_ISDIR(inode->i_mode)) {
+ /* This sends all dirty pages off to the server.
+ * Note that this function must not sleep. */
+ nfs_invalidate_pages(inode);
+ invalidate_inode_pages(inode);
+ } else
+ nfs_invalidate_dircache(inode);
+ NFS_CACHEINV(inode);
+ NFS_ATTRTIMEO(inode) = NFS_MINATTRTIMEO(inode);
+ goto out;
+}
+
+/*
* File system information
*/
static struct file_system_type nfs_fs_type = {
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index 5eec5eb65..0311b7d0b 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -23,6 +23,7 @@
#include <linux/nfs_fs.h>
#define NFSDBG_FACILITY NFSDBG_XDR
+/* #define NFS_PARANOIA 1 */
#define QUADLEN(len) (((len) + 3) >> 2)
static int nfs_stat_to_errno(int stat);
@@ -371,17 +372,18 @@ nfs_xdr_readdirargs(struct rpc_rqst *req, u32 *p, struct nfs_readdirargs *args)
* to avoid a malloc of NFS_MAXNAMLEN+1 for each file name.
* After decoding, the layout in memory looks like this:
* entry1 entry2 ... entryN <space> stringN ... string2 string1
+ * Each entry consists of three __u32 values, the same space as NFS uses.
* Note that the strings are not null-terminated so that the entire number
* of entries returned by the server should fit into the buffer.
*/
static int
nfs_xdr_readdirres(struct rpc_rqst *req, u32 *p, struct nfs_readdirres *res)
{
- struct nfs_entry *entry;
struct iovec *iov = req->rq_rvec;
int status, nr, len;
- char *string;
+ char *string, *start;
u32 *end;
+ __u32 fileid, cookie, *entry;
if ((status = ntohl(*p++)))
return -nfs_stat_to_errno(status);
@@ -396,10 +398,11 @@ nfs_xdr_readdirres(struct rpc_rqst *req, u32 *p, struct nfs_readdirres *res)
end = (u32 *) ((u8 *) p + iov[1].iov_len);
/* Get start and end of dirent buffer */
- entry = (struct nfs_entry *) res->buffer;
+ entry = (__u32 *) res->buffer;
+ start = (char *) res->buffer;
string = (char *) res->buffer + res->bufsiz;
- for (nr = 0; *p++; nr++, entry++) {
- entry->fileid = ntohl(*p++);
+ for (nr = 0; *p++; nr++) {
+ fileid = ntohl(*p++);
len = ntohl(*p++);
if ((p + QUADLEN(len) + 3) > end) {
@@ -413,27 +416,36 @@ nfs_xdr_readdirres(struct rpc_rqst *req, u32 *p, struct nfs_readdirres *res)
return -errno_NFSERR_IO;
}
string -= len;
- if ((void *) (entry+1) > (void *) string) {
- /* This may actually happen because an nfs_entry
- * will take up more space than the XDR data. On
- * 32bit machines that's due to 8byte alignment,
- * on 64bit machines that's because the char * takes
- * up 2 longs.
- *
- * THIS IS BAD!
+ if ((void *) (entry+3) > (void *) string) {
+ /*
+ * This error is impossible as long as the temp
+ * buffer is no larger than the user buffer. The
+ * current packing algorithm uses the same amount
+ * of space in the user buffer as in the XDR data,
+ * so it's guaranteed to fit.
*/
- printk(KERN_NOTICE "NFS: should not happen in %s!\n",
+ printk("NFS: incorrect buffer size in %s!\n",
__FUNCTION__);
break;
}
- entry->name = string;
- entry->length = len;
memmove(string, p, len);
p += QUADLEN(len);
- entry->cookie = ntohl(*p++);
- entry->eof = !p[0] && p[1];
+ cookie = ntohl(*p++);
+ /*
+ * To make everything fit, we encode the length, offset,
+ * and eof flag into 32 bits. This works for filenames
+ * up to 32K and PAGE_SIZE up to 64K.
+ */
+ status = !p[0] && p[1] ? (1 << 15) : 0; /* eof flag */
+ *entry++ = fileid;
+ *entry++ = cookie;
+ *entry++ = ((string - start) << 16) | status | (len & 0x7FFF);
}
+#ifdef NFS_PARANOIA
+printk("nfs_xdr_readdirres: %d entries, ent sp=%d, str sp=%d\n",
+nr, ((char *) entry - start), (start + res->bufsiz - string));
+#endif
return nr;
}
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index f48a6217c..94096d928 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -384,17 +384,18 @@ nfs_xdr_readdirargs(struct rpc_rqst *req, u32 *p, struct nfs_readdirargs *args)
* to avoid a malloc of NFS_MAXNAMLEN+1 for each file name.
* After decoding, the layout in memory looks like this:
* entry1 entry2 ... entryN <space> stringN ... string2 string1
+ * Each entry consists of three __u32 values, the same space as NFS uses.
* Note that the strings are not null-terminated so that the entire number
* of entries returned by the server should fit into the buffer.
*/
static int
nfs_xdr_readdirres(struct rpc_rqst *req, u32 *p, struct nfs_readdirres *res)
{
- struct nfs_entry *entry;
struct iovec *iov = req->rq_rvec;
int status, nr, len;
- char *string;
+ char *string, *start;
u32 *end;
+ __u32 fileid, cookie, *entry;
if ((status = ntohl(*p++)))
return -nfs_stat_to_errno(status);
@@ -413,10 +414,11 @@ nfs_xdr_readdirres(struct rpc_rqst *req, u32 *p, struct nfs_readdirres *res)
return -errno_NFSERR_IO;
}
- string = (char *) res->buffer + res->bufsiz;
- entry = (struct nfs_entry *) res->buffer;
- for (nr = 0; *p++; nr++, entry++) {
- entry->fileid = ntohl(*p++);
+ entry = (__u32 *) res->buffer;
+ start = (char *) res->buffer;
+ string = start + res->bufsiz;
+ for (nr = 0; *p++; nr++) {
+ fileid = ntohl(*p++);
len = ntohl(*p++);
if ((p + QUADLEN(len) + 3) > end) {
@@ -430,22 +432,40 @@ nfs_xdr_readdirres(struct rpc_rqst *req, u32 *p, struct nfs_readdirres *res)
return -errno_NFSERR_IO;
}
string -= len;
- if ((void *) (entry+1) > (void *) string) {
- dprintk("NFS: shouldnothappen in readdirres_decode!\n");
- break; /* should not happen */
+ if ((void *) (entry+3) > (void *) string) {
+ /*
+ * This error is impossible as long as the temp
+ * buffer is no larger than the user buffer. The
+ * current packing algorithm uses the same amount
+ * of space in the user buffer as in the XDR data,
+ * so it's guaranteed to fit.
+ */
+ printk("NFS: incorrect buffer size in %s!\n",
+ __FUNCTION__);
+ break;
}
- entry->name = string;
- entry->length = len;
memmove(string, p, len);
p += QUADLEN(len);
- entry->cookie = ntohl(*p++);
- entry->eof = !p[0] && p[1];
+ cookie = ntohl(*p++);
+ /*
+ * To make everything fit, we encode the length, offset,
+ * and eof flag into 32 bits. This works for filenames
+ * up to 32K and PAGE_SIZE up to 64K.
+ */
+ status = !p[0] && p[1] ? (1 << 15) : 0; /* eof flag */
+ *entry++ = fileid;
+ *entry++ = cookie;
+ *entry++ = ((string - start) << 16) | status | (len & 0x7FFF);
/*
dprintk("NFS: decoded dirent %.*s cookie %d eof %d\n",
- len, string, entry->cookie, entry->eof);
+ len, string, cookie, status);
*/
}
+#ifdef NFS_PARANOIA
+printk("nfs_xdr_readdirres: %d entries, ent sp=%d, str sp=%d\n",
+nr, ((char *) entry - start), (start + res->bufsiz - string));
+#endif
return nr;
}
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index 58dcd95d0..416ed294e 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -250,25 +250,43 @@ nfs_proc_rmdir(struct nfs_server *server, struct nfs_fh *dir, const char *name)
*/
int
nfs_proc_readdir(struct nfs_server *server, struct nfs_fh *fhandle,
- u32 cookie, unsigned int size, struct nfs_entry *entry)
+ u32 cookie, unsigned int size, __u32 *entry)
{
struct nfs_readdirargs arg;
struct nfs_readdirres res;
void * buffer;
+ unsigned int buf_size = PAGE_SIZE; /* upper bound; reduced below */
int status;
/* First get a temp buffer (one page) for the readdir reply */
- while (!(buffer = (void *) get_free_page(GFP_USER))) {
- need_resched = 1;
- schedule();
- if (signalled())
- return -ERESTARTSYS;
- }
+ /* N.B. does this really need to be cleared? */
+ status = -ENOMEM; /* returned via "out" if the allocation fails */
+ buffer = (void *) get_free_page(GFP_KERNEL);
+ if (!buffer)
+ goto out;
+
+ /*
+ * Calculate the effective size of the buffer. To make sure
+ * that the returned data will fit into the user's buffer,
+ * we decrease the buffer size as necessary.
+ *
+ * Note: NFS returns three __u32 values for each entry,
+ * and we assume that the data is packed into the user
+ * buffer with the same efficiency.
+ *
+ * The result is the smallest of PAGE_SIZE, the caller's
+ * size and server->rsize, and is what we pass as arg.bufsiz.
+ */
+ if (size < buf_size)
+ buf_size = size;
+ if (server->rsize < buf_size)
+ buf_size = server->rsize;
+#if 0
+printk("nfs_proc_readdir: user size=%d, rsize=%d, buf_size=%d\n",
+size, server->rsize, buf_size);
+#endif
arg.fh = fhandle;
arg.cookie = cookie;
arg.buffer = buffer; /* temp page allocated above */
- arg.bufsiz = server->rsize < PAGE_SIZE? server->rsize : PAGE_SIZE;
+ arg.bufsiz = buf_size;
res.buffer = entry; /* caller-supplied result buffer */
res.bufsiz = size; /* full caller size, not the reduced buf_size */
@@ -276,6 +294,7 @@ nfs_proc_readdir(struct nfs_server *server, struct nfs_fh *fhandle,
status = rpc_call(server->client, NFSPROC_READDIR, &arg, &res, 0);
dprintk("NFS reply readdir: %d\n", status);
free_page((unsigned long) buffer); /* temp page is released once rpc_call has returned */
+out:
return status; /* rpc_call result, or -ENOMEM when no page was available */
}
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index ec5a1f7be..97663cc11 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -68,6 +68,8 @@
#define NFSDBG_FACILITY NFSDBG_PAGECACHE
+int check_failed_request(struct inode *);
+
static void nfs_wback_lock(struct rpc_task *task);
static void nfs_wback_result(struct rpc_task *task);
@@ -120,6 +122,7 @@ struct nfs_wreq {
* Limit number of delayed writes
*/
static int nr_write_requests = 0;
+static int nr_failed_requests = 0;
static struct rpc_wait_queue write_queue = RPC_INIT_WAITQ("write_chain");
struct nfs_wreq * nfs_failed_requests = NULL;
@@ -196,22 +199,44 @@ nfs_writepage_sync(struct inode *inode, struct page *page,
clear_bit(PG_uptodate, &page->flags);
goto io_error;
}
+ if (result != wsize)
+ printk("NFS: short write, wsize=%u, result=%d\n",
+ wsize, result);
refresh = 1;
buffer += wsize;
offset += wsize;
written += wsize;
count -= wsize;
+ /*
+ * If we've extended the file, update the inode
+ * now so we don't invalidate the cache.
+ */
+ if (offset > inode->i_size)
+ inode->i_size = offset;
} while (count);
io_error:
+ /* N.B. do we want to refresh if there was an error?? (fattr valid?) */
if (refresh) {
/* See comments in nfs_wback_result */
+ /* N.B. I don't think this is right -- sync writes in order */
if (fattr.size < inode->i_size)
fattr.size = inode->i_size;
+ if (fattr.mtime.seconds < inode->i_mtime)
+ printk("nfs_writepage_sync: prior time??\n");
/* Solaris 2.5 server seems to send garbled
* fattrs occasionally */
- if (inode->i_ino == fattr.fileid)
+ if (inode->i_ino == fattr.fileid) {
+ /*
+ * We expect the mtime value to change, and
+ * don't want to invalidate the caches.
+ */
+ inode->i_mtime = fattr.mtime.seconds;
nfs_refresh_inode(inode, &fattr);
+ }
+ else
+ printk("nfs_writepage_sync: inode %ld, got %u?\n",
+ inode->i_ino, fattr.fileid);
}
nfs_unlock_page(page);
@@ -260,28 +285,73 @@ find_write_request(struct inode *inode, struct page *page)
/*
* Find a failed write request by pid
+ *
+ * Walks the nfs_failed_requests list with WB_NEXT() until it arrives
+ * back at the head, and returns the first request whose wb_inode is
+ * this inode and whose wb_pid equals pid. A pid of 0 matches a
+ * request from any pid. Returns NULL when the list is empty or when
+ * no request matches.
*/
-static inline struct nfs_wreq *
+static struct nfs_wreq *
find_failed_request(struct inode *inode, pid_t pid)
{
struct nfs_wreq *head, *req;
- if (!(req = head = nfs_failed_requests))
- return NULL;
- do {
- if (req->wb_inode == inode && req->wb_pid == pid)
+ req = head = nfs_failed_requests;
+ while (req != NULL) { /* empty list: fall through to return NULL */
+ if (req->wb_inode == inode && (pid == 0 || req->wb_pid == pid))
return req;
- } while ((req = WB_NEXT(req)) != head);
+ if ((req = WB_NEXT(req)) == head) /* back at the start: every entry visited */
+ break;
+ }
return NULL;
}
/*
+ * Add a request to the failed list.
+ *
+ * The request is appended to nfs_failed_requests and the
+ * nr_failed_requests counter is incremented. A message is printed
+ * each time the counter reaches the current threshold, and the
+ * threshold is then doubled (16, 32, 64, ...), so the log shows
+ * growth of the list without one line per failure.
+ */
+static void
+append_failed_request(struct nfs_wreq * req)
+{
+ static int old_max = 16; /* next count at which to report; persists across calls */
+
+ append_write_request(&nfs_failed_requests, req);
+ nr_failed_requests++;
+ if (nr_failed_requests >= old_max) {
+ printk("NFS: %d failed requests\n", nr_failed_requests);
+ old_max = old_max << 1; /* double the reporting threshold */
+ }
+}
+
+/*
+ * Remove a request from the failed list and free it.
+ *
+ * The request is unlinked from nfs_failed_requests, released with
+ * kfree() and nr_failed_requests is decremented. The caller must
+ * not touch req after this returns; anything it needs from the
+ * request (e.g. the task status) has to be read beforehand.
+ */
+static void
+remove_failed_request(struct nfs_wreq * req)
+{
+ remove_write_request(&nfs_failed_requests, req);
+ kfree(req);
+ nr_failed_requests--;
+}
+
+/*
+ * Find and release all failed requests for this inode.
+ *
+ * Repeatedly looks up a failed request for the inode, passing a pid
+ * of 0 so that requests from every pid are matched, and removes and
+ * frees each one found. The status codes stored in the requests are
+ * discarded, not reported.
+ *
+ * Returns the number of requests that were released (0 if none).
+ */
+int
+check_failed_request(struct inode * inode)
+{
+ struct nfs_wreq * req;
+ int found = 0; /* count of requests released so far */
+
+ while ((req = find_failed_request(inode, 0)) != NULL) {
+ remove_failed_request(req); /* unlinks and frees req */
+ found++;
+ }
+ return found;
+}
+
+/*
* Try to merge adjacent write requests. This works only for requests
* issued by the same user.
*/
static inline int
-update_write_request(struct nfs_wreq *req, unsigned first, unsigned bytes)
+update_write_request(struct nfs_wreq *req, unsigned int first,
+ unsigned int bytes)
{
- unsigned rqfirst = req->wb_offset,
+ unsigned int rqfirst = req->wb_offset,
rqlast = rqfirst + req->wb_bytes,
last = first + bytes;
@@ -313,7 +383,7 @@ update_write_request(struct nfs_wreq *req, unsigned first, unsigned bytes)
*/
static inline struct nfs_wreq *
create_write_request(struct inode *inode, struct page *page,
- unsigned offset, unsigned bytes)
+ unsigned int offset, unsigned int bytes)
{
struct nfs_wreq *wreq;
struct rpc_clnt *clnt = NFS_CLIENT(inode);
@@ -327,7 +397,7 @@ create_write_request(struct inode *inode, struct page *page,
wreq = (struct nfs_wreq *) kmalloc(sizeof(*wreq), GFP_USER);
if (!wreq)
- return NULL;
+ goto out_fail;
memset(wreq, 0, sizeof(*wreq));
task = &wreq->wb_task;
@@ -336,11 +406,8 @@ create_write_request(struct inode *inode, struct page *page,
task->tk_action = nfs_wback_lock;
rpcauth_lookupcred(task); /* Obtain user creds */
- if (task->tk_status < 0) {
- rpc_release_task(task);
- kfree(wreq);
- return NULL;
- }
+ if (task->tk_status < 0)
+ goto out_req;
/* Put the task on inode's writeback request list. */
wreq->wb_inode = inode;
@@ -357,6 +424,12 @@ create_write_request(struct inode *inode, struct page *page,
rpc_wake_up_next(&write_queue);
return wreq;
+
+out_req:
+ rpc_release_task(task);
+ kfree(wreq);
+out_fail:
+ return NULL;
}
/*
@@ -423,7 +496,9 @@ wait_on_write_request(struct nfs_wreq *req)
}
remove_wait_queue(&page->wait, &wait);
current->state = TASK_RUNNING;
- atomic_dec(&page->count);
+ if (atomic_read(&page->count) == 1)
+ printk("NFS: page unused while waiting\n");
+ free_page(page_address(page));
return retval;
}
@@ -487,12 +562,13 @@ nfs_updatepage(struct inode *inode, struct page *page, const char *buffer,
}
/* Create the write request. */
- if (!(req = create_write_request(inode, page, offset, count))) {
- status = -ENOBUFS;
+ status = -ENOBUFS;
+ req = create_write_request(inode, page, offset, count);
+ if (!req)
goto done;
- }
/* Copy data to page buffer. */
+ /* N.B. should check for fault here ... */
copy_from_user(page_addr + offset, buffer, count);
/* Schedule request */
@@ -519,6 +595,7 @@ done:
transfer_page_lock(req);
/* rpc_execute(&req->wb_task); */
if (sync) {
+ /* N.B. if signalled, result not ready? */
wait_on_write_request(req);
if ((count = nfs_write_error(inode)) < 0)
status = count;
@@ -578,10 +655,20 @@ nfs_flush_pages(struct inode *inode, pid_t pid, off_t offset, off_t len,
if (rqoffset < end && offset < rqend
&& (pid == 0 || req->wb_pid == pid)) {
- if (!WB_HAVELOCK(req))
+ if (!WB_HAVELOCK(req)) {
+#ifdef NFS_PARANOIA
+printk("nfs_flush: flushing inode=%ld, %d @ %lu\n",
+req->wb_inode->i_ino, req->wb_bytes, rqoffset);
+#endif
nfs_flush_request(req);
+ }
last = req;
}
+ } else {
+#ifdef NFS_PARANOIA
+printk("nfs_flush_pages: in progress inode=%ld, %d @ %lu\n",
+req->wb_inode->i_ino, req->wb_bytes, rqoffset);
+#endif
}
if (invalidate)
req->wb_flags |= NFS_WRITE_INVALIDATE;
@@ -593,7 +680,11 @@ nfs_flush_pages(struct inode *inode, pid_t pid, off_t offset, off_t len,
}
/*
- * Cancel all writeback requests, both pending and in process.
+ * Cancel all writeback requests, both pending and in progress.
+ *
+ * N.B. This doesn't seem to wake up the tasks -- are we sure
+ * they will eventually complete? Also, this could overwrite a
+ * failed status code from an already-completed task.
*/
static void
nfs_cancel_dirty(struct inode *inode, pid_t pid)
@@ -602,7 +693,8 @@ nfs_cancel_dirty(struct inode *inode, pid_t pid)
req = head = NFS_WRITEBACK(inode);
while (req != NULL) {
- if (req->wb_pid == pid) {
+ /* N.B. check for task already finished? */
+ if (pid == 0 || req->wb_pid == pid) {
req->wb_flags |= NFS_WRITE_CANCELLED;
rpc_exit(&req->wb_task, 0);
}
@@ -620,36 +712,43 @@ nfs_cancel_dirty(struct inode *inode, pid_t pid)
* this isn't used by the nlm module yet.
*/
int
-nfs_flush_dirty_pages(struct inode *inode, off_t offset, off_t len)
+nfs_flush_dirty_pages(struct inode *inode, pid_t pid, off_t offset, off_t len)
{
struct nfs_wreq *last = NULL;
+ int result = 0, cancel = 0; /* cancel: set once nfs_cancel_dirty has been called */
dprintk("NFS: flush_dirty_pages(%x/%ld for pid %d %ld/%ld)\n",
inode->i_dev, inode->i_ino, current->pid, /* NOTE(review): logs current->pid, not the pid argument */
offset, len);
- if (IS_SOFT && signalled())
- nfs_cancel_dirty(inode, current->pid);
+ if (IS_SOFT && signalled()) {
+ nfs_cancel_dirty(inode, pid);
+ cancel = 1;
+ }
for (;;) {
- if (IS_SOFT && signalled())
- return -ERESTARTSYS;
+ if (IS_SOFT && signalled()) {
+ if (!cancel) /* signal arrived after the check above: cancel now */
+ nfs_cancel_dirty(inode, pid);
+ result = -ERESTARTSYS;
+ break;
+ }
- /* Flush all pending writes for this pid and file region */
- last = nfs_flush_pages(inode, current->pid, offset, len, 0);
+ /*
+ * Flush all pending writes for the pid and file region,
+ * then wait on the request nfs_flush_pages handed back.
+ * We loop until it returns NULL.
+ */
+ last = nfs_flush_pages(inode, pid, offset, len, 0);
if (last == NULL) /* presumably no matching request is left — verify in nfs_flush_pages */
break;
wait_on_write_request(last);
}
- return 0;
+ return result; /* 0, or -ERESTARTSYS when interrupted on a soft mount */
}
/*
* Flush out any pending write requests and flag that they be discarded
* after the write is complete.
*
- * This function is called from nfs_revalidate_inode just before it calls
+ * This function is called from nfs_refresh_inode just before it calls
* invalidate_inode_pages. After nfs_flush_pages returns, we can be sure
* that all dirty pages are locked, so that invalidate_inode_pages does
* not throw away any dirty pages.
@@ -705,15 +804,14 @@ nfs_check_error(struct inode *inode)
dprintk("nfs: checking for write error inode %04x/%ld\n",
inode->i_dev, inode->i_ino);
- if (!(req = find_failed_request(inode, current->pid)))
- return 0;
-
- dprintk("nfs: write error %d inode %04x/%ld\n",
+ req = find_failed_request(inode, current->pid);
+ if (req) {
+ dprintk("nfs: write error %d inode %04x/%ld\n",
req->wb_task.tk_status, inode->i_dev, inode->i_ino);
- status = req->wb_task.tk_status;
- remove_write_request(&nfs_failed_requests, req);
- kfree(req);
+ status = req->wb_task.tk_status;
+ remove_failed_request(req);
+ }
return status;
}
@@ -789,36 +887,45 @@ nfs_wback_result(struct rpc_task *task)
page = req->wb_page;
status = task->tk_status;
- /* Remove request from writeback list and wake up tasks
- * sleeping on it. */
- remove_write_request(&NFS_WRITEBACK(inode), req);
-
if (status < 0) {
/*
* An error occurred. Report the error back to the
- * application by adding the failed request to the
- * inode's error list.
+ * application by adding the request to the failed
+ * requests list.
*/
- if (find_failed_request(inode, req->wb_pid)) {
+ if (find_failed_request(inode, req->wb_pid))
status = 0;
- } else {
- dprintk("NFS: %4d saving write failure code\n",
- task->tk_pid);
- append_write_request(&nfs_failed_requests, req);
- }
clear_bit(PG_uptodate, &page->flags);
} else if (!WB_CANCELLED(req)) {
+ struct nfs_fattr *fattr = req->wb_fattr;
/* Update attributes as result of writeback.
* Beware: when UDP replies arrive out of order, we
* may end up overwriting a previous, bigger file size.
*/
- if (req->wb_fattr->size < inode->i_size)
- req->wb_fattr->size = inode->i_size;
- /* possible Solaris 2.5 server bug workaround */
- if (inode->i_ino == req->wb_fattr->fileid)
- nfs_refresh_inode(inode, req->wb_fattr);
+ if (fattr->mtime.seconds >= inode->i_mtime) {
+ if (fattr->size < inode->i_size)
+ fattr->size = inode->i_size;
+
+ /* possible Solaris 2.5 server bug workaround */
+ if (inode->i_ino == fattr->fileid) {
+ /*
+ * We expect these values to change, and
+ * don't want to invalidate the caches.
+ */
+ inode->i_size = fattr->size;
+ inode->i_mtime = fattr->mtime.seconds;
+ nfs_refresh_inode(inode, fattr);
+ }
+ else
+ printk("nfs_wback_result: inode %ld, got %u?\n",
+ inode->i_ino, fattr->fileid);
+ }
}
+ /*
+ * This call might block, so we defer removing the request
+ * from the inode's writeback list.
+ */
rpc_release_task(task);
if (WB_INVALIDATE(req))
@@ -830,8 +937,20 @@ nfs_wback_result(struct rpc_task *task)
kfree(req->wb_args);
req->wb_args = 0;
}
+
+ /*
+ * Now it's safe to remove the request from the inode's
+ * writeback list and wake up any tasks sleeping on it.
+ * If the request failed, add it to the failed list.
+ */
+ remove_write_request(&NFS_WRITEBACK(inode), req);
+
if (status >= 0)
kfree(req);
+ else {
+ dprintk("NFS: %4d saving write failure code\n", task->tk_pid);
+ append_failed_request(req);
+ }
free_page(page_address(page));
nr_write_requests--;