diff options
author | Ralf Baechle <ralf@linux-mips.org> | 1997-12-06 23:51:34 +0000 |
---|---|---|
committer | Ralf Baechle <ralf@linux-mips.org> | 1997-12-06 23:51:34 +0000 |
commit | 230e5ab6a084ed50470f101934782dbf54b0d06b (patch) | |
tree | 5dd821c8d33f450470588e7a543f74bf74306e9e /fs/nfs | |
parent | c9b1c8a64c6444d189856f1e26bdcb8b4cd0113a (diff) |
Merge with Linux 2.1.67.
Diffstat (limited to 'fs/nfs')
-rw-r--r-- | fs/nfs/dir.c | 858 | ||||
-rw-r--r-- | fs/nfs/file.c | 112 | ||||
-rw-r--r-- | fs/nfs/inode.c | 384 | ||||
-rw-r--r-- | fs/nfs/nfs2xdr.c | 48 | ||||
-rw-r--r-- | fs/nfs/nfs3xdr.c | 48 | ||||
-rw-r--r-- | fs/nfs/proc.c | 35 | ||||
-rw-r--r-- | fs/nfs/write.c | 231 |
7 files changed, 1195 insertions, 521 deletions
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index acee50754..9e1d936dd 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -29,6 +29,13 @@ #include <asm/segment.h> /* for fs functions */ +#define NFS_MAX_AGE 10*HZ /* max age for dentry validation */ + +/* needed by smbfs as well ... move to dcache? */ +extern void nfs_renew_times(struct dentry *); + +#define NFS_PARANOIA 1 + /* * Head for a dircache entry. Currently still very simple; when * the cache grows larger, we will need a LRU list. @@ -36,18 +43,20 @@ struct nfs_dirent { dev_t dev; /* device number */ ino_t ino; /* inode number */ - u32 cookie; /* cooke of first entry */ + u32 cookie; /* cookie of first entry */ unsigned short valid : 1, /* data is valid */ locked : 1; /* entry locked */ unsigned int size; /* # of entries */ unsigned long age; /* last used */ unsigned long mtime; /* last attr stamp */ struct wait_queue * wait; - struct nfs_entry * entry; + __u32 * entry; /* three __u32's per entry */ }; -static int nfs_dir_open(struct inode * inode, struct file * file); -static long nfs_dir_read(struct inode *, struct file *, char *, unsigned long); +static int nfs_safe_remove(struct dentry *); + +static int nfs_dir_open(struct inode *, struct file *); +static ssize_t nfs_dir_read(struct file *, char *, size_t, loff_t *); static int nfs_readdir(struct file *, void *, filldir_t); static int nfs_lookup(struct inode *, struct dentry *); static int nfs_create(struct inode *, struct dentry *, int); @@ -57,7 +66,8 @@ static int nfs_unlink(struct inode *, struct dentry *); static int nfs_symlink(struct inode *, struct dentry *, const char *); static int nfs_link(struct inode *, struct inode *, struct dentry *); static int nfs_mknod(struct inode *, struct dentry *, int, int); -static int nfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *); +static int nfs_rename(struct inode *, struct dentry *, + struct inode *, struct dentry *); static struct file_operations nfs_dir_operations = { NULL, /* lseek - default */ @@ -102,8 +112,8 @@ nfs_dir_open(struct inode *dir, struct file *file) return nfs_revalidate_inode(NFS_SERVER(dir), dir); } -static long -nfs_dir_read(struct inode *inode, struct file *filp, char *buf, unsigned long count) +static ssize_t +nfs_dir_read(struct file *filp, char *buf, size_t count, loff_t *ppos) { return -EISDIR; } @@ -123,15 +133,16 @@ static struct nfs_dirent dircache[NFS_MAX_DIRCACHE]; static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) { + struct inode *inode = filp->f_dentry->d_inode; static struct wait_queue *readdir_wait = NULL; struct wait_queue **waitp = NULL; struct nfs_dirent *cache, *free; - struct nfs_entry *entry; unsigned long age, dead; u32 cookie; int ismydir, result; int i, j, index = 0; - struct inode *inode = filp->f_dentry->d_inode; + __u32 *entry; + char *name, *start; dfprintk(VFS, "NFS: nfs_readdir(%x/%ld)\n", inode->i_dev, inode->i_ino); if (!inode || !S_ISDIR(inode->i_mode)) { @@ -148,7 +159,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) again: if (waitp) { interruptible_sleep_on(waitp); - if (current->signal & ~current->blocked) + if (signal_pending(current)) return -ERESTARTSYS; waitp = NULL; } @@ -174,7 +185,7 @@ again: goto again; } - if (ismydir && cache->mtime != NFS_OLDMTIME(inode)) + if (ismydir && cache->mtime != inode->i_mtime) cache->valid = 0; if (!cache->valid || cache->age < dead) { @@ -194,17 +205,15 @@ again: break; } for (j = 0; j < cache->size; j++) { - /* - dprintk("NFS: examing entry %.*s @%d\n", - (int) cache->entry[j].length, - cache->entry[j].name, - cache->entry[j].cookie); - */ - if (cache->entry[j].cookie != cookie) + __u32 *this_ent = cache->entry + j*3; + + if (*(this_ent+1) != cookie) continue; if (j < cache->size - 1) { - entry = cache->entry + (index = j + 1); - } else if (cache->entry[j].eof) { + index = j + 1; + entry = this_ent + 3; + } else if (*(this_ent+2) & (1 << 15)) { + /* eof */ return 0; } break; @@ -235,12 +244,10 @@ again: cache->dev = inode->i_dev; cache->ino = inode->i_ino; if (!cache->entry) { - cache->entry = (struct nfs_entry *) - get_free_page(GFP_KERNEL); - if (!cache->entry) { - result = -ENOMEM; + result = -ENOMEM; + cache->entry = (__u32 *) get_free_page(GFP_KERNEL); + if (!cache->entry) goto done; - } } result = nfs_proc_readdir(NFS_SERVER(inode), NFS_FH(inode), @@ -251,31 +258,35 @@ again: cache->valid = 1; entry = cache->entry + (index = 0); } - cache->mtime = NFS_OLDMTIME(inode); + cache->mtime = inode->i_mtime; cache->age = jiffies; /* * Yowza! We have a cache entry... */ + start = (char *) cache->entry; while (index < cache->size) { - int nextpos = entry->cookie; + __u32 fileid = *entry++; + __u32 nextpos = *entry++; /* cookie */ + __u32 length = *entry++; /* + * Unpack the eof flag, offset, and length + */ + result = length & (1 << 15); /* eof flag */ + name = start + ((length >> 16) & 0xFFFF); + length &= 0x7FFF; + /* dprintk("NFS: filldir(%p, %.*s, %d, %d, %x, eof %x)\n", entry, - (int) entry->length, entry->name, entry->length, + (int) length, name, length, (unsigned int) filp->f_pos, - entry->fileid, entry->eof); + fileid, result); */ - if (filldir(dirent, entry->name, entry->length, cookie, entry->fileid) < 0) + if (filldir(dirent, name, length, cookie, fileid) < 0) break; cookie = nextpos; - if (nextpos != entry->cookie) { - printk("nfs_readdir: shouldn't happen!\n"); - break; - } index++; - entry++; } filp->f_pos = cookie; result = 0; @@ -293,47 +304,69 @@ done: } /* - * Invalidate dircache entries for inode + * Invalidate dircache entries for an inode. */ void nfs_invalidate_dircache(struct inode *inode) { - struct nfs_dirent *cache; + struct nfs_dirent *cache = dircache; dev_t dev = inode->i_dev; ino_t ino = inode->i_ino; int i; dfprintk(DIRCACHE, "NFS: invalidate dircache for %x/%ld\n", dev, (long)ino); - for (i = 0, cache = dircache; i < NFS_MAX_DIRCACHE; i++, cache++) { - if (!cache->locked && cache->dev == dev && cache->ino == ino) - cache->valid = 0; /* brute force */ + for (i = NFS_MAX_DIRCACHE; i--; cache++) { + if (cache->ino != ino) + continue; + if (cache->dev != dev) + continue; + if (cache->locked) { + printk("NFS: cache locked for %s/%ld\n", + kdevname(dev), (long) ino); + continue; + } + cache->valid = 0; /* brute force */ } } /* - * Free directory cache memory - * Called from cleanup_module + * Invalidate the dircache for a super block (or all caches), + * and release the cache memory. */ void -nfs_free_dircache(void) +nfs_invalidate_dircache_sb(struct super_block *sb) { - struct nfs_dirent *cache; + struct nfs_dirent *cache = dircache; int i; - dfprintk(DIRCACHE, "NFS: freeing dircache\n"); - for (i = 0, cache = dircache; i < NFS_MAX_DIRCACHE; i++, cache++) { - cache->valid = 0; + for (i = NFS_MAX_DIRCACHE; i--; cache++) { + if (sb && sb->s_dev != cache->dev) + continue; if (cache->locked) { - printk("nfs_kfree_cache: locked entry in dircache!\n"); + printk("NFS: cache locked at umount %s\n", + (cache->entry ? "(lost a page!)" : "")); continue; } - if (cache->entry) + cache->valid = 0; /* brute force */ + if (cache->entry) { free_page((unsigned long) cache->entry); - cache->entry = NULL; + cache->entry = NULL; + } } } /* + * Free directory cache memory + * Called from cleanup_module + */ +void +nfs_free_dircache(void) +{ + dfprintk(DIRCACHE, "NFS: freeing dircache\n"); + nfs_invalidate_dircache_sb(NULL); +} + +/* * This is called every time the dcache has a lookup hit, * and we should check whether we can really trust that * lookup. @@ -350,20 +383,92 @@ static int nfs_lookup_revalidate(struct dentry * dentry) unsigned long time = jiffies - dentry->d_time; unsigned long max = 5*HZ; - if (dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode)) - max = 10*HZ; - return time < max; + if (dentry->d_inode) { + if (is_bad_inode(dentry->d_inode)) { +#ifdef NFS_PARANOIA +printk("nfs_lookup_validate: %s/%s has dud inode\n", +dentry->d_parent->d_name.name, dentry->d_name.name); +#endif + goto bad; + } + if (S_ISDIR(dentry->d_inode->i_mode)) + max = NFS_MAX_AGE; + } + + return (time < max) || IS_ROOT(dentry); +bad: + return 0; } -static void nfs_silly_delete(struct dentry *); +/* + * This is called from dput() when d_count is going to 0. + * We use it to clean up silly-renamed files, and to check + * for dentries that have already expired. + */ +static void nfs_dentry_delete(struct dentry *dentry) +{ + if (dentry->d_flags & DCACHE_NFSFS_RENAMED) { + int error; + + dentry->d_flags &= ~DCACHE_NFSFS_RENAMED; +#ifdef NFS_DEBUG +printk("nfs_dentry_delete: unlinking %s/%s\n", +dentry->d_parent->d_name.name, dentry->d_name.name); +#endif + /* Unhash it first */ + d_drop(dentry); + error = nfs_safe_remove(dentry); + if (error) + printk("NFS: can't silly-delete %s/%s, error=%d\n", + dentry->d_parent->d_name.name, + dentry->d_name.name, error); + } + /* + * Check whether to expire the dentry ... + */ + else { + unsigned long age = jiffies - dentry->d_time; + if (age > NFS_MAX_AGE) + d_drop(dentry); + } + +#ifdef NFS_PARANOIA + /* + * Sanity check: if the dentry has been unhashed and the + * inode still has users, we could have problems ... + */ + if (list_empty(&dentry->d_hash) && dentry->d_inode) { + struct inode *inode = dentry->d_inode; + if (inode->i_count > 1) { +printk("nfs_dentry_delete: %s/%s: ino=%ld, count=%d, nlink=%d\n", +dentry->d_parent->d_name.name, dentry->d_name.name, +inode->i_ino, inode->i_count, inode->i_nlink); + } + } +#endif +} static struct dentry_operations nfs_dentry_operations = { - nfs_lookup_revalidate, + nfs_lookup_revalidate, /* d_validate(struct dentry *) */ 0, /* d_hash */ 0, /* d_compare */ - nfs_silly_delete, + nfs_dentry_delete /* d_delete(struct dentry *) */ }; +/* + * Whenever a lookup succeeds, we know the parent directories + * are all valid, so we want to update the dentry timestamps. + */ +void nfs_renew_times(struct dentry * dentry) +{ + for (;;) { + dentry->d_time = jiffies; + if (dentry == dentry->d_parent) + break; + dentry = dentry->d_parent; + } +} + static int nfs_lookup(struct inode *dir, struct dentry * dentry) { struct inode *inode; @@ -373,38 +478,66 @@ static int nfs_lookup(struct inode *dir, struct dentry * dentry) int error; dfprintk(VFS, "NFS: lookup(%x/%ld, %.*s)\n", - dir->i_dev, dir->i_ino, len, dentry->d_name.name); + dir->i_dev, dir->i_ino, len, dentry->d_name.name); if (!dir || !S_ISDIR(dir->i_mode)) { printk("nfs_lookup: inode is NULL or not a directory\n"); return -ENOENT; } + error = -ENAMETOOLONG; if (len > NFS_MAXNAMLEN) - return -ENAMETOOLONG; - - error = nfs_proc_lookup(NFS_SERVER(dir), NFS_FH(dir), dentry->d_name.name, &fhandle, &fattr); + goto out; + error = nfs_proc_lookup(NFS_SERVER(dir), NFS_FH(dir), + dentry->d_name.name, &fhandle, &fattr); inode = NULL; + if (error == -ENOENT) + goto no_entry; if (!error) { + error = -EACCES; inode = nfs_fhget(dir->i_sb, &fhandle, &fattr); - if (!inode) - return -EACCES; - } else if (error != -ENOENT) - return error; - - dentry->d_time = jiffies; - dentry->d_op = &nfs_dentry_operations; - d_add(dentry, inode); - return 0; + if (inode) { + no_entry: + dentry->d_op = &nfs_dentry_operations; + d_add(dentry, inode); + nfs_renew_times(dentry); + error = 0; + } + } +out: + return error; } +/* + * Code common to create, mkdir, and mknod. + */ +static int nfs_instantiate(struct inode *dir, struct dentry *dentry, + struct nfs_fattr *fattr, struct nfs_fh *fhandle) +{ + struct inode *inode; + int error = -EACCES; + + inode = nfs_fhget(dir->i_sb, fhandle, fattr); + if (inode) { + d_instantiate(dentry, inode); + nfs_renew_times(dentry); + error = 0; + } + return error; +} + +/* + * Following a failed create operation, we drop the dentry rather + * than retain a negative dentry. This avoids a problem in the event + * that the operation succeeded on the server, but an error in the + * reply path made it appear to have failed. + */ static int nfs_create(struct inode *dir, struct dentry * dentry, int mode) { struct nfs_sattr sattr; struct nfs_fattr fattr; struct nfs_fh fhandle; - struct inode *inode; int error; dfprintk(VFS, "NFS: create(%x/%ld, %s\n", @@ -415,33 +548,41 @@ static int nfs_create(struct inode *dir, struct dentry * dentry, int mode) return -ENOENT; } + error = -ENAMETOOLONG; if (dentry->d_name.len > NFS_MAXNAMLEN) - return -ENAMETOOLONG; + goto out; sattr.mode = mode; sattr.uid = sattr.gid = sattr.size = (unsigned) -1; sattr.atime.seconds = sattr.mtime.seconds = (unsigned) -1; - error = nfs_proc_create(NFS_SERVER(dir), NFS_FH(dir), - dentry->d_name.name, &sattr, &fhandle, &fattr); - - if (error) - return error; - - inode = nfs_fhget(dir->i_sb, &fhandle, &fattr); - if (!inode) - return -EACCES; + /* + * Invalidate the dir cache before the operation to avoid a race. + */ nfs_invalidate_dircache(dir); - d_instantiate(dentry, inode); - return 0; + error = nfs_proc_create(NFS_SERVER(dir), NFS_FH(dir), + dentry->d_name.name, &sattr, &fhandle, &fattr); + if (!error) + error = nfs_instantiate(dir, dentry, &fattr, &fhandle); + else { +#ifdef NFS_PARANOIA +printk("nfs_create: %s/%s failed, error=%d\n", +dentry->d_parent->d_name.name, dentry->d_name.name, error); +#endif + d_drop(dentry); + } +out: + return error; } +/* + * See comments for nfs_proc_create regarding failed operations. + */ static int nfs_mknod(struct inode *dir, struct dentry *dentry, int mode, int rdev) { struct nfs_sattr sattr; struct nfs_fattr fattr; struct nfs_fh fhandle; - struct inode *inode; int error; dfprintk(VFS, "NFS: mknod(%x/%ld, %s\n", @@ -459,29 +600,31 @@ static int nfs_mknod(struct inode *dir, struct dentry *dentry, int mode, int rde sattr.uid = sattr.gid = sattr.size = (unsigned) -1; if (S_ISCHR(mode) || S_ISBLK(mode)) sattr.size = rdev; /* get out your barf bag */ - sattr.atime.seconds = sattr.mtime.seconds = (unsigned) -1; - error = nfs_proc_create(NFS_SERVER(dir), NFS_FH(dir), - dentry->d_name.name, &sattr, &fhandle, &fattr); - - if (error) - return error; - - inode = nfs_fhget(dir->i_sb, &fhandle, &fattr); - if (!inode) - return -EACCES; nfs_invalidate_dircache(dir); - d_instantiate(dentry, inode); - return 0; + error = nfs_proc_create(NFS_SERVER(dir), NFS_FH(dir), + dentry->d_name.name, &sattr, &fhandle, &fattr); + if (!error) + error = nfs_instantiate(dir, dentry, &fattr, &fhandle); + else { +#ifdef NFS_PARANOIA +printk("nfs_mknod: %s/%s failed, error=%d\n", +dentry->d_parent->d_name.name, dentry->d_name.name, error); +#endif + d_drop(dentry); + } + return error; } +/* + * See comments for nfs_proc_create regarding failed operations. + */ static int nfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) { struct nfs_sattr sattr; struct nfs_fattr fattr; struct nfs_fh fhandle; - struct inode * inode; int error; dfprintk(VFS, "NFS: mkdir(%x/%ld, %s\n", @@ -499,21 +642,29 @@ static int nfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) sattr.uid = sattr.gid = sattr.size = (unsigned) -1; sattr.atime.seconds = sattr.mtime.seconds = (unsigned) -1; - error = nfs_proc_mkdir(NFS_SERVER(dir), NFS_FH(dir), - dentry->d_name.name, &sattr, &fhandle, &fattr); - - if (error) - return error; - - inode = nfs_fhget(dir->i_sb, &fhandle, &fattr); - if (!inode) - return -EACCES; - nfs_invalidate_dircache(dir); - d_instantiate(dentry, inode); - return 0; + error = nfs_proc_mkdir(NFS_SERVER(dir), NFS_FH(dir), + dentry->d_name.name, &sattr, &fhandle, &fattr); + if (!error) + error = nfs_instantiate(dir, dentry, &fattr, &fhandle); + else { +#ifdef NFS_PARANOIA +printk("nfs_mkdir: %s/%s failed, error=%d\n", +dentry->d_parent->d_name.name, dentry->d_name.name, error); +#endif + d_drop(dentry); + } + return error; } +/* + * To avoid retaining a stale inode reference, we check the dentry + * use count prior to the operation, and return EBUSY if it has + * multiple users. + * + * Update inode->i_nlink immediately after a successful operation. + * (See comments for nfs_unlink.) + */ static int nfs_rmdir(struct inode *dir, struct dentry *dentry) { int error; @@ -526,16 +677,28 @@ static int nfs_rmdir(struct inode *dir, struct dentry *dentry) return -ENOENT; } + error = -ENAMETOOLONG; if (dentry->d_name.len > NFS_MAXNAMLEN) - return -ENAMETOOLONG; - + goto out; + + error = -EBUSY; + if (dentry->d_count > 1) { + /* Attempt to shrink child dentries ... */ + shrink_dcache_parent(dentry); + if (dentry->d_count > 1) + goto out; + } + /* Drop the dentry to force a new lookup */ + d_drop(dentry); error = nfs_proc_rmdir(NFS_SERVER(dir), NFS_FH(dir), dentry->d_name.name); - if (error) - return error; - - nfs_invalidate_dircache(dir); - d_delete(dentry); - return 0; + if (!error) { + if (dentry->d_inode->i_nlink) + dentry->d_inode->i_nlink --; + nfs_invalidate_dircache(dir); + nfs_renew_times(dentry); + } +out: + return error; } @@ -551,24 +714,22 @@ static int nfs_rmdir(struct inode *dir, struct dentry *dentry) * * Concerning my choice of the temp name: it is just nice to have * i_ino part of the temp name, as this offers another check whether - * somebody attempts to remove the "silly renamed" dentry - * itself. Which is something that I consider evil. Your opinion may - * vary. + * somebody attempts to remove the "silly renamed" dentry itself. + * Which is something that I consider evil. Your opinion may vary. * BUT: * Now that I compute the hash value right, it should be possible to simply * check for the DCACHE_NFSFS_RENAMED flag in dentry->d_flag instead of * doing the string compare. * WHICH MEANS: * This offers the opportunity to shorten the temp name. Currently, I use - * the hex representation of i_ino + the hex value of jiffies. This - * sums up to as much as 36 characters for a 64 bit machine, and needs - * 20 chars on a 32 bit machine. Have a look at jiffiesize etc. + * the hex representation of i_ino + an event counter. This sums up to + * as much as 36 characters for a 64 bit machine, and needs 20 chars on + * a 32 bit machine. * QUINTESSENCE * The use of i_ino is simply cosmetic. All we need is a unique temp - * file name for the .nfs files. The hex representation of "jiffies" - * seemed to be adequate. And as we retry in case such a file already - * exists we are guaranteed to succed (after some jiffies have passed - * by :) + * file name for the .nfs files. The event counter seemed to be adequate. + * And as we retry in case such a file already exists, we are guaranteed + * to succeed. */ static @@ -576,14 +737,11 @@ struct dentry *nfs_silly_lookup(struct dentry *parent, char *silly, int slen) { struct qstr sqstr; struct dentry *sdentry; - int i, error; + int error; sqstr.name = silly; sqstr.len = slen; - sqstr.hash = init_name_hash(); - for (i= 0; i < slen; i++) - sqstr.hash = partial_name_hash(silly[i], sqstr.hash); - sqstr.hash = end_name_hash(sqstr.hash); + sqstr.hash = full_name_hash(silly, slen); sdentry = d_lookup(parent, &sqstr); if (!sdentry) { sdentry = d_alloc(parent, &sqstr); @@ -605,19 +763,29 @@ static int nfs_sillyrename(struct inode *dir, struct dentry *dentry) const int countersize = sizeof(sillycounter)*2; const int slen = strlen(".nfs") + i_inosize + countersize; char silly[slen+1]; - int error; struct dentry *sdentry; + int error = -EIO; + /* + * Note that a silly-renamed file can be deleted once it's + * no longer in use -- it's just an ordinary file now. + */ if (dentry->d_count == 1) { - return -EIO; /* No need to silly rename. */ + dentry->d_flags &= ~DCACHE_NFSFS_RENAMED; + goto out; /* No need to silly rename. */ } - if (dentry->d_flags & DCACHE_NFSFS_RENAMED) { - return -EBUSY; /* don't allow to unlink silly inode -- nope, - * think a bit: silly DENTRY, NOT inode -- - * itself - */ - } +#ifdef NFS_PARANOIA +if (!dentry->d_inode) +printk("NFS: silly-renaming %s/%s, negative dentry??\n", +dentry->d_parent->d_name.name, dentry->d_name.name); +#endif + /* + * We don't allow a dentry to be silly-renamed twice. + */ + error = -EBUSY; + if (dentry->d_flags & DCACHE_NFSFS_RENAMED) + goto out; sprintf(silly, ".nfs%*.*lx", i_inosize, i_inosize, dentry->d_inode->i_ino); @@ -634,54 +802,109 @@ static int nfs_sillyrename(struct inode *dir, struct dentry *dentry) dentry->d_name.name, silly); sdentry = nfs_silly_lookup(dentry->d_parent, silly, slen); - if (IS_ERR(sdentry)) { - return -EIO; /* FIXME ? */ - } + /* + * N.B. Better to return EBUSY here ... it could be + * dangerous to delete the file while it's in use. + */ + if (IS_ERR(sdentry)) + goto out; } while(sdentry->d_inode != NULL); /* need negative lookup */ error = nfs_proc_rename(NFS_SERVER(dir), NFS_FH(dir), dentry->d_name.name, NFS_FH(dir), silly); - if (error) { - dput(sdentry); - return error; + if (!error) { + nfs_invalidate_dircache(dir); + nfs_renew_times(dentry); + d_move(dentry, sdentry); + dentry->d_flags |= DCACHE_NFSFS_RENAMED; + /* If we return 0 we don't unlink */ } - nfs_invalidate_dircache(dir); - d_move(dentry, sdentry); dput(sdentry); - dentry->d_flags |= DCACHE_NFSFS_RENAMED; - - return 0; /* don't unlink */ +out: + return error; } -static void nfs_silly_delete(struct dentry *dentry) +/* + * Remove a file after making sure there are no pending writes, + * and after checking that the file has only one user. + * + * Updating inode->i_nlink here rather than waiting for the next + * nfs_refresh_inode() is not merely cosmetic; once an object has + * been deleted, we want to get rid of the inode locally. The NFS + * server may reuse the fileid for a new inode, and we don't want + * that to be confused with this inode. + */ +static int nfs_safe_remove(struct dentry *dentry) { - if (dentry->d_flags & DCACHE_NFSFS_RENAMED) { - struct inode *dir = dentry->d_parent->d_inode; - int error; + struct inode *dir = dentry->d_parent->d_inode; + struct inode *inode = dentry->d_inode; + int error, rehash = 0; - dentry->d_flags &= ~DCACHE_NFSFS_RENAMED; + error = -EBUSY; + if (inode) { + if (NFS_WRITEBACK(inode)) { + nfs_flush_dirty_pages(inode, 0, 0, 0); + if (NFS_WRITEBACK(inode)) { +#ifdef NFS_PARANOIA +printk("nfs_safe_remove: %s/%s writes pending, d_count=%d\n", +dentry->d_parent->d_name.name, dentry->d_name.name, dentry->d_count); +#endif + goto out; + } + } + } else { +#ifdef NFS_PARANOIA +printk("nfs_safe_remove: %s/%s already negative??\n", +dentry->d_parent->d_name.name, dentry->d_name.name); +#endif + } - /* Unhash it first */ + if (dentry->d_count > 1) { +#ifdef NFS_PARANOIA +printk("nfs_safe_remove: %s/%s busy, d_count=%d\n", +dentry->d_parent->d_name.name, dentry->d_name.name, dentry->d_count); +#endif + goto out; + } + /* + * Unhash the dentry while we remove the file ... + */ + if (!list_empty(&dentry->d_hash)) { d_drop(dentry); - dfprintk(VFS, "trying to unlink %s\n", dentry->d_name.name); - error = nfs_proc_remove(NFS_SERVER(dir), + rehash = 1; + } + error = nfs_proc_remove(NFS_SERVER(dir), NFS_FH(dir), dentry->d_name.name); - if (error < 0) - printk("NFS " __FUNCTION__ " failed (err = %d)\n", - -error); - dentry->d_inode->i_nlink --; + /* + * ... then restore the hashed state. This ensures that the + * dentry can't become busy after having its file deleted. + */ + if (rehash) { + d_add(dentry, inode); + } +#ifdef NFS_PARANOIA +if (dentry->d_count > 1) +printk("nfs_safe_remove: %s/%s busy after delete?? d_count=%d\n", +dentry->d_parent->d_name.name, dentry->d_name.name, dentry->d_count); +if (inode && inode->i_count > 1) +printk("nfs_safe_remove: %s/%s inode busy?? i_count=%d\n", +dentry->d_parent->d_name.name, dentry->d_name.name, inode->i_count); +#endif + if (!error) { nfs_invalidate_dircache(dir); + if (inode && inode->i_nlink) + inode->i_nlink --; + d_delete(dentry); } +out: + return error; } /* We do silly rename. In case sillyrename() returns -EBUSY, the inode * belongs to an active ".nfs..." file and we return -EBUSY. * * If sillyrename() returns 0, we do nothing, otherwise we unlink. - * - * inode->i_nlink is updated here rather than waiting for the next - * nfs_refresh_inode() for cosmetic reasons only. */ static int nfs_unlink(struct inode *dir, struct dentry *dentry) { @@ -695,70 +918,77 @@ static int nfs_unlink(struct inode *dir, struct dentry *dentry) return -ENOENT; } + error = -ENAMETOOLONG; if (dentry->d_name.len > NFS_MAXNAMLEN) - return -ENAMETOOLONG; + goto out; error = nfs_sillyrename(dir, dentry); - - if (error == -EBUSY) { - return -EBUSY; - } else if (error < 0) { - error = nfs_proc_remove(NFS_SERVER(dir), - NFS_FH(dir), dentry->d_name.name); - if (error < 0) - return error; - - dentry->d_inode->i_nlink --; - nfs_invalidate_dircache(dir); - d_delete(dentry); + if (error && error != -EBUSY) { + error = nfs_safe_remove(dentry); + if (!error) { + nfs_renew_times(dentry); + } } - - return 0; +out: + return error; } -static int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname) +static int +nfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname) { struct nfs_sattr sattr; int error; dfprintk(VFS, "NFS: symlink(%x/%ld, %s, %s)\n", - dir->i_dev, dir->i_ino, dentry->d_name.name, symname); + dir->i_dev, dir->i_ino, dentry->d_name.name, symname); if (!dir || !S_ISDIR(dir->i_mode)) { printk("nfs_symlink: inode is NULL or not a directory\n"); return -ENOENT; } + error = -ENAMETOOLONG; if (dentry->d_name.len > NFS_MAXNAMLEN) - return -ENAMETOOLONG; + goto out; if (strlen(symname) > NFS_MAXPATHLEN) - return -ENAMETOOLONG; + goto out; - sattr.mode = S_IFLNK | S_IRWXUGO; /* SunOS 4.1.2 crashes without this! */ +#ifdef NFS_PARANOIA +if (dentry->d_inode) +printk("nfs_proc_symlink: %s/%s not negative!\n", +dentry->d_parent->d_name.name, dentry->d_name.name); +#endif + /* + * Fill in the sattr for the call. + * Note: SunOS 4.1.2 crashes if the mode isn't initialized! + */ + sattr.mode = S_IFLNK | S_IRWXUGO; sattr.uid = sattr.gid = sattr.size = (unsigned) -1; sattr.atime.seconds = sattr.mtime.seconds = (unsigned) -1; - error = nfs_proc_symlink(NFS_SERVER(dir), NFS_FH(dir), - dentry->d_name.name, symname, &sattr); - - if (error) - return error; - - nfs_invalidate_dircache(dir); - /* this looks _funny_ doesn't it? But: nfs_proc_symlink() - * only fills in sattr, not fattr. Thus nfs_fhget() cannot be - * called, it would be pointless, without a valid fattr - * argument. Other possibility: call nfs_proc_lookup() - * HERE. But why? If somebody wants to reference this - * symlink, the cached_lookup() will fail, and - * nfs_proc_symlink() will be called anyway. + /* + * Drop the dentry in advance to force a new lookup. + * Since nfs_proc_symlink doesn't return a fattr, we + * can't instantiate the new inode. */ d_drop(dentry); - return 0; + error = nfs_proc_symlink(NFS_SERVER(dir), NFS_FH(dir), + dentry->d_name.name, symname, &sattr); + if (!error) { + nfs_invalidate_dircache(dir); + nfs_renew_times(dentry->d_parent); + } else if (error == -EEXIST) { + printk("nfs_proc_symlink: %s/%s already exists??\n", + dentry->d_parent->d_name.name, dentry->d_name.name); + } + +out: + return error; } -static int nfs_link(struct inode *inode, struct inode *dir, struct dentry *dentry) +static int +nfs_link(struct inode *inode, struct inode *dir, struct dentry *dentry) { int error; @@ -771,20 +1001,20 @@ static int nfs_link(struct inode *inode, struct inode *dir, struct dentry *dentr return -ENOENT; } + error = -ENAMETOOLONG; if (dentry->d_name.len > NFS_MAXNAMLEN) - return -ENAMETOOLONG; - - error = nfs_proc_link(NFS_SERVER(inode), NFS_FH(inode), - NFS_FH(dir), dentry->d_name.name); - - if (error) - return error; + goto out; - nfs_invalidate_dircache(dir); - inode->i_count ++; - inode->i_nlink ++; /* no need to wait for nfs_refresh_inode() */ - d_instantiate(dentry, inode); - return 0; + error = nfs_proc_link(NFS_SERVER(inode), NFS_FH(inode), NFS_FH(dir), + dentry->d_name.name); + if (!error) { + nfs_invalidate_dircache(dir); + inode->i_count ++; + inode->i_nlink ++; /* no need to wait for nfs_refresh_inode() */ + d_instantiate(dentry, inode); + } +out: + return error; } /* @@ -804,16 +1034,25 @@ static int nfs_link(struct inode *inode, struct inode *dir, struct dentry *dentr * implementation that only depends on the dcache stuff instead of * using the inode layer * + * Unfortunately, things are a little more complicated than indicated + * above. For a cross-directory move, we want to make sure we can get + * rid of the old inode after the operation. This means there must be + * no pending writes (if it's a file), and the use count must be 1. + * If these conditions are met, we can drop the dentries before doing + * the rename. */ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry) { - int error; - - dfprintk(VFS, "NFS: rename(%x/%ld, %s -> %x/%ld, %s)\n", - old_dir->i_dev, old_dir->i_ino, old_dentry->d_name.name, - new_dir->i_dev, new_dir->i_ino, new_dentry->d_name.name); - + struct inode *old_inode = old_dentry->d_inode; + struct inode *new_inode = new_dentry->d_inode; + int error, rehash = 0, update = 1; + +#ifdef NFS_DEBUG_VERBOSE +printk("nfs_rename: old %s/%s, count=%d, new %s/%s, count=%d\n", +old_dentry->d_parent->d_name.name,old_dentry->d_name.name,old_dentry->d_count, +new_dentry->d_parent->d_name.name,new_dentry->d_name.name,new_dentry->d_count); +#endif if (!old_dir || !S_ISDIR(old_dir->i_mode)) { printk("nfs_rename: old inode is NULL or not a directory\n"); return -ENOENT; @@ -824,98 +1063,109 @@ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, return -ENOENT; } - if (old_dentry->d_name.len > NFS_MAXNAMLEN || new_dentry->d_name.len > NFS_MAXNAMLEN) - return -ENAMETOOLONG; + error = -ENAMETOOLONG; + if (old_dentry->d_name.len > NFS_MAXNAMLEN || + new_dentry->d_name.len > NFS_MAXNAMLEN) + goto out; - if (new_dir != old_dir) { - error = nfs_sillyrename(old_dir, old_dentry); + /* + * First check whether the target is busy ... we can't + * safely do _any_ rename if the target is in use. + */ + if (new_dentry->d_count > 1) { + if (new_inode && S_ISDIR(new_inode->i_mode)) + shrink_dcache_parent(new_dentry); + } + error = -EBUSY; + if (new_dentry->d_count > 1) { +#ifdef NFS_PARANOIA +printk("nfs_rename: target %s/%s busy, d_count=%d\n", +new_dentry->d_parent->d_name.name,new_dentry->d_name.name,new_dentry->d_count); +#endif + goto out; + } - if (error == -EBUSY) { - return -EBUSY; - } else if (error == 0) { /* did silly rename stuff */ - error = nfs_link(old_dentry->d_inode, - new_dir, new_dentry); - - return error; + /* + * Check for within-directory rename ... no complications. + */ + if (new_dir == old_dir) + goto do_rename; + /* + * Cross-directory move ... check whether it's a file. + */ + if (S_ISREG(old_inode->i_mode)) { + if (NFS_WRITEBACK(old_inode)) { +#ifdef NFS_PARANOIA +printk("nfs_rename: %s/%s has pending writes\n", +old_dentry->d_parent->d_name.name, old_dentry->d_name.name); +#endif + nfs_flush_dirty_pages(old_inode, 0, 0, 0); + if (NFS_WRITEBACK(old_inode)) { +#ifdef NFS_PARANOIA +printk("nfs_rename: %s/%s has pending writes after flush\n", +old_dentry->d_parent->d_name.name, old_dentry->d_name.name); +#endif + goto out; + } } - /* no need for silly rename, proceed as usual */ } - error = nfs_proc_rename(NFS_SERVER(old_dir), - NFS_FH(old_dir), old_dentry->d_name.name, - NFS_FH(new_dir), new_dentry->d_name.name); - if (error) - return error; - - nfs_invalidate_dircache(old_dir); - nfs_invalidate_dircache(new_dir); + /* + * Moving a directory ... prune child dentries if needed. + */ + else if (old_dentry->d_count > 1) + shrink_dcache_parent(old_dentry); - /* Update the dcache */ - d_move(old_dentry, new_dentry); - return 0; -} + /* + * Now check the use counts ... we can't safely do the + * rename unless we can drop the dentries first. + */ + if (old_dentry->d_count > 1) { +#ifdef NFS_PARANOIA +printk("nfs_rename: old dentry %s/%s busy, d_count=%d\n", +old_dentry->d_parent->d_name.name,old_dentry->d_name.name,old_dentry->d_count); +#endif + goto out; + } + if (new_dentry->d_count > 1) { +#ifdef NFS_PARANOIA +printk("nfs_rename: new dentry %s/%s busy, d_count=%d\n", +new_dentry->d_parent->d_name.name,new_dentry->d_name.name,new_dentry->d_count); +#endif + goto out; + } -/* - * Many nfs protocol calls return the new file attributes after - * an operation. Here we update the inode to reflect the state - * of the server's inode. - */ + d_drop(old_dentry); + update = 0; -void nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) -{ - int was_empty; - - dfprintk(VFS, "NFS: refresh_inode(%x/%ld ct=%d)\n", - inode->i_dev, inode->i_ino, inode->i_count); - - if (!inode || !fattr) { - printk("nfs_refresh_inode: inode or fattr is NULL\n"); - return; - } - if (inode->i_ino != fattr->fileid) { - printk("nfs_refresh_inode: inode number mismatch\n"); - return; - } - was_empty = (inode->i_mode == 0); - inode->i_mode = fattr->mode; - inode->i_nlink = fattr->nlink; - inode->i_uid = fattr->uid; - inode->i_gid = fattr->gid; - - /* Size changed from outside: invalidate caches on next read */ - if (inode->i_size != fattr->size) { - dfprintk(PAGECACHE, "NFS: cacheinv(%x/%ld)\n", - inode->i_dev, inode->i_ino); - NFS_CACHEINV(inode); - } - if (NFS_OLDMTIME(inode) != fattr->mtime.seconds) { - dfprintk(PAGECACHE, "NFS: mtime change on %x/%ld\n", - inode->i_dev, inode->i_ino); - NFS_ATTRTIMEO(inode) = NFS_MINATTRTIMEO(inode); - } - inode->i_size = fattr->size; - if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) - inode->i_rdev = to_kdev_t(fattr->rdev); - else - inode->i_rdev = 0; - inode->i_blocks = fattr->blocks; - inode->i_atime = fattr->atime.seconds; - inode->i_mtime = fattr->mtime.seconds; - inode->i_ctime = fattr->ctime.seconds; - if (S_ISREG(inode->i_mode)) - inode->i_op = &nfs_file_inode_operations; - else if (S_ISDIR(inode->i_mode)) - inode->i_op = &nfs_dir_inode_operations; - else if (S_ISLNK(inode->i_mode)) - inode->i_op = &nfs_symlink_inode_operations; - else if (S_ISCHR(inode->i_mode)) - inode->i_op = &chrdev_inode_operations; - else if (S_ISBLK(inode->i_mode)) - inode->i_op = &blkdev_inode_operations; - else if (S_ISFIFO(inode->i_mode)) { - if (was_empty) - init_fifo(inode); - } else - inode->i_op = NULL; +do_rename: + /* + * We must prevent any new references to the target while + * the rename is in progress, so we unhash the dentry. + */ + if (!list_empty(&new_dentry->d_hash)) { + d_drop(new_dentry); + rehash = 1; + } + error = nfs_proc_rename(NFS_SERVER(old_dir), + NFS_FH(old_dir), old_dentry->d_name.name, + NFS_FH(new_dir), new_dentry->d_name.name); + if (rehash) { + d_add(new_dentry, new_inode); + } +#ifdef NFS_PARANOIA +if (new_dentry->d_count > 1) +printk("nfs_rename: %s/%s busy after rename, d_count=%d\n", +new_dentry->d_parent->d_name.name,new_dentry->d_name.name,new_dentry->d_count); +#endif + if (!error) { + nfs_invalidate_dircache(new_dir); + nfs_invalidate_dircache(old_dir); + /* Update the dcache if needed */ + if (update) + d_move(old_dentry, new_dentry); + } +out: + return error; } /* diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 4587950ef..0f3bd5ed3 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -33,9 +33,8 @@ #define NFSDBG_FACILITY NFSDBG_FILE static int nfs_file_mmap(struct file *, struct vm_area_struct *); -static long nfs_file_read(struct inode *, struct file *, char *, unsigned long); -static long nfs_file_write(struct inode *, struct file *, - const char *, unsigned long); +static ssize_t nfs_file_read(struct file *, char *, size_t, loff_t *); +static ssize_t nfs_file_write(struct file *, const char *, size_t, loff_t *); static int nfs_file_close(struct inode *, struct file *); static int nfs_fsync(struct file *, struct dentry *dentry); @@ -84,67 +83,91 @@ struct inode_operations nfs_file_inode_operations = { # define IS_SWAPFILE(inode) (0) #endif - +/* + * Flush all dirty pages, and check for write errors. + * + * Note that since the file close operation is called only by the + * _last_ process to close the file, we need to flush _all_ dirty + * pages. This also means that there is little sense in checking + * for errors for this specific process -- we should probably just + * clear all errors. + */ static int nfs_file_close(struct inode *inode, struct file *file) { - int status; + int status, error; dfprintk(VFS, "nfs: close(%x/%ld)\n", inode->i_dev, inode->i_ino); - if ((status = nfs_flush_dirty_pages(inode, 0, 0)) < 0) - return status; - return nfs_write_error(inode); + status = nfs_flush_dirty_pages(inode, 0, 0, 0); + error = nfs_write_error(inode); + if (!status) + status = error; + return status; } -static long -nfs_file_read(struct inode * inode, struct file * file, - char * buf, unsigned long count) +static ssize_t +nfs_file_read(struct file * file, char * buf, size_t count, loff_t *ppos) { - int status; + struct inode * inode = file->f_dentry->d_inode; + ssize_t result; dfprintk(VFS, "nfs: read(%x/%ld, %lu@%lu)\n", inode->i_dev, inode->i_ino, count, - (unsigned long) file->f_pos); + (unsigned long) *ppos); - if ((status = nfs_revalidate_inode(NFS_SERVER(inode), inode)) < 0) - return status; - return generic_file_read(inode, file, buf, count); + result = nfs_revalidate_inode(NFS_SERVER(inode), inode); + if (!result) + result = generic_file_read(file, buf, count, ppos); + return result; } static int nfs_file_mmap(struct file * file, struct vm_area_struct * vma) { - int status; struct inode *inode = file->f_dentry->d_inode; + int status; dfprintk(VFS, "nfs: mmap(%x/%ld)\n", inode->i_dev, inode->i_ino); - if ((status = nfs_revalidate_inode(NFS_SERVER(inode), inode)) < 0) - return status; - return generic_file_mmap(file, vma); + status = nfs_revalidate_inode(NFS_SERVER(inode), inode); + if (!status) + status = generic_file_mmap(file, vma); + return status; } -static int nfs_fsync(struct file *file, struct dentry *dentry) +/* + * Flush any dirty pages for this process, and check for write errors. + * The return status from this call provides a reliable indication of + * whether any write errors occurred for this process. + */ +static int +nfs_fsync(struct file *file, struct dentry *dentry) { struct inode *inode = dentry->d_inode; + int status, error; + dfprintk(VFS, "nfs: fsync(%x/%ld)\n", inode->i_dev, inode->i_ino); - return nfs_flush_dirty_pages(inode, 0, 0); + status = nfs_flush_dirty_pages(inode, current->pid, 0, 0); + error = nfs_write_error(inode); + if (!status) + status = error; + return status; } /* * Write to a file (through the page cache). */ -static long -nfs_file_write(struct inode *inode, struct file *file, - const char *buf, unsigned long count) +static ssize_t +nfs_file_write(struct file *file, const char *buf, size_t count, loff_t *ppos) { - int result; + struct inode * inode = file->f_dentry->d_inode; + ssize_t result; dfprintk(VFS, "nfs: write(%x/%ld (%d), %lu@%lu)\n", inode->i_dev, inode->i_ino, inode->i_count, - count, (unsigned long) file->f_pos); + count, (unsigned long) *ppos); if (!inode) { printk("nfs_file_write: inode = NULL\n"); @@ -154,21 +177,26 @@ nfs_file_write(struct inode *inode, struct file *file, printk("NFS: attempt to write to active swap file!\n"); return -EBUSY; } - if ((result = nfs_revalidate_inode(NFS_SERVER(inode), inode)) < 0) - return result; + result = nfs_revalidate_inode(NFS_SERVER(inode), inode); + if (result) + goto out; + + /* N.B. This should be impossible now -- inodes can't change mode */ if (!S_ISREG(inode->i_mode)) { printk("nfs_file_write: write to non-file, mode %07o\n", inode->i_mode); return -EINVAL; } - if (count <= 0) - return 0; - - /* Return error from previous async call */ - if ((result = nfs_write_error(inode)) < 0) - return result; - - return generic_file_write(inode, file, buf, count); + result = count; + if (!count) + goto out; + + /* Check for an error from a previous async call */ + result = nfs_write_error(inode); + if (!result) + result = generic_file_write(file, buf, count, ppos); +out: + return result; } /* @@ -177,15 +205,15 @@ nfs_file_write(struct inode *inode, struct file *file, int nfs_lock(struct file *filp, int cmd, struct file_lock *fl) { + struct inode * inode = filp->f_dentry->d_inode; int status; - struct inode * inode; dprintk("NFS: nfs_lock(f=%4x/%ld, t=%x, fl=%x, r=%ld:%ld)\n", - filp->f_dentry->d_inode->i_dev, filp->f_dentry->d_inode->i_ino, + inode->i_dev, inode->i_ino, fl->fl_type, fl->fl_flags, fl->fl_start, fl->fl_end); - if (!(inode = filp->f_dentry->d_inode)) + if (!inode) return -EINVAL; /* No mandatory locks over NFS */ @@ -209,8 +237,8 @@ nfs_lock(struct file *filp, int cmd, struct file_lock *fl) /* If unlocking a file region, flush dirty pages (unless we've * been killed by a signal, that is). */ if (cmd == F_SETLK && fl->fl_type == F_UNLCK - && !(current->signal & ~current->blocked)) { - status = nfs_flush_dirty_pages(inode, + && !signal_pending(current)) { + status = nfs_flush_dirty_pages(inode, current->pid, fl->fl_start, fl->fl_end == NLM_OFFSET_MAX? 0 : fl->fl_end - fl->fl_start + 1); if (status < 0) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index e91b34a34..c070d130b 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -33,13 +33,17 @@ #include <asm/uaccess.h> #define NFSDBG_FACILITY NFSDBG_VFS +#define NFS_PARANOIA 1 -static int nfs_notify_change(struct inode *, struct iattr *); +extern void nfs_invalidate_dircache_sb(struct super_block *); +extern int check_failed_request(struct inode *); + +static void nfs_read_inode(struct inode *); static void nfs_put_inode(struct inode *); static void nfs_delete_inode(struct inode *); +static int nfs_notify_change(struct inode *, struct iattr *); static void nfs_put_super(struct super_block *); -static void nfs_read_inode(struct inode *); -static int nfs_statfs(struct super_block *, struct statfs *, int bufsiz); +static int nfs_statfs(struct super_block *, struct statfs *, int); static struct super_operations nfs_sops = { nfs_read_inode, /* read inode */ @@ -67,20 +71,51 @@ nfs_read_inode(struct inode * inode) { inode->i_blksize = inode->i_sb->s_blocksize; inode->i_mode = 0; + inode->i_rdev = 0; inode->i_op = NULL; NFS_CACHEINV(inode); + NFS_ATTRTIMEO(inode) = NFS_MINATTRTIMEO(inode); } static void nfs_put_inode(struct inode * inode) { dprintk("NFS: put_inode(%x/%ld)\n", inode->i_dev, inode->i_ino); + /* + * We want to get rid of unused inodes ... + */ + if (inode->i_count == 1) + inode->i_nlink = 0; } static void nfs_delete_inode(struct inode * inode) { + int failed; + dprintk("NFS: delete_inode(%x/%ld)\n", inode->i_dev, inode->i_ino); + /* + * Flush out any pending write requests ... + */ + if (NFS_WRITEBACK(inode) != NULL) { + unsigned long timeout = jiffies + 5*HZ; + printk("NFS: inode %ld, invalidating pending RPC requests\n", + inode->i_ino); + nfs_invalidate_pages(inode); + while (NFS_WRITEBACK(inode) != NULL && jiffies < timeout) { + current->state = TASK_INTERRUPTIBLE; + current->timeout = jiffies + HZ/10; + schedule(); + } + current->state = TASK_RUNNING; + if (NFS_WRITEBACK(inode) != NULL) + printk("NFS: Arghhh, stuck RPC requests!\n"); + } + + failed = check_failed_request(inode); + if (failed) + printk("NFS: inode %ld had %d failed requests\n", + inode->i_ino, failed); clear_inode(inode); } @@ -90,13 +125,21 @@ nfs_put_super(struct super_block *sb) struct nfs_server *server = &sb->u.nfs_sb.s_server; struct rpc_clnt *rpc; + /* + * Lock the super block while we bring down the daemons. + */ + lock_super(sb); if ((rpc = server->client) != NULL) rpc_shutdown_client(rpc); if (!(server->flags & NFS_MOUNT_NONLM)) lockd_down(); /* release rpc.lockd */ rpciod_down(); /* release rpciod */ - lock_super(sb); + /* + * Invalidate the dircache for this superblock. + */ + nfs_invalidate_dircache_sb(sb); + sb->s_dev = 0; unlock_super(sb); MOD_DEC_USE_COUNT; @@ -147,14 +190,12 @@ nfs_read_super(struct super_block *sb, void *raw_data, int silent) unsigned int authflavor; int tcp; kdev_t dev = sb->s_dev; + struct inode *root_inode; MOD_INC_USE_COUNT; - if (!data) { - printk("nfs_read_super: missing data argument\n"); - sb->s_dev = 0; - MOD_DEC_USE_COUNT; - return NULL; - } + if (!data) + goto out_miss_args; + if (data->version != NFS_MOUNT_VERSION) { printk("nfs warning: mount version %s than kernel\n", data->version < NFS_MOUNT_VERSION ? "older" : "newer"); @@ -164,13 +205,19 @@ nfs_read_super(struct super_block *sb, void *raw_data, int silent) data->bsize = 0; } + /* We now require that the mount process passes the remote address */ + memcpy(&srvaddr, &data->addr, sizeof(srvaddr)); + if (srvaddr.sin_addr.s_addr == INADDR_ANY) + goto out_no_remote; + lock_super(sb); - server = &sb->u.nfs_sb.s_server; sb->s_magic = NFS_SUPER_MAGIC; sb->s_dev = dev; sb->s_op = &nfs_sops; sb->s_blocksize = nfs_block_size(data->bsize, &sb->s_blocksize_bits); + sb->u.nfs_sb.s_root = data->root; + server = &sb->u.nfs_sb.s_server; server->rsize = nfs_block_size(data->rsize, NULL); server->wsize = nfs_block_size(data->wsize, NULL); server->flags = data->flags; @@ -179,15 +226,6 @@ nfs_read_super(struct super_block *sb, void *raw_data, int silent) server->acdirmin = data->acdirmin*HZ; server->acdirmax = data->acdirmax*HZ; strcpy(server->hostname, data->hostname); - sb->u.nfs_sb.s_root = data->root; - - /* We now require that the mount process passes the remote address */ - memcpy(&srvaddr, &data->addr, sizeof(srvaddr)); - if (srvaddr.sin_addr.s_addr == INADDR_ANY) { - printk("NFS: mount program didn't pass remote address!\n"); - MOD_DEC_USE_COUNT; - return NULL; - } /* Which protocol do we use? */ tcp = (data->flags & NFS_MOUNT_TCP); @@ -210,18 +248,13 @@ nfs_read_super(struct super_block *sb, void *raw_data, int silent) /* Now create transport and client */ xprt = xprt_create_proto(tcp? IPPROTO_TCP : IPPROTO_UDP, &srvaddr, &timeparms); - if (xprt == NULL) { - printk("NFS: cannot create RPC transport.\n"); - goto failure; - } + if (xprt == NULL) + goto out_no_xprt; clnt = rpc_create_client(xprt, server->hostname, &nfs_program, NFS_VERSION, authflavor); - if (clnt == NULL) { - printk("NFS: cannot create RPC client.\n"); - xprt_destroy(xprt); - goto failure; - } + if (clnt == NULL) + goto out_no_client; clnt->cl_intr = (data->flags & NFS_MOUNT_INTR)? 1 : 0; clnt->cl_softrtry = (data->flags & NFS_MOUNT_SOFT)? 1 : 0; @@ -229,29 +262,67 @@ nfs_read_super(struct super_block *sb, void *raw_data, int silent) server->client = clnt; /* Fire up rpciod if not yet running */ +#ifdef RPCIOD_RESULT + if (rpciod_up()) + goto out_no_iod; +#else rpciod_up(); +#endif - /* Unlock super block and try to get root fh attributes */ + /* + * Keep the super block locked while we try to get + * the root fh attributes. + */ + root_inode = nfs_fhget(sb, &data->root, NULL); + if (!root_inode) + goto out_no_root; + sb->s_root = d_alloc_root(root_inode, NULL); + if (!sb->s_root) + goto out_no_root; + /* We're airborne */ unlock_super(sb); - sb->s_root = d_alloc_root(nfs_fhget(sb, &data->root, NULL), NULL); - if (sb->s_root != NULL) { - /* We're airborne */ - if (!(server->flags & NFS_MOUNT_NONLM)) - lockd_up(); - return sb; - } + /* Check whether to start the lockd process */ + if (!(server->flags & NFS_MOUNT_NONLM)) + lockd_up(); + return sb; /* Yargs. It didn't work out. */ +out_no_root: printk("nfs_read_super: get root inode failed\n"); - rpc_shutdown_client(server->client); + iput(root_inode); rpciod_down(); +#ifdef RPCIOD_RESULT + goto out_shutdown; -failure: - MOD_DEC_USE_COUNT; - if (sb->s_lock) - unlock_super(sb); +out_no_iod: + printk("nfs_read_super: couldn't start rpciod!\n"); +out_shutdown: +#endif + rpc_shutdown_client(server->client); + goto out_unlock; + +out_no_client: + printk("NFS: cannot create RPC client.\n"); + xprt_destroy(xprt); + goto out_unlock; + +out_no_xprt: + printk("NFS: cannot create RPC transport.\n"); +out_unlock: + unlock_super(sb); + goto out_fail; + +out_no_remote: + printk("NFS: mount program didn't pass remote address!\n"); + goto out_fail; + +out_miss_args: + printk("nfs_read_super: missing data argument\n"); + +out_fail: sb->s_dev = 0; + MOD_DEC_USE_COUNT; return NULL; } @@ -312,14 +383,48 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fhandle, printk("nfs_fhget: iget failed\n"); return NULL; } - if (inode->i_dev == sb->s_dev) { - if (inode->i_ino != fattr->fileid) { - printk("nfs_fhget: unexpected inode from iget\n"); - return inode; - } - *NFS_FH(inode) = *fhandle; - nfs_refresh_inode(inode, fattr); +#ifdef NFS_PARANOIA +if (inode->i_dev != sb->s_dev) +printk("nfs_fhget: impossible\n"); +#endif + + if (inode->i_ino != fattr->fileid) { + printk("nfs_fhget: unexpected inode from iget\n"); + return inode; } + + /* + * Check whether the mode has been set, as we only want to + * do this once. (We don't allow inodes to change types.) + */ + if (inode->i_mode == 0) { + inode->i_mode = fattr->mode; + if (S_ISREG(inode->i_mode)) + inode->i_op = &nfs_file_inode_operations; + else if (S_ISDIR(inode->i_mode)) + inode->i_op = &nfs_dir_inode_operations; + else if (S_ISLNK(inode->i_mode)) + inode->i_op = &nfs_symlink_inode_operations; + else if (S_ISCHR(inode->i_mode)) { + inode->i_op = &chrdev_inode_operations; + inode->i_rdev = to_kdev_t(fattr->rdev); + } else if (S_ISBLK(inode->i_mode)) { + inode->i_op = &blkdev_inode_operations; + inode->i_rdev = to_kdev_t(fattr->rdev); + } else if (S_ISFIFO(inode->i_mode)) + init_fifo(inode); + else + inode->i_op = NULL; + /* + * Preset the size and mtime, as there's no need + * to invalidate the caches. + */ + inode->i_size = fattr->size; + inode->i_mtime = fattr->mtime.seconds; + NFS_OLDMTIME(inode) = fattr->mtime.seconds; + } + *NFS_FH(inode) = *fhandle; + nfs_refresh_inode(inode, fattr); dprintk("NFS: fhget(%x/%ld ct=%d)\n", inode->i_dev, inode->i_ino, inode->i_count); @@ -334,6 +439,17 @@ nfs_notify_change(struct inode *inode, struct iattr *attr) struct nfs_fattr fattr; int error; + /* + * Make sure the inode is up-to-date. + */ + error = nfs_revalidate(inode); + if (error) { +#ifdef NFS_PARANOIA +printk("nfs_notify_change: revalidate failed, error=%d\n", error); +#endif + goto out; + } + sattr.mode = (u32) -1; if (attr->ia_valid & ATTR_MODE) sattr.mode = attr->ia_mode; @@ -346,7 +462,6 @@ nfs_notify_change(struct inode *inode, struct iattr *attr) if (attr->ia_valid & ATTR_GID) sattr.gid = attr->ia_gid; - sattr.size = (u32) -1; if ((attr->ia_valid & ATTR_SIZE) && S_ISREG(inode->i_mode)) sattr.size = attr->ia_size; @@ -364,11 +479,25 @@ nfs_notify_change(struct inode *inode, struct iattr *attr) } error = nfs_proc_setattr(NFS_SERVER(inode), NFS_FH(inode), - &sattr, &fattr); - if (!error) { + &sattr, &fattr); + if (error) + goto out; + /* + * If we changed the size or mtime, update the inode + * now to avoid invalidating the page cache. + */ + if (sattr.size != (u32) -1) { + if (sattr.size != fattr.size) + printk("nfs_notify_change: sattr=%d, fattr=%d??\n", + sattr.size, fattr.size); nfs_truncate_dirty_pages(inode, sattr.size); - nfs_refresh_inode(inode, &fattr); + inode->i_size = sattr.size; + inode->i_mtime = fattr.mtime.seconds; } + if (sattr.mtime.seconds != (u32) -1) + inode->i_mtime = fattr.mtime.seconds; + error = nfs_refresh_inode(inode, &fattr); +out: return error; } @@ -384,57 +513,154 @@ nfs_revalidate(struct inode *inode) /* * This function is called whenever some part of NFS notices that * the cached attributes have to be refreshed. - * - * This is a bit tricky because we have to make sure all dirty pages - * have been sent off to the server before calling invalidate_inode_pages. - * To make sure no other process adds more write requests while we try - * our best to flush them, we make them sleep during the attribute refresh. - * - * A very similar scenario holds for the dir cache. */ int _nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) { struct nfs_fattr fattr; - int status; + int status = 0; if (jiffies - NFS_READTIME(inode) < NFS_ATTRTIMEO(inode)) - return 0; + goto out; dfprintk(PAGECACHE, "NFS: revalidating %x/%ld inode\n", inode->i_dev, inode->i_ino); - NFS_READTIME(inode) = jiffies; - if ((status = nfs_proc_getattr(server, NFS_FH(inode), &fattr)) < 0) + status = nfs_proc_getattr(server, NFS_FH(inode), &fattr); + if (status) { +#ifdef NFS_PARANOIA +printk("nfs_revalidate_inode: getattr failed, error=%d\n", status); +#endif goto done; + } - nfs_refresh_inode(inode, &fattr); - if (fattr.mtime.seconds != NFS_OLDMTIME(inode)) { - if (!S_ISDIR(inode->i_mode)) { - /* This sends off all dirty pages off to the server. - * Note that this function must not sleep. */ - nfs_invalidate_pages(inode); - invalidate_inode_pages(inode); - } else { - nfs_invalidate_dircache(inode); - } - - NFS_OLDMTIME(inode) = fattr.mtime.seconds; - NFS_ATTRTIMEO(inode) = NFS_MINATTRTIMEO(inode); - } else { + status = nfs_refresh_inode(inode, &fattr); + if (status) + goto done; + if (fattr.mtime.seconds == NFS_OLDMTIME(inode)) { /* Update attrtimeo value */ if ((NFS_ATTRTIMEO(inode) <<= 1) > NFS_MAXATTRTIMEO(inode)) NFS_ATTRTIMEO(inode) = NFS_MAXATTRTIMEO(inode); } - status = 0; + NFS_OLDMTIME(inode) = fattr.mtime.seconds; done: dfprintk(PAGECACHE, "NFS: inode %x/%ld revalidation complete (status %d).\n", inode->i_dev, inode->i_ino, status); +out: return status; } /* + * Many nfs protocol calls return the new file attributes after + * an operation. Here we update the inode to reflect the state + * of the server's inode. + * + * This is a bit tricky because we have to make sure all dirty pages + * have been sent off to the server before calling invalidate_inode_pages. + * To make sure no other process adds more write requests while we try + * our best to flush them, we make them sleep during the attribute refresh. + * + * A very similar scenario holds for the dir cache. + */ +int +nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) +{ + int invalid = 0; + int error = -EIO; + + dfprintk(VFS, "NFS: refresh_inode(%x/%ld ct=%d)\n", + inode->i_dev, inode->i_ino, inode->i_count); + + if (!inode || !fattr) { + printk("nfs_refresh_inode: inode or fattr is NULL\n"); + goto out; + } + if (inode->i_ino != fattr->fileid) { + printk("nfs_refresh_inode: inode number mismatch\n"); + goto out; + } + + /* + * Make sure the inode's type hasn't changed. + */ + if ((inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) + goto out_changed; + + /* + * If the size or mtime changed from outside, we want + * to invalidate the local caches immediately. + */ + if (inode->i_size != fattr->size) { +#ifdef NFS_DEBUG_VERBOSE +printk("NFS: size change on %x/%ld\n", inode->i_dev, inode->i_ino); +#endif + invalid = 1; + } + if (inode->i_mtime != fattr->mtime.seconds) { +#ifdef NFS_DEBUG_VERBOSE +printk("NFS: mtime change on %x/%ld\n", inode->i_dev, inode->i_ino); +#endif + invalid = 1; + } + + inode->i_mode = fattr->mode; + inode->i_nlink = fattr->nlink; + inode->i_uid = fattr->uid; + inode->i_gid = fattr->gid; + + inode->i_size = fattr->size; + inode->i_blocks = fattr->blocks; + inode->i_atime = fattr->atime.seconds; + inode->i_mtime = fattr->mtime.seconds; + inode->i_ctime = fattr->ctime.seconds; + /* + * Update the read time so we don't revalidate too often. + */ + NFS_READTIME(inode) = jiffies; + error = 0; + if (invalid) + goto out_invalid; +out: + return error; + +out_changed: + /* + * Big trouble! The inode has become a different object. + */ +#ifdef NFS_PARANOIA +printk("nfs_refresh_inode: inode %ld mode changed, %07o to %07o\n", +inode->i_ino, inode->i_mode, fattr->mode); +#endif + fattr->mode = inode->i_mode; /* save mode */ + make_bad_inode(inode); + inode->i_mode = fattr->mode; /* restore mode */ + /* + * No need to worry about unhashing the dentry, as the + * lookup validation will know that the inode is bad. + * (But we fall through to invalidate the caches.) + */ + +out_invalid: + /* + * Invalidate the local caches + */ +#ifdef NFS_DEBUG_VERBOSE +printk("nfs_refresh_inode: invalidating %ld pages\n", inode->i_nrpages); +#endif + if (!S_ISDIR(inode->i_mode)) { + /* This sends off all dirty pages off to the server. + * Note that this function must not sleep. */ + nfs_invalidate_pages(inode); + invalidate_inode_pages(inode); + } else + nfs_invalidate_dircache(inode); + NFS_CACHEINV(inode); + NFS_ATTRTIMEO(inode) = NFS_MINATTRTIMEO(inode); + goto out; +} + +/* * File system information */ static struct file_system_type nfs_fs_type = { diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c index 5eec5eb65..0311b7d0b 100644 --- a/fs/nfs/nfs2xdr.c +++ b/fs/nfs/nfs2xdr.c @@ -23,6 +23,7 @@ #include <linux/nfs_fs.h> #define NFSDBG_FACILITY NFSDBG_XDR +/* #define NFS_PARANOIA 1 */ #define QUADLEN(len) (((len) + 3) >> 2) static int nfs_stat_to_errno(int stat); @@ -371,17 +372,18 @@ nfs_xdr_readdirargs(struct rpc_rqst *req, u32 *p, struct nfs_readdirargs *args) * to avoid a malloc of NFS_MAXNAMLEN+1 for each file name. * After decoding, the layout in memory looks like this: * entry1 entry2 ... entryN <space> stringN ... string2 string1 + * Each entry consists of three __u32 values, the same space as NFS uses. * Note that the strings are not null-terminated so that the entire number * of entries returned by the server should fit into the buffer. */ static int nfs_xdr_readdirres(struct rpc_rqst *req, u32 *p, struct nfs_readdirres *res) { - struct nfs_entry *entry; struct iovec *iov = req->rq_rvec; int status, nr, len; - char *string; + char *string, *start; u32 *end; + __u32 fileid, cookie, *entry; if ((status = ntohl(*p++))) return -nfs_stat_to_errno(status); @@ -396,10 +398,11 @@ nfs_xdr_readdirres(struct rpc_rqst *req, u32 *p, struct nfs_readdirres *res) end = (u32 *) ((u8 *) p + iov[1].iov_len); /* Get start and end of dirent buffer */ - entry = (struct nfs_entry *) res->buffer; + entry = (__u32 *) res->buffer; + start = (char *) res->buffer; string = (char *) res->buffer + res->bufsiz; - for (nr = 0; *p++; nr++, entry++) { - entry->fileid = ntohl(*p++); + for (nr = 0; *p++; nr++) { + fileid = ntohl(*p++); len = ntohl(*p++); if ((p + QUADLEN(len) + 3) > end) { @@ -413,27 +416,36 @@ nfs_xdr_readdirres(struct rpc_rqst *req, u32 *p, struct nfs_readdirres *res) return -errno_NFSERR_IO; } string -= len; - if ((void *) (entry+1) > (void *) string) { - /* This may actually happen because an nfs_entry - * will take up more space than the XDR data. On - * 32bit machines that's due to 8byte alignment, - * on 64bit machines that's because the char * takes - * up 2 longs. - * - * THIS IS BAD! + if ((void *) (entry+3) > (void *) string) { + /* + * This error is impossible as long as the temp + * buffer is no larger than the user buffer. The + * current packing algorithm uses the same amount + * of space in the user buffer as in the XDR data, + * so it's guaranteed to fit. */ - printk(KERN_NOTICE "NFS: should not happen in %s!\n", + printk("NFS: incorrect buffer size in %s!\n", __FUNCTION__); break; } - entry->name = string; - entry->length = len; memmove(string, p, len); p += QUADLEN(len); - entry->cookie = ntohl(*p++); - entry->eof = !p[0] && p[1]; + cookie = ntohl(*p++); + /* + * To make everything fit, we encode the length, offset, + * and eof flag into 32 bits. This works for filenames + * up to 32K and PAGE_SIZE up to 64K. + */ + status = !p[0] && p[1] ? (1 << 15) : 0; /* eof flag */ + *entry++ = fileid; + *entry++ = cookie; + *entry++ = ((string - start) << 16) | status | (len & 0x7FFF); } +#ifdef NFS_PARANOIA +printk("nfs_xdr_readdirres: %d entries, ent sp=%d, str sp=%d\n", +nr, ((char *) entry - start), (start + res->bufsiz - string)); +#endif return nr; } diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index f48a6217c..94096d928 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -384,17 +384,18 @@ nfs_xdr_readdirargs(struct rpc_rqst *req, u32 *p, struct nfs_readdirargs *args) * to avoid a malloc of NFS_MAXNAMLEN+1 for each file name. * After decoding, the layout in memory looks like this: * entry1 entry2 ... entryN <space> stringN ... string2 string1 + * Each entry consists of three __u32 values, the same space as NFS uses. * Note that the strings are not null-terminated so that the entire number * of entries returned by the server should fit into the buffer. */ static int nfs_xdr_readdirres(struct rpc_rqst *req, u32 *p, struct nfs_readdirres *res) { - struct nfs_entry *entry; struct iovec *iov = req->rq_rvec; int status, nr, len; - char *string; + char *string, *start; u32 *end; + __u32 fileid, cookie, *entry; if ((status = ntohl(*p++))) return -nfs_stat_to_errno(status); @@ -413,10 +414,11 @@ nfs_xdr_readdirres(struct rpc_rqst *req, u32 *p, struct nfs_readdirres *res) return -errno_NFSERR_IO; } - string = (char *) res->buffer + res->bufsiz; - entry = (struct nfs_entry *) res->buffer; - for (nr = 0; *p++; nr++, entry++) { - entry->fileid = ntohl(*p++); + entry = (__u32 *) res->buffer; + start = (char *) res->buffer; + string = start + res->bufsiz; + for (nr = 0; *p++; nr++) { + fileid = ntohl(*p++); len = ntohl(*p++); if ((p + QUADLEN(len) + 3) > end) { @@ -430,22 +432,40 @@ nfs_xdr_readdirres(struct rpc_rqst *req, u32 *p, struct nfs_readdirres *res) return -errno_NFSERR_IO; } string -= len; - if ((void *) (entry+1) > (void *) string) { - dprintk("NFS: shouldnothappen in readdirres_decode!\n"); - break; /* should not happen */ + if ((void *) (entry+3) > (void *) string) { + /* + * This error is impossible as long as the temp + * buffer is no larger than the user buffer. The + * current packing algorithm uses the same amount + * of space in the user buffer as in the XDR data, + * so it's guaranteed to fit. + */ + printk("NFS: incorrect buffer size in %s!\n", + __FUNCTION__); + break; } - entry->name = string; - entry->length = len; memmove(string, p, len); p += QUADLEN(len); - entry->cookie = ntohl(*p++); - entry->eof = !p[0] && p[1]; + cookie = ntohl(*p++); + /* + * To make everything fit, we encode the length, offset, + * and eof flag into 32 bits. This works for filenames + * up to 32K and PAGE_SIZE up to 64K. + */ + status = !p[0] && p[1] ? (1 << 15) : 0; /* eof flag */ + *entry++ = fileid; + *entry++ = cookie; + *entry++ = ((string - start) << 16) | status | (len & 0x7FFF); /* dprintk("NFS: decoded dirent %.*s cookie %d eof %d\n", - len, string, entry->cookie, entry->eof); + len, string, cookie, status); */ } +#ifdef NFS_PARANOIA +printk("nfs_xdr_readdirres: %d entries, ent sp=%d, str sp=%d\n", +nr, ((char *) entry - start), (start + res->bufsiz - string)); +#endif return nr; } diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index 58dcd95d0..416ed294e 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -250,25 +250,43 @@ nfs_proc_rmdir(struct nfs_server *server, struct nfs_fh *dir, const char *name) */ int nfs_proc_readdir(struct nfs_server *server, struct nfs_fh *fhandle, - u32 cookie, unsigned int size, struct nfs_entry *entry) + u32 cookie, unsigned int size, __u32 *entry) { struct nfs_readdirargs arg; struct nfs_readdirres res; void * buffer; + unsigned int buf_size = PAGE_SIZE; int status; /* First get a temp buffer for the readdir reply */ - while (!(buffer = (void *) get_free_page(GFP_USER))) { - need_resched = 1; - schedule(); - if (signalled()) - return -ERESTARTSYS; - } + /* N.B. does this really need to be cleared? */ + status = -ENOMEM; + buffer = (void *) get_free_page(GFP_KERNEL); + if (!buffer) + goto out; + + /* + * Calculate the effective size the buffer. To make sure + * that the returned data will fit into the user's buffer, + * we decrease the buffer size as necessary. + * + * Note: NFS returns three __u32 values for each entry, + * and we assume that the data is packed into the user + * buffer with the same efficiency. + */ + if (size < buf_size) + buf_size = size; + if (server->rsize < buf_size) + buf_size = server->rsize; +#if 0 +printk("nfs_proc_readdir: user size=%d, rsize=%d, buf_size=%d\n", +size, server->rsize, buf_size); +#endif arg.fh = fhandle; arg.cookie = cookie; arg.buffer = buffer; - arg.bufsiz = server->rsize < PAGE_SIZE? server->rsize : PAGE_SIZE; + arg.bufsiz = buf_size; res.buffer = entry; res.bufsiz = size; @@ -276,6 +294,7 @@ nfs_proc_readdir(struct nfs_server *server, struct nfs_fh *fhandle, status = rpc_call(server->client, NFSPROC_READDIR, &arg, &res, 0); dprintk("NFS reply readdir: %d\n", status); free_page((unsigned long) buffer); +out: return status; } diff --git a/fs/nfs/write.c b/fs/nfs/write.c index ec5a1f7be..97663cc11 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -68,6 +68,8 @@ #define NFSDBG_FACILITY NFSDBG_PAGECACHE +int check_failed_request(struct inode *); + static void nfs_wback_lock(struct rpc_task *task); static void nfs_wback_result(struct rpc_task *task); @@ -120,6 +122,7 @@ struct nfs_wreq { * Limit number of delayed writes */ static int nr_write_requests = 0; +static int nr_failed_requests = 0; static struct rpc_wait_queue write_queue = RPC_INIT_WAITQ("write_chain"); struct nfs_wreq * nfs_failed_requests = NULL; @@ -196,22 +199,44 @@ nfs_writepage_sync(struct inode *inode, struct page *page, clear_bit(PG_uptodate, &page->flags); goto io_error; } + if (result != wsize) + printk("NFS: short write, wsize=%u, result=%d\n", + wsize, result); refresh = 1; buffer += wsize; offset += wsize; written += wsize; count -= wsize; + /* + * If we've extended the file, update the inode + * now so we don't invalidate the cache. + */ + if (offset > inode->i_size) + inode->i_size = offset; } while (count); io_error: + /* N.B. do we want to refresh if there was an error?? (fattr valid?) */ if (refresh) { /* See comments in nfs_wback_result */ + /* N.B. I don't think this is right -- sync writes in order */ if (fattr.size < inode->i_size) fattr.size = inode->i_size; + if (fattr.mtime.seconds < inode->i_mtime) + printk("nfs_writepage_sync: prior time??\n"); /* Solaris 2.5 server seems to send garbled * fattrs occasionally */ - if (inode->i_ino == fattr.fileid) + if (inode->i_ino == fattr.fileid) { + /* + * We expect the mtime value to change, and + * don't want to invalidate the caches. + */ + inode->i_mtime = fattr.mtime.seconds; nfs_refresh_inode(inode, &fattr); + } + else + printk("nfs_writepage_sync: inode %ld, got %u?\n", + inode->i_ino, fattr.fileid); } nfs_unlock_page(page); @@ -260,28 +285,73 @@ find_write_request(struct inode *inode, struct page *page) /* * Find a failed write request by pid */ -static inline struct nfs_wreq * +static struct nfs_wreq * find_failed_request(struct inode *inode, pid_t pid) { struct nfs_wreq *head, *req; - if (!(req = head = nfs_failed_requests)) - return NULL; - do { - if (req->wb_inode == inode && req->wb_pid == pid) + req = head = nfs_failed_requests; + while (req != NULL) { + if (req->wb_inode == inode && (pid == 0 || req->wb_pid == pid)) return req; - } while ((req = WB_NEXT(req)) != head); + if ((req = WB_NEXT(req)) == head) + break; + } return NULL; } /* + * Add a request to the failed list. + */ +static void +append_failed_request(struct nfs_wreq * req) +{ + static int old_max = 16; + + append_write_request(&nfs_failed_requests, req); + nr_failed_requests++; + if (nr_failed_requests >= old_max) { + printk("NFS: %d failed requests\n", nr_failed_requests); + old_max = old_max << 1; + } +} + +/* + * Remove a request from the failed list and free it. + */ +static void +remove_failed_request(struct nfs_wreq * req) +{ + remove_write_request(&nfs_failed_requests, req); + kfree(req); + nr_failed_requests--; +} + +/* + * Find and release all failed requests for this inode. + */ +int +check_failed_request(struct inode * inode) +{ + struct nfs_wreq * req; + int found = 0; + + while ((req = find_failed_request(inode, 0)) != NULL) { + remove_failed_request(req); + found++; + } + return found; +} + +/* * Try to merge adjacent write requests. This works only for requests * issued by the same user. */ static inline int -update_write_request(struct nfs_wreq *req, unsigned first, unsigned bytes) +update_write_request(struct nfs_wreq *req, unsigned int first, + unsigned int bytes) { - unsigned rqfirst = req->wb_offset, + unsigned int rqfirst = req->wb_offset, rqlast = rqfirst + req->wb_bytes, last = first + bytes; @@ -313,7 +383,7 @@ update_write_request(struct nfs_wreq *req, unsigned first, unsigned bytes) */ static inline struct nfs_wreq * create_write_request(struct inode *inode, struct page *page, - unsigned offset, unsigned bytes) + unsigned int offset, unsigned int bytes) { struct nfs_wreq *wreq; struct rpc_clnt *clnt = NFS_CLIENT(inode); @@ -327,7 +397,7 @@ create_write_request(struct inode *inode, struct page *page, wreq = (struct nfs_wreq *) kmalloc(sizeof(*wreq), GFP_USER); if (!wreq) - return NULL; + goto out_fail; memset(wreq, 0, sizeof(*wreq)); task = &wreq->wb_task; @@ -336,11 +406,8 @@ create_write_request(struct inode *inode, struct page *page, task->tk_action = nfs_wback_lock; rpcauth_lookupcred(task); /* Obtain user creds */ - if (task->tk_status < 0) { - rpc_release_task(task); - kfree(wreq); - return NULL; - } + if (task->tk_status < 0) + goto out_req; /* Put the task on inode's writeback request list. */ wreq->wb_inode = inode; @@ -357,6 +424,12 @@ create_write_request(struct inode *inode, struct page *page, rpc_wake_up_next(&write_queue); return wreq; + +out_req: + rpc_release_task(task); + kfree(wreq); +out_fail: + return NULL; } /* @@ -423,7 +496,9 @@ wait_on_write_request(struct nfs_wreq *req) } remove_wait_queue(&page->wait, &wait); current->state = TASK_RUNNING; - atomic_dec(&page->count); + if (atomic_read(&page->count) == 1) + printk("NFS: page unused while waiting\n"); + free_page(page_address(page)); return retval; } @@ -487,12 +562,13 @@ nfs_updatepage(struct inode *inode, struct page *page, const char *buffer, } /* Create the write request. */ - if (!(req = create_write_request(inode, page, offset, count))) { - status = -ENOBUFS; + status = -ENOBUFS; + req = create_write_request(inode, page, offset, count); + if (!req) goto done; - } /* Copy data to page buffer. */ + /* N.B. should check for fault here ... */ copy_from_user(page_addr + offset, buffer, count); /* Schedule request */ @@ -519,6 +595,7 @@ done: transfer_page_lock(req); /* rpc_execute(&req->wb_task); */ if (sync) { + /* N.B. if signalled, result not ready? */ wait_on_write_request(req); if ((count = nfs_write_error(inode)) < 0) status = count; @@ -578,10 +655,20 @@ nfs_flush_pages(struct inode *inode, pid_t pid, off_t offset, off_t len, if (rqoffset < end && offset < rqend && (pid == 0 || req->wb_pid == pid)) { - if (!WB_HAVELOCK(req)) + if (!WB_HAVELOCK(req)) { +#ifdef NFS_PARANOIA +printk("nfs_flush: flushing inode=%ld, %d @ %lu\n", +req->wb_inode->i_ino, req->wb_bytes, rqoffset); +#endif nfs_flush_request(req); + } last = req; } + } else { +#ifdef NFS_PARANOIA +printk("nfs_flush_pages: in progress inode=%ld, %d @ %lu\n", +req->wb_inode->i_ino, req->wb_bytes, rqoffset); +#endif } if (invalidate) req->wb_flags |= NFS_WRITE_INVALIDATE; @@ -593,7 +680,11 @@ nfs_flush_pages(struct inode *inode, pid_t pid, off_t offset, off_t len, } /* - * Cancel all writeback requests, both pending and in process. + * Cancel all writeback requests, both pending and in progress. + * + * N.B. This doesn't seem to wake up the tasks -- are we sure + * they will eventually complete? Also, this could overwrite a + * failed status code from an already-completed task. */ static void nfs_cancel_dirty(struct inode *inode, pid_t pid) @@ -602,7 +693,8 @@ nfs_cancel_dirty(struct inode *inode, pid_t pid) req = head = NFS_WRITEBACK(inode); while (req != NULL) { - if (req->wb_pid == pid) { + /* N.B. check for task already finished? */ + if (pid == 0 || req->wb_pid == pid) { req->wb_flags |= NFS_WRITE_CANCELLED; rpc_exit(&req->wb_task, 0); } @@ -620,36 +712,43 @@ nfs_cancel_dirty(struct inode *inode, pid_t pid) * this isn't used by the nlm module yet. */ int -nfs_flush_dirty_pages(struct inode *inode, off_t offset, off_t len) +nfs_flush_dirty_pages(struct inode *inode, pid_t pid, off_t offset, off_t len) { struct nfs_wreq *last = NULL; + int result = 0, cancel = 0; dprintk("NFS: flush_dirty_pages(%x/%ld for pid %d %ld/%ld)\n", inode->i_dev, inode->i_ino, current->pid, offset, len); - if (IS_SOFT && signalled()) - nfs_cancel_dirty(inode, current->pid); + if (IS_SOFT && signalled()) { + nfs_cancel_dirty(inode, pid); + cancel = 1; + } for (;;) { - if (IS_SOFT && signalled()) - return -ERESTARTSYS; + if (IS_SOFT && signalled()) { + if (!cancel) + nfs_cancel_dirty(inode, pid); + result = -ERESTARTSYS; + break; + } - /* Flush all pending writes for this pid and file region */ - last = nfs_flush_pages(inode, current->pid, offset, len, 0); + /* Flush all pending writes for the pid and file region */ + last = nfs_flush_pages(inode, pid, offset, len, 0); if (last == NULL) break; wait_on_write_request(last); } - return 0; + return result; } /* * Flush out any pending write requests and flag that they be discarded * after the write is complete. * - * This function is called from nfs_revalidate_inode just before it calls + * This function is called from nfs_refresh_inode just before it calls * invalidate_inode_pages. After nfs_flush_pages returns, we can be sure * that all dirty pages are locked, so that invalidate_inode_pages does * not throw away any dirty pages. @@ -705,15 +804,14 @@ nfs_check_error(struct inode *inode) dprintk("nfs: checking for write error inode %04x/%ld\n", inode->i_dev, inode->i_ino); - if (!(req = find_failed_request(inode, current->pid))) - return 0; - - dprintk("nfs: write error %d inode %04x/%ld\n", + req = find_failed_request(inode, current->pid); + if (req) { + dprintk("nfs: write error %d inode %04x/%ld\n", req->wb_task.tk_status, inode->i_dev, inode->i_ino); - status = req->wb_task.tk_status; - remove_write_request(&nfs_failed_requests, req); - kfree(req); + status = req->wb_task.tk_status; + remove_failed_request(req); + } return status; } @@ -789,36 +887,45 @@ nfs_wback_result(struct rpc_task *task) page = req->wb_page; status = task->tk_status; - /* Remove request from writeback list and wake up tasks - * sleeping on it. */ - remove_write_request(&NFS_WRITEBACK(inode), req); - if (status < 0) { /* * An error occurred. Report the error back to the - * application by adding the failed request to the - * inode's error list. + * application by adding the request to the failed + * requests list. */ - if (find_failed_request(inode, req->wb_pid)) { + if (find_failed_request(inode, req->wb_pid)) status = 0; - } else { - dprintk("NFS: %4d saving write failure code\n", - task->tk_pid); - append_write_request(&nfs_failed_requests, req); - } clear_bit(PG_uptodate, &page->flags); } else if (!WB_CANCELLED(req)) { + struct nfs_fattr *fattr = req->wb_fattr; /* Update attributes as result of writeback. * Beware: when UDP replies arrive out of order, we * may end up overwriting a previous, bigger file size. */ - if (req->wb_fattr->size < inode->i_size) - req->wb_fattr->size = inode->i_size; - /* possible Solaris 2.5 server bug workaround */ - if (inode->i_ino == req->wb_fattr->fileid) - nfs_refresh_inode(inode, req->wb_fattr); + if (fattr->mtime.seconds >= inode->i_mtime) { + if (fattr->size < inode->i_size) + fattr->size = inode->i_size; + + /* possible Solaris 2.5 server bug workaround */ + if (inode->i_ino == fattr->fileid) { + /* + * We expect these values to change, and + * don't want to invalidate the caches. + */ + inode->i_size = fattr->size; + inode->i_mtime = fattr->mtime.seconds; + nfs_refresh_inode(inode, fattr); + } + else + printk("nfs_wback_result: inode %ld, got %u?\n", + inode->i_ino, fattr->fileid); + } } + /* + * This call might block, so we defer removing the request + * from the inode's writeback list. + */ rpc_release_task(task); if (WB_INVALIDATE(req)) @@ -830,8 +937,20 @@ nfs_wback_result(struct rpc_task *task) kfree(req->wb_args); req->wb_args = 0; } + + /* + * Now it's safe to remove the request from the inode's + * writeback list and wake up any tasks sleeping on it. + * If the request failed, add it to the failed list. + */ + remove_write_request(&NFS_WRITEBACK(inode), req); + if (status >= 0) kfree(req); + else { + dprintk("NFS: %4d saving write failure code\n", task->tk_pid); + append_failed_request(req); + } free_page(page_address(page)); nr_write_requests--; |