diff options
Diffstat (limited to 'fs')
71 files changed, 4659 insertions, 2486 deletions
diff --git a/fs/Config.in b/fs/Config.in index e9eb8099c..53029b7d3 100644 --- a/fs/Config.in +++ b/fs/Config.in @@ -10,9 +10,7 @@ tristate 'Kernel automounter version 4 support (also supports v3)' CONFIG_AUTOFS dep_tristate 'ADFS file system support' CONFIG_ADFS_FS $CONFIG_EXPERIMENTAL -if [ "$CONFIG_ADFS_FS" != "n" -a "$CONFIG_EXPERIMENTAL" = "y" ]; then - bool ' ADFS write support (DANGEROUS)' CONFIG_ADFS_FS_RW -fi +dep_mbool ' ADFS write support (DANGEROUS)' CONFIG_ADFS_FS_RW $CONFIG_ADFS_FS $CONFIG_EXPERIMENTAL tristate 'Amiga FFS file system support' CONFIG_AFFS_FS @@ -28,21 +26,15 @@ dep_tristate ' VFAT (Windows-95) fs support' CONFIG_VFAT_FS $CONFIG_FAT_FS dep_tristate 'EFS file system support (read only) (EXPERIMENTAL)' CONFIG_EFS_FS $CONFIG_EXPERIMENTAL tristate 'Compressed ROM file system support' CONFIG_CRAMFS +tristate 'Simple RAM-based file system support' CONFIG_RAMFS tristate 'ISO 9660 CDROM file system support' CONFIG_ISO9660_FS -if [ "$CONFIG_ISO9660_FS" != "n" ]; then - bool ' Microsoft Joliet CDROM extensions' CONFIG_JOLIET -else - # needed by nls/Config.in - define_bool CONFIG_JOLIET n -fi +dep_mbool ' Microsoft Joliet CDROM extensions' CONFIG_JOLIET $CONFIG_ISO9660_FS tristate 'Minix fs support' CONFIG_MINIX_FS tristate 'NTFS file system support (read only)' CONFIG_NTFS_FS -if [ "$CONFIG_NTFS_FS" != "n" -a "$CONFIG_EXPERIMENTAL" = "y" ]; then - bool ' NTFS write support (DANGEROUS)' CONFIG_NTFS_RW -fi +dep_mbool ' NTFS write support (DANGEROUS)' CONFIG_NTFS_RW $CONFIG_NTFS_FS $CONFIG_EXPERIMENTAL tristate 'OS/2 HPFS file system support' CONFIG_HPFS_FS @@ -58,44 +50,33 @@ dep_bool ' Debug devfs' CONFIG_DEVFS_DEBUG $CONFIG_DEVFS_FS dep_bool '/dev/pts file system for Unix98 PTYs' CONFIG_DEVPTS_FS $CONFIG_UNIX98_PTYS dep_tristate 'QNX4 file system support (read only) (EXPERIMENTAL)' CONFIG_QNX4FS_FS $CONFIG_EXPERIMENTAL -if [ "$CONFIG_QNX4FS_FS" != "n" -a "$CONFIG_EXPERIMENTAL" = "y" ]; then - bool ' QNX4FS write support (DANGEROUS)' 
CONFIG_QNX4FS_RW -fi +dep_mbool ' QNX4FS write support (DANGEROUS)' CONFIG_QNX4FS_RW $CONFIG_QNX4FS_FS $CONFIG_EXPERIMENTAL tristate 'ROM file system support' CONFIG_ROMFS_FS tristate 'Second extended fs support' CONFIG_EXT2_FS tristate 'System V and Coherent file system support (read only)' CONFIG_SYSV_FS -if [ "$CONFIG_SYSV_FS" != "n" -a "$CONFIG_EXPERIMENTAL" = "y" ]; then - bool ' SYSV file system write support (DANGEROUS)' CONFIG_SYSV_FS_WRITE -fi +dep_mbool ' SYSV file system write support (DANGEROUS)' CONFIG_SYSV_FS_WRITE $CONFIG_SYSV_FS $CONFIG_EXPERIMENTAL tristate 'UDF file system support (read only)' CONFIG_UDF_FS -if [ "$CONFIG_UDF_FS" != "n" -a "$CONFIG_EXPERIMENTAL" = "y" ]; then - bool ' UDF write support (DANGEROUS)' CONFIG_UDF_RW -fi +dep_mbool ' UDF write support (DANGEROUS)' CONFIG_UDF_RW $CONFIG_UDF_FS $CONFIG_EXPERIMENTAL tristate 'UFS file system support (read only)' CONFIG_UFS_FS -if [ "$CONFIG_UFS_FS" != "n" -a "$CONFIG_EXPERIMENTAL" = "y" ]; then - bool ' UFS file system write support (DANGEROUS)' CONFIG_UFS_FS_WRITE -fi +dep_mbool ' UFS file system write support (DANGEROUS)' CONFIG_UFS_FS_WRITE $CONFIG_UFS_FS $CONFIG_EXPERIMENTAL if [ "$CONFIG_NET" = "y" ]; then -mainmenu_option next_comment -comment 'Network File Systems' - -if [ "$CONFIG_INET" = "y" ]; then - tristate 'Coda file system support (advanced network fs)' CONFIG_CODA_FS + mainmenu_option next_comment + comment 'Network File Systems' - tristate 'NFS file system support' CONFIG_NFS_FS + dep_tristate 'Coda file system support (advanced network fs)' CONFIG_CODA_FS $CONFIG_INET + dep_tristate 'NFS file system support' CONFIG_NFS_FS $CONFIG_INET + dep_mbool ' Provide NFSv3 client support (EXPERIMENTAL)' CONFIG_NFS_V3 $CONFIG_NFS_FS dep_bool ' Root file system on NFS' CONFIG_ROOT_NFS $CONFIG_NFS_FS $CONFIG_IP_PNP - tristate 'NFS server support' CONFIG_NFSD - if [ "$CONFIG_NFSD" != "n" ]; then - bool ' Provide NFSv3 server support (EXPERIMENTAL)' CONFIG_NFSD_V3 - fi + dep_tristate 
'NFS server support' CONFIG_NFSD $CONFIG_INET + dep_mbool ' Provide NFSv3 server support' CONFIG_NFSD_V3 $CONFIG_NFSD if [ "$CONFIG_NFS_FS" = "y" -o "$CONFIG_NFSD" = "y" ]; then define_tristate CONFIG_SUNRPC y @@ -109,19 +90,24 @@ if [ "$CONFIG_INET" = "y" ]; then define_tristate CONFIG_LOCKD n fi fi - if [ "$CONFIG_NFSD_V3" = "y" ]; then + if [ "$CONFIG_NFSD_V3" = "y" -o "$CONFIG_NFS_V3" = "y" ]; then define_bool CONFIG_LOCKD_V4 y fi - tristate 'SMB file system support (to mount Windows shares etc.)' CONFIG_SMB_FS -fi -if [ "$CONFIG_IPX" != "n" -o "$CONFIG_INET" != "n" ]; then - tristate 'NCP file system support (to mount NetWare volumes)' CONFIG_NCP_FS - if [ "$CONFIG_NCP_FS" != "n" ]; then + + dep_tristate 'SMB file system support (to mount Windows shares etc.)' CONFIG_SMB_FS $CONFIG_INET + + if [ "$CONFIG_IPX" != "n" -o "$CONFIG_INET" != "n" ]; then + tristate 'NCP file system support (to mount NetWare volumes)' CONFIG_NCP_FS source fs/ncpfs/Config.in + else + # for fs/nls/Config.in + define_bool CONFIG_NCPFS_NLS n fi -fi + endmenu -endmenu +else + # for fs/nls/Config.in + define_bool CONFIG_NCPFS_NLS n fi mainmenu_option next_comment diff --git a/fs/Makefile b/fs/Makefile index 74fc394ed..9219a138f 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -19,7 +19,7 @@ MOD_LIST_NAME := FS_MODULES ALL_SUB_DIRS = coda minix ext2 fat msdos vfat proc isofs nfs umsdos ntfs \ hpfs sysv smbfs ncpfs ufs efs affs romfs autofs hfs lockd \ nfsd nls devpts devfs adfs partitions qnx4 udf bfs cramfs \ - openpromfs autofs4 + openpromfs autofs4 ramfs SUB_DIRS := partitions @@ -45,6 +45,14 @@ else endif endif +ifeq ($(CONFIG_RAMFS),y) +SUB_DIRS += ramfs +else + ifeq ($(CONFIG_RAMFS),m) + MOD_SUB_DIRS += ramfs + endif +endif + ifeq ($(CONFIG_CODA_FS),y) SUB_DIRS += coda else diff --git a/fs/adfs/dir.c b/fs/adfs/dir.c index c9226513e..a5fcc1a54 100644 --- a/fs/adfs/dir.c +++ b/fs/adfs/dir.c @@ -30,7 +30,7 @@ static int adfs_readdir(struct file *filp, void *dirent, filldir_t filldir) { 
struct inode *inode = filp->f_dentry->d_inode; - struct super_block *sb = filp->f_dentry->d_sb; + struct super_block *sb = inode->i_sb; struct adfs_dir_ops *ops = sb->u.adfs_sb.s_dir; struct object_info obj; struct adfs_dir dir; diff --git a/fs/autofs/dirhash.c b/fs/autofs/dirhash.c index d861dc59d..b5626e5bb 100644 --- a/fs/autofs/dirhash.c +++ b/fs/autofs/dirhash.c @@ -78,8 +78,7 @@ struct autofs_dir_ent *autofs_expire(struct super_block *sb, /* Make sure entry is mounted and unused; note that dentry will point to the mounted-on-top root. */ - if ( !S_ISDIR(dentry->d_inode->i_mode) - || dentry->d_mounts == dentry ) { + if (!S_ISDIR(dentry->d_inode->i_mode)||!d_mountpoint(dentry)) { DPRINTK(("autofs: not expirable (not a mounted directory): %s\n", ent->name)); continue; } diff --git a/fs/autofs/root.c b/fs/autofs/root.c index 1540ceda8..06e2e86ea 100644 --- a/fs/autofs/root.c +++ b/fs/autofs/root.c @@ -63,7 +63,7 @@ static int autofs_root_readdir(struct file *filp, void *dirent, filldir_t filldi /* fall through */ default: while ( onr = nr, ent = autofs_hash_enum(dirhash,&nr,ent) ) { - if ( !ent->dentry || ent->dentry->d_mounts != ent->dentry ) { + if ( !ent->dentry || d_mountpoint(ent->dentry) ) { if (filldir(dirent,ent->name,ent->len,onr,ent->ino) < 0) return 0; filp->f_pos = nr; @@ -117,7 +117,7 @@ static int try_to_fill_dentry(struct dentry *dentry, struct super_block *sb, str /* If this is a directory that isn't a mount point, bitch at the daemon and fix it in user space */ - if ( S_ISDIR(dentry->d_inode->i_mode) && dentry->d_mounts == dentry ) { + if ( S_ISDIR(dentry->d_inode->i_mode) && !d_mountpoint(dentry) ) { return !autofs_wait(sbi, &dentry->d_name); } @@ -157,7 +157,7 @@ static int autofs_revalidate(struct dentry * dentry, int flags) return (dentry->d_time - jiffies <= AUTOFS_NEGATIVE_TIMEOUT); /* Check for a non-mountpoint directory */ - if ( S_ISDIR(dentry->d_inode->i_mode) && dentry->d_mounts == dentry ) { + if ( S_ISDIR(dentry->d_inode->i_mode) && 
!d_mountpoint(dentry) ) { if (autofs_oz_mode(sbi)) return 1; else diff --git a/fs/autofs/symlink.c b/fs/autofs/symlink.c index 129d5917f..3b90a077e 100644 --- a/fs/autofs/symlink.c +++ b/fs/autofs/symlink.c @@ -18,10 +18,10 @@ static int autofs_readlink(struct dentry *dentry, char *buffer, int buflen) return vfs_readlink(dentry, buffer, buflen, s); } -static struct dentry *autofs_follow_link(struct dentry *dentry, struct dentry *base, unsigned flags) +static int autofs_follow_link(struct dentry *dentry, struct nameidata *nd) { char *s=((struct autofs_symlink *)dentry->d_inode->u.generic_ip)->data; - return vfs_follow_link(dentry, base, flags, s); + return vfs_follow_link(nd, s); } struct inode_operations autofs_symlink_inode_operations = { diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c index 2318b9aec..dd3695648 100644 --- a/fs/autofs4/expire.c +++ b/fs/autofs4/expire.c @@ -47,8 +47,7 @@ static int is_tree_busy(struct dentry *root) } /* Mountpoints don't count */ - if (root->d_mounts != root || - root->d_covers != root) { + if (d_mountpoint(root)) { DPRINTK(("is_tree_busy: mountpoint\n")); count--; } @@ -77,8 +76,7 @@ resume: count += (dentry->d_count - 1); /* Mountpoints don't count */ - if (dentry->d_mounts != dentry || - dentry->d_covers != dentry) { + if (d_mountpoint(dentry)) { DPRINTK(("is_tree_busy: mountpoint\n")); adj++; } diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index 8ff33b344..41dc98984 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -181,7 +181,7 @@ static int try_to_fill_dentry(struct dentry *dentry, /* If this is an unused directory that isn't a mount point, bitch at the daemon and fix it in user space */ if (S_ISDIR(dentry->d_inode->i_mode) && - dentry->d_mounts == dentry && + !d_mountpoint(dentry) && list_empty(&dentry->d_subdirs)) { DPRINTK(("try_to_fill_entry: mounting existing dir\n")); return autofs4_wait(sbi, &dentry->d_name, NFY_MOUNT) == 0; @@ -226,7 +226,7 @@ static int autofs4_root_revalidate(struct dentry * 
dentry, int flags) /* Check for a non-mountpoint directory with no contents */ if (S_ISDIR(dentry->d_inode->i_mode) && - dentry->d_mounts == dentry && + !d_mountpoint(dentry) && list_empty(&dentry->d_subdirs)) { DPRINTK(("autofs_root_revalidate: dentry=%p %.*s, emptydir\n", dentry, dentry->d_name.len, dentry->d_name.name)); diff --git a/fs/autofs4/symlink.c b/fs/autofs4/symlink.c index b86b400cf..0bcb312e0 100644 --- a/fs/autofs4/symlink.c +++ b/fs/autofs4/symlink.c @@ -19,13 +19,11 @@ static int autofs4_readlink(struct dentry *dentry, char *buffer, int buflen) return vfs_readlink(dentry, buffer, buflen, ino->u.symlink); } -static struct dentry * autofs4_follow_link(struct dentry *dentry, - struct dentry *base, - unsigned int flags) +static int autofs4_follow_link(struct dentry *dentry, struct nameidata *nd) { struct autofs_info *ino = autofs4_dentry_ino(dentry); - return vfs_follow_link(dentry, base, flags, ino->u.symlink); + return vfs_follow_link(nd, ino->u.symlink); } struct inode_operations autofs4_symlink_inode_operations = { diff --git a/fs/bad_inode.c b/fs/bad_inode.c index adb26f415..1744c5eef 100644 --- a/fs/bad_inode.c +++ b/fs/bad_inode.c @@ -15,10 +15,11 @@ * so that a bad root inode can at least be unmounted. To do this * we must dput() the base and return the dentry with a dget(). */ -static struct dentry * bad_follow_link(struct dentry *dent, struct dentry *base, unsigned int follow) +static int bad_follow_link(struct dentry *dent, struct nameidata *nd) { - dput(base); - return dget(dent); + dput(nd->dentry); + nd->dentry = dget(dent); + return 0; } static int return_EIO(void) @@ -64,7 +65,7 @@ struct inode_operations bad_inode_ops = }; -/* +/* * When a filesystem is unable to read an inode due to an I/O error in * its read_inode() function, it can call make_bad_inode() to return a * set of stubs which will return EIO errors as required. 
@@ -72,6 +73,16 @@ struct inode_operations bad_inode_ops = * We only need to do limited initialisation: all other fields are * preinitialised to zero automatically. */ + +/** + * make_bad_inode - mark an inode bad due to an I/O error + * @inode: Inode to mark bad + * + * When an inode cannot be read due to a media or remote network + * failure this function makes the inode 'bad' and causes I/O operations + * on it to fail from this point on + */ + void make_bad_inode(struct inode * inode) { inode->i_mode = S_IFREG; @@ -85,6 +96,14 @@ void make_bad_inode(struct inode * inode) * &bad_inode_ops to cover the case of invalidated inodes as well as * those created by make_bad_inode() above. */ + +/** + * is_bad_inode - is an inode errored + * @inode: inode to test + * + * Returns true if the inode in question has been marked as bad + */ + int is_bad_inode(struct inode * inode) { return (inode->i_op == &bad_inode_ops); diff --git a/fs/dcache.c b/fs/dcache.c index d4aef49e7..24e2f2568 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -30,11 +30,6 @@ /* Right now the dcache depends on the kernel lock */ #define check_lock() if (!kernel_locked()) BUG() -/* For managing the dcache */ -extern unsigned long num_physpages, page_cache_size; -extern int inodes_stat[]; -#define nr_inodes (inodes_stat[0]) - kmem_cache_t *dentry_cache; /* @@ -87,8 +82,8 @@ static inline void dentry_iput(struct dentry * dentry) } } -/* - * dput() +/* + * dput * * This is complicated by the fact that we do not want to put * dentries that are no longer on any hash chain on the unused @@ -103,6 +98,17 @@ static inline void dentry_iput(struct dentry * dentry) * on the compiler to always get this right (gcc generally doesn't). * Real recursion would eat up our stack space. */ + +/* + * dput - release a dentry + * @dentry: dentry to release + * + * Release a dentry. 
This will drop the usage count and if appropriate + * call the dentry unlink method as well as removing it from the queues and + * releasing its resources. If the parent dentries were scheduled for release + * they too may now get deleted. + */ + void dput(struct dentry *dentry) { int count; @@ -166,11 +172,16 @@ out: BUG(); } -/* +/** + * d_invalidate - invalidate a dentry + * @dentry: dentry to invalidate + * * Try to invalidate the dentry if it turns out to be * possible. If there are other dentries that can be - * reached through this one we can't delete it. + * reached through this one we can't delete it and we + * return -EBUSY. On success we return 0. */ + int d_invalidate(struct dentry * dentry) { check_lock(); @@ -225,12 +236,19 @@ static inline void prune_one_dentry(struct dentry * dentry) dput(parent); } -/* +/** + * prune_dcache - shrink the dcache + * @count: number of entries to try and free + * * Shrink the dcache. This is done when we need * more memory, or simply when we need to unmount * something (at which point we need to unuse * all dentries). + * + * This function may fail to free any resources if + * all the dentries are in use. */ + void prune_dcache(int count) { check_lock(); @@ -264,6 +282,16 @@ void prune_dcache(int count) * each dput(), but since the target dentries are all at * the end, it's really just a single traversal. */ + +/** + * shrink_dcache_sb - shrink dcache for a superblock + * @sb: superblock + * + * Shrink the dcache for the specified super block. This + * is used to free the dcache before unmounting a file + * system + */ + void shrink_dcache_sb(struct super_block * sb) { struct list_head *tmp, *next; @@ -307,11 +335,17 @@ repeat: } } -/* +/** + * is_root_busy - check if a root dentry could be freed + * @root: Dentry to work down from + * * Check whether a root dentry would be in use if all of its * child dentries were freed. This allows a non-destructive * test for unmounting a device. 
+ * + * Return non zero if the root is still busy. */ + int is_root_busy(struct dentry *root) { struct dentry *this_parent = root; @@ -353,12 +387,21 @@ resume: * We descend to the next level whenever the d_subdirs * list is non-empty and continue searching. */ + +/** + * have_submounts - check for mounts over a dentry + * @parent: dentry to check. + * + * Return true if the parent or its subdirectories contain + * a mount point + */ + int have_submounts(struct dentry *parent) { struct dentry *this_parent = parent; struct list_head *next; - if (parent->d_mounts != parent) + if (d_mountpoint(parent)) return 1; repeat: next = this_parent->d_subdirs.next; @@ -368,7 +411,7 @@ resume: struct dentry *dentry = list_entry(tmp, struct dentry, d_child); next = tmp->next; /* Have we found a mount point ? */ - if (dentry->d_mounts != dentry) + if (d_mountpoint(dentry)) return 1; if (!list_empty(&dentry->d_subdirs)) { this_parent = dentry; @@ -440,9 +483,13 @@ this_parent->d_parent->d_name.name, this_parent->d_name.name, found); return found; } -/* +/** + * shrink_dcache_parent - prune dcache + * @parent: parent of entries to prune + * * Prune the dcache to remove unused children of the parent dentry. */ + void shrink_dcache_parent(struct dentry * parent) { int found; @@ -482,6 +529,16 @@ int shrink_dcache_memory(int priority, unsigned int gfp_mask, zone_t * zone) #define NAME_ALLOC_LEN(len) ((len+16) & ~15) +/** + * d_alloc - allocate a dcache entry + * @parent: parent of entry to allocate + * @name: qstr of the name + * + * Allocates a dentry. It returns NULL if there is insufficient memory + * available. On a success the dentry is returned. The name passed in is + * copied and the copy passed in may be reused after this call. 
+ */ + struct dentry * d_alloc(struct dentry * parent, const struct qstr *name) { char * str; @@ -530,7 +587,11 @@ struct dentry * d_alloc(struct dentry * parent, const struct qstr *name) return dentry; } -/* +/** + * d_instantiate - fill in inode information for a dentry + * @entry: dentry to complete + * @inode: inode to attacheto this dentry + * * Fill in inode information in the entry. * * This turns negative dentries into productive full members @@ -540,6 +601,7 @@ struct dentry * d_alloc(struct dentry * parent, const struct qstr *name) * (or otherwise set) by the caller to indicate that it is now * in use by the dcache.. */ + void d_instantiate(struct dentry *entry, struct inode * inode) { if (inode) @@ -547,6 +609,15 @@ void d_instantiate(struct dentry *entry, struct inode * inode) entry->d_inode = inode; } +/** + * d_alloc_root - allocate root dentry + * @root_inode: inode to allocate the root for + * + * Allocate a root ('/') dentry for the inode given. The inode is + * instantiated and returned. NULL is returned if there is insufficient + * memory or the inode passed is NULL. + */ + struct dentry * d_alloc_root(struct inode * root_inode) { struct dentry *res = NULL; @@ -569,6 +640,17 @@ static inline struct list_head * d_hash(struct dentry * parent, unsigned long ha return dentry_hashtable + (hash & D_HASHMASK); } +/** + * d_lookup - search for a dentry + * @parent: parent dentry + * @name: qstr of name we wish to find + * + * Searches the children of the parent dentry for the name in question. If + * the dentry is found its reference count is incremented and the dentry + * is returned. The caller must use d_put to free the entry when it has + * finished using it. NULL is returned on failure. 
+ */ + struct dentry * d_lookup(struct dentry * parent, struct qstr * name) { unsigned int len = name->len; @@ -602,16 +684,23 @@ struct dentry * d_lookup(struct dentry * parent, struct qstr * name) return NULL; } -/* - * An insecure source has sent us a dentry, here we verify it. +/** + * d_validate - verify dentry provided from insecure source + * @dentry: The dentry alleged to be valid + * @dparent: The parent dentry + * @hash: Hash of the dentry + * @len: Length of the name * + * An insecure source has sent us a dentry, here we verify it. * This is used by ncpfs in its readdir implementation. + * Zero is returned in the dentry is invalid. * - * NOTE! Do _not_ dereference the pointers before we have + * NOTE: This function does _not_ dereference the pointers before we have * validated them. We can test the pointer values, but we * must not actually use them until we have found a valid * copy of the pointer in kernel space.. */ + int d_validate(struct dentry *dentry, struct dentry *dparent, unsigned int hash, unsigned int len) { @@ -659,12 +748,19 @@ out: * it from the hash queues and waiting for * it to be deleted later when it has no users */ + +/** + * d_delete - delete a dentry + * @dentry: The dentry to delete + * + * Turn the dentry into a negative dentry if possible, otherwise + * remove it from the hash queues so it can be deleted later + */ + void d_delete(struct dentry * dentry) { check_lock(); - check_lock(); - /* * Are we the only user? */ @@ -680,6 +776,13 @@ void d_delete(struct dentry * dentry) d_drop(dentry); } +/** + * d_rehash - add an entry back to the hash + * @entry: dentry to add to the hash + * + * Adds a dentry to the hash according to its name + */ + void d_rehash(struct dentry * entry) { struct dentry * parent = entry->d_parent; @@ -733,6 +836,16 @@ static inline void switch_names(struct dentry * dentry, struct dentry * target) * the fact that any list-entry can be a head of the list. * Think about it. 
*/ + +/** + * d_move - move a dentry + * @dentry: entry to move + * @target: new dentry + * + * Update the dcache to reflect the move of a file name. Negative + * dcache entries should not be moved in this way. + */ + void d_move(struct dentry * dentry, struct dentry * target) { check_lock(); @@ -762,14 +875,24 @@ void d_move(struct dentry * dentry, struct dentry * target) list_add(&dentry->d_child, &dentry->d_parent->d_subdirs); } -/* +/** + * d_path - return the path of a dentry + * @dentry: dentry to report + * @buffer: buffer to return value in + * @buflen: buffer length + * + * Convert a dentry into an ascii path name. If the entry has been deleted + * the string ' (deleted)' is appended. Note that this is ambiguous. Returns + * the buffer. + * * "buflen" should be PAGE_SIZE or more. */ -char * d_path(struct dentry *dentry, char *buffer, int buflen) +char * __d_path(struct dentry *dentry, struct vfsmount *vfsmnt, + struct dentry *root, struct vfsmount *rootmnt, + char *buffer, int buflen) { char * end = buffer+buflen; char * retval; - struct dentry * root = current->fs->root; *--end = '\0'; buflen--; @@ -839,7 +962,7 @@ asmlinkage long sys_getcwd(char *buf, unsigned long size) char * cwd; lock_kernel(); - cwd = d_path(pwd, page, PAGE_SIZE); + cwd = d_path(pwd, current->fs->pwdmnt, page, PAGE_SIZE); unlock_kernel(); error = -ERANGE; @@ -860,6 +983,16 @@ asmlinkage long sys_getcwd(char *buf, unsigned long size) * * Trivially implemented using the dcache structure */ + +/** + * is_subdir - is new dentry a subdirectory of old_dentry + * @new_dentry: new dentry + * @old_dentry: old dentry + * + * Returns 1 if new_dentry is a subdirectory of the parent (at any depth). + * Returns 0 otherwise. 
+ */ + int is_subdir(struct dentry * new_dentry, struct dentry * old_dentry) { int result; @@ -879,14 +1012,20 @@ int is_subdir(struct dentry * new_dentry, struct dentry * old_dentry) return result; } -/* +/** + * find_inode_number - check for dentry with name + * @dir: directory to check + * @name: Name to find. + * * Check whether a dentry already exists for the given name, - * and return the inode number if it has an inode. + * and return the inode number if it has an inode. Otherwise + * 0 is returned. * * This routine is used to post-process directory listings for * filesystems using synthetic inode numbers, and is necessary * to keep getcwd() working. */ + ino_t find_inode_number(struct dentry *dir, struct qstr *name) { struct dentry * dentry; diff --git a/fs/devfs/base.c b/fs/devfs/base.c index ab1e9ee83..eb38eff34 100644 --- a/fs/devfs/base.c +++ b/fs/devfs/base.c @@ -3091,16 +3091,14 @@ static int devfs_readlink (struct dentry *dentry, char *buffer, int buflen) return vfs_readlink(dentry, buffer, buflen, name); } /* End Function devfs_readlink */ -static struct dentry *devfs_follow_link (struct dentry *dentry, - struct dentry *base, - unsigned int follow) +static int devfs_follow_link (struct dentry *dentry, struct nameidata *nd) { struct devfs_inode *di=get_devfs_inode_from_vfs_inode(dentry->d_inode); char *name = ERR_PTR(-ENOENT); if (di && di->de->registered) name = di->de->u.symlink.linkname; - return vfs_follow_link(dentry, base, follow, name); + return vfs_follow_link(nd, name); } /* End Function devfs_follow_link */ static struct inode_operations devfs_iops = @@ -3393,12 +3391,11 @@ int __init init_devfs_fs (void) void __init mount_devfs_fs (void) { int err; - extern int do_mount (struct block_device *bdev, const char *dev_name, - const char *dir_name, const char * type, int flags, - void * data); + extern long do_sys_mount (char *dev_name, char *dir_name, + char * type, int flags, void * data); if ( (boot_options & OPTION_NOMOUNT) ) return; - err = 
do_mount (NULL, "none", "/dev", "devfs", 0, ""); + err = do_sys_mount ("none", "/dev", "devfs", 0, ""); if (err == 0) printk ("Mounted devfs on /dev\n"); else printk ("Warning: unable to mount devfs, err: %d\n", err); } /* End Function mount_devfs_fs */ diff --git a/fs/dquot.c b/fs/dquot.c index c602697f5..64416f9ee 100644 --- a/fs/dquot.c +++ b/fs/dquot.c @@ -716,15 +716,16 @@ int remove_inode_dquot_ref(struct inode *inode, short type, struct list_head *to } inode->i_flags &= ~S_QUOTA; put_it: - if (dquot != NODQUOT) + if (dquot != NODQUOT) { if (dqput_blocks(dquot)) { if (dquot->dq_count != 1) printk(KERN_WARNING "VFS: Adding dquot with dq_count %d to dispose list.\n", dquot->dq_count); list_add(&dquot->dq_free, tofree_head); /* As dquot must have currently users it can't be on the free list... */ return 1; - } - else + } else { dqput(dquot); /* We have guaranteed we won't block */ + } + } return 0; } @@ -793,20 +794,12 @@ static inline int need_print_warning(struct dquot *dquot, int flag) static void print_warning(struct dquot *dquot, int flag, const char *fmtstr) { - struct dentry *root; - char *path, *buffer; - if (!need_print_warning(dquot, flag)) return; - root = dquot->dq_sb->s_root; - dget(root); - buffer = (char *) __get_free_page(GFP_KERNEL); - path = buffer ? 
d_path(root, buffer, PAGE_SIZE) : "?"; - sprintf(quotamessage, fmtstr, path, quotatypes[dquot->dq_type]); - free_page((unsigned long) buffer); + sprintf(quotamessage, fmtstr, + bdevname(dquot->dq_sb->s_dev), quotatypes[dquot->dq_type]); tty_write_message(current->tty, quotamessage); dquot->dq_flags |= flag; - dput(root); } static inline char ignore_hardlimit(struct dquot *dquot) @@ -1469,7 +1462,7 @@ static int quota_on(struct super_block *sb, short type, char *path) if (IS_ERR(tmp)) goto out_lock; - f = filp_open(tmp, O_RDWR, 0600, NULL); + f = filp_open(tmp, O_RDWR, 0600); putname(tmp); error = PTR_ERR(f); @@ -112,7 +112,7 @@ asmlinkage long sys_uselib(const char * library) goto out; file = fget(fd); retval = -ENOEXEC; - if (file && file->f_dentry && file->f_op && file->f_op->read) { + if (file && file->f_op && file->f_op->read) { spin_lock(&binfmt_lock); for (fmt = formats ; fmt ; fmt = fmt->next) { if (!fmt->load_shlib) @@ -315,27 +315,30 @@ int setup_arg_pages(struct linux_binprm *bprm) return 0; } +/* MOUNT_REWRITE: &mnt should be passed to lookup_dentry */ struct file *open_exec(const char *name) { struct dentry *dentry; + struct vfsmount *mnt = NULL; struct file *file; lock_kernel(); - dentry = lookup_dentry(name, NULL, LOOKUP_FOLLOW); + dentry = lookup_dentry(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE); file = (struct file*) dentry; if (!IS_ERR(dentry)) { file = ERR_PTR(-EACCES); - if (dentry->d_inode && S_ISREG(dentry->d_inode->i_mode)) { + if (S_ISREG(dentry->d_inode->i_mode)) { int err = permission(dentry->d_inode, MAY_EXEC); file = ERR_PTR(err); if (!err) { - file = dentry_open(dentry, O_RDONLY); + file = dentry_open(dentry, mnt, O_RDONLY); out: unlock_kernel(); return file; } } dput(dentry); + mntput(mnt); } goto out; } @@ -860,7 +863,6 @@ int do_coredump(long signr, struct pt_regs * regs) struct linux_binfmt * binfmt; char corename[6+sizeof(current->comm)]; struct file * file; - struct dentry * dentry; struct inode * inode; lock_kernel(); @@ -879,17 +881,16 
@@ int do_coredump(long signr, struct pt_regs * regs) #else corename[4] = '\0'; #endif - file = filp_open(corename, O_CREAT | 2 | O_TRUNC | O_NOFOLLOW, 0600, NULL); + file = filp_open(corename, O_CREAT | 2 | O_TRUNC | O_NOFOLLOW, 0600); if (IS_ERR(file)) goto fail; - dentry = file->f_dentry; - inode = dentry->d_inode; + inode = file->f_dentry->d_inode; if (inode->i_nlink > 1) goto close_fail; /* multiple links - don't dump */ if (!S_ISREG(inode->i_mode)) goto close_fail; - if (!inode->i_fop) + if (!file->f_op) goto close_fail; if (!file->f_op->write) goto close_fail; diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c index 79caca33b..c5bc9471d 100644 --- a/fs/ext2/namei.c +++ b/fs/ext2/namei.c @@ -182,61 +182,71 @@ static struct dentry *ext2_lookup(struct inode * dir, struct dentry *dentry) return NULL; } +static inline void ext2_set_de_type(struct super_block *sb, + struct ext2_dir_entry_2 *de, + umode_t mode) { + if (!EXT2_HAS_INCOMPAT_FEATURE(sb, EXT2_FEATURE_INCOMPAT_FILETYPE)) + return; + if (S_ISREG(mode)) + de->file_type = EXT2_FT_REG_FILE; + else if (S_ISDIR(mode)) + de->file_type = EXT2_FT_DIR; + else if (S_ISLNK(mode)) + de->file_type = EXT2_FT_SYMLINK; + else if (S_ISSOCK(mode)) + de->file_type = EXT2_FT_SOCK; + else if (S_ISFIFO(mode)) + de->file_type = EXT2_FT_FIFO; + else if (S_ISCHR(mode)) + de->file_type = EXT2_FT_CHRDEV; + else if (S_ISBLK(mode)) + de->file_type = EXT2_FT_BLKDEV; +} + /* * ext2_add_entry() * - * adds a file entry to the specified directory, using the same - * semantics as ext2_find_entry(). It returns NULL if it failed. - * - * NOTE!! The inode part of 'de' is left at 0 - which means you - * may not sleep between calling this and putting something into - * the entry, as someone else might have used it while you slept. + * adds a file entry to the specified directory. 
*/ -static struct buffer_head * ext2_add_entry (struct inode * dir, - const char * name, int namelen, - struct ext2_dir_entry_2 ** res_dir, - int *err) +int ext2_add_entry (struct inode * dir, const char * name, int namelen, + struct inode *inode) { unsigned long offset; unsigned short rec_len; struct buffer_head * bh; struct ext2_dir_entry_2 * de, * de1; struct super_block * sb; + int retval; - *err = -EINVAL; - *res_dir = NULL; if (!dir || !dir->i_nlink) - return NULL; + return -EINVAL; sb = dir->i_sb; if (!namelen) - return NULL; + return -EINVAL; /* * Is this a busy deleted directory? Can't create new files if so */ if (dir->i_size == 0) { - *err = -ENOENT; - return NULL; + return -ENOENT; } - bh = ext2_bread (dir, 0, 0, err); + bh = ext2_bread (dir, 0, 0, &retval); if (!bh) - return NULL; + return retval; rec_len = EXT2_DIR_REC_LEN(namelen); offset = 0; de = (struct ext2_dir_entry_2 *) bh->b_data; - *err = -ENOSPC; while (1) { if ((char *)de >= sb->s_blocksize + bh->b_data) { brelse (bh); bh = NULL; - bh = ext2_bread (dir, offset >> EXT2_BLOCK_SIZE_BITS(sb), 1, err); + bh = ext2_bread (dir, offset >> EXT2_BLOCK_SIZE_BITS(sb), 1, &retval); if (!bh) - return NULL; + return retval; if (dir->i_size <= offset) { if (dir->i_size == 0) { - *err = -ENOENT; - return NULL; + return -ENOENT; } ext2_debug ("creating next block\n"); @@ -256,14 +266,12 @@ static struct buffer_head * ext2_add_entry (struct inode * dir, } if (!ext2_check_dir_entry ("ext2_add_entry", dir, de, bh, offset)) { - *err = -ENOENT; brelse (bh); - return NULL; + return -ENOENT; } if (ext2_match (namelen, name, de)) { - *err = -EEXIST; brelse (bh); - return NULL; + return -EEXIST; } if ((le32_to_cpu(de->inode) == 0 && le16_to_cpu(de->rec_len) >= rec_len) || (le16_to_cpu(de->rec_len) >= EXT2_DIR_REC_LEN(de->name_len) + rec_len)) { @@ -276,7 +284,11 @@ static struct buffer_head * ext2_add_entry (struct inode * dir, de->rec_len = cpu_to_le16(EXT2_DIR_REC_LEN(de->name_len)); de = de1; } - de->inode = 0; + 
if (inode) { + de->inode = cpu_to_le32(inode->i_ino); + ext2_set_de_type(dir->i_sb, de, inode->i_mode); + } else + de->inode = 0; de->name_len = namelen; de->file_type = 0; memcpy (de->name, name, namelen); @@ -296,22 +308,26 @@ static struct buffer_head * ext2_add_entry (struct inode * dir, mark_inode_dirty(dir); dir->i_version = ++event; mark_buffer_dirty(bh, 1); - *res_dir = de; - *err = 0; - return bh; + if (IS_SYNC(dir)) { + ll_rw_block (WRITE, 1, &bh); + wait_on_buffer (bh); + } + brelse(bh); + return 0; } offset += le16_to_cpu(de->rec_len); de = (struct ext2_dir_entry_2 *) ((char *) de + le16_to_cpu(de->rec_len)); } brelse (bh); - return NULL; + return -ENOSPC; } /* * ext2_delete_entry deletes a directory entry by merging it with the * previous entry */ -static int ext2_delete_entry (struct ext2_dir_entry_2 * dir, +static int ext2_delete_entry (struct inode * dir, + struct ext2_dir_entry_2 * de_del, struct buffer_head * bh) { struct ext2_dir_entry_2 * de, * pde; @@ -324,13 +340,19 @@ static int ext2_delete_entry (struct ext2_dir_entry_2 * dir, if (!ext2_check_dir_entry ("ext2_delete_entry", NULL, de, bh, i)) return -EIO; - if (de == dir) { + if (de == de_del) { if (pde) pde->rec_len = cpu_to_le16(le16_to_cpu(pde->rec_len) + - le16_to_cpu(dir->rec_len)); + le16_to_cpu(de->rec_len)); else - dir->inode = 0; + de->inode = 0; + dir->i_version = ++event; + mark_buffer_dirty(bh, 1); + if (IS_SYNC(dir)) { + ll_rw_block (WRITE, 1, &bh); + wait_on_buffer (bh); + } return 0; } i += le16_to_cpu(de->rec_len); @@ -340,27 +362,6 @@ static int ext2_delete_entry (struct ext2_dir_entry_2 * dir, return -ENOENT; } -static inline void ext2_set_de_type(struct super_block *sb, - struct ext2_dir_entry_2 *de, - umode_t mode) { - if (!EXT2_HAS_INCOMPAT_FEATURE(sb, EXT2_FEATURE_INCOMPAT_FILETYPE)) - return; - if (S_ISREG(mode)) - de->file_type = EXT2_FT_REG_FILE; - else if (S_ISDIR(mode)) - de->file_type = EXT2_FT_DIR; - else if (S_ISLNK(mode)) - de->file_type = EXT2_FT_SYMLINK; - 
else if (S_ISSOCK(mode)) - de->file_type = EXT2_FT_SOCK; - else if (S_ISFIFO(mode)) - de->file_type = EXT2_FT_FIFO; - else if (S_ISCHR(mode)) - de->file_type = EXT2_FT_CHRDEV; - else if (S_ISBLK(mode)) - de->file_type = EXT2_FT_BLKDEV; -} - /* * By the time this is called, we already have created * the directory cache entry for the new file, but it @@ -372,38 +373,28 @@ static inline void ext2_set_de_type(struct super_block *sb, static int ext2_create (struct inode * dir, struct dentry * dentry, int mode) { struct inode * inode; - struct buffer_head * bh; - struct ext2_dir_entry_2 * de; - int err = -EIO; + int err; /* * N.B. Several error exits in ext2_new_inode don't set err. */ inode = ext2_new_inode (dir, mode, &err); if (!inode) - return err; + return -EIO; inode->i_op = &ext2_file_inode_operations; inode->i_fop = &ext2_file_operations; inode->i_mapping->a_ops = &ext2_aops; inode->i_mode = mode; mark_inode_dirty(inode); - bh = ext2_add_entry (dir, dentry->d_name.name, dentry->d_name.len, &de, &err); - if (!bh) { + err = ext2_add_entry (dir, dentry->d_name.name, dentry->d_name.len, + inode); + if (err) { inode->i_nlink--; mark_inode_dirty(inode); iput (inode); return err; } - de->inode = cpu_to_le32(inode->i_ino); - ext2_set_de_type(dir->i_sb, de, S_IFREG); - dir->i_version = ++event; - mark_buffer_dirty(bh, 1); - if (IS_SYNC(dir)) { - ll_rw_block (WRITE, 1, &bh); - wait_on_buffer (bh); - } - brelse (bh); d_instantiate(dentry, inode); return 0; } @@ -411,56 +402,42 @@ static int ext2_create (struct inode * dir, struct dentry * dentry, int mode) static int ext2_mknod (struct inode * dir, struct dentry *dentry, int mode, int rdev) { struct inode * inode; - struct buffer_head * bh; - struct ext2_dir_entry_2 * de; - int err = -EIO; + int err; inode = ext2_new_inode (dir, mode, &err); if (!inode) - goto out; + return -EIO; inode->i_uid = current->fsuid; init_special_inode(inode, mode, rdev); - bh = ext2_add_entry (dir, dentry->d_name.name, dentry->d_name.len, &de, 
&err); - if (!bh) + err = ext2_add_entry (dir, dentry->d_name.name, dentry->d_name.len, + inode); + if (err) goto out_no_entry; - de->inode = cpu_to_le32(inode->i_ino); - dir->i_version = ++event; - ext2_set_de_type(dir->i_sb, de, inode->i_mode); mark_inode_dirty(inode); - mark_buffer_dirty(bh, 1); - if (IS_SYNC(dir)) { - ll_rw_block (WRITE, 1, &bh); - wait_on_buffer (bh); - } d_instantiate(dentry, inode); - brelse(bh); - err = 0; -out: - return err; + return 0; out_no_entry: inode->i_nlink--; mark_inode_dirty(inode); iput(inode); - goto out; + return err; } static int ext2_mkdir(struct inode * dir, struct dentry * dentry, int mode) { struct inode * inode; - struct buffer_head * bh, * dir_block; + struct buffer_head * dir_block; struct ext2_dir_entry_2 * de; int err; - err = -EMLINK; if (dir->i_nlink >= EXT2_LINK_MAX) - goto out; + return -EMLINK; - err = -EIO; inode = ext2_new_inode (dir, S_IFDIR, &err); if (!inode) - goto out; + return -EIO; inode->i_op = &ext2_dir_inode_operations; inode->i_fop = &ext2_dir_operations; @@ -471,7 +448,7 @@ static int ext2_mkdir(struct inode * dir, struct dentry * dentry, int mode) inode->i_nlink--; /* is this nlink == 0? 
*/ mark_inode_dirty(inode); iput (inode); - return err; + return -EIO; } de = (struct ext2_dir_entry_2 *) dir_block->b_data; de->inode = cpu_to_le32(inode->i_ino); @@ -492,31 +469,21 @@ static int ext2_mkdir(struct inode * dir, struct dentry * dentry, int mode) if (dir->i_mode & S_ISGID) inode->i_mode |= S_ISGID; mark_inode_dirty(inode); - bh = ext2_add_entry (dir, dentry->d_name.name, dentry->d_name.len, &de, &err); - if (!bh) + err = ext2_add_entry (dir, dentry->d_name.name, dentry->d_name.len, + inode); + if (err) goto out_no_entry; - de->inode = cpu_to_le32(inode->i_ino); - ext2_set_de_type(dir->i_sb, de, S_IFDIR); - dir->i_version = ++event; - mark_buffer_dirty(bh, 1); - if (IS_SYNC(dir)) { - ll_rw_block (WRITE, 1, &bh); - wait_on_buffer (bh); - } dir->i_nlink++; dir->u.ext2_i.i_flags &= ~EXT2_BTREE_FL; mark_inode_dirty(dir); d_instantiate(dentry, inode); - brelse (bh); - err = 0; -out: - return err; + return 0; out_no_entry: inode->i_nlink = 0; mark_inode_dirty(inode); iput (inode); - goto out; + return err; } /* @@ -604,15 +571,9 @@ static int ext2_rmdir (struct inode * dir, struct dentry *dentry) if (!empty_dir (inode)) goto end_rmdir; - retval = ext2_delete_entry (de, bh); - dir->i_version = ++event; + retval = ext2_delete_entry(dir, de, bh); if (retval) goto end_rmdir; - mark_buffer_dirty(bh, 1); - if (IS_SYNC(dir)) { - ll_rw_block (WRITE, 1, &bh); - wait_on_buffer (bh); - } if (inode->i_nlink != 2) ext2_warning (inode->i_sb, "ext2_rmdir", "empty directory has nlink!=2 (%d)", @@ -657,15 +618,9 @@ static int ext2_unlink(struct inode * dir, struct dentry *dentry) inode->i_ino, inode->i_nlink); inode->i_nlink = 1; } - retval = ext2_delete_entry (de, bh); + retval = ext2_delete_entry(dir, de, bh); if (retval) goto end_unlink; - dir->i_version = ++event; - mark_buffer_dirty(bh, 1); - if (IS_SYNC(dir)) { - ll_rw_block (WRITE, 1, &bh); - wait_on_buffer (bh); - } dir->i_ctime = dir->i_mtime = CURRENT_TIME; dir->u.ext2_i.i_flags &= ~EXT2_BTREE_FL; 
mark_inode_dirty(dir); @@ -683,18 +638,14 @@ end_unlink: static int ext2_symlink (struct inode * dir, struct dentry *dentry, const char * symname) { struct inode * inode; - struct ext2_dir_entry_2 * de; - struct buffer_head * bh = NULL; int l, err; - err = -ENAMETOOLONG; l = strlen(symname)+1; if (l > dir->i_sb->s_blocksize) - goto out; + return -ENAMETOOLONG; - err = -EIO; if (!(inode = ext2_new_inode (dir, S_IFLNK, &err))) - goto out; + return -EIO; inode->i_mode = S_IFLNK | S_IRWXUGO; @@ -711,36 +662,24 @@ static int ext2_symlink (struct inode * dir, struct dentry *dentry, const char * } mark_inode_dirty(inode); - bh = ext2_add_entry (dir, dentry->d_name.name, dentry->d_name.len, &de, &err); - if (!bh) + err = ext2_add_entry (dir, dentry->d_name.name, dentry->d_name.len, + inode); + if (err) goto out_no_entry; - de->inode = cpu_to_le32(inode->i_ino); - ext2_set_de_type(dir->i_sb, de, S_IFLNK); - dir->i_version = ++event; - mark_buffer_dirty(bh, 1); - if (IS_SYNC(dir)) { - ll_rw_block (WRITE, 1, &bh); - wait_on_buffer (bh); - } - brelse (bh); d_instantiate(dentry, inode); - err = 0; -out: - return err; + return 0; out_no_entry: inode->i_nlink--; mark_inode_dirty(inode); iput (inode); - goto out; + return err; } static int ext2_link (struct dentry * old_dentry, struct inode * dir, struct dentry *dentry) { struct inode *inode = old_dentry->d_inode; - struct ext2_dir_entry_2 * de; - struct buffer_head * bh; int err; if (S_ISDIR(inode->i_mode)) @@ -748,20 +687,12 @@ static int ext2_link (struct dentry * old_dentry, if (inode->i_nlink >= EXT2_LINK_MAX) return -EMLINK; - - bh = ext2_add_entry (dir, dentry->d_name.name, dentry->d_name.len, &de, &err); - if (!bh) + + err = ext2_add_entry (dir, dentry->d_name.name, dentry->d_name.len, + inode); + if (err) return err; - de->inode = cpu_to_le32(inode->i_ino); - ext2_set_de_type(dir->i_sb, de, inode->i_mode); - dir->i_version = ++event; - mark_buffer_dirty(bh, 1); - if (IS_SYNC(dir)) { - ll_rw_block (WRITE, 1, &bh); - 
wait_on_buffer (bh); - } - brelse (bh); inode->i_nlink++; inode->i_ctime = CURRENT_TIME; mark_inode_dirty(inode); @@ -829,14 +760,26 @@ static int ext2_rename (struct inode * old_dir, struct dentry *old_dentry, goto end_rename; } if (!new_bh) { - new_bh = ext2_add_entry (new_dir, new_dentry->d_name.name, - new_dentry->d_name.len, &new_de, - &retval); - if (!new_bh) + retval = ext2_add_entry (new_dir, new_dentry->d_name.name, + new_dentry->d_name.len, + old_inode); + if (retval) goto end_rename; + } else { + new_de->inode = le32_to_cpu(old_inode->i_ino); + if (EXT2_HAS_INCOMPAT_FEATURE(new_dir->i_sb, + EXT2_FEATURE_INCOMPAT_FILETYPE)) + new_de->file_type = old_de->file_type; + new_dir->i_version = ++event; + mark_buffer_dirty(new_bh, 1); + if (IS_SYNC(new_dir)) { + ll_rw_block (WRITE, 1, &new_bh); + wait_on_buffer (new_bh); + } + brelse(new_bh); + new_bh = NULL; } - new_dir->i_version = ++event; - + /* * Like most other Unix systems, set the ctime for inodes on a * rename. @@ -847,14 +790,8 @@ static int ext2_rename (struct inode * old_dir, struct dentry *old_dentry, /* * ok, that's it */ - new_de->inode = le32_to_cpu(old_inode->i_ino); - if (EXT2_HAS_INCOMPAT_FEATURE(new_dir->i_sb, - EXT2_FEATURE_INCOMPAT_FILETYPE)) - new_de->file_type = old_de->file_type; - - ext2_delete_entry (old_de, old_bh); + ext2_delete_entry(old_dir, old_de, old_bh); - old_dir->i_version = ++event; if (new_inode) { new_inode->i_nlink--; new_inode->i_ctime = CURRENT_TIME; @@ -877,16 +814,6 @@ static int ext2_rename (struct inode * old_dir, struct dentry *old_dentry, mark_inode_dirty(new_dir); } } - mark_buffer_dirty(old_bh, 1); - if (IS_SYNC(old_dir)) { - ll_rw_block (WRITE, 1, &old_bh); - wait_on_buffer (old_bh); - } - mark_buffer_dirty(new_bh, 1); - if (IS_SYNC(new_dir)) { - ll_rw_block (WRITE, 1, &new_bh); - wait_on_buffer (new_bh); - } retval = 0; diff --git a/fs/ext2/super.c b/fs/ext2/super.c index a68289d71..aa6a599fc 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -197,6 +197,8 
@@ static int parse_options (char * options, unsigned long * sb_block, set_opt (*mount_options, GRPID); else if (!strcmp (this_char, "minixdf")) set_opt (*mount_options, MINIX_DF); + else if (!strcmp (this_char, "nocheck")) + clear_opt (*mount_options, CHECK); else if (!strcmp (this_char, "nogrpid") || !strcmp (this_char, "sysvgroups")) clear_opt (*mount_options, GRPID); @@ -300,13 +302,6 @@ static void ext2_setup_super (struct super_block * sb, } #endif } -#if 0 /* ibasket's still have unresolved bugs... -DaveM */ - - /* [T. Schoebel-Theuer] This limit should be maintained on disk. - * This is just provisionary. - */ - sb->s_ibasket_max = 100; -#endif } static int ext2_check_descriptors (struct super_block * sb) diff --git a/fs/ext2/symlink.c b/fs/ext2/symlink.c index d56ef4c62..d992efbe7 100644 --- a/fs/ext2/symlink.c +++ b/fs/ext2/symlink.c @@ -25,10 +25,10 @@ static int ext2_readlink(struct dentry *dentry, char *buffer, int buflen) return vfs_readlink(dentry, buffer, buflen, s); } -static struct dentry *ext2_follow_link(struct dentry *dentry, struct dentry *base, unsigned flags) +static int ext2_follow_link(struct dentry *dentry, struct nameidata *nd) { char *s = (char *)dentry->d_inode->u.ext2_i.i_data; - return vfs_follow_link(dentry, base, flags, s); + return vfs_follow_link(nd, s); } struct inode_operations ext2_fast_symlink_inode_operations = { diff --git a/fs/file_table.c b/fs/file_table.c index 3f795576e..ee7be9890 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -122,13 +122,17 @@ int init_private_file(struct file *filp, struct dentry *dentry, int mode) static void __fput(struct file *filp) { struct dentry * dentry = filp->f_dentry; + struct vfsmount * mnt = filp->f_vfsmnt; struct inode * inode = dentry->d_inode; if (filp->f_op && filp->f_op->release) filp->f_op->release(inode, filp); filp->f_dentry = NULL; + filp->f_vfsmnt = NULL; if (filp->f_mode & FMODE_WRITE) put_write_access(inode); + if (mnt) + mntput(mnt); dput(dentry); } diff --git 
a/fs/hfs/dir.c b/fs/hfs/dir.c index debe0a967..07f4ab93d 100644 --- a/fs/hfs/dir.c +++ b/fs/hfs/dir.c @@ -314,14 +314,10 @@ int hfs_rmdir(struct inode * parent, struct dentry *dentry) if (!d_unhashed(dentry)) goto hfs_rmdir_put; - if (/* we only have to worry about 2 and 3 for mount points */ - (victim->sys_entry[2] && - (victim->sys_entry[2]->d_mounts != - victim->sys_entry[2]->d_covers)) || - (victim->sys_entry[3] && - (victim->sys_entry[3]->d_mounts != - victim->sys_entry[3]->d_covers)) - ) + /* we only have to worry about 2 and 3 for mount points */ + if (victim->sys_entry[2] && d_mountpoint(victim->sys_entry[2])) + goto hfs_rmdir_put; + if (victim->sys_entry[3] && d_mountpoint(victim->sys_entry[3])) goto hfs_rmdir_put; diff --git a/fs/hpfs/dir.c b/fs/hpfs/dir.c index a2f33d56d..51d519d7f 100644 --- a/fs/hpfs/dir.c +++ b/fs/hpfs/dir.c @@ -23,7 +23,7 @@ loff_t hpfs_dir_lseek(struct file *filp, loff_t off, int whence) loff_t pos; struct quad_buffer_head qbh; struct inode *i = filp->f_dentry->d_inode; - struct super_block *s = filp->f_dentry->d_sb; + struct super_block *s = i->i_sb; /*printk("dir lseek\n");*/ if (new_off == 0 || new_off == 1 || new_off == 11 || new_off == 12 || new_off == 13) goto ok; hpfs_lock_inode(i); diff --git a/fs/inode.c b/fs/inode.c index c4915bba1..de7267c84 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -111,6 +111,14 @@ static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) * In short, make sure you hash any inodes _before_ * you start marking them dirty.. */ + +/** + * __mark_inode_dirty - internal function + * @inode: inode to mark + * + * Mark an inode as dirty. 
Callers should use mark_inode_dirty + */ + void __mark_inode_dirty(struct inode *inode) { struct super_block * sb = inode->i_sb; @@ -202,10 +210,14 @@ static inline void sync_list(struct list_head *head) sync_one(list_entry(tmp, struct inode, i_list)); } -/* - * "sync_inodes()" goes through the super block's dirty list, - * writes them out, and puts them back on the normal list. +/** + * sync_inodes + * @dev: device to sync the inodes from. + * + * sync_inodes goes through the super block's dirty list, + * writes them out, and puts them back on the normal list. */ + void sync_inodes(kdev_t dev) { struct super_block * sb = sb_entry(super_blocks.next); @@ -241,9 +253,14 @@ static void sync_all_inodes(void) } } -/* - * Needed by knfsd +/** + * write_inode_now - write an inode to disk + * @inode: inode to write to disk + * + * This function commits an inode to disk immediately if it is + * dirty. This is primarily needed by knfsd. */ + void write_inode_now(struct inode *inode) { struct super_block * sb = inode->i_sb; @@ -258,11 +275,15 @@ void write_inode_now(struct inode *inode) printk("write_inode_now: no super block\n"); } -/* +/** + * clear_inode - clear an inode + * @inode: inode to clear + * * This is called by the filesystem to tell us * that the inode is no longer useful. We just * terminate it with extreme prejudice. */ + void clear_inode(struct inode *inode) { if (inode->i_data.nrpages) @@ -346,6 +367,16 @@ static int invalidate_list(struct list_head *head, struct super_block * sb, stru * is because we don't want to sleep while messing * with the global lists.. */ + +/** + * invalidate_inodes - discard the inodes on a device + * @sb: superblock + * + * Discard all of the inodes for a given superblock. If the discard + * fails because there are busy inodes then a non zero value is returned. + * If the discard is successful all the inodes are dicarded. 
+ */ + int invalidate_inodes(struct super_block * sb) { int busy; @@ -490,12 +521,19 @@ static void clean_inode(struct inode *inode) inode->i_mapping = &inode->i_data; } -/* +/** + * get_empty_inode - obtain an inode + * * This is called by things like the networking layer * etc that want to get an inode without any inode * number, or filesystems that allocate new inodes with * no pre-existing information. + * + * On a successful return the inode pointer is returned. On a failure + * a NULL pointer is returned. The returned inode is not on any superblock + * lists. */ + struct inode * get_empty_inode(void) { static unsigned long last_ino = 0; @@ -585,6 +623,22 @@ static inline unsigned long hash(struct super_block *sb, unsigned long i_ino) } /* Yeah, I know about quadratic hash. Maybe, later. */ + +/** + * iunique - get a unique inode number + * @sb: superblock + * @max_reserved: highest reserved inode number + * + * Obtain an inode number that is unique on the system for a given + * superblock. This is used by file systems that have no natural + * permanent inode numbering system. An inode number is returned that + * is higher than the reserved limit but unique. + * + * BUGS: + * With a large number of inodes live on the file system this function + * currently becomes quite slow. + */ + ino_t iunique(struct super_block *sb, ino_t max_reserved) { static ino_t counter = 0; @@ -625,6 +679,7 @@ struct inode *igrab(struct inode *inode) return inode; } + struct inode *iget4(struct super_block *sb, unsigned long ino, find_inode_t find_actor, void *opaque) { struct list_head * head = inode_hashtable + hash(sb,ino); @@ -647,6 +702,14 @@ struct inode *iget4(struct super_block *sb, unsigned long ino, find_inode_t find return get_new_inode(sb, ino, head, find_actor, opaque); } +/** + * insert_inode_hash - hash an inode + * @inode: unhashed inode + * + * Add an inode to the inode hash for this superblock. 
If the inode + * has no superblock it is added to a seperate anonymous chain + */ + void insert_inode_hash(struct inode *inode) { struct list_head *head = &anon_hash_chain; @@ -657,6 +720,13 @@ void insert_inode_hash(struct inode *inode) spin_unlock(&inode_lock); } +/** + * remove_inode_hash - remove an inode from the hash + * @inode: inode to unhash + * + * Remove an inode from the superblock or anonymous hash + */ + void remove_inode_hash(struct inode *inode) { spin_lock(&inode_lock); @@ -665,6 +735,14 @@ void remove_inode_hash(struct inode *inode) spin_unlock(&inode_lock); } +/** + * iput - put an inode + * @inode: inode to put + * + * Puts an inode, dropping its usage count. If the inode use count hits + * zero the inode is also then freed and may be destroyed. + */ + void iput(struct inode *inode) { if (inode) { @@ -686,11 +764,12 @@ void iput(struct inode *inode) inode->i_state|=I_FREEING; spin_unlock(&inode_lock); + if (inode->i_data.nrpages) + truncate_inode_pages(&inode->i_data, 0); + destroy = 1; if (op && op->delete_inode) { void (*delete)(struct inode *) = op->delete_inode; - if (inode->i_data.nrpages) - truncate_inode_pages(&inode->i_data, 0); /* s_op->delete_inode internally recalls clear_inode() */ delete(inode); } else @@ -743,6 +822,18 @@ kdevname(inode->i_dev), inode->i_ino, atomic_read(&inode->i_sem.count)); } } +/** + * bmap - find a block number in a file + * @inode: inode of file + * @block: block to find + * + * Returns the block number on the device holding the inode that + * is the disk block number for the block of the file requested. 
+ * That is asked for block 4 of inode 1 the function will return the + * disk block relative to the disk start that holds that block of the + * file + */ + int bmap(struct inode * inode, int block) { int res = 0; @@ -774,13 +865,22 @@ void __init inode_init(void) panic("cannot create inode slab cache"); } +/** + * update_atime - update the access time + * @inode: inode accessed + * + * Update the accessed time on an inode and mark it for writeback. + * This function automatically handles read only file systems and media, + * as well as the noatime flag and inode specific noatime markers + */ + void update_atime (struct inode *inode) { - if ( IS_NOATIME (inode) ) return; - if ( IS_NODIRATIME (inode) && S_ISDIR (inode->i_mode) ) return; - if ( IS_RDONLY (inode) ) return; - inode->i_atime = CURRENT_TIME; - mark_inode_dirty (inode); + if ( IS_NOATIME (inode) ) return; + if ( IS_NODIRATIME (inode) && S_ISDIR (inode->i_mode) ) return; + if ( IS_RDONLY (inode) ) return; + inode->i_atime = CURRENT_TIME; + mark_inode_dirty (inode); } /* End Function update_atime */ diff --git a/fs/ioctl.c b/fs/ioctl.c index a02bbec67..614cdaf67 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -98,9 +98,7 @@ asmlinkage long sys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg) default: error = -ENOTTY; - if (!filp->f_dentry || !filp->f_dentry->d_inode) - error = -ENOENT; - else if (S_ISREG(filp->f_dentry->d_inode->i_mode)) + if (S_ISREG(filp->f_dentry->d_inode->i_mode)) error = file_ioctl(filp, cmd, arg); else if (filp->f_op && filp->f_op->ioctl) error = filp->f_op->ioctl(filp->f_dentry->d_inode, filp, cmd, arg); diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c index 7a4674d85..f89188d12 100644 --- a/fs/lockd/clntlock.c +++ b/fs/lockd/clntlock.c @@ -152,7 +152,7 @@ nlmclnt_recovery(struct nlm_host *host, u32 newstate) host->h_monitored = 0; host->h_nsmstate = newstate; host->h_state++; - host->h_count++; + nlm_get_host(host); kernel_thread(reclaimer, host, 0); } } diff --git 
a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c index 20b9bb490..376eed9cc 100644 --- a/fs/lockd/clntproc.c +++ b/fs/lockd/clntproc.c @@ -106,10 +106,18 @@ nlmclnt_proc(struct inode *inode, int cmd, struct file_lock *fl) struct nlm_rqst reqst, *call = &reqst; sigset_t oldset; unsigned long flags; - int status; + int status, proto, vers; - /* Always use NLM version 1 over UDP for now... */ - if (!(host = nlmclnt_lookup_host(NFS_ADDR(inode), IPPROTO_UDP, 1))) + vers = (NFS_PROTO(inode)->version == 3) ? 4 : 1; + if (NFS_PROTO(inode)->version > 3) { + printk(KERN_NOTICE "NFSv4 file locking not implemented!\n"); + return -ENOLCK; + } + + /* Retrieve transport protocol from NFS client */ + proto = NFS_CLIENT(inode)->cl_xprt->prot; + + if (!(host = nlmclnt_lookup_host(NFS_ADDR(inode), proto, vers))) return -ENOLCK; /* Create RPC client handle if not there, and copy soft @@ -142,6 +150,10 @@ nlmclnt_proc(struct inode *inode, int cmd, struct file_lock *fl) spin_unlock_irqrestore(&current->sigmask_lock, flags); call = nlmclnt_alloc_call(); + if (!call) { + status = -ENOMEM; + goto out_restore; + } call->a_flags = RPC_TASK_ASYNC; } else { spin_unlock_irqrestore(&current->sigmask_lock, flags); @@ -165,8 +177,9 @@ nlmclnt_proc(struct inode *inode, int cmd, struct file_lock *fl) } if (status < 0 && (call->a_flags & RPC_TASK_ASYNC)) - rpc_free(call); + kfree(call); + out_restore: spin_lock_irqsave(&current->sigmask_lock, flags); current->blocked = oldset; recalc_sigpending(current); @@ -200,8 +213,7 @@ nlmclnt_alloc_call(void) struct nlm_rqst *call; while (!signalled()) { - call = (struct nlm_rqst *) rpc_allocate(RPC_TASK_ASYNC, - sizeof(struct nlm_rqst)); + call = (struct nlm_rqst *) kmalloc(sizeof(struct nlm_rqst), GFP_KERNEL); if (call) return call; printk("nlmclnt_alloc_call: failed, waiting for memory\n"); @@ -246,6 +258,7 @@ nlmclnt_call(struct nlm_rqst *req, u32 proc) case -ECONNREFUSED: case -ETIMEDOUT: case -ENOTCONN: + nlm_rebind_host(host); status = -EAGAIN; break; case -ERESTARTSYS: @@
-253,10 +266,7 @@ nlmclnt_call(struct nlm_rqst *req, u32 proc) default: break; } - if (req->a_args.block) - nlm_rebind_host(host); - else - break; + break; } else if (resp->status == NLM_LCK_DENIED_GRACE_PERIOD) { dprintk("lockd: server in grace period\n"); @@ -290,7 +300,7 @@ nlmclnt_call(struct nlm_rqst *req, u32 proc) * Generic NLM call, async version. */ int -nlmclnt_async_call(struct nlm_rqst *req, u32 proc, rpc_action callback) +nlmsvc_async_call(struct nlm_rqst *req, u32 proc, rpc_action callback) { struct nlm_host *host = req->a_host; struct rpc_clnt *clnt; @@ -313,9 +323,20 @@ nlmclnt_async_call(struct nlm_rqst *req, u32 proc, rpc_action callback) msg.rpc_cred = NULL; status = rpc_call_async(clnt, &msg, RPC_TASK_ASYNC, callback, req); - /* If the async call is proceeding, increment host refcount */ - if (status >= 0 && (req->a_flags & RPC_TASK_ASYNC)) - host->h_count++; + return status; +} + +int +nlmclnt_async_call(struct nlm_rqst *req, u32 proc, rpc_action callback) +{ + struct nlm_host *host = req->a_host; + int status; + + /* Increment host refcount */ + nlm_get_host(host); + status = nlmsvc_async_call(req, proc, callback); + if (status < 0) + nlm_release_host(host); return status; } @@ -347,6 +368,20 @@ nlmclnt_test(struct nlm_rqst *req, struct file_lock *fl) return 0; } +static +void nlmclnt_insert_lock_callback(struct file_lock *fl) +{ + nlm_get_host(fl->fl_u.nfs_fl.host); +} +static +void nlmclnt_remove_lock_callback(struct file_lock *fl) +{ + if (fl->fl_u.nfs_fl.host) { + nlm_release_host(fl->fl_u.nfs_fl.host); + fl->fl_u.nfs_fl.host = NULL; + } +} + /* * LOCK: Try to create a lock * @@ -380,7 +415,7 @@ nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl) return -ENOLCK; } - while (1) { + do { if ((status = nlmclnt_call(req, NLMPROC_LOCK)) >= 0) { if (resp->status != NLM_LCK_BLOCKED) break; @@ -388,11 +423,14 @@ nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl) } if (status < 0) return status; - } + } while (resp->status == 
NLM_LCK_BLOCKED); if (resp->status == NLM_LCK_GRANTED) { fl->fl_u.nfs_fl.state = host->h_state; fl->fl_u.nfs_fl.flags |= NFS_LCK_GRANTED; + fl->fl_u.nfs_fl.host = host; + fl->fl_insert = nlmclnt_insert_lock_callback; + fl->fl_remove = nlmclnt_remove_lock_callback; } return nlm_stat_to_errno(resp->status); @@ -444,15 +482,9 @@ nlmclnt_reclaim(struct nlm_host *host, struct file_lock *fl) static int nlmclnt_unlock(struct nlm_rqst *req, struct file_lock *fl) { - struct nlm_host *host = req->a_host; struct nlm_res *resp = &req->a_res; int status; - /* No monitor, no lock: see nlmclnt_lock(). - * Since this is an UNLOCK, don't try to setup monitoring here. */ - if (!host->h_monitored) - return -ENOLCK; - /* Clean the GRANTED flag now so the lock doesn't get * reclaimed while we're stuck in the unlock call. */ fl->fl_u.nfs_fl.flags &= ~NFS_LCK_GRANTED; @@ -487,9 +519,7 @@ nlmclnt_unlock_callback(struct rpc_task *task) if (task->tk_status < 0) { dprintk("lockd: unlock failed (err = %d)\n", -task->tk_status); - nlm_rebind_host(req->a_host); - rpc_restart_call(task); - return; + goto retry_unlock; } if (status != NLM_LCK_GRANTED && status != NLM_LCK_DENIED_GRACE_PERIOD) { @@ -497,7 +527,12 @@ nlmclnt_unlock_callback(struct rpc_task *task) } die: - rpc_release_task(task); + nlm_release_host(req->a_host); + kfree(req); + return; + retry_unlock: + nlm_rebind_host(req->a_host); + rpc_restart_call(task); } /* @@ -520,10 +555,9 @@ nlmclnt_cancel(struct nlm_host *host, struct file_lock *fl) recalc_sigpending(current); spin_unlock_irqrestore(&current->sigmask_lock, flags); - do { - req = (struct nlm_rqst *) rpc_allocate(RPC_TASK_ASYNC, - sizeof(*req)); - } while (req == NULL); + req = nlmclnt_alloc_call(); + if (!req) + return -ENOMEM; req->a_host = host; req->a_flags = RPC_TASK_ASYNC; @@ -532,7 +566,7 @@ nlmclnt_cancel(struct nlm_host *host, struct file_lock *fl) status = nlmclnt_async_call(req, NLMPROC_CANCEL, nlmclnt_cancel_callback); if (status < 0) - rpc_free(req); + kfree(req);
spin_lock_irqsave(&current->sigmask_lock, flags); current->blocked = oldset; @@ -573,7 +607,6 @@ nlmclnt_cancel_callback(struct rpc_task *task) } die: - rpc_release_task(task); nlm_release_host(req->a_host); kfree(req); return; @@ -582,7 +615,6 @@ retry_cancel: nlm_rebind_host(req->a_host); rpc_restart_call(task); rpc_delay(task, 30 * HZ); - return; } /* diff --git a/fs/lockd/host.c b/fs/lockd/host.c index 02cfb07f4..dcd33c19b 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -15,6 +15,7 @@ #include <linux/sunrpc/clnt.h> #include <linux/sunrpc/svc.h> #include <linux/lockd/lockd.h> +#include <linux/lockd/sm_inter.h> #define NLMDBG_FACILITY NLMDBG_HOSTCACHE @@ -105,8 +106,7 @@ nlm_lookup_host(struct svc_client *clnt, struct sockaddr_in *sin, host->h_next = nlm_hosts[hash]; nlm_hosts[hash] = host; } - host->h_expires = jiffies + NLM_HOST_EXPIRE; - host->h_count++; + nlm_get_host(host); up(&nlm_host_sema); return host; } @@ -172,9 +172,12 @@ nlm_bind_host(struct nlm_host *host) down(&host->h_sema); /* If we've already created an RPC client, check whether - * RPC rebind is required */ + * RPC rebind is required + * Note: why keep rebinding if we're on a tcp connection?
+ */ if ((clnt = host->h_rpcclnt) != NULL) { - if (time_after_eq(jiffies, host->h_nextrebind)) { + xprt = clnt->cl_xprt; + if (!xprt->stream && time_after_eq(jiffies, host->h_nextrebind)) { clnt->cl_port = 0; host->h_nextrebind = jiffies + NLM_HOST_REBIND; dprintk("lockd: next rebind in %ld jiffies\n", @@ -230,13 +233,27 @@ nlm_rebind_host(struct nlm_host *host) } /* + * Increment NLM host count + */ +struct nlm_host * nlm_get_host(struct nlm_host *host) +{ + if (host) { + dprintk("lockd: get host %s\n", host->h_name); + host->h_count ++; + host->h_expires = jiffies + NLM_HOST_EXPIRE; + } + return host; +} + +/* * Release NLM host after use */ -void -nlm_release_host(struct nlm_host *host) +void nlm_release_host(struct nlm_host *host) { - dprintk("lockd: release host %s\n", host->h_name); - host->h_count -= 1; + if (host && host->h_count) { + dprintk("lockd: release host %s\n", host->h_name); + host->h_count --; + } } /* @@ -308,6 +325,8 @@ nlm_gc_hosts(void) } dprintk("lockd: delete host %s\n", host->h_name); *q = host->h_next; + if (host->h_monitored) + nsm_unmonitor(host); if ((clnt = host->h_rpcclnt) != NULL) { if (clnt->cl_users) { printk(KERN_WARNING diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c index 55dee3886..283d66e97 100644 --- a/fs/lockd/mon.c +++ b/fs/lockd/mon.c @@ -30,14 +30,12 @@ u32 nsm_local_state = 0; * Common procedure for SM_MON/SM_UNMON calls */ static int -nsm_mon_unmon(struct nlm_host *host, char *what, u32 proc) +nsm_mon_unmon(struct nlm_host *host, u32 proc, struct nsm_res *res) { struct rpc_clnt *clnt; int status; struct nsm_args args; - struct nsm_res res; - dprintk("lockd: nsm_%s(%s)\n", what, host->h_name); status = -EACCES; clnt = nsm_create(); if (!clnt) @@ -47,23 +45,15 @@ nsm_mon_unmon(struct nlm_host *host, char *what, u32 proc) args.prog = NLM_PROGRAM; args.vers = 1; args.proc = NLMPROC_NSM_NOTIFY; + memset(res, 0, sizeof(*res)); - status = rpc_call(clnt, proc, &args, &res, 0); - if (status < 0) { + status = rpc_call(clnt, proc, 
&args, res, 0); + if (status < 0) printk(KERN_DEBUG "nsm_mon_unmon: rpc failed, status=%d\n", status); - goto out; - } - - status = -EACCES; - if (res.status != 0) { - printk(KERN_NOTICE "lockd: cannot %s %s\n", what, host->h_name); - goto out; - } - - nsm_local_state = res.state; - status = 0; -out: + else + status = 0; + out: return status; } @@ -73,10 +63,16 @@ out: int nsm_monitor(struct nlm_host *host) { + struct nsm_res res; int status; - status = nsm_mon_unmon(host, "monitor", SM_MON); - if (status >= 0) + dprintk("lockd: nsm_monitor(%s)\n", host->h_name); + + status = nsm_mon_unmon(host, SM_MON, &res); + + if (status < 0 || res.status != 0) + printk(KERN_NOTICE "lockd: cannot monitor %s\n", host->h_name); + else host->h_monitored = 1; return status; } @@ -87,9 +83,15 @@ nsm_monitor(struct nlm_host *host) int nsm_unmonitor(struct nlm_host *host) { + struct nsm_res res; int status; - if ((status = nsm_mon_unmon(host, "unmonitor", SM_UNMON)) >= 0) + dprintk("lockd: nsm_unmonitor(%s)\n", host->h_name); + + status = nsm_mon_unmon(host, SM_UNMON, &res); + if (status < 0) + printk(KERN_NOTICE "lockd: cannot unmonitor %s\n", host->h_name); + else host->h_monitored = 0; return status; } @@ -187,7 +189,7 @@ xdr_decode_stat_res(struct rpc_rqst *rqstp, u32 *p, struct nsm_res *resp) static int xdr_decode_stat(struct rpc_rqst *rqstp, u32 *p, struct nsm_res *resp) { - resp->status = ntohl(*p++); + resp->state = ntohl(*p++); return 0; } diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c index b690eb97e..a1e30454e 100644 --- a/fs/lockd/svc4proc.c +++ b/fs/lockd/svc4proc.c @@ -470,7 +470,7 @@ nlm4svc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_res *resp) host = nlmclnt_lookup_host(&rqstp->rq_addr, rqstp->rq_prot, rqstp->rq_vers); if (!host) { - rpc_free(call); + kfree(call); return rpc_system_err; } @@ -478,12 +478,14 @@ nlm4svc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_res *resp) call->a_host = host; memcpy(&call->a_args, resp, sizeof(*resp)); -/* 
FIXME this should become nlmSVC_async_call when that code gets - merged in XXX */ - if (nlmclnt_async_call(call, proc, nlm4svc_callback_exit) < 0) - return rpc_system_err; + if (nlmsvc_async_call(call, proc, nlm4svc_callback_exit) < 0) + goto error; return rpc_success; + error: + kfree(call); + nlm_release_host(host); + return rpc_system_err; } static void @@ -496,7 +498,7 @@ nlm4svc_callback_exit(struct rpc_task *task) task->tk_pid, -task->tk_status); } nlm_release_host(call->a_host); - rpc_free(call); + kfree(call); } /* diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index 1bed616cb..97a9d27ef 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -94,14 +94,16 @@ nlmsvc_lookup_block(struct nlm_file *file, struct nlm_lock *lock, int remove) struct nlm_block **head, *block; struct file_lock *fl; - dprintk("lockd: nlmsvc_lookup_block f=%p pd=%d %ld-%ld ty=%d\n", - file, lock->fl.fl_pid, lock->fl.fl_start, - lock->fl.fl_end, lock->fl.fl_type); + dprintk("lockd: nlmsvc_lookup_block f=%p pd=%d %Ld-%Ld ty=%d\n", + file, lock->fl.fl_pid, + (long long)lock->fl.fl_start, + (long long)lock->fl.fl_end, lock->fl.fl_type); for (head = &nlm_blocked; (block = *head); head = &block->b_next) { fl = &block->b_call.a_args.lock.fl; - dprintk("lockd: check f=%p pd=%d %ld-%ld ty=%d cookie=%x\n", - block->b_file, fl->fl_pid, fl->fl_start, - fl->fl_end, fl->fl_type, + dprintk("lockd: check f=%p pd=%d %Ld-%Ld ty=%d cookie=%x\n", + block->b_file, fl->fl_pid, + (long long)fl->fl_start, + (long long)fl->fl_end, fl->fl_type, *(unsigned int*)(block->b_call.a_args.cookie.data)); if (block->b_file == file && nlm_compare_locks(fl, &lock->fl)) { if (remove) @@ -286,12 +288,12 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file, struct nlm_block *block; int error; - dprintk("lockd: nlmsvc_lock(%04x/%ld, ty=%d, pi=%d, %ld-%ld, bl=%d)\n", + dprintk("lockd: nlmsvc_lock(%04x/%ld, ty=%d, pi=%d, %Ld-%Ld, bl=%d)\n", file->f_file.f_dentry->d_inode->i_dev, 
file->f_file.f_dentry->d_inode->i_ino, lock->fl.fl_type, lock->fl.fl_pid, - lock->fl.fl_start, - lock->fl.fl_end, + (long long)lock->fl.fl_start, + (long long)lock->fl.fl_end, wait); /* Lock file against concurrent access */ @@ -365,16 +367,17 @@ nlmsvc_testlock(struct nlm_file *file, struct nlm_lock *lock, { struct file_lock *fl; - dprintk("lockd: nlmsvc_testlock(%04x/%ld, ty=%d, %ld-%ld)\n", + dprintk("lockd: nlmsvc_testlock(%04x/%ld, ty=%d, %Ld-%Ld)\n", file->f_file.f_dentry->d_inode->i_dev, file->f_file.f_dentry->d_inode->i_ino, lock->fl.fl_type, - lock->fl.fl_start, - lock->fl.fl_end); + (long long)lock->fl.fl_start, + (long long)lock->fl.fl_end); if ((fl = posix_test_lock(&file->f_file, &lock->fl)) != NULL) { - dprintk("lockd: conflicting lock(ty=%d, %ld-%ld)\n", - fl->fl_type, fl->fl_start, fl->fl_end); + dprintk("lockd: conflicting lock(ty=%d, %Ld-%Ld)\n", + fl->fl_type, (long long)fl->fl_start, + (long long)fl->fl_end); conflock->caller = "somehost"; /* FIXME */ conflock->oh.len = 0; /* don't return OH info */ conflock->fl = *fl; @@ -396,12 +399,12 @@ nlmsvc_unlock(struct nlm_file *file, struct nlm_lock *lock) { int error; - dprintk("lockd: nlmsvc_unlock(%04x/%ld, pi=%d, %ld-%ld)\n", + dprintk("lockd: nlmsvc_unlock(%04x/%ld, pi=%d, %Ld-%Ld)\n", file->f_file.f_dentry->d_inode->i_dev, file->f_file.f_dentry->d_inode->i_ino, lock->fl.fl_pid, - lock->fl.fl_start, - lock->fl.fl_end); + (long long)lock->fl.fl_start, + (long long)lock->fl.fl_end); /* First, cancel any lock that might be there */ nlmsvc_cancel_blocked(file, lock); @@ -424,12 +427,12 @@ nlmsvc_cancel_blocked(struct nlm_file *file, struct nlm_lock *lock) { struct nlm_block *block; - dprintk("lockd: nlmsvc_cancel(%04x/%ld, pi=%d, %ld-%ld)\n", + dprintk("lockd: nlmsvc_cancel(%04x/%ld, pi=%d, %Ld-%Ld)\n", file->f_file.f_dentry->d_inode->i_dev, file->f_file.f_dentry->d_inode->i_ino, lock->fl.fl_pid, - lock->fl.fl_start, - lock->fl.fl_end); + (long long)lock->fl.fl_start, + (long long)lock->fl.fl_end); 
down(&file->f_sema); if ((block = nlmsvc_lookup_block(file, lock, 1)) != NULL) @@ -576,7 +579,6 @@ nlmsvc_grant_callback(struct rpc_task *task) block->b_incall = 0; nlm_release_host(call->a_host); - rpc_release_task(task); } /* diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c index c92358dec..b0cbd4a50 100644 --- a/fs/lockd/svcproc.c +++ b/fs/lockd/svcproc.c @@ -503,10 +503,14 @@ nlmsvc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_res *resp) call->a_host = host; memcpy(&call->a_args, resp, sizeof(*resp)); - if (nlmclnt_async_call(call, proc, nlmsvc_callback_exit) < 0) - return rpc_system_err; + if (nlmsvc_async_call(call, proc, nlmsvc_callback_exit) < 0) + goto error; return rpc_success; + error: + nlm_release_host(host); + kfree(call); + return rpc_system_err; } static void @@ -519,7 +523,6 @@ nlmsvc_callback_exit(struct rpc_task *task) task->tk_pid, -task->tk_status); } nlm_release_host(call->a_host); - rpc_release_task(task); kfree(call); } diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c index c6395a54f..1d5b5382c 100644 --- a/fs/lockd/svcsubs.c +++ b/fs/lockd/svcsubs.c @@ -34,7 +34,7 @@ static inline unsigned int file_hash(struct nfs_fh *f) { unsigned int tmp=0; int i; - for (i=0; i<NFS_FHSIZE;i++) + for (i=0; i<NFS2_FHSIZE;i++) tmp += f->data[i]; return tmp & (FILE_NRHASH - 1); } @@ -175,7 +175,7 @@ again: lock.fl_type = F_UNLCK; lock.fl_start = 0; - lock.fl_end = NLM_OFFSET_MAX; + lock.fl_end = OFFSET_MAX; if (posix_lock_file(&file->f_file, &lock, 0) < 0) { printk("lockd: unlock failure in %s:%d\n", __FILE__, __LINE__); diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c index eba3aaef6..9a179d530 100644 --- a/fs/lockd/xdr.c +++ b/fs/lockd/xdr.c @@ -22,6 +22,25 @@ #define NLMDBG_FACILITY NLMDBG_XDR +static inline loff_t +s32_to_loff_t(__s32 offset) +{ + return (loff_t)offset; +} + +static inline __s32 +loff_t_to_s32(loff_t offset) +{ + __s32 res; + if (offset >= NLM_OFFSET_MAX) + res = NLM_OFFSET_MAX; + else if (offset <= -NLM_OFFSET_MAX) + res = 
-NLM_OFFSET_MAX; + else + res = offset; + return res; +} + /* * XDR functions for basic NLM types */ @@ -65,22 +84,23 @@ nlm_decode_fh(u32 *p, struct nfs_fh *f) { unsigned int len; - if ((len = ntohl(*p++)) != sizeof(*f)) { + if ((len = ntohl(*p++)) != NFS2_FHSIZE) { printk(KERN_NOTICE "lockd: bad fhandle size %x (should be %Zu)\n", - len, sizeof(*f)); + len, NFS2_FHSIZE); return NULL; } - memcpy(f, p, sizeof(*f)); - return p + XDR_QUADLEN(sizeof(*f)); + f->size = NFS2_FHSIZE; + memcpy(f->data, p, NFS2_FHSIZE); + return p + XDR_QUADLEN(NFS2_FHSIZE); } static inline u32 * nlm_encode_fh(u32 *p, struct nfs_fh *f) { - *p++ = htonl(sizeof(*f)); - memcpy(p, f, sizeof(*f)); - return p + XDR_QUADLEN(sizeof(*f)); + *p++ = htonl(NFS2_FHSIZE); + memcpy(p, f->data, NFS2_FHSIZE); + return p + XDR_QUADLEN(NFS2_FHSIZE); } /* @@ -102,7 +122,7 @@ static inline u32 * nlm_decode_lock(u32 *p, struct nlm_lock *lock) { struct file_lock *fl = &lock->fl; - int len; + s32 start, len, end; if (!(p = xdr_decode_string(p, &lock->caller, &len, NLM_MAXSTRLEN)) || !(p = nlm_decode_fh(p, &lock->fh)) @@ -114,10 +134,16 @@ nlm_decode_lock(u32 *p, struct nlm_lock *lock) fl->fl_pid = ntohl(*p++); fl->fl_flags = FL_POSIX; fl->fl_type = F_RDLCK; /* as good as anything else */ - fl->fl_start = ntohl(*p++); + start = ntohl(*p++); len = ntohl(*p++); - if (len == 0 || (fl->fl_end = fl->fl_start + len - 1) < 0) - fl->fl_end = NLM_OFFSET_MAX; + end = start + len - 1; + + fl->fl_start = s32_to_loff_t(start); + + if (len == 0 || end < 0) + fl->fl_end = OFFSET_MAX; + else + fl->fl_end = s32_to_loff_t(end); return p; } @@ -128,18 +154,26 @@ static u32 * nlm_encode_lock(u32 *p, struct nlm_lock *lock) { struct file_lock *fl = &lock->fl; + __s32 start, len; if (!(p = xdr_encode_string(p, lock->caller)) || !(p = nlm_encode_fh(p, &lock->fh)) || !(p = nlm_encode_oh(p, &lock->oh))) return NULL; - *p++ = htonl(fl->fl_pid); - *p++ = htonl(lock->fl.fl_start); - if (lock->fl.fl_end == NLM_OFFSET_MAX) - *p++ = xdr_zero; + 
if (fl->fl_start > NLM_OFFSET_MAX + || (fl->fl_end > NLM_OFFSET_MAX && fl->fl_end != OFFSET_MAX)) + return NULL; + + start = loff_t_to_s32(fl->fl_start); + if (fl->fl_end == OFFSET_MAX) + len = 0; else - *p++ = htonl(lock->fl.fl_end - lock->fl.fl_start + 1); + len = loff_t_to_s32(fl->fl_end - fl->fl_start + 1); + + *p++ = htonl(fl->fl_pid); + *p++ = htonl(start); + *p++ = htonl(len); return p; } @@ -150,6 +184,8 @@ nlm_encode_lock(u32 *p, struct nlm_lock *lock) static u32 * nlm_encode_testres(u32 *p, struct nlm_res *resp) { + s32 start, len; + if (!(p = nlm_encode_cookie(p, &resp->cookie))) return 0; *p++ = resp->status; @@ -164,11 +200,14 @@ nlm_encode_testres(u32 *p, struct nlm_res *resp) if (!(p = xdr_encode_netobj(p, &resp->lock.oh))) return 0; - *p++ = htonl(fl->fl_start); - if (fl->fl_end == NLM_OFFSET_MAX) - *p++ = xdr_zero; + start = loff_t_to_s32(fl->fl_start); + if (fl->fl_end == OFFSET_MAX) + len = 0; else - *p++ = htonl(fl->fl_end - fl->fl_start + 1); + len = loff_t_to_s32(fl->fl_end - fl->fl_start + 1); + + *p++ = htonl(start); + *p++ = htonl(len); } return p; @@ -387,7 +426,8 @@ nlmclt_decode_testres(struct rpc_rqst *req, u32 *p, struct nlm_res *resp) resp->status = ntohl(*p++); if (resp->status == NLM_LCK_DENIED) { struct file_lock *fl = &resp->lock.fl; - u32 excl, len; + u32 excl; + s32 start, len, end; memset(&resp->lock, 0, sizeof(resp->lock)); excl = ntohl(*p++); @@ -397,10 +437,15 @@ nlmclt_decode_testres(struct rpc_rqst *req, u32 *p, struct nlm_res *resp) fl->fl_flags = FL_POSIX; fl->fl_type = excl? 
F_WRLCK : F_RDLCK; - fl->fl_start = ntohl(*p++); + start = ntohl(*p++); len = ntohl(*p++); - if (len == 0 || (fl->fl_end = fl->fl_start + len - 1) < 0) - fl->fl_end = NLM_OFFSET_MAX; + end = start + len - 1; + + fl->fl_start = s32_to_loff_t(start); + if (len == 0 || end < 0) + fl->fl_end = OFFSET_MAX; + else + fl->fl_end = s32_to_loff_t(end); } return 0; } @@ -487,7 +532,7 @@ nlmclt_decode_res(struct rpc_rqst *req, u32 *p, struct nlm_res *resp) #define NLM_caller_sz 1+QUADLEN(sizeof(system_utsname.nodename)) #define NLM_netobj_sz 1+QUADLEN(XDR_MAX_NETOBJ) /* #define NLM_owner_sz 1+QUADLEN(NLM_MAXOWNER) */ -#define NLM_fhandle_sz 1+QUADLEN(NFS_FHSIZE) +#define NLM_fhandle_sz 1+QUADLEN(NFS2_FHSIZE) #define NLM_lock_sz 3+NLM_caller_sz+NLM_netobj_sz+NLM_fhandle_sz #define NLM_holder_sz 4+NLM_netobj_sz diff --git a/fs/lockd/xdr4.c b/fs/lockd/xdr4.c index 025e3c5b0..7ffa0a433 100644 --- a/fs/lockd/xdr4.c +++ b/fs/lockd/xdr4.c @@ -20,14 +20,25 @@ #include <linux/lockd/sm_inter.h> #define NLMDBG_FACILITY NLMDBG_XDR -#define OFFSET_MAX ((off_t)LONG_MAX) +static inline loff_t +s64_to_loff_t(__s64 offset) +{ + return (loff_t)offset; +} -static inline off_t -size_to_off_t(__s64 size) + +static inline s64 +loff_t_to_s64(loff_t offset) { - size = (size > (__s64)LONG_MAX) ? (off_t)LONG_MAX : (off_t) size; - return (size < (__s64)-LONG_MAX) ? 
(off_t)-LONG_MAX : (off_t) size; + s64 res; + if (offset > NLM4_OFFSET_MAX) + res = NLM4_OFFSET_MAX; + else if (offset < -NLM4_OFFSET_MAX) + res = -NLM4_OFFSET_MAX; + else + res = offset; + return res; } /* @@ -73,36 +84,24 @@ static u32 * nlm4_decode_fh(u32 *p, struct nfs_fh *f) { memset(f->data, 0, sizeof(f->data)); -#ifdef NFS_MAXFHSIZE f->size = ntohl(*p++); if (f->size > NFS_MAXFHSIZE) { printk(KERN_NOTICE - "lockd: bad fhandle size %x (should be %d)\n", + "lockd: bad fhandle size %d (should be <=%d)\n", f->size, NFS_MAXFHSIZE); return NULL; } memcpy(f->data, p, f->size); return p + XDR_QUADLEN(f->size); -#else - if (ntohl(*p++) != NFS_FHSIZE) - return NULL; /* for now, all filehandles are 32 bytes */ - memcpy(f->data, p, NFS_FHSIZE); - return p + XDR_QUADLEN(NFS_FHSIZE); -#endif } static u32 * nlm4_encode_fh(u32 *p, struct nfs_fh *f) { -#ifdef NFS_MAXFHSIZE *p++ = htonl(f->size); + if (f->size) p[XDR_QUADLEN(f->size)-1] = 0; /* don't leak anything */ memcpy(p, f->data, f->size); return p + XDR_QUADLEN(f->size); -#else - *p++ = htonl(NFS_FHSIZE); - memcpy(p, f->data, NFS_FHSIZE); - return p + XDR_QUADLEN(NFS_FHSIZE); -#endif } /* @@ -141,11 +140,12 @@ nlm4_decode_lock(u32 *p, struct nlm_lock *lock) p = xdr_decode_hyper(p, &len); end = start + len - 1; - fl->fl_start = size_to_off_t(start); - fl->fl_end = size_to_off_t(end); + fl->fl_start = s64_to_loff_t(start); - if (len == 0 || fl->fl_end < 0) + if (len == 0 || end < 0) fl->fl_end = OFFSET_MAX; + else + fl->fl_end = s64_to_loff_t(end); return p; } @@ -156,18 +156,26 @@ static u32 * nlm4_encode_lock(u32 *p, struct nlm_lock *lock) { struct file_lock *fl = &lock->fl; + __s64 start, len; if (!(p = xdr_encode_string(p, lock->caller)) || !(p = nlm4_encode_fh(p, &lock->fh)) || !(p = nlm4_encode_oh(p, &lock->oh))) return NULL; - *p++ = htonl(fl->fl_pid); - p = xdr_encode_hyper(p, fl->fl_start); + if (fl->fl_start > NLM4_OFFSET_MAX + || (fl->fl_end > NLM4_OFFSET_MAX && fl->fl_end != OFFSET_MAX)) + return NULL; + + 
start = loff_t_to_s64(fl->fl_start); if (fl->fl_end == OFFSET_MAX) - p = xdr_encode_hyper(p, 0); + len = 0; else - p = xdr_encode_hyper(p, fl->fl_end - fl->fl_start + 1); + len = loff_t_to_s64(fl->fl_end - fl->fl_start + 1); + + *p++ = htonl(fl->fl_pid); + p = xdr_encode_hyper(p, start); + p = xdr_encode_hyper(p, len); return p; } @@ -178,6 +186,8 @@ nlm4_encode_lock(u32 *p, struct nlm_lock *lock) static u32 * nlm4_encode_testres(u32 *p, struct nlm_res *resp) { + s64 start, len; + dprintk("xdr: before encode_testres (p %p resp %p)\n", p, resp); if (!(p = nlm4_encode_cookie(p, &resp->cookie))) return 0; @@ -193,12 +203,17 @@ nlm4_encode_testres(u32 *p, struct nlm_res *resp) if (!(p = xdr_encode_netobj(p, &resp->lock.oh))) return 0; - p = xdr_encode_hyper(p, fl->fl_start); + start = loff_t_to_s64(fl->fl_start); if (fl->fl_end == OFFSET_MAX) - p = xdr_encode_hyper(p, 0); + len = 0; else - p = xdr_encode_hyper(p, fl->fl_end - fl->fl_start + 1); - dprintk("xdr: encode_testres (status %d pid %d type %d start %ld end %ld)\n", resp->status, fl->fl_pid, fl->fl_type, fl->fl_start, fl->fl_end); + len = loff_t_to_s64(fl->fl_end - fl->fl_start + 1); + + p = xdr_encode_hyper(p, start); + p = xdr_encode_hyper(p, len); + dprintk("xdr: encode_testres (status %d pid %d type %d start %Ld end %Ld)\n", + resp->status, fl->fl_pid, fl->fl_type, + (long long)fl->fl_start, (long long)fl->fl_end); } dprintk("xdr: after encode_testres (p %p resp %p)\n", p, resp); @@ -434,10 +449,11 @@ nlm4clt_decode_testres(struct rpc_rqst *req, u32 *p, struct nlm_res *resp) p = xdr_decode_hyper(p, &len); end = start + len - 1; - fl->fl_start = size_to_off_t(start); - fl->fl_end = size_to_off_t(end); - if (len == 0 || fl->fl_end < 0) + fl->fl_start = s64_to_loff_t(start); + if (len == 0 || end < 0) fl->fl_end = OFFSET_MAX; + else + fl->fl_end = s64_to_loff_t(end); } return 0; } diff --git a/fs/locks.c b/fs/locks.c index 1661a4a5c..18ee63e92 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -111,8 +111,6 @@ 
#include <asm/uaccess.h> -#define OFFSET_MAX ((off_t)LONG_MAX) /* FIXME: move elsewhere? */ - static int flock_make_lock(struct file *filp, struct file_lock *fl, unsigned int cmd); static int posix_make_lock(struct file *filp, struct file_lock *fl, @@ -195,9 +193,9 @@ static void locks_insert_block(struct file_lock *blocker, if (waiter->fl_prevblock) { printk(KERN_ERR "locks_insert_block: remove duplicated lock " - "(pid=%d %ld-%ld type=%d)\n", - waiter->fl_pid, waiter->fl_start, - waiter->fl_end, waiter->fl_type); + "(pid=%d %Ld-%Ld type=%d)\n", + waiter->fl_pid, (long long)waiter->fl_start, + (long long)waiter->fl_end, waiter->fl_type); locks_delete_block(waiter->fl_prevblock, waiter); } @@ -338,10 +336,6 @@ int fcntl_getlk(unsigned int fd, struct flock *l) if (!filp) goto out; - error = -EINVAL; - if (!filp->f_dentry || !filp->f_dentry->d_inode) - goto out_putf; - if (!posix_make_lock(filp, &file_lock, &flock)) goto out_putf; @@ -385,7 +379,6 @@ int fcntl_setlk(unsigned int fd, unsigned int cmd, struct flock *l) struct file *filp; struct file_lock file_lock; struct flock flock; - struct dentry * dentry; struct inode *inode; int error; @@ -405,10 +398,7 @@ int fcntl_setlk(unsigned int fd, unsigned int cmd, struct flock *l) goto out; error = -EINVAL; - if (!(dentry = filp->f_dentry)) - goto out_putf; - if (!(inode = dentry->d_inode)) - goto out_putf; + inode = filp->f_dentry->d_inode; /* Don't allow mandatory locks on files that may be memory mapped * and shared. @@ -616,7 +606,7 @@ repeat: /* Block for writes against a "read" lock, * and both reads and writes against a "write" lock. 
*/ - if (posix_locks_conflict(fl, &tfl)) { + if (posix_locks_conflict(&tfl, fl)) { error = -EAGAIN; if (filp && (filp->f_flags & O_NONBLOCK)) break; @@ -650,7 +640,7 @@ repeat: static int posix_make_lock(struct file *filp, struct file_lock *fl, struct flock *l) { - off_t start; + loff_t start; memset(fl, 0, sizeof(*fl)); @@ -683,8 +673,11 @@ static int posix_make_lock(struct file *filp, struct file_lock *fl, if (((start += l->l_start) < 0) || (l->l_len < 0)) return (0); + fl->fl_end = start + l->l_len - 1; + if (l->l_len > 0 && fl->fl_end < 0) + return (0); fl->fl_start = start; /* we record the absolute position */ - if ((l->l_len == 0) || ((fl->fl_end = start + l->l_len - 1) < 0)) + if (l->l_len == 0) fl->fl_end = OFFSET_MAX; fl->fl_file = filp; @@ -703,8 +696,6 @@ static int flock_make_lock(struct file *filp, struct file_lock *fl, memset(fl, 0, sizeof(*fl)); init_waitqueue_head(&fl->fl_wait); - if (!filp->f_dentry) /* just in case */ - return (0); switch (cmd & ~LOCK_NB) { case LOCK_SH: @@ -1128,6 +1119,8 @@ static struct file_lock *locks_init_lock(struct file_lock *new, new->fl_start = fl->fl_start; new->fl_end = fl->fl_end; new->fl_notify = fl->fl_notify; + new->fl_insert = fl->fl_insert; + new->fl_remove = fl->fl_remove; new->fl_u = fl->fl_u; } return new; @@ -1146,6 +1139,9 @@ static void locks_insert_lock(struct file_lock **pos, struct file_lock *fl) fl->fl_next = *pos; /* insert into file's list */ *pos = fl; + if (fl->fl_insert) + fl->fl_insert(fl); + return; } @@ -1173,6 +1169,9 @@ static void locks_delete_lock(struct file_lock **thisfl_p, unsigned int wait) prevfl->fl_nextlink = nextfl; else file_lock_table = nextfl; + + if (thisfl->fl_remove) + thisfl->fl_remove(thisfl); locks_wake_up_blocks(thisfl, wait); locks_free_lock(thisfl); @@ -1201,10 +1200,10 @@ static char *lock_get_status(struct file_lock *fl, int id, char *pfx) p += sprintf(p, "FLOCK ADVISORY "); } p += sprintf(p, "%s ", (fl->fl_type == F_RDLCK) ? 
"READ " : "WRITE"); - p += sprintf(p, "%d %s:%ld %ld %ld ", + p += sprintf(p, "%d %s:%ld %Ld %Ld ", fl->fl_pid, - kdevname(inode->i_dev), inode->i_ino, fl->fl_start, - fl->fl_end); + kdevname(inode->i_dev), inode->i_ino, + (long long)fl->fl_start, (long long)fl->fl_end); sprintf(p, "%08lx %08lx %08lx %08lx %08lx\n", (long)fl, (long)fl->fl_prevlink, (long)fl->fl_nextlink, (long)fl->fl_next, (long)fl->fl_nextblock); @@ -1212,7 +1211,7 @@ static char *lock_get_status(struct file_lock *fl, int id, char *pfx) } static inline int copy_lock_status(char *p, char **q, off_t pos, int len, - off_t offset, off_t length) + off_t offset, int length) { off_t i; @@ -1236,7 +1235,7 @@ static inline int copy_lock_status(char *p, char **q, off_t pos, int len, return (1); } -int get_locks_status(char *buffer, char **start, off_t offset, off_t length) +int get_locks_status(char *buffer, char **start, off_t offset, int length) { struct file_lock *fl; struct file_lock *bfl; diff --git a/fs/namei.c b/fs/namei.c index 8675e28c5..58f7a590a 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -192,31 +192,6 @@ void put_write_access(struct inode * inode) } /* - * "." and ".." are special - ".." especially so because it has to be able - * to know about the current root directory and parent relationships - */ -static struct dentry * reserved_lookup(struct dentry * parent, struct qstr * name) -{ - struct dentry *result = NULL; - if (name->name[0] == '.') { - switch (name->len) { - default: - break; - case 2: - if (name->name[1] != '.') - break; - - if (parent != current->fs->root) - parent = parent->d_covers->d_parent; - /* fallthrough */ - case 1: - result = parent; - } - } - return dget(result); -} - -/* * Internal lookup() using the new generic dcache. 
*/ static struct dentry * cached_lookup(struct dentry * parent, struct qstr * name, int flags) @@ -279,39 +254,28 @@ static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, i return result; } -static struct dentry * do_follow_link(struct dentry *base, struct dentry *dentry, unsigned int follow) +static inline int do_follow_link(struct dentry *dentry, struct nameidata *nd) { - struct inode * inode = dentry->d_inode; - - if ((follow & LOOKUP_FOLLOW) - && inode && inode->i_op && inode->i_op->follow_link) { - if (current->link_count < 32) { - struct dentry * result; - - current->link_count++; - /* This eats the base */ - result = inode->i_op->follow_link(dentry, base, follow); - current->link_count--; - dput(dentry); - return result; - } - dput(dentry); - dentry = ERR_PTR(-ELOOP); - } - dput(base); - return dentry; + int err; + if (current->link_count >= 32) + goto loop; + current->link_count++; + UPDATE_ATIME(dentry->d_inode); + err = dentry->d_inode->i_op->follow_link(dentry, nd); + current->link_count--; + return err; +loop: + dput(nd->dentry); + mntput(nd->mnt); + return -ELOOP; } -static inline struct dentry * follow_mount(struct dentry * dentry) +static inline int follow_down(struct dentry ** dentry, struct vfsmount **mnt) { - struct dentry * mnt = dentry->d_mounts; - - if (mnt != dentry) { - dget(mnt); - dput(dentry); - dentry = mnt; - } - return dentry; + struct dentry * parent = dget((*dentry)->d_mounts); + dput(*dentry); + *dentry = parent; + return 1; } /* @@ -319,36 +283,31 @@ static inline struct dentry * follow_mount(struct dentry * dentry) * * This is the basic name resolution function, turning a pathname * into the final dentry. + * + * We expect 'base' to be positive and a directory. 
*/ -struct dentry * lookup_dentry(const char * name, struct dentry * base, unsigned int lookup_flags) +int walk_name(const char * name, unsigned lookup_flags, struct nameidata *nd) { - struct dentry * dentry; + struct dentry *dentry; struct inode *inode; + int err; - if (*name == '/') { - if (base) - dput(base); - do { - name++; - } while (*name == '/'); - __prefix_lookup_dentry(name, lookup_flags); - base = dget(current->fs->root); - } else if (!base) { - base = dget(current->fs->pwd); - } - + while (*name=='/') + name++; if (!*name) goto return_base; - inode = base->d_inode; - lookup_flags &= LOOKUP_FOLLOW | LOOKUP_DIRECTORY | LOOKUP_SLASHOK; + inode = nd->dentry->d_inode; + if (current->link_count) + lookup_flags = LOOKUP_FOLLOW; + + lookup_flags &= LOOKUP_FOLLOW | LOOKUP_DIRECTORY | + LOOKUP_SLASHOK | LOOKUP_POSITIVE | LOOKUP_PARENT; /* At this point we know we have a real path component. */ for(;;) { - int err; unsigned long hash; struct qstr this; - unsigned int flags; unsigned int c; err = permission(inode, MAY_EXEC); @@ -369,99 +328,240 @@ struct dentry * lookup_dentry(const char * name, struct dentry * base, unsigned this.hash = end_name_hash(hash); /* remove trailing slashes? */ - flags = lookup_flags; - if (c) { - char tmp; - - flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY; - do { - tmp = *++name; - } while (tmp == '/'); - if (tmp) - flags |= LOOKUP_CONTINUE; - } + if (!c) + goto last_component; + while (*++name == '/'); + if (!*name) + goto last_with_slashes; /* + * "." and ".." are special - ".." especially so because it has + * to be able to know about the current root directory and + * parent relationships. 
+ */ + if (this.name[0] == '.') switch (this.len) { + default: + break; + case 2: + if (this.name[1] != '.') + break; + if (nd->dentry != current->fs->root) { + dentry = dget(nd->dentry->d_covers->d_parent); + dput(nd->dentry); + nd->dentry = dentry; + inode = dentry->d_inode; + } + /* fallthrough */ + case 1: + continue; + } + /* * See if the low-level filesystem might want * to use its own hash.. */ - if (base->d_op && base->d_op->d_hash) { - int error; - error = base->d_op->d_hash(base, &this); - if (error < 0) { - dentry = ERR_PTR(error); + if (nd->dentry->d_op && nd->dentry->d_op->d_hash) { + err = nd->dentry->d_op->d_hash(nd->dentry, &this); + if (err < 0) break; - } } - /* This does the actual lookups.. */ - dentry = reserved_lookup(base, &this); + dentry = cached_lookup(nd->dentry, &this, LOOKUP_CONTINUE); if (!dentry) { - dentry = cached_lookup(base, &this, flags); - if (!dentry) { - dentry = real_lookup(base, &this, flags); - if (IS_ERR(dentry)) - break; - } - - /* Check mountpoints.. */ - dentry = follow_mount(dentry); + dentry = real_lookup(nd->dentry, &this, LOOKUP_CONTINUE); + err = PTR_ERR(dentry); + if (IS_ERR(dentry)) + break; } + /* Check mountpoints.. 
*/ + while (d_mountpoint(dentry) && follow_down(&dentry, &nd->mnt)) + ; - base = do_follow_link(base, dentry, flags); - if (IS_ERR(base)) - goto return_base; + err = -ENOENT; + inode = dentry->d_inode; + if (!inode) + break; + err = -ENOTDIR; + if (!inode->i_op) + break; - inode = base->d_inode; - if (flags & LOOKUP_DIRECTORY) { + if (inode->i_op->follow_link) { + err = do_follow_link(dentry, nd); + dput(dentry); + if (err) + goto return_err; + err = -ENOENT; + inode = nd->dentry->d_inode; if (!inode) - goto no_inode; - dentry = ERR_PTR(-ENOTDIR); + break; + err = -ENOTDIR; + if (!inode->i_op) + break; + } else { + dput(nd->dentry); + nd->dentry = dentry; + } + err = -ENOTDIR; + if (!inode->i_op->lookup) + break; + continue; + /* here ends the main loop */ + +last_with_slashes: + lookup_flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY; +last_component: + if (lookup_flags & LOOKUP_PARENT) + goto lookup_parent; + if (this.name[0] == '.') switch (this.len) { + default: + break; + case 2: + if (this.name[1] != '.') + break; + if (nd->dentry != current->fs->root) { + dentry = dget(nd->dentry->d_covers->d_parent); + dput(nd->dentry); + nd->dentry = dentry; + inode = dentry->d_inode; + } + /* fallthrough */ + case 1: + goto return_base; + } + if (nd->dentry->d_op && nd->dentry->d_op->d_hash) { + err = nd->dentry->d_op->d_hash(nd->dentry, &this); + if (err < 0) + break; + } + dentry = cached_lookup(nd->dentry, &this, 0); + if (!dentry) { + dentry = real_lookup(nd->dentry, &this, 0); + err = PTR_ERR(dentry); + if (IS_ERR(dentry)) + break; + } + while (d_mountpoint(dentry) && follow_down(&dentry, &nd->mnt)) + ; + inode = dentry->d_inode; + if ((lookup_flags & LOOKUP_FOLLOW) + && inode && inode->i_op && inode->i_op->follow_link) { + err = do_follow_link(dentry, nd); + dput(dentry); + if (err) + goto return_err; + inode = nd->dentry->d_inode; + } else { + dput(nd->dentry); + nd->dentry = dentry; + } + err = -ENOENT; + if (!inode) + goto no_inode; + if (lookup_flags & 
LOOKUP_DIRECTORY) { + err = -ENOTDIR; if (!inode->i_op || !inode->i_op->lookup) break; - if (flags & LOOKUP_CONTINUE) - continue; } -return_base: - return base; -/* - * The case of a nonexisting file is special. - * - * In the middle of a pathname lookup (ie when - * LOOKUP_CONTINUE is set), it's an obvious - * error and returns ENOENT. - * - * At the end of a pathname lookup it's legal, - * and we return a negative dentry. However, we - * get here only if there were trailing slashes, - * which is legal only if we know it's supposed - * to be a directory (ie "mkdir"). Thus the - * LOOKUP_SLASHOK flag. - */ + goto return_base; no_inode: - dentry = ERR_PTR(-ENOENT); - if (flags & LOOKUP_CONTINUE) + err = -ENOENT; + if (lookup_flags & LOOKUP_POSITIVE) break; - if (flags & LOOKUP_SLASHOK) - goto return_base; - break; + if (lookup_flags & LOOKUP_DIRECTORY) + if (!(lookup_flags & LOOKUP_SLASHOK)) + break; + goto return_base; +lookup_parent: + nd->last = this; +return_base: + return 0; } - dput(base); - return dentry; + dput(nd->dentry); + mntput(nd->mnt); +return_err: + return err; +} + +/* returns 1 if everything is done */ +static int __emul_lookup_dentry(const char *name, int lookup_flags, + struct nameidata *nd) +{ + char *emul = __emul_prefix(); + + if (!emul) + return 0; + + nd->mnt = mntget(current->fs->rootmnt); + nd->dentry = dget(current->fs->root); + if (walk_name(emul,LOOKUP_FOLLOW|LOOKUP_DIRECTORY|LOOKUP_POSITIVE,nd)) + return 0; + if (walk_name(name, lookup_flags, nd)) + return 0; + + if (!nd->dentry->d_inode) { + struct nameidata nd_root; + nd_root.last.len = 0; + nd_root.mnt = mntget(current->fs->rootmnt); + nd_root.dentry = dget(current->fs->root); + if (walk_name(name, lookup_flags, &nd_root)) + return 1; + if (nd_root.dentry->d_inode) { + dput(nd->dentry); + mntput(nd->mnt); + nd->dentry = nd_root.dentry; + nd->mnt = nd_root.mnt; + nd->last = nd_root.last; + return 1; + } + dput(nd_root.dentry); + mntput(nd_root.mnt); + } + return 1; +} + +static 
inline int +walk_init_root(const char *name, unsigned flags, struct nameidata *nd) +{ + if (current->personality != PER_LINUX) + if (__emul_lookup_dentry(name,flags,nd)); + return 0; + nd->mnt = mntget(current->fs->rootmnt); + nd->dentry = dget(current->fs->root); + return 1; +} + +int walk_init(const char *name,unsigned int flags,struct nameidata *nd) +{ + nd->last.len = 0; + if (*name=='/') + return walk_init_root(name,flags,nd); + nd->mnt = mntget(current->fs->pwdmnt); + nd->dentry = dget(current->fs->pwd); + return 1; +} + +struct dentry * lookup_dentry(const char * name, unsigned int lookup_flags) +{ + struct nameidata nd; + int err = 0; + + if (walk_init(name, lookup_flags, &nd)) + err = walk_name(name, lookup_flags, &nd); + if (!err) { + mntput(nd.mnt); + return nd.dentry; + } + return ERR_PTR(err); } /* * Restricted form of lookup. Doesn't follow links, single-component only, * needs parent already locked. Doesn't follow mounts. */ -struct dentry * lookup_one(const char * name, struct dentry * base) +static inline struct dentry * lookup_hash(struct qstr *name, struct dentry * base) { struct dentry * dentry; struct inode *inode; int err; - unsigned long hash; - struct qstr this; - unsigned int c; inode = base->d_inode; err = permission(inode, MAY_EXEC); @@ -469,36 +569,20 @@ struct dentry * lookup_one(const char * name, struct dentry * base) if (err) goto out; - this.name = name; - c = *(const unsigned char *)name; - if (!c) - goto access; - - hash = init_name_hash(); - do { - name++; - if (c == '/') - goto access; - hash = partial_name_hash(c, hash); - c = *(const unsigned char *)name; - } while (c); - this.len = name - (const char *) this.name; - this.hash = end_name_hash(hash); - /* * See if the low-level filesystem might want * to use its own hash.. 
*/ if (base->d_op && base->d_op->d_hash) { - err = base->d_op->d_hash(base, &this); + err = base->d_op->d_hash(base, name); dentry = ERR_PTR(err); if (err < 0) goto out; } - dentry = cached_lookup(base, &this, 0); + dentry = cached_lookup(base, name, 0); if (!dentry) { - struct dentry *new = d_alloc(base, &this); + struct dentry *new = d_alloc(base, name); dentry = ERR_PTR(-ENOMEM); if (!new) goto out; @@ -515,9 +599,33 @@ struct dentry * lookup_one(const char * name, struct dentry * base) out: dput(base); return dentry; +} + +struct dentry * lookup_one(const char * name, struct dentry * base) +{ + unsigned long hash; + struct qstr this; + unsigned int c; + + this.name = name; + c = *(const unsigned char *)name; + if (!c) + goto access; + + hash = init_name_hash(); + do { + name++; + if (c == '/') + goto access; + hash = partial_name_hash(c, hash); + c = *(const unsigned char *)name; + } while (c); + this.len = name - (const char *) this.name; + this.hash = end_name_hash(hash); + + return lookup_hash(&this, base); access: - dentry = ERR_PTR(-EACCES); - goto out; + return ERR_PTR(-EACCES); } /* @@ -538,14 +646,8 @@ struct dentry * __namei(const char *pathname, unsigned int lookup_flags) name = getname(pathname); dentry = (struct dentry *) name; if (!IS_ERR(name)) { - dentry = lookup_dentry(name, NULL, lookup_flags); + dentry = lookup_dentry(name,lookup_flags|LOOKUP_POSITIVE); putname(name); - if (!IS_ERR(dentry)) { - if (!dentry->d_inode) { - dput(dentry); - dentry = ERR_PTR(-ENOENT); - } - } } return dentry; } @@ -600,7 +702,7 @@ static inline int may_delete(struct inode *dir,struct dentry *victim, int isdir) return -ENOTDIR; if (IS_ROOT(victim)) return -EBUSY; - if (victim->d_mounts != victim->d_covers) + if (d_mountpoint(victim)) return -EBUSY; } else if (S_ISDIR(victim->d_inode->i_mode)) return -EISDIR; @@ -639,7 +741,7 @@ static inline int lookup_flags(unsigned int f) if (f & O_DIRECTORY) retval |= LOOKUP_DIRECTORY; - + return retval; } @@ -679,64 +781,92 @@ 
exit_lock: * which is a lot more logical, and also allows the "no perm" needed * for symlinks (where the permissions are checked later). */ -struct dentry * __open_namei(const char * pathname, int flag, int mode, struct dentry * dir) +int open_namei(const char * pathname, int flag, int mode, struct nameidata *nd) { - int acc_mode, error; + int acc_mode, error = 0; struct inode *inode; struct dentry *dentry; - dentry = lookup_dentry(pathname, dir, lookup_flags(flag)); - if (IS_ERR(dentry)) - return dentry; - acc_mode = ACC_MODE(flag); - if (flag & O_CREAT) { + if (!(flag & O_CREAT)) { + if (walk_init(pathname, lookup_flags(flag), nd)) + error = walk_name(pathname, lookup_flags(flag), nd); + if (error) + return error; + + dentry = nd->dentry; + } else { struct dentry *dir; - if (dentry->d_inode) { - if (!(flag & O_EXCL)) - goto nocreate; - error = -EEXIST; + if (walk_init(pathname, LOOKUP_PARENT, nd)) + error = walk_name(pathname, LOOKUP_PARENT, nd); + if (error) + return error; + /* + * It's not obvious that open(".", O_CREAT, foo) should + * fail, but it's even less obvious that it should succeed. + * Since O_CREAT means an intention to create the thing and + * open(2) had never created directories, count it as caller's + * luserdom and let him sod off - -EISDIR it is. + */ + error = -EISDIR; + if (!nd->last.len || (nd->last.name[0] == '.' && + (nd->last.len == 1 || + (nd->last.name[1] == '.' && nd->last.len == 2)))) + goto exit; + /* same for foo/ */ + if (nd->last.name[nd->last.len]) goto exit; - } - dir = lock_parent(dentry); - if (!check_parent(dir, dentry)) { - /* - * Really nasty race happened. What's the - * right error code? We had a dentry, but - * before we could use it it was removed - * by somebody else. We could just re-try - * everything, I guess. - * - * ENOENT is definitely wrong. 
- */ - error = -ENOENT; - unlock_dir(dir); + dir = dget(nd->dentry); + down(&dir->d_inode->i_sem); + + dentry = lookup_hash(&nd->last, dget(nd->dentry)); + error = PTR_ERR(dentry); + if (IS_ERR(dentry)) { + up(&dir->d_inode->i_sem); + dput(dir); goto exit; } - /* - * Somebody might have created the file while we - * waited for the directory lock.. So we have to - * re-do the existence test. - */ if (dentry->d_inode) { - error = 0; + up(&dir->d_inode->i_sem); + dput(dir); + error = -EEXIST; if (flag & O_EXCL) - error = -EEXIST; + goto exit; + if (dentry->d_inode->i_op && + dentry->d_inode->i_op->follow_link) { + /* + * With O_EXCL it would be -EEXIST. + * If symlink is a dangling one it's -ENOENT. + * Otherwise we open the object it points to. + */ + error = do_follow_link(dentry, nd); + dput(dentry); + if (error) + return error; + dentry = nd->dentry; + } else { + dput(nd->dentry); + nd->dentry = dentry; + } + error = -EISDIR; + if (dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode)) + goto exit; } else { - error = vfs_create(dir->d_inode, dentry,mode); + error = vfs_create(dir->d_inode, dentry, mode); /* Don't check for write permission, don't truncate */ acc_mode = 0; flag &= ~O_TRUNC; + dput(nd->dentry); + nd->dentry = dentry; + unlock_dir(dir); + if (error) + goto exit; } - unlock_dir(dir); - if (error) - goto exit; } -nocreate: error = -ENOENT; inode = dentry->d_inode; if (!inode) @@ -804,11 +934,45 @@ nocreate: if (flag & FMODE_WRITE) DQUOT_INIT(inode); - return dentry; + return 0; exit: + dput(nd->dentry); + mntput(nd->mnt); + return error; +} + +static struct dentry *lookup_create(const char *name, int is_dir) +{ + struct nameidata nd; + struct dentry *dentry; + int err = 0; + if (walk_init(name, LOOKUP_PARENT, &nd)) + err = walk_name(name, LOOKUP_PARENT, &nd); + dentry = ERR_PTR(err); + if (err) + goto out; + down(&nd.dentry->d_inode->i_sem); + dentry = ERR_PTR(-EEXIST); + if (!nd.last.len || (nd.last.name[0] == '.' 
&& + (nd.last.len == 1 || (nd.last.name[1] == '.' && nd.last.len == 2)))) + goto fail; + dentry = lookup_hash(&nd.last, dget(nd.dentry)); + if (IS_ERR(dentry)) + goto fail; + if (!is_dir && nd.last.name[nd.last.len] && !dentry->d_inode) + goto enoent; +out_dput: + dput(nd.dentry); + mntput(nd.mnt); +out: + return dentry; +enoent: dput(dentry); - return ERR_PTR(error); + dentry = ERR_PTR(-ENOENT); +fail: + up(&nd.dentry->d_inode->i_sem); + goto out_dput; } int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) @@ -842,18 +1006,14 @@ struct dentry * do_mknod(const char * filename, int mode, dev_t dev) struct dentry *dir; struct dentry *dentry, *retval; - dentry = lookup_dentry(filename, NULL, 0); + dentry = lookup_create(filename, 0); if (IS_ERR(dentry)) return dentry; - dir = lock_parent(dentry); - error = -ENOENT; - if (!check_parent(dir, dentry)) - goto exit_lock; + dir = dget(dentry->d_parent); error = vfs_mknod(dir->d_inode, dentry, mode, dev); -exit_lock: retval = ERR_PTR(error); if (!error) retval = dget(dentry); @@ -875,14 +1035,11 @@ asmlinkage long sys_mknod(const char * filename, int mode, dev_t dev) return PTR_ERR(tmp); lock_kernel(); - dentry = lookup_dentry(tmp, NULL, 0); + dentry = lookup_create(tmp, 0); error = PTR_ERR(dentry); if (IS_ERR(dentry)) goto out; - dir = lock_parent(dentry); - error = -ENOENT; - if (!check_parent(dir, dentry)) - goto out_unlock; + dir = dget(dentry->d_parent); switch (mode & S_IFMT) { case 0: case S_IFREG: error = vfs_create(dir->d_inode, dentry, mode); @@ -896,7 +1053,6 @@ asmlinkage long sys_mknod(const char * filename, int mode, dev_t dev) default: error = -EINVAL; } -out_unlock: unlock_dir(dir); dput(dentry); out: @@ -928,50 +1084,28 @@ exit_lock: return error; } -static inline int do_mkdir(const char * pathname, int mode) -{ - int error; - struct dentry *dir; - struct dentry *dentry; - - dentry = lookup_dentry(pathname, NULL, LOOKUP_SLASHOK); - error = PTR_ERR(dentry); - if (IS_ERR(dentry)) - goto 
exit; - - /* - * EEXIST is kind of a strange error code to - * return, but basically if the dentry was moved - * or unlinked while we locked the parent, we - * do know that it _did_ exist before, and as - * such it makes perfect sense.. In contrast, - * ENOENT doesn't make sense for mkdir. - */ - dir = lock_parent(dentry); - error = -EEXIST; - if (!check_parent(dir, dentry)) - goto exit_lock; - - error = vfs_mkdir(dir->d_inode, dentry, mode); - -exit_lock: - unlock_dir(dir); - dput(dentry); -exit: - return error; -} - asmlinkage long sys_mkdir(const char * pathname, int mode) { int error; char * tmp; tmp = getname(pathname); - if(IS_ERR(tmp)) - return PTR_ERR(tmp); - lock_kernel(); - error = do_mkdir(tmp,mode); - unlock_kernel(); + error = PTR_ERR(tmp); + if (!IS_ERR(tmp)) { + struct dentry *dir; + struct dentry *dentry; + + lock_kernel(); + dentry = lookup_create(tmp, 1); + error = PTR_ERR(dentry); + if (!IS_ERR(dentry)) { + dir = dget(dentry->d_parent); + error = vfs_mkdir(dir->d_inode, dentry, mode); + unlock_dir(dir); + dput(dentry); + } + unlock_kernel(); + } putname(tmp); return error; @@ -1033,7 +1167,7 @@ static inline int do_rmdir(const char * name) struct dentry *dir; struct dentry *dentry; - dentry = lookup_dentry(name, NULL, 0); + dentry = lookup_dentry(name, LOOKUP_POSITIVE); error = PTR_ERR(dentry); if (IS_ERR(dentry)) goto exit; @@ -1082,13 +1216,13 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry) return error; } -int do_unlink(const char * name, struct dentry * base) +static int do_unlink(const char * name) { int error; struct dentry *dir; struct dentry *dentry; - dentry = lookup_dentry(name, base, 0); + dentry = lookup_dentry(name, LOOKUP_POSITIVE); error = PTR_ERR(dentry); if (IS_ERR(dentry)) goto exit; @@ -1113,7 +1247,7 @@ asmlinkage long sys_unlink(const char * pathname) if(IS_ERR(tmp)) return PTR_ERR(tmp); lock_kernel(); - error = do_unlink(tmp, NULL); + error = do_unlink(tmp); unlock_kernel(); putname(tmp); @@ -1141,32 +1275,6 @@ 
exit_lock: return error; } -static inline int do_symlink(const char * oldname, const char * newname) -{ - int error; - struct dentry *dir; - struct dentry *dentry; - - dentry = lookup_dentry(newname, NULL, 0); - - error = PTR_ERR(dentry); - if (IS_ERR(dentry)) - goto exit; - - dir = lock_parent(dentry); - error = -ENOENT; - if (!check_parent(dir, dentry)) - goto exit_lock; - - error = vfs_symlink(dir->d_inode, dentry, oldname); - -exit_lock: - unlock_dir(dir); - dput(dentry); -exit: - return error; -} - asmlinkage long sys_symlink(const char * oldname, const char * newname) { int error; @@ -1179,8 +1287,18 @@ asmlinkage long sys_symlink(const char * oldname, const char * newname) to = getname(newname); error = PTR_ERR(to); if (!IS_ERR(to)) { + struct dentry *dir; + struct dentry *dentry; + lock_kernel(); - error = do_symlink(from,to); + dentry = lookup_create(to, 0); + error = PTR_ERR(dentry); + if (!IS_ERR(dentry)) { + dir = dget(dentry->d_parent); + error = vfs_symlink(dir->d_inode, dentry, from); + unlock_dir(dir); + dput(dentry); + } unlock_kernel(); putname(to); } @@ -1224,51 +1342,20 @@ exit_lock: return error; } -static inline int do_link(const char * oldname, const char * newname) -{ - struct dentry *old_dentry, *new_dentry, *dir; - int error; - - /* - * Hardlinks are often used in delicate situations. We avoid - * security-related surprises by not following symlinks on the - * newname. --KAB - * - * We don't follow them on the oldname either to be compatible - * with linux 2.0, and to avoid hard-linking to directories - * and other special files. 
--ADM - */ - old_dentry = lookup_dentry(oldname, NULL, 0); - error = PTR_ERR(old_dentry); - if (IS_ERR(old_dentry)) - goto exit; - - new_dentry = lookup_dentry(newname, NULL, 0); - error = PTR_ERR(new_dentry); - if (IS_ERR(new_dentry)) - goto exit_old; - - dir = lock_parent(new_dentry); - error = -ENOENT; - if (!check_parent(dir, new_dentry)) - goto exit_lock; - - error = vfs_link(old_dentry, dir->d_inode, new_dentry); - -exit_lock: - unlock_dir(dir); - dput(new_dentry); -exit_old: - dput(old_dentry); -exit: - return error; -} - +/* + * Hardlinks are often used in delicate situations. We avoid + * security-related surprises by not following symlinks on the + * newname. --KAB + * + * We don't follow them on the oldname either to be compatible + * with linux 2.0, and to avoid hard-linking to directories + * and other special files. --ADM + */ asmlinkage long sys_link(const char * oldname, const char * newname) { int error; char * from; - char * to; + char * to; from = getname(oldname); if(IS_ERR(from)) @@ -1276,8 +1363,24 @@ asmlinkage long sys_link(const char * oldname, const char * newname) to = getname(newname); error = PTR_ERR(to); if (!IS_ERR(to)) { + struct dentry *old_dentry, *new_dentry, *dir; + lock_kernel(); - error = do_link(from,to); + old_dentry = lookup_dentry(from, LOOKUP_POSITIVE); + error = PTR_ERR(old_dentry); + if (IS_ERR(old_dentry)) + goto exit; + + new_dentry = lookup_create(to, 0); + error = PTR_ERR(new_dentry); + if (!IS_ERR(new_dentry)) { + dir = dget(new_dentry->d_parent); + error = vfs_link(old_dentry, dir->d_inode, new_dentry); + unlock_dir(dir); + dput(new_dentry); + } + dput(old_dentry); +exit: unlock_kernel(); putname(to); } @@ -1435,21 +1538,17 @@ static inline int do_rename(const char * oldname, const char * newname) struct dentry * old_dir, * new_dir; struct dentry * old_dentry, *new_dentry; - old_dentry = lookup_dentry(oldname, NULL, 0); + old_dentry = lookup_dentry(oldname, LOOKUP_POSITIVE); error = PTR_ERR(old_dentry); if 
(IS_ERR(old_dentry)) goto exit; - error = -ENOENT; - if (!old_dentry->d_inode) - goto exit_old; - { unsigned int flags = 0; if (S_ISDIR(old_dentry->d_inode->i_mode)) flags = LOOKUP_SLASHOK; - new_dentry = lookup_dentry(newname, NULL, flags); + new_dentry = lookup_dentry(newname, flags); } error = PTR_ERR(new_dentry); @@ -1512,29 +1611,30 @@ out: return len; } -static inline struct dentry * -__vfs_follow_link(struct dentry *dentry, struct dentry *base, - unsigned follow, const char *link) +static inline int +__vfs_follow_link(struct nameidata *nd, const char *link) { - struct dentry *result; - UPDATE_ATIME(dentry->d_inode); - if (IS_ERR(link)) goto fail; - result = lookup_dentry(link, base, follow); - return result; + if (*link == '/') { + dput(nd->dentry); + mntput(nd->mnt); + if (!walk_init_root(link, LOOKUP_FOLLOW, nd)) + /* weird __emul_prefix() stuff did it */ + return 0; + } + return walk_name(link, LOOKUP_FOLLOW, nd); fail: - dput(base); - return (struct dentry *)link; + dput(nd->dentry); + mntput(nd->mnt); + return PTR_ERR(link); } -struct dentry * -vfs_follow_link(struct dentry *dentry, struct dentry *base, -unsigned int follow, const char *link) +int vfs_follow_link(struct nameidata *nd, const char *link) { - return __vfs_follow_link(dentry,base,follow,link); + return __vfs_follow_link(nd, link); } /* get the link contents into pagecache */ @@ -1572,12 +1672,11 @@ int page_readlink(struct dentry *dentry, char *buffer, int buflen) return res; } -struct dentry * -page_follow_link(struct dentry *dentry, struct dentry *base, unsigned int follow) +int page_follow_link(struct dentry *dentry, struct nameidata *nd) { struct page *page = NULL; char *s = page_getlink(dentry, &page); - struct dentry *res = __vfs_follow_link(dentry,base,follow,s); + int res = __vfs_follow_link(nd, s); if (page) { kunmap(page); page_cache_release(page); diff --git a/fs/ncpfs/Config.in b/fs/ncpfs/Config.in index a9909cb49..104f5a3c7 100644 --- a/fs/ncpfs/Config.in +++ 
b/fs/ncpfs/Config.in @@ -1,13 +1,13 @@ # # NCP Filesystem configuration # -bool ' Packet signatures' CONFIG_NCPFS_PACKET_SIGNING -bool ' Proprietary file locking' CONFIG_NCPFS_IOCTL_LOCKING -bool ' Clear remove/delete inhibit when needed' CONFIG_NCPFS_STRONG -bool ' Use NFS namespace if available' CONFIG_NCPFS_NFS_NS -bool ' Use LONG (OS/2) namespace if available' CONFIG_NCPFS_OS2_NS -bool ' Lowercase DOS filenames' CONFIG_NCPFS_SMALLDOS -bool ' Allow mounting of volume subdirectories' CONFIG_NCPFS_MOUNT_SUBDIR -bool ' NDS authentication support' CONFIG_NCPFS_NDS_DOMAINS -bool ' Use Native Language Support' CONFIG_NCPFS_NLS -bool ' Enable symbolic links and execute flags' CONFIG_NCPFS_EXTRAS +dep_mbool ' Packet signatures' CONFIG_NCPFS_PACKET_SIGNING $CONFIG_NCP_FS +dep_mbool ' Proprietary file locking' CONFIG_NCPFS_IOCTL_LOCKING $CONFIG_NCP_FS +dep_mbool ' Clear remove/delete inhibit when needed' CONFIG_NCPFS_STRONG $CONFIG_NCP_FS +dep_mbool ' Use NFS namespace if available' CONFIG_NCPFS_NFS_NS $CONFIG_NCP_FS +dep_mbool ' Use LONG (OS/2) namespace if available' CONFIG_NCPFS_OS2_NS $CONFIG_NCP_FS +dep_mbool ' Lowercase DOS filenames' CONFIG_NCPFS_SMALLDOS $CONFIG_NCP_FS +dep_mbool ' Allow mounting of volume subdirectories' CONFIG_NCPFS_MOUNT_SUBDIR $CONFIG_NCP_FS +dep_mbool ' NDS authentication support' CONFIG_NCPFS_NDS_DOMAINS $CONFIG_NCP_FS +dep_mbool ' Use Native Language Support' CONFIG_NCPFS_NLS $CONFIG_NCP_FS +dep_mbool ' Enable symbolic links and execute flags' CONFIG_NCPFS_EXTRAS $CONFIG_NCP_FS diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile index 3c8aac510..64ef9274b 100644 --- a/fs/nfs/Makefile +++ b/fs/nfs/Makefile @@ -14,6 +14,9 @@ O_OBJS := inode.o file.o read.o write.o dir.o symlink.o proc.o \ ifdef CONFIG_ROOT_NFS O_OBJS += nfsroot.o mount_clnt.o endif +ifdef CONFIG_NFS_V3 + O_OBJS += nfs3proc.o nfs3xdr.o +endif M_OBJS := $(O_TARGET) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 3ca240129..7d80e6468 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ 
-17,7 +17,6 @@ * 6 Jun 1999 Cache readdir lookups in the page cache. -DaveM */ -#define NFS_NEED_XDR_TYPES #include <linux/sched.h> #include <linux/errno.h> #include <linux/stat.h> @@ -28,7 +27,7 @@ #include <linux/mm.h> #include <linux/sunrpc/clnt.h> #include <linux/nfs_fs.h> -#include <linux/nfs.h> +#include <linux/nfs_mount.h> #include <linux/pagemap.h> #include <asm/segment.h> /* for fs functions */ @@ -71,202 +70,131 @@ struct inode_operations nfs_dir_inode_operations = { setattr: nfs_notify_change, }; -/* Each readdir response is composed of entries which look - * like the following, as per the NFSv2 RFC: - * - * __u32 not_end zero if end of response - * __u32 file ID opaque ino_t - * __u32 namelen size of name string - * VAR name string the string, padded to modulo 4 bytes - * __u32 cookie opaque ID of next entry - * - * When you hit not_end being zero, the next __u32 is non-zero if - * this is the end of the complete set of readdir entires for this - * directory. This can be used, for example, to initiate pre-fetch. - * - * In order to know what to ask the server for, we only need to know - * the final cookie of the previous page, and offset zero has cookie - * zero, so we cache cookie to page offset translations in chunks. - */ -#define COOKIES_PER_CHUNK (8 - ((sizeof(void *) / sizeof(__u32)))) -struct nfs_cookie_table { - struct nfs_cookie_table *next; - __u32 cookies[COOKIES_PER_CHUNK]; -}; -static kmem_cache_t *nfs_cookie_cachep; +typedef u32 * (*decode_dirent_t)(u32 *, struct nfs_entry *, int); -/* This whole scheme relies on the fact that dirent cookies - * are monotonically increasing. - * - * Another invariant is that once we have a valid non-zero - * EOF marker cached, we also have the complete set of cookie - * table entries. +/* + * Given a pointer to a buffer that has already been filled by a call + * to readdir, find the next entry. 
* - * We return the page offset assosciated with the page where - * cookie must be if it exists at all, however if we can not - * figure that out conclusively, we return < 0. + * If the end of the buffer has been reached, return -EAGAIN, if not, + * return the offset within the buffer of the next entry to be + * read. */ -static long __nfs_readdir_offset(struct inode *inode, __u32 cookie) +static inline +long find_dirent(struct page *page, loff_t offset, + struct nfs_entry *entry, + decode_dirent_t decode, int plus, int use_cookie) { - struct nfs_cookie_table *p; - unsigned long ret = 0; - - for(p = NFS_COOKIES(inode); p != NULL; p = p->next) { - int i; - - for (i = 0; i < COOKIES_PER_CHUNK; i++) { - __u32 this_cookie = p->cookies[i]; - - /* End of known cookies, EOF is our only hope. */ - if (!this_cookie) - goto check_eof; - - /* Next cookie is larger, must be in previous page. */ - if (this_cookie > cookie) - return ret; - - ret += 1; - - /* Exact cookie match, it must be in this page :-) */ - if (this_cookie == cookie) - return ret; + u8 *p = (u8 *)kmap(page), + *start = p; + unsigned long base = page_offset(page), + pg_offset = 0; + int loop_count = 0; + + if (!p) + return -EIO; + for(;;) { + p = (u8*)decode((__u32*)p, entry, plus); + if (IS_ERR(p)) + break; + pg_offset = p - start; + entry->prev = entry->offset; + entry->offset = base + pg_offset; + if ((use_cookie ? entry->cookie : entry->offset) > offset) + break; + if (loop_count++ > 200) { + loop_count = 0; + schedule(); } } -check_eof: - if (NFS_DIREOF(inode) != 0) - return ret; - return -1L; -} - -static __inline__ long nfs_readdir_offset(struct inode *inode, __u32 cookie) -{ - /* Cookie zero is always at page offset zero. Optimize the - * other common case since most directories fit entirely - * in one page. - */ - if (!cookie || (!NFS_COOKIES(inode) && NFS_DIREOF(inode))) - return 0; - return __nfs_readdir_offset(inode, cookie); + kunmap(page); + return (IS_ERR(p)) ? 
PTR_ERR(p) : (long)pg_offset; } -/* Since a cookie of zero is declared special by the NFS - * protocol, we easily can tell if a cookie in an existing - * table chunk is valid or not. +/* + * Find the given page, and call find_dirent() in order to try to + * return the next entry. * - * NOTE: The cookies are indexed off-by-one because zero - * need not an entry. + * Returns -EIO if the page is not available, or up to date. */ -static __inline__ __u32 *find_cookie(struct inode *inode, unsigned long off) +static inline +long find_dirent_page(struct inode *inode, loff_t offset, + struct nfs_entry *entry) { - static __u32 cookie_zero = 0; - struct nfs_cookie_table *p; - __u32 *ret; - - if (!off) - return &cookie_zero; - off -= 1; - p = NFS_COOKIES(inode); - while(off >= COOKIES_PER_CHUNK && p) { - off -= COOKIES_PER_CHUNK; - p = p->next; - } - ret = NULL; - if (p) { - ret = &p->cookies[off]; - if (!*ret) - ret = NULL; - } - return ret; -} + decode_dirent_t decode = NFS_PROTO(inode)->decode_dirent; + struct page *page; + unsigned long index = entry->offset >> PAGE_CACHE_SHIFT; + long status = -EIO; + int plus = NFS_USE_READDIRPLUS(inode), + use_cookie = NFS_MONOTONE_COOKIES(inode); -#define NFS_NAMELEN_ALIGN(__len) ((((__len)+3)>>2)<<2) -static int create_cookie(__u32 cookie, unsigned long off, struct inode *inode) -{ - struct nfs_cookie_table **cpp; + dfprintk(VFS, "NFS: find_dirent_page() searching directory page %ld\n", entry->offset & PAGE_CACHE_MASK); - cpp = (struct nfs_cookie_table **) &NFS_COOKIES(inode); - while (off >= COOKIES_PER_CHUNK && *cpp) { - off -= COOKIES_PER_CHUNK; - cpp = &(*cpp)->next; - } - if (*cpp) { - (*cpp)->cookies[off] = cookie; - } else { - struct nfs_cookie_table *new; - int i; - - new = kmem_cache_alloc(nfs_cookie_cachep, SLAB_ATOMIC); - if(!new) - return -1; - *cpp = new; - new->next = NULL; - for(i = 0; i < COOKIES_PER_CHUNK; i++) { - if (i == off) { - new->cookies[i] = cookie; - } else { - new->cookies[i] = 0; - } - } - } - return 0; + 
if (entry->page) + page_cache_release(entry->page); + + page = find_get_page(&inode->i_data, index); + + if (page && Page_Uptodate(page)) + status = find_dirent(page, offset, entry, decode, plus, use_cookie); + + /* NB: on successful return we will be holding the page */ + if (status < 0) { + entry->page = NULL; + if (page) + page_cache_release(page); + } else + entry->page = page; + + dfprintk(VFS, "NFS: find_dirent_page() returns %ld\n", status); + return status; } -static struct page *try_to_get_dirent_page(struct file *, __u32, int); -/* Recover from a revalidation flush. The case here is that - * the inode for the directory got invalidated somehow, and - * all of our cached information is lost. In order to get - * a correct cookie for the current readdir request from the - * user, we must (re-)fetch older readdir page cache entries. +/* + * Recurse through the page cache pages, and return a + * filled nfs_entry structure of the next directory entry if possible. * - * Returns < 0 if some error occurrs, else it is the page offset - * to fetch. + * The target for the search is position 'offset'. + * The latter may either be an offset into the page cache, or (better) + * a cookie depending on whether we're interested in strictly following + * the RFC wrt. not assuming monotonicity of cookies or not. + * + * For most systems, the latter is more reliable since it naturally + * copes with holes in the directory. 
*/ -static long refetch_to_readdir_cookie(struct file *file, struct inode *inode) +static inline +long search_cached_dirent_pages(struct inode *inode, loff_t offset, + struct nfs_entry *entry) { - struct page *page; - u32 goal_cookie = file->f_pos; - long cur_off, ret = -1L; + long res = 0; + int loop_count = 0; -again: - cur_off = 0; + dfprintk(VFS, "NFS: search_cached_dirent_pages() searching for cookie %Ld\n", (long long)offset); for (;;) { - page = find_get_page(&inode->i_data, cur_off); - if (page) { - if (!Page_Uptodate(page)) - goto out_error; - } else { - __u32 *cp = find_cookie(inode, cur_off); - - if (!cp) - goto out_error; - - page = try_to_get_dirent_page(file, *cp, 0); - if (!page) { - if (!cur_off) - goto out_error; - - /* Someone touched the dir on us. */ - goto again; - } + res = find_dirent_page(inode, offset, entry); + if (res == -EAGAIN) { + /* Align to beginning of next page */ + entry->offset &= PAGE_CACHE_MASK; + entry->offset += PAGE_CACHE_SIZE; + } + if (res != -EAGAIN) + break; + if (loop_count++ > 200) { + loop_count = 0; + schedule(); } - page_cache_release(page); - - if ((ret = nfs_readdir_offset(inode, goal_cookie)) >= 0) - goto out; - - cur_off += 1; } -out: - return ret; - -out_error: - if (page) - page_cache_release(page); - goto out; + if (res < 0 && entry->page) { + page_cache_release(entry->page); + entry->page = NULL; + } + dfprintk(VFS, "NFS: search_cached_dirent_pages() returned %ld\n", res); + return res; } + /* Now we cache directories properly, by stuffing the dirent * data directly in the page cache. * @@ -279,198 +207,240 @@ out_error: * page-in of the RPC reply, nowhere else, this simplies * things substantially. 
*/ - -static int nfs_dir_filler(struct dentry *dentry, struct page *page) +static inline +long try_to_get_dirent_page(struct file *file, struct inode *inode, + struct nfs_entry *entry) { - struct nfs_readdirargs rd_args; - struct nfs_readdirres rd_res; - struct inode *inode = dentry->d_inode; - long offset = page->index; - __u32 *cookiep; - int err; + struct dentry *dir = file->f_dentry; + struct page *page; + __u32 *p; + unsigned long index = entry->offset >> PAGE_CACHE_SHIFT; + long res = 0; + unsigned int dtsize = NFS_SERVER(inode)->dtsize; + int plus = NFS_USE_READDIRPLUS(inode); - kmap(page); + dfprintk(VFS, "NFS: try_to_get_dirent_page() reading directory page @ index %ld\n", index); - err = -EIO; - cookiep = find_cookie(inode, offset); - if (!cookiep) - goto fail; + page = grab_cache_page(&inode->i_data, index); - rd_args.fh = NFS_FH(dentry); - rd_res.buffer = (char *)page_address(page); - rd_res.bufsiz = PAGE_CACHE_SIZE; - rd_res.cookie = *cookiep; - do { - rd_args.buffer = rd_res.buffer; - rd_args.bufsiz = rd_res.bufsiz; - rd_args.cookie = rd_res.cookie; - err = rpc_call(NFS_CLIENT(inode), - NFSPROC_READDIR, &rd_args, &rd_res, 0); - if (err < 0) - goto fail; - } while(rd_res.bufsiz > 0); - - err = -EIO; - if (rd_res.bufsiz < 0) - NFS_DIREOF(inode) = rd_res.cookie; - else if (create_cookie(rd_res.cookie, offset, inode)) - goto fail; + if (!page) { + res = -ENOMEM; + goto out; + } - SetPageUptodate(page); - kunmap(page); - UnlockPage(page); - return 0; -fail: - SetPageError(page); - kunmap(page); - UnlockPage(page); - return err; -} + if (Page_Uptodate(page)) { + dfprintk(VFS, "NFS: try_to_get_dirent_page(): page already up to date.\n"); + goto unlock_out; + } -static struct page *try_to_get_dirent_page(struct file *file, __u32 cookie, int refetch_ok) -{ - struct dentry *dentry = file->f_dentry; - struct inode *inode = dentry->d_inode; - struct page *page; - long offset; + p = (__u32 *)kmap(page); - if ((offset = nfs_readdir_offset(inode, cookie)) < 0) { - 
if (!refetch_ok || - (offset = refetch_to_readdir_cookie(file, inode)) < 0) { - goto fail; - } - } + if (dtsize > PAGE_CACHE_SIZE) + dtsize = PAGE_CACHE_SIZE; + res = NFS_PROTO(inode)->readdir(dir, entry->cookie, p, dtsize, plus); + + kunmap(page); - page = read_cache_page(&inode->i_data, offset, - (filler_t *)nfs_dir_filler, dentry); - if (IS_ERR(page)) - goto fail; - if (!Page_Uptodate(page)) - goto fail2; - return page; + if (res < 0) + goto error; + if (PageError(page)) + ClearPageError(page); + SetPageUptodate(page); -fail2: + unlock_out: + UnlockPage(page); page_cache_release(page); -fail: - return NULL; + out: + dfprintk(VFS, "NFS: try_to_get_dirent_page() returns %ld\n", res); + return res; + error: + SetPageError(page); + goto unlock_out; } -/* Seek up to dirent assosciated with the passed in cookie, - * then fill in dirents found. Return the last cookie - * actually given to the user, to update the file position. +/* Recover from a revalidation flush. The case here is that + * the inode for the directory got invalidated somehow, and + * all of our cached information is lost. In order to get + * a correct cookie for the current readdir request from the + * user, we must (re-)fetch all the older readdir page cache + * entries. + * + * Returns < 0 if some error occurs. 
*/ -static __inline__ u32 nfs_do_filldir(__u32 *p, u32 cookie, - void *dirent, filldir_t filldir) +static inline +long refetch_to_readdir(struct file *file, struct inode *inode, + loff_t off, struct nfs_entry *entry) { - u32 end; - - while((end = *p++) != 0) { - __u32 fileid, len, skip, this_cookie; - char *name; + struct nfs_entry my_dirent, + *dirent = &my_dirent; + long res; + int plus = NFS_USE_READDIRPLUS(inode), + use_cookie = NFS_MONOTONE_COOKIES(inode), + loop_count = 0; + + dfprintk(VFS, "NFS: refetch_to_readdir() searching for cookie %Ld\n", (long long)off); + *dirent = *entry; + entry->page = NULL; + + for (res = 0;res >= 0;) { + if (loop_count++ > 200) { + loop_count = 0; + schedule(); + } - fileid = *p++; - len = *p++; - name = (char *) p; - skip = NFS_NAMELEN_ALIGN(len); - p += (skip >> 2); - this_cookie = *p++; + /* Search for last cookie in page cache */ + res = search_cached_dirent_pages(inode, off, dirent); - if (this_cookie < cookie) + if (res >= 0) { + /* Cookie was found */ + if ((use_cookie?dirent->cookie:dirent->offset) > off) { + *entry = *dirent; + dirent->page = NULL; + break; + } continue; + } + + if (dirent->page) + page_cache_release(dirent->page); + dirent->page = NULL; - cookie = this_cookie; - if (filldir(dirent, name, len, cookie, fileid) < 0) + if (res != -EIO) { + *entry = *dirent; break; + } + + /* Read in a new page */ + res = try_to_get_dirent_page(file, inode, dirent); + if (res == -EBADCOOKIE) { + memset(dirent, 0, sizeof(*dirent)); + nfs_zap_caches(inode); + res = 0; + } + /* We requested READDIRPLUS, but the server doesn't grok it */ + if (plus && res == -ENOTSUPP) { + NFS_FLAGS(inode) &= ~NFS_INO_ADVISE_RDPLUS; + memset(dirent, 0, sizeof(*dirent)); + nfs_zap_caches(inode); + plus = 0; + res = 0; + } } + if (dirent->page) + page_cache_release(dirent->page); - return cookie; + dfprintk(VFS, "NFS: refetch_to_readdir() returns %ld\n", res); + return res; } -/* The file offset position is represented in pure bytes, to - * make 
the page cache interface straight forward. - * - * However, some way is needed to make the connection between the - * opaque NFS directory entry cookies and our offsets, so a per-inode - * cookie cache table is used. +/* + * Once we've found the start of the dirent within a page: fill 'er up... */ -static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) +static +int nfs_do_filldir(struct file *file, struct inode *inode, + struct nfs_entry *entry, void *dirent, filldir_t filldir) { - struct dentry *dentry = filp->f_dentry; - struct inode *inode = dentry->d_inode; - struct page *page; - long offset; - int res; - - res = nfs_revalidate_inode(NFS_DSERVER(dentry), dentry); - if (res < 0) - return res; - - if (NFS_DIREOF(inode) && filp->f_pos >= NFS_DIREOF(inode)) - return 0; - - if ((offset = nfs_readdir_offset(inode, filp->f_pos)) < 0) - goto no_dirent_page; - - page = find_get_page(&inode->i_data, offset); - if (!page) - goto no_dirent_page; - if (!Page_Uptodate(page)) - goto dirent_read_error; -success: - kmap(page); - filp->f_pos = nfs_do_filldir((__u32 *) page_address(page), - filp->f_pos, dirent, filldir); + decode_dirent_t decode = NFS_PROTO(inode)->decode_dirent; + struct page *page = entry->page; + __u8 *p, + *start; + unsigned long base = page_offset(page), + offset = entry->offset, + pg_offset, + fileid; + int plus = NFS_USE_READDIRPLUS(inode), + use_cookie = NFS_MONOTONE_COOKIES(inode), + loop_count = 0, + res = 0; + + dfprintk(VFS, "NFS: nfs_do_filldir() filling starting @ offset %ld\n", entry->offset); + pg_offset = offset & ~PAGE_CACHE_MASK; + start = (u8*)kmap(page); + p = start + pg_offset; + + for(;;) { + /* Note: entry->prev contains the offset of the start of the + * current dirent */ + fileid = nfs_fileid_to_ino_t(entry->ino); + if (use_cookie) + res = filldir(dirent, entry->name, entry->len, entry->prev_cookie, fileid); + else + res = filldir(dirent, entry->name, entry->len, entry->prev, fileid); + if (res < 0) + break; + 
file->f_pos = (use_cookie) ? entry->cookie : entry->offset; + p = (u8*)decode((__u32*)p, entry, plus); + if (!p || IS_ERR(p)) + break; + pg_offset = p - start; + entry->prev = entry->offset; + entry->offset = base + pg_offset; + if (loop_count++ > 200) { + loop_count = 0; + schedule(); + } + } kunmap(page); - page_cache_release(page); - return 0; - -no_dirent_page: - page = try_to_get_dirent_page(filp, filp->f_pos, 1); - if (!page) - goto no_page; - if (Page_Uptodate(page)) - goto success; -dirent_read_error: - page_cache_release(page); -no_page: - return -EIO; + dfprintk(VFS, "NFS: nfs_do_filldir() filling ended @ offset %ld; returning = %d\n", entry->offset, res); + return res; } -/* Flush directory cookie and EOF caches for an inode. - * So we don't thrash allocating/freeing cookie tables, - * we keep the cookies around until the inode is - * deleted/reused. +/* The file offset position is now represented as a true offset into the + * page cache as is the case in most of the other filesystems. */ -__inline__ void nfs_flush_dircache(struct inode *inode) +static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) { - struct nfs_cookie_table *p = NFS_COOKIES(inode); + struct dentry *dentry = filp->f_dentry; + struct inode *inode = dentry->d_inode; + struct page *page; + struct nfs_entry my_entry, + *entry = &my_entry; + loff_t offset; + long res; + + res = nfs_revalidate(dentry); + if (res < 0) + return res; - while (p != NULL) { - int i; + /* + * filp->f_pos points to the file offset in the page cache. + * but if the cache has meanwhile been zapped, we need to + * read from the last dirent to revalidate f_pos + * itself. 
+ */ + memset(entry, 0, sizeof(*entry)); - for(i = 0; i < COOKIES_PER_CHUNK; i++) - p->cookies[i] = 0; + offset = filp->f_pos; - p = p->next; - } - NFS_DIREOF(inode) = 0; -} + while(!entry->eof) { + res = search_cached_dirent_pages(inode, offset, entry); -/* Free up directory cache state, this happens when - * nfs_delete_inode is called on an NFS directory. - */ -void nfs_free_dircache(struct inode *inode) -{ - struct nfs_cookie_table *p = NFS_COOKIES(inode); + if (res < 0) { + if (entry->eof) + break; + res = refetch_to_readdir(filp, inode, offset, entry); + if (res < 0) + break; + } - while (p != NULL) { - struct nfs_cookie_table *next = p->next; - kmem_cache_free(nfs_cookie_cachep, p); - p = next; + page = entry->page; + if (!page) + printk(KERN_ERR "NFS: Missing page...\n"); + res = nfs_do_filldir(filp, inode, entry, dirent, filldir); + page_cache_release(page); + entry->page = NULL; + if (res < 0) { + res = 0; + break; + } + offset = filp->f_pos; } - NFS_COOKIES(inode) = NULL; - NFS_DIREOF(inode) = 0; + if (entry->page) + page_cache_release(entry->page); + if (res < 0 && res != -EBADCOOKIE) + return res; + return 0; } /* @@ -540,7 +510,8 @@ static inline int nfs_neg_need_reval(struct dentry *dentry) */ static int nfs_lookup_revalidate(struct dentry * dentry, int flags) { - struct dentry * parent = dentry->d_parent; + struct dentry *dir = dentry->d_parent; + struct inode *dir_i = dir->d_inode; struct inode * inode = dentry->d_inode; int error; struct nfs_fh fhandle; @@ -559,7 +530,7 @@ static int nfs_lookup_revalidate(struct dentry * dentry, int flags) if (is_bad_inode(inode)) { dfprintk(VFS, "nfs_lookup_validate: %s/%s has dud inode\n", - parent->d_name.name, dentry->d_name.name); + dir->d_name.name, dentry->d_name.name); goto out_bad; } @@ -574,13 +545,14 @@ static int nfs_lookup_revalidate(struct dentry * dentry, int flags) /* * Do a new lookup and check the dentry attributes. 
*/ - error = nfs_proc_lookup(NFS_DSERVER(parent), NFS_FH(parent), - dentry->d_name.name, &fhandle, &fattr); + error = NFS_PROTO(dir_i)->lookup(dir, &dentry->d_name, &fhandle, + &fattr); if (error) goto out_bad; /* Inode number matches? */ - if (NFS_FSID(inode) != fattr.fsid || + if (!(fattr.valid & NFS_ATTR_FATTR) || + NFS_FSID(inode) != fattr.fsid || NFS_FILEID(inode) != fattr.fileid) goto out_bad; @@ -603,10 +575,9 @@ out_bad: goto out_valid; d_drop(dentry); /* Purge readdir caches. */ - if (dentry->d_parent->d_inode) { - nfs_zap_caches(dentry->d_parent->d_inode); - NFS_CACHEINV(dentry->d_parent->d_inode); - } + nfs_zap_caches(dir_i); + if (inode && S_ISDIR(inode->i_mode)) + nfs_zap_caches(inode); return 0; } @@ -687,18 +658,19 @@ static void show_dentry(struct list_head * dlist) #endif /* NFS_PARANOIA */ #endif /* 0 */ -static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry) +static struct dentry *nfs_lookup(struct inode *dir_i, struct dentry * dentry) { + struct dentry *dir = dentry->d_parent; struct inode *inode; int error; struct nfs_fh fhandle; struct nfs_fattr fattr; dfprintk(VFS, "NFS: lookup(%s/%s)\n", - dentry->d_parent->d_name.name, dentry->d_name.name); + dir->d_name.name, dentry->d_name.name); error = -ENAMETOOLONG; - if (dentry->d_name.len > NFS_MAXNAMLEN) + if (dentry->d_name.len > NFS_SERVER(dir_i)->namelen) goto out; error = -ENOMEM; @@ -709,8 +681,8 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry) } dentry->d_op = &nfs_dentry_operations; - error = nfs_proc_lookup(NFS_SERVER(dir), NFS_FH(dentry->d_parent), - dentry->d_name.name, &fhandle, &fattr); + error = NFS_PROTO(dir_i)->lookup(dir, &dentry->d_name, &fhandle, + &fattr); inode = NULL; if (error == -ENOENT) goto no_entry; @@ -743,6 +715,7 @@ static int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle, nfs_renew_times(dentry); error = 0; } + NFS_CACHEINV(dentry->d_parent->d_inode); return error; } @@ -752,29 +725,32 @@ static int 
nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle, * that the operation succeeded on the server, but an error in the * reply path made it appear to have failed. */ -static int nfs_create(struct inode *dir, struct dentry *dentry, int mode) +static int nfs_create(struct inode *dir_i, struct dentry *dentry, int mode) { - int error; + struct dentry *dir = dentry->d_parent; struct iattr attr; struct nfs_fattr fattr; struct nfs_fh fhandle; + int error; dfprintk(VFS, "NFS: create(%x/%ld, %s\n", - dir->i_dev, dir->i_ino, dentry->d_name.name); + dir_i->i_dev, dir_i->i_ino, dentry->d_name.name); attr.ia_mode = mode; attr.ia_valid = ATTR_MODE; /* - * Invalidate the dir cache before the operation to avoid a race. + * The 0 argument passed into the create function should one day + * contain the O_EXCL flag if requested. This allows NFSv3 to + * select the appropriate create strategy. Currently open_namei + * does not pass the create flags. */ - invalidate_inode_pages(dir); - nfs_flush_dircache(dir); - error = nfs_proc_create(NFS_SERVER(dir), NFS_FH(dentry->d_parent), - dentry->d_name.name, &attr, &fhandle, &fattr); - if (!error) + nfs_zap_caches(dir_i); + error = NFS_PROTO(dir_i)->create(dir, &dentry->d_name, + &attr, 0, &fhandle, &fattr); + if (!error && fhandle.size != 0) error = nfs_instantiate(dentry, &fhandle, &fattr); - if (error) + if (error || fhandle.size == 0) d_drop(dentry); return error; } @@ -782,31 +758,26 @@ static int nfs_create(struct inode *dir, struct dentry *dentry, int mode) /* * See comments for nfs_proc_create regarding failed operations. 
*/ -static int nfs_mknod(struct inode *dir, struct dentry *dentry, int mode, int rdev) +static int nfs_mknod(struct inode *dir_i, struct dentry *dentry, int mode, int rdev) { - int error; + struct dentry *dir = dentry->d_parent; struct iattr attr; struct nfs_fattr fattr; struct nfs_fh fhandle; + int error; dfprintk(VFS, "NFS: mknod(%x/%ld, %s\n", - dir->i_dev, dir->i_ino, dentry->d_name.name); + dir_i->i_dev, dir_i->i_ino, dentry->d_name.name); attr.ia_mode = mode; attr.ia_valid = ATTR_MODE; - /* FIXME: move this to a special nfs_proc_mknod() */ - if (S_ISCHR(mode) || S_ISBLK(mode)) { - attr.ia_size = rdev; /* get out your barf bag */ - attr.ia_valid |= ATTR_SIZE; - } - invalidate_inode_pages(dir); - nfs_flush_dircache(dir); - error = nfs_proc_create(NFS_SERVER(dir), NFS_FH(dentry->d_parent), - dentry->d_name.name, &attr, &fhandle, &fattr); - if (!error) + nfs_zap_caches(dir_i); + error = NFS_PROTO(dir_i)->mknod(dir, &dentry->d_name, &attr, rdev, + &fhandle, &fattr); + if (!error && fhandle.size != 0) error = nfs_instantiate(dentry, &fhandle, &fattr); - if (error) + if (error || fhandle.size == 0) d_drop(dentry); return error; } @@ -814,19 +785,21 @@ static int nfs_mknod(struct inode *dir, struct dentry *dentry, int mode, int rde /* * See comments for nfs_proc_create regarding failed operations. 
*/ -static int nfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) +static int nfs_mkdir(struct inode *dir_i, struct dentry *dentry, int mode) { - int error; + struct dentry *dir = dentry->d_parent; struct iattr attr; struct nfs_fattr fattr; struct nfs_fh fhandle; + int error; dfprintk(VFS, "NFS: mkdir(%x/%ld, %s\n", - dir->i_dev, dir->i_ino, dentry->d_name.name); + dir_i->i_dev, dir_i->i_ino, dentry->d_name.name); attr.ia_valid = ATTR_MODE; attr.ia_mode = mode | S_IFDIR; +#if 0 /* * Always drop the dentry, we can't always depend on * the fattr returned by the server (AIX seems to be @@ -834,44 +807,48 @@ static int nfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) * depending on potentially bogus information. */ d_drop(dentry); - invalidate_inode_pages(dir); - nfs_flush_dircache(dir); - error = nfs_proc_mkdir(NFS_DSERVER(dentry), NFS_FH(dentry->d_parent), - dentry->d_name.name, &attr, &fhandle, &fattr); - if (!error) - dir->i_nlink++; +#endif + nfs_zap_caches(dir_i); + dir_i->i_nlink++; + error = NFS_PROTO(dir_i)->mkdir(dir, &dentry->d_name, &attr, &fhandle, + &fattr); + if (!error && fhandle.size != 0) + error = nfs_instantiate(dentry, &fhandle, &fattr); + if (error || fhandle.size == 0) + d_drop(dentry); return error; } -static int nfs_rmdir(struct inode *dir, struct dentry *dentry) +static int nfs_rmdir(struct inode *dir_i, struct dentry *dentry) { + struct dentry *dir = dentry->d_parent; int error; dfprintk(VFS, "NFS: rmdir(%x/%ld, %s\n", - dir->i_dev, dir->i_ino, dentry->d_name.name); + dir_i->i_dev, dir_i->i_ino, dentry->d_name.name); - invalidate_inode_pages(dir); - nfs_flush_dircache(dir); - error = nfs_proc_rmdir(NFS_SERVER(dir), NFS_FH(dentry->d_parent), - dentry->d_name.name); + nfs_zap_caches(dir_i); + error = NFS_PROTO(dir_i)->rmdir(dir, &dentry->d_name); /* Update i_nlink and invalidate dentry. 
*/ if (!error) { d_drop(dentry); - if (dir->i_nlink) - dir->i_nlink--; + if (dir_i->i_nlink) + dir_i->i_nlink--; } return error; } -static int nfs_sillyrename(struct inode *dir, struct dentry *dentry) +static int nfs_sillyrename(struct inode *dir_i, struct dentry *dentry) { + struct dentry *dir = dentry->d_parent; static unsigned int sillycounter = 0; - const int i_inosize = sizeof(dir->i_ino)*2; + const int i_inosize = sizeof(dir_i->i_ino)*2; const int countersize = sizeof(sillycounter)*2; const int slen = strlen(".nfs") + i_inosize + countersize; char silly[slen+1]; + struct qstr qsilly; struct dentry *sdentry; int error = -EIO; @@ -923,11 +900,10 @@ dentry->d_parent->d_name.name, dentry->d_name.name); goto out; } while(sdentry->d_inode != NULL); /* need negative lookup */ - invalidate_inode_pages(dir); - nfs_flush_dircache(dir); - error = nfs_proc_rename(NFS_SERVER(dir), - NFS_FH(dentry->d_parent), dentry->d_name.name, - NFS_FH(dentry->d_parent), silly); + nfs_zap_caches(dir_i); + qsilly.name = silly; + qsilly.len = strlen(silly); + error = NFS_PROTO(dir_i)->rename(dir, &dentry->d_name, dir, &qsilly); if (!error) { nfs_renew_times(dentry); d_move(dentry, sdentry); @@ -948,7 +924,8 @@ out: */ static int nfs_safe_remove(struct dentry *dentry) { - struct inode *dir = dentry->d_parent->d_inode; + struct dentry *dir = dentry->d_parent; + struct inode *dir_i = dir->d_inode; struct inode *inode = dentry->d_inode; int error, rehash = 0; @@ -979,22 +956,22 @@ dentry->d_parent->d_name.name, dentry->d_name.name, dentry->d_count); d_drop(dentry); rehash = 1; } + nfs_zap_caches(dir_i); + error = NFS_PROTO(dir_i)->remove(dir, &dentry->d_name); + if (error < 0) + goto out; /* - * Update i_nlink and free the inode before unlinking. 
+ * Update i_nlink and free the inode */ if (inode) { if (inode->i_nlink) inode->i_nlink --; d_delete(dentry); } - invalidate_inode_pages(dir); - nfs_flush_dircache(dir); - error = nfs_proc_remove(NFS_SERVER(dir), NFS_FH(dentry->d_parent), - dentry->d_name.name); /* * Rehash the negative dentry if the operation succeeded. */ - if (!error && rehash) + if (rehash) d_add(dentry, NULL); out: return error; @@ -1023,16 +1000,22 @@ static int nfs_unlink(struct inode *dir, struct dentry *dentry) } static int -nfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname) +nfs_symlink(struct inode *dir_i, struct dentry *dentry, const char *symname) { + struct dentry *dir = dentry->d_parent; struct iattr attr; + struct nfs_fattr sym_attr; + struct nfs_fh sym_fh; + struct qstr qsymname; + unsigned int maxlen; int error; dfprintk(VFS, "NFS: symlink(%x/%ld, %s, %s)\n", - dir->i_dev, dir->i_ino, dentry->d_name.name, symname); + dir_i->i_dev, dir_i->i_ino, dentry->d_name.name, symname); error = -ENAMETOOLONG; - if (strlen(symname) > NFS_MAXPATHLEN) + maxlen = (NFS_PROTO(dir_i)->version==2) ? NFS2_MAXPATHLEN : NFS3_MAXPATHLEN; + if (strlen(symname) > maxlen) goto out; #ifdef NFS_PARANOIA @@ -1047,21 +1030,19 @@ dentry->d_parent->d_name.name, dentry->d_name.name); attr.ia_valid = ATTR_MODE; attr.ia_mode = S_IFLNK | S_IRWXUGO; - /* - * Drop the dentry in advance to force a new lookup. - * Since nfs_proc_symlink doesn't return a fattr, we - * can't instantiate the new inode. 
- */ - d_drop(dentry); - invalidate_inode_pages(dir); - nfs_flush_dircache(dir); - error = nfs_proc_symlink(NFS_SERVER(dir), NFS_FH(dentry->d_parent), - dentry->d_name.name, symname, &attr); - if (!error) { - nfs_renew_times(dentry->d_parent); - } else if (error == -EEXIST) { - printk("nfs_proc_symlink: %s/%s already exists??\n", - dentry->d_parent->d_name.name, dentry->d_name.name); + qsymname.name = symname; + qsymname.len = strlen(symname); + + nfs_zap_caches(dir_i); + error = NFS_PROTO(dir_i)->symlink(dir, &dentry->d_name, &qsymname, + &attr, &sym_fh, &sym_attr); + if (!error && sym_fh.size != 0 && (sym_attr.valid & NFS_ATTR_FATTR)) { + error = nfs_instantiate(dentry, &sym_fh, &sym_attr); + } else { + if (error == -EEXIST) + printk("nfs_proc_symlink: %s/%s already exists??\n", + dir->d_name.name, dentry->d_name.name); + d_drop(dentry); } out: @@ -1069,8 +1050,9 @@ out: } static int -nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) +nfs_link(struct dentry *old_dentry, struct inode *dir_i, struct dentry *dentry) { + struct dentry *dir = dentry->d_parent; struct inode *inode = old_dentry->d_inode; int error; @@ -1084,10 +1066,8 @@ nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) * we can't use the existing dentry. 
*/ d_drop(dentry); - invalidate_inode_pages(dir); - nfs_flush_dircache(dir); - error = nfs_proc_link(NFS_DSERVER(old_dentry), NFS_FH(old_dentry), - NFS_FH(dentry->d_parent), dentry->d_name.name); + nfs_zap_caches(dir_i); + error = NFS_PROTO(dir_i)->link(old_dentry, dir, &dentry->d_name); if (!error) { /* * Update the link count immediately, as some apps @@ -1197,14 +1177,12 @@ go_ahead: if (new_inode) d_delete(new_dentry); - invalidate_inode_pages(new_dir); - nfs_flush_dircache(new_dir); - invalidate_inode_pages(old_dir); - nfs_flush_dircache(old_dir); - error = nfs_proc_rename(NFS_DSERVER(old_dentry), - NFS_FH(old_dentry->d_parent), old_dentry->d_name.name, - NFS_FH(new_dentry->d_parent), new_dentry->d_name.name); - + nfs_zap_caches(new_dir); + nfs_zap_caches(old_dir); + error = NFS_PROTO(old_dir)->rename(old_dentry->d_parent, + &old_dentry->d_name, + new_dentry->d_parent, + &new_dentry->d_name); NFS_CACHEINV(old_dir); NFS_CACHEINV(new_dir); /* Update the dcache if needed */ @@ -1229,16 +1207,15 @@ int nfs_init_fhcache(void) if (nfs_fh_cachep == NULL) return -ENOMEM; - nfs_cookie_cachep = kmem_cache_create("nfs_dcookie", - sizeof(struct nfs_cookie_table), - 0, SLAB_HWCACHE_ALIGN, - NULL, NULL); - if (nfs_cookie_cachep == NULL) - return -ENOMEM; - return 0; } +void nfs_destroy_fhcache(void) +{ + if (kmem_cache_destroy(nfs_fh_cachep)) + printk(KERN_INFO "nfs_fh: not all structures were freed\n"); +} + /* * Local variables: * version-control: t diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 32d290c73..d5c3d0944 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -22,6 +22,7 @@ #include <linux/fcntl.h> #include <linux/stat.h> #include <linux/nfs_fs.h> +#include <linux/nfs_mount.h> #include <linux/mm.h> #include <linux/malloc.h> #include <linux/pagemap.h> @@ -215,10 +216,10 @@ nfs_lock(struct file *filp, int cmd, struct file_lock *fl) struct inode * inode = filp->f_dentry->d_inode; int status = 0; - dprintk("NFS: nfs_lock(f=%4x/%ld, t=%x, fl=%x, r=%ld:%ld)\n", + 
dprintk("NFS: nfs_lock(f=%4x/%ld, t=%x, fl=%x, r=%Ld:%Ld)\n", inode->i_dev, inode->i_ino, fl->fl_type, fl->fl_flags, - fl->fl_start, fl->fl_end); + (long long)fl->fl_start, (long long)fl->fl_end); if (!inode) return -EINVAL; diff --git a/fs/nfs/flushd.c b/fs/nfs/flushd.c index d36c3a9ae..800a42171 100644 --- a/fs/nfs/flushd.c +++ b/fs/nfs/flushd.c @@ -299,6 +299,5 @@ nfs_flushd_exit(struct rpc_task *task) cache->task = NULL; spin_unlock(&nfs_flushd_lock); wake_up(&cache->request_wait); - rpc_release_task(task); } diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index ca7e1b944..14c43cd24 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -27,6 +27,7 @@ #include <linux/sunrpc/clnt.h> #include <linux/sunrpc/stats.h> #include <linux/nfs_fs.h> +#include <linux/nfs_mount.h> #include <linux/nfs_flushd.h> #include <linux/lockd/bind.h> #include <linux/smp_lock.h> @@ -58,7 +59,32 @@ static struct super_operations nfs_sops = { umount_begin: nfs_umount_begin, }; +/* + * RPC cruft for NFS + */ struct rpc_stat nfs_rpcstat = { &nfs_program }; +static struct rpc_version * nfs_version[] = { + NULL, + NULL, + &nfs_version2, +#ifdef CONFIG_NFS_V3 + &nfs_version3, +#endif +}; + +struct rpc_program nfs_program = { + "nfs", + NFS_PROGRAM, + sizeof(nfs_version) / sizeof(nfs_version[0]), + nfs_version, + &nfs_rpcstat, +}; + +static inline unsigned long +nfs_fattr_to_ino_t(struct nfs_fattr *fattr) +{ + return nfs_fileid_to_ino_t(fattr->fileid); +} /* * The "read_inode" function doesn't actually do anything: @@ -83,6 +109,7 @@ nfs_read_inode(struct inode * inode) inode->u.nfs_i.npages = 0; NFS_CACHEINV(inode); NFS_ATTRTIMEO(inode) = NFS_MINATTRTIMEO(inode); + NFS_ATTRTIMEO_UPDATE(inode) = jiffies; } static void @@ -101,18 +128,12 @@ nfs_delete_inode(struct inode * inode) { dprintk("NFS: delete_inode(%x/%ld)\n", inode->i_dev, inode->i_ino); - lock_kernel(); - if (S_ISDIR(inode->i_mode)) { - nfs_free_dircache(inode); - } else { - /* - * The following can never actually happen... 
- */ - if (nfs_have_writebacks(inode)) { - printk(KERN_ERR "nfs_delete_inode: inode %ld has pending RPC requests\n", inode->i_ino); - } + /* + * The following can never actually happen... + */ + if (nfs_have_writebacks(inode)) { + printk(KERN_ERR "nfs_delete_inode: inode %ld has pending RPC requests\n", inode->i_ino); } - unlock_kernel(); clear_inode(inode); } @@ -153,33 +174,68 @@ nfs_umount_begin(struct super_block *sb) rpc_killall_tasks(rpc); } -/* - * Compute and set NFS server blocksize - */ -static unsigned int -nfs_block_size(unsigned int bsize, unsigned char *nrbitsp) -{ - if (bsize < 1024) - bsize = NFS_DEF_FILE_IO_BUFFER_SIZE; - else if (bsize >= NFS_MAX_FILE_IO_BUFFER_SIZE) - bsize = NFS_MAX_FILE_IO_BUFFER_SIZE; +static inline unsigned long +nfs_block_bits(unsigned long bsize, unsigned char *nrbitsp) +{ /* make sure blocksize is a power of two */ if ((bsize & (bsize - 1)) || nrbitsp) { - unsigned int nrbits; + unsigned char nrbits; for (nrbits = 31; nrbits && !(bsize & (1 << nrbits)); nrbits--) ; bsize = 1 << nrbits; if (nrbitsp) *nrbitsp = nrbits; - if (bsize < NFS_DEF_FILE_IO_BUFFER_SIZE) - bsize = NFS_DEF_FILE_IO_BUFFER_SIZE; } return bsize; } +/* + * Calculate the number of 512byte blocks used. + */ +static inline unsigned long +nfs_calc_block_size(u64 tsize) +{ + loff_t used = (tsize + 511) / 512; + return (used > ULONG_MAX) ? ULONG_MAX : used; +} + +/* + * Compute and set NFS server blocksize + */ +static inline unsigned long +nfs_block_size(unsigned long bsize, unsigned char *nrbitsp) +{ + if (bsize < 1024) + bsize = NFS_DEF_FILE_IO_BUFFER_SIZE; + else if (bsize >= NFS_MAX_FILE_IO_BUFFER_SIZE) + bsize = NFS_MAX_FILE_IO_BUFFER_SIZE; + + return nfs_block_bits(bsize, nrbitsp); +} + +/* + * Obtain the root inode of the file system. 
+ */ +static struct inode * +nfs_get_root(struct super_block *sb, struct nfs_fh *rootfh) +{ + struct nfs_server *server = &sb->u.nfs_sb.s_server; + struct nfs_fattr fattr; + struct inode *inode; + int error; + + if ((error = server->rpc_ops->getroot(server, rootfh, &fattr)) < 0) { + printk(KERN_NOTICE "nfs_get_root: getattr error = %d\n", -error); + return NULL; + } + + inode = __nfs_fhget(sb, &fattr); + return inode; +} + extern struct nfs_fh *nfs_fh_alloc(void); extern void nfs_fh_free(struct nfs_fh *p); @@ -194,19 +250,20 @@ nfs_read_super(struct super_block *sb, void *raw_data, int silent) { struct nfs_mount_data *data = (struct nfs_mount_data *) raw_data; struct nfs_server *server; - struct rpc_xprt *xprt; - struct rpc_clnt *clnt; - struct nfs_fh *root_fh; - struct inode *root_inode; + struct rpc_xprt *xprt = NULL; + struct rpc_clnt *clnt = NULL; + struct nfs_fh *root = &data->root, *root_fh, fh; + struct inode *root_inode = NULL; unsigned int authflavor; - int tcp; struct sockaddr_in srvaddr; struct rpc_timeout timeparms; - struct nfs_fattr fattr; + struct nfs_fsinfo fsinfo; + int tcp, version, maxlen; if (!data) goto out_miss_args; + memset(&fh, 0, sizeof(fh)); if (data->version != NFS_MOUNT_VERSION) { printk("nfs warning: mount version %s than kernel\n", data->version < NFS_MOUNT_VERSION ? 
"older" : "newer"); @@ -214,6 +271,12 @@ nfs_read_super(struct super_block *sb, void *raw_data, int silent) data->namlen = 0; if (data->version < 3) data->bsize = 0; + if (data->version < 4) { + data->flags &= ~NFS_MOUNT_VER3; + root = &fh; + root->size = NFS2_FHSIZE; + memcpy(root->data, data->old_root.data, NFS2_FHSIZE); + } } /* We now require that the mount process passes the remote address */ @@ -225,12 +288,12 @@ nfs_read_super(struct super_block *sb, void *raw_data, int silent) sb->s_magic = NFS_SUPER_MAGIC; sb->s_op = &nfs_sops; + sb->s_blocksize_bits = 0; sb->s_blocksize = nfs_block_size(data->bsize, &sb->s_blocksize_bits); - sb->u.nfs_sb.s_root = data->root; server = &sb->u.nfs_sb.s_server; server->rsize = nfs_block_size(data->rsize, NULL); server->wsize = nfs_block_size(data->wsize, NULL); - server->flags = data->flags; + server->flags = data->flags & NFS_MOUNT_FLAGMASK; if (data->flags & NFS_MOUNT_NOAC) { data->acregmin = data->acregmax = 0; @@ -241,11 +304,32 @@ nfs_read_super(struct super_block *sb, void *raw_data, int silent) server->acdirmin = data->acdirmin*HZ; server->acdirmax = data->acdirmax*HZ; + server->namelen = data->namlen; server->hostname = kmalloc(strlen(data->hostname) + 1, GFP_KERNEL); if (!server->hostname) goto out_unlock; strcpy(server->hostname, data->hostname); + nfsv3_try_again: + /* Check NFS protocol revision and initialize RPC op vector + * and file handle pool. */ + if (data->flags & NFS_MOUNT_VER3) { +#ifdef CONFIG_NFS_V3 + server->rpc_ops = &nfs_v3_clientops; + version = 3; + if (data->version < 4) { + printk(KERN_NOTICE "NFS: NFSv3 not supported by mount program.\n"); + goto out_unlock; + } +#else + printk(KERN_NOTICE "NFS: NFSv3 not supported.\n"); + goto out_unlock; +#endif + } else { + server->rpc_ops = &nfs_v2_clientops; + version = 2; + } + /* Which protocol do we use? 
*/ tcp = (data->flags & NFS_MOUNT_TCP); @@ -255,6 +339,11 @@ nfs_read_super(struct super_block *sb, void *raw_data, int silent) timeparms.to_maxval = tcp? RPC_MAX_TCP_TIMEOUT : RPC_MAX_UDP_TIMEOUT; timeparms.to_exponential = 1; + if (!timeparms.to_initval) + timeparms.to_initval = (tcp ? 600 : 11) * HZ / 10; + if (!timeparms.to_retries) + timeparms.to_retries = 5; + /* Now create transport and client */ xprt = xprt_create_proto(tcp? IPPROTO_TCP : IPPROTO_UDP, &srvaddr, &timeparms); @@ -269,7 +358,7 @@ nfs_read_super(struct super_block *sb, void *raw_data, int silent) authflavor = RPC_AUTH_KRB; clnt = rpc_create_client(xprt, server->hostname, &nfs_program, - NFS_VERSION, authflavor); + version, authflavor); if (clnt == NULL) goto out_no_client; @@ -289,20 +378,68 @@ nfs_read_super(struct super_block *sb, void *raw_data, int silent) root_fh = nfs_fh_alloc(); if (!root_fh) goto out_no_fh; - *root_fh = data->root; - - if (nfs_proc_getattr(server, root_fh, &fattr) != 0) - goto out_no_fattr; + memcpy((u8*)root_fh, (u8*)root, sizeof(*root)); + + /* Did getting the root inode fail? 
*/ + if (!(root_inode = nfs_get_root(sb, root)) + && (data->flags & NFS_MOUNT_VER3)) { + data->flags &= ~NFS_MOUNT_VER3; + nfs_fh_free(root_fh); + rpciod_down(); + rpc_shutdown_client(server->client); + goto nfsv3_try_again; + } - root_inode = __nfs_fhget(sb, &fattr); if (!root_inode) goto out_no_root; sb->s_root = d_alloc_root(root_inode); if (!sb->s_root) goto out_no_root; + sb->s_root->d_op = &nfs_dentry_operations; sb->s_root->d_fsdata = root_fh; + /* Get some general file system info */ + if (server->rpc_ops->statfs(server, root, &fsinfo) >= 0) { + if (server->namelen == 0) + server->namelen = fsinfo.namelen; + } else { + printk(KERN_NOTICE "NFS: cannot retrieve file system info.\n"); + goto out_no_root; + } + + /* Work out a lot of parameters */ + if (data->rsize == 0) + server->rsize = nfs_block_size(fsinfo.rtpref, NULL); + if (data->wsize == 0) + server->wsize = nfs_block_size(fsinfo.wtpref, NULL); + server->dtsize = nfs_block_size(fsinfo.dtpref, NULL); + /* NFSv3: we don't have bsize, but rather rtmult and wtmult... */ + if (!fsinfo.bsize) + fsinfo.bsize = (fsinfo.rtmult>fsinfo.wtmult) ? fsinfo.rtmult : fsinfo.wtmult; + /* Also make sure we don't go below rsize/wsize since + * RPC calls are expensive */ + if (fsinfo.bsize < server->rsize) + fsinfo.bsize = server->rsize; + if (fsinfo.bsize < server->wsize) + fsinfo.bsize = server->wsize; + + if (data->bsize == 0) + sb->s_blocksize = nfs_block_bits(fsinfo.bsize, &sb->s_blocksize_bits); + if (server->rsize > fsinfo.rtmax) + server->rsize = fsinfo.rtmax; + if (server->rsize > PAGE_CACHE_SIZE) + server->rsize = PAGE_CACHE_SIZE; + if (server->wsize > fsinfo.wtmax) + server->wsize = fsinfo.wtmax; + if (server->wsize > NFS_WRITE_MAXIOV << PAGE_CACHE_SHIFT) + server->wsize = NFS_WRITE_MAXIOV << PAGE_CACHE_SHIFT; + + maxlen = (version == 2) ? 
NFS2_MAXNAMLEN : NFS3_MAXNAMLEN; + + if (server->namelen == 0 || server->namelen > maxlen) + server->namelen = maxlen; + /* Fire up the writeback cache */ if (nfs_reqlist_alloc(server) < 0) { printk(KERN_NOTICE "NFS: cannot initialize writeback cache.\n"); @@ -322,11 +459,6 @@ nfs_read_super(struct super_block *sb, void *raw_data, int silent) out_no_root: printk("nfs_read_super: get root inode failed\n"); iput(root_inode); - goto out_free_fh; - -out_no_fattr: - printk("nfs_read_super: get root fattr failed\n"); -out_free_fh: nfs_fh_free(root_fh); out_no_fh: rpciod_down(); @@ -366,21 +498,33 @@ out_fail: static int nfs_statfs(struct super_block *sb, struct statfs *buf) { - int error; + struct nfs_server *server = &sb->u.nfs_sb.s_server; + unsigned char blockbits; + unsigned long blockres; struct nfs_fsinfo res; + int error; - error = nfs_proc_statfs(&sb->u.nfs_sb.s_server, &sb->u.nfs_sb.s_root, - &res); - if (error) { - printk("nfs_statfs: statfs error = %d\n", -error); - res.bsize = res.blocks = res.bfree = res.bavail = -1; - } + error = server->rpc_ops->statfs(server, NFS_FH(sb->s_root), &res); buf->f_type = NFS_SUPER_MAGIC; - buf->f_bsize = res.bsize; - buf->f_blocks = res.blocks; - buf->f_bfree = res.bfree; - buf->f_bavail = res.bavail; - buf->f_namelen = NAME_MAX; + if (error < 0) + goto out_err; + + if (res.bsize == 0) + res.bsize = sb->s_blocksize; + buf->f_bsize = nfs_block_bits(res.bsize, &blockbits); + blockres = (1 << blockbits) - 1; + buf->f_blocks = (res.tbytes + blockres) >> blockbits; + buf->f_bfree = (res.fbytes + blockres) >> blockbits; + buf->f_bavail = (res.abytes + blockres) >> blockbits; + buf->f_files = res.tfiles; + buf->f_ffree = res.afiles; + if (res.namelen == 0 || res.namelen > server->namelen) + res.namelen = server->namelen; + buf->f_namelen = res.namelen; + return 0; + out_err: + printk("nfs_statfs: statfs error = %d\n", -error); + buf->f_bsize = buf->f_blocks = buf->f_bfree = buf->f_bavail = -1; return 0; } @@ -429,11 +573,12 @@ void 
nfs_zap_caches(struct inode *inode) { NFS_ATTRTIMEO(inode) = NFS_MINATTRTIMEO(inode); - NFS_CACHEINV(inode); + NFS_ATTRTIMEO_UPDATE(inode) = jiffies; invalidate_inode_pages(inode); - if (S_ISDIR(inode->i_mode)) - nfs_flush_dircache(inode); + + memset(NFS_COOKIEVERF(inode), 0, sizeof(NFS_COOKIEVERF(inode))); + NFS_CACHEINV(inode); } /* @@ -481,9 +626,16 @@ nfs_fill_inode(struct inode *inode, struct nfs_fattr *fattr) * Preset the size and mtime, as there's no need * to invalidate the caches. */ - inode->i_size = fattr->size; - inode->i_mtime = fattr->mtime.seconds; - NFS_OLDMTIME(inode) = fattr->mtime.seconds; + inode->i_size = nfs_size_to_loff_t(fattr->size); + inode->i_mtime = nfs_time_to_secs(fattr->mtime); + inode->i_atime = nfs_time_to_secs(fattr->atime); + inode->i_ctime = nfs_time_to_secs(fattr->ctime); + NFS_CACHE_CTIME(inode) = fattr->ctime; + NFS_CACHE_MTIME(inode) = fattr->mtime; + NFS_CACHE_ATIME(inode) = fattr->atime; + NFS_CACHE_ISIZE(inode) = fattr->size; + NFS_ATTRTIMEO(inode) = NFS_MINATTRTIMEO(inode); + NFS_ATTRTIMEO_UPDATE(inode) = jiffies; } nfs_refresh_inode(inode, fattr); } @@ -551,9 +703,9 @@ nfs_fhget(struct dentry *dentry, struct nfs_fh *fhandle, { struct super_block *sb = dentry->d_sb; - dprintk("NFS: nfs_fhget(%s/%s fileid=%d)\n", + dprintk("NFS: nfs_fhget(%s/%s fileid=%Ld)\n", dentry->d_parent->d_name.name, dentry->d_name.name, - fattr->fileid); + (long long)fattr->fileid); /* Install the file handle in the dentry */ *((struct nfs_fh *) dentry->d_fsdata) = *fhandle; @@ -572,7 +724,7 @@ nfs_fhget(struct dentry *dentry, struct nfs_fh *fhandle, inode->i_sb = sb; inode->i_dev = sb->s_dev; inode->i_flags = 0; - inode->i_ino = fattr->fileid; + inode->i_ino = nfs_fattr_to_ino_t(fattr); nfs_read_inode(inode); nfs_fill_inode(inode, fattr); inode->u.nfs_i.flags |= NFS_IS_SNAPSHOT; @@ -598,12 +750,15 @@ __nfs_fhget(struct super_block *sb, struct nfs_fattr *fattr) struct inode *inode = NULL; unsigned long ino; + if ((fattr->valid & NFS_ATTR_FATTR) == 
0) + goto out_no_inode; + if (!fattr->nlink) { printk("NFS: Buggy server - nlink == 0!\n"); goto out_no_inode; } - ino = fattr->fileid; + ino = nfs_fattr_to_ino_t(fattr); while((inode = iget4(sb, ino, nfs_find_actor, fattr)) != NULL) { @@ -666,8 +821,7 @@ printk("nfs_notify_change: revalidate failed, error=%d\n", error); if (error) goto out; - error = nfs_proc_setattr(NFS_DSERVER(dentry), NFS_FH(dentry), - &fattr, attr); + error = NFS_PROTO(inode)->setattr(dentry, &fattr, attr); if (error) goto out; /* @@ -676,13 +830,21 @@ printk("nfs_notify_change: revalidate failed, error=%d\n", error); */ if (attr->ia_valid & ATTR_SIZE) { if (attr->ia_size != fattr.size) - printk("nfs_notify_change: attr=%Ld, fattr=%d??\n", - (long long) attr->ia_size, fattr.size); - inode->i_mtime = fattr.mtime.seconds; + printk("nfs_notify_change: attr=%Ld, fattr=%Ld??\n", + (long long) attr->ia_size, (long long)fattr.size); vmtruncate(inode, attr->ia_size); } - if (attr->ia_valid & ATTR_MTIME) - inode->i_mtime = fattr.mtime.seconds; + + /* + * If we changed the size or mtime, update the inode + * now to avoid invalidating the page cache. + */ + if (!(fattr.valid & NFS_ATTR_WCC)) { + fattr.pre_size = NFS_CACHE_ISIZE(inode); + fattr.pre_mtime = NFS_CACHE_MTIME(inode); + fattr.pre_ctime = NFS_CACHE_CTIME(inode); + fattr.valid |= NFS_ATTR_WCC; + } error = nfs_refresh_inode(inode, &fattr); out: return error; @@ -695,24 +857,13 @@ out: int nfs_wait_on_inode(struct inode *inode, int flag) { - struct task_struct *tsk = current; - DECLARE_WAITQUEUE(wait, tsk); - int intr, error = 0; - - intr = NFS_SERVER(inode)->flags & NFS_MOUNT_INTR; - add_wait_queue(&inode->i_wait, &wait); - for (;;) { - set_task_state(tsk, (intr ? 
TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE)); - error = 0; - if (!(NFS_FLAGS(inode) & flag)) - break; - error = -ERESTARTSYS; - if (intr && signalled()) - break; - schedule(); - } - set_task_state(tsk, TASK_RUNNING); - remove_wait_queue(&inode->i_wait, &wait); + struct rpc_clnt *clnt = NFS_CLIENT(inode); + int error; + if (!(NFS_FLAGS(inode) & flag)) + return 0; + inode->i_count++; + error = nfs_wait_event(clnt, inode->i_wait, !(NFS_FLAGS(inode) & flag)); + iput(inode); return error; } @@ -768,30 +919,32 @@ __nfs_revalidate_inode(struct nfs_server *server, struct dentry *dentry) } NFS_FLAGS(inode) |= NFS_INO_REVALIDATING; - status = nfs_proc_getattr(server, NFS_FH(dentry), &fattr); + status = NFS_PROTO(inode)->getattr(dentry, &fattr); if (status) { + struct dentry *dir = dentry->d_parent; + struct inode *dir_i = dir->d_inode; int error; u32 *fh; struct nfs_fh fhandle; dfprintk(PAGECACHE, "nfs_revalidate_inode: %s/%s getattr failed, ino=%ld, error=%d\n", - dentry->d_parent->d_name.name, - dentry->d_name.name, inode->i_ino, status); + dir->d_name.name, dentry->d_name.name, + inode->i_ino, status); if (status != -ESTALE) goto out; /* * A "stale filehandle" error ... show the current fh * and find out what the filehandle should be. 
*/ - fh = (u32 *) NFS_FH(dentry); + fh = (u32 *) NFS_FH(dentry)->data; dfprintk(PAGECACHE, "NFS: bad fh %08x%08x%08x%08x%08x%08x%08x%08x\n", fh[0],fh[1],fh[2],fh[3],fh[4],fh[5],fh[6],fh[7]); - error = nfs_proc_lookup(server, NFS_FH(dentry->d_parent), - dentry->d_name.name, &fhandle, &fattr); + error = NFS_PROTO(dir_i)->lookup(dir, &dentry->d_name, + &fhandle, &fattr); if (error) { dfprintk(PAGECACHE, "NFS: lookup failed, error=%d\n", error); goto out; } - fh = (u32 *) &fhandle; + fh = (u32 *) fhandle.data; dfprintk(PAGECACHE, " %08x%08x%08x%08x%08x%08x%08x%08x\n", fh[0],fh[1],fh[2],fh[3],fh[4],fh[5],fh[6],fh[7]); goto out; @@ -827,19 +980,37 @@ out: int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) { - int invalid = 0; - int error = -EIO; - - dfprintk(VFS, "NFS: refresh_inode(%x/%ld ct=%d)\n", - inode->i_dev, inode->i_ino, inode->i_count); + __u64 new_size, new_mtime; + loff_t new_isize; + int invalid = 0; + int error = -EIO; if (!inode || !fattr) { - printk("nfs_refresh_inode: inode or fattr is NULL\n"); + printk(KERN_ERR "nfs_refresh_inode: inode or fattr is NULL\n"); + goto out; + } + if (inode->i_mode == 0) { + printk(KERN_ERR "nfs_refresh_inode: empty inode\n"); goto out; } - if (inode->i_ino != fattr->fileid) { - printk("nfs_refresh_inode: mismatch, ino=%ld, fattr=%d\n", - inode->i_ino, fattr->fileid); + + if ((fattr->valid & NFS_ATTR_FATTR) == 0) + goto out; + + if (is_bad_inode(inode)) + goto out; + + dfprintk(VFS, "NFS: refresh_inode(%x/%ld ct=%d info=0x%x)\n", + inode->i_dev, inode->i_ino, inode->i_count, + fattr->valid); + + + if (NFS_FSID(inode) != fattr->fsid || + NFS_FILEID(inode) != fattr->fileid) { + printk(KERN_ERR "nfs_refresh_inode: inode number mismatch\n" + "expected (0x%Lx/0x%Lx), got (0x%Lx/0x%Lx)\n", + (long long)NFS_FSID(inode), (long long)NFS_FILEID(inode), + (long long)fattr->fsid, (long long)fattr->fileid); goto out; } @@ -849,54 +1020,101 @@ nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) if 
((inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) goto out_changed; - inode->i_mode = fattr->mode; - inode->i_nlink = fattr->nlink; - inode->i_uid = fattr->uid; - inode->i_gid = fattr->gid; + new_mtime = fattr->mtime; + new_size = fattr->size; + new_isize = nfs_size_to_loff_t(fattr->size); - inode->i_blocks = fattr->blocks; - inode->i_atime = fattr->atime.seconds; - inode->i_ctime = fattr->ctime.seconds; + error = 0; /* * Update the read time so we don't revalidate too often. */ NFS_READTIME(inode) = jiffies; - error = 0; /* - * If we have pending write-back entries, we don't want - * to look at the size or the mtime the server sends us - * too closely, as we're in the middle of modifying them. + * Note: NFS_CACHE_ISIZE(inode) reflects the state of the cache. + * NOT inode->i_size!!! */ - if (nfs_have_writebacks(inode)) - goto out; - - if (inode->i_size != fattr->size) { + if (NFS_CACHE_ISIZE(inode) != new_size) { #ifdef NFS_DEBUG_VERBOSE -printk("NFS: size change on %x/%ld\n", inode->i_dev, inode->i_ino); + printk(KERN_DEBUG "NFS: isize change on %x/%ld\n", inode->i_dev, inode->i_ino); #endif - inode->i_size = fattr->size; invalid = 1; } - if (inode->i_mtime != fattr->mtime.seconds) { + /* + * Note: we don't check inode->i_mtime since pipes etc. + * can change this value in VFS without requiring a + * cache revalidation. + */ + if (NFS_CACHE_MTIME(inode) != new_mtime) { #ifdef NFS_DEBUG_VERBOSE -printk("NFS: mtime change on %x/%ld\n", inode->i_dev, inode->i_ino); + printk(KERN_DEBUG "NFS: mtime change on %x/%ld\n", inode->i_dev, inode->i_ino); #endif - inode->i_mtime = fattr->mtime.seconds; invalid = 1; } - if (invalid) - goto out_invalid; + /* Check Weak Cache Consistency data. + * If size and mtime match the pre-operation values, we can + * assume that any attribute changes were caused by our NFS + * operation, so there's no need to invalidate the caches. 
+ */ + if ((fattr->valid & NFS_ATTR_WCC) + && NFS_CACHE_ISIZE(inode) == fattr->pre_size + && NFS_CACHE_MTIME(inode) == fattr->pre_mtime) { + invalid = 0; + } + + /* + * If we have pending writebacks, things can get + * messy. + */ + if (nfs_have_writebacks(inode) && new_isize < inode->i_size) + new_isize = inode->i_size; + + NFS_CACHE_CTIME(inode) = fattr->ctime; + inode->i_ctime = nfs_time_to_secs(fattr->ctime); + /* If we've been messing around with atime, don't + * update it. Save the server value in NFS_CACHE_ATIME. + */ + NFS_CACHE_ATIME(inode) = fattr->atime; + if (time_before(inode->i_atime, nfs_time_to_secs(fattr->atime))) + inode->i_atime = nfs_time_to_secs(fattr->atime); + NFS_CACHE_MTIME(inode) = new_mtime; + inode->i_mtime = nfs_time_to_secs(new_mtime); + + NFS_CACHE_ISIZE(inode) = new_size; + inode->i_size = new_isize; + + inode->i_mode = fattr->mode; + inode->i_nlink = fattr->nlink; + inode->i_uid = fattr->uid; + inode->i_gid = fattr->gid; + + if (fattr->valid & NFS_ATTR_FATTR_V3) { + /* + * report the blocks in 512byte units + */ + inode->i_blocks = nfs_calc_block_size(fattr->du.nfs3.used); + inode->i_blksize = inode->i_sb->s_blocksize; + } else { + inode->i_blocks = fattr->du.nfs2.blocks; + inode->i_blksize = fattr->du.nfs2.blocksize; + } + inode->i_rdev = 0; + if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) + inode->i_rdev = to_kdev_t(fattr->rdev); + /* Update attrtimeo value */ - if (fattr->mtime.seconds == NFS_OLDMTIME(inode)) { + if (!invalid && time_after(jiffies, NFS_ATTRTIMEO_UPDATE(inode)+NFS_ATTRTIMEO(inode))) { if ((NFS_ATTRTIMEO(inode) <<= 1) > NFS_MAXATTRTIMEO(inode)) NFS_ATTRTIMEO(inode) = NFS_MAXATTRTIMEO(inode); + NFS_ATTRTIMEO_UPDATE(inode) = jiffies; } - NFS_OLDMTIME(inode) = fattr->mtime.seconds; + + if (invalid) + nfs_zap_caches(inode); out: return error; @@ -906,22 +1124,16 @@ out_changed: * Big trouble! The inode has become a different object. 
*/ #ifdef NFS_PARANOIA -printk("nfs_refresh_inode: inode %ld mode changed, %07o to %07o\n", -inode->i_ino, inode->i_mode, fattr->mode); + printk(KERN_DEBUG "nfs_refresh_inode: inode %ld mode changed, %07o to %07o\n", + inode->i_ino, inode->i_mode, fattr->mode); #endif /* * No need to worry about unhashing the dentry, as the * lookup validation will know that the inode is bad. + * (But we fall through to invalidate the caches.) */ nfs_invalidate_inode(inode); goto out; - -out_invalid: -#ifdef NFS_DEBUG_VERBOSE -printk("nfs_refresh_inode: invalidating %ld pages\n", inode->i_nrpages); -#endif - nfs_zap_caches(inode); - goto out; } /* @@ -930,7 +1142,9 @@ printk("nfs_refresh_inode: invalidating %ld pages\n", inode->i_nrpages); static DECLARE_FSTYPE(nfs_fs_type, "nfs", nfs_read_super, 0); extern int nfs_init_fhcache(void); +extern void nfs_destroy_fhcache(void); extern int nfs_init_nfspagecache(void); +extern void nfs_destroy_nfspagecache(void); /* * Initialize NFS @@ -972,6 +1186,8 @@ init_module(void) void cleanup_module(void) { + nfs_destroy_nfspagecache(); + nfs_destroy_fhcache(); #ifdef CONFIG_PROC_FS rpc_proc_unregister("nfs"); #endif diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c index f74984de2..0adfacd3e 100644 --- a/fs/nfs/mount_clnt.c +++ b/fs/nfs/mount_clnt.c @@ -30,7 +30,9 @@ #define MOUNT_UMNT 3 */ -static struct rpc_clnt * mnt_create(char *, struct sockaddr_in *); +static int nfs_gen_mount(struct sockaddr_in *, + char *, struct nfs_fh *, int); +static struct rpc_clnt * mnt_create(char *, struct sockaddr_in *, int); extern struct rpc_program mnt_program; struct mnt_fhstatus { @@ -44,24 +46,38 @@ struct mnt_fhstatus { int nfs_mount(struct sockaddr_in *addr, char *path, struct nfs_fh *fh) { + return nfs_gen_mount(addr, path, fh, NFS_MNT_VERSION); +} + +int +nfs3_mount(struct sockaddr_in *addr, char *path, struct nfs_fh *fh) +{ + return nfs_gen_mount(addr, path, fh, NFS_MNT3_VERSION); +} + +static int +nfs_gen_mount(struct sockaddr_in *addr, char 
*path, struct nfs_fh *fh, int version) +{ struct rpc_clnt *mnt_clnt; struct mnt_fhstatus result = { 0, fh }; char hostname[32]; int status; + int call; dprintk("NFS: nfs_mount(%08x:%s)\n", (unsigned)ntohl(addr->sin_addr.s_addr), path); strcpy(hostname, in_ntoa(addr->sin_addr.s_addr)); - if (!(mnt_clnt = mnt_create(hostname, addr))) + if (!(mnt_clnt = mnt_create(hostname, addr, version))) return -EACCES; - status = rpc_call(mnt_clnt, NFS_MNTPROC_MNT, path, &result, 0); + call = (version == 3) ? MOUNTPROC3_MNT : MNTPROC_MNT; + status = rpc_call(mnt_clnt, call, path, &result, 0); return status < 0? status : (result.status? -EACCES : 0); } static struct rpc_clnt * -mnt_create(char *hostname, struct sockaddr_in *srvaddr) +mnt_create(char *hostname, struct sockaddr_in *srvaddr, int version) { struct rpc_xprt *xprt; struct rpc_clnt *clnt; @@ -70,7 +86,7 @@ mnt_create(char *hostname, struct sockaddr_in *srvaddr) return NULL; clnt = rpc_create_client(xprt, hostname, - &mnt_program, NFS_MNT_VERSION, + &mnt_program, version, RPC_AUTH_NULL); if (!clnt) { xprt_destroy(xprt); @@ -104,8 +120,26 @@ xdr_encode_dirpath(struct rpc_rqst *req, u32 *p, const char *path) static int xdr_decode_fhstatus(struct rpc_rqst *req, u32 *p, struct mnt_fhstatus *res) { - if ((res->status = ntohl(*p++)) == 0) - memcpy(res->fh, p, sizeof(*res->fh)); + memset((u8 *)res, 0, sizeof(*res)); + if ((res->status = ntohl(*p++)) == 0) { + res->fh->size = NFS2_FHSIZE; + memcpy(res->fh->data, p, NFS2_FHSIZE); + } + return 0; +} + +static int +xdr_decode_fhstatus3(struct rpc_rqst *req, u32 *p, struct mnt_fhstatus *res) +{ + memset((u8 *)res, 0, sizeof(*res)); + if ((res->status = ntohl(*p++)) == 0) { + int size = ntohl(*p++); + if (size <= NFS3_FHSIZE) { + res->fh->size = size; + memcpy(res->fh->data, p, res->fh->size); + } else + res->status = -EBADHANDLE; + } return 0; } @@ -122,13 +156,30 @@ static struct rpc_procinfo mnt_procedures[2] = { MNT_dirpath_sz << 2, 0 }, }; +static struct rpc_procinfo 
mnt3_procedures[2] = { + { "mnt3_null", + (kxdrproc_t) xdr_error, + (kxdrproc_t) xdr_error, 0, 0 }, + { "mnt3_mount", + (kxdrproc_t) xdr_encode_dirpath, + (kxdrproc_t) xdr_decode_fhstatus3, + MNT_dirpath_sz << 2, 0 }, +}; + + static struct rpc_version mnt_version1 = { 1, 2, mnt_procedures }; +static struct rpc_version mnt_version3 = { + 3, 2, mnt3_procedures +}; + static struct rpc_version * mnt_version[] = { NULL, &mnt_version1, + NULL, + &mnt_version3, }; static struct rpc_stat mnt_stats; diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c index 5ad2aaa67..1dd1553ba 100644 --- a/fs/nfs/nfs2xdr.c +++ b/fs/nfs/nfs2xdr.c @@ -1,14 +1,14 @@ /* - * linux/fs/nfs/xdr.c + * linux/fs/nfs/nfs2xdr.c * * XDR functions to encode/decode NFS RPC arguments and results. * * Copyright (C) 1992, 1993, 1994 Rick Sladkey * Copyright (C) 1996 Olaf Kirch + * 04 Aug 1998 Ion Badulescu <ionut@cs.columbia.edu> + * FIFO's need special handling in NFSv2 */ -#define NFS_NEED_XDR_TYPES - #include <linux/param.h> #include <linux/sched.h> #include <linux/mm.h> @@ -20,6 +20,8 @@ #include <linux/pagemap.h> #include <linux/proc_fs.h> #include <linux/sunrpc/clnt.h> +#include <linux/nfs.h> +#include <linux/nfs2.h> #include <linux/nfs_fs.h> /* Uncomment this to support servers requiring longword lengths */ @@ -28,8 +30,7 @@ #define NFSDBG_FACILITY NFSDBG_XDR /* #define NFS_PARANOIA 1 */ -#define QUADLEN(len) (((len) + 3) >> 2) -static int nfs_stat_to_errno(int stat); +extern int nfs_stat_to_errno(int stat); /* Mapping from NFS error code to "errno" error code. 
*/ #define errno_NFSERR_IO EIO @@ -40,8 +41,8 @@ static int nfs_stat_to_errno(int stat); */ #define NFS_fhandle_sz 8 #define NFS_sattr_sz 8 -#define NFS_filename_sz 1+(NFS_MAXNAMLEN>>2) -#define NFS_path_sz 1+(NFS_MAXPATHLEN>>2) +#define NFS_filename_sz 1+(NFS2_MAXNAMLEN>>2) +#define NFS_path_sz 1+(NFS2_MAXPATHLEN>>2) #define NFS_fattr_sz 17 #define NFS_info_sz 5 #define NFS_entry_sz NFS_filename_sz+3 @@ -49,6 +50,7 @@ static int nfs_stat_to_errno(int stat); #define NFS_enc_void_sz 0 #define NFS_diropargs_sz NFS_fhandle_sz+NFS_filename_sz #define NFS_sattrargs_sz NFS_fhandle_sz+NFS_sattr_sz +#define NFS_readlinkargs_sz NFS_fhandle_sz #define NFS_readargs_sz NFS_fhandle_sz+3 #define NFS_writeargs_sz NFS_fhandle_sz+4 #define NFS_createargs_sz NFS_diropargs_sz+NFS_sattr_sz @@ -56,14 +58,13 @@ static int nfs_stat_to_errno(int stat); #define NFS_linkargs_sz NFS_fhandle_sz+NFS_diropargs_sz #define NFS_symlinkargs_sz NFS_diropargs_sz+NFS_path_sz+NFS_sattr_sz #define NFS_readdirargs_sz NFS_fhandle_sz+2 -#define NFS_readlinkargs_sz NFS_fhandle_sz #define NFS_dec_void_sz 0 #define NFS_attrstat_sz 1+NFS_fattr_sz #define NFS_diropres_sz 1+NFS_fhandle_sz+NFS_fattr_sz #define NFS_readlinkres_sz 1 #define NFS_readres_sz 1+NFS_fattr_sz+1 -#define NFS_writeres_sz NFS_attrstat_sz +#define NFS_writeres_sz NFS_attrstat_sz #define NFS_stat_sz 1 #define NFS_readdirres_sz 1 #define NFS_statfsres_sz 1+NFS_info_sz @@ -74,15 +75,19 @@ static int nfs_stat_to_errno(int stat); static inline u32 * xdr_encode_fhandle(u32 *p, struct nfs_fh *fhandle) { - *((struct nfs_fh *) p) = *fhandle; - return p + QUADLEN(sizeof(*fhandle)); + memcpy(p, fhandle->data, NFS2_FHSIZE); + return p + XDR_QUADLEN(NFS2_FHSIZE); } static inline u32 * xdr_decode_fhandle(u32 *p, struct nfs_fh *fhandle) { - *fhandle = *((struct nfs_fh *) p); - return p + QUADLEN(sizeof(*fhandle)); + /* Zero handle first to allow comparisons */ + memset(fhandle, 0, sizeof(*fhandle)); + /* NFSv2 handles have a fixed length */ + fhandle->size 
= NFS2_FHSIZE; + memcpy(fhandle->data, p, NFS2_FHSIZE); + return p + XDR_QUADLEN(NFS2_FHSIZE); } static inline u32 * @@ -93,7 +98,14 @@ xdr_decode_string2(u32 *p, char **string, unsigned int *len, if (*len > maxlen) return NULL; *string = (char *) p; - return p + QUADLEN(*len); + return p + XDR_QUADLEN(*len); +} + +static inline u32* +xdr_decode_time(u32 *p, u64 *timep) +{ + *timep = ((u64)ntohl(*p++) << 32) + (u64)ntohl(*p++); + return p; } static inline u32 * @@ -105,21 +117,23 @@ xdr_decode_fattr(u32 *p, struct nfs_fattr *fattr) fattr->uid = ntohl(*p++); fattr->gid = ntohl(*p++); fattr->size = ntohl(*p++); - fattr->blocksize = ntohl(*p++); + fattr->du.nfs2.blocksize = ntohl(*p++); fattr->rdev = ntohl(*p++); - fattr->blocks = ntohl(*p++); + fattr->du.nfs2.blocks = ntohl(*p++); fattr->fsid = ntohl(*p++); fattr->fileid = ntohl(*p++); - fattr->atime.seconds = ntohl(*p++); - fattr->atime.useconds = ntohl(*p++); - fattr->mtime.seconds = ntohl(*p++); - fattr->mtime.useconds = ntohl(*p++); - fattr->ctime.seconds = ntohl(*p++); - fattr->ctime.useconds = ntohl(*p++); + p = xdr_decode_time(p, &fattr->atime); + p = xdr_decode_time(p, &fattr->mtime); + p = xdr_decode_time(p, &fattr->ctime); + fattr->valid |= NFS_ATTR_FATTR; + if (fattr->type == NFCHR && fattr->rdev == NFS2_FIFO_DEV) { + fattr->type = NFFIFO; + fattr->mode = (fattr->mode & ~S_IFMT) | S_IFIFO; + fattr->rdev = 0; + } return p; } - #define SATTR(p, attr, flag, field) \ *p++ = (attr->ia_valid & flag) ? 
htonl(attr->field) : ~(u32) 0 static inline u32 * @@ -194,7 +208,7 @@ static int nfs_xdr_diropargs(struct rpc_rqst *req, u32 *p, struct nfs_diropargs *args) { p = xdr_encode_fhandle(p, args->fh); - p = xdr_encode_string(p, args->name); + p = xdr_encode_array(p, args->name, args->len); req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); return 0; } @@ -208,7 +222,8 @@ static int nfs_xdr_readargs(struct rpc_rqst *req, u32 *p, struct nfs_readargs *args) { struct rpc_auth *auth = req->rq_task->tk_auth; - int replen, buflen; + int buflen, replen; + unsigned int nr; p = xdr_encode_fhandle(p, args->fh); *p++ = htonl(args->offset); @@ -216,16 +231,25 @@ nfs_xdr_readargs(struct rpc_rqst *req, u32 *p, struct nfs_readargs *args) *p++ = htonl(args->count); req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + /* Get the number of buffers in the receive iovec */ + nr = args->nriov; + + if (nr+2 > MAX_IOVEC) { + printk(KERN_ERR "NFS: Bad number of iov's in xdr_readargs\n"); + return -EINVAL; + } + /* set up reply iovec */ replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS_readres_sz) << 2; buflen = req->rq_rvec[0].iov_len; req->rq_rvec[0].iov_len = replen; - req->rq_rvec[1].iov_base = args->buffer; - req->rq_rvec[1].iov_len = args->count; - req->rq_rvec[2].iov_base = (u8 *) req->rq_rvec[0].iov_base + replen; - req->rq_rvec[2].iov_len = buflen - replen; + /* Copy the iovec */ + memcpy(req->rq_rvec + 1, args->iov, nr * sizeof(struct iovec)); + + req->rq_rvec[nr+1].iov_base = (u8 *) req->rq_rvec[0].iov_base + replen; + req->rq_rvec[nr+1].iov_len = buflen - replen; req->rq_rlen = args->count + buflen; - req->rq_rnr = 3; + req->rq_rnr += nr+1; return 0; } @@ -239,7 +263,6 @@ nfs_xdr_readres(struct rpc_rqst *req, u32 *p, struct nfs_readres *res) struct iovec *iov = req->rq_rvec; int status, count, recvd, hdrlen; - dprintk("RPC: readres OK status %lx\n", (long)ntohl(*p)); if ((status = ntohl(*p++))) return -nfs_stat_to_errno(status); p = xdr_decode_fattr(p, res->fattr); @@ -247,22 +270,26 @@ 
nfs_xdr_readres(struct rpc_rqst *req, u32 *p, struct nfs_readres *res) count = ntohl(*p++); hdrlen = (u8 *) p - (u8 *) iov->iov_base; recvd = req->rq_rlen - hdrlen; - if (p != iov[2].iov_base) { + if (p != iov[req->rq_rnr-1].iov_base) { /* Unexpected reply header size. Punt. * XXX: Move iovec contents to align data on page * boundary and adjust RPC header size guess */ - printk("NFS: Odd RPC header size in read reply: %d\n", hdrlen); + printk(KERN_WARNING "NFS: Odd RPC header size in read reply: %d\n", hdrlen); return -errno_NFSERR_IO; } if (count > recvd) { - printk("NFS: server cheating in read reply: " + printk(KERN_WARNING "NFS: server cheating in read reply: " "count %d > recvd %d\n", count, recvd); count = recvd; } dprintk("RPC: readres OK count %d\n", count); - if (count < res->count) - memset((u8 *)(iov[1].iov_base+count), 0, res->count-count); + if (count < res->count) { + xdr_zero_iovec(iov+1, req->rq_rnr-2, res->count - count); + res->count = count; + res->eof = 1; /* Silly NFSv3ism which can't be helped */ + } else + res->eof = 0; return count; } @@ -288,13 +315,13 @@ nfs_xdr_writeargs(struct rpc_rqst *req, u32 *p, struct nfs_writeargs *args) nr = args->nriov; if (nr+2 > MAX_IOVEC) { - printk(KERN_ERR "NFS: Bad number of iov's in xdr_writeargs " - "(nr %d max %d)\n", nr, MAX_IOVEC); - return -EINVAL; - } + printk(KERN_ERR "NFS: Bad number of iov's in xdr_writeargs " + "(nr %d max %d)\n", nr, MAX_IOVEC); + return -EINVAL; + } /* Copy the iovec */ - memcpy(req->rq_svec + 1, args->iov, nr * sizeof(struct iovec)); + memcpy(req->rq_svec + 1, args->iov, nr * sizeof(struct iovec)); #ifdef NFS_PAD_WRITES /* @@ -325,7 +352,7 @@ static int nfs_xdr_createargs(struct rpc_rqst *req, u32 *p, struct nfs_createargs *args) { p = xdr_encode_fhandle(p, args->fh); - p = xdr_encode_string(p, args->name); + p = xdr_encode_array(p, args->name, args->len); p = xdr_encode_sattr(p, args->sattr); req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); return 0; @@ -338,9 +365,9 @@ 
static int nfs_xdr_renameargs(struct rpc_rqst *req, u32 *p, struct nfs_renameargs *args) { p = xdr_encode_fhandle(p, args->fromfh); - p = xdr_encode_string(p, args->fromname); + p = xdr_encode_array(p, args->fromname, args->fromlen); p = xdr_encode_fhandle(p, args->tofh); - p = xdr_encode_string(p, args->toname); + p = xdr_encode_array(p, args->toname, args->tolen); req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); return 0; } @@ -353,7 +380,7 @@ nfs_xdr_linkargs(struct rpc_rqst *req, u32 *p, struct nfs_linkargs *args) { p = xdr_encode_fhandle(p, args->fromfh); p = xdr_encode_fhandle(p, args->tofh); - p = xdr_encode_string(p, args->toname); + p = xdr_encode_array(p, args->toname, args->tolen); req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); return 0; } @@ -365,8 +392,8 @@ static int nfs_xdr_symlinkargs(struct rpc_rqst *req, u32 *p, struct nfs_symlinkargs *args) { p = xdr_encode_fhandle(p, args->fromfh); - p = xdr_encode_string(p, args->fromname); - p = xdr_encode_string(p, args->topath); + p = xdr_encode_array(p, args->fromname, args->fromlen); + p = xdr_encode_array(p, args->topath, args->tolen); p = xdr_encode_sattr(p, args->sattr); req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); return 0; @@ -380,108 +407,113 @@ nfs_xdr_readdirargs(struct rpc_rqst *req, u32 *p, struct nfs_readdirargs *args) { struct rpc_task *task = req->rq_task; struct rpc_auth *auth = task->tk_auth; - int bufsiz = args->bufsiz; - int replen; - - p = xdr_encode_fhandle(p, args->fh); - *p++ = htonl(args->cookie); + u32 bufsiz = args->bufsiz; + int buflen, replen; - /* Some servers (e.g. HP OS 9.5) seem to expect the buffer size + /* + * Some servers (e.g. HP OS 9.5) seem to expect the buffer size * to be in longwords ... check whether to convert the size. 
*/ if (task->tk_client->cl_flags & NFS_CLNTF_BUFSIZE) - *p++ = htonl(bufsiz >> 2); - else - *p++ = htonl(bufsiz); + bufsiz = bufsiz >> 2; + p = xdr_encode_fhandle(p, args->fh); + *p++ = htonl(args->cookie); + *p++ = htonl(bufsiz); /* see above */ req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); /* set up reply iovec */ replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS_readdirres_sz) << 2; + buflen = req->rq_rvec[0].iov_len; req->rq_rvec[0].iov_len = replen; req->rq_rvec[1].iov_base = args->buffer; - req->rq_rvec[1].iov_len = bufsiz; - req->rq_rlen = replen + bufsiz; - req->rq_rnr = 2; + req->rq_rvec[1].iov_len = args->bufsiz; + req->rq_rvec[2].iov_base = (u8 *) req->rq_rvec[0].iov_base + replen; + req->rq_rvec[2].iov_len = buflen - replen; + req->rq_rlen = buflen + args->bufsiz; + req->rq_rnr += 2; return 0; } /* * Decode the result of a readdir call. + * We're not really decoding anymore, we just leave the buffer untouched + * and only check that it is syntactically correct. + * The real decoding happens in nfs_decode_entry below, called directly + * from nfs_readdir for each entry. */ -#define NFS_DIRENT_MAXLEN (5 * sizeof(u32) + (NFS_MAXNAMLEN + 1)) static int nfs_xdr_readdirres(struct rpc_rqst *req, u32 *p, struct nfs_readdirres *res) { struct iovec *iov = req->rq_rvec; int status, nr; - u32 *end; - u32 last_cookie = res->cookie; + u32 *end, *entry, len; - status = ntohl(*p++); - if (status) { - nr = -nfs_stat_to_errno(status); - goto error; - } + if ((status = ntohl(*p++))) + return -nfs_stat_to_errno(status); if ((void *) p != ((u8 *) iov->iov_base+iov->iov_len)) { /* Unexpected reply header size. Punt. */ - printk("NFS: Odd RPC header size in readdirres reply\n"); - nr = -errno_NFSERR_IO; - goto error; + printk(KERN_WARNING "NFS: Odd RPC header size in readdirres reply\n"); + return -errno_NFSERR_IO; } - /* Get start and end address of XDR readdir response. 
*/ + /* Get start and end address of XDR data */ p = (u32 *) iov[1].iov_base; end = (u32 *) ((u8 *) p + iov[1].iov_len); - for (nr = 0; *p++; nr++) { - __u32 len; - - /* Convert fileid. */ - *p = ntohl(*p); - p++; - /* Convert and capture len */ - len = *p = ntohl(*p); - p++; - - if ((p + QUADLEN(len) + 3) > end) { - struct rpc_clnt *clnt = req->rq_task->tk_client; + /* Get start and end of dirent buffer */ + if (res->buffer != p) { + printk(KERN_ERR "NFS: Bad result buffer in readdir\n"); + return -errno_NFSERR_IO; + } - clnt->cl_flags |= NFS_CLNTF_BUFSIZE; - p -= 2; - p[-1] = 0; - p[0] = 0; - break; + for (nr = 0; *p++; nr++) { + entry = p - 1; + p++; /* fileid */ + len = ntohl(*p++); + p += XDR_QUADLEN(len) + 1; /* name plus cookie */ + if (len > NFS2_MAXNAMLEN) { + printk(KERN_WARNING "NFS: giant filename in readdir (len 0x%x)!\n", + len); + return -errno_NFSERR_IO; } - if (len > NFS_MAXNAMLEN) { - nr = -errno_NFSERR_IO; - goto error; + if (p + 2 > end) { + printk(KERN_NOTICE + "NFS: short packet in readdir reply!\n"); + entry[0] = entry[1] = 0; + break; } - p += QUADLEN(len); - - /* Convert and capture cookie. 
*/ - last_cookie = *p = ntohl(*p); - p++; } - p -= 1; - status = ((end - p) << 2); - if (!p[1] && (status >= NFS_DIRENT_MAXLEN)) { - status = ((__u8 *)p - (__u8 *)iov[1].iov_base); - res->buffer += status; - res->bufsiz -= status; - } else if (p[1]) { - status = (int)((long)p & ~PAGE_CACHE_MASK); - res->bufsiz = -status; - } else { - res->bufsiz = 0; + p++; /* EOF flag */ + + if (p > end) { + printk(KERN_NOTICE + "NFS: short packet in readdir reply!\n"); + return -errno_NFSERR_IO; } - res->cookie = last_cookie; return nr; +} -error: - res->bufsiz = 0; - return nr; +u32 * +nfs_decode_dirent(u32 *p, struct nfs_entry *entry, int plus) +{ + if (!*p++) { + if (!*p) + return ERR_PTR(-EAGAIN); + entry->eof = 1; + return ERR_PTR(-EBADCOOKIE); + } + + entry->ino = ntohl(*p++); + entry->len = ntohl(*p++); + entry->name = (const char *) p; + p += XDR_QUADLEN(entry->len); + entry->prev_cookie = entry->cookie; + entry->cookie = ntohl(*p++); + entry->eof = !p[0] && p[1]; + + return p; } /* @@ -518,12 +550,9 @@ nfs_xdr_attrstat(struct rpc_rqst *req, u32 *p, struct nfs_fattr *fattr) { int status; - dprintk("RPC: attrstat status %lx\n", (long)ntohl(*p)); if ((status = ntohl(*p++))) return -nfs_stat_to_errno(status); xdr_decode_fattr(p, fattr); - dprintk("RPC: attrstat OK type %d mode %o dev %x ino %x\n", - fattr->type, fattr->mode, fattr->fsid, fattr->fileid); return 0; } @@ -536,36 +565,34 @@ nfs_xdr_diropres(struct rpc_rqst *req, u32 *p, struct nfs_diropok *res) { int status; - dprintk("RPC: diropres status %lx\n", (long)ntohl(*p)); if ((status = ntohl(*p++))) return -nfs_stat_to_errno(status); p = xdr_decode_fhandle(p, res->fh); xdr_decode_fattr(p, res->fattr); - dprintk("RPC: diropres OK type %x mode %o dev %x ino %x\n", - res->fattr->type, res->fattr->mode, - res->fattr->fsid, res->fattr->fileid); return 0; } /* - * Encode arguments to readlink call + * Encode READLINK args */ -static int nfs_xdr_readlinkargs(struct rpc_rqst *req, u32 *p, struct nfs_readlinkargs *args) +static 
int +nfs_xdr_readlinkargs(struct rpc_rqst *req, u32 *p, struct nfs_readlinkargs *args) { struct rpc_task *task = req->rq_task; struct rpc_auth *auth = task->tk_auth; - int bufsiz = NFS_MAXPATHLEN; - int replen; + int buflen, replen; p = xdr_encode_fhandle(p, args->fh); req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS_readlinkres_sz) << 2; - req->rq_rvec[0].iov_len = replen; - req->rq_rvec[1].iov_base = (void *) args->buffer; - req->rq_rvec[1].iov_len = bufsiz; - req->rq_rlen = replen + bufsiz; - req->rq_rnr = 2; - + buflen = req->rq_rvec[0].iov_len; + req->rq_rvec[0].iov_len = replen; + req->rq_rvec[1].iov_base = args->buffer; + req->rq_rvec[1].iov_len = args->bufsiz; + req->rq_rvec[2].iov_base = (u8 *) req->rq_rvec[0].iov_base + replen; + req->rq_rvec[2].iov_len = buflen - replen; + req->rq_rlen = buflen + args->bufsiz; + req->rq_rnr += 2; return 0; } @@ -573,31 +600,24 @@ static int nfs_xdr_readlinkargs(struct rpc_rqst *req, u32 *p, struct nfs_readlin * Decode READLINK reply */ static int -nfs_xdr_readlinkres(struct rpc_rqst *req, u32 *p, void *dummy) +nfs_xdr_readlinkres(struct rpc_rqst *req, u32 *p, struct nfs_readlinkres *res) { - struct iovec *iov = req->rq_rvec; - int status, len; - char *name; + u32 *strlen; + char *string; + int status; + unsigned int len; - /* Verify OK status. */ - if ((status = ntohl(*p++)) != 0) + if ((status = ntohl(*p++))) return -nfs_stat_to_errno(status); - - /* Verify OK response length. */ - if ((__u8 *)p != ((u8 *) iov->iov_base + iov->iov_len)) - return -errno_NFSERR_IO; - - /* Convert and verify that string length is in range. */ - p = iov[1].iov_base; - len = *p = ntohl(*p); - p++; - if (len > iov[1].iov_len) - return -errno_NFSERR_IO; - - /* NULL terminate the string we got. 
*/ - name = (char *) p; - name[len] = 0; - + strlen = (u32*)res->buffer; + /* Convert length of symlink */ + len = ntohl(*strlen); + if (len > res->bufsiz - 5) + len = res->bufsiz - 5; + *strlen = len; + /* NULL terminate the string we got */ + string = (char *)(strlen + 1); + string[len] = 0; return 0; } @@ -618,14 +638,34 @@ static int nfs_xdr_statfsres(struct rpc_rqst *req, u32 *p, struct nfs_fsinfo *res) { int status; + u32 xfer_size; if ((status = ntohl(*p++))) return -nfs_stat_to_errno(status); - res->tsize = ntohl(*p++); - res->bsize = ntohl(*p++); - res->blocks = ntohl(*p++); - res->bfree = ntohl(*p++); - res->bavail = ntohl(*p++); + + /* For NFSv2, we more or less have to guess the preferred + * read/write/readdir sizes from the single 'transfer size' + * value. + */ + xfer_size = ntohl(*p++); /* tsize */ + res->rtmax = 8 * 1024; + res->rtpref = xfer_size; + res->rtmult = xfer_size; + res->wtmax = 8 * 1024; + res->wtpref = xfer_size; + res->wtmult = xfer_size; + res->dtpref = PAGE_CACHE_SIZE; + res->maxfilesize = 0x7FFFFFFF; /* just a guess */ + res->bsize = ntohl(*p++); + + res->tbytes = ntohl(*p++) * res->bsize; + res->fbytes = ntohl(*p++) * res->bsize; + res->abytes = ntohl(*p++) * res->bsize; + res->tfiles = 0; + res->ffiles = 0; + res->afiles = 0; + res->namelen = 0; + return 0; } @@ -642,7 +682,7 @@ static struct { { NFSERR_NOENT, ENOENT }, { NFSERR_IO, errno_NFSERR_IO }, { NFSERR_NXIO, ENXIO }, - { NFSERR_EAGAIN, EAGAIN }, +/* { NFSERR_EAGAIN, EAGAIN }, */ { NFSERR_ACCES, EACCES }, { NFSERR_EXIST, EEXIST }, { NFSERR_XDEV, EXDEV }, @@ -653,18 +693,31 @@ static struct { { NFSERR_FBIG, EFBIG }, { NFSERR_NOSPC, ENOSPC }, { NFSERR_ROFS, EROFS }, - { NFSERR_OPNOTSUPP, EOPNOTSUPP }, + { NFSERR_MLINK, EMLINK }, { NFSERR_NAMETOOLONG, ENAMETOOLONG }, { NFSERR_NOTEMPTY, ENOTEMPTY }, { NFSERR_DQUOT, EDQUOT }, { NFSERR_STALE, ESTALE }, + { NFSERR_REMOTE, EREMOTE }, #ifdef EWFLUSH { NFSERR_WFLUSH, EWFLUSH }, #endif + { NFSERR_BADHANDLE, EBADHANDLE }, + { 
NFSERR_NOT_SYNC, ENOTSYNC }, + { NFSERR_BAD_COOKIE, EBADCOOKIE }, + { NFSERR_NOTSUPP, ENOTSUPP }, + { NFSERR_TOOSMALL, ETOOSMALL }, + { NFSERR_SERVERFAULT, ESERVERFAULT }, + { NFSERR_BADTYPE, EBADTYPE }, + { NFSERR_JUKEBOX, EJUKEBOX }, { -1, EIO } }; -static int +/* + * Convert an NFS error code to a local one. + * This one is used jointly by NFSv2 and NFSv3. + */ +int nfs_stat_to_errno(int stat) { int i; @@ -673,7 +726,7 @@ nfs_stat_to_errno(int stat) if (nfs_errtbl[i].stat == stat) return nfs_errtbl[i].errno; } - printk("nfs_stat_to_errno: bad nfs status return value: %d\n", stat); + printk(KERN_ERR "nfs_stat_to_errno: bad nfs status return value: %d\n", stat); return nfs_errtbl[i].errno; } @@ -685,7 +738,8 @@ nfs_stat_to_errno(int stat) { "nfs_" #proc, \ (kxdrproc_t) nfs_xdr_##argtype, \ (kxdrproc_t) nfs_xdr_##restype, \ - MAX(NFS_##argtype##_sz,NFS_##restype##_sz) << 2 \ + MAX(NFS_##argtype##_sz,NFS_##restype##_sz) << 2, \ + 0 \ } static struct rpc_procinfo nfs_procedures[18] = { @@ -709,22 +763,8 @@ static struct rpc_procinfo nfs_procedures[18] = { PROC(statfs, fhandle, statfsres), }; -static struct rpc_version nfs_version2 = { +struct rpc_version nfs_version2 = { 2, sizeof(nfs_procedures)/sizeof(nfs_procedures[0]), nfs_procedures }; - -static struct rpc_version * nfs_version[] = { - NULL, - NULL, - &nfs_version2 -}; - -struct rpc_program nfs_program = { - "nfs", - NFS_PROGRAM, - sizeof(nfs_version) / sizeof(nfs_version[0]), - nfs_version, - &nfs_rpcstat, -}; diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c new file mode 100644 index 000000000..67de662a6 --- /dev/null +++ b/fs/nfs/nfs3proc.c @@ -0,0 +1,477 @@ +/* + * linux/fs/nfs/nfs3proc.c + * + * Client-side NFSv3 procedures stubs. 
+ * + * Copyright (C) 1997, Olaf Kirch + */ + +#include <linux/mm.h> +#include <linux/utsname.h> +#include <linux/errno.h> +#include <linux/string.h> +#include <linux/sunrpc/clnt.h> +#include <linux/nfs.h> +#include <linux/nfs3.h> +#include <linux/nfs_fs.h> + +#include <asm/segment.h> + +#define NFSDBG_FACILITY NFSDBG_PROC + +/* + * Bare-bones access to getattr: this is for nfs_read_super. + */ +static int +nfs3_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, + struct nfs_fattr *fattr) +{ + int status; + + dprintk("NFS call getroot\n"); + fattr->valid = 0; + status = rpc_call(server->client, NFS3PROC_GETATTR, fhandle, fattr, 0); + dprintk("NFS reply getroot\n"); + return status; +} + +/* + * One function for each procedure in the NFS protocol. + */ +static int +nfs3_proc_getattr(struct dentry *dentry, struct nfs_fattr *fattr) +{ + int status; + + dprintk("NFS call getattr\n"); + fattr->valid = 0; + status = rpc_call(NFS_CLIENT(dentry->d_inode), NFS3PROC_GETATTR, + NFS_FH(dentry), fattr, 0); + dprintk("NFS reply getattr\n"); + return status; +} + +static int +nfs3_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, + struct iattr *sattr) +{ + struct nfs3_sattrargs arg = { NFS_FH(dentry), sattr, 0, 0 }; + int status; + + dprintk("NFS call setattr\n"); + fattr->valid = 0; + status = rpc_call(NFS_CLIENT(dentry->d_inode), NFS3PROC_SETATTR, &arg, fattr, 0); + dprintk("NFS reply setattr\n"); + return status; +} + +static int +nfs3_proc_lookup(struct dentry *dir, struct qstr *name, + struct nfs_fh *fhandle, struct nfs_fattr *fattr) +{ + struct nfs_fattr dir_attr; + struct nfs3_diropargs arg = { NFS_FH(dir), name->name, name->len }; + struct nfs3_diropres res = { &dir_attr, fhandle, fattr }; + int status; + + dprintk("NFS call lookup %s\n", name->name); + dir_attr.valid = 0; + fattr->valid = 0; + status = rpc_call(NFS_CLIENT(dir->d_inode), NFS3PROC_LOOKUP, &arg, &res, 0); + if (status >= 0 && !(fattr->valid & NFS_ATTR_FATTR)) + status = 
rpc_call(NFS_CLIENT(dir->d_inode), NFS3PROC_GETATTR, + fhandle, fattr, 0); + dprintk("NFS reply lookup: %d\n", status); + nfs_refresh_inode(dir->d_inode, &dir_attr); + return status; +} + +static int +nfs3_proc_access(struct dentry *dentry, int mode, int ruid) +{ + struct nfs_fattr fattr; + struct nfs3_accessargs arg = { NFS_FH(dentry), 0 }; + struct nfs3_accessres res = { &fattr, 0 }; + int status, flags; + + dprintk("NFS call access\n"); + fattr.valid = 0; + + if (mode & MAY_READ) + arg.access |= NFS3_ACCESS_READ; + if (S_ISDIR(dentry->d_inode->i_mode)) { + if (mode & MAY_WRITE) + arg.access |= NFS3_ACCESS_MODIFY | NFS3_ACCESS_EXTEND | NFS3_ACCESS_DELETE; + if (mode & MAY_EXEC) + arg.access |= NFS3_ACCESS_LOOKUP; + } else { + if (mode & MAY_WRITE) + arg.access |= NFS3_ACCESS_MODIFY | NFS3_ACCESS_EXTEND; + if (mode & MAY_EXEC) + arg.access |= NFS3_ACCESS_EXECUTE; + } + flags = (ruid) ? RPC_CALL_REALUID : 0; + status = rpc_call(NFS_CLIENT(dentry->d_inode), NFS3PROC_ACCESS, &arg, &res, flags); + nfs_refresh_inode(dentry->d_inode, &fattr); + dprintk("NFS reply access\n"); + + if (status == 0 && (arg.access & res.access) != arg.access) + status = -EACCES; + return status; +} + +static int +nfs3_proc_readlink(struct dentry *dentry, void *buffer, unsigned int buflen) +{ + struct nfs_fattr fattr; + struct nfs3_readlinkargs args = { NFS_FH(dentry), buffer, buflen }; + struct nfs3_readlinkres res = { &fattr, buffer, buflen }; + int status; + + dprintk("NFS call readlink\n"); + fattr.valid = 0; + status = rpc_call(NFS_CLIENT(dentry->d_inode), NFS3PROC_READLINK, + &args, &res, 0); + nfs_refresh_inode(dentry->d_inode, &fattr); + dprintk("NFS reply readlink: %d\n", status); + return status; +} + +static int +nfs3_proc_read(struct dentry *dentry, struct nfs_fattr *fattr, int flags, + loff_t offset, unsigned int count, void *buffer, int *eofp) +{ + struct nfs_readargs arg = { NFS_FH(dentry), offset, count, 1, + {{buffer, count}, {0,0}, {0,0}, {0,0}, + {0,0}, {0,0}, {0,0}, {0,0}} 
}; + struct nfs_readres res = { fattr, count, 0 }; + struct rpc_message msg = { NFS3PROC_READ, &arg, &res, NULL }; + int status; + + dprintk("NFS call read %d @ %Ld\n", count, (long long)offset); + fattr->valid = 0; + status = rpc_call_sync(NFS_CLIENT(dentry->d_inode), &msg, flags); + dprintk("NFS reply read: %d\n", status); + *eofp = res.eof; + return status; +} + +static int +nfs3_proc_write(struct dentry *dentry, struct nfs_fattr *fattr, int flags, + loff_t offset, unsigned int count, + void *buffer, struct nfs_writeverf *verf) +{ + struct nfs_writeargs arg = { NFS_FH(dentry), offset, count, + NFS_FILE_SYNC, 1, + {{buffer, count}, {0,0}, {0,0}, {0,0}, + {0,0}, {0,0}, {0,0}, {0,0}} }; + struct nfs_writeres res = { fattr, verf, 0 }; + struct rpc_message msg = { NFS3PROC_WRITE, &arg, &res, NULL }; + int status, rpcflags = 0; + + dprintk("NFS call write %d @ %Ld\n", count, (long long)offset); + fattr->valid = 0; + if (flags & NFS_RW_SWAP) + rpcflags |= NFS_RPC_SWAPFLAGS; + arg.stable = (flags & NFS_RW_SYNC) ? NFS_FILE_SYNC : NFS_UNSTABLE; + + status = rpc_call_sync(NFS_CLIENT(dentry->d_inode), &msg, rpcflags); + + dprintk("NFS reply read: %d\n", status); + return status < 0? status : res.count; +} + +/* + * Create a regular file. + * For now, we don't implement O_EXCL. 
+ */ +static int +nfs3_proc_create(struct dentry *dir, struct qstr *name, struct iattr *sattr, + int flags, struct nfs_fh *fhandle, struct nfs_fattr *fattr) +{ + struct nfs_fattr dir_attr; + struct nfs3_createargs arg = { NFS_FH(dir), name->name, name->len, + sattr, 0, { 0, 0 } }; + struct nfs3_diropres res = { &dir_attr, fhandle, fattr }; + int status; + + dprintk("NFS call create %s\n", name->name); + arg.createmode = NFS3_CREATE_UNCHECKED; + if (flags & O_EXCL) { + arg.createmode = NFS3_CREATE_EXCLUSIVE; + arg.verifier[0] = jiffies; + arg.verifier[1] = current->pid; + } + +again: + dir_attr.valid = 0; + fattr->valid = 0; + status = rpc_call(NFS_CLIENT(dir->d_inode), NFS3PROC_CREATE, &arg, &res, 0); + nfs_refresh_inode(dir->d_inode, &dir_attr); + + /* If the server doesn't support the exclusive creation semantics, + * try again with simple 'guarded' mode. */ + if (status == NFSERR_NOTSUPP) { + switch (arg.createmode) { + case NFS3_CREATE_EXCLUSIVE: + arg.createmode = NFS3_CREATE_GUARDED; + break; + + case NFS3_CREATE_GUARDED: + arg.createmode = NFS3_CREATE_UNCHECKED; + break; + + case NFS3_CREATE_UNCHECKED: + goto exit; + } + goto again; + } + +exit: + dprintk("NFS reply create: %d\n", status); + + /* When we created the file with exclusive semantics, make + * sure we set the attributes afterwards. 
*/ + if (status == 0 && arg.createmode == NFS3_CREATE_EXCLUSIVE) { + struct nfs3_sattrargs arg = { fhandle, sattr, 0, 0 }; + dprintk("NFS call setattr (post-create)\n"); + + /* Note: we could use a guarded setattr here, but I'm + * not sure this buys us anything (and I'd have + * to revamp the NFSv3 XDR code) */ + fattr->valid = 0; + status = rpc_call(NFS_CLIENT(dir->d_inode), NFS3PROC_SETATTR, + &arg, fattr, 0); + dprintk("NFS reply setattr (post-create): %d\n", status); + } + + return status; +} + +static int +nfs3_proc_remove(struct dentry *dir, struct qstr *name) +{ + struct nfs_fattr dir_attr; + struct nfs3_diropargs arg = { NFS_FH(dir), name->name, name->len }; + struct rpc_message msg = { NFS3PROC_REMOVE, &arg, &dir_attr, NULL }; + int status; + + dprintk("NFS call remove %s\n", name->name); + dir_attr.valid = 0; + status = rpc_call_sync(NFS_CLIENT(dir->d_inode), &msg, 0); + nfs_refresh_inode(dir->d_inode, &dir_attr); + dprintk("NFS reply remove: %d\n", status); + return status; +} + +static int +nfs3_proc_rename(struct dentry *old_dir, struct qstr *old_name, + struct dentry *new_dir, struct qstr *new_name) +{ + struct nfs_fattr old_dir_attr, new_dir_attr; + struct nfs3_renameargs arg = { NFS_FH(old_dir), + old_name->name, old_name->len, + NFS_FH(new_dir), + new_name->name, new_name->len }; + struct nfs3_renameres res = { &old_dir_attr, &new_dir_attr }; + int status; + + dprintk("NFS call rename %s -> %s\n", old_name->name, new_name->name); + old_dir_attr.valid = 0; + new_dir_attr.valid = 0; + status = rpc_call(NFS_CLIENT(old_dir->d_inode), NFS3PROC_RENAME, &arg, &res, 0); + nfs_refresh_inode(old_dir->d_inode, &old_dir_attr); + nfs_refresh_inode(new_dir->d_inode, &new_dir_attr); + dprintk("NFS reply rename: %d\n", status); + return status; +} + +static int +nfs3_proc_link(struct dentry *dentry, struct dentry *dir, struct qstr *name) +{ + struct nfs_fattr dir_attr, fattr; + struct nfs3_linkargs arg = { NFS_FH(dentry), NFS_FH(dir), + name->name, name->len }; + 
struct nfs3_linkres res = { &dir_attr, &fattr }; + int status; + + dprintk("NFS call link %s\n", name->name); + dir_attr.valid = 0; + fattr.valid = 0; + status = rpc_call(NFS_CLIENT(dentry->d_inode), NFS3PROC_LINK, &arg, &res, 0); + nfs_refresh_inode(dir->d_inode, &dir_attr); + nfs_refresh_inode(dentry->d_inode, &fattr); + dprintk("NFS reply link: %d\n", status); + return status; +} + +static int +nfs3_proc_symlink(struct dentry *dir, struct qstr *name, struct qstr *path, + struct iattr *sattr, struct nfs_fh *fhandle, + struct nfs_fattr *fattr) +{ + struct nfs_fattr dir_attr; + struct nfs3_symlinkargs arg = { NFS_FH(dir), name->name, name->len, + path->name, path->len, sattr }; + struct nfs3_diropres res = { &dir_attr, fhandle, fattr }; + int status; + + dprintk("NFS call symlink %s -> %s\n", name->name, path->name); + dir_attr.valid = 0; + fattr->valid = 0; + status = rpc_call(NFS_CLIENT(dir->d_inode), NFS3PROC_SYMLINK, &arg, &res, 0); + nfs_refresh_inode(dir->d_inode, &dir_attr); + dprintk("NFS reply symlink: %d\n", status); + return status; +} + +static int +nfs3_proc_mkdir(struct dentry *dir, struct qstr *name, struct iattr *sattr, + struct nfs_fh *fhandle, struct nfs_fattr *fattr) +{ + struct nfs_fattr dir_attr; + struct nfs3_createargs arg = { NFS_FH(dir), name->name, name->len, + sattr, 0, { 0, 0 } }; + struct nfs3_diropres res = { &dir_attr, fhandle, fattr }; + int status; + + dprintk("NFS call mkdir %s\n", name->name); + dir_attr.valid = 0; + fattr->valid = 0; + status = rpc_call(NFS_CLIENT(dir->d_inode), NFS3PROC_MKDIR, &arg, &res, 0); + nfs_refresh_inode(dir->d_inode, &dir_attr); + dprintk("NFS reply mkdir: %d\n", status); + return status; +} + +static int +nfs3_proc_rmdir(struct dentry *dir, struct qstr *name) +{ + struct nfs_fattr dir_attr; + struct nfs3_diropargs arg = { NFS_FH(dir), name->name, name->len }; + int status; + + dprintk("NFS call rmdir %s\n", name->name); + dir_attr.valid = 0; + status = rpc_call(NFS_CLIENT(dir->d_inode), NFS3PROC_RMDIR, 
&arg, &dir_attr, 0); + nfs_refresh_inode(dir->d_inode, &dir_attr); + dprintk("NFS reply rmdir: %d\n", status); + return status; +} + +/* + * The READDIR implementation is somewhat hackish - we pass the user buffer + * to the encode function, which installs it in the receive iovec. + * The decode function itself doesn't perform any decoding, it just makes + * sure the reply is syntactically correct. + * + * Also note that this implementation handles both plain readdir and + * readdirplus. + */ +static int +nfs3_proc_readdir(struct dentry *dir, u64 cookie, void *entry, + unsigned int size, int plus) +{ + struct nfs_fattr dir_attr; + struct nfs3_readdirargs arg = { NFS_FH(dir), cookie, {0, 0}, 0, 0, 0 }; + struct nfs3_readdirres res = { &dir_attr, 0, 0, 0, 0 }; + struct rpc_message msg = { NFS3PROC_READDIR, &arg, &res, NULL }; + u32 *verf = NFS_COOKIEVERF(dir->d_inode); + int status; + + arg.buffer = entry; + arg.bufsiz = size; + arg.verf[0] = verf[0]; + arg.verf[1] = verf[1]; + arg.plus = plus; + res.buffer = entry; + res.bufsiz = size; + res.verf = verf; + res.plus = plus; + + if (plus) + msg.rpc_proc = NFS3PROC_READDIRPLUS; + + dprintk("NFS call readdir%s %d\n", + plus? 
"plus" : "", (unsigned int) cookie); + + dir_attr.valid = 0; + status = rpc_call_sync(NFS_CLIENT(dir->d_inode), &msg, 0); + nfs_refresh_inode(dir->d_inode, &dir_attr); + dprintk("NFS reply readdir: %d\n", status); + return status; +} + +static int +nfs3_proc_mknod(struct dentry *dir, struct qstr *name, struct iattr *sattr, + dev_t rdev, struct nfs_fh *fh, struct nfs_fattr *fattr) +{ + struct nfs_fattr dir_attr; + struct nfs3_mknodargs arg = { NFS_FH(dir), name->name, name->len, 0, + sattr, rdev }; + struct nfs3_diropres res = { &dir_attr, fh, fattr }; + int status; + + switch (sattr->ia_mode & S_IFMT) { + case S_IFBLK: arg.type = NF3BLK; break; + case S_IFCHR: arg.type = NF3CHR; break; + case S_IFIFO: arg.type = NF3FIFO; break; + case S_IFSOCK: arg.type = NF3SOCK; break; + default: return -EINVAL; + } + + dprintk("NFS call mknod %s %x\n", name->name, rdev); + dir_attr.valid = 0; + fattr->valid = 0; + status = rpc_call(NFS_CLIENT(dir->d_inode), NFS3PROC_MKNOD, &arg, &res, 0); + nfs_refresh_inode(dir->d_inode, &dir_attr); + dprintk("NFS reply mknod: %d\n", status); + return status; +} + +/* + * This is a combo call of fsstat and fsinfo + */ +static int +nfs3_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle, + struct nfs_fsinfo *info) +{ + int status; + + dprintk("NFS call fsstat\n"); + memset((char *)info, 0, sizeof(*info)); + status = rpc_call(server->client, NFS3PROC_FSSTAT, fhandle, info, 0); + if (status < 0) + goto error; + status = rpc_call(server->client, NFS3PROC_FSINFO, fhandle, info, 0); + +error: + dprintk("NFS reply statfs: %d\n", status); + return status; +} + +extern u32 *nfs3_decode_dirent(u32 *, struct nfs_entry *, int); + +struct nfs_rpc_ops nfs_v3_clientops = { + 3, /* protocol version */ + nfs3_proc_get_root, + nfs3_proc_getattr, + nfs3_proc_setattr, + nfs3_proc_lookup, + nfs3_proc_access, + nfs3_proc_readlink, + nfs3_proc_read, + nfs3_proc_write, + NULL, /* commit */ + nfs3_proc_create, + nfs3_proc_remove, + nfs3_proc_rename, + 
nfs3_proc_link, + nfs3_proc_symlink, + nfs3_proc_mkdir, + nfs3_proc_rmdir, + nfs3_proc_readdir, + nfs3_proc_mknod, + nfs3_proc_statfs, + nfs3_decode_dirent, +}; diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index beed6ec1e..ef8580c02 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -1,94 +1,158 @@ /* - * linux/fs/nfs/nfs2xdr.c + * linux/fs/nfs/nfs3xdr.c * * XDR functions to encode/decode NFSv3 RPC arguments and results. - * Note: this is incomplete! * - * Copyright (C) 1996 Olaf Kirch + * Copyright (C) 1996, 1997 Olaf Kirch */ -#define NFS_NEED_XDR_TYPES - #include <linux/param.h> #include <linux/sched.h> #include <linux/mm.h> #include <linux/malloc.h> -#include <linux/nfs_fs.h> #include <linux/utsname.h> #include <linux/errno.h> #include <linux/string.h> #include <linux/in.h> #include <linux/pagemap.h> #include <linux/proc_fs.h> +#include <linux/kdev_t.h> #include <linux/sunrpc/clnt.h> +#include <linux/nfs.h> +#include <linux/nfs3.h> +#include <linux/nfs_fs.h> -#ifdef RPC_DEBUG -# define RPC_FACILITY RPCDBG_NFS -#endif +/* Uncomment this to support servers requiring longword lengths */ +#define NFS_PAD_WRITES 1 -#define QUADLEN(len) (((len) + 3) >> 2) -static int nfs_stat_to_errno(int stat); +#define NFSDBG_FACILITY NFSDBG_XDR /* Mapping from NFS error code to "errno" error code. 
*/ #define errno_NFSERR_IO EIO +extern int nfs_stat_to_errno(int); + /* * Declare the space requirements for NFS arguments and replies as * number of 32bit-words */ -#define NFS_fhandle_sz (1+16) -#define NFS_sattr_sz 8 -#define NFS_filename_sz 1+(NFS_MAXNAMLEN>>2) -#define NFS_path_sz 1+(NFS_MAXPATHLEN>>2) -#define NFS_fattr_sz 17 -#define NFS_info_sz 5 -#define NFS_entry_sz NFS_filename_sz+3 - -#define NFS_enc_void_sz 0 -#define NFS_diropargs_sz NFS_fhandle_sz+NFS_filename_sz -#define NFS_sattrargs_sz NFS_fhandle_sz+NFS_sattr_sz -#define NFS_readargs_sz NFS_fhandle_sz+3 -#define NFS_writeargs_sz NFS_fhandle_sz+4 -#define NFS_createargs_sz NFS_diropargs_sz+NFS_sattr_sz -#define NFS_renameargs_sz NFS_diropargs_sz+NFS_diropargs_sz -#define NFS_linkargs_sz NFS_fhandle_sz+NFS_diropargs_sz -#define NFS_symlinkargs_sz NFS_diropargs_sz+NFS_path_sz+NFS_sattr_sz -#define NFS_readdirargs_sz NFS_fhandle_sz+2 - -#define NFS_dec_void_sz 0 -#define NFS_attrstat_sz 1+NFS_fattr_sz -#define NFS_diropres_sz 1+NFS_fhandle_sz+NFS_fattr_sz -#define NFS_readlinkres_sz 1+NFS_path_sz -#define NFS_readres_sz 1+NFS_fattr_sz+1 -#define NFS_stat_sz 1 -#define NFS_readdirres_sz 1 -#define NFS_statfsres_sz 1+NFS_info_sz +#define NFS3_fhandle_sz 1+16 +#define NFS3_fh_sz NFS3_fhandle_sz /* shorthand */ +#define NFS3_sattr_sz 15 +#define NFS3_filename_sz 1+(NFS3_MAXNAMLEN>>2) +#define NFS3_path_sz 1+(NFS3_MAXPATHLEN>>2) +#define NFS3_fattr_sz 21 +#define NFS3_wcc_attr_sz 6 +#define NFS3_pre_op_attr_sz 1+NFS3_wcc_attr_sz +#define NFS3_post_op_attr_sz 1+NFS3_fattr_sz +#define NFS3_wcc_data_sz NFS3_pre_op_attr_sz+NFS3_post_op_attr_sz +#define NFS3_fsstat_sz +#define NFS3_fsinfo_sz +#define NFS3_pathconf_sz +#define NFS3_entry_sz NFS3_filename_sz+3 + +#define NFS3_enc_void_sz 0 +#define NFS3_sattrargs_sz NFS3_fh_sz+NFS3_sattr_sz+3 +#define NFS3_diropargs_sz NFS3_fh_sz+NFS3_filename_sz +#define NFS3_accessargs_sz NFS3_fh_sz+1 +#define NFS3_readlinkargs_sz NFS3_fh_sz +#define NFS3_readargs_sz 
NFS3_fh_sz+3 +#define NFS3_writeargs_sz NFS3_fh_sz+5 +#define NFS3_createargs_sz NFS3_diropargs_sz+NFS3_sattr_sz +#define NFS3_mkdirargs_sz NFS3_diropargs_sz+NFS3_sattr_sz +#define NFS3_symlinkargs_sz NFS3_diropargs_sz+NFS3_path_sz+NFS3_sattr_sz +#define NFS3_mknodargs_sz NFS3_diropargs_sz+2+NFS3_sattr_sz +#define NFS3_renameargs_sz NFS3_diropargs_sz+NFS3_diropargs_sz +#define NFS3_linkargs_sz NFS3_fh_sz+NFS3_diropargs_sz +#define NFS3_readdirargs_sz NFS3_fh_sz+2 +#define NFS3_commitargs_sz NFS3_fh_sz+3 + +#define NFS3_dec_void_sz 0 +#define NFS3_attrstat_sz 1+NFS3_fattr_sz +#define NFS3_wccstat_sz 1+NFS3_wcc_data_sz +#define NFS3_lookupres_sz 1+NFS3_fh_sz+(2 * NFS3_post_op_attr_sz) +#define NFS3_accessres_sz 1+NFS3_post_op_attr_sz+1 +#define NFS3_readlinkres_sz 1+NFS3_post_op_attr_sz +#define NFS3_readres_sz 1+NFS3_post_op_attr_sz+3 +#define NFS3_writeres_sz 1+NFS3_wcc_data_sz+4 +#define NFS3_createres_sz 1+NFS3_fh_sz+NFS3_post_op_attr_sz+NFS3_wcc_data_sz +#define NFS3_renameres_sz 1+(2 * NFS3_wcc_data_sz) +#define NFS3_linkres_sz 1+NFS3_post_op_attr_sz+NFS3_wcc_data_sz +#define NFS3_readdirres_sz 1+NFS3_post_op_attr_sz+2 +#define NFS3_fsstatres_sz 1+NFS3_post_op_attr_sz+13 +#define NFS3_fsinfores_sz 1+NFS3_post_op_attr_sz+12 +#define NFS3_pathconfres_sz 1+NFS3_post_op_attr_sz+6 +#define NFS3_commitres_sz 1+NFS3_wcc_data_sz+2 + +/* + * Map file type to S_IFMT bits + */ +static struct { + unsigned int mode; + unsigned int nfs2type; +} nfs_type2fmt[] = { + { 0, NFNON }, + { S_IFREG, NFREG }, + { S_IFDIR, NFDIR }, + { S_IFBLK, NFBLK }, + { S_IFCHR, NFCHR }, + { S_IFLNK, NFLNK }, + { S_IFSOCK, NFSOCK }, + { S_IFIFO, NFFIFO }, + { 0, NFBAD } +}; /* * Common NFS XDR functions as inlines */ static inline u32 * -xdr_encode_fhandle(u32 *p, struct nfs3_fh *fh) +xdr_encode_fhandle(u32 *p, struct nfs_fh *fh) { *p++ = htonl(fh->size); memcpy(p, fh->data, fh->size); - return p + QUADLEN(fh->size); + return p + XDR_QUADLEN(fh->size); } static inline u32 * -xdr_decode_fhandle(u32 
*p, struct nfs3_fh *fh) +xdr_decode_fhandle(u32 *p, struct nfs_fh *fh) { + /* + * Zero all nonused bytes + */ + memset((u8 *)fh, 0, sizeof(*fh)); if ((fh->size = ntohl(*p++)) <= NFS3_FHSIZE) { memcpy(fh->data, p, fh->size); - return p + QUADLEN(fh->size); + return p + XDR_QUADLEN(fh->size); } return NULL; } -static inline enum nfs_ftype -xdr_decode_ftype(u32 type) +/* + * Encode/decode time. + * Since the VFS doesn't care for fractional times, we ignore the + * nanosecond field. + */ +static inline u32 * +xdr_encode_time(u32 *p, time_t time) +{ + *p++ = htonl(time); + *p++ = 0; + return p; +} + +static inline u32 * +xdr_decode_time3(u32 *p, u64 *timep) { - return (type == NF3FIFO)? NFFIFO : (enum nfs_ftype) type; + *timep = ((u64)ntohl(*p++) << 32) + (u64)ntohl(*p++); + return p; +} + +static inline u32 * +xdr_encode_time3(u32 *p, u64 time) +{ + *p++ = htonl(time >> 32); + *p++ = htonl(time & 0xFFFFFFFF); + return p; } static inline u32 * @@ -99,47 +163,119 @@ xdr_decode_string2(u32 *p, char **string, unsigned int *len, if (*len > maxlen) return NULL; *string = (char *) p; - return p + QUADLEN(*len); + return p + XDR_QUADLEN(*len); } static inline u32 * -xdr_decode_fattr(u32 *p, struct nfs3_fattr *fattr) +xdr_decode_fattr(u32 *p, struct nfs_fattr *fattr) { - fattr->type = xdr_decode_ftype(ntohl(*p++)); - fattr->mode = ntohl(*p++); + unsigned int type; + int fmode; + + type = ntohl(*p++); + if (type >= NF3BAD) + type = NF3BAD; + fmode = nfs_type2fmt[type].mode; + fattr->type = nfs_type2fmt[type].nfs2type; + fattr->mode = (ntohl(*p++) & ~S_IFMT) | fmode; fattr->nlink = ntohl(*p++); fattr->uid = ntohl(*p++); fattr->gid = ntohl(*p++); - fattr->size = ((u64) ntohl(*p++) << 32) | ntohl(*p++); - fattr->used = ((u64) ntohl(*p++) << 32) | ntohl(*p++); - fattr->rdev_maj = ntohl(*p++); - fattr->rdev_min = ntohl(*p++); - fattr->fsid = ntohl(*p++); - fattr->fileid = ntohl(*p++); - fattr->atime.seconds = ntohl(*p++); - fattr->atime.useconds = ntohl(*p++); - fattr->mtime.seconds 
= ntohl(*p++); - fattr->mtime.useconds = ntohl(*p++); - fattr->ctime.seconds = ntohl(*p++); - fattr->ctime.useconds = ntohl(*p++); + p = xdr_decode_hyper(p, &fattr->size); + p = xdr_decode_hyper(p, &fattr->du.nfs3.used); + /* Turn remote device info into Linux-specific dev_t */ + fattr->rdev = (ntohl(*p++) << MINORBITS) | (ntohl(*p++) & MINORMASK); + p = xdr_decode_hyper(p, &fattr->fsid); + p = xdr_decode_hyper(p, &fattr->fileid); + p = xdr_decode_time3(p, &fattr->atime); + p = xdr_decode_time3(p, &fattr->mtime); + p = xdr_decode_time3(p, &fattr->ctime); + + /* Update the mode bits */ + fattr->valid |= (NFS_ATTR_FATTR | NFS_ATTR_FATTR_V3); + return p; +} + +static inline u32 * +xdr_encode_sattr(u32 *p, struct iattr *attr) +{ + if (attr->ia_valid & ATTR_MODE) { + *p++ = xdr_one; + *p++ = htonl(attr->ia_mode); + } else { + *p++ = xdr_zero; + } + if (attr->ia_valid & ATTR_UID) { + *p++ = xdr_one; + *p++ = htonl(attr->ia_uid); + } else { + *p++ = xdr_zero; + } + if (attr->ia_valid & ATTR_GID) { + *p++ = xdr_one; + *p++ = htonl(attr->ia_gid); + } else { + *p++ = xdr_zero; + } + if (attr->ia_valid & ATTR_SIZE) { + *p++ = xdr_one; + p = xdr_encode_hyper(p, (__u64) attr->ia_size); + } else { + *p++ = xdr_zero; + } + if (attr->ia_valid & ATTR_ATIME_SET) { + *p++ = xdr_two; + p = xdr_encode_time(p, attr->ia_atime); + } else if (attr->ia_valid & ATTR_ATIME) { + *p++ = xdr_one; + } else { + *p++ = xdr_zero; + } + if (attr->ia_valid & ATTR_MTIME_SET) { + *p++ = xdr_two; + p = xdr_encode_time(p, attr->ia_mtime); + } else if (attr->ia_valid & ATTR_MTIME) { + *p++ = xdr_one; + } else { + *p++ = xdr_zero; + } + return p; +} + +static inline u32 * +xdr_decode_wcc_attr(u32 *p, struct nfs_fattr *fattr) +{ + p = xdr_decode_hyper(p, &fattr->pre_size); + p = xdr_decode_time3(p, &fattr->pre_mtime); + p = xdr_decode_time3(p, &fattr->pre_ctime); + fattr->valid |= NFS_ATTR_WCC; return p; } static inline u32 * -xdr_encode_sattr(u32 *p, struct nfs_sattr *sattr) -{ - *p++ = htonl(sattr->mode); 
- *p++ = htonl(sattr->uid); - *p++ = htonl(sattr->gid); - *p++ = htonl(sattr->size >> 32); - *p++ = htonl(sattr->size & 0xFFFFFFFF); - *p++ = htonl(sattr->atime.seconds); - *p++ = htonl(sattr->atime.useconds); - *p++ = htonl(sattr->mtime.seconds); - *p++ = htonl(sattr->mtime.useconds); +xdr_decode_post_op_attr(u32 *p, struct nfs_fattr *fattr) +{ + if (*p++) + p = xdr_decode_fattr(p, fattr); return p; } +static inline u32 * +xdr_decode_pre_op_attr(u32 *p, struct nfs_fattr *fattr) +{ + if (*p++) + return xdr_decode_wcc_attr(p, fattr); + return p; +} + + +static inline u32 * +xdr_decode_wcc_data(u32 *p, struct nfs_fattr *fattr) +{ + p = xdr_decode_pre_op_attr(p, fattr); + return xdr_decode_post_op_attr(p, fattr); +} + /* * NFS encode functions */ @@ -147,7 +283,7 @@ xdr_encode_sattr(u32 *p, struct nfs_sattr *sattr) * Encode void argument */ static int -nfs_xdr_enc_void(struct rpc_rqst *req, u32 *p, void *dummy) +nfs3_xdr_enc_void(struct rpc_rqst *req, u32 *p, void *dummy) { req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); return 0; @@ -155,10 +291,9 @@ nfs_xdr_enc_void(struct rpc_rqst *req, u32 *p, void *dummy) /* * Encode file handle argument - * GETATTR, READLINK, STATFS */ static int -nfs_xdr_fhandle(struct rpc_rqst *req, u32 *p, struct nfs3_fh *fh) +nfs3_xdr_fhandle(struct rpc_rqst *req, u32 *p, struct nfs_fh *fh) { p = xdr_encode_fhandle(p, fh); req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); @@ -169,23 +304,37 @@ nfs_xdr_fhandle(struct rpc_rqst *req, u32 *p, struct nfs3_fh *fh) * Encode SETATTR arguments */ static int -nfs_xdr_sattrargs(struct rpc_rqst *req, u32 *p, struct nfs_sattrargs *args) +nfs3_xdr_sattrargs(struct rpc_rqst *req, u32 *p, struct nfs3_sattrargs *args) { p = xdr_encode_fhandle(p, args->fh); p = xdr_encode_sattr(p, args->sattr); + *p++ = htonl(args->guard); + if (args->guard) + p = xdr_encode_time3(p, args->guardtime); req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); return 0; } /* * Encode directory ops argument - * LOOKUP, REMOVE, RMDIR 
*/ static int -nfs_xdr_diropargs(struct rpc_rqst *req, u32 *p, struct nfs_diropargs *args) +nfs3_xdr_diropargs(struct rpc_rqst *req, u32 *p, struct nfs3_diropargs *args) +{ + p = xdr_encode_fhandle(p, args->fh); + p = xdr_encode_array(p, args->name, args->len); + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + return 0; +} + +/* + * Encode access() argument + */ +static int +nfs3_xdr_accessargs(struct rpc_rqst *req, u32 *p, struct nfs3_accessargs *args) { p = xdr_encode_fhandle(p, args->fh); - p = xdr_encode_string(p, args->name); + *p++ = htonl(args->access); req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); return 0; } @@ -193,149 +342,181 @@ nfs_xdr_diropargs(struct rpc_rqst *req, u32 *p, struct nfs_diropargs *args) /* * Arguments to a READ call. Since we read data directly into the page * cache, we also set up the reply iovec here so that iov[1] points - * exactly to the page wewant to fetch. + * exactly to the page we want to fetch. */ static int -nfs_xdr_readargs(struct rpc_rqst *req, u32 *p, struct nfs_readargs *args) +nfs3_xdr_readargs(struct rpc_rqst *req, u32 *p, struct nfs_readargs *args) { struct rpc_auth *auth = req->rq_task->tk_auth; - int replen, buflen; + int buflen, replen; + unsigned int nr; p = xdr_encode_fhandle(p, args->fh); - *p++ = htonl(args->offset); - *p++ = htonl(args->count); + p = xdr_encode_hyper(p, args->offset); *p++ = htonl(args->count); req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); -#if 1 + /* Get the number of buffers in the receive iovec */ + nr = args->nriov; + + if (nr+2 > MAX_IOVEC) { + printk(KERN_ERR "NFS: Bad number of iov's in xdr_readargs\n"); + return -EINVAL; + } + /* set up reply iovec */ - replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS_readres_sz) << 2; + replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS3_readres_sz) << 2; buflen = req->rq_rvec[0].iov_len; req->rq_rvec[0].iov_len = replen; - req->rq_rvec[1].iov_base = args->buffer; - req->rq_rvec[1].iov_len = args->count; - req->rq_rvec[2].iov_base = (u8 *) 
req->rq_rvec[0].iov_base + replen; - req->rq_rvec[2].iov_len = buflen - replen; + + /* Copy the iovec */ + memcpy(req->rq_rvec + 1, args->iov, nr * sizeof(struct iovec)); + + req->rq_rvec[nr+1].iov_base = (u8 *) req->rq_rvec[0].iov_base + replen; + req->rq_rvec[nr+1].iov_len = buflen - replen; req->rq_rlen = args->count + buflen; - req->rq_rnr = 3; -#else - replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS_readres_sz) << 2; - req->rq_rvec[0].iov_len = replen; -#endif + req->rq_rnr += nr+1; return 0; } /* - * Decode READ reply + * Write arguments. Splice the buffer to be written into the iovec. */ static int -nfs_xdr_readres(struct rpc_rqst *req, u32 *p, struct nfs_readres *res) +nfs3_xdr_writeargs(struct rpc_rqst *req, u32 *p, struct nfs_writeargs *args) { - struct iovec *iov = req->rq_rvec; - int status, count, recvd, hdrlen; + unsigned int nr; + u32 count = args->count; - dprintk("RPC: readres OK status %lx\n", ntohl(*p)); - if ((status = ntohl(*p++))) - return -nfs_stat_to_errno(status); - p = xdr_decode_fattr(p, res->fattr); + p = xdr_encode_fhandle(p, args->fh); + p = xdr_encode_hyper(p, args->offset); + *p++ = htonl(count); + *p++ = htonl(args->stable); + *p++ = htonl(count); + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); - count = ntohl(*p++); - hdrlen = (u8 *) p - (u8 *) iov->iov_base; - recvd = req->rq_rlen - hdrlen; - if (p != iov[2].iov_base) { - /* Unexpected reply header size. Punt. 
- * XXX: Move iovec contents to align data on page - * boundary and adjust RPC header size guess */ - printk("NFS: Odd RPC header size in read reply: %d\n", hdrlen); - return -errno_NFSERR_IO; - } - if (count > recvd) { - printk("NFS: server cheating in read reply: " - "count %d > recvd %d\n", count, recvd); - count = recvd; + /* Get the number of buffers in the send iovec */ + nr = args->nriov; + + if (nr+2 > MAX_IOVEC) { + printk(KERN_ERR "NFS: Bad number of iov's in xdr_writeargs\n"); + return -EINVAL; } - dprintk("RPC: readres OK count %d\n", count); - if (count < res->count) - memset((u8 *)(iov[1].iov_base+count), 0, res->count-count); + /* Copy the iovec */ + memcpy(req->rq_svec + 1, args->iov, nr * sizeof(struct iovec)); - return count; -} +#ifdef NFS_PAD_WRITES + /* + * Some old servers require that the message length + * be a multiple of 4, so we pad it here if needed. + */ + if (count & 3) { + struct iovec *iov = req->rq_svec + nr + 1; + int pad = 4 - (count & 3); + + iov->iov_base = (void *) "\0\0\0"; + iov->iov_len = pad; + count += pad; + nr++; + } +#endif + req->rq_slen += count; + req->rq_snr += nr; + return 0; +} /* - * Write arguments. Splice the buffer to be written into the iovec. 
+ * Encode CREATE arguments */ static int -nfs_xdr_writeargs(struct rpc_rqst *req, u32 *p, struct nfs_writeargs *args) +nfs3_xdr_createargs(struct rpc_rqst *req, u32 *p, struct nfs3_createargs *args) { p = xdr_encode_fhandle(p, args->fh); - *p++ = htonl(args->offset); - *p++ = htonl(args->offset); - *p++ = htonl(args->count); - *p++ = htonl(args->count); - req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + p = xdr_encode_array(p, args->name, args->len); - req->rq_svec[1].iov_base = (void *) args->buffer; - req->rq_svec[1].iov_len = args->count; - req->rq_slen += args->count; - req->rq_snr = 2; + *p++ = htonl(args->createmode); + if (args->createmode == NFS3_CREATE_EXCLUSIVE) { + *p++ = args->verifier[0]; + *p++ = args->verifier[1]; + } else + p = xdr_encode_sattr(p, args->sattr); + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); return 0; } /* - * Encode create arguments - * CREATE, MKDIR + * Encode MKDIR arguments */ static int -nfs_xdr_createargs(struct rpc_rqst *req, u32 *p, struct nfs_createargs *args) +nfs3_xdr_mkdirargs(struct rpc_rqst *req, u32 *p, struct nfs3_mkdirargs *args) { p = xdr_encode_fhandle(p, args->fh); - p = xdr_encode_string(p, args->name); + p = xdr_encode_array(p, args->name, args->len); p = xdr_encode_sattr(p, args->sattr); req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); return 0; } /* - * Encode RENAME arguments + * Encode SYMLINK arguments */ static int -nfs_xdr_renameargs(struct rpc_rqst *req, u32 *p, struct nfs_renameargs *args) +nfs3_xdr_symlinkargs(struct rpc_rqst *req, u32 *p, struct nfs3_symlinkargs *args) { p = xdr_encode_fhandle(p, args->fromfh); - p = xdr_encode_string(p, args->fromname); - p = xdr_encode_fhandle(p, args->tofh); - p = xdr_encode_string(p, args->toname); + p = xdr_encode_array(p, args->fromname, args->fromlen); + p = xdr_encode_sattr(p, args->sattr); + p = xdr_encode_array(p, args->topath, args->tolen); req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); return 0; } /* - * Encode LINK arguments + * Encode MKNOD 
arguments + */ +static int +nfs3_xdr_mknodargs(struct rpc_rqst *req, u32 *p, struct nfs3_mknodargs *args) +{ + p = xdr_encode_fhandle(p, args->fh); + p = xdr_encode_array(p, args->name, args->len); + *p++ = htonl(args->type); + p = xdr_encode_sattr(p, args->sattr); + if (args->type == NF3CHR || args->type == NF3BLK) { + *p++ = htonl(args->rdev >> MINORBITS); + *p++ = htonl(args->rdev & MINORMASK); + } + + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + return 0; +} + +/* + * Encode RENAME arguments */ static int -nfs_xdr_linkargs(struct rpc_rqst *req, u32 *p, struct nfs_linkargs *args) +nfs3_xdr_renameargs(struct rpc_rqst *req, u32 *p, struct nfs3_renameargs *args) { p = xdr_encode_fhandle(p, args->fromfh); + p = xdr_encode_array(p, args->fromname, args->fromlen); p = xdr_encode_fhandle(p, args->tofh); - p = xdr_encode_string(p, args->toname); + p = xdr_encode_array(p, args->toname, args->tolen); req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); return 0; } /* - * Encode SYMLINK arguments + * Encode LINK arguments */ static int -nfs_xdr_symlinkargs(struct rpc_rqst *req, u32 *p, struct nfs_symlinkargs *args) +nfs3_xdr_linkargs(struct rpc_rqst *req, u32 *p, struct nfs3_linkargs *args) { p = xdr_encode_fhandle(p, args->fromfh); - p = xdr_encode_string(p, args->fromname); - p = xdr_encode_string(p, args->topath); - p = xdr_encode_sattr(p, args->sattr); + p = xdr_encode_fhandle(p, args->tofh); + p = xdr_encode_array(p, args->toname, args->tolen); req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); return 0; } @@ -344,131 +525,164 @@ nfs_xdr_symlinkargs(struct rpc_rqst *req, u32 *p, struct nfs_symlinkargs *args) * Encode arguments to readdir call */ static int -nfs_xdr_readdirargs(struct rpc_rqst *req, u32 *p, struct nfs_readdirargs *args) +nfs3_xdr_readdirargs(struct rpc_rqst *req, u32 *p, struct nfs3_readdirargs *args) { struct rpc_auth *auth = req->rq_task->tk_auth; - int replen; + int buflen, replen; p = xdr_encode_fhandle(p, args->fh); - *p++ = 
htonl(args->cookie); + p = xdr_encode_hyper(p, args->cookie); + *p++ = args->verf[0]; + *p++ = args->verf[1]; + if (args->plus) { + /* readdirplus: need dircount + buffer size. + * We just make sure we make dircount big enough */ + *p++ = htonl(args->bufsiz >> 3); + } *p++ = htonl(args->bufsiz); req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); /* set up reply iovec */ - replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS_readdirres_sz) << 2; - /* - dprintk("RPC: readdirargs: slack is 4 * (%d + %d + %d) = %d\n", - RPC_REPHDRSIZE, auth->au_rslack, NFS_readdirres_sz, replen); - */ + replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS3_readdirres_sz) << 2; + buflen = req->rq_rvec[0].iov_len; req->rq_rvec[0].iov_len = replen; req->rq_rvec[1].iov_base = args->buffer; req->rq_rvec[1].iov_len = args->bufsiz; - req->rq_rlen = replen + args->bufsiz; - req->rq_rnr = 2; - - /* - dprintk("RPC: readdirargs set up reply vec:\n"); - dprintk(" rvec[0] = %p/%d\n", - req->rq_rvec[0].iov_base, - req->rq_rvec[0].iov_len); - dprintk(" rvec[1] = %p/%d\n", - req->rq_rvec[1].iov_base, - req->rq_rvec[1].iov_len); - */ + req->rq_rvec[2].iov_base = (u8 *) req->rq_rvec[0].iov_base + replen; + req->rq_rvec[2].iov_len = buflen - replen; + req->rq_rlen = buflen + args->bufsiz; + req->rq_rnr += 2; return 0; } /* - * Decode the result of a readdir call. We decode the result in place - * to avoid a malloc of NFS_MAXNAMLEN+1 for each file name. - * After decoding, the layout in memory looks like this: - * entry1 entry2 ... entryN <space> stringN ... string2 string1 - * Each entry consists of three __u32 values, the same space as NFS uses. - * Note that the strings are not null-terminated so that the entire number - * of entries returned by the server should fit into the buffer. + * Decode the result of a readdir call. + * We just check for syntactical correctness. 
*/ static int -nfs_xdr_readdirres(struct rpc_rqst *req, u32 *p, struct nfs_readdirres *res) +nfs3_xdr_readdirres(struct rpc_rqst *req, u32 *p, struct nfs3_readdirres *res) { - struct iovec *iov = req->rq_rvec; - int status, nr, len; - char *string, *start; - u32 *end; - __u32 fileid, cookie, *entry; - - if ((status = ntohl(*p++))) + struct iovec *iov = req->rq_rvec; + int hdrlen; + int status, nr; + unsigned int len; + u32 *entry, *end; + + status = ntohl(*p++); + /* Decode post_op_attrs */ + p = xdr_decode_post_op_attr(p, res->dir_attr); + if (status) return -nfs_stat_to_errno(status); - if ((void *) p != ((u8 *) iov->iov_base+iov->iov_len)) { - /* Unexpected reply header size. Punt. */ - printk("NFS: Odd RPC header size in readdirres reply\n"); - return -errno_NFSERR_IO; + /* Decode verifier cookie */ + if (res->verf) { + res->verf[0] = *p++; + res->verf[1] = *p++; + } else { + p += 2; } - p = (u32 *) iov[1].iov_base; - end = (u32 *) ((u8 *) p + iov[1].iov_len); - - if (p != res->buffer) { - printk("NFS: p != res->buffer in %s:%d!!!\n", - __FILE__, __LINE__); - return -errno_NFSERR_IO; + hdrlen = (u8 *) p - (u8 *) iov->iov_base; + if (iov->iov_len > hdrlen) { + dprintk("NFS: READDIR header is short. 
iovec will be shifted.\n"); + xdr_shift_iovec(iov, req->rq_rnr, iov->iov_len - hdrlen); } - entry = (__u32 *) res->buffer; - start = (char *) res->buffer; - string = start + res->bufsiz; + p = (u32 *) iov[1].iov_base; + end = (u32 *) ((u8 *) p + iov[1].iov_len); for (nr = 0; *p++; nr++) { - fileid = ntohl(*p++); - - len = ntohl(*p++); - if ((p + QUADLEN(len) + 3) > end) { - printk(KERN_NOTICE - "NFS: short packet in readdir reply!\n"); - break; - } - if (len > NFS_MAXNAMLEN) { - printk("NFS: giant filename in readdir (len %x)!\n", + entry = p - 1; + p += 2; /* inode # */ + len = ntohl(*p++); /* string length */ + p += XDR_QUADLEN(len) + 2; /* name + cookie */ + if (len > NFS3_MAXNAMLEN) { + printk(KERN_WARNING "NFS: giant filename in readdir (len %x)!\n", len); return -errno_NFSERR_IO; } - string -= len; - if ((void *) (entry+3) > (void *) string) { - /* - * This error is impossible as long as the temp - * buffer is no larger than the user buffer. The - * current packing algorithm uses the same amount - * of space in the user buffer as in the XDR data, - * so it's guaranteed to fit. - */ - printk("NFS: incorrect buffer size in %s!\n", - __FUNCTION__); - break; + + if (res->plus) { + /* post_op_attr */ + if (*p++) + p += 21; + /* post_op_fh3 */ + if (*p++) { + len = ntohl(*p++); + if (len > NFS3_FHSIZE) { + printk(KERN_WARNING "NFS: giant filehandle in " + "readdir (len %x)!\n", len); + return -errno_NFSERR_IO; + } + p += XDR_QUADLEN(len); + } } - memmove(string, p, len); - p += QUADLEN(len); - cookie = ntohl(*p++); - /* - * To make everything fit, we encode the length, offset, - * and eof flag into 32 bits. This works for filenames - * up to 32K and PAGE_SIZE up to 64K. - */ - status = !p[0] && p[1] ? 
(1 << 15) : 0; /* eof flag */ - *entry++ = fileid; - *entry++ = cookie; - *entry++ = ((string - start) << 16) | status | (len & 0x7FFF); - /* - dprintk("NFS: decoded dirent %.*s cookie %d eof %d\n", - len, string, cookie, status); - */ + if (p + 2 > end) { + printk(KERN_NOTICE + "NFS: short packet in readdir reply!\n"); + /* truncate listing */ + entry[0] = entry[1] = 0; + break; + } } -#ifdef NFS_PARANOIA -printk("nfs_xdr_readdirres: %d entries, ent sp=%d, str sp=%d\n", -nr, ((char *) entry - start), (start + res->bufsiz - string)); -#endif + return nr; } +u32 * +nfs3_decode_dirent(u32 *p, struct nfs_entry *entry, int plus) +{ + struct nfs_entry old = *entry; + + if (!*p++) { + if (!*p) + return ERR_PTR(-EAGAIN); + entry->eof = 1; + return ERR_PTR(-EBADCOOKIE); + } + + p = xdr_decode_hyper(p, &entry->ino); + entry->len = ntohl(*p++); + entry->name = (const char *) p; + p += XDR_QUADLEN(entry->len); + entry->prev_cookie = entry->cookie; + p = xdr_decode_hyper(p, &entry->cookie); + + if (plus) { + p = xdr_decode_post_op_attr(p, &entry->fattr); + /* In fact, a post_op_fh3: */ + if (*p++) { + p = xdr_decode_fhandle(p, &entry->fh); + /* Ugh -- server reply was truncated */ + if (p == NULL) { + dprintk("NFS: FH truncated\n"); + *entry = old; + return ERR_PTR(-EAGAIN); + } + } else { + /* If we don't get a file handle, the attrs + * aren't worth a lot. 
*/ + entry->fattr.valid = 0; + } + } + + entry->eof = !p[0] && p[1]; + return p; +} + +/* + * Encode COMMIT arguments + */ +static int +nfs3_xdr_commitargs(struct rpc_rqst *req, u32 *p, struct nfs_writeargs *args) +{ + p = xdr_encode_fhandle(p, args->fh); + p = xdr_encode_hyper(p, args->offset); + *p++ = htonl(args->count); + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + return 0; +} + /* * NFS XDR decode functions */ @@ -476,59 +690,92 @@ nr, ((char *) entry - start), (start + res->bufsiz - string)); * Decode void reply */ static int -nfs_xdr_dec_void(struct rpc_rqst *req, u32 *p, void *dummy) +nfs3_xdr_dec_void(struct rpc_rqst *req, u32 *p, void *dummy) { return 0; } /* - * Decode simple status reply + * Decode attrstat reply. */ static int -nfs_xdr_stat(struct rpc_rqst *req, u32 *p, void *dummy) +nfs3_xdr_attrstat(struct rpc_rqst *req, u32 *p, struct nfs_fattr *fattr) { int status; - if ((status = ntohl(*p++)) != 0) - status = -nfs_stat_to_errno(status); - return status; + if ((status = ntohl(*p++))) + return -nfs_stat_to_errno(status); + xdr_decode_fattr(p, fattr); + return 0; } /* - * Decode attrstat reply - * GETATTR, SETATTR, WRITE + * Decode status+wcc_data reply + * SATTR, REMOVE, RMDIR */ static int -nfs_xdr_attrstat(struct rpc_rqst *req, u32 *p, struct nfs_fattr *fattr) +nfs3_xdr_wccstat(struct rpc_rqst *req, u32 *p, struct nfs_fattr *fattr) { int status; - dprintk("RPC: attrstat status %lx\n", ntohl(*p)); if ((status = ntohl(*p++))) - return -nfs_stat_to_errno(status); - xdr_decode_fattr(p, fattr); - dprintk("RPC: attrstat OK type %d mode %o dev %x ino %x\n", - fattr->type, fattr->mode, fattr->fsid, fattr->fileid); - return 0; + status = -nfs_stat_to_errno(status); + xdr_decode_wcc_data(p, fattr); + return status; } /* - * Decode diropres reply - * LOOKUP, CREATE, MKDIR + * Decode LOOKUP reply */ static int -nfs_xdr_diropres(struct rpc_rqst *req, u32 *p, struct nfs_diropok *res) +nfs3_xdr_lookupres(struct rpc_rqst *req, u32 *p, struct 
nfs3_diropres *res) { int status; - dprintk("RPC: diropres status %lx\n", ntohl(*p)); - if ((status = ntohl(*p++))) + if ((status = ntohl(*p++))) { + status = -nfs_stat_to_errno(status); + } else { + if (!(p = xdr_decode_fhandle(p, res->fh))) + return -errno_NFSERR_IO; + p = xdr_decode_post_op_attr(p, res->fattr); + } + xdr_decode_post_op_attr(p, res->dir_attr); + return status; +} + +/* + * Decode ACCESS reply + */ +static int +nfs3_xdr_accessres(struct rpc_rqst *req, u32 *p, struct nfs3_accessres *res) +{ + int status = ntohl(*p++); + + p = xdr_decode_post_op_attr(p, res->fattr); + if (status) return -nfs_stat_to_errno(status); - p = xdr_decode_fhandle(p, res->fh); - xdr_decode_fattr(p, res->fattr); - dprintk("RPC: diropres OK type %x mode %o dev %x ino %x\n", - res->fattr->type, res->fattr->mode, - res->fattr->fsid, res->fattr->fileid); + res->access = ntohl(*p++); + return 0; +} + +static int +nfs3_xdr_readlinkargs(struct rpc_rqst *req, u32 *p, struct nfs3_readlinkargs *args) +{ + struct rpc_task *task = req->rq_task; + struct rpc_auth *auth = task->tk_auth; + int buflen, replen; + + p = xdr_encode_fhandle(p, args->fh); + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS3_readlinkres_sz) << 2; + buflen = req->rq_rvec[0].iov_len; + req->rq_rvec[0].iov_len = replen; + req->rq_rvec[1].iov_base = args->buffer; + req->rq_rvec[1].iov_len = args->bufsiz; + req->rq_rvec[2].iov_base = (u8 *) req->rq_rvec[0].iov_base + replen; + req->rq_rvec[2].iov_len = buflen - replen; + req->rq_rlen = buflen + args->bufsiz; + req->rq_rnr += 2; return 0; } @@ -536,155 +783,299 @@ nfs_xdr_diropres(struct rpc_rqst *req, u32 *p, struct nfs_diropok *res) * Decode READLINK reply */ static int -nfs_xdr_readlinkres(struct rpc_rqst *req, u32 *p, struct nfs_readlinkres *res) +nfs3_xdr_readlinkres(struct rpc_rqst *req, u32 *p, struct nfs3_readlinkres *res) { + struct iovec *iov = req->rq_rvec; + int hdrlen; + u32 *strlen; + char *string; int 
status; + unsigned int len; - if ((status = ntohl(*p++))) + status = ntohl(*p++); + p = xdr_decode_post_op_attr(p, res->fattr); + + if (status != 0) return -nfs_stat_to_errno(status); - xdr_decode_string2(p, res->string, res->lenp, res->maxlen); - /* Caller takes over the buffer here to avoid extra copy */ - res->buffer = req->rq_task->tk_buffer; - req->rq_task->tk_buffer = NULL; + hdrlen = (u8 *) p - (u8 *) iov->iov_base; + if (iov->iov_len > hdrlen) { + dprintk("NFS: READLINK header is short. iovec will be shifted.\n"); + xdr_shift_iovec(iov, req->rq_rnr, iov->iov_len - hdrlen); + } + + strlen = (u32*)res->buffer; + /* Convert length of symlink */ + len = ntohl(*strlen); + if (len > res->bufsiz - 5) + len = res->bufsiz - 5; + *strlen = len; + /* NULL terminate the string we got */ + string = (char *)(strlen + 1); + string[len] = 0; return 0; } /* - * Decode STATFS reply + * Decode READ reply */ static int -nfs_xdr_statfsres(struct rpc_rqst *req, u32 *p, struct nfs_fsinfo *res) +nfs3_xdr_readres(struct rpc_rqst *req, u32 *p, struct nfs_readres *res) { - int status; + struct iovec *iov = req->rq_rvec; + int status, count, ocount, recvd, hdrlen; - if ((status = ntohl(*p++))) + status = ntohl(*p++); + p = xdr_decode_post_op_attr(p, res->fattr); + + if (status != 0) return -nfs_stat_to_errno(status); - res->tsize = ntohl(*p++); - res->bsize = ntohl(*p++); - res->blocks = ntohl(*p++); - res->bfree = ntohl(*p++); - res->bavail = ntohl(*p++); - return 0; + + /* Decode reply count and EOF flag. NFSv3 is somewhat redundant + * in that it puts the count both in the res struct and in the + * opaque data count. */ + count = ntohl(*p++); + res->eof = ntohl(*p++); + ocount = ntohl(*p++); + + if (ocount != count) { + printk(KERN_WARNING "NFS: READ count doesn't match RPC opaque count.\n"); + return -errno_NFSERR_IO; + } + + hdrlen = (u8 *) p - (u8 *) iov->iov_base; + if (iov->iov_len > hdrlen) { + dprintk("NFS: READ header is short. 
iovec will be shifted.\n"); + xdr_shift_iovec(iov, req->rq_rnr, iov->iov_len - hdrlen); + } + + recvd = req->rq_rlen - hdrlen; + if (count > recvd) { + printk(KERN_WARNING "NFS: server cheating in read reply: " + "count %d > recvd %d\n", count, recvd); + count = recvd; + } + + if (count < res->count) { + xdr_zero_iovec(iov+1, req->rq_rnr-2, res->count - count); + res->count = count; + } + + return count; } /* - * We need to translate between nfs status return values and - * the local errno values which may not be the same. + * Decode WRITE response */ -static struct { - int stat; - int errno; -} nfs_errtbl[] = { - { NFS_OK, 0 }, - { NFSERR_PERM, EPERM }, - { NFSERR_NOENT, ENOENT }, - { NFSERR_IO, errno_NFSERR_IO }, - { NFSERR_NXIO, ENXIO }, - { NFSERR_EAGAIN, EAGAIN }, - { NFSERR_ACCES, EACCES }, - { NFSERR_EXIST, EEXIST }, - { NFSERR_XDEV, EXDEV }, - { NFSERR_NODEV, ENODEV }, - { NFSERR_NOTDIR, ENOTDIR }, - { NFSERR_ISDIR, EISDIR }, - { NFSERR_INVAL, EINVAL }, - { NFSERR_FBIG, EFBIG }, - { NFSERR_NOSPC, ENOSPC }, - { NFSERR_ROFS, EROFS }, - { NFSERR_NAMETOOLONG, ENAMETOOLONG }, - { NFSERR_NOTEMPTY, ENOTEMPTY }, - { NFSERR_DQUOT, EDQUOT }, - { NFSERR_STALE, ESTALE }, -#ifdef EWFLUSH - { NFSERR_WFLUSH, EWFLUSH }, -#endif - { -1, EIO } -}; +static int +nfs3_xdr_writeres(struct rpc_rqst *req, u32 *p, struct nfs_writeres *res) +{ + int status; + + status = ntohl(*p++); + p = xdr_decode_wcc_data(p, res->fattr); + if (status != 0) + return -nfs_stat_to_errno(status); + + res->count = ntohl(*p++); + res->verf->committed = (enum nfs3_stable_how)ntohl(*p++); + res->verf->verifier[0] = *p++; + res->verf->verifier[1] = *p++; + + return res->count; +} + +/* + * Decode a CREATE response + */ static int -nfs_stat_to_errno(int stat) +nfs3_xdr_createres(struct rpc_rqst *req, u32 *p, struct nfs3_diropres *res) { - int i; + int status; - for (i = 0; nfs_errtbl[i].stat != -1; i++) { - if (nfs_errtbl[i].stat == stat) - return nfs_errtbl[i].errno; + status = ntohl(*p++); + if (status 
== 0) { + if (*p++) { + if (!(p = xdr_decode_fhandle(p, res->fh))) + return -errno_NFSERR_IO; + p = xdr_decode_post_op_attr(p, res->fattr); + } else { + memset(res->fh, 0, sizeof(*res->fh)); + /* Do decode post_op_attr but set it to NULL */ + p = xdr_decode_post_op_attr(p, res->fattr); + res->fattr->valid = 0; + } + } else { + status = -nfs_stat_to_errno(status); } - printk("nfs_stat_to_errno: bad nfs status return value: %d\n", stat); - return nfs_errtbl[i].errno; + p = xdr_decode_wcc_data(p, res->dir_attr); + return status; } -#ifndef MAX -# define MAX(a, b) (((a) > (b))? (a) : (b)) -#endif +/* + * Decode RENAME reply + */ +static int +nfs3_xdr_renameres(struct rpc_rqst *req, u32 *p, struct nfs3_renameres *res) +{ + int status; -#define PROC(proc, argtype, restype) \ - { "nfs_" #proc, \ - (kxdrproc_t) nfs_xdr_##argtype, \ - (kxdrproc_t) nfs_xdr_##restype, \ - MAX(NFS_##argtype##_sz,NFS_##restype##_sz) << 2 \ - } + if ((status = ntohl(*p++)) != 0) + status = -nfs_stat_to_errno(status); + p = xdr_decode_wcc_data(p, res->fromattr); + p = xdr_decode_wcc_data(p, res->toattr); + return status; +} -static struct rpc_procinfo nfs_procedures[18] = { - PROC(null, enc_void, dec_void), - PROC(getattr, fhandle, attrstat), - PROC(setattr, sattrargs, attrstat), - PROC(root, enc_void, dec_void), - PROC(lookup, diropargs, diropres), - PROC(readlink, fhandle, readlinkres), - PROC(read, readargs, readres), - PROC(writecache, enc_void, dec_void), - PROC(write, writeargs, attrstat), - PROC(create, createargs, diropres), - PROC(remove, diropargs, stat), - PROC(rename, renameargs, stat), - PROC(link, linkargs, stat), - PROC(symlink, symlinkargs, stat), - PROC(mkdir, createargs, diropres), - PROC(rmdir, diropargs, stat), - PROC(readdir, readdirargs, readdirres), - PROC(statfs, fhandle, statfsres), -}; +/* + * Decode LINK reply + */ +static int +nfs3_xdr_linkres(struct rpc_rqst *req, u32 *p, struct nfs3_linkres *res) +{ + int status; -static struct rpc_version nfs_version2 = { - 2, - 
sizeof(nfs_procedures)/sizeof(nfs_procedures[0]), - nfs_procedures -}; + if ((status = ntohl(*p++)) != 0) + status = -nfs_stat_to_errno(status); + p = xdr_decode_post_op_attr(p, res->fattr); + p = xdr_decode_wcc_data(p, res->dir_attr); + return status; +} -static struct rpc_version * nfs_version[] = { - NULL, - NULL, - &nfs_version2 -}; +/* + * Decode FSSTAT reply + */ +static int +nfs3_xdr_fsstatres(struct rpc_rqst *req, u32 *p, struct nfs_fsinfo *res) +{ + struct nfs_fattr dummy; + int status; -struct rpc_program nfs_program = { - "nfs", - NFS_PROGRAM, - sizeof(nfs_version) / sizeof(nfs_version[0]), - nfs_version, - &nfs_rpcstat, -}; + status = ntohl(*p++); + + p = xdr_decode_post_op_attr(p, &dummy); + if (status != 0) + return -nfs_stat_to_errno(status); + + p = xdr_decode_hyper(p, &res->tbytes); + p = xdr_decode_hyper(p, &res->fbytes); + p = xdr_decode_hyper(p, &res->abytes); + p = xdr_decode_hyper(p, &res->tfiles); + p = xdr_decode_hyper(p, &res->ffiles); + p = xdr_decode_hyper(p, &res->afiles); + + /* ignore invarsec */ + return 0; +} + +/* + * Decode FSINFO reply + */ +static int +nfs3_xdr_fsinfores(struct rpc_rqst *req, u32 *p, struct nfs_fsinfo *res) +{ + struct nfs_fattr dummy; + int status; + + status = ntohl(*p++); + + p = xdr_decode_post_op_attr(p, &dummy); + if (status != 0) + return -nfs_stat_to_errno(status); + + res->rtmax = ntohl(*p++); + res->rtpref = ntohl(*p++); + res->rtmult = ntohl(*p++); + res->wtmax = ntohl(*p++); + res->wtpref = ntohl(*p++); + res->wtmult = ntohl(*p++); + res->dtpref = ntohl(*p++); + p = xdr_decode_hyper(p, &res->maxfilesize); + + /* ignore time_delta and properties */ + return 0; +} + +/* + * Decode PATHCONF reply + */ +static int +nfs3_xdr_pathconfres(struct rpc_rqst *req, u32 *p, struct nfs_fsinfo *res) +{ + struct nfs_fattr dummy; + int status; + + status = ntohl(*p++); + + p = xdr_decode_post_op_attr(p, &dummy); + if (status != 0) + return -nfs_stat_to_errno(status); + res->linkmax = ntohl(*p++); + res->namelen = 
ntohl(*p++); + + /* ignore remaining fields */ + return 0; +} /* - * RPC stats support + * Decode COMMIT reply */ static int -nfs_get_info(char *buffer, char **start, off_t offset, int length, int dummy) +nfs3_xdr_commitres(struct rpc_rqst *req, u32 *p, struct nfs_writeres *res) { - return rpcstat_get_info(&nfs_rpcstat, buffer, start, offset, length); + int status; + + status = ntohl(*p++); + p = xdr_decode_wcc_data(p, res->fattr); + if (status != 0) + return -nfs_stat_to_errno(status); + + res->verf->verifier[0] = *p++; + res->verf->verifier[1] = *p++; + return 0; } -static struct proc_dir_entry proc_nfsclnt = { - 0, 3, "nfs", - S_IFREG | S_IRUGO, 1, 0, 0, - 6, NULL, - nfs_get_info +#ifndef MAX +# define MAX(a, b) (((a) > (b))? (a) : (b)) +#endif + +#define PROC(proc, argtype, restype) \ + { "nfs3_" #proc, \ + (kxdrproc_t) nfs3_xdr_##argtype, \ + (kxdrproc_t) nfs3_xdr_##restype, \ + MAX(NFS3_##argtype##_sz,NFS3_##restype##_sz) << 2, \ + 0 \ + } + +static struct rpc_procinfo nfs3_procedures[22] = { + PROC(null, enc_void, dec_void), + PROC(getattr, fhandle, attrstat), + PROC(setattr, sattrargs, wccstat), + PROC(lookup, diropargs, lookupres), + PROC(access, accessargs, accessres), + PROC(readlink, readlinkargs, readlinkres), + PROC(read, readargs, readres), + PROC(write, writeargs, writeres), + PROC(create, createargs, createres), + PROC(mkdir, mkdirargs, createres), + PROC(symlink, symlinkargs, createres), + PROC(mknod, mknodargs, createres), + PROC(remove, diropargs, wccstat), + PROC(rmdir, diropargs, wccstat), + PROC(rename, renameargs, renameres), + PROC(link, linkargs, linkres), + PROC(readdir, readdirargs, readdirres), + PROC(readdirplus, readdirargs, readdirres), + PROC(fsstat, fhandle, fsstatres), + PROC(fsinfo, fhandle, fsinfores), + PROC(pathconf, fhandle, pathconfres), + PROC(commit, commitargs, commitres), }; -struct rpc_stat nfs_rpcstat = { - NULL, /* next */ - &proc_nfsclnt, /* /proc/net directory entry */ - &nfs_program, /* RPC program */ +struct 
rpc_version nfs_version3 = { + 3, + sizeof(nfs3_procedures)/sizeof(nfs3_procedures[0]), + nfs3_procedures }; + diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c index a592608be..b632cf175 100644 --- a/fs/nfs/nfsroot.c +++ b/fs/nfs/nfsroot.c @@ -61,8 +61,12 @@ * Martin Mares : Use root_server_addr appropriately during setup. * Martin Mares : Rewrote parameter parsing, now hopefully giving * correct overriding. + * Trond Myklebust : Add in preliminary support for NFSv3 and TCP. + * Fix bug in root_nfs_addr(). nfs_data.namlen + * is NOT for the length of the hostname. */ +#include <linux/config.h> #include <linux/types.h> #include <linux/string.h> #include <linux/kernel.h> @@ -147,6 +151,12 @@ static struct nfs_bool_opts { { "noac", ~NFS_MOUNT_NOAC, NFS_MOUNT_NOAC }, { "lock", ~NFS_MOUNT_NONLM, 0 }, { "nolock", ~NFS_MOUNT_NONLM, NFS_MOUNT_NONLM }, +#ifdef CONFIG_NFS_V3 + { "v2", ~NFS_MOUNT_VER3, 0 }, + { "v3", ~NFS_MOUNT_VER3, NFS_MOUNT_VER3 }, +#endif + { "udp", ~NFS_MOUNT_TCP, 0 }, + { "tcp", ~NFS_MOUNT_TCP, NFS_MOUNT_TCP }, { NULL, 0, 0 } }; @@ -271,7 +281,6 @@ static int __init root_nfs_addr(void) } strncpy(nfs_data.hostname, in_ntoa(servaddr), sizeof(nfs_data.hostname)-1); - nfs_data.namlen = strlen(nfs_data.hostname); return 0; } @@ -360,14 +369,14 @@ set_sockaddr(struct sockaddr_in *sin, __u32 addr, __u16 port) /* * Query server portmapper for the port of a daemon program. 
*/ -static int __init root_nfs_getport(int program, int version) +static int __init root_nfs_getport(int program, int version, int proto) { struct sockaddr_in sin; printk(KERN_NOTICE "Looking up port of RPC %d/%d on %s\n", program, version, in_ntoa(servaddr)); set_sockaddr(&sin, servaddr, 0); - return rpc_getport_external(&sin, program, version, IPPROTO_UDP); + return rpc_getport_external(&sin, program, version, proto); } @@ -379,22 +388,39 @@ static int __init root_nfs_getport(int program, int version) static int __init root_nfs_ports(void) { int port; + int nfsd_ver, mountd_ver; + int nfsd_port, mountd_port; + int proto; + + if (nfs_data.flags & NFS_MOUNT_VER3) { + nfsd_ver = NFS3_VERSION; + mountd_ver = NFS_MNT3_VERSION; + nfsd_port = NFS_PORT; + mountd_port = NFS_MNT_PORT; + } else { + nfsd_ver = NFS2_VERSION; + mountd_ver = NFS_MNT_VERSION; + nfsd_port = NFS_PORT; + mountd_port = NFS_MNT_PORT; + } + + proto = (nfs_data.flags & NFS_MOUNT_TCP) ? IPPROTO_TCP : IPPROTO_UDP; if (nfs_port < 0) { - if ((port = root_nfs_getport(NFS_PROGRAM, NFS_VERSION)) < 0) { + if ((port = root_nfs_getport(NFS_PROGRAM, nfsd_ver, proto)) < 0) { printk(KERN_ERR "Root-NFS: Unable to get nfsd port " "number from server, using default\n"); - port = NFS_PORT; + port = nfsd_port; } nfs_port = htons(port); dprintk("Root-NFS: Portmapper on server returned %d " "as nfsd port\n", port); } - if ((port = root_nfs_getport(NFS_MNT_PROGRAM, NFS_MNT_VERSION)) < 0) { + if ((port = root_nfs_getport(NFS_MNT_PROGRAM, nfsd_ver, proto)) < 0) { printk(KERN_ERR "Root-NFS: Unable to get mountd port " "number from server, using default\n"); - port = NFS_MNT_PORT; + port = mountd_port; } mount_port = htons(port); dprintk("Root-NFS: mountd port is %d\n", port); @@ -413,7 +439,10 @@ static int __init root_nfs_get_handle(void) int status; set_sockaddr(&sin, servaddr, mount_port); - status = nfs_mount(&sin, nfs_path, &nfs_data.root); + if (nfs_data.flags & NFS_MOUNT_VER3) + status = nfs3_mount(&sin, nfs_path, 
&nfs_data.root); + else + status = nfs_mount(&sin, nfs_path, &nfs_data.root); if (status < 0) printk(KERN_ERR "Root-NFS: Server returned error %d " "while mounting %s\n", status, nfs_path); diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index 3823c3118..d6d532217 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -13,10 +13,6 @@ * Note: Error returns are optimized for NFS_OK, which isn't translated via * nfs_stat_to_errno(), but happens to be already the right return code. * - * FixMe: We ought to define a sensible small max size for - * things like getattr that are tiny packets and use the - * old get_free_page stuff with it. - * * Also, the code currently doesn't check the size of the packet, when * it decodes the packet. * @@ -25,209 +21,357 @@ * Completely rewritten to support the new RPC call interface; * rewrote and moved the entire XDR stuff to xdr.c * --Olaf Kirch June 1996 + * + * The code below initializes all auto variables explicitly, otherwise + * it will fail to work as a module (gcc generates a memset call for an + * incomplete struct). */ -#define NFS_NEED_XDR_TYPES - +#include <linux/types.h> #include <linux/param.h> +#include <linux/malloc.h> #include <linux/sched.h> #include <linux/mm.h> -#include <linux/malloc.h> #include <linux/utsname.h> #include <linux/errno.h> #include <linux/string.h> #include <linux/in.h> #include <linux/pagemap.h> #include <linux/sunrpc/clnt.h> +#include <linux/nfs.h> +#include <linux/nfs2.h> #include <linux/nfs_fs.h> #include <asm/segment.h> -#ifdef NFS_DEBUG -# define NFSDBG_FACILITY NFSDBG_PROC -#endif +#define NFSDBG_FACILITY NFSDBG_PROC + +/* + * Bare-bones access to getattr: this is for nfs_read_super. 
+ */ +static int +nfs_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, + struct nfs_fattr *fattr) +{ + int status; + dprintk("NFS call getroot\n"); + fattr->valid = 0; + status = rpc_call(server->client, NFSPROC_GETATTR, fhandle, fattr, 0); + dprintk("NFS reply getroot\n"); + return status; +} /* * One function for each procedure in the NFS protocol. */ -int -nfs_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, - struct nfs_fattr *fattr) +static int +nfs_proc_getattr(struct dentry *dentry, struct nfs_fattr *fattr) { int status; dprintk("NFS call getattr\n"); - status = rpc_call(server->client, NFSPROC_GETATTR, fhandle, fattr, 0); + fattr->valid = 0; + status = rpc_call(NFS_CLIENT(dentry->d_inode), NFSPROC_GETATTR, + NFS_FH(dentry), fattr, 0); dprintk("NFS reply getattr\n"); return status; } -int -nfs_proc_setattr(struct nfs_server *server, struct nfs_fh *fhandle, - struct nfs_fattr *fattr, struct iattr *sattr) +static int +nfs_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, + struct iattr *sattr) { - struct nfs_sattrargs arg = { fhandle, sattr }; + struct nfs_sattrargs arg = { NFS_FH(dentry), sattr }; int status; dprintk("NFS call setattr\n"); - status = rpc_call(server->client, NFSPROC_SETATTR, &arg, fattr, 0); + fattr->valid = 0; + status = rpc_call(NFS_CLIENT(dentry->d_inode), NFSPROC_SETATTR, &arg, fattr, 0); dprintk("NFS reply setattr\n"); return status; } -int -nfs_proc_lookup(struct nfs_server *server, struct nfs_fh *dir, const char *name, - struct nfs_fh *fhandle, struct nfs_fattr *fattr) +static int +nfs_proc_lookup(struct dentry *dir, struct qstr *name, + struct nfs_fh *fhandle, struct nfs_fattr *fattr) { - struct nfs_diropargs arg = { dir, name }; + struct nfs_diropargs arg = { NFS_FH(dir), name->name, name->len }; struct nfs_diropok res = { fhandle, fattr }; int status; - dprintk("NFS call lookup %s\n", name); - status = rpc_call(server->client, NFSPROC_LOOKUP, &arg, &res, 0); + dprintk("NFS call lookup %s\n", 
name->name); + fattr->valid = 0; + status = rpc_call(NFS_CLIENT(dir->d_inode), NFSPROC_LOOKUP, &arg, &res, 0); dprintk("NFS reply lookup: %d\n", status); return status; } -int -nfs_proc_read(struct nfs_server *server, struct nfs_fh *fhandle, int swap, - unsigned long offset, unsigned int count, - void *buffer, struct nfs_fattr *fattr) +static int +nfs_proc_readlink(struct dentry *dentry, void *buffer, unsigned int bufsiz) { - struct nfs_readargs arg = { fhandle, offset, count, buffer }; - struct nfs_readres res = { fattr, count }; + struct nfs_readlinkargs args = { NFS_FH(dentry), buffer, bufsiz }; + struct nfs_readlinkres res = { buffer, bufsiz }; int status; - dprintk("NFS call read %d @ %ld\n", count, offset); - status = rpc_call(server->client, NFSPROC_READ, &arg, &res, - swap? NFS_RPC_SWAPFLAGS : 0); - dprintk("NFS reply read: %d\n", status); + dprintk("NFS call readlink\n"); + status = rpc_call(NFS_CLIENT(dentry->d_inode), NFSPROC_READLINK, + &args, &res, 0); + dprintk("NFS reply readlink: %d\n", status); return status; } -int -nfs_proc_write(struct nfs_server *server, struct nfs_fh *fhandle, int swap, - unsigned long offset, unsigned int count, - const void *buffer, struct nfs_fattr *fattr) +static int +nfs_proc_read(struct dentry *dentry, struct nfs_fattr *fattr, int flags, + loff_t offset, unsigned int count, void *buffer, int *eofp) { - struct nfs_writeargs arg = { fhandle, offset, count, 1, 1, - {{(void *) buffer, count}, {0,0}, {0,0}, {0,0}, - {0,0}, {0,0}, {0,0}, {0,0}}}; - struct nfs_writeverf verf; - struct nfs_writeres res = {fattr, &verf, count}; + struct nfs_readargs arg = { NFS_FH(dentry), offset, count, 1, + {{ buffer, count }, {0,0}, {0,0}, {0,0}, + {0,0}, {0,0}, {0,0}, {0,0}} }; + struct nfs_readres res = { fattr, count, 0}; + struct rpc_message msg = { NFSPROC_READ, &arg, &res, NULL }; int status; - dprintk("NFS call write %d @ %ld\n", count, offset); - status = rpc_call(server->client, NFSPROC_WRITE, &arg, &res, - swap? 
(RPC_TASK_SWAPPER|RPC_TASK_ROOTCREDS) : 0); + dprintk("NFS call read %d @ %Ld\n", count, (long long)offset); + fattr->valid = 0; + status = rpc_call_sync(NFS_CLIENT(dentry->d_inode), &msg, flags); + dprintk("NFS reply read: %d\n", status); + *eofp = res.eof; + return status; +} + +static int +nfs_proc_write(struct dentry *dentry, struct nfs_fattr *fattr, int how, + loff_t offset, unsigned int count, + void *buffer, struct nfs_writeverf *verf) +{ + struct nfs_writeargs arg = {NFS_FH(dentry), offset, count, + NFS_FILE_SYNC, 1, + {{buffer, count}, {0,0}, {0,0}, {0,0}, + {0,0}, {0,0}, {0,0}, {0,0}}}; + struct nfs_writeres res = {fattr, verf, count}; + struct rpc_message msg = { NFSPROC_WRITE, &arg, &res, NULL }; + int status, flags = 0; + + dprintk("NFS call write %d @ %Ld\n", count, (long long)offset); + fattr->valid = 0; + if (how & NFS_RW_SWAP) + flags |= NFS_RPC_SWAPFLAGS; + status = rpc_call_sync(NFS_CLIENT(dentry->d_inode), &msg, flags); + + dprintk("NFS reply write: %d\n", status); + verf->committed = NFS_FILE_SYNC; /* NFSv2 always syncs data */ return status < 0? 
status : count; } -int -nfs_proc_create(struct nfs_server *server, struct nfs_fh *dir, - const char *name, struct iattr *sattr, - struct nfs_fh *fhandle, struct nfs_fattr *fattr) +static int +nfs_proc_create(struct dentry *dir, struct qstr *name, struct iattr *sattr, + int flags, struct nfs_fh *fhandle, struct nfs_fattr *fattr) { - struct nfs_createargs arg = { dir, name, sattr }; + struct nfs_createargs arg = { NFS_FH(dir), name->name, + name->len, sattr }; struct nfs_diropok res = { fhandle, fattr }; int status; - dprintk("NFS call create %s\n", name); - status = rpc_call(server->client, NFSPROC_CREATE, &arg, &res, 0); + fattr->valid = 0; + dprintk("NFS call create %s\n", name->name); + status = rpc_call(NFS_CLIENT(dir->d_inode), NFSPROC_CREATE, &arg, &res, 0); dprintk("NFS reply create: %d\n", status); return status; } -int -nfs_proc_remove(struct nfs_server *server, struct nfs_fh *dir, const char *name) +/* + * In NFSv2, mknod is grafted onto the create call. + */ +static int +nfs_proc_mknod(struct dentry *dir, struct qstr *name, struct iattr *sattr, + dev_t rdev, struct nfs_fh *fhandle, struct nfs_fattr *fattr) { - struct nfs_diropargs arg = { dir, name }; + struct nfs_createargs arg = { NFS_FH(dir), name->name, + name->len, sattr }; + struct nfs_diropok res = { fhandle, fattr }; + int status, mode; + + dprintk("NFS call mknod %s\n", name->name); + + mode = sattr->ia_mode; + if (S_ISFIFO(mode)) { + sattr->ia_mode = (mode & ~S_IFMT) | S_IFCHR; + sattr->ia_valid &= ~ATTR_SIZE; + } else if (S_ISCHR(rdev) || S_ISBLK(rdev)) { + sattr->ia_valid |= ATTR_SIZE; + sattr->ia_size = rdev; /* get out your barf bag */ + } + + fattr->valid = 0; + status = rpc_call(NFS_CLIENT(dir->d_inode), NFSPROC_CREATE, &arg, &res, 0); + + if (status == -EINVAL && S_ISFIFO(mode)) { + sattr->ia_mode = mode; + fattr->valid = 0; + status = rpc_call(NFS_CLIENT(dir->d_inode), NFSPROC_CREATE, &arg, &res, 0); + } + dprintk("NFS reply mknod: %d\n", status); + return status; +} + +static int 
+nfs_proc_remove(struct dentry *dir, struct qstr *name) +{ + struct nfs_diropargs arg = { NFS_FH(dir), name->name, name->len }; + struct rpc_message msg = { NFSPROC_REMOVE, &arg, NULL, NULL }; int status; - dprintk("NFS call remove %s\n", name); - status = rpc_call(server->client, NFSPROC_REMOVE, &arg, NULL, 0); + dprintk("NFS call remove %s\n", name->name); + status = rpc_call_sync(NFS_CLIENT(dir->d_inode), &msg, 0); + dprintk("NFS reply remove: %d\n", status); return status; } -int -nfs_proc_rename(struct nfs_server *server, - struct nfs_fh *old_dir, const char *old_name, - struct nfs_fh *new_dir, const char *new_name) +static int +nfs_proc_rename(struct dentry *old_dir, struct qstr *old_name, + struct dentry *new_dir, struct qstr *new_name) { - struct nfs_renameargs arg = { old_dir, old_name, new_dir, new_name }; + struct nfs_renameargs arg = { NFS_FH(old_dir), old_name->name, + old_name->len, + NFS_FH(new_dir), new_name->name, + new_name->len}; int status; - dprintk("NFS call rename %s -> %s\n", old_name, new_name); - status = rpc_call(server->client, NFSPROC_RENAME, &arg, NULL, 0); + dprintk("NFS call rename %s -> %s\n", old_name->name, new_name->name); + status = rpc_call(NFS_CLIENT(old_dir->d_inode), NFSPROC_RENAME, &arg, NULL, 0); dprintk("NFS reply rename: %d\n", status); return status; } -int -nfs_proc_link(struct nfs_server *server, struct nfs_fh *fhandle, - struct nfs_fh *dir, const char *name) +static int +nfs_proc_link(struct dentry *dentry, struct dentry *dir, struct qstr *name) { - struct nfs_linkargs arg = { fhandle, dir, name }; + struct nfs_linkargs arg = { NFS_FH(dentry), NFS_FH(dir), + name->name, name->len }; int status; - dprintk("NFS call link %s\n", name); - status = rpc_call(server->client, NFSPROC_LINK, &arg, NULL, 0); + dprintk("NFS call link %s\n", name->name); + status = rpc_call(NFS_CLIENT(dentry->d_inode), NFSPROC_LINK, &arg, NULL, 0); dprintk("NFS reply link: %d\n", status); return status; } -int -nfs_proc_symlink(struct nfs_server 
*server, struct nfs_fh *dir, - const char *name, const char *path, - struct iattr *sattr) +static int +nfs_proc_symlink(struct dentry *dir, struct qstr *name, struct qstr *path, + struct iattr *sattr, struct nfs_fh *fhandle, + struct nfs_fattr *fattr) { - struct nfs_symlinkargs arg = { dir, name, path, sattr }; + struct nfs_symlinkargs arg = { NFS_FH(dir), name->name, name->len, + path->name, path->len, sattr }; int status; - dprintk("NFS call symlink %s -> %s\n", name, path); - status = rpc_call(server->client, NFSPROC_SYMLINK, &arg, NULL, 0); + dprintk("NFS call symlink %s -> %s\n", name->name, path->name); + fattr->valid = 0; + status = rpc_call(NFS_CLIENT(dir->d_inode), NFSPROC_SYMLINK, &arg, NULL, 0); dprintk("NFS reply symlink: %d\n", status); return status; } -int -nfs_proc_mkdir(struct nfs_server *server, struct nfs_fh *dir, - const char *name, struct iattr *sattr, - struct nfs_fh *fhandle, struct nfs_fattr *fattr) +static int +nfs_proc_mkdir(struct dentry *dir, struct qstr *name, struct iattr *sattr, + struct nfs_fh *fhandle, struct nfs_fattr *fattr) { - struct nfs_createargs arg = { dir, name, sattr }; + struct nfs_createargs arg = { NFS_FH(dir), name->name, name->len, + sattr }; struct nfs_diropok res = { fhandle, fattr }; int status; - dprintk("NFS call mkdir %s\n", name); - status = rpc_call(server->client, NFSPROC_MKDIR, &arg, &res, 0); + dprintk("NFS call mkdir %s\n", name->name); + fattr->valid = 0; + status = rpc_call(NFS_CLIENT(dir->d_inode), NFSPROC_MKDIR, &arg, &res, 0); dprintk("NFS reply mkdir: %d\n", status); return status; } -int -nfs_proc_rmdir(struct nfs_server *server, struct nfs_fh *dir, const char *name) +static int +nfs_proc_rmdir(struct dentry *dir, struct qstr *name) { - struct nfs_diropargs arg = { dir, name }; + struct nfs_diropargs arg = { NFS_FH(dir), name->name, name->len }; int status; - dprintk("NFS call rmdir %s\n", name); - status = rpc_call(server->client, NFSPROC_RMDIR, &arg, NULL, 0); + dprintk("NFS call rmdir %s\n", 
name->name); + status = rpc_call(NFS_CLIENT(dir->d_inode), NFSPROC_RMDIR, &arg, NULL, 0); dprintk("NFS reply rmdir: %d\n", status); return status; } -int +/* + * The READDIR implementation is somewhat hackish - we pass a temporary + * buffer to the encode function, which installs it in the receive + * the receive iovec. The decode function just parses the reply to make + * sure it is syntactically correct; the entries itself are decoded + * from nfs_readdir by calling the decode_entry function directly. + */ +static int +nfs_proc_readdir(struct dentry *dir, __u64 cookie, void *entry, + unsigned int size, int plus) +{ + struct nfs_readdirargs arg; + struct nfs_readdirres res; + struct rpc_message msg = { NFSPROC_READDIR, &arg, &res, NULL }; + struct nfs_server *server = NFS_DSERVER(dir); + int status; + + if (server->rsize < size) + size = server->rsize; + + arg.fh = NFS_FH(dir); + arg.cookie = cookie; + arg.buffer = entry; + arg.bufsiz = size; + res.buffer = entry; + res.bufsiz = size; + + dprintk("NFS call readdir %d\n", (unsigned int)cookie); + status = rpc_call_sync(NFS_CLIENT(dir->d_inode), &msg, 0); + + dprintk("NFS reply readdir: %d\n", status); + return status; +} + +static int nfs_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsinfo *info) { int status; dprintk("NFS call statfs\n"); + memset((char *)info, 0, sizeof(*info)); status = rpc_call(server->client, NFSPROC_STATFS, fhandle, info, 0); dprintk("NFS reply statfs: %d\n", status); return status; } + +extern u32 * nfs_decode_dirent(u32 *, struct nfs_entry *, int); + +struct nfs_rpc_ops nfs_v2_clientops = { + 2, /* protocol version */ + nfs_proc_get_root, + nfs_proc_getattr, + nfs_proc_setattr, + nfs_proc_lookup, + NULL, /* access */ + nfs_proc_readlink, + nfs_proc_read, + nfs_proc_write, + NULL, /* commit */ + nfs_proc_create, + nfs_proc_remove, + nfs_proc_rename, + nfs_proc_link, + nfs_proc_symlink, + nfs_proc_mkdir, + nfs_proc_rmdir, + nfs_proc_readdir, + nfs_proc_mknod, + 
nfs_proc_statfs, + nfs_decode_dirent, +}; diff --git a/fs/nfs/read.c b/fs/nfs/read.c index aa17780e5..4cf51ec8d 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -15,7 +15,6 @@ * within the RPC code when root squashing is suspected. */ -#define NFS_NEED_XDR_TYPES #include <linux/sched.h> #include <linux/kernel.h> #include <linux/errno.h> @@ -52,14 +51,18 @@ struct nfs_rreq { */ static inline void nfs_readreq_setup(struct nfs_rreq *req, struct nfs_fh *fh, - unsigned long offset, void *buffer, unsigned int rsize) + loff_t offset, void *buffer, unsigned int rsize) { req->ra_args.fh = fh; req->ra_args.offset = offset; req->ra_args.count = rsize; - req->ra_args.buffer = buffer; + req->ra_args.iov[0].iov_base = (void *)buffer; + req->ra_args.iov[0].iov_len = rsize; + req->ra_args.nriov = 1; + req->ra_fattr.valid = 0; req->ra_res.fattr = &req->ra_fattr; req->ra_res.count = rsize; + req->ra_res.eof = 0; } @@ -70,11 +73,12 @@ static int nfs_readpage_sync(struct dentry *dentry, struct inode *inode, struct page *page) { struct nfs_rreq rqst; - unsigned long offset = page->index << PAGE_CACHE_SHIFT; + struct rpc_message msg; + loff_t offset = page_offset(page); char *buffer; int rsize = NFS_SERVER(inode)->rsize; int result, refresh = 0; - int count = PAGE_SIZE; + int count = PAGE_CACHE_SIZE; int flags = IS_SWAPFILE(inode)? 
NFS_RPC_SWAPFLAGS : 0; dprintk("NFS: nfs_readpage_sync(%p)\n", page); @@ -89,16 +93,21 @@ nfs_readpage_sync(struct dentry *dentry, struct inode *inode, struct page *page) if (count < rsize) rsize = count; - dprintk("NFS: nfs_proc_read(%s, (%s/%s), %ld, %d, %p)\n", + dprintk("NFS: nfs_proc_read(%s, (%s/%s), %Ld, %d, %p)\n", NFS_SERVER(inode)->hostname, dentry->d_parent->d_name.name, dentry->d_name.name, - offset, rsize, buffer); + (long long)offset, rsize, buffer); /* Set up arguments and perform rpc call */ nfs_readreq_setup(&rqst, NFS_FH(dentry), offset, buffer, rsize); lock_kernel(); - result = rpc_call(NFS_CLIENT(inode), NFSPROC_READ, &rqst.ra_args, &rqst.ra_res, flags); + msg.rpc_proc = (NFS_PROTO(inode)->version == 3) ? NFS3PROC_READ : NFSPROC_READ; + msg.rpc_argp = &rqst.ra_args; + msg.rpc_resp = &rqst.ra_res; + msg.rpc_cred = NULL; + result = rpc_call_sync(NFS_CLIENT(inode), &msg, flags); unlock_kernel(); + nfs_refresh_inode(inode, &rqst.ra_fattr); /* * Even if we had a partial success we can't mark the page @@ -124,9 +133,6 @@ nfs_readpage_sync(struct dentry *dentry, struct inode *inode, struct page *page) io_error: kunmap(page); UnlockPage(page); - /* Note: we don't refresh if the call returned error */ - if (refresh && result >= 0) - nfs_refresh_inode(inode, &rqst.ra_fattr); return result; } @@ -139,19 +145,18 @@ nfs_readpage_result(struct rpc_task *task) { struct nfs_rreq *req = (struct nfs_rreq *) task->tk_calldata; struct page *page = req->ra_page; - unsigned long address = page_address(page); + char *address = req->ra_args.iov[0].iov_base; int result = task->tk_status; static int succ = 0, fail = 0; - dprintk("NFS: %4d received callback for page %lx, result %d\n", + dprintk("NFS: %4d received callback for page %p, result %d\n", task->tk_pid, address, result); + nfs_refresh_inode(req->ra_inode, &req->ra_fattr); if (result >= 0) { result = req->ra_res.count; - if (result < PAGE_SIZE) { - memset((char *) address + result, 0, PAGE_SIZE - result); - } - 
nfs_refresh_inode(req->ra_inode, &req->ra_fattr); + if (result < PAGE_CACHE_SIZE) + memset(address + result, 0, PAGE_CACHE_SIZE - result); SetPageUptodate(page); succ++; } else { @@ -161,9 +166,8 @@ nfs_readpage_result(struct rpc_task *task) } kunmap(page); UnlockPage(page); - __free_page(page); + page_cache_release(page); - rpc_release_task(task); kfree(req); } @@ -189,15 +193,15 @@ nfs_readpage_async(struct dentry *dentry, struct inode *inode, address = kmap(page); /* Initialize request */ /* N.B. Will the dentry remain valid for life of request? */ - nfs_readreq_setup(req, NFS_FH(dentry), page->index << PAGE_CACHE_SHIFT, - (void *) address, PAGE_SIZE); + nfs_readreq_setup(req, NFS_FH(dentry), page_offset(page), + (void *) address, PAGE_CACHE_SIZE); req->ra_inode = inode; req->ra_page = page; /* count has been incremented by caller */ /* Start the async call */ dprintk("NFS: executing async READ request.\n"); - msg.rpc_proc = NFSPROC_READ; + msg.rpc_proc = (NFS_PROTO(inode)->version == 3) ? NFS3PROC_READ : NFSPROC_READ; msg.rpc_argp = &req->ra_args; msg.rpc_resp = &req->ra_res; msg.rpc_cred = NULL; @@ -225,7 +229,7 @@ out_free: * We read the page synchronously in the following cases: * - The file is a swap file. Swap-ins are always sync operations, * so there's no need bothering to make async reads 100% fail-safe. - * - The NFS rsize is smaller than PAGE_SIZE. We could kludge our way + * - The NFS rsize is smaller than PAGE_CACHE_SIZE. We could kludge our way * around this by creating several consecutive read requests, but * that's hardly worth it. * - The error flag is set for this page. 
This happens only when a @@ -240,7 +244,7 @@ nfs_readpage(struct dentry *dentry, struct page *page) lock_kernel(); dprintk("NFS: nfs_readpage (%p %ld@%lu)\n", - page, PAGE_SIZE, page->index); + page, PAGE_CACHE_SIZE, page->index); get_page(page); /* @@ -256,7 +260,7 @@ nfs_readpage(struct dentry *dentry, struct page *page) error = -1; if (!IS_SWAPFILE(inode) && !PageError(page) && - NFS_SERVER(inode)->rsize >= PAGE_SIZE) + NFS_SERVER(inode)->rsize >= PAGE_CACHE_SIZE) error = nfs_readpage_async(dentry, inode, page); if (error >= 0) goto out; @@ -269,7 +273,7 @@ nfs_readpage(struct dentry *dentry, struct page *page) out_error: UnlockPage(page); out_free: - __free_page(page); + page_cache_release(page); out: unlock_kernel(); return error; diff --git a/fs/nfs/symlink.c b/fs/nfs/symlink.c index 9f5a95ff9..6b4a94f44 100644 --- a/fs/nfs/symlink.c +++ b/fs/nfs/symlink.c @@ -14,8 +14,9 @@ #include <linux/sched.h> #include <linux/errno.h> #include <linux/sunrpc/clnt.h> -#include <linux/nfs_fs.h> #include <linux/nfs.h> +#include <linux/nfs2.h> +#include <linux/nfs_fs.h> #include <linux/pagemap.h> #include <linux/stat.h> #include <linux/mm.h> @@ -27,16 +28,17 @@ */ static int nfs_symlink_filler(struct dentry *dentry, struct page *page) { - struct nfs_readlinkargs rl_args; - kmap(page); + struct inode *inode = dentry->d_inode; + void *buffer = (void *)kmap(page); + int error; + /* We place the length at the beginning of the page, * in host byte order, followed by the string. The * XDR response verification will NULL terminate it. 
*/ - rl_args.fh = NFS_FH(dentry); - rl_args.buffer = (const void *)page_address(page); - if (rpc_call(NFS_CLIENT(dentry->d_inode), NFSPROC_READLINK, - &rl_args, NULL, 0) < 0) + error = NFS_PROTO(inode)->readlink(dentry, buffer, + PAGE_CACHE_SIZE - sizeof(u32)-4); + if (error < 0) goto error; SetPageUptodate(page); kunmap(page); @@ -85,12 +87,10 @@ static int nfs_readlink(struct dentry *dentry, char *buffer, int buflen) return res; } -static struct dentry * -nfs_follow_link(struct dentry *dentry, struct dentry *base, unsigned int follow) +static int nfs_follow_link(struct dentry *dentry, struct nameidata *nd) { struct page *page = NULL; - struct dentry *res = vfs_follow_link(dentry, base, follow, - nfs_getlink(dentry, &page)); + int res = vfs_follow_link(nd, nfs_getlink(dentry,&page)); if (page) { kunmap(page); page_cache_release(page); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index af023a121..651251548 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -55,6 +55,7 @@ #include <linux/sunrpc/clnt.h> #include <linux/nfs_fs.h> +#include <linux/nfs_mount.h> #include <linux/nfs_flushd.h> #include <asm/uaccess.h> #include <linux/smp_lock.h> @@ -160,7 +161,6 @@ static __inline__ void nfs_writedata_free(struct nfs_write_data *p) static void nfs_writedata_release(struct rpc_task *task) { struct nfs_write_data *wdata = (struct nfs_write_data *)task->tk_calldata; - rpc_release_task(task); nfs_writedata_free(wdata); } @@ -172,6 +172,12 @@ static void nfs_writedata_release(struct rpc_task *task) static __inline__ int nfs_write_attributes(struct inode *inode, struct nfs_fattr *fattr) { + if ((fattr->valid & NFS_ATTR_FATTR) && !(fattr->valid & NFS_ATTR_WCC)) { + fattr->pre_size = NFS_CACHE_ISIZE(inode); + fattr->pre_mtime = NFS_CACHE_MTIME(inode); + fattr->pre_ctime = NFS_CACHE_CTIME(inode); + fattr->valid |= NFS_ATTR_WCC; + } return nfs_refresh_inode(inode, fattr); } @@ -183,10 +189,12 @@ static int nfs_writepage_sync(struct dentry *dentry, struct inode *inode, struct page 
*page, unsigned long offset, unsigned int count) { + loff_t base; unsigned int wsize = NFS_SERVER(inode)->wsize; - int result, refresh = 0, written = 0; + int result, refresh = 0, written = 0, flags; u8 *buffer; struct nfs_fattr fattr; + struct nfs_writeverf verf; lock_kernel(); dprintk("NFS: nfs_writepage_sync(%s/%s %d@%lu/%ld)\n", @@ -194,15 +202,17 @@ nfs_writepage_sync(struct dentry *dentry, struct inode *inode, count, page->index, offset); buffer = (u8 *) kmap(page) + offset; - offset += page->index << PAGE_CACHE_SHIFT; + base = page_offset(page) + offset; + + flags = ((IS_SWAPFILE(inode)) ? NFS_RW_SWAP : 0) | NFS_RW_SYNC; do { if (count < wsize && !IS_SWAPFILE(inode)) wsize = count; - result = nfs_proc_write(NFS_DSERVER(dentry), NFS_FH(dentry), - IS_SWAPFILE(inode), offset, wsize, - buffer, &fattr); + result = NFS_PROTO(inode)->write(dentry, &fattr, flags, + base, wsize, buffer, &verf); + nfs_write_attributes(inode, &fattr); if (result < 0) { /* Must mark the page invalid after I/O error */ @@ -214,41 +224,19 @@ nfs_writepage_sync(struct dentry *dentry, struct inode *inode, wsize, result); refresh = 1; buffer += wsize; - offset += wsize; + base += wsize; written += wsize; count -= wsize; /* * If we've extended the file, update the inode * now so we don't invalidate the cache. */ - if (offset > inode->i_size) - inode->i_size = offset; + if (base > inode->i_size) + inode->i_size = base; } while (count); io_error: kunmap(page); - /* Note: we don't refresh if the call failed (fattr invalid) */ - if (refresh && result >= 0) { - /* See comments in nfs_wback_result */ - /* N.B. 
I don't think this is right -- sync writes in order */ - if (fattr.size < inode->i_size) - fattr.size = inode->i_size; - if (fattr.mtime.seconds < inode->i_mtime) - printk("nfs_writepage_sync: prior time??\n"); - /* Solaris 2.5 server seems to send garbled - * fattrs occasionally */ - if (inode->i_ino == fattr.fileid) { - /* - * We expect the mtime value to change, and - * don't want to invalidate the caches. - */ - inode->i_mtime = fattr.mtime.seconds; - nfs_refresh_inode(inode, &fattr); - } - else - printk("nfs_writepage_sync: inode %ld, got %u?\n", - inode->i_ino, fattr.fileid); - } unlock_kernel(); return written? written : result; @@ -290,7 +278,7 @@ static int region_locked(struct inode *inode, struct nfs_page *req) { struct file_lock *fl; - unsigned long rqstart, rqend; + loff_t rqstart, rqend; /* Don't optimize writes if we don't use NLM */ if (NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM) @@ -665,20 +653,18 @@ nfs_wait_on_request(struct nfs_page *req) * Interruptible by signals only if mounted with intr flag. 
*/ static int -nfs_wait_on_requests(struct inode *inode, struct file *file, unsigned long start, unsigned int count) +nfs_wait_on_requests(struct inode *inode, struct file *file, unsigned long idx_start, unsigned int npages) { struct list_head *p, *head; - unsigned long idx_start, idx_end; - unsigned int pages = 0; + unsigned long idx_end; + unsigned int res = 0; int error; - idx_start = start >> PAGE_CACHE_SHIFT; - if (count == 0) + if (npages == 0) idx_end = ~0; - else { - unsigned long idx_count = (count-1) >> PAGE_CACHE_SHIFT; - idx_end = idx_start + idx_count; - } + else + idx_end = idx_start + npages - 1; + spin_lock(&nfs_wreq_lock); head = &inode->u.nfs_i.writeback; p = head->next; @@ -705,10 +691,10 @@ nfs_wait_on_requests(struct inode *inode, struct file *file, unsigned long start return error; spin_lock(&nfs_wreq_lock); p = head->next; - pages++; + res++; } spin_unlock(&nfs_wreq_lock); - return pages; + return res; } /* @@ -769,19 +755,18 @@ nfs_scan_commit_timeout(struct inode *inode, struct list_head *dst) #endif static int -nfs_scan_list(struct list_head *src, struct list_head *dst, struct file *file, unsigned long start, unsigned int count) +nfs_scan_list(struct list_head *src, struct list_head *dst, struct file *file, unsigned long idx_start, unsigned int npages) { struct list_head *p; struct nfs_page *req; - unsigned long idx_start, idx_end; - int pages; + unsigned long idx_end; + int res; - pages = 0; - idx_start = start >> PAGE_CACHE_SHIFT; - if (count == 0) + res = 0; + if (npages == 0) idx_end = ~0; else - idx_end = idx_start + ((count-1) >> PAGE_CACHE_SHIFT); + idx_end = idx_start + npages - 1; p = src->next; while (p != src) { unsigned long pg_idx; @@ -800,36 +785,36 @@ nfs_scan_list(struct list_head *src, struct list_head *dst, struct file *file, u continue; nfs_list_remove_request(req); nfs_list_add_request(req, dst); - pages++; + res++; } - return pages; + return res; } static int -nfs_scan_dirty(struct inode *inode, struct list_head *dst, 
struct file *file, unsigned long start, unsigned int count) +nfs_scan_dirty(struct inode *inode, struct list_head *dst, struct file *file, unsigned long idx_start, unsigned int npages) { - int pages; + int res; spin_lock(&nfs_wreq_lock); - pages = nfs_scan_list(&inode->u.nfs_i.dirty, dst, file, start, count); - inode->u.nfs_i.ndirty -= pages; + res = nfs_scan_list(&inode->u.nfs_i.dirty, dst, file, idx_start, npages); + inode->u.nfs_i.ndirty -= res; if ((inode->u.nfs_i.ndirty == 0) != list_empty(&inode->u.nfs_i.dirty)) printk(KERN_ERR "NFS: desynchronized value of nfs_i.ndirty.\n"); spin_unlock(&nfs_wreq_lock); - return pages; + return res; } #ifdef CONFIG_NFS_V3 static int -nfs_scan_commit(struct inode *inode, struct list_head *dst, struct file *file, unsigned long start, unsigned int count) +nfs_scan_commit(struct inode *inode, struct list_head *dst, struct file *file, unsigned long idx_start, unsigned int npages) { - int pages; + int res; spin_lock(&nfs_wreq_lock); - pages = nfs_scan_list(&inode->u.nfs_i.commit, dst, file, start, count); - inode->u.nfs_i.ncommit -= pages; + res = nfs_scan_list(&inode->u.nfs_i.commit, dst, file, idx_start, npages); + inode->u.nfs_i.ncommit -= res; if ((inode->u.nfs_i.ncommit == 0) != list_empty(&inode->u.nfs_i.commit)) printk(KERN_ERR "NFS: desynchronized value of nfs_i.ncommit.\n"); spin_unlock(&nfs_wreq_lock); - return pages; + return res; } #endif @@ -1038,7 +1023,7 @@ nfs_updatepage(struct file *file, struct page *page, unsigned long offset, unsig dprintk("NFS: nfs_updatepage(%s/%s %d@%Ld)\n", dentry->d_parent->d_name.name, dentry->d_name.name, - count, page_offset(page) +offset); + count, (long long)(page_offset(page) +offset)); /* * If wsize is smaller than page size, update and write @@ -1071,7 +1056,7 @@ nfs_updatepage(struct file *file, struct page *page, unsigned long offset, unsig if (synchronous) { int error; - error = nfs_sync_file(inode, file, page_offset(page) + offset, count, FLUSH_SYNC|FLUSH_STABLE); + error = 
nfs_sync_file(inode, file, page_index(page), 1, FLUSH_SYNC|FLUSH_STABLE); if (error < 0 || (error = file->f_error) < 0) status = error; file->f_error = 0; @@ -1086,7 +1071,7 @@ nfs_updatepage(struct file *file, struct page *page, unsigned long offset, unsig nfs_release_request(req); done: dprintk("NFS: nfs_updatepage returns %d (isize %Ld)\n", - status, inode->i_size); + status, (long long)inode->i_size); if (status < 0) clear_bit(PG_uptodate, &page->flags); return status; @@ -1173,6 +1158,8 @@ nfs_flush_one(struct list_head *head, struct file *file, int how) /* Finalize the task. */ rpc_init_task(task, clnt, nfs_writeback_done, flags); task->tk_calldata = data; + /* Release requests */ + task->tk_release = nfs_writedata_release; #ifdef CONFIG_NFS_V3 msg.rpc_proc = (NFS_PROTO(inode)->version == 3) ? NFS3PROC_WRITE : NFSPROC_WRITE; @@ -1266,22 +1253,25 @@ nfs_writeback_done(struct rpc_task *task) /* We tried a write call, but the server did not * commit data to stable storage even though we * requested it. + * Note: There is a known bug in Tru64 < 5.0 in which + * the server reports NFS_DATA_SYNC, but performs + * NFS_FILE_SYNC. We therefore implement this checking + * as a dprintk() in order to avoid filling syslog. */ static unsigned long complain = 0; if (time_before(complain, jiffies)) { - printk(KERN_NOTICE "NFS: faulty NFSv3 server %s:" - " (committed = %d) != (stable = %d)\n", - NFS_SERVER(inode)->hostname, - resp->verf->committed, argp->stable); + dprintk("NFS: faulty NFSv3 server %s:" + " (committed = %d) != (stable = %d)\n", + NFS_SERVER(inode)->hostname, + resp->verf->committed, argp->stable); complain = jiffies + 300 * HZ; } } #endif /* Update attributes as result of writeback. 
*/ - if (task->tk_status >= 0) - nfs_write_attributes(inode, resp->fattr); + nfs_write_attributes(inode, resp->fattr); while (!list_empty(&data->pages)) { req = nfs_list_entry(data->pages.next); @@ -1293,7 +1283,7 @@ nfs_writeback_done(struct rpc_task *task) req->wb_file->f_dentry->d_parent->d_name.name, req->wb_file->f_dentry->d_name.name, req->wb_bytes, - page_offset(req->wb_page) + req->wb_offset); + (long long)(page_offset(req->wb_page) + req->wb_offset)); if (task->tk_status < 0) { req->wb_file->f_error = task->tk_status; @@ -1318,7 +1308,6 @@ nfs_writeback_done(struct rpc_task *task) next: nfs_unlock_request(req); } - nfs_writedata_release(task); } @@ -1332,7 +1321,7 @@ nfs_commit_rpcsetup(struct list_head *head, struct nfs_write_data *data) struct nfs_page *req; struct dentry *dentry; struct inode *inode; - unsigned long start, end, len; + loff_t start, end, len; /* Set up the RPC argument and reply structs * NB: take care not to mess about with data->commit et al. */ @@ -1346,7 +1335,7 @@ nfs_commit_rpcsetup(struct list_head *head, struct nfs_write_data *data) inode = dentry->d_inode; while (!list_empty(head)) { struct nfs_page *req; - unsigned long rqstart, rqend; + loff_t rqstart, rqend; req = nfs_list_entry(head->next); nfs_list_remove_request(req); nfs_list_add_request(req, &data->pages); @@ -1360,6 +1349,7 @@ nfs_commit_rpcsetup(struct list_head *head, struct nfs_write_data *data) data->args.fh = NFS_FH(dentry); data->args.offset = start; len = end - start; + /* If 'len' is not a 32-bit quantity, pass '0' in the COMMIT call */ if (end >= inode->i_size || len > (~((u32)0) >> 1)) len = 0; data->res.count = data->args.count = (u32)len; @@ -1399,6 +1389,8 @@ nfs_commit_list(struct list_head *head, int how) rpc_init_task(task, clnt, nfs_commit_done, flags); task->tk_calldata = data; + /* Release requests */ + task->tk_release = nfs_writedata_release; msg.rpc_proc = NFS3PROC_COMMIT; msg.rpc_argp = &data->args; @@ -1441,11 +1433,11 @@ nfs_commit_done(struct 
rpc_task *task) req = nfs_list_entry(data->pages.next); nfs_list_remove_request(req); - dprintk("NFS: commit (%s/%s %d@%ld)", + dprintk("NFS: commit (%s/%s %d@%Ld)", req->wb_file->f_dentry->d_parent->d_name.name, req->wb_file->f_dentry->d_name.name, req->wb_bytes, - page_offset(req->wb_page) + req->wb_offset); + (long long)(page_offset(req->wb_page) + req->wb_offset)); if (task->tk_status < 0) { req->wb_file->f_error = task->tk_status; nfs_inode_remove_request(req); @@ -1467,23 +1459,22 @@ nfs_commit_done(struct rpc_task *task) next: nfs_unlock_request(req); } - nfs_writedata_release(task); } #endif -int nfs_flush_file(struct inode *inode, struct file *file, unsigned long start, - unsigned int count, int how) +int nfs_flush_file(struct inode *inode, struct file *file, unsigned long idx_start, + unsigned int npages, int how) { LIST_HEAD(head); - int pages, + int res, error = 0; - pages = nfs_scan_dirty(inode, &head, file, start, count); - if (pages) + res = nfs_scan_dirty(inode, &head, file, idx_start, npages); + if (res) error = nfs_flush_list(inode, &head, how); if (error < 0) return error; - return pages; + return res; } int nfs_flush_timeout(struct inode *inode, int how) @@ -1501,19 +1492,19 @@ int nfs_flush_timeout(struct inode *inode, int how) } #ifdef CONFIG_NFS_V3 -int nfs_commit_file(struct inode *inode, struct file *file, unsigned long start, - unsigned int count, int how) +int nfs_commit_file(struct inode *inode, struct file *file, unsigned long idx_start, + unsigned int npages, int how) { LIST_HEAD(head); - int pages, + int res, error = 0; - pages = nfs_scan_commit(inode, &head, file, start, count); - if (pages) + res = nfs_scan_commit(inode, &head, file, idx_start, npages); + if (res) error = nfs_commit_list(&head, how); if (error < 0) return error; - return pages; + return res; } int nfs_commit_timeout(struct inode *inode, int how) @@ -1533,8 +1524,8 @@ int nfs_commit_timeout(struct inode *inode, int how) } #endif -int nfs_sync_file(struct inode 
*inode, struct file *file, unsigned long start, - unsigned int count, int how) +int nfs_sync_file(struct inode *inode, struct file *file, unsigned long idx_start, + unsigned int npages, int how) { int error, wait; @@ -1548,12 +1539,12 @@ int nfs_sync_file(struct inode *inode, struct file *file, unsigned long start, do { error = 0; if (wait) - error = nfs_wait_on_requests(inode, file, start, count); + error = nfs_wait_on_requests(inode, file, idx_start, npages); if (error == 0) - error = nfs_flush_file(inode, file, start, count, how); + error = nfs_flush_file(inode, file, idx_start, npages, how); #ifdef CONFIG_NFS_V3 if (error == 0) - error = nfs_commit_file(inode, file, start, count, how); + error = nfs_commit_file(inode, file, idx_start, npages, how); #endif } while (error > 0); return error; diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index c6ea9074c..53bfa0bea 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -246,14 +246,11 @@ exp_export(struct nfsctl_export *nxp) /* Look up the dentry */ err = -EINVAL; - dentry = lookup_dentry(nxp->ex_path, NULL, 0); + dentry = lookup_dentry(nxp->ex_path, LOOKUP_POSITIVE); if (IS_ERR(dentry)) goto out_unlock; - err = -ENOENT; inode = dentry->d_inode; - if (!inode) - goto finish; err = -EINVAL; if (inode->i_dev != dev || inode->i_ino != nxp->ex_ino) { printk(KERN_DEBUG "exp_export: i_dev = %x, dev = %x\n", @@ -443,7 +440,7 @@ exp_rootfh(struct svc_client *clp, kdev_t dev, ino_t ino, err = -EPERM; if (path) { - if (!(dentry = lookup_dentry(path, NULL, 0))) { + if (!(dentry = lookup_dentry(path, 0))) { printk("nfsd: exp_rootfh path not found %s", path); return -EPERM; } diff --git a/fs/nfsd/lockd.c b/fs/nfsd/lockd.c index c45d494ba..e15483e43 100644 --- a/fs/nfsd/lockd.c +++ b/fs/nfsd/lockd.c @@ -27,7 +27,8 @@ nlm_fopen(struct svc_rqst *rqstp, struct nfs_fh *f, struct file *filp) /* must initialize before using! 
but maxsize doesn't matter */ fh_init(&fh,0); - memcpy((char*)&fh.fh_handle.fh_base, (char*)f, NFS_FHSIZE); + fh.fh_handle.fh_size = f->size; + memcpy((char*)&fh.fh_handle.fh_base, f->data, f->size); fh.fh_export = NULL; nfserr = nfsd_open(rqstp, &fh, S_IFREG, MAY_LOCK, filp); diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index 969ff54a9..61ee94a3e 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -74,22 +74,11 @@ decode_fh(u32 *p, struct svc_fh *fhp) static inline u32 * encode_fh(u32 *p, struct svc_fh *fhp) { -#if 0 int size = fhp->fh_handle.fh_size; *p++ = htonl(size); if (size) p[XDR_QUADLEN(size)-1]=0; memcpy(p, &fhp->fh_handle.fh_base, size); return p + XDR_QUADLEN(size); -#else - /* until locked knows about var-length file handles, - * we always return NFS_FHSIZE handles - */ - int size = fhp->fh_handle.fh_size; - *p++ = htonl(NFS_FHSIZE); - memset(p, 0, NFS_FHSIZE); - memcpy(p, &fhp->fh_handle.fh_base, size); - return p + XDR_QUADLEN(NFS_FHSIZE); -#endif } /* diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 97b46f0c7..65c70164a 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -130,7 +130,6 @@ nfsctl_getfs(struct nfsctl_fsparm *data, struct knfsd_fh *res) else err = exp_rootfh(clp, 0, 0, data->gd_path, res, data->gd_maxlen); exp_unlock(); - /*HACK*/ res->fh_size = NFS_FHSIZE; /* HACK until lockd handles var-length handles */ return err; } diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 5cd55fda8..56da94aea 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -91,7 +91,8 @@ nfsd_lookup(struct svc_rqst *rqstp, struct svc_fh *fhp, const char *name, int len, struct svc_fh *resfh) { struct svc_export *exp; - struct dentry *dparent, *dchild; + struct dentry *dparent; + struct nameidata nd; int err; dprintk("nfsd: nfsd_lookup(fh %s, %s)\n", SVCFH_fmt(fhp), name); @@ -116,49 +117,51 @@ nfsd_lookup(struct svc_rqst *rqstp, struct svc_fh *fhp, const char *name, /* checking mountpoint crossing is very different when stepping up */ if (dparent == 
exp->ex_dentry) { if (!EX_CROSSMNT(exp)) - dchild = dget(dparent); /* .. == . just like at / */ + nd.dentry = dget(dparent); /* .. == . just like at / */ else { struct svc_export *exp2 = NULL; struct dentry *dp; - dchild = dparent->d_covers->d_parent; - for (dp=dchild; + nd.dentry = dparent->d_covers->d_parent; + for (dp=nd.dentry; exp2 == NULL && dp->d_covers->d_parent != dp; dp=dp->d_covers->d_parent) exp2 = exp_get(exp->ex_client, dp->d_inode->i_dev, dp->d_inode->i_ino); - if (exp2==NULL || dchild->d_sb != exp2->ex_dentry->d_sb) { - dchild = dget(dparent); + if (exp2==NULL || nd.dentry->d_sb != exp2->ex_dentry->d_sb) { + nd.dentry = dget(dparent); } else { - dget(dchild); + dget(nd.dentry); exp = exp2; } } } else - dchild = dget(dparent->d_parent); + nd.dentry = dget(dparent->d_parent); } else { - dchild = lookup_dentry(name, dget(dparent), 0); - if (IS_ERR(dchild)) + nd.mnt = NULL; + nd.dentry = dget(dparent); + err = walk_name(name, 0, &nd); + if (err) goto out_nfserr; /* * check if we have crossed a mount point ... 
*/ - if (dchild->d_sb != dparent->d_sb) { + if (nd.dentry->d_sb != dparent->d_sb) { struct svc_export *exp2 = NULL; exp2 = exp_get(rqstp->rq_client, - dchild->d_inode->i_dev, - dchild->d_inode->i_ino); + nd.dentry->d_inode->i_dev, + nd.dentry->d_inode->i_ino); if (exp2 && EX_CROSSMNT(exp2)) /* successfully crossed mount point */ exp = exp2; - else if (dchild->d_covers->d_sb == dparent->d_sb) { + else if (nd.dentry->d_covers->d_sb == dparent->d_sb) { /* stay in the original filesystem */ - struct dentry *tdentry = dget(dchild->d_covers); - dput(dchild); - dchild = tdentry; + struct dentry *tdentry = dget(nd.dentry->d_covers); + dput(nd.dentry); + nd.dentry = tdentry; } else { /* This cannot possibly happen */ - printk("nfsd_lookup: %s/%s impossible mount point!\n", dparent->d_name.name, dchild->d_name.name); - dput(dchild); + printk("nfsd_lookup: %s/%s impossible mount point!\n", dparent->d_name.name, nd.dentry->d_name.name); + dput(nd.dentry); err = nfserr_acces; goto out; @@ -169,14 +172,14 @@ nfsd_lookup(struct svc_rqst *rqstp, struct svc_fh *fhp, const char *name, * Note: we compose the file handle now, but as the * dentry may be negative, it may need to be updated. 
*/ - err = fh_compose(resfh, exp, dchild); - if (!err && !dchild->d_inode) + err = fh_compose(resfh, exp, nd.dentry); + if (!err && !nd.dentry->d_inode) err = nfserr_noent; out: return err; out_nfserr: - err = nfserrno(PTR_ERR(dchild)); + err = nfserrno(err); goto out; } diff --git a/fs/ntfs/Makefile b/fs/ntfs/Makefile index f06e122aa..6ec49bfd7 100644 --- a/fs/ntfs/Makefile +++ b/fs/ntfs/Makefile @@ -3,7 +3,7 @@ O_TARGET := ntfs.o O_OBJS := fs.o sysctl.o support.o util.o inode.o dir.o super.o attr.o M_OBJS := $(O_TARGET) -EXTRA_CFLAGS = -DNTFS_IN_LINUX_KERNEL -DNTFS_VERSION=\"990411\" +EXTRA_CFLAGS = -DNTFS_IN_LINUX_KERNEL -DNTFS_VERSION=\"000410\" include $(TOPDIR)/Rules.make diff --git a/fs/ntfs/fs.c b/fs/ntfs/fs.c index e6abd178e..058e3a6f4 100644 --- a/fs/ntfs/fs.c +++ b/fs/ntfs/fs.c @@ -1,7 +1,8 @@ /* * fs.c - * NTFS driver for Linux 2.1 + * NTFS driver for Linux 2.3.x * + * Copyright (C) 2000, Anton Altaparmakov * Copyright (C) 1995-1997, 1999 Martin von Löwis * Copyright (C) 1996 Richard Russon * Copyright (C) 1996-1997 Régis Duchesne @@ -53,7 +54,6 @@ static void ntfs_getuser_update_vm (void *dest, ntfs_io *src, ntfs_size_t len) { struct ntfs_getuser_update_vm_s *p = src->param; copy_from_user (dest, p->user, len); - update_vm_cache (p->ino, p->off, dest, len); p->user += len; p->off += len; } @@ -141,11 +141,8 @@ static inline long do_sys_ftruncate(unsigned int fd, loff_t length) file = fget(fd); if (!file) goto out; - error = -ENOENT; - if (!(dentry = file->f_dentry)) - goto out_putf; - if (!(inode = dentry->d_inode)) - goto out_putf; + dentry = file->f_dentry; + inode = dentry->d_inode; error = -EACCES; if (S_ISDIR(inode->i_mode) || !(file->f_mode & FMODE_WRITE)) goto out_putf; @@ -330,35 +327,41 @@ asmlinkage long sys_access(const char * filename, int mode) return res; } +/* MOUNT_REWRITE: pass &mnt to lookup_dentry */ asmlinkage long sys_chdir(const char * filename) { int error; - struct inode *inode; struct dentry *dentry, *tmp; + struct vfsmount 
*mnt = NULL, *tmp_mnt; + char *name; lock_kernel(); - dentry = namei(filename); + name = getname(filename); + error = PTR_ERR(name); + if (IS_ERR(name)) + goto out; + + dentry = lookup_dentry(name, LOOKUP_POSITIVE | LOOKUP_FOLLOW | LOOKUP_DIRECTORY); + putname(name); error = PTR_ERR(dentry); if (IS_ERR(dentry)) goto out; - inode = dentry->d_inode; - - error = -ENOTDIR; - if (!S_ISDIR(inode->i_mode)) - goto dput_and_out; - - error = permission(inode,MAY_EXEC); + error = permission(dentry->d_inode,MAY_EXEC); if (error) goto dput_and_out; /* exchange dentries */ tmp = current->fs->pwd; + tmp_mnt = current->fs->pwdmnt; current->fs->pwd = dentry; + current->fs->pwdmnt = mnt; dentry = tmp; + mnt = tmp_mnt; dput_and_out: + mntput(mnt); dput(dentry); out: unlock_kernel(); @@ -370,6 +373,7 @@ asmlinkage long sys_fchdir(unsigned int fd) struct file *file; struct dentry *dentry; struct inode *inode; + struct vfsmount *mnt; int error; error = -EBADF; @@ -377,11 +381,9 @@ asmlinkage long sys_fchdir(unsigned int fd) if (!file) goto out; - error = -ENOENT; - if (!(dentry = file->f_dentry)) - goto out_putf; - if (!(inode = dentry->d_inode)) - goto out_putf; + dentry = file->f_dentry; + mnt = file->f_vfsmnt; + inode = dentry->d_inode; error = -ENOTDIR; if (!S_ISDIR(inode->i_mode)) @@ -391,7 +393,10 @@ asmlinkage long sys_fchdir(unsigned int fd) error = permission(inode, MAY_EXEC); if (!error) { struct dentry *tmp = current->fs->pwd; + struct vfsmount *tmp_mnt = current->fs->pwdmnt; current->fs->pwd = dget(dentry); + current->fs->pwdmnt = mntget(mnt); + mntput(tmp_mnt); dput(tmp); } unlock_kernel(); @@ -401,26 +406,28 @@ out: return error; } +/* MOUNT_REWRITE: pass &mnt to lookup_dentry */ asmlinkage long sys_chroot(const char * filename) { int error; - struct inode *inode; struct dentry *dentry, *tmp; + struct vfsmount *mnt = NULL, *tmp_mnt; + char *name; lock_kernel(); - dentry = namei(filename); + name = getname(filename); + error = PTR_ERR(name); + if (IS_ERR(name)) + goto out; 
+ + dentry = lookup_dentry(name, LOOKUP_POSITIVE | LOOKUP_FOLLOW | LOOKUP_DIRECTORY); + putname(name); error = PTR_ERR(dentry); if (IS_ERR(dentry)) goto out; - inode = dentry->d_inode; - - error = -ENOTDIR; - if (!S_ISDIR(inode->i_mode)) - goto dput_and_out; - - error = permission(inode,MAY_EXEC); + error = permission(dentry->d_inode,MAY_EXEC); if (error) goto dput_and_out; @@ -430,11 +437,15 @@ asmlinkage long sys_chroot(const char * filename) /* exchange dentries */ tmp = current->fs->root; + tmp_mnt = current->fs->rootmnt; current->fs->root = dentry; + current->fs->rootmnt = mnt; dentry = tmp; + mnt = tmp_mnt; error = 0; dput_and_out: + mntput(mnt); dput(dentry); out: unlock_kernel(); @@ -453,11 +464,8 @@ asmlinkage long sys_fchmod(unsigned int fd, mode_t mode) if (!file) goto out; - err = -ENOENT; - if (!(dentry = file->f_dentry)) - goto out_putf; - if (!(inode = dentry->d_inode)) - goto out_putf; + dentry = file->f_dentry; + inode = dentry->d_inode; err = -EROFS; if (IS_RDONLY(inode)) @@ -612,20 +620,16 @@ asmlinkage long sys_lchown(const char * filename, uid_t user, gid_t group) asmlinkage long sys_fchown(unsigned int fd, uid_t user, gid_t group) { - struct dentry * dentry; struct file * file; int error = -EBADF; file = fget(fd); if (!file) goto out; - error = -ENOENT; lock_kernel(); - if ((dentry = file->f_dentry) != NULL) - error = chown_common(dentry, user, group); + error = chown_common(file->f_dentry, user, group); unlock_kernel(); fput(file); - out: return error; } @@ -644,26 +648,25 @@ out: * for the internal routines (ie open_namei()/follow_link() etc). 00 is * used by symlinks. 
*/ -struct file *filp_open(const char * filename, int flags, int mode, struct dentry * base) +struct file *filp_open(const char * filename, int flags, int mode) { - struct dentry * dentry; - int flag,error; + int namei_flags, error; + struct nameidata nd; - flag = flags; - if ((flag+1) & O_ACCMODE) - flag++; - if (flag & O_TRUNC) - flag |= 2; + namei_flags = flags; + if ((namei_flags+1) & O_ACCMODE) + namei_flags++; + if (namei_flags & O_TRUNC) + namei_flags |= 2; - dentry = __open_namei(filename, flag, mode, base); - error = PTR_ERR(dentry); - if (!IS_ERR(dentry)) - return dentry_open(dentry, flags); + error = open_namei(filename, namei_flags, mode, &nd); + if (!error) + return dentry_open(nd.dentry, nd.mnt, flags); return ERR_PTR(error); } -struct file *dentry_open(struct dentry *dentry, int flags) +struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags) { struct file * f; struct inode *inode; @@ -679,15 +682,14 @@ struct file *dentry_open(struct dentry *dentry, int flags) if (f->f_mode & FMODE_WRITE) { error = get_write_access(inode); if (error) - goto cleanup_dentry; + goto cleanup_file; } f->f_dentry = dentry; + f->f_vfsmnt = mnt; f->f_pos = 0; f->f_reada = 0; - f->f_op = NULL; - if (inode->i_op) - f->f_op = inode->i_fop; + f->f_op = inode->i_fop; if (inode->i_sb) file_move(f, &inode->i_sb->s_files); if (f->f_op && f->f_op->open) { @@ -703,9 +705,12 @@ cleanup_all: if (f->f_mode & FMODE_WRITE) put_write_access(inode); f->f_dentry = NULL; + f->f_vfsmnt = NULL; +cleanup_file: + put_filp(f); cleanup_dentry: dput(dentry); - put_filp(f); + mntput(mnt); return ERR_PTR(error); } @@ -795,7 +800,7 @@ asmlinkage long sys_open(const char * filename, int flags, int mode) if (fd >= 0) { struct file * f; lock_kernel(); - f = filp_open(tmp, flags, mode, NULL); + f = filp_open(tmp, flags, mode); unlock_kernel(); error = PTR_ERR(f); if (IS_ERR(f)) @@ -833,7 +838,6 @@ asmlinkage long sys_creat(const char * pathname, int mode) int filp_close(struct file 
*filp, fl_owner_t id) { int retval; - struct dentry *dentry = filp->f_dentry; if (!file_count(filp)) { printk("VFS: Close: file count is 0\n"); @@ -842,8 +846,7 @@ int filp_close(struct file *filp, fl_owner_t id) retval = 0; if (filp->f_op && filp->f_op->flush) retval = filp->f_op->flush(filp); - if (dentry->d_inode) - locks_remove_posix(filp, id); + locks_remove_posix(filp, id); fput(filp); return retval; } diff --git a/fs/partitions/Config.in b/fs/partitions/Config.in index a0a58e3dd..c7a39b9f4 100644 --- a/fs/partitions/Config.in +++ b/fs/partitions/Config.in @@ -21,8 +21,8 @@ if [ "$CONFIG_PARTITION_ADVANCED" = "y" ]; then bool ' Solaris (x86) partition table support' CONFIG_SOLARIS_X86_PARTITION bool ' Unixware slices support' CONFIG_UNIXWARE_DISKLABEL fi - bool 'SGI partition support' CONFIG_SGI_PARTITION - bool 'Ultrix partition table support' CONFIG_ULTRIX_PARTITION + bool ' SGI partition support' CONFIG_SGI_PARTITION + bool ' Ultrix partition table support' CONFIG_ULTRIX_PARTITION bool 'Sun partition tables support' CONFIG_SUN_PARTITION else if [ "$ARCH" = "alpha" ]; then @@ -36,6 +36,9 @@ else if [ "$CONFIG_AMIGA" = "y" ]; then define_bool CONFIG_AMIGA_PARTITION y fi + if [ "$CONFIG_MAC" = "y" ]; then + define_bool CONFIG_MAC_PARTITION y + fi if [ "$CONFIG_ARCH_ACORN" = "y" ]; then define_bool CONFIG_ACORN_PARTITION y define_bool CONFIG_ACORN_PARTITION_ADFS y diff --git a/fs/proc/array.c b/fs/proc/array.c index 54e594634..63a1d5828 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -574,7 +574,9 @@ ssize_t proc_pid_read_maps (struct task_struct *task, struct file * file, char * if (map->vm_file != NULL) { dev = map->vm_file->f_dentry->d_inode->i_dev; ino = map->vm_file->f_dentry->d_inode->i_ino; - line = d_path(map->vm_file->f_dentry, buffer, PAGE_SIZE); + line = d_path(map->vm_file->f_dentry, + map->vm_file->f_vfsmnt, + buffer, PAGE_SIZE); buffer[PAGE_SIZE-1] = '\n'; line -= maxlen; if(line < buffer) diff --git a/fs/proc/base.c b/fs/proc/base.c index 
4a191bfef..63bd4459b 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -38,22 +38,28 @@ int proc_pid_status(struct task_struct*,char*); int proc_pid_statm(struct task_struct*,char*); int proc_pid_cpu(struct task_struct*,char*); -static struct dentry *proc_fd_link(struct inode *inode) +/* MOUNT_REWRITE: make all files have non-NULL ->f_vfsmnt (pipefs, sockfs) */ +static int proc_fd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) { - if (inode->u.proc_i.file) - return dget(inode->u.proc_i.file->f_dentry); - return NULL; + if (inode->u.proc_i.file) { + if (inode->u.proc_i.file->f_vfsmnt) { + *mnt = mntget(inode->u.proc_i.file->f_vfsmnt); + } + *dentry = dget(inode->u.proc_i.file->f_dentry); + return 0; + } + return -ENOENT; } -static struct dentry *proc_exe_link(struct inode *inode) +static int proc_exe_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) { struct mm_struct * mm; struct vm_area_struct * vma; - struct dentry *result = NULL; + int result = -ENOENT; struct task_struct *task = inode->u.proc_i.task; if (!task_lock(task)) - return NULL; + return result; mm = task->mm; if (!mm) goto out; @@ -62,7 +68,9 @@ static struct dentry *proc_exe_link(struct inode *inode) while (vma) { if ((vma->vm_flags & VM_EXECUTABLE) && vma->vm_file) { - result = dget(vma->vm_file->f_dentry); + *mnt = mntget(vma->vm_file->f_vfsmnt); + *dentry = dget(vma->vm_file->f_dentry); + result = 0; break; } vma = vma->vm_next; @@ -73,25 +81,31 @@ out: return result; } -static struct dentry *proc_cwd_link(struct inode *inode) +static int proc_cwd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) { - struct dentry *result = NULL; + int result = -ENOENT; if (task_lock(inode->u.proc_i.task)) { struct fs_struct *fs = inode->u.proc_i.task->fs; - if (fs) - result = dget(fs->pwd); + if (fs) { + *mnt = mntget(fs->pwdmnt); + *dentry = dget(fs->pwd); + result = 0; + } task_unlock(inode->u.proc_i.task); } return result; } -static 
struct dentry *proc_root_link(struct inode *inode) +static int proc_root_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) { - struct dentry *result = NULL; + int result = -ENOENT; if (task_lock(inode->u.proc_i.task)) { struct fs_struct *fs = inode->u.proc_i.task->fs; - if (fs) - result = dget(fs->root); + if (fs) { + *mnt = mntget(fs->rootmnt); + *dentry = dget(fs->root); + result = 0; + } task_unlock(inode->u.proc_i.task); } return result; @@ -160,16 +174,18 @@ static int proc_permission(struct inode *inode, int mask) { struct dentry *de, *base, *root; struct super_block *our_sb, *sb, *below; + struct vfsmount *our_vfsmnt, *vfsmnt, *mnt; if (standard_permission(inode, mask) != 0) return -EACCES; base = current->fs->root; - de = root = proc_root_link(inode); /* Ewww... */ - - if (!de) + our_vfsmnt = current->fs->rootmnt; + if (proc_root_link(inode, &root, &vfsmnt)) /* Ewww... */ return -ENOENT; + de = root; + mnt = vfsmnt; our_sb = base->d_inode->i_sb; sb = de->d_inode->i_sb; while (sb != our_sb) { @@ -184,9 +200,11 @@ static int proc_permission(struct inode *inode, int mask) goto out; dput(root); + mntput(mnt); return 0; out: dput(root); + mntput(mnt); return -EACCES; } @@ -345,30 +363,26 @@ static struct inode_operations proc_mem_inode_operations = { permission: proc_permission, }; -static struct dentry * proc_pid_follow_link(struct dentry *dentry, - struct dentry *base, - unsigned int follow) +static int proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd) { struct inode *inode = dentry->d_inode; - struct dentry * result; int error; /* We don't need a base pointer in the /proc filesystem */ - dput(base); + dput(nd->dentry); + mntput(nd->mnt); error = proc_permission(inode, MAY_EXEC); - result = ERR_PTR(error); if (error) goto out; - result = inode->u.proc_i.op.proc_get_link(inode); + error = inode->u.proc_i.op.proc_get_link(inode, &nd->dentry, &nd->mnt); out: - if (!result) - result = ERR_PTR(-ENOENT); - return result; + 
return error; } -static int do_proc_readlink(struct dentry *dentry, char * buffer, int buflen) +static int do_proc_readlink(struct dentry *dentry, struct vfsmount *mnt, + char * buffer, int buflen) { struct inode * inode; char * tmp = (char*)__get_free_page(GFP_KERNEL), *path, *pattern; @@ -391,7 +405,7 @@ static int do_proc_readlink(struct dentry *dentry, char * buffer, int buflen) len = sprintf(tmp, pattern, inode->i_ino); path = tmp; } else { - path = d_path(dentry, tmp, PAGE_SIZE); + path = d_path(dentry, mnt, tmp, PAGE_SIZE); len = tmp + PAGE_SIZE - 1 - path; } @@ -406,22 +420,19 @@ static int proc_pid_readlink(struct dentry * dentry, char * buffer, int buflen) { int error; struct inode *inode = dentry->d_inode; + struct vfsmount *mnt; error = proc_permission(inode, MAY_EXEC); if (error) goto out; - dentry = inode->u.proc_i.op.proc_get_link(inode); - error = -ENOENT; - if (!dentry) - goto out; - - error = PTR_ERR(dentry); - if (IS_ERR(dentry)) + error = inode->u.proc_i.op.proc_get_link(inode, &dentry, &mnt); + if (error) goto out; - error = do_proc_readlink(dentry, buffer, buflen); + error = do_proc_readlink(dentry, mnt, buffer, buflen); dput(dentry); + mntput(mnt); out: return error; } diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 28e0c08e5..4d6662780 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -201,10 +201,10 @@ static int proc_readlink(struct dentry *dentry, char *buffer, int buflen) return vfs_readlink(dentry, buffer, buflen, s); } -static struct dentry *proc_follow_link(struct dentry *dentry, struct dentry *base, unsigned flags) +static int proc_follow_link(struct dentry *dentry, struct nameidata *nd) { char *s=((struct proc_dir_entry *)dentry->d_inode->u.generic_ip)->data; - return vfs_follow_link(dentry, base, flags, s); + return vfs_follow_link(nd, s); } static struct inode_operations proc_link_inode_operations = { diff --git a/fs/proc/root.c b/fs/proc/root.c index cce48d845..af01f0281 100644 --- a/fs/proc/root.c +++ 
b/fs/proc/root.c @@ -32,13 +32,11 @@ static int proc_self_readlink(struct dentry *dentry, char *buffer, int buflen) return vfs_readlink(dentry,buffer,buflen,tmp); } -static struct dentry * proc_self_follow_link(struct dentry *dentry, - struct dentry *base, - unsigned int follow) +static int proc_self_follow_link(struct dentry *dentry, struct nameidata *nd) { char tmp[30]; sprintf(tmp, "%d", current->pid); - return vfs_follow_link(dentry,base,follow,tmp); + return vfs_follow_link(nd,tmp); } static struct inode_operations proc_self_inode_operations = { diff --git a/fs/ramfs/.cvsignore b/fs/ramfs/.cvsignore new file mode 100644 index 000000000..857dd22e9 --- /dev/null +++ b/fs/ramfs/.cvsignore @@ -0,0 +1,2 @@ +.depend +.*.flags diff --git a/fs/ramfs/Makefile b/fs/ramfs/Makefile new file mode 100644 index 000000000..f57d5966c --- /dev/null +++ b/fs/ramfs/Makefile @@ -0,0 +1,11 @@ +# +# Makefile for the linux ramfs routines. +# + +O_TARGET := ramfs.o + +O_OBJS := inode.o + +M_OBJS := $(O_TARGET) + +include $(TOPDIR)/Rules.make diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c new file mode 100644 index 000000000..e218051f3 --- /dev/null +++ b/fs/ramfs/inode.c @@ -0,0 +1,393 @@ +/* + * Resizable simple ram filesystem for Linux. + * + * Copyright (C) 2000 Linus Torvalds. + * 2000 Transmeta Corp. + * + * This file is released under the GPL. + */ + +/* + * NOTE! This filesystem is probably most useful + * not as a real filesystem, but as an example of + * how virtual filesystems can be written. + * + * It doesn't get much simpler than this. Consider + * that this file implements the full semantics of + * a POSIX-compliant read-write filesystem. + * + * Note in particular how the filesystem does not + * need to implement any data structures of its own + * to keep track of the virtual data: using the VFS + * caches is sufficient. 
+ */ + +#include <linux/module.h> +#include <linux/fs.h> +#include <linux/pagemap.h> +#include <linux/init.h> +#include <linux/string.h> +#include <linux/locks.h> + +#include <asm/uaccess.h> + +/* some random number */ +#define RAMFS_MAGIC 0x858458f6 + +static struct super_operations ramfs_ops; +static struct address_space_operations ramfs_aops; +static struct file_operations ramfs_dir_operations; +static struct file_operations ramfs_file_operations; +static struct inode_operations ramfs_dir_inode_operations; + +static int ramfs_statfs(struct super_block *sb, struct statfs *buf) +{ + buf->f_type = RAMFS_MAGIC; + buf->f_bsize = PAGE_CACHE_SIZE; + buf->f_namelen = 255; + return 0; +} + +/* + * Lookup the data. This is trivial - if the dentry didn't already + * exist, we know it is negative. + */ +static struct dentry * ramfs_lookup(struct inode *dir, struct dentry *dentry) +{ + d_add(dentry, NULL); + return NULL; +} + +/* + * Read a page. Again trivial. If it didn't already exist + * in the page cache, it is zero-filled. + */ +static int ramfs_readpage(struct dentry *dentry, struct page * page) +{ + if (!Page_Uptodate(page)) { + memset((void *) page_address(page), 0, PAGE_CACHE_SIZE); + SetPageUptodate(page); + } + UnlockPage(page); + return 0; +} + +/* + * Writing: just make sure the page gets marked dirty, so that + * the page stealer won't grab it. 
+ */ +static int ramfs_writepage(struct dentry * dentry, struct page *page) +{ + SetPageDirty(page); + return 0; +} + +static int ramfs_prepare_write(struct file *file, struct page *page, unsigned offset, unsigned to) +{ + void *addr; + + addr = (void *) kmap(page); + if (!Page_Uptodate(page)) { + memset(addr, 0, PAGE_CACHE_SIZE); + SetPageUptodate(page); + } + SetPageDirty(page); + return 0; +} + +static int ramfs_commit_write(struct file *file, struct page *page, unsigned offset, unsigned to) +{ + struct inode *inode = (struct inode*)page->mapping->host; + loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to; + + kunmap(page); + if (pos > inode->i_size) + inode->i_size = pos; + return 0; +} + +struct inode *ramfs_get_inode(struct super_block *sb, int mode, int dev) +{ + struct inode * inode = get_empty_inode(); + + if (inode) { + inode->i_sb = sb; + inode->i_dev = sb->s_dev; + inode->i_mode = mode; + inode->i_uid = current->fsuid; + inode->i_gid = current->fsgid; + inode->i_size = 0; + inode->i_blksize = PAGE_CACHE_SIZE; + inode->i_blocks = 0; + inode->i_rdev = dev; + inode->i_nlink = 1; + inode->i_op = NULL; + inode->i_fop = NULL; + inode->i_mapping->a_ops = &ramfs_aops; + inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; + switch (mode & S_IFMT) { + default: + init_special_inode(inode, mode, dev); + break; + case S_IFREG: + inode->i_fop = &ramfs_file_operations; + break; + case S_IFDIR: + inode->i_op = &ramfs_dir_inode_operations; + inode->i_fop = &ramfs_dir_operations; + break; + case S_IFLNK: + inode->i_op = &page_symlink_inode_operations; + break; + } + } + return inode; +} + +/* + * File creation. Allocate an inode, and we're done.. 
+ */ +static int ramfs_mknod(struct inode *dir, struct dentry *dentry, int mode, int dev) +{ + struct inode * inode = ramfs_get_inode(dir->i_sb, mode, dev); + int error = -ENOSPC; + + if (inode) { + d_instantiate(dentry, inode); + dget(dentry); /* Extra count - pin the dentry in core */ + error = 0; + } + return error; +} + +static int ramfs_mkdir(struct inode * dir, struct dentry * dentry, int mode) +{ + return ramfs_mknod(dir, dentry, mode | S_IFDIR, 0); +} + +static int ramfs_create(struct inode *dir, struct dentry *dentry, int mode) +{ + return ramfs_mknod(dir, dentry, mode | S_IFREG, 0); +} + +/* + * Link a file.. + */ +static int ramfs_link(struct dentry *old_dentry, struct inode * dir, struct dentry * dentry) +{ + struct inode *inode = old_dentry->d_inode; + + if (S_ISDIR(inode->i_mode)) + return -EPERM; + + inode->i_nlink++; + inode->i_count++; /* New dentry reference */ + dget(dentry); /* Extra pinning count for the created dentry */ + d_instantiate(dentry, inode); + return 0; +} + +static inline int ramfs_positive(struct dentry *dentry) +{ + return dentry->d_inode && !d_unhashed(dentry); +} + +/* + * Check that a directory is empty (this works + * for regular files too, they'll just always be + * considered empty..). + * + * Note that an empty directory can still have + * children, they just all have to be negative.. + */ +static int ramfs_empty(struct dentry *dentry) +{ + struct list_head *list = dentry->d_subdirs.next; + + while (list != &dentry->d_subdirs) { + struct dentry *de = list_entry(list, struct dentry, d_child); + + if (ramfs_positive(de)) + return 0; + list = list->next; + } + return 1; +} + +/* + * This works for both directories and regular files. 
+ * (non-directories will always have empty subdirs) + */ +static int ramfs_unlink(struct inode * dir, struct dentry *dentry) +{ + int retval = -ENOTEMPTY; + + if (ramfs_empty(dentry)) { + struct inode *inode = dentry->d_inode; + + inode->i_nlink--; + dput(dentry); /* Undo the count from "create" - this does all the work */ + d_delete(dentry); + retval = 0; + } + return retval; +} + +#define ramfs_rmdir ramfs_unlink + +/* + * The VFS layer already does all the dentry stuff for rename, + * we just have to decrement the usage count for the target if + * it exists so that the VFS layer correctly free's it when it + * gets overwritten. + */ +static int ramfs_rename(struct inode * old_dir, struct dentry *old_dentry, struct inode * new_dir,struct dentry *new_dentry) +{ + int error = -ENOTEMPTY; + + if (ramfs_empty(new_dentry)) { + struct inode *inode = new_dentry->d_inode; + if (inode) { + inode->i_nlink--; + dput(new_dentry); + } + error = 0; + } + return error; +} + +static int ramfs_symlink(struct inode * dir, struct dentry *dentry, const char * symname) +{ + int error; + + error = ramfs_mknod(dir, dentry, S_IFLNK | S_IRWXUGO, 0); + if (!error) { + int l = strlen(symname)+1; + struct inode *inode = dentry->d_inode; + error = block_symlink(inode, symname, l); + } + return error; +} + +/* + * This really should be the same as the proc filldir, + * once proc does the "one dentry tree" thing.. 
+ */ +static int ramfs_readdir(struct file * filp, void * dirent, filldir_t filldir) +{ + int i; + struct dentry *dentry = filp->f_dentry; + + i = filp->f_pos; + switch (i) { + case 0: + if (filldir(dirent, ".", 1, i, dentry->d_inode->i_ino) < 0) + break; + i++; + filp->f_pos++; + /* fallthrough */ + case 1: + if (filldir(dirent, "..", 2, i, dentry->d_parent->d_inode->i_ino) < 0) + break; + i++; + filp->f_pos++; + /* fallthrough */ + default: { + struct list_head *list = dentry->d_subdirs.next; + + int j = i-2; + for (;;) { + if (list == &dentry->d_subdirs) + return 0; + if (!j) + break; + j--; + list = list->next; + } + + do { + struct dentry *de = list_entry(list, struct dentry, d_child); + + if (ramfs_positive(de)) { + if (filldir(dirent, de->d_name.name, de->d_name.len, filp->f_pos, de->d_inode->i_ino) < 0) + break; + } + filp->f_pos++; + list = list->next; + } while (list != &dentry->d_subdirs); + } + } + return 0; +} + +static struct address_space_operations ramfs_aops = { + readpage: ramfs_readpage, + writepage: ramfs_writepage, + prepare_write: ramfs_prepare_write, + commit_write: ramfs_commit_write +}; + +static struct file_operations ramfs_file_operations = { + read: generic_file_read, + write: generic_file_write, + mmap: generic_file_mmap +}; + +static struct file_operations ramfs_dir_operations = { + read: generic_read_dir, + readdir: ramfs_readdir, +}; + +static struct inode_operations ramfs_dir_inode_operations = { + create: ramfs_create, + lookup: ramfs_lookup, + link: ramfs_link, + unlink: ramfs_unlink, + symlink: ramfs_symlink, + mkdir: ramfs_mkdir, + rmdir: ramfs_rmdir, + mknod: ramfs_mknod, + rename: ramfs_rename, +}; + + +static struct super_operations ramfs_ops = { + statfs: ramfs_statfs, +}; + +static struct super_block *ramfs_read_super(struct super_block * sb, void * data, int silent) +{ + struct inode * inode; + struct dentry * root; + + sb->s_blocksize = PAGE_CACHE_SIZE; + sb->s_blocksize_bits = PAGE_CACHE_SHIFT; + sb->s_magic = 
RAMFS_MAGIC; + sb->s_op = &ramfs_ops; + inode = ramfs_get_inode(sb, S_IFDIR | 0755, 0); + if (!inode) + return NULL; + + root = d_alloc_root(inode); + if (!root) { + iput(inode); + return NULL; + } + sb->s_root = root; + return sb; +} + +static DECLARE_FSTYPE(ramfs_fs_type, "ramfs", ramfs_read_super, 0); + +static int __init init_ramfs_fs(void) +{ + return register_filesystem(&ramfs_fs_type); +} + +static void __exit exit_ramfs_fs(void) +{ + unregister_filesystem(&ramfs_fs_type); +} + +module_init(init_ramfs_fs) +module_exit(exit_ramfs_fs) diff --git a/fs/read_write.c b/fs/read_write.c index e82aaea96..4569ee18a 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -64,17 +64,11 @@ asmlinkage off_t sys_lseek(unsigned int fd, off_t offset, unsigned int origin) { off_t retval; struct file * file; - struct dentry * dentry; - struct inode * inode; retval = -EBADF; file = fget(fd); if (!file) goto bad; - /* N.B. Shouldn't this be ENOENT?? */ - if (!(dentry = file->f_dentry) || - !(inode = dentry->d_inode)) - goto out_putf; retval = -EINVAL; if (origin <= 2) { loff_t res = llseek(file, offset, origin); @@ -82,7 +76,6 @@ asmlinkage off_t sys_lseek(unsigned int fd, off_t offset, unsigned int origin) if (res != (loff_t)retval) retval = -EOVERFLOW; /* LFS: should only happen on 32 bit platforms */ } -out_putf: fput(file); bad: return retval; @@ -95,18 +88,12 @@ asmlinkage long sys_llseek(unsigned int fd, unsigned long offset_high, { int retval; struct file * file; - struct dentry * dentry; - struct inode * inode; loff_t offset; retval = -EBADF; file = fget(fd); if (!file) goto bad; - /* N.B. Shouldn't this be ENOENT?? 
*/ - if (!(dentry = file->f_dentry) || - !(inode = dentry->d_inode)) - goto out_putf; retval = -EINVAL; if (origin > 2) goto out_putf; diff --git a/fs/super.c b/fs/super.c index 302487807..c7848eb90 100644 --- a/fs/super.c +++ b/fs/super.c @@ -91,6 +91,19 @@ static struct file_system_type **find_filesystem(const char *name) return p; } +/** + * register_filesystem - register a new filesystem + * @fs: the file system structure + * + * Adds the file system passed to the list of file systems the kernel + * is aware of for by mount and other syscalls. Returns 0 on success, + * or a negative errno code on an error. + * + * The file_system_type that is passed is linked into the kernel + * structures and must not be freed until the file system has been + * unregistered. + */ + int register_filesystem(struct file_system_type * fs) { int res = 0; @@ -110,6 +123,18 @@ int register_filesystem(struct file_system_type * fs) return res; } +/** + * unregister_filesystem - unregister a file system + * @fs: filesystem to unregister + * + * Remove a file system that was previously successfully registered + * with the kernel. An error is returned if the file system is not found. + * Zero is returned on a success. + * + * Once this function has returned the file_system_type structure may be + * freed or reused. 
+ */ + int unregister_filesystem(struct file_system_type * fs) { struct file_system_type ** tmp; @@ -192,7 +217,6 @@ asmlinkage long sys_sysfs(int option, unsigned long arg1, unsigned long arg2) { int retval = -EINVAL; - lock_kernel(); switch (option) { case 1: retval = fs_index((const char *) arg1); @@ -206,7 +230,6 @@ asmlinkage long sys_sysfs(int option, unsigned long arg1, unsigned long arg2) retval = fs_maxindex(); break; } - unlock_kernel(); return retval; } @@ -253,7 +276,7 @@ static struct vfsmount *add_vfsmnt(struct super_block *sb, const char *dev_name, const char *dir_name) { struct vfsmount *lptr; - char *tmp, *name; + char *name; lptr = (struct vfsmount *)kmalloc(sizeof(struct vfsmount), GFP_KERNEL); if (!lptr) @@ -264,21 +287,19 @@ static struct vfsmount *add_vfsmnt(struct super_block *sb, lptr->mnt_dev = sb->s_dev; /* N.B. Is it really OK to have a vfsmount without names? */ - if (dev_name && !IS_ERR(tmp = getname(dev_name))) { - name = (char *) kmalloc(strlen(tmp)+1, GFP_KERNEL); + if (dev_name) { + name = (char *) kmalloc(strlen(dev_name)+1, GFP_KERNEL); if (name) { - strcpy(name, tmp); + strcpy(name, dev_name); lptr->mnt_devname = name; } - putname(tmp); } - if (dir_name && !IS_ERR(tmp = getname(dir_name))) { - name = (char *) kmalloc(strlen(tmp)+1, GFP_KERNEL); + if (dir_name) { + name = (char *) kmalloc(strlen(dir_name)+1, GFP_KERNEL); if (name) { - strcpy(name, tmp); + strcpy(name, dir_name); lptr->mnt_dirname = name; } - putname(tmp); } if (vfsmntlist == (struct vfsmount *)NULL) { @@ -343,13 +364,16 @@ static struct proc_fs_info { static struct proc_nfs_info { int flag; char *str; + char *nostr; } nfs_info[] = { - { NFS_MOUNT_SOFT, ",soft" }, - { NFS_MOUNT_INTR, ",intr" }, - { NFS_MOUNT_POSIX, ",posix" }, - { NFS_MOUNT_NOCTO, ",nocto" }, - { NFS_MOUNT_NOAC, ",noac" }, - { 0, NULL } + { NFS_MOUNT_SOFT, ",soft", ",hard" }, + { NFS_MOUNT_INTR, ",intr", "" }, + { NFS_MOUNT_POSIX, ",posix", "" }, + { NFS_MOUNT_TCP, ",tcp", ",udp" }, + { 
NFS_MOUNT_NOCTO, ",nocto", "" }, + { NFS_MOUNT_NOAC, ",noac", "" }, + { NFS_MOUNT_NONLM, ",nolock", ",lock" }, + { 0, NULL, NULL } }; int get_filesystem_info( char *buf ) @@ -362,9 +386,10 @@ int get_filesystem_info( char *buf ) char *path,*buffer = (char *) __get_free_page(GFP_KERNEL); if (!buffer) return 0; - for (tmp = vfsmntlist; tmp && len < PAGE_SIZE - 160; - tmp = tmp->mnt_next) { - path = d_path(tmp->mnt_sb->s_root, buffer, PAGE_SIZE); + for (tmp = vfsmntlist; tmp && len < PAGE_SIZE - 160; tmp = tmp->mnt_next) { + if (!tmp->mnt_sb || !tmp->mnt_sb->s_root) + continue; + path = d_path(tmp->mnt_sb->s_root, tmp, buffer, PAGE_SIZE); if (!path) continue; len += sprintf( buf + len, "%s %s %s %s", @@ -379,14 +404,11 @@ int get_filesystem_info( char *buf ) } if (!strcmp("nfs", tmp->mnt_sb->s_type->name)) { nfss = &tmp->mnt_sb->u.nfs_sb.s_server; - if (nfss->rsize != NFS_DEF_FILE_IO_BUFFER_SIZE) { - len += sprintf(buf+len, ",rsize=%d", - nfss->rsize); - } - if (nfss->wsize != NFS_DEF_FILE_IO_BUFFER_SIZE) { - len += sprintf(buf+len, ",wsize=%d", - nfss->wsize); - } + len += sprintf(buf+len, ",v%d", nfss->rpc_ops->version); + + len += sprintf(buf+len, ",rsize=%d", nfss->rsize); + + len += sprintf(buf+len, ",wsize=%d", nfss->wsize); #if 0 if (nfss->timeo != 7*HZ/10) { len += sprintf(buf+len, ",timeo=%d", @@ -414,10 +436,13 @@ int get_filesystem_info( char *buf ) nfss->acdirmax/HZ); } for (nfs_infop = nfs_info; nfs_infop->flag; nfs_infop++) { - if (nfss->flags & nfs_infop->flag) { - strcpy(buf + len, nfs_infop->str); - len += strlen(nfs_infop->str); - } + char *str; + if (nfss->flags & nfs_infop->flag) + str = nfs_infop->str; + else + str = nfs_infop->nostr; + strcpy(buf + len, str); + len += strlen(str); } len += sprintf(buf+len, ",addr=%s", nfss->hostname); @@ -429,6 +454,14 @@ int get_filesystem_info( char *buf ) return len; } +/** + * __wait_on_super - wait on a superblock + * @sb: superblock to wait on + * + * Waits for a superblock to become unlocked and then 
returns. It does + * not take the lock. This is an internal function. See wait_on_super. + */ + void __wait_on_super(struct super_block * sb) { DECLARE_WAITQUEUE(wait, current); @@ -473,6 +506,14 @@ void sync_supers(kdev_t dev) } } +/** + * get_super - get the superblock of a device + * @dev: device to get the super block for + * + * Scans the superblock list and finds the superblock of the file system + * mounted on the device given. NULL is returned if no match is found. + */ + struct super_block * get_super(kdev_t dev) { struct super_block * s; @@ -517,9 +558,15 @@ out: return err; } -/* - * Find a super_block with no device assigned. +/** + * get_empty_super - find empty superblocks + * + * Find a super_block with no device assigned. A free superblock is + * found and returned. If neccessary new superblocks are allocated. + * NULL is returned if there are insufficient resources to complete + * the request */ + struct super_block *get_empty_super(void) { struct super_block *s; @@ -850,7 +897,7 @@ int fs_may_mount(kdev_t dev) * Anyone using this new feature must know what he/she is doing. */ -int do_mount(struct block_device *bdev, const char *dev_name, +static int do_mount(struct block_device *bdev, const char *dev_name, const char *dir_name, const char * type, int flags, void * data) { kdev_t dev; @@ -881,7 +928,7 @@ int do_mount(struct block_device *bdev, const char *dev_name, /* * Do the lookup first to force automounting. */ - dir_d = namei(dir_name); + dir_d = lookup_dentry(dir_name, LOOKUP_FOLLOW|LOOKUP_POSITIVE); error = PTR_ERR(dir_d); if (IS_ERR(dir_d)) goto out; @@ -981,11 +1028,10 @@ static int do_remount_sb(struct super_block *sb, int flags, char *data) sb->s_flags = (sb->s_flags & ~MS_RMT_MASK) | (flags & MS_RMT_MASK); /* - * Invalidate the inodes, as some mount options may be changed. - * N.B. If we are changing media, we should check the return - * from invalidate_inodes ... can't allow _any_ open files. 
+ * We can't invalidate inodes as we can loose data when remounting + * (someone might manage to alter data while we are waiting in lock_super() + * or in foo_remount_fs())) */ - invalidate_inodes(sb); return 0; } @@ -995,11 +1041,10 @@ static int do_remount(const char *dir,int flags,char *data) struct dentry *dentry; int retval; - dentry = namei(dir); + dentry = lookup_dentry(dir, LOOKUP_FOLLOW|LOOKUP_POSITIVE); retval = PTR_ERR(dentry); if (!IS_ERR(dentry)) { struct super_block * sb = dentry->d_inode->i_sb; - retval = -ENODEV; if (sb) { retval = -EINVAL; @@ -1062,8 +1107,8 @@ static int copy_mount_options (const void * data, unsigned long *where) * aren't used, as the syscall assumes we are talking to an older * version that didn't understand them. */ -long do_sys_mount(char * dev_name, char * dir_name, unsigned long type_page, - unsigned long new_flags, unsigned long data_page) +long do_sys_mount(char * dev_name, char * dir_name, char *type_page, + unsigned long new_flags, void *data_page) { struct file_system_type * fstype; struct dentry * dentry = NULL; @@ -1071,6 +1116,15 @@ long do_sys_mount(char * dev_name, char * dir_name, unsigned long type_page, struct block_device *bdev = NULL; int retval; unsigned long flags = 0; + + /* Basic sanity checks */ + + if (!dir_name || !*dir_name || !memchr(dir_name, 0, PAGE_SIZE)) + return -EINVAL; + if (!type_page || !memchr(type_page, 0, PAGE_SIZE)) + return -EINVAL; + if (dev_name && !memchr(dev_name, 0, PAGE_SIZE)) + return -EINVAL; if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -1083,7 +1137,7 @@ long do_sys_mount(char * dev_name, char * dir_name, unsigned long type_page, goto out; } - fstype = get_fs_type((char *) type_page); + fstype = get_fs_type(type_page); retval = -ENODEV; if (!fstype) goto out; @@ -1091,7 +1145,10 @@ long do_sys_mount(char * dev_name, char * dir_name, unsigned long type_page, if (fstype->fs_flags & FS_REQUIRES_DEV) { struct block_device_operations *bdops; - dentry = namei(dev_name); + retval = 
-EINVAL; + if (!dev_name || !*dev_name) + goto fs_out; + dentry = lookup_dentry(dev_name, LOOKUP_FOLLOW|LOOKUP_POSITIVE); retval = PTR_ERR(dentry); if (IS_ERR(dentry)) goto fs_out; @@ -1114,7 +1171,7 @@ long do_sys_mount(char * dev_name, char * dir_name, unsigned long type_page, flags = new_flags & ~MS_MGC_MSK; retval = do_mount(bdev, dev_name, dir_name, fstype->name, flags, - (void *) data_page); + data_page); dput_and_out: dput(dentry); @@ -1130,6 +1187,8 @@ asmlinkage long sys_mount(char * dev_name, char * dir_name, char * type, int retval; unsigned long data_page = 0; unsigned long type_page = 0; + unsigned long dev_page = 0; + char *dir_page; lock_kernel(); retval = copy_mount_options (type, &type_page); @@ -1146,12 +1205,24 @@ asmlinkage long sys_mount(char * dev_name, char * dir_name, char * type, goto out; } + dir_page = getname(dir_name); + retval = PTR_ERR(dir_page); + if (IS_ERR(dir_page)) + goto out1; + + retval = copy_mount_options (dev_name, &dev_page); + if (retval < 0) + goto out2; retval = copy_mount_options (data, &data_page); if (retval >= 0) { - retval = do_sys_mount(dev_name, dir_name, type_page, - new_flags, data_page); + retval = do_sys_mount((char*)dev_page,dir_page,(char*)type_page, + new_flags, (void*)data_page); free_page(data_page); } + free_page(dev_page); +out2: + putname(dir_page); +out1: free_page(type_page); out: unlock_kernel(); @@ -1162,7 +1233,7 @@ void __init mount_root(void) { struct file_system_type * fs_type; struct super_block * sb; - struct vfsmount *vfsmnt; + struct vfsmount *vfsmnt = NULL; struct block_device *bdev = NULL; mode_t mode; int retval; @@ -1186,7 +1257,9 @@ void __init mount_root(void) sb->s_dirt = 0; sb->s_type = fs_type; current->fs->root = dget(sb->s_root); + current->fs->rootmnt = mntget(vfsmnt); current->fs->pwd = dget(sb->s_root); + current->fs->pwdmnt = mntget(vfsmnt); ROOT_DEV = sb->s_dev; printk (KERN_NOTICE "VFS: Mounted root (NFS filesystem)%s.\n", (sb->s_flags & MS_RDONLY) ? 
" readonly" : ""); return; @@ -1298,7 +1371,9 @@ void __init mount_root(void) mount_it: sb->s_flags = root_mountflags; current->fs->root = dget(sb->s_root); + current->fs->rootmnt = mntget(vfsmnt); current->fs->pwd = dget(sb->s_root); + current->fs->pwdmnt = mntget(vfsmnt); printk ("VFS: Mounted root (%s filesystem)%s.\n", fs_type->name, (sb->s_flags & MS_RDONLY) ? " readonly" : ""); @@ -1321,22 +1396,28 @@ mount_it: static void chroot_fs_refs(struct dentry *old_root, - struct dentry *new_root) + struct vfsmount *old_rootmnt, + struct dentry *new_root, + struct vfsmount *new_rootmnt) { struct task_struct *p; read_lock(&tasklist_lock); for_each_task(p) { if (!p->fs) continue; - if (p->fs->root == old_root) { - dput(old_root); + if (p->fs->root == old_root && p->fs->rootmnt == old_rootmnt) { p->fs->root = dget(new_root); + p->fs->rootmnt = mntget(new_rootmnt); + mntput(old_rootmnt); + dput(old_root); printk(KERN_DEBUG "chroot_fs_refs: changed root of " "process %d\n",p->pid); } - if (p->fs->pwd == old_root) { - dput(old_root); + if (p->fs->pwd == old_root && p->fs->pwdmnt == old_rootmnt) { p->fs->pwd = dget(new_root); + p->fs->pwdmnt = mntget(new_rootmnt); + mntput(old_rootmnt); + dput(old_root); printk(KERN_DEBUG "chroot_fs_refs: changed cwd of " "process %d\n",p->pid); } @@ -1344,7 +1425,6 @@ static void chroot_fs_refs(struct dentry *old_root, read_unlock(&tasklist_lock); } - /* * Moves the current root to put_root, and sets root/cwd of all processes * which had them on the old root to new_root. 
@@ -1360,9 +1440,11 @@ static void chroot_fs_refs(struct dentry *old_root, asmlinkage long sys_pivot_root(const char *new_root, const char *put_old) { struct dentry *root = current->fs->root; + struct vfsmount *root_mnt = current->fs->rootmnt; struct dentry *d_new_root, *d_put_old, *covered; struct dentry *root_dev_root, *new_root_dev_root; struct dentry *walk, *next; + struct vfsmount *new_root_mnt = NULL; int error; if (!capable(CAP_SYS_ADMIN)) @@ -1414,7 +1496,7 @@ asmlinkage long sys_pivot_root(const char *new_root, const char *put_old) root_dev_root = root->d_sb->s_root; root_dev_root->d_covers = dget(d_put_old); d_put_old->d_mounts = root_dev_root; - chroot_fs_refs(root,d_new_root); + chroot_fs_refs(root,root_mnt,d_new_root,new_root_mnt); error = 0; out2: up(&mount_sem); @@ -1444,7 +1526,7 @@ int __init change_root(kdev_t new_root_dev,const char *put_old) return -EBUSY; } /* First unmount devfs if mounted */ - dir_d = lookup_dentry ("/dev", NULL, 1); + dir_d = lookup_dentry ("/dev", LOOKUP_FOLLOW|LOOKUP_POSITIVE); if (!IS_ERR(dir_d)) { struct super_block *sb = dir_d->d_inode->i_sb; @@ -1467,12 +1549,9 @@ int __init change_root(kdev_t new_root_dev,const char *put_old) /* * Get the new mount directory */ - dir_d = lookup_dentry(put_old, NULL, 1); + dir_d = lookup_dentry(put_old, LOOKUP_FOLLOW|LOOKUP_POSITIVE); if (IS_ERR(dir_d)) { error = PTR_ERR(dir_d); - } else if (!dir_d->d_inode) { - dput(dir_d); - error = -ENOENT; } else { error = 0; } diff --git a/fs/udf/file.c b/fs/udf/file.c index 96297521b..cbaf77929 100644 --- a/fs/udf/file.c +++ b/fs/udf/file.c @@ -315,7 +315,7 @@ static int udf_release_file(struct inode * inode, struct file * filp) */ static int udf_open_file(struct inode * inode, struct file * filp) { - if ((inode->i_size & 0xFFFFFFFF00000000UL) && !(filp->f_flags & O_LARGEFILE)) + if ((inode->i_size & 0xFFFFFFFF00000000ULL) && !(filp->f_flags & O_LARGEFILE)) return -EFBIG; return 0; } diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 
ed1507fa7..b0a9c42c0 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -78,7 +78,9 @@ void udf_put_inode(struct inode * inode) { lock_kernel(); udf_discard_prealloc(inode); - write_inode_now(inode); + /* write the root inode on put, if dirty */ + if (!inode->i_sb->s_root && inode->i_state & I_DIRTY) + udf_update_inode(inode, IS_SYNC(inode)); unlock_kernel(); } } @@ -111,7 +113,7 @@ void udf_delete_inode(struct inode * inode) inode->i_size = 0; udf_truncate(inode); - write_inode_now(inode); + udf_update_inode(inode, IS_SYNC(inode)); udf_free_inode(inode); out: unlock_kernel(); @@ -1005,10 +1007,6 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh) inode->i_nlink = 1; inode->i_size = le64_to_cpu(fe->informationLength); -#if BITS_PER_LONG < 64 - if (le64_to_cpu(fe->informationLength) & 0xFFFFFFFF00000000ULL) - inode->i_size = (Uint32)-1; -#endif inode->i_mode = udf_convert_permissions(fe); inode->i_mode &= ~UDF_SB(inode->i_sb)->s_umask; diff --git a/fs/ufs/symlink.c b/fs/ufs/symlink.c index 4550f44cf..59a44e22e 100644 --- a/fs/ufs/symlink.c +++ b/fs/ufs/symlink.c @@ -33,10 +33,10 @@ static int ufs_readlink(struct dentry *dentry, char *buffer, int buflen) return vfs_readlink(dentry, buffer, buflen, s); } -static struct dentry *ufs_follow_link(struct dentry *dentry, struct dentry *base, unsigned flags) +static int ufs_follow_link(struct dentry *dentry, struct nameidata *nd) { char *s = (char *)dentry->d_inode->u.ufs_i.i_u1.i_symlink; - return vfs_follow_link(dentry, base, flags, s); + return vfs_follow_link(nd, s); } struct inode_operations ufs_fast_symlink_inode_operations = { diff --git a/fs/umsdos/dir.c b/fs/umsdos/dir.c index 18dd76572..29eebb3f2 100644 --- a/fs/umsdos/dir.c +++ b/fs/umsdos/dir.c @@ -651,9 +651,7 @@ char * umsdos_d_path(struct dentry *dentry, char * buffer, int len) struct dentry * old_root = current->fs->root; char * path; - /* N.B. not safe -- fix this soon! 
*/ - current->fs->root = dentry->d_sb->s_root; - path = d_path(dentry, buffer, len); + path = __d_path(dentry, NULL, dentry->d_sb->s_root, NULL, buffer, len); if (*path == '/') path++; /* skip leading '/' */ @@ -665,7 +663,6 @@ char * umsdos_d_path(struct dentry *dentry, char * buffer, int len) memcpy(path, UMSDOS_PSDROOT_NAME, UMSDOS_PSDROOT_LEN); } - current->fs->root = old_root; return path; } |